bdev.c 35 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
7127812791280128112821283128412851286128712881289129012911292129312941295129612971298129913001301130213031304130513061307130813091310131113121313131413151316131713181319132013211322132313241325132613271328132913301331133213331334133513361337133813391340134113421343134413451346134713481349135013511352135313541355135613571358135913601361136213631364136513661367136813691370137113721373137413751376137713781379138013811382
  1. // SPDX-License-Identifier: GPL-2.0-only
  2. /*
  3. * Copyright (C) 1991, 1992 Linus Torvalds
  4. * Copyright (C) 2001 Andrea Arcangeli <andrea@suse.de> SuSE
  5. * Copyright (C) 2016 - 2020 Christoph Hellwig
  6. */
  7. #include <linux/init.h>
  8. #include <linux/mm.h>
  9. #include <linux/slab.h>
  10. #include <linux/kmod.h>
  11. #include <linux/major.h>
  12. #include <linux/device_cgroup.h>
  13. #include <linux/blkdev.h>
  14. #include <linux/blk-integrity.h>
  15. #include <linux/backing-dev.h>
  16. #include <linux/module.h>
  17. #include <linux/blkpg.h>
  18. #include <linux/magic.h>
  19. #include <linux/buffer_head.h>
  20. #include <linux/swap.h>
  21. #include <linux/writeback.h>
  22. #include <linux/mount.h>
  23. #include <linux/pseudo_fs.h>
  24. #include <linux/uio.h>
  25. #include <linux/namei.h>
  26. #include <linux/security.h>
  27. #include <linux/part_stat.h>
  28. #include <linux/uaccess.h>
  29. #include <linux/stat.h>
  30. #include "../fs/internal.h"
  31. #include "blk.h"
/*
 * Should we allow writing to mounted block devices?  The initial value is
 * taken from the build-time CONFIG_BLK_DEV_WRITE_MOUNTED option.
 */
static bool bdev_allow_write_mounted = IS_ENABLED(CONFIG_BLK_DEV_WRITE_MOUNTED);
/*
 * A block_device and its inode embedded in one object, so that
 * container_of() can convert a pointer to either member into the other
 * (see BDEV_I() and BD_INODE()).
 */
struct bdev_inode {
	struct block_device bdev;
	struct inode vfs_inode;
};
  38. static inline struct bdev_inode *BDEV_I(struct inode *inode)
  39. {
  40. return container_of(inode, struct bdev_inode, vfs_inode);
  41. }
  42. static inline struct inode *BD_INODE(struct block_device *bdev)
  43. {
  44. return &container_of(bdev, struct bdev_inode, bdev)->vfs_inode;
  45. }
  46. struct block_device *I_BDEV(struct inode *inode)
  47. {
  48. return &BDEV_I(inode)->bdev;
  49. }
  50. EXPORT_SYMBOL(I_BDEV);
  51. struct block_device *file_bdev(struct file *bdev_file)
  52. {
  53. return I_BDEV(bdev_file->f_mapping->host);
  54. }
  55. EXPORT_SYMBOL(file_bdev);
  56. static void bdev_write_inode(struct block_device *bdev)
  57. {
  58. struct inode *inode = BD_INODE(bdev);
  59. int ret;
  60. spin_lock(&inode->i_lock);
  61. while (inode_state_read(inode) & I_DIRTY) {
  62. spin_unlock(&inode->i_lock);
  63. ret = write_inode_now(inode, true);
  64. if (ret)
  65. pr_warn_ratelimited(
  66. "VFS: Dirty inode writeback failed for block device %pg (err=%d).\n",
  67. bdev, ret);
  68. spin_lock(&inode->i_lock);
  69. }
  70. spin_unlock(&inode->i_lock);
  71. }
  72. /* Kill _all_ buffers and pagecache , dirty or not.. */
  73. static void kill_bdev(struct block_device *bdev)
  74. {
  75. struct address_space *mapping = bdev->bd_mapping;
  76. if (mapping_empty(mapping))
  77. return;
  78. invalidate_bh_lrus();
  79. truncate_inode_pages(mapping, 0);
  80. }
  81. /* Invalidate clean unused buffers and pagecache. */
  82. void invalidate_bdev(struct block_device *bdev)
  83. {
  84. struct address_space *mapping = bdev->bd_mapping;
  85. if (mapping->nrpages) {
  86. invalidate_bh_lrus();
  87. lru_add_drain_all(); /* make sure all lru add caches are flushed */
  88. invalidate_mapping_pages(mapping, 0, -1);
  89. }
  90. }
  91. EXPORT_SYMBOL(invalidate_bdev);
  92. /*
  93. * Drop all buffers & page cache for given bdev range. This function bails
  94. * with error if bdev has other exclusive owner (such as filesystem).
  95. */
  96. int truncate_bdev_range(struct block_device *bdev, blk_mode_t mode,
  97. loff_t lstart, loff_t lend)
  98. {
  99. /*
  100. * If we don't hold exclusive handle for the device, upgrade to it
  101. * while we discard the buffer cache to avoid discarding buffers
  102. * under live filesystem.
  103. */
  104. if (!(mode & BLK_OPEN_EXCL)) {
  105. int err = bd_prepare_to_claim(bdev, truncate_bdev_range, NULL);
  106. if (err)
  107. goto invalidate;
  108. }
  109. truncate_inode_pages_range(bdev->bd_mapping, lstart, lend);
  110. if (!(mode & BLK_OPEN_EXCL))
  111. bd_abort_claiming(bdev, truncate_bdev_range);
  112. return 0;
  113. invalidate:
  114. /*
  115. * Someone else has handle exclusively open. Try invalidating instead.
  116. * The 'end' argument is inclusive so the rounding is safe.
  117. */
  118. return invalidate_inode_pages2_range(bdev->bd_mapping,
  119. lstart >> PAGE_SHIFT,
  120. lend >> PAGE_SHIFT);
  121. }
  122. static void set_init_blocksize(struct block_device *bdev)
  123. {
  124. unsigned int bsize = bdev_logical_block_size(bdev);
  125. loff_t size = i_size_read(BD_INODE(bdev));
  126. while (bsize < PAGE_SIZE) {
  127. if (size & bsize)
  128. break;
  129. bsize <<= 1;
  130. }
  131. BD_INODE(bdev)->i_blkbits = blksize_bits(bsize);
  132. mapping_set_folio_min_order(BD_INODE(bdev)->i_mapping,
  133. get_order(bsize));
  134. }
  135. /**
  136. * bdev_validate_blocksize - check that this block size is acceptable
  137. * @bdev: blockdevice to check
  138. * @block_size: block size to check
  139. *
  140. * For block device users that do not use buffer heads or the block device
  141. * page cache, make sure that this block size can be used with the device.
  142. *
  143. * Return: On success zero is returned, negative error code on failure.
  144. */
  145. int bdev_validate_blocksize(struct block_device *bdev, int block_size)
  146. {
  147. if (blk_validate_block_size(block_size))
  148. return -EINVAL;
  149. /* Size cannot be smaller than the size supported by the device */
  150. if (block_size < bdev_logical_block_size(bdev))
  151. return -EINVAL;
  152. return 0;
  153. }
  154. EXPORT_SYMBOL_GPL(bdev_validate_blocksize);
  155. int set_blocksize(struct file *file, int size)
  156. {
  157. struct inode *inode = file->f_mapping->host;
  158. struct block_device *bdev = I_BDEV(inode);
  159. int ret;
  160. ret = bdev_validate_blocksize(bdev, size);
  161. if (ret)
  162. return ret;
  163. if (!file->private_data)
  164. return -EINVAL;
  165. /* Don't change the size if it is same as current */
  166. if (inode->i_blkbits != blksize_bits(size)) {
  167. /*
  168. * Flush and truncate the pagecache before we reconfigure the
  169. * mapping geometry because folio sizes are variable now. If a
  170. * reader has already allocated a folio whose size is smaller
  171. * than the new min_order but invokes readahead after the new
  172. * min_order becomes visible, readahead will think there are
  173. * "zero" blocks per folio and crash. Take the inode and
  174. * invalidation locks to avoid racing with
  175. * read/write/fallocate.
  176. */
  177. inode_lock(inode);
  178. filemap_invalidate_lock(inode->i_mapping);
  179. sync_blockdev(bdev);
  180. kill_bdev(bdev);
  181. inode->i_blkbits = blksize_bits(size);
  182. mapping_set_folio_min_order(inode->i_mapping, get_order(size));
  183. filemap_invalidate_unlock(inode->i_mapping);
  184. inode_unlock(inode);
  185. }
  186. return 0;
  187. }
  188. EXPORT_SYMBOL(set_blocksize);
  189. static int sb_validate_large_blocksize(struct super_block *sb, int size)
  190. {
  191. const char *err_str = NULL;
  192. if (!(sb->s_type->fs_flags & FS_LBS))
  193. err_str = "not supported by filesystem";
  194. else if (!IS_ENABLED(CONFIG_TRANSPARENT_HUGEPAGE))
  195. err_str = "is only supported with CONFIG_TRANSPARENT_HUGEPAGE";
  196. if (!err_str)
  197. return 0;
  198. pr_warn_ratelimited("%s: block size(%d) > page size(%lu) %s\n",
  199. sb->s_type->name, size, PAGE_SIZE, err_str);
  200. return -EINVAL;
  201. }
  202. int sb_set_blocksize(struct super_block *sb, int size)
  203. {
  204. if (size > PAGE_SIZE && sb_validate_large_blocksize(sb, size))
  205. return 0;
  206. if (set_blocksize(sb->s_bdev_file, size))
  207. return 0;
  208. /* If we get here, we know size is validated */
  209. sb->s_blocksize = size;
  210. sb->s_blocksize_bits = blksize_bits(size);
  211. return sb->s_blocksize;
  212. }
  213. EXPORT_SYMBOL(sb_set_blocksize);
  214. int __must_check sb_min_blocksize(struct super_block *sb, int size)
  215. {
  216. int minsize = bdev_logical_block_size(sb->s_bdev);
  217. if (size < minsize)
  218. size = minsize;
  219. return sb_set_blocksize(sb, size);
  220. }
  221. EXPORT_SYMBOL(sb_min_blocksize);
  222. int sync_blockdev_nowait(struct block_device *bdev)
  223. {
  224. if (!bdev)
  225. return 0;
  226. return filemap_flush(bdev->bd_mapping);
  227. }
  228. EXPORT_SYMBOL_GPL(sync_blockdev_nowait);
  229. /*
  230. * Write out and wait upon all the dirty data associated with a block
  231. * device via its mapping. Does not take the superblock lock.
  232. */
  233. int sync_blockdev(struct block_device *bdev)
  234. {
  235. if (!bdev)
  236. return 0;
  237. return filemap_write_and_wait(bdev->bd_mapping);
  238. }
  239. EXPORT_SYMBOL(sync_blockdev);
  240. int sync_blockdev_range(struct block_device *bdev, loff_t lstart, loff_t lend)
  241. {
  242. return filemap_write_and_wait_range(bdev->bd_mapping,
  243. lstart, lend);
  244. }
  245. EXPORT_SYMBOL(sync_blockdev_range);
  246. /**
  247. * bdev_freeze - lock a filesystem and force it into a consistent state
  248. * @bdev: blockdevice to lock
  249. *
  250. * If a superblock is found on this device, we take the s_umount semaphore
  251. * on it to make sure nobody unmounts until the snapshot creation is done.
  252. * The reference counter (bd_fsfreeze_count) guarantees that only the last
  253. * unfreeze process can unfreeze the frozen filesystem actually when multiple
  254. * freeze requests arrive simultaneously. It counts up in bdev_freeze() and
  255. * count down in bdev_thaw(). When it becomes 0, thaw_bdev() will unfreeze
  256. * actually.
  257. *
  258. * Return: On success zero is returned, negative error code on failure.
  259. */
  260. int bdev_freeze(struct block_device *bdev)
  261. {
  262. int error = 0;
  263. mutex_lock(&bdev->bd_fsfreeze_mutex);
  264. if (atomic_inc_return(&bdev->bd_fsfreeze_count) > 1) {
  265. mutex_unlock(&bdev->bd_fsfreeze_mutex);
  266. return 0;
  267. }
  268. mutex_lock(&bdev->bd_holder_lock);
  269. if (bdev->bd_holder_ops && bdev->bd_holder_ops->freeze) {
  270. error = bdev->bd_holder_ops->freeze(bdev);
  271. lockdep_assert_not_held(&bdev->bd_holder_lock);
  272. } else {
  273. mutex_unlock(&bdev->bd_holder_lock);
  274. error = sync_blockdev(bdev);
  275. }
  276. if (error)
  277. atomic_dec(&bdev->bd_fsfreeze_count);
  278. mutex_unlock(&bdev->bd_fsfreeze_mutex);
  279. return error;
  280. }
  281. EXPORT_SYMBOL(bdev_freeze);
  282. /**
  283. * bdev_thaw - unlock filesystem
  284. * @bdev: blockdevice to unlock
  285. *
  286. * Unlocks the filesystem and marks it writeable again after bdev_freeze().
  287. *
  288. * Return: On success zero is returned, negative error code on failure.
  289. */
  290. int bdev_thaw(struct block_device *bdev)
  291. {
  292. int error = -EINVAL, nr_freeze;
  293. mutex_lock(&bdev->bd_fsfreeze_mutex);
  294. /*
  295. * If this returns < 0 it means that @bd_fsfreeze_count was
  296. * already 0 and no decrement was performed.
  297. */
  298. nr_freeze = atomic_dec_if_positive(&bdev->bd_fsfreeze_count);
  299. if (nr_freeze < 0)
  300. goto out;
  301. error = 0;
  302. if (nr_freeze > 0)
  303. goto out;
  304. mutex_lock(&bdev->bd_holder_lock);
  305. if (bdev->bd_holder_ops && bdev->bd_holder_ops->thaw) {
  306. error = bdev->bd_holder_ops->thaw(bdev);
  307. lockdep_assert_not_held(&bdev->bd_holder_lock);
  308. } else {
  309. mutex_unlock(&bdev->bd_holder_lock);
  310. }
  311. if (error)
  312. atomic_inc(&bdev->bd_fsfreeze_count);
  313. out:
  314. mutex_unlock(&bdev->bd_fsfreeze_mutex);
  315. return error;
  316. }
  317. EXPORT_SYMBOL(bdev_thaw);
/*
 * pseudo-fs
 */

/* Held while the claim state (bd_claiming, bd_holder, bd_holders) is examined or changed. */
static  __cacheline_aligned_in_smp DEFINE_MUTEX(bdev_lock);
/* Slab cache for struct bdev_inode objects, created in bdev_cache_init(). */
static struct kmem_cache *bdev_cachep __ro_after_init;
  323. static struct inode *bdev_alloc_inode(struct super_block *sb)
  324. {
  325. struct bdev_inode *ei = alloc_inode_sb(sb, bdev_cachep, GFP_KERNEL);
  326. if (!ei)
  327. return NULL;
  328. memset(&ei->bdev, 0, sizeof(ei->bdev));
  329. if (security_bdev_alloc(&ei->bdev)) {
  330. kmem_cache_free(bdev_cachep, ei);
  331. return NULL;
  332. }
  333. return &ei->vfs_inode;
  334. }
  335. static void bdev_free_inode(struct inode *inode)
  336. {
  337. struct block_device *bdev = I_BDEV(inode);
  338. free_percpu(bdev->bd_stats);
  339. kfree(bdev->bd_meta_info);
  340. security_bdev_free(bdev);
  341. if (!bdev_is_partition(bdev)) {
  342. if (bdev->bd_disk && bdev->bd_disk->bdi)
  343. bdi_put(bdev->bd_disk->bdi);
  344. kfree(bdev->bd_disk);
  345. }
  346. if (MAJOR(bdev->bd_dev) == BLOCK_EXT_MAJOR)
  347. blk_free_ext_minor(MINOR(bdev->bd_dev));
  348. kmem_cache_free(bdev_cachep, BDEV_I(inode));
  349. }
  350. static void init_once(void *data)
  351. {
  352. struct bdev_inode *ei = data;
  353. inode_init_once(&ei->vfs_inode);
  354. }
  355. static void bdev_evict_inode(struct inode *inode)
  356. {
  357. truncate_inode_pages_final(&inode->i_data);
  358. invalidate_inode_buffers(inode); /* is it needed here? */
  359. clear_inode(inode);
  360. }
/* Superblock operations of the bdev pseudo-fs; installed by bd_init_fs_context(). */
static const struct super_operations bdev_sops = {
	.statfs = simple_statfs,
	.alloc_inode = bdev_alloc_inode,
	.free_inode = bdev_free_inode,
	.drop_inode = inode_just_drop,
	.evict_inode = bdev_evict_inode,
};
  368. static int bd_init_fs_context(struct fs_context *fc)
  369. {
  370. struct pseudo_fs_context *ctx = init_pseudo(fc, BDEVFS_MAGIC);
  371. if (!ctx)
  372. return -ENOMEM;
  373. fc->s_iflags |= SB_I_CGROUPWB;
  374. ctx->ops = &bdev_sops;
  375. return 0;
  376. }
/* The "bdev" pseudo filesystem type, registered and mounted by bdev_cache_init(). */
static struct file_system_type bd_type = {
	.name = "bdev",
	.init_fs_context = bd_init_fs_context,
	.kill_sb = kill_anon_super,
};
/* Superblock of the internal bdev mount; set once in bdev_cache_init(). */
struct super_block *blockdev_superblock __ro_after_init;
/* The internal kernel mount of bd_type; set once in bdev_cache_init(). */
static struct vfsmount *blockdev_mnt __ro_after_init;
EXPORT_SYMBOL_GPL(blockdev_superblock);
  385. void __init bdev_cache_init(void)
  386. {
  387. int err;
  388. bdev_cachep = kmem_cache_create("bdev_cache", sizeof(struct bdev_inode),
  389. 0, (SLAB_HWCACHE_ALIGN|SLAB_RECLAIM_ACCOUNT|
  390. SLAB_ACCOUNT|SLAB_PANIC),
  391. init_once);
  392. err = register_filesystem(&bd_type);
  393. if (err)
  394. panic("Cannot register bdev pseudo-fs");
  395. blockdev_mnt = kern_mount(&bd_type);
  396. if (IS_ERR(blockdev_mnt))
  397. panic("Cannot create bdev pseudo-fs");
  398. blockdev_superblock = blockdev_mnt->mnt_sb; /* For writeback */
  399. }
  400. struct block_device *bdev_alloc(struct gendisk *disk, u8 partno)
  401. {
  402. struct block_device *bdev;
  403. struct inode *inode;
  404. inode = new_inode(blockdev_superblock);
  405. if (!inode)
  406. return NULL;
  407. inode->i_mode = S_IFBLK;
  408. inode->i_rdev = 0;
  409. inode->i_data.a_ops = &def_blk_aops;
  410. mapping_set_gfp_mask(&inode->i_data, GFP_USER);
  411. bdev = I_BDEV(inode);
  412. mutex_init(&bdev->bd_fsfreeze_mutex);
  413. spin_lock_init(&bdev->bd_size_lock);
  414. mutex_init(&bdev->bd_holder_lock);
  415. atomic_set(&bdev->__bd_flags, partno);
  416. bdev->bd_mapping = &inode->i_data;
  417. bdev->bd_queue = disk->queue;
  418. if (partno && bdev_test_flag(disk->part0, BD_HAS_SUBMIT_BIO))
  419. bdev_set_flag(bdev, BD_HAS_SUBMIT_BIO);
  420. bdev->bd_stats = alloc_percpu(struct disk_stats);
  421. if (!bdev->bd_stats) {
  422. iput(inode);
  423. return NULL;
  424. }
  425. bdev->bd_disk = disk;
  426. return bdev;
  427. }
  428. void bdev_set_nr_sectors(struct block_device *bdev, sector_t sectors)
  429. {
  430. spin_lock(&bdev->bd_size_lock);
  431. i_size_write(BD_INODE(bdev), (loff_t)sectors << SECTOR_SHIFT);
  432. bdev->bd_nr_sectors = sectors;
  433. spin_unlock(&bdev->bd_size_lock);
  434. }
  435. void bdev_add(struct block_device *bdev, dev_t dev)
  436. {
  437. struct inode *inode = BD_INODE(bdev);
  438. if (bdev_stable_writes(bdev))
  439. mapping_set_stable_writes(bdev->bd_mapping);
  440. bdev->bd_dev = dev;
  441. inode->i_rdev = dev;
  442. inode->i_ino = dev;
  443. insert_inode_hash(inode);
  444. }
  445. void bdev_unhash(struct block_device *bdev)
  446. {
  447. remove_inode_hash(BD_INODE(bdev));
  448. }
  449. void bdev_drop(struct block_device *bdev)
  450. {
  451. iput(BD_INODE(bdev));
  452. }
  453. long nr_blockdev_pages(void)
  454. {
  455. struct inode *inode;
  456. long ret = 0;
  457. spin_lock(&blockdev_superblock->s_inode_list_lock);
  458. list_for_each_entry(inode, &blockdev_superblock->s_inodes, i_sb_list)
  459. ret += inode->i_mapping->nrpages;
  460. spin_unlock(&blockdev_superblock->s_inode_list_lock);
  461. return ret;
  462. }
  463. /**
  464. * bd_may_claim - test whether a block device can be claimed
  465. * @bdev: block device of interest
  466. * @holder: holder trying to claim @bdev
  467. * @hops: holder ops
  468. *
  469. * Test whether @bdev can be claimed by @holder.
  470. *
  471. * RETURNS:
  472. * %true if @bdev can be claimed, %false otherwise.
  473. */
  474. static bool bd_may_claim(struct block_device *bdev, void *holder,
  475. const struct blk_holder_ops *hops)
  476. {
  477. struct block_device *whole = bdev_whole(bdev);
  478. lockdep_assert_held(&bdev_lock);
  479. if (bdev->bd_holder) {
  480. /*
  481. * The same holder can always re-claim.
  482. */
  483. if (bdev->bd_holder == holder) {
  484. if (WARN_ON_ONCE(bdev->bd_holder_ops != hops))
  485. return false;
  486. return true;
  487. }
  488. return false;
  489. }
  490. /*
  491. * If the whole devices holder is set to bd_may_claim, a partition on
  492. * the device is claimed, but not the whole device.
  493. */
  494. if (whole != bdev &&
  495. whole->bd_holder && whole->bd_holder != bd_may_claim)
  496. return false;
  497. return true;
  498. }
  499. /**
  500. * bd_prepare_to_claim - claim a block device
  501. * @bdev: block device of interest
  502. * @holder: holder trying to claim @bdev
  503. * @hops: holder ops.
  504. *
  505. * Claim @bdev. This function fails if @bdev is already claimed by another
  506. * holder and waits if another claiming is in progress. return, the caller
  507. * has ownership of bd_claiming and bd_holder[s].
  508. *
  509. * RETURNS:
  510. * 0 if @bdev can be claimed, -EBUSY otherwise.
  511. */
  512. int bd_prepare_to_claim(struct block_device *bdev, void *holder,
  513. const struct blk_holder_ops *hops)
  514. {
  515. struct block_device *whole = bdev_whole(bdev);
  516. if (WARN_ON_ONCE(!holder))
  517. return -EINVAL;
  518. retry:
  519. mutex_lock(&bdev_lock);
  520. /* if someone else claimed, fail */
  521. if (!bd_may_claim(bdev, holder, hops)) {
  522. mutex_unlock(&bdev_lock);
  523. return -EBUSY;
  524. }
  525. /* if claiming is already in progress, wait for it to finish */
  526. if (whole->bd_claiming) {
  527. wait_queue_head_t *wq = __var_waitqueue(&whole->bd_claiming);
  528. DEFINE_WAIT(wait);
  529. prepare_to_wait(wq, &wait, TASK_UNINTERRUPTIBLE);
  530. mutex_unlock(&bdev_lock);
  531. schedule();
  532. finish_wait(wq, &wait);
  533. goto retry;
  534. }
  535. /* yay, all mine */
  536. whole->bd_claiming = holder;
  537. mutex_unlock(&bdev_lock);
  538. return 0;
  539. }
  540. EXPORT_SYMBOL_GPL(bd_prepare_to_claim); /* only for the loop driver */
/*
 * End an in-progress claim on @whole that was started by @holder and wake
 * up anybody waiting on bd_claiming.  Must be called with bdev_lock held;
 * it is a BUG if @holder is not the one currently claiming.
 */
static void bd_clear_claiming(struct block_device *whole, void *holder)
{
	lockdep_assert_held(&bdev_lock);
	/* tell others that we're done */
	BUG_ON(whole->bd_claiming != holder);
	whole->bd_claiming = NULL;
	wake_up_var(&whole->bd_claiming);
}
  549. /**
  550. * bd_finish_claiming - finish claiming of a block device
  551. * @bdev: block device of interest
  552. * @holder: holder that has claimed @bdev
  553. * @hops: block device holder operations
  554. *
  555. * Finish exclusive open of a block device. Mark the device as exlusively
  556. * open by the holder and wake up all waiters for exclusive open to finish.
  557. */
  558. static void bd_finish_claiming(struct block_device *bdev, void *holder,
  559. const struct blk_holder_ops *hops)
  560. {
  561. struct block_device *whole = bdev_whole(bdev);
  562. mutex_lock(&bdev_lock);
  563. BUG_ON(!bd_may_claim(bdev, holder, hops));
  564. /*
  565. * Note that for a whole device bd_holders will be incremented twice,
  566. * and bd_holder will be set to bd_may_claim before being set to holder
  567. */
  568. whole->bd_holders++;
  569. whole->bd_holder = bd_may_claim;
  570. bdev->bd_holders++;
  571. mutex_lock(&bdev->bd_holder_lock);
  572. bdev->bd_holder = holder;
  573. bdev->bd_holder_ops = hops;
  574. mutex_unlock(&bdev->bd_holder_lock);
  575. bd_clear_claiming(whole, holder);
  576. mutex_unlock(&bdev_lock);
  577. }
  578. /**
  579. * bd_abort_claiming - abort claiming of a block device
  580. * @bdev: block device of interest
  581. * @holder: holder that has claimed @bdev
  582. *
  583. * Abort claiming of a block device when the exclusive open failed. This can be
  584. * also used when exclusive open is not actually desired and we just needed
  585. * to block other exclusive openers for a while.
  586. */
  587. void bd_abort_claiming(struct block_device *bdev, void *holder)
  588. {
  589. mutex_lock(&bdev_lock);
  590. bd_clear_claiming(bdev_whole(bdev), holder);
  591. mutex_unlock(&bdev_lock);
  592. }
  593. EXPORT_SYMBOL(bd_abort_claiming);
  594. static void bd_end_claim(struct block_device *bdev, void *holder)
  595. {
  596. struct block_device *whole = bdev_whole(bdev);
  597. bool unblock = false;
  598. /*
  599. * Release a claim on the device. The holder fields are protected with
  600. * bdev_lock. open_mutex is used to synchronize disk_holder unlinking.
  601. */
  602. mutex_lock(&bdev_lock);
  603. WARN_ON_ONCE(bdev->bd_holder != holder);
  604. WARN_ON_ONCE(--bdev->bd_holders < 0);
  605. WARN_ON_ONCE(--whole->bd_holders < 0);
  606. if (!bdev->bd_holders) {
  607. mutex_lock(&bdev->bd_holder_lock);
  608. bdev->bd_holder = NULL;
  609. bdev->bd_holder_ops = NULL;
  610. mutex_unlock(&bdev->bd_holder_lock);
  611. if (bdev_test_flag(bdev, BD_WRITE_HOLDER))
  612. unblock = true;
  613. }
  614. if (!whole->bd_holders)
  615. whole->bd_holder = NULL;
  616. mutex_unlock(&bdev_lock);
  617. /*
  618. * If this was the last claim, remove holder link and unblock evpoll if
  619. * it was a write holder.
  620. */
  621. if (unblock) {
  622. disk_unblock_events(bdev->bd_disk);
  623. bdev_clear_flag(bdev, BD_WRITE_HOLDER);
  624. }
  625. }
/*
 * Write back and then drop the whole page cache of @bdev, and write back
 * its inode.  Called from the put paths when the last opener goes away;
 * warns if the device still has holders at that point.
 */
static void blkdev_flush_mapping(struct block_device *bdev)
{
	WARN_ON_ONCE(bdev->bd_holders);
	sync_blockdev(bdev);
	kill_bdev(bdev);
	bdev_write_inode(bdev);
}
  633. static void blkdev_put_whole(struct block_device *bdev)
  634. {
  635. if (atomic_dec_and_test(&bdev->bd_openers))
  636. blkdev_flush_mapping(bdev);
  637. if (bdev->bd_disk->fops->release)
  638. bdev->bd_disk->fops->release(bdev->bd_disk);
  639. }
  640. static int blkdev_get_whole(struct block_device *bdev, blk_mode_t mode)
  641. {
  642. struct gendisk *disk = bdev->bd_disk;
  643. int ret;
  644. if (disk->fops->open) {
  645. ret = disk->fops->open(disk, mode);
  646. if (ret) {
  647. /* avoid ghost partitions on a removed medium */
  648. if (ret == -ENOMEDIUM &&
  649. test_bit(GD_NEED_PART_SCAN, &disk->state))
  650. bdev_disk_changed(disk, true);
  651. return ret;
  652. }
  653. }
  654. if (!atomic_read(&bdev->bd_openers))
  655. set_init_blocksize(bdev);
  656. atomic_inc(&bdev->bd_openers);
  657. if (test_bit(GD_NEED_PART_SCAN, &disk->state)) {
  658. /*
  659. * Only return scanning errors if we are called from contexts
  660. * that explicitly want them, e.g. the BLKRRPART ioctl.
  661. */
  662. ret = bdev_disk_changed(disk, false);
  663. if (ret && (mode & BLK_OPEN_STRICT_SCAN)) {
  664. blkdev_put_whole(bdev);
  665. return ret;
  666. }
  667. }
  668. return 0;
  669. }
  670. static int blkdev_get_part(struct block_device *part, blk_mode_t mode)
  671. {
  672. struct gendisk *disk = part->bd_disk;
  673. int ret;
  674. ret = blkdev_get_whole(bdev_whole(part), mode);
  675. if (ret)
  676. return ret;
  677. ret = -ENXIO;
  678. if (!bdev_nr_sectors(part))
  679. goto out_blkdev_put;
  680. if (!atomic_read(&part->bd_openers)) {
  681. disk->open_partitions++;
  682. set_init_blocksize(part);
  683. }
  684. atomic_inc(&part->bd_openers);
  685. return 0;
  686. out_blkdev_put:
  687. blkdev_put_whole(bdev_whole(part));
  688. return ret;
  689. }
  690. int bdev_permission(dev_t dev, blk_mode_t mode, void *holder)
  691. {
  692. int ret;
  693. ret = devcgroup_check_permission(DEVCG_DEV_BLOCK,
  694. MAJOR(dev), MINOR(dev),
  695. ((mode & BLK_OPEN_READ) ? DEVCG_ACC_READ : 0) |
  696. ((mode & BLK_OPEN_WRITE) ? DEVCG_ACC_WRITE : 0));
  697. if (ret)
  698. return ret;
  699. /* Blocking writes requires exclusive opener */
  700. if (mode & BLK_OPEN_RESTRICT_WRITES && !holder)
  701. return -EINVAL;
  702. /*
  703. * We're using error pointers to indicate to ->release() when we
  704. * failed to open that block device. Also this doesn't make sense.
  705. */
  706. if (WARN_ON_ONCE(IS_ERR(holder)))
  707. return -EINVAL;
  708. return 0;
  709. }
  710. static void blkdev_put_part(struct block_device *part)
  711. {
  712. struct block_device *whole = bdev_whole(part);
  713. if (atomic_dec_and_test(&part->bd_openers)) {
  714. blkdev_flush_mapping(part);
  715. whole->bd_disk->open_partitions--;
  716. }
  717. blkdev_put_whole(whole);
  718. }
  719. struct block_device *blkdev_get_no_open(dev_t dev, bool autoload)
  720. {
  721. struct block_device *bdev;
  722. struct inode *inode;
  723. inode = ilookup(blockdev_superblock, dev);
  724. if (!inode && autoload && IS_ENABLED(CONFIG_BLOCK_LEGACY_AUTOLOAD)) {
  725. blk_request_module(dev);
  726. inode = ilookup(blockdev_superblock, dev);
  727. if (inode)
  728. pr_warn_ratelimited(
  729. "block device autoloading is deprecated and will be removed.\n");
  730. }
  731. if (!inode)
  732. return NULL;
  733. /* switch from the inode reference to a device mode one: */
  734. bdev = &BDEV_I(inode)->bdev;
  735. if (!kobject_get_unless_zero(&bdev->bd_device.kobj))
  736. bdev = NULL;
  737. iput(inode);
  738. return bdev;
  739. }
  740. void blkdev_put_no_open(struct block_device *bdev)
  741. {
  742. put_device(&bdev->bd_device);
  743. }
  744. static bool bdev_writes_blocked(struct block_device *bdev)
  745. {
  746. return bdev->bd_writers < 0;
  747. }
  748. static void bdev_block_writes(struct block_device *bdev)
  749. {
  750. bdev->bd_writers--;
  751. }
  752. static void bdev_unblock_writes(struct block_device *bdev)
  753. {
  754. bdev->bd_writers++;
  755. }
  756. static bool bdev_may_open(struct block_device *bdev, blk_mode_t mode)
  757. {
  758. if (bdev_allow_write_mounted)
  759. return true;
  760. /* Writes blocked? */
  761. if (mode & BLK_OPEN_WRITE && bdev_writes_blocked(bdev))
  762. return false;
  763. if (mode & BLK_OPEN_RESTRICT_WRITES && bdev->bd_writers > 0)
  764. return false;
  765. return true;
  766. }
  767. static void bdev_claim_write_access(struct block_device *bdev, blk_mode_t mode)
  768. {
  769. if (bdev_allow_write_mounted)
  770. return;
  771. /* Claim exclusive or shared write access. */
  772. if (mode & BLK_OPEN_RESTRICT_WRITES)
  773. bdev_block_writes(bdev);
  774. else if (mode & BLK_OPEN_WRITE)
  775. bdev->bd_writers++;
  776. }
  777. static inline bool bdev_unclaimed(const struct file *bdev_file)
  778. {
  779. return bdev_file->private_data == BDEV_I(bdev_file->f_mapping->host);
  780. }
  781. static void bdev_yield_write_access(struct file *bdev_file)
  782. {
  783. struct block_device *bdev;
  784. if (bdev_allow_write_mounted)
  785. return;
  786. if (bdev_unclaimed(bdev_file))
  787. return;
  788. bdev = file_bdev(bdev_file);
  789. if (bdev_file->f_mode & FMODE_WRITE_RESTRICTED)
  790. bdev_unblock_writes(bdev);
  791. else if (bdev_file->f_mode & FMODE_WRITE)
  792. bdev->bd_writers--;
  793. }
  794. /**
  795. * bdev_open - open a block device
  796. * @bdev: block device to open
  797. * @mode: open mode (BLK_OPEN_*)
  798. * @holder: exclusive holder identifier
  799. * @hops: holder operations
  800. * @bdev_file: file for the block device
  801. *
  802. * Open the block device. If @holder is not %NULL, the block device is opened
  803. * with exclusive access. Exclusive opens may nest for the same @holder.
  804. *
  805. * CONTEXT:
  806. * Might sleep.
  807. *
  808. * RETURNS:
  809. * zero on success, -errno on failure.
  810. */
  811. int bdev_open(struct block_device *bdev, blk_mode_t mode, void *holder,
  812. const struct blk_holder_ops *hops, struct file *bdev_file)
  813. {
  814. bool unblock_events = true;
  815. struct gendisk *disk = bdev->bd_disk;
  816. int ret;
  817. if (holder) {
  818. mode |= BLK_OPEN_EXCL;
  819. ret = bd_prepare_to_claim(bdev, holder, hops);
  820. if (ret)
  821. return ret;
  822. } else {
  823. if (WARN_ON_ONCE(mode & BLK_OPEN_EXCL))
  824. return -EIO;
  825. }
  826. disk_block_events(disk);
  827. mutex_lock(&disk->open_mutex);
  828. ret = -ENXIO;
  829. if (!disk_live(disk))
  830. goto abort_claiming;
  831. if (!try_module_get(disk->fops->owner))
  832. goto abort_claiming;
  833. ret = -EBUSY;
  834. if (!bdev_may_open(bdev, mode))
  835. goto put_module;
  836. if (bdev_is_partition(bdev))
  837. ret = blkdev_get_part(bdev, mode);
  838. else
  839. ret = blkdev_get_whole(bdev, mode);
  840. if (ret)
  841. goto put_module;
  842. bdev_claim_write_access(bdev, mode);
  843. if (holder) {
  844. bd_finish_claiming(bdev, holder, hops);
  845. /*
  846. * Block event polling for write claims if requested. Any write
  847. * holder makes the write_holder state stick until all are
  848. * released. This is good enough and tracking individual
  849. * writeable reference is too fragile given the way @mode is
  850. * used in blkdev_get/put().
  851. */
  852. if ((mode & BLK_OPEN_WRITE) &&
  853. !bdev_test_flag(bdev, BD_WRITE_HOLDER) &&
  854. (disk->event_flags & DISK_EVENT_FLAG_BLOCK_ON_EXCL_WRITE)) {
  855. bdev_set_flag(bdev, BD_WRITE_HOLDER);
  856. unblock_events = false;
  857. }
  858. }
  859. mutex_unlock(&disk->open_mutex);
  860. if (unblock_events)
  861. disk_unblock_events(disk);
  862. bdev_file->f_flags |= O_LARGEFILE;
  863. bdev_file->f_mode |= FMODE_CAN_ODIRECT;
  864. if (bdev_nowait(bdev))
  865. bdev_file->f_mode |= FMODE_NOWAIT;
  866. if (mode & BLK_OPEN_RESTRICT_WRITES)
  867. bdev_file->f_mode |= FMODE_WRITE_RESTRICTED;
  868. bdev_file->f_mapping = bdev->bd_mapping;
  869. bdev_file->f_wb_err = filemap_sample_wb_err(bdev_file->f_mapping);
  870. bdev_file->private_data = holder;
  871. return 0;
  872. put_module:
  873. module_put(disk->fops->owner);
  874. abort_claiming:
  875. if (holder)
  876. bd_abort_claiming(bdev, holder);
  877. mutex_unlock(&disk->open_mutex);
  878. disk_unblock_events(disk);
  879. return ret;
  880. }
  881. /*
  882. * If BLK_OPEN_WRITE_IOCTL is set then this is a historical quirk
  883. * associated with the floppy driver where it has allowed ioctls if the
  884. * file was opened for writing, but does not allow reads or writes.
  885. * Make sure that this quirk is reflected in @f_flags.
  886. *
  887. * It can also happen if a block device is opened as O_RDWR | O_WRONLY.
  888. */
  889. static unsigned blk_to_file_flags(blk_mode_t mode)
  890. {
  891. unsigned int flags = 0;
  892. if ((mode & (BLK_OPEN_READ | BLK_OPEN_WRITE)) ==
  893. (BLK_OPEN_READ | BLK_OPEN_WRITE))
  894. flags |= O_RDWR;
  895. else if (mode & BLK_OPEN_WRITE_IOCTL)
  896. flags |= O_RDWR | O_WRONLY;
  897. else if (mode & BLK_OPEN_WRITE)
  898. flags |= O_WRONLY;
  899. else if (mode & BLK_OPEN_READ)
  900. flags |= O_RDONLY; /* homeopathic, because O_RDONLY is 0 */
  901. else
  902. WARN_ON_ONCE(true);
  903. if (mode & BLK_OPEN_NDELAY)
  904. flags |= O_NDELAY;
  905. return flags;
  906. }
  907. struct file *bdev_file_open_by_dev(dev_t dev, blk_mode_t mode, void *holder,
  908. const struct blk_holder_ops *hops)
  909. {
  910. struct file *bdev_file;
  911. struct block_device *bdev;
  912. unsigned int flags;
  913. int ret;
  914. ret = bdev_permission(dev, mode, holder);
  915. if (ret)
  916. return ERR_PTR(ret);
  917. bdev = blkdev_get_no_open(dev, true);
  918. if (!bdev)
  919. return ERR_PTR(-ENXIO);
  920. flags = blk_to_file_flags(mode);
  921. bdev_file = alloc_file_pseudo_noaccount(BD_INODE(bdev),
  922. blockdev_mnt, "", flags | O_LARGEFILE, &def_blk_fops);
  923. if (IS_ERR(bdev_file)) {
  924. blkdev_put_no_open(bdev);
  925. return bdev_file;
  926. }
  927. ihold(BD_INODE(bdev));
  928. ret = bdev_open(bdev, mode, holder, hops, bdev_file);
  929. if (ret) {
  930. /* We failed to open the block device. Let ->release() know. */
  931. bdev_file->private_data = ERR_PTR(ret);
  932. fput(bdev_file);
  933. return ERR_PTR(ret);
  934. }
  935. return bdev_file;
  936. }
  937. EXPORT_SYMBOL(bdev_file_open_by_dev);
  938. struct file *bdev_file_open_by_path(const char *path, blk_mode_t mode,
  939. void *holder,
  940. const struct blk_holder_ops *hops)
  941. {
  942. struct file *file;
  943. dev_t dev;
  944. int error;
  945. error = lookup_bdev(path, &dev);
  946. if (error)
  947. return ERR_PTR(error);
  948. file = bdev_file_open_by_dev(dev, mode, holder, hops);
  949. if (!IS_ERR(file) && (mode & BLK_OPEN_WRITE)) {
  950. if (bdev_read_only(file_bdev(file))) {
  951. fput(file);
  952. file = ERR_PTR(-EACCES);
  953. }
  954. }
  955. return file;
  956. }
  957. EXPORT_SYMBOL(bdev_file_open_by_path);
  958. static inline void bd_yield_claim(struct file *bdev_file)
  959. {
  960. struct block_device *bdev = file_bdev(bdev_file);
  961. void *holder = bdev_file->private_data;
  962. lockdep_assert_held(&bdev->bd_disk->open_mutex);
  963. if (WARN_ON_ONCE(IS_ERR_OR_NULL(holder)))
  964. return;
  965. if (!bdev_unclaimed(bdev_file))
  966. bd_end_claim(bdev, holder);
  967. }
  968. void bdev_release(struct file *bdev_file)
  969. {
  970. struct block_device *bdev = file_bdev(bdev_file);
  971. void *holder = bdev_file->private_data;
  972. struct gendisk *disk = bdev->bd_disk;
  973. /* We failed to open that block device. */
  974. if (IS_ERR(holder))
  975. goto put_no_open;
  976. /*
  977. * Sync early if it looks like we're the last one. If someone else
  978. * opens the block device between now and the decrement of bd_openers
  979. * then we did a sync that we didn't need to, but that's not the end
  980. * of the world and we want to avoid long (could be several minute)
  981. * syncs while holding the mutex.
  982. */
  983. if (atomic_read(&bdev->bd_openers) == 1)
  984. sync_blockdev(bdev);
  985. mutex_lock(&disk->open_mutex);
  986. bdev_yield_write_access(bdev_file);
  987. if (holder)
  988. bd_yield_claim(bdev_file);
  989. /*
  990. * Trigger event checking and tell drivers to flush MEDIA_CHANGE
  991. * event. This is to ensure detection of media removal commanded
  992. * from userland - e.g. eject(1).
  993. */
  994. disk_flush_events(disk, DISK_EVENT_MEDIA_CHANGE);
  995. if (bdev_is_partition(bdev))
  996. blkdev_put_part(bdev);
  997. else
  998. blkdev_put_whole(bdev);
  999. mutex_unlock(&disk->open_mutex);
  1000. module_put(disk->fops->owner);
  1001. put_no_open:
  1002. blkdev_put_no_open(bdev);
  1003. }
  1004. /**
  1005. * bdev_fput - yield claim to the block device and put the file
  1006. * @bdev_file: open block device
  1007. *
  1008. * Yield claim on the block device and put the file. Ensure that the
  1009. * block device can be reclaimed before the file is closed which is a
  1010. * deferred operation.
  1011. */
  1012. void bdev_fput(struct file *bdev_file)
  1013. {
  1014. if (WARN_ON_ONCE(bdev_file->f_op != &def_blk_fops))
  1015. return;
  1016. if (bdev_file->private_data) {
  1017. struct block_device *bdev = file_bdev(bdev_file);
  1018. struct gendisk *disk = bdev->bd_disk;
  1019. mutex_lock(&disk->open_mutex);
  1020. bdev_yield_write_access(bdev_file);
  1021. bd_yield_claim(bdev_file);
  1022. /*
  1023. * Tell release we already gave up our hold on the
  1024. * device and if write restrictions are available that
  1025. * we already gave up write access to the device.
  1026. */
  1027. bdev_file->private_data = BDEV_I(bdev_file->f_mapping->host);
  1028. mutex_unlock(&disk->open_mutex);
  1029. }
  1030. fput(bdev_file);
  1031. }
  1032. EXPORT_SYMBOL(bdev_fput);
  1033. /**
  1034. * lookup_bdev() - Look up a struct block_device by name.
  1035. * @pathname: Name of the block device in the filesystem.
  1036. * @dev: Pointer to the block device's dev_t, if found.
  1037. *
  1038. * Lookup the block device's dev_t at @pathname in the current
  1039. * namespace if possible and return it in @dev.
  1040. *
  1041. * Context: May sleep.
  1042. * Return: 0 if succeeded, negative errno otherwise.
  1043. */
  1044. int lookup_bdev(const char *pathname, dev_t *dev)
  1045. {
  1046. struct inode *inode;
  1047. struct path path;
  1048. int error;
  1049. if (!pathname || !*pathname)
  1050. return -EINVAL;
  1051. error = kern_path(pathname, LOOKUP_FOLLOW, &path);
  1052. if (error)
  1053. return error;
  1054. inode = d_backing_inode(path.dentry);
  1055. error = -ENOTBLK;
  1056. if (!S_ISBLK(inode->i_mode))
  1057. goto out_path_put;
  1058. error = -EACCES;
  1059. if (!may_open_dev(&path))
  1060. goto out_path_put;
  1061. *dev = inode->i_rdev;
  1062. error = 0;
  1063. out_path_put:
  1064. path_put(&path);
  1065. return error;
  1066. }
  1067. EXPORT_SYMBOL(lookup_bdev);
  1068. /**
  1069. * bdev_mark_dead - mark a block device as dead
  1070. * @bdev: block device to operate on
  1071. * @surprise: indicate a surprise removal
  1072. *
  1073. * Tell the file system that this devices or media is dead. If @surprise is set
  1074. * to %true the device or media is already gone, if not we are preparing for an
  1075. * orderly removal.
  1076. *
  1077. * This calls into the file system, which then typicall syncs out all dirty data
  1078. * and writes back inodes and then invalidates any cached data in the inodes on
  1079. * the file system. In addition we also invalidate the block device mapping.
  1080. */
  1081. void bdev_mark_dead(struct block_device *bdev, bool surprise)
  1082. {
  1083. mutex_lock(&bdev->bd_holder_lock);
  1084. if (bdev->bd_holder_ops && bdev->bd_holder_ops->mark_dead)
  1085. bdev->bd_holder_ops->mark_dead(bdev, surprise);
  1086. else {
  1087. mutex_unlock(&bdev->bd_holder_lock);
  1088. sync_blockdev(bdev);
  1089. }
  1090. invalidate_bdev(bdev);
  1091. }
  1092. /*
  1093. * New drivers should not use this directly. There are some drivers however
  1094. * that needs this for historical reasons. For example, the DASD driver has
  1095. * historically had a shutdown to offline mode that doesn't actually remove the
  1096. * gendisk that otherwise looks a lot like a safe device removal.
  1097. */
  1098. EXPORT_SYMBOL_GPL(bdev_mark_dead);
  1099. void sync_bdevs(bool wait)
  1100. {
  1101. struct inode *inode, *old_inode = NULL;
  1102. spin_lock(&blockdev_superblock->s_inode_list_lock);
  1103. list_for_each_entry(inode, &blockdev_superblock->s_inodes, i_sb_list) {
  1104. struct address_space *mapping = inode->i_mapping;
  1105. struct block_device *bdev;
  1106. spin_lock(&inode->i_lock);
  1107. if (inode_state_read(inode) & (I_FREEING | I_WILL_FREE | I_NEW) ||
  1108. mapping->nrpages == 0) {
  1109. spin_unlock(&inode->i_lock);
  1110. continue;
  1111. }
  1112. __iget(inode);
  1113. spin_unlock(&inode->i_lock);
  1114. spin_unlock(&blockdev_superblock->s_inode_list_lock);
  1115. /*
  1116. * We hold a reference to 'inode' so it couldn't have been
  1117. * removed from s_inodes list while we dropped the
  1118. * s_inode_list_lock We cannot iput the inode now as we can
  1119. * be holding the last reference and we cannot iput it under
  1120. * s_inode_list_lock. So we keep the reference and iput it
  1121. * later.
  1122. */
  1123. iput(old_inode);
  1124. old_inode = inode;
  1125. bdev = I_BDEV(inode);
  1126. mutex_lock(&bdev->bd_disk->open_mutex);
  1127. if (!atomic_read(&bdev->bd_openers)) {
  1128. ; /* skip */
  1129. } else if (wait) {
  1130. /*
  1131. * We keep the error status of individual mapping so
  1132. * that applications can catch the writeback error using
  1133. * fsync(2). See filemap_fdatawait_keep_errors() for
  1134. * details.
  1135. */
  1136. filemap_fdatawait_keep_errors(inode->i_mapping);
  1137. } else {
  1138. filemap_fdatawrite(inode->i_mapping);
  1139. }
  1140. mutex_unlock(&bdev->bd_disk->open_mutex);
  1141. spin_lock(&blockdev_superblock->s_inode_list_lock);
  1142. }
  1143. spin_unlock(&blockdev_superblock->s_inode_list_lock);
  1144. iput(old_inode);
  1145. }
  1146. /*
  1147. * Handle STATX_{DIOALIGN, WRITE_ATOMIC} for block devices.
  1148. */
  1149. void bdev_statx(const struct path *path, struct kstat *stat, u32 request_mask)
  1150. {
  1151. struct block_device *bdev;
  1152. /*
  1153. * Note that d_backing_inode() returns the block device node inode, not
  1154. * the block device's internal inode. Therefore it is *not* valid to
  1155. * use I_BDEV() here; the block device has to be looked up by i_rdev
  1156. * instead.
  1157. */
  1158. bdev = blkdev_get_no_open(d_backing_inode(path->dentry)->i_rdev, false);
  1159. if (!bdev)
  1160. return;
  1161. if (request_mask & STATX_DIOALIGN) {
  1162. stat->dio_mem_align = bdev_dma_alignment(bdev) + 1;
  1163. stat->dio_offset_align = bdev_logical_block_size(bdev);
  1164. stat->result_mask |= STATX_DIOALIGN;
  1165. }
  1166. if (request_mask & STATX_WRITE_ATOMIC && bdev_can_atomic_write(bdev)) {
  1167. struct request_queue *bd_queue = bdev->bd_queue;
  1168. generic_fill_statx_atomic_writes(stat,
  1169. queue_atomic_write_unit_min_bytes(bd_queue),
  1170. queue_atomic_write_unit_max_bytes(bd_queue),
  1171. 0);
  1172. }
  1173. stat->blksize = bdev_io_min(bdev);
  1174. blkdev_put_no_open(bdev);
  1175. }
  1176. bool disk_live(struct gendisk *disk)
  1177. {
  1178. return !inode_unhashed(BD_INODE(disk->part0));
  1179. }
  1180. EXPORT_SYMBOL_GPL(disk_live);
  1181. unsigned int block_size(struct block_device *bdev)
  1182. {
  1183. return 1 << BD_INODE(bdev)->i_blkbits;
  1184. }
  1185. EXPORT_SYMBOL_GPL(block_size);
  1186. static int __init setup_bdev_allow_write_mounted(char *str)
  1187. {
  1188. if (kstrtobool(str, &bdev_allow_write_mounted))
  1189. pr_warn("Invalid option string for bdev_allow_write_mounted:"
  1190. " '%s'\n", str);
  1191. return 1;
  1192. }
  1193. __setup("bdev_allow_write_mounted=", setup_bdev_allow_write_mounted);