btnode.c 10 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358
  1. // SPDX-License-Identifier: GPL-2.0+
  2. /*
  3. * NILFS B-tree node cache
  4. *
  5. * Copyright (C) 2005-2008 Nippon Telegraph and Telephone Corporation.
  6. *
  7. * Originally written by Seiji Kihara.
  8. * Fully revised by Ryusuke Konishi for stabilization and simplification.
  9. *
  10. */
  11. #include <linux/types.h>
  12. #include <linux/buffer_head.h>
  13. #include <linux/mm.h>
  14. #include <linux/backing-dev.h>
  15. #include <linux/gfp.h>
  16. #include "nilfs.h"
  17. #include "mdt.h"
  18. #include "dat.h"
  19. #include "page.h"
  20. #include "btnode.h"
  21. /**
  22. * nilfs_init_btnc_inode - initialize B-tree node cache inode
  23. * @btnc_inode: inode to be initialized
  24. *
  25. * nilfs_init_btnc_inode() sets up an inode for B-tree node cache.
  26. */
  27. void nilfs_init_btnc_inode(struct inode *btnc_inode)
  28. {
  29. struct nilfs_inode_info *ii = NILFS_I(btnc_inode);
  30. btnc_inode->i_mode = S_IFREG;
  31. ii->i_flags = 0;
  32. memset(&ii->i_bmap_data, 0, sizeof(struct nilfs_bmap));
  33. mapping_set_gfp_mask(btnc_inode->i_mapping, GFP_NOFS);
  34. btnc_inode->i_mapping->a_ops = &nilfs_buffer_cache_aops;
  35. }
  36. void nilfs_btnode_cache_clear(struct address_space *btnc)
  37. {
  38. invalidate_mapping_pages(btnc, 0, -1);
  39. truncate_inode_pages(btnc, 0);
  40. }
  41. struct buffer_head *
  42. nilfs_btnode_create_block(struct address_space *btnc, __u64 blocknr)
  43. {
  44. struct inode *inode = btnc->host;
  45. struct buffer_head *bh;
  46. bh = nilfs_grab_buffer(inode, btnc, blocknr, BIT(BH_NILFS_Node));
  47. if (unlikely(!bh))
  48. return ERR_PTR(-ENOMEM);
  49. if (unlikely(buffer_mapped(bh) || buffer_uptodate(bh) ||
  50. buffer_dirty(bh))) {
  51. /*
  52. * The block buffer at the specified new address was already
  53. * in use. This can happen if it is a virtual block number
  54. * and has been reallocated due to corruption of the bitmap
  55. * used to manage its allocation state (if not, the buffer
  56. * clearing of an abandoned b-tree node is missing somewhere).
  57. */
  58. nilfs_error(inode->i_sb,
  59. "state inconsistency probably due to duplicate use of b-tree node block address %llu (ino=%lu)",
  60. (unsigned long long)blocknr, inode->i_ino);
  61. goto failed;
  62. }
  63. memset(bh->b_data, 0, i_blocksize(inode));
  64. bh->b_blocknr = blocknr;
  65. set_buffer_mapped(bh);
  66. set_buffer_uptodate(bh);
  67. folio_unlock(bh->b_folio);
  68. folio_put(bh->b_folio);
  69. return bh;
  70. failed:
  71. folio_unlock(bh->b_folio);
  72. folio_put(bh->b_folio);
  73. brelse(bh);
  74. return ERR_PTR(-EIO);
  75. }
  76. int nilfs_btnode_submit_block(struct address_space *btnc, __u64 blocknr,
  77. sector_t pblocknr, blk_opf_t opf,
  78. struct buffer_head **pbh, sector_t *submit_ptr)
  79. {
  80. struct buffer_head *bh;
  81. struct inode *inode = btnc->host;
  82. struct folio *folio;
  83. int err;
  84. bh = nilfs_grab_buffer(inode, btnc, blocknr, BIT(BH_NILFS_Node));
  85. if (unlikely(!bh))
  86. return -ENOMEM;
  87. err = -EEXIST; /* internal code */
  88. folio = bh->b_folio;
  89. if (buffer_uptodate(bh) || buffer_dirty(bh))
  90. goto found;
  91. if (pblocknr == 0) {
  92. pblocknr = blocknr;
  93. if (inode->i_ino != NILFS_DAT_INO) {
  94. struct the_nilfs *nilfs = inode->i_sb->s_fs_info;
  95. /* blocknr is a virtual block number */
  96. err = nilfs_dat_translate(nilfs->ns_dat, blocknr,
  97. &pblocknr);
  98. if (unlikely(err)) {
  99. brelse(bh);
  100. goto out_locked;
  101. }
  102. }
  103. }
  104. if (opf & REQ_RAHEAD) {
  105. if (pblocknr != *submit_ptr + 1 || !trylock_buffer(bh)) {
  106. err = -EBUSY; /* internal code */
  107. brelse(bh);
  108. goto out_locked;
  109. }
  110. } else { /* opf == REQ_OP_READ */
  111. lock_buffer(bh);
  112. }
  113. if (buffer_uptodate(bh)) {
  114. unlock_buffer(bh);
  115. err = -EEXIST; /* internal code */
  116. goto found;
  117. }
  118. set_buffer_mapped(bh);
  119. bh->b_blocknr = pblocknr; /* set block address for read */
  120. bh->b_end_io = end_buffer_read_sync;
  121. get_bh(bh);
  122. submit_bh(opf, bh);
  123. bh->b_blocknr = blocknr; /* set back to the given block address */
  124. *submit_ptr = pblocknr;
  125. err = 0;
  126. found:
  127. *pbh = bh;
  128. out_locked:
  129. folio_unlock(folio);
  130. folio_put(folio);
  131. return err;
  132. }
  133. /**
  134. * nilfs_btnode_delete - delete B-tree node buffer
  135. * @bh: buffer to be deleted
  136. *
  137. * nilfs_btnode_delete() invalidates the specified buffer and delete the page
  138. * including the buffer if the page gets unbusy.
  139. */
  140. void nilfs_btnode_delete(struct buffer_head *bh)
  141. {
  142. struct address_space *mapping;
  143. struct folio *folio = bh->b_folio;
  144. pgoff_t index = folio->index;
  145. int still_dirty;
  146. folio_get(folio);
  147. folio_lock(folio);
  148. folio_wait_writeback(folio);
  149. nilfs_forget_buffer(bh);
  150. still_dirty = folio_test_dirty(folio);
  151. mapping = folio->mapping;
  152. folio_unlock(folio);
  153. folio_put(folio);
  154. if (!still_dirty && mapping)
  155. invalidate_inode_pages2_range(mapping, index, index);
  156. }
  157. /**
  158. * nilfs_btnode_prepare_change_key - prepare to change the search key of a
  159. * b-tree node block
  160. * @btnc: page cache in which the b-tree node block is buffered
  161. * @ctxt: structure for exchanging context information for key change
  162. *
  163. * nilfs_btnode_prepare_change_key() prepares to move the contents of the
  164. * b-tree node block of the old key given in the "oldkey" member of @ctxt to
  165. * the position of the new key given in the "newkey" member of @ctxt in the
  166. * page cache @btnc. Here, the key of the block is an index in units of
  167. * blocks, and if the page and block sizes match, it matches the page index
  168. * in the page cache.
  169. *
  170. * If the page size and block size match, this function attempts to move the
  171. * entire folio, and in preparation for this, inserts the original folio into
  172. * the new index of the cache. If this insertion fails or if the page size
  173. * and block size are different, it falls back to a copy preparation using
  174. * nilfs_btnode_create_block(), inserts a new block at the position
  175. * corresponding to "newkey", and stores the buffer head pointer in the
  176. * "newbh" member of @ctxt.
  177. *
  178. * Note that the current implementation does not support folio sizes larger
  179. * than the page size.
  180. *
  181. * Return: 0 on success, or one of the following negative error codes on
  182. * failure:
  183. * * %-EIO - I/O error (metadata corruption).
  184. * * %-ENOMEM - Insufficient memory available.
  185. */
  186. int nilfs_btnode_prepare_change_key(struct address_space *btnc,
  187. struct nilfs_btnode_chkey_ctxt *ctxt)
  188. {
  189. struct buffer_head *obh, *nbh;
  190. struct inode *inode = btnc->host;
  191. __u64 oldkey = ctxt->oldkey, newkey = ctxt->newkey;
  192. int err;
  193. if (oldkey == newkey)
  194. return 0;
  195. obh = ctxt->bh;
  196. ctxt->newbh = NULL;
  197. if (inode->i_blkbits == PAGE_SHIFT) {
  198. struct folio *ofolio = obh->b_folio;
  199. folio_lock(ofolio);
  200. retry:
  201. /* BUG_ON(oldkey != obh->b_folio->index); */
  202. if (unlikely(oldkey != ofolio->index))
  203. NILFS_FOLIO_BUG(ofolio,
  204. "invalid oldkey %lld (newkey=%lld)",
  205. (unsigned long long)oldkey,
  206. (unsigned long long)newkey);
  207. xa_lock_irq(&btnc->i_pages);
  208. err = __xa_insert(&btnc->i_pages, newkey, ofolio, GFP_NOFS);
  209. xa_unlock_irq(&btnc->i_pages);
  210. /*
  211. * Note: folio->index will not change to newkey until
  212. * nilfs_btnode_commit_change_key() will be called.
  213. * To protect the folio in intermediate state, the folio lock
  214. * is held.
  215. */
  216. if (!err)
  217. return 0;
  218. else if (err != -EBUSY)
  219. goto failed_unlock;
  220. err = invalidate_inode_pages2_range(btnc, newkey, newkey);
  221. if (!err)
  222. goto retry;
  223. /* fallback to copy mode */
  224. folio_unlock(ofolio);
  225. }
  226. nbh = nilfs_btnode_create_block(btnc, newkey);
  227. if (IS_ERR(nbh))
  228. return PTR_ERR(nbh);
  229. BUG_ON(nbh == obh);
  230. ctxt->newbh = nbh;
  231. return 0;
  232. failed_unlock:
  233. folio_unlock(obh->b_folio);
  234. return err;
  235. }
  236. /**
  237. * nilfs_btnode_commit_change_key - commit the change of the search key of
  238. * a b-tree node block
  239. * @btnc: page cache in which the b-tree node block is buffered
  240. * @ctxt: structure for exchanging context information for key change
  241. *
  242. * nilfs_btnode_commit_change_key() executes the key change based on the
  243. * context @ctxt prepared by nilfs_btnode_prepare_change_key(). If no valid
  244. * block buffer is prepared in "newbh" of @ctxt (i.e., a full folio move),
  245. * this function removes the folio from the old index and completes the move.
  246. * Otherwise, it copies the block data and inherited flag states of "oldbh"
  247. * to "newbh" and clears the "oldbh" from the cache. In either case, the
  248. * relocated buffer is marked as dirty.
  249. *
  250. * As with nilfs_btnode_prepare_change_key(), the current implementation does
  251. * not support folio sizes larger than the page size.
  252. */
  253. void nilfs_btnode_commit_change_key(struct address_space *btnc,
  254. struct nilfs_btnode_chkey_ctxt *ctxt)
  255. {
  256. struct buffer_head *obh = ctxt->bh, *nbh = ctxt->newbh;
  257. __u64 oldkey = ctxt->oldkey, newkey = ctxt->newkey;
  258. struct folio *ofolio;
  259. if (oldkey == newkey)
  260. return;
  261. if (nbh == NULL) { /* blocksize == pagesize */
  262. ofolio = obh->b_folio;
  263. if (unlikely(oldkey != ofolio->index))
  264. NILFS_FOLIO_BUG(ofolio,
  265. "invalid oldkey %lld (newkey=%lld)",
  266. (unsigned long long)oldkey,
  267. (unsigned long long)newkey);
  268. mark_buffer_dirty(obh);
  269. xa_lock_irq(&btnc->i_pages);
  270. __xa_erase(&btnc->i_pages, oldkey);
  271. __xa_set_mark(&btnc->i_pages, newkey, PAGECACHE_TAG_DIRTY);
  272. xa_unlock_irq(&btnc->i_pages);
  273. ofolio->index = obh->b_blocknr = newkey;
  274. folio_unlock(ofolio);
  275. } else {
  276. nilfs_copy_buffer(nbh, obh);
  277. mark_buffer_dirty(nbh);
  278. nbh->b_blocknr = newkey;
  279. ctxt->bh = nbh;
  280. nilfs_btnode_delete(obh); /* will decrement bh->b_count */
  281. }
  282. }
  283. /**
  284. * nilfs_btnode_abort_change_key - abort the change of the search key of a
  285. * b-tree node block
  286. * @btnc: page cache in which the b-tree node block is buffered
  287. * @ctxt: structure for exchanging context information for key change
  288. *
  289. * nilfs_btnode_abort_change_key() cancels the key change associated with the
  290. * context @ctxt prepared via nilfs_btnode_prepare_change_key() and performs
  291. * any necessary cleanup. If no valid block buffer is prepared in "newbh" of
  292. * @ctxt, this function removes the folio from the destination index and aborts
  293. * the move. Otherwise, it clears "newbh" from the cache.
  294. *
  295. * As with nilfs_btnode_prepare_change_key(), the current implementation does
  296. * not support folio sizes larger than the page size.
  297. */
  298. void nilfs_btnode_abort_change_key(struct address_space *btnc,
  299. struct nilfs_btnode_chkey_ctxt *ctxt)
  300. {
  301. struct buffer_head *nbh = ctxt->newbh;
  302. __u64 oldkey = ctxt->oldkey, newkey = ctxt->newkey;
  303. if (oldkey == newkey)
  304. return;
  305. if (nbh == NULL) { /* blocksize == pagesize */
  306. xa_erase_irq(&btnc->i_pages, newkey);
  307. folio_unlock(ctxt->bh->b_folio);
  308. } else {
  309. /*
  310. * When canceling a buffer that a prepare operation has
  311. * allocated to copy a node block to another location, use
  312. * nilfs_btnode_delete() to initialize and release the buffer
  313. * so that the buffer flags will not be in an inconsistent
  314. * state when it is reallocated.
  315. */
  316. nilfs_btnode_delete(nbh);
  317. }
  318. }