alloc.c 27 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935936937938939940941942943944945946947948949950951952953954955956957958
  1. // SPDX-License-Identifier: GPL-2.0+
  2. /*
  3. * NILFS dat/inode allocator
  4. *
  5. * Copyright (C) 2006-2008 Nippon Telegraph and Telephone Corporation.
  6. *
  7. * Originally written by Koji Sato.
  8. * Two allocators were unified by Ryusuke Konishi and Amagai Yoshiji.
  9. */
  10. #include <linux/types.h>
  11. #include <linux/buffer_head.h>
  12. #include <linux/fs.h>
  13. #include <linux/bitops.h>
  14. #include <linux/slab.h>
  15. #include "mdt.h"
  16. #include "alloc.h"
  17. /**
  18. * nilfs_palloc_groups_per_desc_block - get the number of groups that a group
  19. * descriptor block can maintain
  20. * @inode: inode of metadata file using this allocator
  21. *
  22. * Return: Number of groups that a group descriptor block can maintain.
  23. */
  24. static inline unsigned long
  25. nilfs_palloc_groups_per_desc_block(const struct inode *inode)
  26. {
  27. return i_blocksize(inode) /
  28. sizeof(struct nilfs_palloc_group_desc);
  29. }
  30. /**
  31. * nilfs_palloc_groups_count - get maximum number of groups
  32. * @inode: inode of metadata file using this allocator
  33. *
  34. * Return: Maximum number of groups.
  35. */
  36. static inline unsigned long
  37. nilfs_palloc_groups_count(const struct inode *inode)
  38. {
  39. return 1UL << (BITS_PER_LONG - (inode->i_blkbits + 3 /* log2(8) */));
  40. }
  41. /**
  42. * nilfs_palloc_init_blockgroup - initialize private variables for allocator
  43. * @inode: inode of metadata file using this allocator
  44. * @entry_size: size of the persistent object
  45. *
  46. * Return: 0 on success, or a negative error code on failure.
  47. */
  48. int nilfs_palloc_init_blockgroup(struct inode *inode, unsigned int entry_size)
  49. {
  50. struct nilfs_mdt_info *mi = NILFS_MDT(inode);
  51. mi->mi_bgl = kmalloc_obj(*mi->mi_bgl, GFP_NOFS);
  52. if (!mi->mi_bgl)
  53. return -ENOMEM;
  54. bgl_lock_init(mi->mi_bgl);
  55. nilfs_mdt_set_entry_size(inode, entry_size, 0);
  56. mi->mi_blocks_per_group =
  57. DIV_ROUND_UP(nilfs_palloc_entries_per_group(inode),
  58. mi->mi_entries_per_block) + 1;
  59. /*
  60. * Number of blocks in a group including entry blocks
  61. * and a bitmap block
  62. */
  63. mi->mi_blocks_per_desc_block =
  64. nilfs_palloc_groups_per_desc_block(inode) *
  65. mi->mi_blocks_per_group + 1;
  66. /*
  67. * Number of blocks per descriptor including the
  68. * descriptor block
  69. */
  70. return 0;
  71. }
  72. /**
  73. * nilfs_palloc_group - get group number and offset from an entry number
  74. * @inode: inode of metadata file using this allocator
  75. * @nr: serial number of the entry (e.g. inode number)
  76. * @offset: pointer to store offset number in the group
  77. *
  78. * Return: Number of the group that contains the entry with the index
  79. * specified by @nr.
  80. */
  81. static unsigned long nilfs_palloc_group(const struct inode *inode, __u64 nr,
  82. unsigned long *offset)
  83. {
  84. __u64 group = nr;
  85. *offset = do_div(group, nilfs_palloc_entries_per_group(inode));
  86. return group;
  87. }
  88. /**
  89. * nilfs_palloc_desc_blkoff - get block offset of a group descriptor block
  90. * @inode: inode of metadata file using this allocator
  91. * @group: group number
  92. *
  93. * Return: Index number in the metadata file of the descriptor block of
  94. * the group specified by @group.
  95. */
  96. static unsigned long
  97. nilfs_palloc_desc_blkoff(const struct inode *inode, unsigned long group)
  98. {
  99. unsigned long desc_block =
  100. group / nilfs_palloc_groups_per_desc_block(inode);
  101. return desc_block * NILFS_MDT(inode)->mi_blocks_per_desc_block;
  102. }
  103. /**
  104. * nilfs_palloc_bitmap_blkoff - get block offset of a bitmap block
  105. * @inode: inode of metadata file using this allocator
  106. * @group: group number
  107. *
  108. * nilfs_palloc_bitmap_blkoff() returns block offset of the bitmap
  109. * block used to allocate/deallocate entries in the specified group.
  110. *
  111. * Return: Index number in the metadata file of the bitmap block of
  112. * the group specified by @group.
  113. */
  114. static unsigned long
  115. nilfs_palloc_bitmap_blkoff(const struct inode *inode, unsigned long group)
  116. {
  117. unsigned long desc_offset =
  118. group % nilfs_palloc_groups_per_desc_block(inode);
  119. return nilfs_palloc_desc_blkoff(inode, group) + 1 +
  120. desc_offset * NILFS_MDT(inode)->mi_blocks_per_group;
  121. }
  122. /**
  123. * nilfs_palloc_group_desc_nfrees - get the number of free entries in a group
  124. * @desc: pointer to descriptor structure for the group
  125. * @lock: spin lock protecting @desc
  126. *
  127. * Return: Number of free entries written in the group descriptor @desc.
  128. */
  129. static unsigned long
  130. nilfs_palloc_group_desc_nfrees(const struct nilfs_palloc_group_desc *desc,
  131. spinlock_t *lock)
  132. {
  133. unsigned long nfree;
  134. spin_lock(lock);
  135. nfree = le32_to_cpu(desc->pg_nfrees);
  136. spin_unlock(lock);
  137. return nfree;
  138. }
  139. /**
  140. * nilfs_palloc_group_desc_add_entries - adjust count of free entries
  141. * @desc: pointer to descriptor structure for the group
  142. * @lock: spin lock protecting @desc
  143. * @n: delta to be added
  144. *
  145. * Return: Number of free entries after adjusting the group descriptor
  146. * @desc.
  147. */
  148. static u32
  149. nilfs_palloc_group_desc_add_entries(struct nilfs_palloc_group_desc *desc,
  150. spinlock_t *lock, u32 n)
  151. {
  152. u32 nfree;
  153. spin_lock(lock);
  154. le32_add_cpu(&desc->pg_nfrees, n);
  155. nfree = le32_to_cpu(desc->pg_nfrees);
  156. spin_unlock(lock);
  157. return nfree;
  158. }
  159. /**
  160. * nilfs_palloc_entry_blkoff - get block offset of an entry block
  161. * @inode: inode of metadata file using this allocator
  162. * @nr: serial number of the entry (e.g. inode number)
  163. *
  164. * Return: Index number in the metadata file of the block containing
  165. * the entry specified by @nr.
  166. */
  167. static unsigned long
  168. nilfs_palloc_entry_blkoff(const struct inode *inode, __u64 nr)
  169. {
  170. unsigned long group, group_offset;
  171. group = nilfs_palloc_group(inode, nr, &group_offset);
  172. return nilfs_palloc_bitmap_blkoff(inode, group) + 1 +
  173. group_offset / NILFS_MDT(inode)->mi_entries_per_block;
  174. }
  175. /**
  176. * nilfs_palloc_desc_block_init - initialize buffer of a group descriptor block
  177. * @inode: inode of metadata file
  178. * @bh: buffer head of the buffer to be initialized
  179. * @from: kernel address mapped for a chunk of the block
  180. *
  181. * This function does not yet support the case where block size > PAGE_SIZE.
  182. */
  183. static void nilfs_palloc_desc_block_init(struct inode *inode,
  184. struct buffer_head *bh, void *from)
  185. {
  186. struct nilfs_palloc_group_desc *desc = from;
  187. unsigned long n = nilfs_palloc_groups_per_desc_block(inode);
  188. __le32 nfrees;
  189. nfrees = cpu_to_le32(nilfs_palloc_entries_per_group(inode));
  190. while (n-- > 0) {
  191. desc->pg_nfrees = nfrees;
  192. desc++;
  193. }
  194. }
  195. static int nilfs_palloc_get_block(struct inode *inode, unsigned long blkoff,
  196. int create,
  197. void (*init_block)(struct inode *,
  198. struct buffer_head *,
  199. void *),
  200. struct buffer_head **bhp,
  201. struct nilfs_bh_assoc *prev,
  202. spinlock_t *lock)
  203. {
  204. int ret;
  205. spin_lock(lock);
  206. if (prev->bh && blkoff == prev->blkoff &&
  207. likely(buffer_uptodate(prev->bh))) {
  208. get_bh(prev->bh);
  209. *bhp = prev->bh;
  210. spin_unlock(lock);
  211. return 0;
  212. }
  213. spin_unlock(lock);
  214. ret = nilfs_mdt_get_block(inode, blkoff, create, init_block, bhp);
  215. if (!ret) {
  216. spin_lock(lock);
  217. /*
  218. * The following code must be safe for change of the
  219. * cache contents during the get block call.
  220. */
  221. brelse(prev->bh);
  222. get_bh(*bhp);
  223. prev->bh = *bhp;
  224. prev->blkoff = blkoff;
  225. spin_unlock(lock);
  226. }
  227. return ret;
  228. }
  229. /**
  230. * nilfs_palloc_delete_block - delete a block on the persistent allocator file
  231. * @inode: inode of metadata file using this allocator
  232. * @blkoff: block offset
  233. * @prev: nilfs_bh_assoc struct of the last used buffer
  234. * @lock: spin lock protecting @prev
  235. *
  236. * Return: 0 on success, or one of the following negative error codes on
  237. * failure:
  238. * * %-EIO - I/O error (including metadata corruption).
  239. * * %-ENOENT - Non-existent block.
  240. * * %-ENOMEM - Insufficient memory available.
  241. */
  242. static int nilfs_palloc_delete_block(struct inode *inode, unsigned long blkoff,
  243. struct nilfs_bh_assoc *prev,
  244. spinlock_t *lock)
  245. {
  246. spin_lock(lock);
  247. if (prev->bh && blkoff == prev->blkoff) {
  248. brelse(prev->bh);
  249. prev->bh = NULL;
  250. }
  251. spin_unlock(lock);
  252. return nilfs_mdt_delete_block(inode, blkoff);
  253. }
  254. /**
  255. * nilfs_palloc_get_desc_block - get buffer head of a group descriptor block
  256. * @inode: inode of metadata file using this allocator
  257. * @group: group number
  258. * @create: create flag
  259. * @bhp: pointer to store the resultant buffer head
  260. *
  261. * Return: 0 on success, or a negative error code on failure.
  262. */
  263. static int nilfs_palloc_get_desc_block(struct inode *inode,
  264. unsigned long group,
  265. int create, struct buffer_head **bhp)
  266. {
  267. struct nilfs_palloc_cache *cache = NILFS_MDT(inode)->mi_palloc_cache;
  268. return nilfs_palloc_get_block(inode,
  269. nilfs_palloc_desc_blkoff(inode, group),
  270. create, nilfs_palloc_desc_block_init,
  271. bhp, &cache->prev_desc, &cache->lock);
  272. }
  273. /**
  274. * nilfs_palloc_get_bitmap_block - get buffer head of a bitmap block
  275. * @inode: inode of metadata file using this allocator
  276. * @group: group number
  277. * @create: create flag
  278. * @bhp: pointer to store the resultant buffer head
  279. *
  280. * Return: 0 on success, or a negative error code on failure.
  281. */
  282. static int nilfs_palloc_get_bitmap_block(struct inode *inode,
  283. unsigned long group,
  284. int create, struct buffer_head **bhp)
  285. {
  286. struct nilfs_palloc_cache *cache = NILFS_MDT(inode)->mi_palloc_cache;
  287. return nilfs_palloc_get_block(inode,
  288. nilfs_palloc_bitmap_blkoff(inode, group),
  289. create, NULL, bhp,
  290. &cache->prev_bitmap, &cache->lock);
  291. }
  292. /**
  293. * nilfs_palloc_delete_bitmap_block - delete a bitmap block
  294. * @inode: inode of metadata file using this allocator
  295. * @group: group number
  296. *
  297. * Return: 0 on success, or a negative error code on failure.
  298. */
  299. static int nilfs_palloc_delete_bitmap_block(struct inode *inode,
  300. unsigned long group)
  301. {
  302. struct nilfs_palloc_cache *cache = NILFS_MDT(inode)->mi_palloc_cache;
  303. return nilfs_palloc_delete_block(inode,
  304. nilfs_palloc_bitmap_blkoff(inode,
  305. group),
  306. &cache->prev_bitmap, &cache->lock);
  307. }
  308. /**
  309. * nilfs_palloc_get_entry_block - get buffer head of an entry block
  310. * @inode: inode of metadata file using this allocator
  311. * @nr: serial number of the entry (e.g. inode number)
  312. * @create: create flag
  313. * @bhp: pointer to store the resultant buffer head
  314. *
  315. * Return: 0 on success, or a negative error code on failure.
  316. */
  317. int nilfs_palloc_get_entry_block(struct inode *inode, __u64 nr,
  318. int create, struct buffer_head **bhp)
  319. {
  320. struct nilfs_palloc_cache *cache = NILFS_MDT(inode)->mi_palloc_cache;
  321. return nilfs_palloc_get_block(inode,
  322. nilfs_palloc_entry_blkoff(inode, nr),
  323. create, NULL, bhp,
  324. &cache->prev_entry, &cache->lock);
  325. }
  326. /**
  327. * nilfs_palloc_delete_entry_block - delete an entry block
  328. * @inode: inode of metadata file using this allocator
  329. * @nr: serial number of the entry
  330. *
  331. * Return: 0 on success, or a negative error code on failure.
  332. */
  333. static int nilfs_palloc_delete_entry_block(struct inode *inode, __u64 nr)
  334. {
  335. struct nilfs_palloc_cache *cache = NILFS_MDT(inode)->mi_palloc_cache;
  336. return nilfs_palloc_delete_block(inode,
  337. nilfs_palloc_entry_blkoff(inode, nr),
  338. &cache->prev_entry, &cache->lock);
  339. }
  340. /**
  341. * nilfs_palloc_group_desc_offset - calculate the byte offset of a group
  342. * descriptor in the folio containing it
  343. * @inode: inode of metadata file using this allocator
  344. * @group: group number
  345. * @bh: buffer head of the group descriptor block
  346. *
  347. * Return: Byte offset in the folio of the group descriptor for @group.
  348. */
  349. static size_t nilfs_palloc_group_desc_offset(const struct inode *inode,
  350. unsigned long group,
  351. const struct buffer_head *bh)
  352. {
  353. return offset_in_folio(bh->b_folio, bh->b_data) +
  354. sizeof(struct nilfs_palloc_group_desc) *
  355. (group % nilfs_palloc_groups_per_desc_block(inode));
  356. }
  357. /**
  358. * nilfs_palloc_bitmap_offset - calculate the byte offset of a bitmap block
  359. * in the folio containing it
  360. * @bh: buffer head of the bitmap block
  361. *
  362. * Return: Byte offset in the folio of the bitmap block for @bh.
  363. */
  364. static size_t nilfs_palloc_bitmap_offset(const struct buffer_head *bh)
  365. {
  366. return offset_in_folio(bh->b_folio, bh->b_data);
  367. }
  368. /**
  369. * nilfs_palloc_entry_offset - calculate the byte offset of an entry in the
  370. * folio containing it
  371. * @inode: inode of metadata file using this allocator
  372. * @nr: serial number of the entry (e.g. inode number)
  373. * @bh: buffer head of the entry block
  374. *
  375. * Return: Byte offset in the folio of the entry @nr.
  376. */
  377. size_t nilfs_palloc_entry_offset(const struct inode *inode, __u64 nr,
  378. const struct buffer_head *bh)
  379. {
  380. unsigned long entry_index_in_group, entry_index_in_block;
  381. nilfs_palloc_group(inode, nr, &entry_index_in_group);
  382. entry_index_in_block = entry_index_in_group %
  383. NILFS_MDT(inode)->mi_entries_per_block;
  384. return offset_in_folio(bh->b_folio, bh->b_data) +
  385. entry_index_in_block * NILFS_MDT(inode)->mi_entry_size;
  386. }
  387. /**
  388. * nilfs_palloc_find_available_slot - find available slot in a group
  389. * @bitmap: bitmap of the group
  390. * @target: offset number of an entry in the group (start point)
  391. * @bsize: size in bits
  392. * @lock: spin lock protecting @bitmap
  393. * @wrap: whether to wrap around
  394. *
  395. * Return: Offset number within the group of the found free entry, or
  396. * %-ENOSPC if not found.
  397. */
  398. static int nilfs_palloc_find_available_slot(unsigned char *bitmap,
  399. unsigned long target,
  400. unsigned int bsize,
  401. spinlock_t *lock, bool wrap)
  402. {
  403. int pos, end = bsize;
  404. if (likely(target < bsize)) {
  405. pos = target;
  406. do {
  407. pos = nilfs_find_next_zero_bit(bitmap, end, pos);
  408. if (pos >= end)
  409. break;
  410. if (!nilfs_set_bit_atomic(lock, pos, bitmap))
  411. return pos;
  412. } while (++pos < end);
  413. end = target;
  414. }
  415. if (!wrap)
  416. return -ENOSPC;
  417. /* wrap around */
  418. for (pos = 0; pos < end; pos++) {
  419. pos = nilfs_find_next_zero_bit(bitmap, end, pos);
  420. if (pos >= end)
  421. break;
  422. if (!nilfs_set_bit_atomic(lock, pos, bitmap))
  423. return pos;
  424. }
  425. return -ENOSPC;
  426. }
  427. /**
  428. * nilfs_palloc_rest_groups_in_desc_block - get the remaining number of groups
  429. * in a group descriptor block
  430. * @inode: inode of metadata file using this allocator
  431. * @curr: current group number
  432. * @max: maximum number of groups
  433. *
  434. * Return: Number of remaining descriptors (= groups) managed by the descriptor
  435. * block.
  436. */
  437. static unsigned long
  438. nilfs_palloc_rest_groups_in_desc_block(const struct inode *inode,
  439. unsigned long curr, unsigned long max)
  440. {
  441. return min_t(unsigned long,
  442. nilfs_palloc_groups_per_desc_block(inode) -
  443. curr % nilfs_palloc_groups_per_desc_block(inode),
  444. max - curr + 1);
  445. }
  446. /**
  447. * nilfs_palloc_count_desc_blocks - count descriptor blocks number
  448. * @inode: inode of metadata file using this allocator
  449. * @desc_blocks: descriptor blocks number [out]
  450. *
  451. * Return: 0 on success, or a negative error code on failure.
  452. */
  453. static int nilfs_palloc_count_desc_blocks(struct inode *inode,
  454. unsigned long *desc_blocks)
  455. {
  456. __u64 blknum;
  457. int ret;
  458. ret = nilfs_bmap_last_key(NILFS_I(inode)->i_bmap, &blknum);
  459. if (likely(!ret))
  460. *desc_blocks = DIV_ROUND_UP(
  461. (unsigned long)blknum,
  462. NILFS_MDT(inode)->mi_blocks_per_desc_block);
  463. return ret;
  464. }
  465. /**
  466. * nilfs_palloc_mdt_file_can_grow - check potential opportunity for
  467. * MDT file growing
  468. * @inode: inode of metadata file using this allocator
  469. * @desc_blocks: known current descriptor blocks count
  470. *
  471. * Return: true if a group can be added in the metadata file, false if not.
  472. */
  473. static inline bool nilfs_palloc_mdt_file_can_grow(struct inode *inode,
  474. unsigned long desc_blocks)
  475. {
  476. return (nilfs_palloc_groups_per_desc_block(inode) * desc_blocks) <
  477. nilfs_palloc_groups_count(inode);
  478. }
  479. /**
  480. * nilfs_palloc_count_max_entries - count max number of entries that can be
  481. * described by descriptor blocks count
  482. * @inode: inode of metadata file using this allocator
  483. * @nused: current number of used entries
  484. * @nmaxp: max number of entries [out]
  485. *
  486. * Return: 0 on success, or one of the following negative error codes on
  487. * failure:
  488. * * %-EIO - I/O error (including metadata corruption).
  489. * * %-ENOMEM - Insufficient memory available.
  490. * * %-ERANGE - Number of entries in use is out of range.
  491. */
  492. int nilfs_palloc_count_max_entries(struct inode *inode, u64 nused, u64 *nmaxp)
  493. {
  494. unsigned long desc_blocks = 0;
  495. u64 entries_per_desc_block, nmax;
  496. int err;
  497. err = nilfs_palloc_count_desc_blocks(inode, &desc_blocks);
  498. if (unlikely(err))
  499. return err;
  500. entries_per_desc_block = (u64)nilfs_palloc_entries_per_group(inode) *
  501. nilfs_palloc_groups_per_desc_block(inode);
  502. nmax = entries_per_desc_block * desc_blocks;
  503. if (nused == nmax &&
  504. nilfs_palloc_mdt_file_can_grow(inode, desc_blocks))
  505. nmax += entries_per_desc_block;
  506. if (nused > nmax)
  507. return -ERANGE;
  508. *nmaxp = nmax;
  509. return 0;
  510. }
  511. /**
  512. * nilfs_palloc_prepare_alloc_entry - prepare to allocate a persistent object
  513. * @inode: inode of metadata file using this allocator
  514. * @req: nilfs_palloc_req structure exchanged for the allocation
  515. * @wrap: whether to wrap around
  516. *
  517. * Return: 0 on success, or one of the following negative error codes on
  518. * failure:
  519. * * %-EIO - I/O error (including metadata corruption).
  520. * * %-ENOMEM - Insufficient memory available.
  521. * * %-ENOSPC - Entries exhausted (No entries available for allocation).
  522. * * %-EROFS - Read only filesystem
  523. */
  524. int nilfs_palloc_prepare_alloc_entry(struct inode *inode,
  525. struct nilfs_palloc_req *req, bool wrap)
  526. {
  527. struct buffer_head *desc_bh, *bitmap_bh;
  528. struct nilfs_palloc_group_desc *desc;
  529. unsigned char *bitmap;
  530. size_t doff, boff;
  531. unsigned long group, maxgroup, ngroups;
  532. unsigned long group_offset, maxgroup_offset;
  533. unsigned long n, entries_per_group;
  534. unsigned long i, j;
  535. spinlock_t *lock;
  536. int pos, ret;
  537. ngroups = nilfs_palloc_groups_count(inode);
  538. maxgroup = ngroups - 1;
  539. group = nilfs_palloc_group(inode, req->pr_entry_nr, &group_offset);
  540. entries_per_group = nilfs_palloc_entries_per_group(inode);
  541. for (i = 0; i < ngroups; i += n) {
  542. if (group >= ngroups && wrap) {
  543. /* wrap around */
  544. group = 0;
  545. maxgroup = nilfs_palloc_group(inode, req->pr_entry_nr,
  546. &maxgroup_offset) - 1;
  547. }
  548. ret = nilfs_palloc_get_desc_block(inode, group, 1, &desc_bh);
  549. if (ret < 0)
  550. return ret;
  551. doff = nilfs_palloc_group_desc_offset(inode, group, desc_bh);
  552. desc = kmap_local_folio(desc_bh->b_folio, doff);
  553. n = nilfs_palloc_rest_groups_in_desc_block(inode, group,
  554. maxgroup);
  555. for (j = 0; j < n; j++, group++, group_offset = 0) {
  556. lock = nilfs_mdt_bgl_lock(inode, group);
  557. if (nilfs_palloc_group_desc_nfrees(&desc[j], lock) == 0)
  558. continue;
  559. kunmap_local(desc);
  560. ret = nilfs_palloc_get_bitmap_block(inode, group, 1,
  561. &bitmap_bh);
  562. if (unlikely(ret < 0)) {
  563. brelse(desc_bh);
  564. return ret;
  565. }
  566. /*
  567. * Re-kmap the folio containing the first (and
  568. * subsequent) group descriptors.
  569. */
  570. desc = kmap_local_folio(desc_bh->b_folio, doff);
  571. boff = nilfs_palloc_bitmap_offset(bitmap_bh);
  572. bitmap = kmap_local_folio(bitmap_bh->b_folio, boff);
  573. pos = nilfs_palloc_find_available_slot(
  574. bitmap, group_offset, entries_per_group, lock,
  575. wrap);
  576. /*
  577. * Since the search for a free slot in the second and
  578. * subsequent bitmap blocks always starts from the
  579. * beginning, the wrap flag only has an effect on the
  580. * first search.
  581. */
  582. kunmap_local(bitmap);
  583. if (pos >= 0)
  584. goto found;
  585. brelse(bitmap_bh);
  586. }
  587. kunmap_local(desc);
  588. brelse(desc_bh);
  589. }
  590. /* no entries left */
  591. return -ENOSPC;
  592. found:
  593. /* found a free entry */
  594. nilfs_palloc_group_desc_add_entries(&desc[j], lock, -1);
  595. req->pr_entry_nr = entries_per_group * group + pos;
  596. kunmap_local(desc);
  597. req->pr_desc_bh = desc_bh;
  598. req->pr_bitmap_bh = bitmap_bh;
  599. return 0;
  600. }
  601. /**
  602. * nilfs_palloc_commit_alloc_entry - finish allocation of a persistent object
  603. * @inode: inode of metadata file using this allocator
  604. * @req: nilfs_palloc_req structure exchanged for the allocation
  605. */
  606. void nilfs_palloc_commit_alloc_entry(struct inode *inode,
  607. struct nilfs_palloc_req *req)
  608. {
  609. mark_buffer_dirty(req->pr_bitmap_bh);
  610. mark_buffer_dirty(req->pr_desc_bh);
  611. nilfs_mdt_mark_dirty(inode);
  612. brelse(req->pr_bitmap_bh);
  613. brelse(req->pr_desc_bh);
  614. }
  615. /**
  616. * nilfs_palloc_commit_free_entry - finish deallocating a persistent object
  617. * @inode: inode of metadata file using this allocator
  618. * @req: nilfs_palloc_req structure exchanged for the removal
  619. */
  620. void nilfs_palloc_commit_free_entry(struct inode *inode,
  621. struct nilfs_palloc_req *req)
  622. {
  623. unsigned long group, group_offset;
  624. size_t doff, boff;
  625. struct nilfs_palloc_group_desc *desc;
  626. unsigned char *bitmap;
  627. spinlock_t *lock;
  628. group = nilfs_palloc_group(inode, req->pr_entry_nr, &group_offset);
  629. doff = nilfs_palloc_group_desc_offset(inode, group, req->pr_desc_bh);
  630. desc = kmap_local_folio(req->pr_desc_bh->b_folio, doff);
  631. boff = nilfs_palloc_bitmap_offset(req->pr_bitmap_bh);
  632. bitmap = kmap_local_folio(req->pr_bitmap_bh->b_folio, boff);
  633. lock = nilfs_mdt_bgl_lock(inode, group);
  634. if (!nilfs_clear_bit_atomic(lock, group_offset, bitmap))
  635. nilfs_warn(inode->i_sb,
  636. "%s (ino=%lu): entry number %llu already freed",
  637. __func__, inode->i_ino,
  638. (unsigned long long)req->pr_entry_nr);
  639. else
  640. nilfs_palloc_group_desc_add_entries(desc, lock, 1);
  641. kunmap_local(bitmap);
  642. kunmap_local(desc);
  643. mark_buffer_dirty(req->pr_desc_bh);
  644. mark_buffer_dirty(req->pr_bitmap_bh);
  645. nilfs_mdt_mark_dirty(inode);
  646. brelse(req->pr_bitmap_bh);
  647. brelse(req->pr_desc_bh);
  648. }
  649. /**
  650. * nilfs_palloc_abort_alloc_entry - cancel allocation of a persistent object
  651. * @inode: inode of metadata file using this allocator
  652. * @req: nilfs_palloc_req structure exchanged for the allocation
  653. */
  654. void nilfs_palloc_abort_alloc_entry(struct inode *inode,
  655. struct nilfs_palloc_req *req)
  656. {
  657. struct nilfs_palloc_group_desc *desc;
  658. size_t doff, boff;
  659. unsigned char *bitmap;
  660. unsigned long group, group_offset;
  661. spinlock_t *lock;
  662. group = nilfs_palloc_group(inode, req->pr_entry_nr, &group_offset);
  663. doff = nilfs_palloc_group_desc_offset(inode, group, req->pr_desc_bh);
  664. desc = kmap_local_folio(req->pr_desc_bh->b_folio, doff);
  665. boff = nilfs_palloc_bitmap_offset(req->pr_bitmap_bh);
  666. bitmap = kmap_local_folio(req->pr_bitmap_bh->b_folio, boff);
  667. lock = nilfs_mdt_bgl_lock(inode, group);
  668. if (!nilfs_clear_bit_atomic(lock, group_offset, bitmap))
  669. nilfs_warn(inode->i_sb,
  670. "%s (ino=%lu): entry number %llu already freed",
  671. __func__, inode->i_ino,
  672. (unsigned long long)req->pr_entry_nr);
  673. else
  674. nilfs_palloc_group_desc_add_entries(desc, lock, 1);
  675. kunmap_local(bitmap);
  676. kunmap_local(desc);
  677. brelse(req->pr_bitmap_bh);
  678. brelse(req->pr_desc_bh);
  679. req->pr_entry_nr = 0;
  680. req->pr_bitmap_bh = NULL;
  681. req->pr_desc_bh = NULL;
  682. }
  683. /**
  684. * nilfs_palloc_prepare_free_entry - prepare to deallocate a persistent object
  685. * @inode: inode of metadata file using this allocator
  686. * @req: nilfs_palloc_req structure exchanged for the removal
  687. *
  688. * Return: 0 on success, or a negative error code on failure.
  689. */
  690. int nilfs_palloc_prepare_free_entry(struct inode *inode,
  691. struct nilfs_palloc_req *req)
  692. {
  693. struct buffer_head *desc_bh, *bitmap_bh;
  694. unsigned long group, group_offset;
  695. int ret;
  696. group = nilfs_palloc_group(inode, req->pr_entry_nr, &group_offset);
  697. ret = nilfs_palloc_get_desc_block(inode, group, 1, &desc_bh);
  698. if (ret < 0)
  699. return ret;
  700. ret = nilfs_palloc_get_bitmap_block(inode, group, 1, &bitmap_bh);
  701. if (ret < 0) {
  702. brelse(desc_bh);
  703. return ret;
  704. }
  705. req->pr_desc_bh = desc_bh;
  706. req->pr_bitmap_bh = bitmap_bh;
  707. return 0;
  708. }
  709. /**
  710. * nilfs_palloc_abort_free_entry - cancel deallocating a persistent object
  711. * @inode: inode of metadata file using this allocator
  712. * @req: nilfs_palloc_req structure exchanged for the removal
  713. */
  714. void nilfs_palloc_abort_free_entry(struct inode *inode,
  715. struct nilfs_palloc_req *req)
  716. {
  717. brelse(req->pr_bitmap_bh);
  718. brelse(req->pr_desc_bh);
  719. req->pr_entry_nr = 0;
  720. req->pr_bitmap_bh = NULL;
  721. req->pr_desc_bh = NULL;
  722. }
  723. /**
  724. * nilfs_palloc_freev - deallocate a set of persistent objects
  725. * @inode: inode of metadata file using this allocator
  726. * @entry_nrs: array of entry numbers to be deallocated
  727. * @nitems: number of entries stored in @entry_nrs
  728. *
  729. * Return: 0 on success, or a negative error code on failure.
  730. */
  731. int nilfs_palloc_freev(struct inode *inode, __u64 *entry_nrs, size_t nitems)
  732. {
  733. struct buffer_head *desc_bh, *bitmap_bh;
  734. struct nilfs_palloc_group_desc *desc;
  735. unsigned char *bitmap;
  736. size_t doff, boff;
  737. unsigned long group, group_offset;
  738. __u64 group_min_nr, last_nrs[8];
  739. const unsigned long epg = nilfs_palloc_entries_per_group(inode);
  740. const unsigned int epb = NILFS_MDT(inode)->mi_entries_per_block;
  741. unsigned int entry_start, end, pos;
  742. spinlock_t *lock;
  743. int i, j, k, ret;
  744. u32 nfree;
  745. for (i = 0; i < nitems; i = j) {
  746. int change_group = false;
  747. int nempties = 0, n = 0;
  748. group = nilfs_palloc_group(inode, entry_nrs[i], &group_offset);
  749. ret = nilfs_palloc_get_desc_block(inode, group, 0, &desc_bh);
  750. if (ret < 0)
  751. return ret;
  752. ret = nilfs_palloc_get_bitmap_block(inode, group, 0,
  753. &bitmap_bh);
  754. if (ret < 0) {
  755. brelse(desc_bh);
  756. return ret;
  757. }
  758. /* Get the first entry number of the group */
  759. group_min_nr = (__u64)group * epg;
  760. boff = nilfs_palloc_bitmap_offset(bitmap_bh);
  761. bitmap = kmap_local_folio(bitmap_bh->b_folio, boff);
  762. lock = nilfs_mdt_bgl_lock(inode, group);
  763. j = i;
  764. entry_start = rounddown(group_offset, epb);
  765. do {
  766. if (!nilfs_clear_bit_atomic(lock, group_offset,
  767. bitmap)) {
  768. nilfs_warn(inode->i_sb,
  769. "%s (ino=%lu): entry number %llu already freed",
  770. __func__, inode->i_ino,
  771. (unsigned long long)entry_nrs[j]);
  772. } else {
  773. n++;
  774. }
  775. j++;
  776. if (j >= nitems || entry_nrs[j] < group_min_nr ||
  777. entry_nrs[j] >= group_min_nr + epg) {
  778. change_group = true;
  779. } else {
  780. group_offset = entry_nrs[j] - group_min_nr;
  781. if (group_offset >= entry_start &&
  782. group_offset < entry_start + epb) {
  783. /* This entry is in the same block */
  784. continue;
  785. }
  786. }
  787. /* Test if the entry block is empty or not */
  788. end = entry_start + epb;
  789. pos = nilfs_find_next_bit(bitmap, end, entry_start);
  790. if (pos >= end) {
  791. last_nrs[nempties++] = entry_nrs[j - 1];
  792. if (nempties >= ARRAY_SIZE(last_nrs))
  793. break;
  794. }
  795. if (change_group)
  796. break;
  797. /* Go on to the next entry block */
  798. entry_start = rounddown(group_offset, epb);
  799. } while (true);
  800. kunmap_local(bitmap);
  801. mark_buffer_dirty(bitmap_bh);
  802. brelse(bitmap_bh);
  803. for (k = 0; k < nempties; k++) {
  804. ret = nilfs_palloc_delete_entry_block(inode,
  805. last_nrs[k]);
  806. if (ret && ret != -ENOENT)
  807. nilfs_warn(inode->i_sb,
  808. "error %d deleting block that object (entry=%llu, ino=%lu) belongs to",
  809. ret, (unsigned long long)last_nrs[k],
  810. inode->i_ino);
  811. }
  812. doff = nilfs_palloc_group_desc_offset(inode, group, desc_bh);
  813. desc = kmap_local_folio(desc_bh->b_folio, doff);
  814. nfree = nilfs_palloc_group_desc_add_entries(desc, lock, n);
  815. kunmap_local(desc);
  816. mark_buffer_dirty(desc_bh);
  817. nilfs_mdt_mark_dirty(inode);
  818. brelse(desc_bh);
  819. if (nfree == nilfs_palloc_entries_per_group(inode)) {
  820. ret = nilfs_palloc_delete_bitmap_block(inode, group);
  821. if (ret && ret != -ENOENT)
  822. nilfs_warn(inode->i_sb,
  823. "error %d deleting bitmap block of group=%lu, ino=%lu",
  824. ret, group, inode->i_ino);
  825. }
  826. }
  827. return 0;
  828. }
  829. void nilfs_palloc_setup_cache(struct inode *inode,
  830. struct nilfs_palloc_cache *cache)
  831. {
  832. NILFS_MDT(inode)->mi_palloc_cache = cache;
  833. spin_lock_init(&cache->lock);
  834. }
  835. void nilfs_palloc_clear_cache(struct inode *inode)
  836. {
  837. struct nilfs_palloc_cache *cache = NILFS_MDT(inode)->mi_palloc_cache;
  838. spin_lock(&cache->lock);
  839. brelse(cache->prev_desc.bh);
  840. brelse(cache->prev_bitmap.bh);
  841. brelse(cache->prev_entry.bh);
  842. cache->prev_desc.bh = NULL;
  843. cache->prev_bitmap.bh = NULL;
  844. cache->prev_entry.bh = NULL;
  845. spin_unlock(&cache->lock);
  846. }
  847. void nilfs_palloc_destroy_cache(struct inode *inode)
  848. {
  849. nilfs_palloc_clear_cache(inode);
  850. NILFS_MDT(inode)->mi_palloc_cache = NULL;
  851. }