dm-space-map-common.c 28 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270370470570670770870971071171271371471571671771871972072172272372472572672772872973073173273373473573673773873974074174274374474574674774874975075175275375475575675775875976076176276376476576676776876977077177277377477577677777877978078178278378478578678778878979079179279379479579679779879980080180280380480580680780880981081181281381481581681781881982082182282382482582682782882983083183283383483583683783883984084184284384484584684784884985085185285385485585685785885986086186286386486586686786886987087187287387487587687787887988088188288388488588688788888989089189289389489589689789889990090190290390490590690790890991091191291391491591691791891992092192292392492592692792892993093193293393493593693793893994094194294394494594694794894995095195295395495595695795895996096196296396496596696796896997097197297397497597697797897998098198298398498598698798898999099199299399499599699799899910001001100210031004100510061007100810091010101110121013101410151016101710181019102010211022102310241025102610271028102910301031103210331034103510361037103810391040104110421043104410451046104710481049105010511052105310541055105610571058105910601061106210631064106510661067106810691070107110721073107410751076107710781079108010811082108310841085108610871088108910901091109210931094109510961097109810991100110111021103110411051106110711081109111011111112111311141115111611171118111911201121112211231124112511261127112811291130113111321133113411351136113711381139114011411142114311441145114611471148114911501151115211531154115511561157115811591160116111621163116411651166116711681169117011711172117311741175117611771178117911801181118211831184118511861187118811891190119111921193119411951196119711981199120012011202120312041205120612071208120912101211121212131214121512161217121812191220122112221223122412251226122712281229123012311232123312341235123612371238123912401241124212431244124512461247124812491250125112521253125412551256125712581259126012611262126312641265
  1. // SPDX-License-Identifier: GPL-2.0-only
  2. /*
  3. * Copyright (C) 2011 Red Hat, Inc.
  4. *
  5. * This file is released under the GPL.
  6. */
  7. #include "dm-space-map-common.h"
  8. #include "dm-transaction-manager.h"
  9. #include "dm-btree-internal.h"
  10. #include "dm-persistent-data-internal.h"
  11. #include <linux/bitops.h>
  12. #include <linux/device-mapper.h>
  13. #define DM_MSG_PREFIX "space map common"
  14. /*----------------------------------------------------------------*/
  15. /*
  16. * Index validator.
  17. */
  18. #define INDEX_CSUM_XOR 160478
  19. static void index_prepare_for_write(const struct dm_block_validator *v,
  20. struct dm_block *b,
  21. size_t block_size)
  22. {
  23. struct disk_metadata_index *mi_le = dm_block_data(b);
  24. mi_le->blocknr = cpu_to_le64(dm_block_location(b));
  25. mi_le->csum = cpu_to_le32(dm_bm_checksum(&mi_le->padding,
  26. block_size - sizeof(__le32),
  27. INDEX_CSUM_XOR));
  28. }
  29. static int index_check(const struct dm_block_validator *v,
  30. struct dm_block *b,
  31. size_t block_size)
  32. {
  33. struct disk_metadata_index *mi_le = dm_block_data(b);
  34. __le32 csum_disk;
  35. if (dm_block_location(b) != le64_to_cpu(mi_le->blocknr)) {
  36. DMERR_LIMIT("%s failed: blocknr %llu != wanted %llu", __func__,
  37. le64_to_cpu(mi_le->blocknr), dm_block_location(b));
  38. return -ENOTBLK;
  39. }
  40. csum_disk = cpu_to_le32(dm_bm_checksum(&mi_le->padding,
  41. block_size - sizeof(__le32),
  42. INDEX_CSUM_XOR));
  43. if (csum_disk != mi_le->csum) {
  44. DMERR_LIMIT("%s failed: csum %u != wanted %u", __func__,
  45. le32_to_cpu(csum_disk), le32_to_cpu(mi_le->csum));
  46. return -EILSEQ;
  47. }
  48. return 0;
  49. }
  50. static const struct dm_block_validator index_validator = {
  51. .name = "index",
  52. .prepare_for_write = index_prepare_for_write,
  53. .check = index_check
  54. };
  55. /*----------------------------------------------------------------*/
  56. /*
  57. * Bitmap validator
  58. */
  59. #define BITMAP_CSUM_XOR 240779
  60. static void dm_bitmap_prepare_for_write(const struct dm_block_validator *v,
  61. struct dm_block *b,
  62. size_t block_size)
  63. {
  64. struct disk_bitmap_header *disk_header = dm_block_data(b);
  65. disk_header->blocknr = cpu_to_le64(dm_block_location(b));
  66. disk_header->csum = cpu_to_le32(dm_bm_checksum(&disk_header->not_used,
  67. block_size - sizeof(__le32),
  68. BITMAP_CSUM_XOR));
  69. }
  70. static int dm_bitmap_check(const struct dm_block_validator *v,
  71. struct dm_block *b,
  72. size_t block_size)
  73. {
  74. struct disk_bitmap_header *disk_header = dm_block_data(b);
  75. __le32 csum_disk;
  76. if (dm_block_location(b) != le64_to_cpu(disk_header->blocknr)) {
  77. DMERR_LIMIT("bitmap check failed: blocknr %llu != wanted %llu",
  78. le64_to_cpu(disk_header->blocknr), dm_block_location(b));
  79. return -ENOTBLK;
  80. }
  81. csum_disk = cpu_to_le32(dm_bm_checksum(&disk_header->not_used,
  82. block_size - sizeof(__le32),
  83. BITMAP_CSUM_XOR));
  84. if (csum_disk != disk_header->csum) {
  85. DMERR_LIMIT("bitmap check failed: csum %u != wanted %u",
  86. le32_to_cpu(csum_disk), le32_to_cpu(disk_header->csum));
  87. return -EILSEQ;
  88. }
  89. return 0;
  90. }
  91. static const struct dm_block_validator dm_sm_bitmap_validator = {
  92. .name = "sm_bitmap",
  93. .prepare_for_write = dm_bitmap_prepare_for_write,
  94. .check = dm_bitmap_check,
  95. };
  96. /*----------------------------------------------------------------*/
  97. #define ENTRIES_PER_WORD 32
  98. #define ENTRIES_SHIFT 5
  99. static void *dm_bitmap_data(struct dm_block *b)
  100. {
  101. return dm_block_data(b) + sizeof(struct disk_bitmap_header);
  102. }
  103. #define WORD_MASK_HIGH 0xAAAAAAAAAAAAAAAAULL
  104. static unsigned int dm_bitmap_word_used(void *addr, unsigned int b)
  105. {
  106. __le64 *words_le = addr;
  107. __le64 *w_le = words_le + (b >> ENTRIES_SHIFT);
  108. uint64_t bits = le64_to_cpu(*w_le);
  109. uint64_t mask = (bits + WORD_MASK_HIGH + 1) & WORD_MASK_HIGH;
  110. return !(~bits & mask);
  111. }
  112. static unsigned int sm_lookup_bitmap(void *addr, unsigned int b)
  113. {
  114. __le64 *words_le = addr;
  115. __le64 *w_le = words_le + (b >> ENTRIES_SHIFT);
  116. unsigned int hi, lo;
  117. b = (b & (ENTRIES_PER_WORD - 1)) << 1;
  118. hi = !!test_bit_le(b, (void *) w_le);
  119. lo = !!test_bit_le(b + 1, (void *) w_le);
  120. return (hi << 1) | lo;
  121. }
  122. static void sm_set_bitmap(void *addr, unsigned int b, unsigned int val)
  123. {
  124. __le64 *words_le = addr;
  125. __le64 *w_le = words_le + (b >> ENTRIES_SHIFT);
  126. b = (b & (ENTRIES_PER_WORD - 1)) << 1;
  127. if (val & 2)
  128. __set_bit_le(b, (void *) w_le);
  129. else
  130. __clear_bit_le(b, (void *) w_le);
  131. if (val & 1)
  132. __set_bit_le(b + 1, (void *) w_le);
  133. else
  134. __clear_bit_le(b + 1, (void *) w_le);
  135. }
  136. static int sm_find_free(void *addr, unsigned int begin, unsigned int end,
  137. unsigned int *result)
  138. {
  139. while (begin < end) {
  140. if (!(begin & (ENTRIES_PER_WORD - 1)) &&
  141. dm_bitmap_word_used(addr, begin)) {
  142. begin += ENTRIES_PER_WORD;
  143. continue;
  144. }
  145. if (!sm_lookup_bitmap(addr, begin)) {
  146. *result = begin;
  147. return 0;
  148. }
  149. begin++;
  150. }
  151. return -ENOSPC;
  152. }
  153. /*----------------------------------------------------------------*/
  154. static int sm_ll_init(struct ll_disk *ll, struct dm_transaction_manager *tm)
  155. {
  156. memset(ll, 0, sizeof(struct ll_disk));
  157. ll->tm = tm;
  158. ll->bitmap_info.tm = tm;
  159. ll->bitmap_info.levels = 1;
  160. /*
  161. * Because the new bitmap blocks are created via a shadow
  162. * operation, the old entry has already had its reference count
  163. * decremented and we don't need the btree to do any bookkeeping.
  164. */
  165. ll->bitmap_info.value_type.size = sizeof(struct disk_index_entry);
  166. ll->bitmap_info.value_type.inc = NULL;
  167. ll->bitmap_info.value_type.dec = NULL;
  168. ll->bitmap_info.value_type.equal = NULL;
  169. ll->ref_count_info.tm = tm;
  170. ll->ref_count_info.levels = 1;
  171. ll->ref_count_info.value_type.size = sizeof(uint32_t);
  172. ll->ref_count_info.value_type.inc = NULL;
  173. ll->ref_count_info.value_type.dec = NULL;
  174. ll->ref_count_info.value_type.equal = NULL;
  175. ll->block_size = dm_bm_block_size(dm_tm_get_bm(tm));
  176. if (ll->block_size > (1 << 30)) {
  177. DMERR("block size too big to hold bitmaps");
  178. return -EINVAL;
  179. }
  180. ll->entries_per_block = (ll->block_size - sizeof(struct disk_bitmap_header)) *
  181. ENTRIES_PER_BYTE;
  182. ll->nr_blocks = 0;
  183. ll->bitmap_root = 0;
  184. ll->ref_count_root = 0;
  185. ll->bitmap_index_changed = false;
  186. return 0;
  187. }
  188. int sm_ll_extend(struct ll_disk *ll, dm_block_t extra_blocks)
  189. {
  190. int r;
  191. dm_block_t i, nr_blocks, nr_indexes;
  192. unsigned int old_blocks, blocks;
  193. nr_blocks = ll->nr_blocks + extra_blocks;
  194. old_blocks = dm_sector_div_up(ll->nr_blocks, ll->entries_per_block);
  195. blocks = dm_sector_div_up(nr_blocks, ll->entries_per_block);
  196. nr_indexes = dm_sector_div_up(nr_blocks, ll->entries_per_block);
  197. if (nr_indexes > ll->max_entries(ll)) {
  198. DMERR("space map too large");
  199. return -EINVAL;
  200. }
  201. /*
  202. * We need to set this before the dm_tm_new_block() call below.
  203. */
  204. ll->nr_blocks = nr_blocks;
  205. for (i = old_blocks; i < blocks; i++) {
  206. struct dm_block *b;
  207. struct disk_index_entry idx;
  208. r = dm_tm_new_block(ll->tm, &dm_sm_bitmap_validator, &b);
  209. if (r < 0)
  210. return r;
  211. idx.blocknr = cpu_to_le64(dm_block_location(b));
  212. dm_tm_unlock(ll->tm, b);
  213. idx.nr_free = cpu_to_le32(ll->entries_per_block);
  214. idx.none_free_before = 0;
  215. r = ll->save_ie(ll, i, &idx);
  216. if (r < 0)
  217. return r;
  218. }
  219. return 0;
  220. }
  221. int sm_ll_lookup_bitmap(struct ll_disk *ll, dm_block_t b, uint32_t *result)
  222. {
  223. int r;
  224. dm_block_t index = b;
  225. struct disk_index_entry ie_disk;
  226. struct dm_block *blk;
  227. if (b >= ll->nr_blocks) {
  228. DMERR_LIMIT("metadata block out of bounds");
  229. return -EINVAL;
  230. }
  231. b = do_div(index, ll->entries_per_block);
  232. r = ll->load_ie(ll, index, &ie_disk);
  233. if (r < 0)
  234. return r;
  235. r = dm_tm_read_lock(ll->tm, le64_to_cpu(ie_disk.blocknr),
  236. &dm_sm_bitmap_validator, &blk);
  237. if (r < 0)
  238. return r;
  239. *result = sm_lookup_bitmap(dm_bitmap_data(blk), b);
  240. dm_tm_unlock(ll->tm, blk);
  241. return 0;
  242. }
  243. static int sm_ll_lookup_big_ref_count(struct ll_disk *ll, dm_block_t b,
  244. uint32_t *result)
  245. {
  246. __le32 le_rc;
  247. int r;
  248. r = dm_btree_lookup(&ll->ref_count_info, ll->ref_count_root, &b, &le_rc);
  249. if (r < 0)
  250. return r;
  251. *result = le32_to_cpu(le_rc);
  252. return r;
  253. }
  254. int sm_ll_lookup(struct ll_disk *ll, dm_block_t b, uint32_t *result)
  255. {
  256. int r = sm_ll_lookup_bitmap(ll, b, result);
  257. if (r)
  258. return r;
  259. if (*result != 3)
  260. return r;
  261. return sm_ll_lookup_big_ref_count(ll, b, result);
  262. }
  263. int sm_ll_find_free_block(struct ll_disk *ll, dm_block_t begin,
  264. dm_block_t end, dm_block_t *result)
  265. {
  266. int r;
  267. struct disk_index_entry ie_disk;
  268. dm_block_t i, index_begin = begin;
  269. dm_block_t index_end = dm_sector_div_up(end, ll->entries_per_block);
  270. /*
  271. * FIXME: Use shifts
  272. */
  273. begin = do_div(index_begin, ll->entries_per_block);
  274. end = do_div(end, ll->entries_per_block);
  275. if (end == 0)
  276. end = ll->entries_per_block;
  277. for (i = index_begin; i < index_end; i++, begin = 0) {
  278. struct dm_block *blk;
  279. unsigned int position;
  280. uint32_t bit_end;
  281. r = ll->load_ie(ll, i, &ie_disk);
  282. if (r < 0)
  283. return r;
  284. if (le32_to_cpu(ie_disk.nr_free) == 0)
  285. continue;
  286. r = dm_tm_read_lock(ll->tm, le64_to_cpu(ie_disk.blocknr),
  287. &dm_sm_bitmap_validator, &blk);
  288. if (r < 0)
  289. return r;
  290. bit_end = (i == index_end - 1) ? end : ll->entries_per_block;
  291. r = sm_find_free(dm_bitmap_data(blk),
  292. max_t(unsigned int, begin, le32_to_cpu(ie_disk.none_free_before)),
  293. bit_end, &position);
  294. if (r == -ENOSPC) {
  295. /*
  296. * This might happen because we started searching
  297. * part way through the bitmap.
  298. */
  299. dm_tm_unlock(ll->tm, blk);
  300. continue;
  301. }
  302. dm_tm_unlock(ll->tm, blk);
  303. *result = i * ll->entries_per_block + (dm_block_t) position;
  304. return 0;
  305. }
  306. return -ENOSPC;
  307. }
  308. int sm_ll_find_common_free_block(struct ll_disk *old_ll, struct ll_disk *new_ll,
  309. dm_block_t begin, dm_block_t end, dm_block_t *b)
  310. {
  311. int r;
  312. uint32_t count;
  313. do {
  314. r = sm_ll_find_free_block(new_ll, begin, new_ll->nr_blocks, b);
  315. if (r)
  316. break;
  317. /* double check this block wasn't used in the old transaction */
  318. if (*b >= old_ll->nr_blocks)
  319. count = 0;
  320. else {
  321. r = sm_ll_lookup(old_ll, *b, &count);
  322. if (r)
  323. break;
  324. if (count)
  325. begin = *b + 1;
  326. }
  327. } while (count);
  328. return r;
  329. }
  330. /*----------------------------------------------------------------*/
  331. int sm_ll_insert(struct ll_disk *ll, dm_block_t b,
  332. uint32_t ref_count, int32_t *nr_allocations)
  333. {
  334. int r;
  335. uint32_t bit, old;
  336. struct dm_block *nb;
  337. dm_block_t index = b;
  338. struct disk_index_entry ie_disk;
  339. void *bm_le;
  340. int inc;
  341. bit = do_div(index, ll->entries_per_block);
  342. r = ll->load_ie(ll, index, &ie_disk);
  343. if (r < 0)
  344. return r;
  345. r = dm_tm_shadow_block(ll->tm, le64_to_cpu(ie_disk.blocknr),
  346. &dm_sm_bitmap_validator, &nb, &inc);
  347. if (r < 0) {
  348. DMERR("dm_tm_shadow_block() failed");
  349. return r;
  350. }
  351. ie_disk.blocknr = cpu_to_le64(dm_block_location(nb));
  352. bm_le = dm_bitmap_data(nb);
  353. old = sm_lookup_bitmap(bm_le, bit);
  354. if (old > 2) {
  355. r = sm_ll_lookup_big_ref_count(ll, b, &old);
  356. if (r < 0) {
  357. dm_tm_unlock(ll->tm, nb);
  358. return r;
  359. }
  360. }
  361. if (r) {
  362. dm_tm_unlock(ll->tm, nb);
  363. return r;
  364. }
  365. if (ref_count <= 2) {
  366. sm_set_bitmap(bm_le, bit, ref_count);
  367. dm_tm_unlock(ll->tm, nb);
  368. if (old > 2) {
  369. r = dm_btree_remove(&ll->ref_count_info,
  370. ll->ref_count_root,
  371. &b, &ll->ref_count_root);
  372. if (r)
  373. return r;
  374. }
  375. } else {
  376. __le32 le_rc = cpu_to_le32(ref_count);
  377. sm_set_bitmap(bm_le, bit, 3);
  378. dm_tm_unlock(ll->tm, nb);
  379. __dm_bless_for_disk(&le_rc);
  380. r = dm_btree_insert(&ll->ref_count_info, ll->ref_count_root,
  381. &b, &le_rc, &ll->ref_count_root);
  382. if (r < 0) {
  383. DMERR("ref count insert failed");
  384. return r;
  385. }
  386. }
  387. if (ref_count && !old) {
  388. *nr_allocations = 1;
  389. ll->nr_allocated++;
  390. le32_add_cpu(&ie_disk.nr_free, -1);
  391. if (le32_to_cpu(ie_disk.none_free_before) == bit)
  392. ie_disk.none_free_before = cpu_to_le32(bit + 1);
  393. } else if (old && !ref_count) {
  394. *nr_allocations = -1;
  395. ll->nr_allocated--;
  396. le32_add_cpu(&ie_disk.nr_free, 1);
  397. ie_disk.none_free_before = cpu_to_le32(min(le32_to_cpu(ie_disk.none_free_before), bit));
  398. } else
  399. *nr_allocations = 0;
  400. return ll->save_ie(ll, index, &ie_disk);
  401. }
  402. /*----------------------------------------------------------------*/
  403. /*
  404. * Holds useful intermediate results for the range based inc and dec
  405. * operations.
  406. */
  407. struct inc_context {
  408. struct disk_index_entry ie_disk;
  409. struct dm_block *bitmap_block;
  410. void *bitmap;
  411. struct dm_block *overflow_leaf;
  412. };
  413. static inline void init_inc_context(struct inc_context *ic)
  414. {
  415. ic->bitmap_block = NULL;
  416. ic->bitmap = NULL;
  417. ic->overflow_leaf = NULL;
  418. }
  419. static inline void exit_inc_context(struct ll_disk *ll, struct inc_context *ic)
  420. {
  421. if (ic->bitmap_block)
  422. dm_tm_unlock(ll->tm, ic->bitmap_block);
  423. if (ic->overflow_leaf)
  424. dm_tm_unlock(ll->tm, ic->overflow_leaf);
  425. }
  426. static inline void reset_inc_context(struct ll_disk *ll, struct inc_context *ic)
  427. {
  428. exit_inc_context(ll, ic);
  429. init_inc_context(ic);
  430. }
  431. /*
  432. * Confirms a btree node contains a particular key at an index.
  433. */
  434. static bool contains_key(struct btree_node *n, uint64_t key, int index)
  435. {
  436. return index >= 0 &&
  437. index < le32_to_cpu(n->header.nr_entries) &&
  438. le64_to_cpu(n->keys[index]) == key;
  439. }
  440. static int __sm_ll_inc_overflow(struct ll_disk *ll, dm_block_t b, struct inc_context *ic)
  441. {
  442. int r;
  443. int index;
  444. struct btree_node *n;
  445. __le32 *v_ptr;
  446. uint32_t rc;
  447. /*
  448. * bitmap_block needs to be unlocked because getting the
  449. * overflow_leaf may need to allocate, and thus use the space map.
  450. */
  451. reset_inc_context(ll, ic);
  452. r = btree_get_overwrite_leaf(&ll->ref_count_info, ll->ref_count_root,
  453. b, &index, &ll->ref_count_root, &ic->overflow_leaf);
  454. if (r < 0)
  455. return r;
  456. n = dm_block_data(ic->overflow_leaf);
  457. if (!contains_key(n, b, index)) {
  458. DMERR("overflow btree is missing an entry");
  459. return -EINVAL;
  460. }
  461. v_ptr = value_ptr(n, index);
  462. rc = le32_to_cpu(*v_ptr) + 1;
  463. *v_ptr = cpu_to_le32(rc);
  464. return 0;
  465. }
  466. static int sm_ll_inc_overflow(struct ll_disk *ll, dm_block_t b, struct inc_context *ic)
  467. {
  468. int index;
  469. struct btree_node *n;
  470. __le32 *v_ptr;
  471. uint32_t rc;
  472. /*
  473. * Do we already have the correct overflow leaf?
  474. */
  475. if (ic->overflow_leaf) {
  476. n = dm_block_data(ic->overflow_leaf);
  477. index = lower_bound(n, b);
  478. if (contains_key(n, b, index)) {
  479. v_ptr = value_ptr(n, index);
  480. rc = le32_to_cpu(*v_ptr) + 1;
  481. *v_ptr = cpu_to_le32(rc);
  482. return 0;
  483. }
  484. }
  485. return __sm_ll_inc_overflow(ll, b, ic);
  486. }
  487. static inline int shadow_bitmap(struct ll_disk *ll, struct inc_context *ic)
  488. {
  489. int r, inc;
  490. r = dm_tm_shadow_block(ll->tm, le64_to_cpu(ic->ie_disk.blocknr),
  491. &dm_sm_bitmap_validator, &ic->bitmap_block, &inc);
  492. if (r < 0) {
  493. DMERR("dm_tm_shadow_block() failed");
  494. return r;
  495. }
  496. ic->ie_disk.blocknr = cpu_to_le64(dm_block_location(ic->bitmap_block));
  497. ic->bitmap = dm_bitmap_data(ic->bitmap_block);
  498. return 0;
  499. }
  500. /*
  501. * Once shadow_bitmap has been called, which always happens at the start of inc/dec,
  502. * we can reopen the bitmap with a simple write lock, rather than re calling
  503. * dm_tm_shadow_block().
  504. */
  505. static inline int ensure_bitmap(struct ll_disk *ll, struct inc_context *ic)
  506. {
  507. if (!ic->bitmap_block) {
  508. int r = dm_bm_write_lock(dm_tm_get_bm(ll->tm), le64_to_cpu(ic->ie_disk.blocknr),
  509. &dm_sm_bitmap_validator, &ic->bitmap_block);
  510. if (r) {
  511. DMERR("unable to re-get write lock for bitmap");
  512. return r;
  513. }
  514. ic->bitmap = dm_bitmap_data(ic->bitmap_block);
  515. }
  516. return 0;
  517. }
  518. /*
  519. * Loops round incrementing entries in a single bitmap.
  520. */
  521. static inline int sm_ll_inc_bitmap(struct ll_disk *ll, dm_block_t b,
  522. uint32_t bit, uint32_t bit_end,
  523. int32_t *nr_allocations, dm_block_t *new_b,
  524. struct inc_context *ic)
  525. {
  526. int r;
  527. __le32 le_rc;
  528. uint32_t old;
  529. for (; bit != bit_end; bit++, b++) {
  530. /*
  531. * We only need to drop the bitmap if we need to find a new btree
  532. * leaf for the overflow. So if it was dropped last iteration,
  533. * we now re-get it.
  534. */
  535. r = ensure_bitmap(ll, ic);
  536. if (r)
  537. return r;
  538. old = sm_lookup_bitmap(ic->bitmap, bit);
  539. switch (old) {
  540. case 0:
  541. /* inc bitmap, adjust nr_allocated */
  542. sm_set_bitmap(ic->bitmap, bit, 1);
  543. (*nr_allocations)++;
  544. ll->nr_allocated++;
  545. le32_add_cpu(&ic->ie_disk.nr_free, -1);
  546. if (le32_to_cpu(ic->ie_disk.none_free_before) == bit)
  547. ic->ie_disk.none_free_before = cpu_to_le32(bit + 1);
  548. break;
  549. case 1:
  550. /* inc bitmap */
  551. sm_set_bitmap(ic->bitmap, bit, 2);
  552. break;
  553. case 2:
  554. /* inc bitmap and insert into overflow */
  555. sm_set_bitmap(ic->bitmap, bit, 3);
  556. reset_inc_context(ll, ic);
  557. le_rc = cpu_to_le32(3);
  558. __dm_bless_for_disk(&le_rc);
  559. r = dm_btree_insert(&ll->ref_count_info, ll->ref_count_root,
  560. &b, &le_rc, &ll->ref_count_root);
  561. if (r < 0) {
  562. DMERR("ref count insert failed");
  563. return r;
  564. }
  565. break;
  566. default:
  567. /*
  568. * inc within the overflow tree only.
  569. */
  570. r = sm_ll_inc_overflow(ll, b, ic);
  571. if (r < 0)
  572. return r;
  573. }
  574. }
  575. *new_b = b;
  576. return 0;
  577. }
  578. /*
  579. * Finds a bitmap that contains entries in the block range, and increments
  580. * them.
  581. */
  582. static int __sm_ll_inc(struct ll_disk *ll, dm_block_t b, dm_block_t e,
  583. int32_t *nr_allocations, dm_block_t *new_b)
  584. {
  585. int r;
  586. struct inc_context ic;
  587. uint32_t bit, bit_end;
  588. dm_block_t index = b;
  589. init_inc_context(&ic);
  590. bit = do_div(index, ll->entries_per_block);
  591. r = ll->load_ie(ll, index, &ic.ie_disk);
  592. if (r < 0)
  593. return r;
  594. r = shadow_bitmap(ll, &ic);
  595. if (r)
  596. return r;
  597. bit_end = min(bit + (e - b), (dm_block_t) ll->entries_per_block);
  598. r = sm_ll_inc_bitmap(ll, b, bit, bit_end, nr_allocations, new_b, &ic);
  599. exit_inc_context(ll, &ic);
  600. if (r)
  601. return r;
  602. return ll->save_ie(ll, index, &ic.ie_disk);
  603. }
  604. int sm_ll_inc(struct ll_disk *ll, dm_block_t b, dm_block_t e,
  605. int32_t *nr_allocations)
  606. {
  607. *nr_allocations = 0;
  608. while (b != e) {
  609. int r = __sm_ll_inc(ll, b, e, nr_allocations, &b);
  610. if (r)
  611. return r;
  612. }
  613. return 0;
  614. }
  615. /*----------------------------------------------------------------*/
  616. static int __sm_ll_del_overflow(struct ll_disk *ll, dm_block_t b,
  617. struct inc_context *ic)
  618. {
  619. reset_inc_context(ll, ic);
  620. return dm_btree_remove(&ll->ref_count_info, ll->ref_count_root,
  621. &b, &ll->ref_count_root);
  622. }
  623. static int __sm_ll_dec_overflow(struct ll_disk *ll, dm_block_t b,
  624. struct inc_context *ic, uint32_t *old_rc)
  625. {
  626. int r;
  627. int index = -1;
  628. struct btree_node *n;
  629. __le32 *v_ptr;
  630. uint32_t rc;
  631. reset_inc_context(ll, ic);
  632. r = btree_get_overwrite_leaf(&ll->ref_count_info, ll->ref_count_root,
  633. b, &index, &ll->ref_count_root, &ic->overflow_leaf);
  634. if (r < 0)
  635. return r;
  636. n = dm_block_data(ic->overflow_leaf);
  637. if (!contains_key(n, b, index)) {
  638. DMERR("overflow btree is missing an entry");
  639. return -EINVAL;
  640. }
  641. v_ptr = value_ptr(n, index);
  642. rc = le32_to_cpu(*v_ptr);
  643. *old_rc = rc;
  644. if (rc == 3)
  645. return __sm_ll_del_overflow(ll, b, ic);
  646. rc--;
  647. *v_ptr = cpu_to_le32(rc);
  648. return 0;
  649. }
  650. static int sm_ll_dec_overflow(struct ll_disk *ll, dm_block_t b,
  651. struct inc_context *ic, uint32_t *old_rc)
  652. {
  653. /*
  654. * Do we already have the correct overflow leaf?
  655. */
  656. if (ic->overflow_leaf) {
  657. int index;
  658. struct btree_node *n;
  659. __le32 *v_ptr;
  660. uint32_t rc;
  661. n = dm_block_data(ic->overflow_leaf);
  662. index = lower_bound(n, b);
  663. if (contains_key(n, b, index)) {
  664. v_ptr = value_ptr(n, index);
  665. rc = le32_to_cpu(*v_ptr);
  666. *old_rc = rc;
  667. if (rc > 3) {
  668. rc--;
  669. *v_ptr = cpu_to_le32(rc);
  670. return 0;
  671. } else {
  672. return __sm_ll_del_overflow(ll, b, ic);
  673. }
  674. }
  675. }
  676. return __sm_ll_dec_overflow(ll, b, ic, old_rc);
  677. }
  678. /*
  679. * Loops round incrementing entries in a single bitmap.
  680. */
  681. static inline int sm_ll_dec_bitmap(struct ll_disk *ll, dm_block_t b,
  682. uint32_t bit, uint32_t bit_end,
  683. struct inc_context *ic,
  684. int32_t *nr_allocations, dm_block_t *new_b)
  685. {
  686. int r;
  687. uint32_t old;
  688. for (; bit != bit_end; bit++, b++) {
  689. /*
  690. * We only need to drop the bitmap if we need to find a new btree
  691. * leaf for the overflow. So if it was dropped last iteration,
  692. * we now re-get it.
  693. */
  694. r = ensure_bitmap(ll, ic);
  695. if (r)
  696. return r;
  697. old = sm_lookup_bitmap(ic->bitmap, bit);
  698. switch (old) {
  699. case 0:
  700. DMERR("unable to decrement block");
  701. return -EINVAL;
  702. case 1:
  703. /* dec bitmap */
  704. sm_set_bitmap(ic->bitmap, bit, 0);
  705. (*nr_allocations)--;
  706. ll->nr_allocated--;
  707. le32_add_cpu(&ic->ie_disk.nr_free, 1);
  708. ic->ie_disk.none_free_before =
  709. cpu_to_le32(min(le32_to_cpu(ic->ie_disk.none_free_before), bit));
  710. break;
  711. case 2:
  712. /* dec bitmap and insert into overflow */
  713. sm_set_bitmap(ic->bitmap, bit, 1);
  714. break;
  715. case 3:
  716. r = sm_ll_dec_overflow(ll, b, ic, &old);
  717. if (r < 0)
  718. return r;
  719. if (old == 3) {
  720. r = ensure_bitmap(ll, ic);
  721. if (r)
  722. return r;
  723. sm_set_bitmap(ic->bitmap, bit, 2);
  724. }
  725. break;
  726. }
  727. }
  728. *new_b = b;
  729. return 0;
  730. }
  731. static int __sm_ll_dec(struct ll_disk *ll, dm_block_t b, dm_block_t e,
  732. int32_t *nr_allocations, dm_block_t *new_b)
  733. {
  734. int r;
  735. uint32_t bit, bit_end;
  736. struct inc_context ic;
  737. dm_block_t index = b;
  738. init_inc_context(&ic);
  739. bit = do_div(index, ll->entries_per_block);
  740. r = ll->load_ie(ll, index, &ic.ie_disk);
  741. if (r < 0)
  742. return r;
  743. r = shadow_bitmap(ll, &ic);
  744. if (r)
  745. return r;
  746. bit_end = min(bit + (e - b), (dm_block_t) ll->entries_per_block);
  747. r = sm_ll_dec_bitmap(ll, b, bit, bit_end, &ic, nr_allocations, new_b);
  748. exit_inc_context(ll, &ic);
  749. if (r)
  750. return r;
  751. return ll->save_ie(ll, index, &ic.ie_disk);
  752. }
  753. int sm_ll_dec(struct ll_disk *ll, dm_block_t b, dm_block_t e,
  754. int32_t *nr_allocations)
  755. {
  756. *nr_allocations = 0;
  757. while (b != e) {
  758. int r = __sm_ll_dec(ll, b, e, nr_allocations, &b);
  759. if (r)
  760. return r;
  761. }
  762. return 0;
  763. }
  764. /*----------------------------------------------------------------*/
  765. int sm_ll_commit(struct ll_disk *ll)
  766. {
  767. int r = 0;
  768. if (ll->bitmap_index_changed) {
  769. r = ll->commit(ll);
  770. if (!r)
  771. ll->bitmap_index_changed = false;
  772. }
  773. return r;
  774. }
  775. /*----------------------------------------------------------------*/
  776. static int metadata_ll_load_ie(struct ll_disk *ll, dm_block_t index,
  777. struct disk_index_entry *ie)
  778. {
  779. memcpy(ie, ll->mi_le.index + index, sizeof(*ie));
  780. return 0;
  781. }
  782. static int metadata_ll_save_ie(struct ll_disk *ll, dm_block_t index,
  783. struct disk_index_entry *ie)
  784. {
  785. ll->bitmap_index_changed = true;
  786. memcpy(ll->mi_le.index + index, ie, sizeof(*ie));
  787. return 0;
  788. }
  789. static int metadata_ll_init_index(struct ll_disk *ll)
  790. {
  791. int r;
  792. struct dm_block *b;
  793. r = dm_tm_new_block(ll->tm, &index_validator, &b);
  794. if (r < 0)
  795. return r;
  796. ll->bitmap_root = dm_block_location(b);
  797. dm_tm_unlock(ll->tm, b);
  798. return 0;
  799. }
  800. static int metadata_ll_open(struct ll_disk *ll)
  801. {
  802. int r;
  803. struct dm_block *block;
  804. r = dm_tm_read_lock(ll->tm, ll->bitmap_root,
  805. &index_validator, &block);
  806. if (r)
  807. return r;
  808. memcpy(&ll->mi_le, dm_block_data(block), sizeof(ll->mi_le));
  809. dm_tm_unlock(ll->tm, block);
  810. return 0;
  811. }
  812. static dm_block_t metadata_ll_max_entries(struct ll_disk *ll)
  813. {
  814. return MAX_METADATA_BITMAPS;
  815. }
  816. static int metadata_ll_commit(struct ll_disk *ll)
  817. {
  818. int r, inc;
  819. struct dm_block *b;
  820. r = dm_tm_shadow_block(ll->tm, ll->bitmap_root, &index_validator, &b, &inc);
  821. if (r)
  822. return r;
  823. memcpy(dm_block_data(b), &ll->mi_le, sizeof(ll->mi_le));
  824. ll->bitmap_root = dm_block_location(b);
  825. dm_tm_unlock(ll->tm, b);
  826. return 0;
  827. }
  828. int sm_ll_new_metadata(struct ll_disk *ll, struct dm_transaction_manager *tm)
  829. {
  830. int r;
  831. r = sm_ll_init(ll, tm);
  832. if (r < 0)
  833. return r;
  834. ll->load_ie = metadata_ll_load_ie;
  835. ll->save_ie = metadata_ll_save_ie;
  836. ll->init_index = metadata_ll_init_index;
  837. ll->open_index = metadata_ll_open;
  838. ll->max_entries = metadata_ll_max_entries;
  839. ll->commit = metadata_ll_commit;
  840. ll->nr_blocks = 0;
  841. ll->nr_allocated = 0;
  842. r = ll->init_index(ll);
  843. if (r < 0)
  844. return r;
  845. r = dm_btree_empty(&ll->ref_count_info, &ll->ref_count_root);
  846. if (r < 0)
  847. return r;
  848. return 0;
  849. }
  850. int sm_ll_open_metadata(struct ll_disk *ll, struct dm_transaction_manager *tm,
  851. void *root_le, size_t len)
  852. {
  853. int r;
  854. struct disk_sm_root smr;
  855. if (len < sizeof(struct disk_sm_root)) {
  856. DMERR("sm_metadata root too small");
  857. return -ENOMEM;
  858. }
  859. /*
  860. * We don't know the alignment of the root_le buffer, so need to
  861. * copy into a new structure.
  862. */
  863. memcpy(&smr, root_le, sizeof(smr));
  864. r = sm_ll_init(ll, tm);
  865. if (r < 0)
  866. return r;
  867. ll->load_ie = metadata_ll_load_ie;
  868. ll->save_ie = metadata_ll_save_ie;
  869. ll->init_index = metadata_ll_init_index;
  870. ll->open_index = metadata_ll_open;
  871. ll->max_entries = metadata_ll_max_entries;
  872. ll->commit = metadata_ll_commit;
  873. ll->nr_blocks = le64_to_cpu(smr.nr_blocks);
  874. ll->nr_allocated = le64_to_cpu(smr.nr_allocated);
  875. ll->bitmap_root = le64_to_cpu(smr.bitmap_root);
  876. ll->ref_count_root = le64_to_cpu(smr.ref_count_root);
  877. return ll->open_index(ll);
  878. }
  879. /*----------------------------------------------------------------*/
  880. static inline int ie_cache_writeback(struct ll_disk *ll, struct ie_cache *iec)
  881. {
  882. iec->dirty = false;
  883. __dm_bless_for_disk(iec->ie);
  884. return dm_btree_insert(&ll->bitmap_info, ll->bitmap_root,
  885. &iec->index, &iec->ie, &ll->bitmap_root);
  886. }
  887. static inline unsigned int hash_index(dm_block_t index)
  888. {
  889. return dm_hash_block(index, IE_CACHE_MASK);
  890. }
  891. static int disk_ll_load_ie(struct ll_disk *ll, dm_block_t index,
  892. struct disk_index_entry *ie)
  893. {
  894. int r;
  895. unsigned int h = hash_index(index);
  896. struct ie_cache *iec = ll->ie_cache + h;
  897. if (iec->valid) {
  898. if (iec->index == index) {
  899. memcpy(ie, &iec->ie, sizeof(*ie));
  900. return 0;
  901. }
  902. if (iec->dirty) {
  903. r = ie_cache_writeback(ll, iec);
  904. if (r)
  905. return r;
  906. }
  907. }
  908. r = dm_btree_lookup(&ll->bitmap_info, ll->bitmap_root, &index, ie);
  909. if (!r) {
  910. iec->valid = true;
  911. iec->dirty = false;
  912. iec->index = index;
  913. memcpy(&iec->ie, ie, sizeof(*ie));
  914. }
  915. return r;
  916. }
  917. static int disk_ll_save_ie(struct ll_disk *ll, dm_block_t index,
  918. struct disk_index_entry *ie)
  919. {
  920. int r;
  921. unsigned int h = hash_index(index);
  922. struct ie_cache *iec = ll->ie_cache + h;
  923. ll->bitmap_index_changed = true;
  924. if (iec->valid) {
  925. if (iec->index == index) {
  926. memcpy(&iec->ie, ie, sizeof(*ie));
  927. iec->dirty = true;
  928. return 0;
  929. }
  930. if (iec->dirty) {
  931. r = ie_cache_writeback(ll, iec);
  932. if (r)
  933. return r;
  934. }
  935. }
  936. iec->valid = true;
  937. iec->dirty = true;
  938. iec->index = index;
  939. memcpy(&iec->ie, ie, sizeof(*ie));
  940. return 0;
  941. }
  942. static int disk_ll_init_index(struct ll_disk *ll)
  943. {
  944. unsigned int i;
  945. for (i = 0; i < IE_CACHE_SIZE; i++) {
  946. struct ie_cache *iec = ll->ie_cache + i;
  947. iec->valid = false;
  948. iec->dirty = false;
  949. }
  950. return dm_btree_empty(&ll->bitmap_info, &ll->bitmap_root);
  951. }
  952. static int disk_ll_open(struct ll_disk *ll)
  953. {
  954. return 0;
  955. }
  956. static dm_block_t disk_ll_max_entries(struct ll_disk *ll)
  957. {
  958. return -1ULL;
  959. }
  960. static int disk_ll_commit(struct ll_disk *ll)
  961. {
  962. int r = 0;
  963. unsigned int i;
  964. for (i = 0; i < IE_CACHE_SIZE; i++) {
  965. struct ie_cache *iec = ll->ie_cache + i;
  966. if (iec->valid && iec->dirty)
  967. r = ie_cache_writeback(ll, iec);
  968. }
  969. return r;
  970. }
  971. int sm_ll_new_disk(struct ll_disk *ll, struct dm_transaction_manager *tm)
  972. {
  973. int r;
  974. r = sm_ll_init(ll, tm);
  975. if (r < 0)
  976. return r;
  977. ll->load_ie = disk_ll_load_ie;
  978. ll->save_ie = disk_ll_save_ie;
  979. ll->init_index = disk_ll_init_index;
  980. ll->open_index = disk_ll_open;
  981. ll->max_entries = disk_ll_max_entries;
  982. ll->commit = disk_ll_commit;
  983. ll->nr_blocks = 0;
  984. ll->nr_allocated = 0;
  985. r = ll->init_index(ll);
  986. if (r < 0)
  987. return r;
  988. r = dm_btree_empty(&ll->ref_count_info, &ll->ref_count_root);
  989. if (r < 0)
  990. return r;
  991. return 0;
  992. }
  993. int sm_ll_open_disk(struct ll_disk *ll, struct dm_transaction_manager *tm,
  994. void *root_le, size_t len)
  995. {
  996. int r;
  997. struct disk_sm_root *smr = root_le;
  998. if (len < sizeof(struct disk_sm_root)) {
  999. DMERR("sm_metadata root too small");
  1000. return -ENOMEM;
  1001. }
  1002. r = sm_ll_init(ll, tm);
  1003. if (r < 0)
  1004. return r;
  1005. ll->load_ie = disk_ll_load_ie;
  1006. ll->save_ie = disk_ll_save_ie;
  1007. ll->init_index = disk_ll_init_index;
  1008. ll->open_index = disk_ll_open;
  1009. ll->max_entries = disk_ll_max_entries;
  1010. ll->commit = disk_ll_commit;
  1011. ll->nr_blocks = le64_to_cpu(smr->nr_blocks);
  1012. ll->nr_allocated = le64_to_cpu(smr->nr_allocated);
  1013. ll->bitmap_root = le64_to_cpu(smr->bitmap_root);
  1014. ll->ref_count_root = le64_to_cpu(smr->ref_count_root);
  1015. return ll->open_index(ll);
  1016. }
  1017. /*----------------------------------------------------------------*/