dm-table.c 54 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581158215831584158515861587158815891590159115921593159415951596159715981599160016011602160316041605160616071608160916101611161216131614161516161617161816191620162116221623162416251626162716281629163016311632163316341635163616371638163916401641164216431644164516461647164816491650165116521653165416551656165716581659166016611662166316641665166616671668166916701671167216731674167516761677167816791680168116821683168416851686168716881689169016911692169316941695169616971698169917001701170217031704170517061707170817091710171117121713171417151716171717181719172017211722172317241725172617271728172917301731173217331734173517361737173817391740174117421743174417451746174717481749175017511752175317541755175617571758175917601761176217631764176517661767176817691770177117721773177417751776177
71778177917801781178217831784178517861787178817891790179117921793179417951796179717981799180018011802180318041805180618071808180918101811181218131814181518161817181818191820182118221823182418251826182718281829183018311832183318341835183618371838183918401841184218431844184518461847184818491850185118521853185418551856185718581859186018611862186318641865186618671868186918701871187218731874187518761877187818791880188118821883188418851886188718881889189018911892189318941895189618971898189919001901190219031904190519061907190819091910191119121913191419151916191719181919192019211922192319241925192619271928192919301931193219331934193519361937193819391940194119421943194419451946194719481949195019511952195319541955195619571958195919601961196219631964196519661967196819691970197119721973197419751976197719781979198019811982198319841985198619871988198919901991199219931994199519961997199819992000200120022003200420052006200720082009201020112012201320142015201620172018201920202021202220232024202520262027202820292030203120322033203420352036203720382039204020412042204320442045204620472048204920502051205220532054205520562057205820592060206120622063206420652066206720682069207020712072207320742075207620772078207920802081208220832084208520862087208820892090209120922093209420952096209720982099210021012102210321042105210621072108210921102111211221132114211521162117211821192120212121222123212421252126212721282129213021312132213321342135213621372138213921402141214221432144214521462147214821492150215121522153215421552156215721582159216021612162216321642165216621672168216921702171217221732174217521762177217821792180218121822183218421852186218721882189219021912192219321942195219621972198219922002201220222032204220522062207220822092210221122122213221422152216221722182219222022212222222322242225222622272228222922302231
  1. // SPDX-License-Identifier: GPL-2.0-only
  2. /*
  3. * Copyright (C) 2001 Sistina Software (UK) Limited.
  4. * Copyright (C) 2004-2008 Red Hat, Inc. All rights reserved.
  5. *
  6. * This file is released under the GPL.
  7. */
  8. #include "dm-core.h"
  9. #include "dm-rq.h"
  10. #include <linux/module.h>
  11. #include <linux/vmalloc.h>
  12. #include <linux/blkdev.h>
  13. #include <linux/blk-integrity.h>
  14. #include <linux/namei.h>
  15. #include <linux/ctype.h>
  16. #include <linux/string.h>
  17. #include <linux/slab.h>
  18. #include <linux/interrupt.h>
  19. #include <linux/mutex.h>
  20. #include <linux/delay.h>
  21. #include <linux/atomic.h>
  22. #include <linux/blk-mq.h>
  23. #include <linux/mount.h>
  24. #include <linux/dax.h>
  25. #define DM_MSG_PREFIX "table"
  26. #define NODE_SIZE L1_CACHE_BYTES
  27. #define KEYS_PER_NODE (NODE_SIZE / sizeof(sector_t))
  28. #define CHILDREN_PER_NODE (KEYS_PER_NODE + 1)
  29. /*
  30. * Similar to ceiling(log_size(n))
  31. */
  32. static unsigned int int_log(unsigned int n, unsigned int base)
  33. {
  34. int result = 0;
  35. while (n > 1) {
  36. n = dm_div_up(n, base);
  37. result++;
  38. }
  39. return result;
  40. }
  41. /*
  42. * Calculate the index of the child node of the n'th node k'th key.
  43. */
  44. static inline unsigned int get_child(unsigned int n, unsigned int k)
  45. {
  46. return (n * CHILDREN_PER_NODE) + k;
  47. }
  48. /*
  49. * Return the n'th node of level l from table t.
  50. */
  51. static inline sector_t *get_node(struct dm_table *t,
  52. unsigned int l, unsigned int n)
  53. {
  54. return t->index[l] + (n * KEYS_PER_NODE);
  55. }
  56. /*
  57. * Return the highest key that you could lookup from the n'th
  58. * node on level l of the btree.
  59. */
  60. static sector_t high(struct dm_table *t, unsigned int l, unsigned int n)
  61. {
  62. for (; l < t->depth - 1; l++)
  63. n = get_child(n, CHILDREN_PER_NODE - 1);
  64. if (n >= t->counts[l])
  65. return (sector_t) -1;
  66. return get_node(t, l, n)[KEYS_PER_NODE - 1];
  67. }
  68. /*
  69. * Fills in a level of the btree based on the highs of the level
  70. * below it.
  71. */
  72. static int setup_btree_index(unsigned int l, struct dm_table *t)
  73. {
  74. unsigned int n, k;
  75. sector_t *node;
  76. for (n = 0U; n < t->counts[l]; n++) {
  77. node = get_node(t, l, n);
  78. for (k = 0U; k < KEYS_PER_NODE; k++)
  79. node[k] = high(t, l + 1, get_child(n, k));
  80. }
  81. return 0;
  82. }
  83. /*
  84. * highs, and targets are managed as dynamic arrays during a
  85. * table load.
  86. */
  87. static int alloc_targets(struct dm_table *t, unsigned int num)
  88. {
  89. sector_t *n_highs;
  90. struct dm_target *n_targets;
  91. /*
  92. * Allocate both the target array and offset array at once.
  93. */
  94. n_highs = kvcalloc(num, sizeof(struct dm_target) + sizeof(sector_t),
  95. GFP_KERNEL);
  96. if (!n_highs)
  97. return -ENOMEM;
  98. n_targets = (struct dm_target *) (n_highs + num);
  99. memset(n_highs, -1, sizeof(*n_highs) * num);
  100. t->num_allocated = num;
  101. t->highs = n_highs;
  102. t->targets = n_targets;
  103. return 0;
  104. }
  105. int dm_table_create(struct dm_table **result, blk_mode_t mode,
  106. unsigned int num_targets, struct mapped_device *md)
  107. {
  108. struct dm_table *t;
  109. if (num_targets > DM_MAX_TARGETS)
  110. return -EOVERFLOW;
  111. t = kzalloc_obj(*t);
  112. if (!t)
  113. return -ENOMEM;
  114. INIT_LIST_HEAD(&t->devices);
  115. if (!num_targets)
  116. num_targets = KEYS_PER_NODE;
  117. num_targets = dm_round_up(num_targets, KEYS_PER_NODE);
  118. if (!num_targets) {
  119. kfree(t);
  120. return -EOVERFLOW;
  121. }
  122. if (alloc_targets(t, num_targets)) {
  123. kfree(t);
  124. return -ENOMEM;
  125. }
  126. t->type = DM_TYPE_NONE;
  127. t->mode = mode;
  128. t->md = md;
  129. t->flush_bypasses_map = true;
  130. *result = t;
  131. return 0;
  132. }
  133. static void free_devices(struct list_head *devices, struct mapped_device *md)
  134. {
  135. struct list_head *tmp, *next;
  136. list_for_each_safe(tmp, next, devices) {
  137. struct dm_dev_internal *dd =
  138. list_entry(tmp, struct dm_dev_internal, list);
  139. DMWARN("%s: dm_table_destroy: dm_put_device call missing for %s",
  140. dm_device_name(md), dd->dm_dev->name);
  141. dm_put_table_device(md, dd->dm_dev);
  142. kfree(dd);
  143. }
  144. }
  145. static void dm_table_destroy_crypto_profile(struct dm_table *t);
  146. void dm_table_destroy(struct dm_table *t)
  147. {
  148. if (!t)
  149. return;
  150. /* free the indexes */
  151. if (t->depth >= 2)
  152. kvfree(t->index[t->depth - 2]);
  153. /* free the targets */
  154. for (unsigned int i = 0; i < t->num_targets; i++) {
  155. struct dm_target *ti = dm_table_get_target(t, i);
  156. if (ti->type->dtr)
  157. ti->type->dtr(ti);
  158. dm_put_target_type(ti->type);
  159. }
  160. kvfree(t->highs);
  161. /* free the device list */
  162. free_devices(&t->devices, t->md);
  163. dm_free_md_mempools(t->mempools);
  164. dm_table_destroy_crypto_profile(t);
  165. kfree(t);
  166. }
  167. /*
  168. * See if we've already got a device in the list.
  169. */
  170. static struct dm_dev_internal *find_device(struct list_head *l, dev_t dev)
  171. {
  172. struct dm_dev_internal *dd;
  173. list_for_each_entry(dd, l, list)
  174. if (dd->dm_dev->bdev->bd_dev == dev)
  175. return dd;
  176. return NULL;
  177. }
  178. /*
  179. * If possible, this checks an area of a destination device is invalid.
  180. */
  181. static int device_area_is_invalid(struct dm_target *ti, struct dm_dev *dev,
  182. sector_t start, sector_t len, void *data)
  183. {
  184. struct queue_limits *limits = data;
  185. struct block_device *bdev = dev->bdev;
  186. sector_t dev_size = bdev_nr_sectors(bdev);
  187. unsigned short logical_block_size_sectors =
  188. limits->logical_block_size >> SECTOR_SHIFT;
  189. if (!dev_size)
  190. return 0;
  191. if ((start >= dev_size) || (start + len > dev_size)) {
  192. DMERR("%s: %pg too small for target: start=%llu, len=%llu, dev_size=%llu",
  193. dm_device_name(ti->table->md), bdev,
  194. (unsigned long long)start,
  195. (unsigned long long)len,
  196. (unsigned long long)dev_size);
  197. return 1;
  198. }
  199. /*
  200. * If the target is mapped to zoned block device(s), check
  201. * that the zones are not partially mapped.
  202. */
  203. if (bdev_is_zoned(bdev)) {
  204. unsigned int zone_sectors = bdev_zone_sectors(bdev);
  205. if (!bdev_is_zone_aligned(bdev, start)) {
  206. DMERR("%s: start=%llu not aligned to h/w zone size %u of %pg",
  207. dm_device_name(ti->table->md),
  208. (unsigned long long)start,
  209. zone_sectors, bdev);
  210. return 1;
  211. }
  212. /*
  213. * Note: The last zone of a zoned block device may be smaller
  214. * than other zones. So for a target mapping the end of a
  215. * zoned block device with such a zone, len would not be zone
  216. * aligned. We do not allow such last smaller zone to be part
  217. * of the mapping here to ensure that mappings with multiple
  218. * devices do not end up with a smaller zone in the middle of
  219. * the sector range.
  220. */
  221. if (!bdev_is_zone_aligned(bdev, len)) {
  222. DMERR("%s: len=%llu not aligned to h/w zone size %u of %pg",
  223. dm_device_name(ti->table->md),
  224. (unsigned long long)len,
  225. zone_sectors, bdev);
  226. return 1;
  227. }
  228. }
  229. if (logical_block_size_sectors <= 1)
  230. return 0;
  231. if (start & (logical_block_size_sectors - 1)) {
  232. DMERR("%s: start=%llu not aligned to h/w logical block size %u of %pg",
  233. dm_device_name(ti->table->md),
  234. (unsigned long long)start,
  235. limits->logical_block_size, bdev);
  236. return 1;
  237. }
  238. if (len & (logical_block_size_sectors - 1)) {
  239. DMERR("%s: len=%llu not aligned to h/w logical block size %u of %pg",
  240. dm_device_name(ti->table->md),
  241. (unsigned long long)len,
  242. limits->logical_block_size, bdev);
  243. return 1;
  244. }
  245. return 0;
  246. }
  247. /*
  248. * This upgrades the mode on an already open dm_dev, being
  249. * careful to leave things as they were if we fail to reopen the
  250. * device and not to touch the existing bdev field in case
  251. * it is accessed concurrently.
  252. */
  253. static int upgrade_mode(struct dm_dev_internal *dd, blk_mode_t new_mode,
  254. struct mapped_device *md)
  255. {
  256. int r;
  257. struct dm_dev *old_dev, *new_dev;
  258. old_dev = dd->dm_dev;
  259. r = dm_get_table_device(md, dd->dm_dev->bdev->bd_dev,
  260. dd->dm_dev->mode | new_mode, &new_dev);
  261. if (r)
  262. return r;
  263. dd->dm_dev = new_dev;
  264. dm_put_table_device(md, old_dev);
  265. return 0;
  266. }
  267. /*
  268. * Note: the __ref annotation is because this function can call the __init
  269. * marked early_lookup_bdev when called during early boot code from dm-init.c.
  270. */
  271. int __ref dm_devt_from_path(const char *path, dev_t *dev_p)
  272. {
  273. int r;
  274. dev_t dev;
  275. unsigned int major, minor;
  276. char dummy;
  277. if (sscanf(path, "%u:%u%c", &major, &minor, &dummy) == 2) {
  278. /* Extract the major/minor numbers */
  279. dev = MKDEV(major, minor);
  280. if (MAJOR(dev) != major || MINOR(dev) != minor)
  281. return -EOVERFLOW;
  282. } else {
  283. r = lookup_bdev(path, &dev);
  284. #ifndef MODULE
  285. if (r && system_state < SYSTEM_RUNNING)
  286. r = early_lookup_bdev(path, &dev);
  287. #endif
  288. if (r)
  289. return r;
  290. }
  291. *dev_p = dev;
  292. return 0;
  293. }
  294. EXPORT_SYMBOL(dm_devt_from_path);
  295. /*
  296. * Add a device to the list, or just increment the usage count if
  297. * it's already present.
  298. */
  299. int dm_get_device(struct dm_target *ti, const char *path, blk_mode_t mode,
  300. struct dm_dev **result)
  301. {
  302. int r;
  303. dev_t dev;
  304. struct dm_dev_internal *dd;
  305. struct dm_table *t = ti->table;
  306. BUG_ON(!t);
  307. r = dm_devt_from_path(path, &dev);
  308. if (r)
  309. return r;
  310. if (dev == disk_devt(t->md->disk))
  311. return -EINVAL;
  312. dd = find_device(&t->devices, dev);
  313. if (!dd) {
  314. dd = kmalloc_obj(*dd);
  315. if (!dd)
  316. return -ENOMEM;
  317. r = dm_get_table_device(t->md, dev, mode, &dd->dm_dev);
  318. if (r) {
  319. kfree(dd);
  320. return r;
  321. }
  322. refcount_set(&dd->count, 1);
  323. list_add(&dd->list, &t->devices);
  324. goto out;
  325. } else if (dd->dm_dev->mode != (mode | dd->dm_dev->mode)) {
  326. r = upgrade_mode(dd, mode, t->md);
  327. if (r)
  328. return r;
  329. }
  330. refcount_inc(&dd->count);
  331. out:
  332. *result = dd->dm_dev;
  333. return 0;
  334. }
  335. EXPORT_SYMBOL(dm_get_device);
  336. static int dm_set_device_limits(struct dm_target *ti, struct dm_dev *dev,
  337. sector_t start, sector_t len, void *data)
  338. {
  339. struct queue_limits *limits = data;
  340. struct block_device *bdev = dev->bdev;
  341. struct request_queue *q = bdev_get_queue(bdev);
  342. if (unlikely(!q)) {
  343. DMWARN("%s: Cannot set limits for nonexistent device %pg",
  344. dm_device_name(ti->table->md), bdev);
  345. return 0;
  346. }
  347. mutex_lock(&q->limits_lock);
  348. /*
  349. * BLK_FEAT_ATOMIC_WRITES is not inherited from the bottom device in
  350. * blk_stack_limits(), so do it manually.
  351. */
  352. limits->features |= (q->limits.features & BLK_FEAT_ATOMIC_WRITES);
  353. if (blk_stack_limits(limits, &q->limits,
  354. get_start_sect(bdev) + start) < 0)
  355. DMWARN("%s: adding target device %pg caused an alignment inconsistency: "
  356. "physical_block_size=%u, logical_block_size=%u, "
  357. "alignment_offset=%u, start=%llu",
  358. dm_device_name(ti->table->md), bdev,
  359. q->limits.physical_block_size,
  360. q->limits.logical_block_size,
  361. q->limits.alignment_offset,
  362. (unsigned long long) start << SECTOR_SHIFT);
  363. /*
  364. * Only stack the integrity profile if the target doesn't have native
  365. * integrity support.
  366. */
  367. if (!dm_target_has_integrity(ti->type))
  368. queue_limits_stack_integrity_bdev(limits, bdev);
  369. mutex_unlock(&q->limits_lock);
  370. return 0;
  371. }
  372. /*
  373. * Decrement a device's use count and remove it if necessary.
  374. */
  375. void dm_put_device(struct dm_target *ti, struct dm_dev *d)
  376. {
  377. int found = 0;
  378. struct list_head *devices = &ti->table->devices;
  379. struct dm_dev_internal *dd;
  380. list_for_each_entry(dd, devices, list) {
  381. if (dd->dm_dev == d) {
  382. found = 1;
  383. break;
  384. }
  385. }
  386. if (!found) {
  387. DMERR("%s: device %s not in table devices list",
  388. dm_device_name(ti->table->md), d->name);
  389. return;
  390. }
  391. if (refcount_dec_and_test(&dd->count)) {
  392. dm_put_table_device(ti->table->md, d);
  393. list_del(&dd->list);
  394. kfree(dd);
  395. }
  396. }
  397. EXPORT_SYMBOL(dm_put_device);
  398. /*
  399. * Checks to see if the target joins onto the end of the table.
  400. */
  401. static int adjoin(struct dm_table *t, struct dm_target *ti)
  402. {
  403. struct dm_target *prev;
  404. if (!t->num_targets)
  405. return !ti->begin;
  406. prev = &t->targets[t->num_targets - 1];
  407. return (ti->begin == (prev->begin + prev->len));
  408. }
  409. /*
  410. * Used to dynamically allocate the arg array.
  411. *
  412. * We do first allocation with GFP_NOIO because dm-mpath and dm-thin must
  413. * process messages even if some device is suspended. These messages have a
  414. * small fixed number of arguments.
  415. *
  416. * On the other hand, dm-switch needs to process bulk data using messages and
  417. * excessive use of GFP_NOIO could cause trouble.
  418. */
  419. static char **realloc_argv(unsigned int *size, char **old_argv)
  420. {
  421. char **argv;
  422. unsigned int new_size;
  423. gfp_t gfp;
  424. if (*size) {
  425. new_size = *size * 2;
  426. gfp = GFP_KERNEL;
  427. } else {
  428. new_size = 8;
  429. gfp = GFP_NOIO;
  430. }
  431. argv = kmalloc_array(new_size, sizeof(*argv), gfp);
  432. if (argv) {
  433. if (old_argv)
  434. memcpy(argv, old_argv, *size * sizeof(*argv));
  435. *size = new_size;
  436. }
  437. kfree(old_argv);
  438. return argv;
  439. }
  440. /*
  441. * Destructively splits up the argument list to pass to ctr.
  442. */
  443. int dm_split_args(int *argc, char ***argvp, char *input)
  444. {
  445. char *start, *end = input, *out, **argv = NULL;
  446. unsigned int array_size = 0;
  447. *argc = 0;
  448. if (!input) {
  449. *argvp = NULL;
  450. return 0;
  451. }
  452. argv = realloc_argv(&array_size, argv);
  453. if (!argv)
  454. return -ENOMEM;
  455. while (1) {
  456. /* Skip whitespace */
  457. start = skip_spaces(end);
  458. if (!*start)
  459. break; /* success, we hit the end */
  460. /* 'out' is used to remove any back-quotes */
  461. end = out = start;
  462. while (*end) {
  463. /* Everything apart from '\0' can be quoted */
  464. if (*end == '\\' && *(end + 1)) {
  465. *out++ = *(end + 1);
  466. end += 2;
  467. continue;
  468. }
  469. if (isspace(*end))
  470. break; /* end of token */
  471. *out++ = *end++;
  472. }
  473. /* have we already filled the array ? */
  474. if ((*argc + 1) > array_size) {
  475. argv = realloc_argv(&array_size, argv);
  476. if (!argv)
  477. return -ENOMEM;
  478. }
  479. /* we know this is whitespace */
  480. if (*end)
  481. end++;
  482. /* terminate the string and put it in the array */
  483. *out = '\0';
  484. argv[*argc] = start;
  485. (*argc)++;
  486. }
  487. *argvp = argv;
  488. return 0;
  489. }
  490. static void dm_set_stacking_limits(struct queue_limits *limits)
  491. {
  492. blk_set_stacking_limits(limits);
  493. limits->features |= BLK_FEAT_IO_STAT | BLK_FEAT_NOWAIT | BLK_FEAT_POLL;
  494. }
  495. /*
  496. * Impose necessary and sufficient conditions on a devices's table such
  497. * that any incoming bio which respects its logical_block_size can be
  498. * processed successfully. If it falls across the boundary between
  499. * two or more targets, the size of each piece it gets split into must
  500. * be compatible with the logical_block_size of the target processing it.
  501. */
  502. static int validate_hardware_logical_block_alignment(struct dm_table *t,
  503. struct queue_limits *limits)
  504. {
  505. /*
  506. * This function uses arithmetic modulo the logical_block_size
  507. * (in units of 512-byte sectors).
  508. */
  509. unsigned short device_logical_block_size_sects =
  510. limits->logical_block_size >> SECTOR_SHIFT;
  511. /*
  512. * Offset of the start of the next table entry, mod logical_block_size.
  513. */
  514. unsigned short next_target_start = 0;
  515. /*
  516. * Given an aligned bio that extends beyond the end of a
  517. * target, how many sectors must the next target handle?
  518. */
  519. unsigned short remaining = 0;
  520. struct dm_target *ti;
  521. struct queue_limits ti_limits;
  522. unsigned int i;
  523. /*
  524. * Check each entry in the table in turn.
  525. */
  526. for (i = 0; i < t->num_targets; i++) {
  527. ti = dm_table_get_target(t, i);
  528. dm_set_stacking_limits(&ti_limits);
  529. /* combine all target devices' limits */
  530. if (ti->type->iterate_devices)
  531. ti->type->iterate_devices(ti, dm_set_device_limits,
  532. &ti_limits);
  533. /*
  534. * If the remaining sectors fall entirely within this
  535. * table entry are they compatible with its logical_block_size?
  536. */
  537. if (remaining < ti->len &&
  538. remaining & ((ti_limits.logical_block_size >>
  539. SECTOR_SHIFT) - 1))
  540. break; /* Error */
  541. next_target_start =
  542. (unsigned short) ((next_target_start + ti->len) &
  543. (device_logical_block_size_sects - 1));
  544. remaining = next_target_start ?
  545. device_logical_block_size_sects - next_target_start : 0;
  546. }
  547. if (remaining) {
  548. DMERR("%s: table line %u (start sect %llu len %llu) "
  549. "not aligned to h/w logical block size %u",
  550. dm_device_name(t->md), i,
  551. (unsigned long long) ti->begin,
  552. (unsigned long long) ti->len,
  553. limits->logical_block_size);
  554. return -EINVAL;
  555. }
  556. return 0;
  557. }
  558. int dm_table_add_target(struct dm_table *t, const char *type,
  559. sector_t start, sector_t len, char *params)
  560. {
  561. int r = -EINVAL, argc;
  562. char **argv;
  563. struct dm_target *ti;
  564. if (t->singleton) {
  565. DMERR("%s: target type %s must appear alone in table",
  566. dm_device_name(t->md), t->targets->type->name);
  567. return -EINVAL;
  568. }
  569. BUG_ON(t->num_targets >= t->num_allocated);
  570. ti = t->targets + t->num_targets;
  571. memset(ti, 0, sizeof(*ti));
  572. if (!len) {
  573. DMERR("%s: zero-length target", dm_device_name(t->md));
  574. return -EINVAL;
  575. }
  576. if (start + len < start || start + len > LLONG_MAX >> SECTOR_SHIFT) {
  577. DMERR("%s: too large device", dm_device_name(t->md));
  578. return -EINVAL;
  579. }
  580. ti->type = dm_get_target_type(type);
  581. if (!ti->type) {
  582. DMERR("%s: %s: unknown target type", dm_device_name(t->md), type);
  583. return -EINVAL;
  584. }
  585. if (dm_target_needs_singleton(ti->type)) {
  586. if (t->num_targets) {
  587. ti->error = "singleton target type must appear alone in table";
  588. goto bad;
  589. }
  590. t->singleton = true;
  591. }
  592. if (dm_target_always_writeable(ti->type) &&
  593. !(t->mode & BLK_OPEN_WRITE)) {
  594. ti->error = "target type may not be included in a read-only table";
  595. goto bad;
  596. }
  597. if (t->immutable_target_type) {
  598. if (t->immutable_target_type != ti->type) {
  599. ti->error = "immutable target type cannot be mixed with other target types";
  600. goto bad;
  601. }
  602. } else if (dm_target_is_immutable(ti->type)) {
  603. if (t->num_targets) {
  604. ti->error = "immutable target type cannot be mixed with other target types";
  605. goto bad;
  606. }
  607. t->immutable_target_type = ti->type;
  608. }
  609. ti->table = t;
  610. ti->begin = start;
  611. ti->len = len;
  612. ti->error = "Unknown error";
  613. /*
  614. * Does this target adjoin the previous one ?
  615. */
  616. if (!adjoin(t, ti)) {
  617. ti->error = "Gap in table";
  618. goto bad;
  619. }
  620. r = dm_split_args(&argc, &argv, params);
  621. if (r) {
  622. ti->error = "couldn't split parameters";
  623. goto bad;
  624. }
  625. r = ti->type->ctr(ti, argc, argv);
  626. kfree(argv);
  627. if (r)
  628. goto bad;
  629. t->highs[t->num_targets++] = ti->begin + ti->len - 1;
  630. if (!ti->num_discard_bios && ti->discards_supported)
  631. DMWARN("%s: %s: ignoring discards_supported because num_discard_bios is zero.",
  632. dm_device_name(t->md), type);
  633. if (ti->limit_swap_bios && !static_key_enabled(&swap_bios_enabled.key))
  634. static_branch_enable(&swap_bios_enabled);
  635. if (!ti->flush_bypasses_map)
  636. t->flush_bypasses_map = false;
  637. return 0;
  638. bad:
  639. DMERR("%s: %s: %s (%pe)", dm_device_name(t->md), type, ti->error, ERR_PTR(r));
  640. dm_put_target_type(ti->type);
  641. return r;
  642. }
  643. /*
  644. * Target argument parsing helpers.
  645. */
  646. static int validate_next_arg(const struct dm_arg *arg, struct dm_arg_set *arg_set,
  647. unsigned int *value, char **error, unsigned int grouped)
  648. {
  649. const char *arg_str = dm_shift_arg(arg_set);
  650. char dummy;
  651. if (!arg_str ||
  652. (sscanf(arg_str, "%u%c", value, &dummy) != 1) ||
  653. (*value < arg->min) ||
  654. (*value > arg->max) ||
  655. (grouped && arg_set->argc < *value)) {
  656. *error = arg->error;
  657. return -EINVAL;
  658. }
  659. return 0;
  660. }
  661. int dm_read_arg(const struct dm_arg *arg, struct dm_arg_set *arg_set,
  662. unsigned int *value, char **error)
  663. {
  664. return validate_next_arg(arg, arg_set, value, error, 0);
  665. }
  666. EXPORT_SYMBOL(dm_read_arg);
  667. int dm_read_arg_group(const struct dm_arg *arg, struct dm_arg_set *arg_set,
  668. unsigned int *value, char **error)
  669. {
  670. return validate_next_arg(arg, arg_set, value, error, 1);
  671. }
  672. EXPORT_SYMBOL(dm_read_arg_group);
  673. const char *dm_shift_arg(struct dm_arg_set *as)
  674. {
  675. char *r;
  676. if (as->argc) {
  677. as->argc--;
  678. r = *as->argv;
  679. as->argv++;
  680. return r;
  681. }
  682. return NULL;
  683. }
  684. EXPORT_SYMBOL(dm_shift_arg);
  685. void dm_consume_args(struct dm_arg_set *as, unsigned int num_args)
  686. {
  687. BUG_ON(as->argc < num_args);
  688. as->argc -= num_args;
  689. as->argv += num_args;
  690. }
  691. EXPORT_SYMBOL(dm_consume_args);
  692. static bool __table_type_bio_based(enum dm_queue_mode table_type)
  693. {
  694. return (table_type == DM_TYPE_BIO_BASED ||
  695. table_type == DM_TYPE_DAX_BIO_BASED);
  696. }
  697. static bool __table_type_request_based(enum dm_queue_mode table_type)
  698. {
  699. return table_type == DM_TYPE_REQUEST_BASED;
  700. }
  701. void dm_table_set_type(struct dm_table *t, enum dm_queue_mode type)
  702. {
  703. t->type = type;
  704. }
  705. EXPORT_SYMBOL_GPL(dm_table_set_type);
  706. /* validate the dax capability of the target device span */
  707. static int device_not_dax_capable(struct dm_target *ti, struct dm_dev *dev,
  708. sector_t start, sector_t len, void *data)
  709. {
  710. if (dev->dax_dev)
  711. return false;
  712. DMDEBUG("%pg: error: dax unsupported by block device", dev->bdev);
  713. return true;
  714. }
  715. /* Check devices support synchronous DAX */
  716. static int device_not_dax_synchronous_capable(struct dm_target *ti, struct dm_dev *dev,
  717. sector_t start, sector_t len, void *data)
  718. {
  719. return !dev->dax_dev || !dax_synchronous(dev->dax_dev);
  720. }
  721. static bool dm_table_supports_dax(struct dm_table *t,
  722. iterate_devices_callout_fn iterate_fn)
  723. {
  724. /* Ensure that all targets support DAX. */
  725. for (unsigned int i = 0; i < t->num_targets; i++) {
  726. struct dm_target *ti = dm_table_get_target(t, i);
  727. if (!ti->type->direct_access)
  728. return false;
  729. if (dm_target_is_wildcard(ti->type) ||
  730. !ti->type->iterate_devices ||
  731. ti->type->iterate_devices(ti, iterate_fn, NULL))
  732. return false;
  733. }
  734. return true;
  735. }
  736. static int device_is_not_rq_stackable(struct dm_target *ti, struct dm_dev *dev,
  737. sector_t start, sector_t len, void *data)
  738. {
  739. struct block_device *bdev = dev->bdev;
  740. struct request_queue *q = bdev_get_queue(bdev);
  741. /* request-based cannot stack on partitions! */
  742. if (bdev_is_partition(bdev))
  743. return true;
  744. return !queue_is_mq(q);
  745. }
/*
 * Decide and validate the queue mode (t->type) of table @t.
 *
 * If a type was already set on the table, it is only verified (and a plain
 * bio-based type may be upgraded to the DAX variant). Otherwise the type is
 * derived from the table's targets: hybrid, request-based or bio-based. A
 * table made only of hybrid targets follows the mapped device's current type
 * and defaults to bio-based.
 *
 * Returns 0 on success, or -EINVAL when bio-based and request-based targets
 * are mixed or when one of the request-based checks below fails.
 */
static int dm_table_determine_type(struct dm_table *t)
{
	unsigned int bio_based = 0, request_based = 0, hybrid = 0;
	struct dm_target *ti;
	struct list_head *devices = dm_table_get_devices(t);
	enum dm_queue_mode live_md_type = dm_get_md_type(t->md);

	if (t->type != DM_TYPE_NONE) {
		/* target already set the table's type */
		if (t->type == DM_TYPE_BIO_BASED) {
			/* possibly upgrade to a variant of bio-based */
			goto verify_bio_based;
		}
		/* A pre-set type is never expected to be the DAX variant. */
		BUG_ON(t->type == DM_TYPE_DAX_BIO_BASED);
		goto verify_rq_based;
	}

	/* Classify every target; bail out as soon as the mix is inconsistent. */
	for (unsigned int i = 0; i < t->num_targets; i++) {
		ti = dm_table_get_target(t, i);
		if (dm_target_hybrid(ti))
			hybrid = 1;
		else if (dm_target_request_based(ti))
			request_based = 1;
		else
			bio_based = 1;

		if (bio_based && request_based) {
			DMERR("Inconsistent table: different target types can't be mixed up");
			return -EINVAL;
		}
	}

	if (hybrid && !bio_based && !request_based) {
		/*
		 * The targets can work either way.
		 * Determine the type from the live device.
		 * Default to bio-based if device is new.
		 */
		if (__table_type_request_based(live_md_type))
			request_based = 1;
		else
			bio_based = 1;
	}

	if (bio_based) {
verify_bio_based:
		/* We must use this table as bio-based */
		t->type = DM_TYPE_BIO_BASED;
		/*
		 * Upgrade to DAX when all targets support it, or when the
		 * table has no devices and the mapped device is already DAX.
		 */
		if (dm_table_supports_dax(t, device_not_dax_capable) ||
		    (list_empty(devices) && live_md_type == DM_TYPE_DAX_BIO_BASED)) {
			t->type = DM_TYPE_DAX_BIO_BASED;
		}
		return 0;
	}

	BUG_ON(!request_based); /* No targets in this table */

	t->type = DM_TYPE_REQUEST_BASED;

verify_rq_based:
	/*
	 * Request-based dm supports only tables that have a single target now.
	 * To support multiple targets, request splitting support is needed,
	 * and that needs lots of changes in the block-layer.
	 * (e.g. request completion process for partial completion.)
	 */
	if (t->num_targets > 1) {
		DMERR("request-based DM doesn't support multiple targets");
		return -EINVAL;
	}

	if (list_empty(devices)) {
		int srcu_idx;
		struct dm_table *live_table = dm_get_live_table(t->md, &srcu_idx);

		/* inherit live table's type */
		if (live_table)
			t->type = live_table->type;
		dm_put_live_table(t->md, srcu_idx);
		return 0;
	}

	/* With devices present, the single target must be immutable. */
	ti = dm_table_get_immutable_target(t);
	if (!ti) {
		DMERR("table load rejected: immutable target is required");
		return -EINVAL;
	} else if (ti->max_io_len) {
		DMERR("table load rejected: immutable target that splits IO is not supported");
		return -EINVAL;
	}

	/* Non-request-stackable devices can't be used for request-based dm */
	if (!ti->type->iterate_devices ||
	    ti->type->iterate_devices(ti, device_is_not_rq_stackable, NULL)) {
		DMERR("table load rejected: including non-request-stackable devices");
		return -EINVAL;
	}

	return 0;
}
  833. enum dm_queue_mode dm_table_get_type(struct dm_table *t)
  834. {
  835. return t->type;
  836. }
  837. struct target_type *dm_table_get_immutable_target_type(struct dm_table *t)
  838. {
  839. return t->immutable_target_type;
  840. }
  841. struct dm_target *dm_table_get_immutable_target(struct dm_table *t)
  842. {
  843. /* Immutable target is implicitly a singleton */
  844. if (t->num_targets > 1 ||
  845. !dm_target_is_immutable(t->targets[0].type))
  846. return NULL;
  847. return t->targets;
  848. }
  849. struct dm_target *dm_table_get_wildcard_target(struct dm_table *t)
  850. {
  851. for (unsigned int i = 0; i < t->num_targets; i++) {
  852. struct dm_target *ti = dm_table_get_target(t, i);
  853. if (dm_target_is_wildcard(ti->type))
  854. return ti;
  855. }
  856. return NULL;
  857. }
  858. bool dm_table_request_based(struct dm_table *t)
  859. {
  860. return __table_type_request_based(dm_table_get_type(t));
  861. }
  862. static int dm_table_alloc_md_mempools(struct dm_table *t, struct mapped_device *md)
  863. {
  864. enum dm_queue_mode type = dm_table_get_type(t);
  865. unsigned int per_io_data_size = 0, front_pad, io_front_pad;
  866. unsigned int min_pool_size = 0, pool_size;
  867. struct dm_md_mempools *pools;
  868. unsigned int bioset_flags = 0;
  869. if (unlikely(type == DM_TYPE_NONE)) {
  870. DMERR("no table type is set, can't allocate mempools");
  871. return -EINVAL;
  872. }
  873. pools = kzalloc_node(sizeof(*pools), GFP_KERNEL, md->numa_node_id);
  874. if (!pools)
  875. return -ENOMEM;
  876. if (type == DM_TYPE_REQUEST_BASED) {
  877. pool_size = dm_get_reserved_rq_based_ios();
  878. front_pad = offsetof(struct dm_rq_clone_bio_info, clone);
  879. goto init_bs;
  880. }
  881. if (md->queue->limits.features & BLK_FEAT_POLL)
  882. bioset_flags |= BIOSET_PERCPU_CACHE;
  883. for (unsigned int i = 0; i < t->num_targets; i++) {
  884. struct dm_target *ti = dm_table_get_target(t, i);
  885. per_io_data_size = max(per_io_data_size, ti->per_io_data_size);
  886. min_pool_size = max(min_pool_size, ti->num_flush_bios);
  887. }
  888. pool_size = max(dm_get_reserved_bio_based_ios(), min_pool_size);
  889. front_pad = roundup(per_io_data_size,
  890. __alignof__(struct dm_target_io)) + DM_TARGET_IO_BIO_OFFSET;
  891. io_front_pad = roundup(per_io_data_size,
  892. __alignof__(struct dm_io)) + DM_IO_BIO_OFFSET;
  893. if (bioset_init(&pools->io_bs, pool_size, io_front_pad, bioset_flags))
  894. goto out_free_pools;
  895. init_bs:
  896. if (bioset_init(&pools->bs, pool_size, front_pad, 0))
  897. goto out_free_pools;
  898. t->mempools = pools;
  899. return 0;
  900. out_free_pools:
  901. dm_free_md_mempools(pools);
  902. return -ENOMEM;
  903. }
  904. static int setup_indexes(struct dm_table *t)
  905. {
  906. int i;
  907. unsigned int total = 0;
  908. sector_t *indexes;
  909. /* allocate the space for *all* the indexes */
  910. for (i = t->depth - 2; i >= 0; i--) {
  911. t->counts[i] = dm_div_up(t->counts[i + 1], CHILDREN_PER_NODE);
  912. total += t->counts[i];
  913. }
  914. indexes = kvcalloc(total, NODE_SIZE, GFP_KERNEL);
  915. if (!indexes)
  916. return -ENOMEM;
  917. /* set up internal nodes, bottom-up */
  918. for (i = t->depth - 2; i >= 0; i--) {
  919. t->index[i] = indexes;
  920. indexes += (KEYS_PER_NODE * t->counts[i]);
  921. setup_btree_index(i, t);
  922. }
  923. return 0;
  924. }
  925. /*
  926. * Builds the btree to index the map.
  927. */
  928. static int dm_table_build_index(struct dm_table *t)
  929. {
  930. int r = 0;
  931. unsigned int leaf_nodes;
  932. /* how many indexes will the btree have ? */
  933. leaf_nodes = dm_div_up(t->num_targets, KEYS_PER_NODE);
  934. t->depth = 1 + int_log(leaf_nodes, CHILDREN_PER_NODE);
  935. /* leaf layer has already been set up */
  936. t->counts[t->depth - 1] = leaf_nodes;
  937. t->index[t->depth - 1] = t->highs;
  938. if (t->depth >= 2)
  939. r = setup_indexes(t);
  940. return r;
  941. }
  942. #ifdef CONFIG_BLK_INLINE_ENCRYPTION
/*
 * A blk_crypto_profile paired with the mapped_device it belongs to, so that
 * the profile callbacks in this file can get back to the device via
 * container_of().
 */
struct dm_crypto_profile {
	struct blk_crypto_profile profile;	/* embedded profile handed to blk-crypto */
	struct mapped_device *md;		/* owning mapped device */
};
  947. static int dm_keyslot_evict_callback(struct dm_target *ti, struct dm_dev *dev,
  948. sector_t start, sector_t len, void *data)
  949. {
  950. const struct blk_crypto_key *key = data;
  951. blk_crypto_evict_key(dev->bdev, key);
  952. return 0;
  953. }
  954. /*
  955. * When an inline encryption key is evicted from a device-mapper device, evict
  956. * it from all the underlying devices.
  957. */
  958. static int dm_keyslot_evict(struct blk_crypto_profile *profile,
  959. const struct blk_crypto_key *key, unsigned int slot)
  960. {
  961. struct mapped_device *md =
  962. container_of(profile, struct dm_crypto_profile, profile)->md;
  963. struct dm_table *t;
  964. int srcu_idx;
  965. t = dm_get_live_table(md, &srcu_idx);
  966. if (!t)
  967. goto put_live_table;
  968. for (unsigned int i = 0; i < t->num_targets; i++) {
  969. struct dm_target *ti = dm_table_get_target(t, i);
  970. if (!ti->type->iterate_devices)
  971. continue;
  972. ti->type->iterate_devices(ti, dm_keyslot_evict_callback,
  973. (void *)key);
  974. }
  975. put_live_table:
  976. dm_put_live_table(md, srcu_idx);
  977. return 0;
  978. }
/*
 * Wrapped-key operations that dm forwards to an underlying device; each value
 * selects one branch in dm_wrappedkey_op_callback().
 */
enum dm_wrappedkey_op {
	DERIVE_SW_SECRET,	/* forwarded to blk_crypto_derive_sw_secret() */
	IMPORT_KEY,		/* forwarded to blk_crypto_import_key() */
	GENERATE_KEY,		/* forwarded to blk_crypto_generate_key() */
	PREPARE_KEY,		/* forwarded to blk_crypto_prepare_key() */
};
/*
 * Arguments and result of one wrapped-key operation, passed as the opaque
 * 'data' pointer through iterate_devices() to dm_wrappedkey_op_callback().
 * Only the union member matching @op is meaningful.
 */
struct dm_wrappedkey_op_args {
	enum dm_wrappedkey_op op;	/* which operation to run */
	int err;			/* result of the operation */
	union {
		/* Arguments for DERIVE_SW_SECRET */
		struct {
			const u8 *eph_key;
			size_t eph_key_size;
			u8 *sw_secret;
		} derive_sw_secret;
		/* Arguments for IMPORT_KEY */
		struct {
			const u8 *raw_key;
			size_t raw_key_size;
			u8 *lt_key;
		} import_key;
		/* Arguments for GENERATE_KEY */
		struct {
			u8 *lt_key;
		} generate_key;
		/* Arguments for PREPARE_KEY */
		struct {
			const u8 *lt_key;
			size_t lt_key_size;
			u8 *eph_key;
		} prepare_key;
	};
};
  1009. static int dm_wrappedkey_op_callback(struct dm_target *ti, struct dm_dev *dev,
  1010. sector_t start, sector_t len, void *data)
  1011. {
  1012. struct dm_wrappedkey_op_args *args = data;
  1013. struct block_device *bdev = dev->bdev;
  1014. struct blk_crypto_profile *profile =
  1015. bdev_get_queue(bdev)->crypto_profile;
  1016. int err = -EOPNOTSUPP;
  1017. switch (args->op) {
  1018. case DERIVE_SW_SECRET:
  1019. err = blk_crypto_derive_sw_secret(
  1020. bdev,
  1021. args->derive_sw_secret.eph_key,
  1022. args->derive_sw_secret.eph_key_size,
  1023. args->derive_sw_secret.sw_secret);
  1024. break;
  1025. case IMPORT_KEY:
  1026. err = blk_crypto_import_key(profile,
  1027. args->import_key.raw_key,
  1028. args->import_key.raw_key_size,
  1029. args->import_key.lt_key);
  1030. break;
  1031. case GENERATE_KEY:
  1032. err = blk_crypto_generate_key(profile,
  1033. args->generate_key.lt_key);
  1034. break;
  1035. case PREPARE_KEY:
  1036. err = blk_crypto_prepare_key(profile,
  1037. args->prepare_key.lt_key,
  1038. args->prepare_key.lt_key_size,
  1039. args->prepare_key.eph_key);
  1040. break;
  1041. }
  1042. args->err = err;
  1043. return 1; /* No need to continue the iteration. */
  1044. }
  1045. static int dm_exec_wrappedkey_op(struct blk_crypto_profile *profile,
  1046. struct dm_wrappedkey_op_args *args)
  1047. {
  1048. struct mapped_device *md =
  1049. container_of(profile, struct dm_crypto_profile, profile)->md;
  1050. struct dm_target *ti;
  1051. struct dm_table *t;
  1052. int srcu_idx;
  1053. int i;
  1054. args->err = -EOPNOTSUPP;
  1055. t = dm_get_live_table(md, &srcu_idx);
  1056. if (!t)
  1057. goto out;
  1058. /*
  1059. * blk-crypto currently has no support for multiple incompatible
  1060. * implementations of wrapped inline crypto keys on a single system.
  1061. * It was already checked earlier that support for wrapped keys was
  1062. * declared on all underlying devices. Thus, all the underlying devices
  1063. * should support all wrapped key operations and they should behave
  1064. * identically, i.e. work with the same keys. So, just executing the
  1065. * operation on the first device suffices for now.
  1066. */
  1067. for (i = 0; i < t->num_targets; i++) {
  1068. ti = dm_table_get_target(t, i);
  1069. if (!ti->type->iterate_devices)
  1070. continue;
  1071. if (ti->type->iterate_devices(ti, dm_wrappedkey_op_callback, args) != 0)
  1072. break;
  1073. }
  1074. out:
  1075. dm_put_live_table(md, srcu_idx);
  1076. return args->err;
  1077. }
  1078. static int dm_derive_sw_secret(struct blk_crypto_profile *profile,
  1079. const u8 *eph_key, size_t eph_key_size,
  1080. u8 sw_secret[BLK_CRYPTO_SW_SECRET_SIZE])
  1081. {
  1082. struct dm_wrappedkey_op_args args = {
  1083. .op = DERIVE_SW_SECRET,
  1084. .derive_sw_secret = {
  1085. .eph_key = eph_key,
  1086. .eph_key_size = eph_key_size,
  1087. .sw_secret = sw_secret,
  1088. },
  1089. };
  1090. return dm_exec_wrappedkey_op(profile, &args);
  1091. }
  1092. static int dm_import_key(struct blk_crypto_profile *profile,
  1093. const u8 *raw_key, size_t raw_key_size,
  1094. u8 lt_key[BLK_CRYPTO_MAX_HW_WRAPPED_KEY_SIZE])
  1095. {
  1096. struct dm_wrappedkey_op_args args = {
  1097. .op = IMPORT_KEY,
  1098. .import_key = {
  1099. .raw_key = raw_key,
  1100. .raw_key_size = raw_key_size,
  1101. .lt_key = lt_key,
  1102. },
  1103. };
  1104. return dm_exec_wrappedkey_op(profile, &args);
  1105. }
  1106. static int dm_generate_key(struct blk_crypto_profile *profile,
  1107. u8 lt_key[BLK_CRYPTO_MAX_HW_WRAPPED_KEY_SIZE])
  1108. {
  1109. struct dm_wrappedkey_op_args args = {
  1110. .op = GENERATE_KEY,
  1111. .generate_key = {
  1112. .lt_key = lt_key,
  1113. },
  1114. };
  1115. return dm_exec_wrappedkey_op(profile, &args);
  1116. }
  1117. static int dm_prepare_key(struct blk_crypto_profile *profile,
  1118. const u8 *lt_key, size_t lt_key_size,
  1119. u8 eph_key[BLK_CRYPTO_MAX_HW_WRAPPED_KEY_SIZE])
  1120. {
  1121. struct dm_wrappedkey_op_args args = {
  1122. .op = PREPARE_KEY,
  1123. .prepare_key = {
  1124. .lt_key = lt_key,
  1125. .lt_key_size = lt_key_size,
  1126. .eph_key = eph_key,
  1127. },
  1128. };
  1129. return dm_exec_wrappedkey_op(profile, &args);
  1130. }
  1131. static int
  1132. device_intersect_crypto_capabilities(struct dm_target *ti, struct dm_dev *dev,
  1133. sector_t start, sector_t len, void *data)
  1134. {
  1135. struct blk_crypto_profile *parent = data;
  1136. struct blk_crypto_profile *child =
  1137. bdev_get_queue(dev->bdev)->crypto_profile;
  1138. blk_crypto_intersect_capabilities(parent, child);
  1139. return 0;
  1140. }
  1141. void dm_destroy_crypto_profile(struct blk_crypto_profile *profile)
  1142. {
  1143. struct dm_crypto_profile *dmcp = container_of(profile,
  1144. struct dm_crypto_profile,
  1145. profile);
  1146. if (!profile)
  1147. return;
  1148. blk_crypto_profile_destroy(profile);
  1149. kfree(dmcp);
  1150. }
  1151. static void dm_table_destroy_crypto_profile(struct dm_table *t)
  1152. {
  1153. dm_destroy_crypto_profile(t->crypto_profile);
  1154. t->crypto_profile = NULL;
  1155. }
  1156. /*
  1157. * Constructs and initializes t->crypto_profile with a crypto profile that
  1158. * represents the common set of crypto capabilities of the devices described by
  1159. * the dm_table. However, if the constructed crypto profile doesn't support all
  1160. * crypto capabilities that are supported by the current mapped_device, it
  1161. * returns an error instead, since we don't support removing crypto capabilities
  1162. * on table changes. Finally, if the constructed crypto profile is "empty" (has
  1163. * no crypto capabilities at all), it just sets t->crypto_profile to NULL.
  1164. */
  1165. static int dm_table_construct_crypto_profile(struct dm_table *t)
  1166. {
  1167. struct dm_crypto_profile *dmcp;
  1168. struct blk_crypto_profile *profile;
  1169. unsigned int i;
  1170. bool empty_profile = true;
  1171. dmcp = kmalloc_obj(*dmcp);
  1172. if (!dmcp)
  1173. return -ENOMEM;
  1174. dmcp->md = t->md;
  1175. profile = &dmcp->profile;
  1176. blk_crypto_profile_init(profile, 0);
  1177. profile->ll_ops.keyslot_evict = dm_keyslot_evict;
  1178. profile->max_dun_bytes_supported = UINT_MAX;
  1179. memset(profile->modes_supported, 0xFF,
  1180. sizeof(profile->modes_supported));
  1181. profile->key_types_supported = ~0;
  1182. for (i = 0; i < t->num_targets; i++) {
  1183. struct dm_target *ti = dm_table_get_target(t, i);
  1184. if (!dm_target_passes_crypto(ti->type)) {
  1185. blk_crypto_intersect_capabilities(profile, NULL);
  1186. break;
  1187. }
  1188. if (!ti->type->iterate_devices)
  1189. continue;
  1190. ti->type->iterate_devices(ti,
  1191. device_intersect_crypto_capabilities,
  1192. profile);
  1193. }
  1194. if (profile->key_types_supported & BLK_CRYPTO_KEY_TYPE_HW_WRAPPED) {
  1195. profile->ll_ops.derive_sw_secret = dm_derive_sw_secret;
  1196. profile->ll_ops.import_key = dm_import_key;
  1197. profile->ll_ops.generate_key = dm_generate_key;
  1198. profile->ll_ops.prepare_key = dm_prepare_key;
  1199. }
  1200. if (t->md->queue &&
  1201. !blk_crypto_has_capabilities(profile,
  1202. t->md->queue->crypto_profile)) {
  1203. DMERR("Inline encryption capabilities of new DM table were more restrictive than the old table's. This is not supported!");
  1204. dm_destroy_crypto_profile(profile);
  1205. return -EINVAL;
  1206. }
  1207. /*
  1208. * If the new profile doesn't actually support any crypto capabilities,
  1209. * we may as well represent it with a NULL profile.
  1210. */
  1211. for (i = 0; i < ARRAY_SIZE(profile->modes_supported); i++) {
  1212. if (profile->modes_supported[i]) {
  1213. empty_profile = false;
  1214. break;
  1215. }
  1216. }
  1217. if (empty_profile) {
  1218. dm_destroy_crypto_profile(profile);
  1219. profile = NULL;
  1220. }
  1221. /*
  1222. * t->crypto_profile is only set temporarily while the table is being
  1223. * set up, and it gets set to NULL after the profile has been
  1224. * transferred to the request_queue.
  1225. */
  1226. t->crypto_profile = profile;
  1227. return 0;
  1228. }
  1229. static void dm_update_crypto_profile(struct request_queue *q,
  1230. struct dm_table *t)
  1231. {
  1232. if (!t->crypto_profile)
  1233. return;
  1234. /* Make the crypto profile less restrictive. */
  1235. if (!q->crypto_profile) {
  1236. blk_crypto_register(t->crypto_profile, q);
  1237. } else {
  1238. blk_crypto_update_capabilities(q->crypto_profile,
  1239. t->crypto_profile);
  1240. dm_destroy_crypto_profile(t->crypto_profile);
  1241. }
  1242. t->crypto_profile = NULL;
  1243. }
  1244. #else /* CONFIG_BLK_INLINE_ENCRYPTION */
/* Stub used when CONFIG_BLK_INLINE_ENCRYPTION is disabled: always succeeds. */
static int dm_table_construct_crypto_profile(struct dm_table *t)
{
	return 0;
}
/* Stub used when CONFIG_BLK_INLINE_ENCRYPTION is disabled: nothing to destroy. */
void dm_destroy_crypto_profile(struct blk_crypto_profile *profile)
{
}
/* Stub used when CONFIG_BLK_INLINE_ENCRYPTION is disabled: no-op. */
static void dm_table_destroy_crypto_profile(struct dm_table *t)
{
}
/* Stub used when CONFIG_BLK_INLINE_ENCRYPTION is disabled: no-op. */
static void dm_update_crypto_profile(struct request_queue *q,
				     struct dm_table *t)
{
}
  1259. #endif /* !CONFIG_BLK_INLINE_ENCRYPTION */
  1260. /*
  1261. * Prepares the table for use by building the indices,
  1262. * setting the type, and allocating mempools.
  1263. */
  1264. int dm_table_complete(struct dm_table *t)
  1265. {
  1266. int r;
  1267. r = dm_table_determine_type(t);
  1268. if (r) {
  1269. DMERR("unable to determine table type");
  1270. return r;
  1271. }
  1272. r = dm_table_build_index(t);
  1273. if (r) {
  1274. DMERR("unable to build btrees");
  1275. return r;
  1276. }
  1277. r = dm_table_construct_crypto_profile(t);
  1278. if (r) {
  1279. DMERR("could not construct crypto profile.");
  1280. return r;
  1281. }
  1282. r = dm_table_alloc_md_mempools(t, t->md);
  1283. if (r)
  1284. DMERR("unable to allocate mempools");
  1285. return r;
  1286. }
/*
 * Held while a table's event callback is installed (dm_table_event_callback())
 * and while it is invoked (dm_table_event()).
 */
static DEFINE_MUTEX(_event_lock);
  1288. void dm_table_event_callback(struct dm_table *t,
  1289. void (*fn)(void *), void *context)
  1290. {
  1291. mutex_lock(&_event_lock);
  1292. t->event_fn = fn;
  1293. t->event_context = context;
  1294. mutex_unlock(&_event_lock);
  1295. }
  1296. void dm_table_event(struct dm_table *t)
  1297. {
  1298. mutex_lock(&_event_lock);
  1299. if (t->event_fn)
  1300. t->event_fn(t->event_context);
  1301. mutex_unlock(&_event_lock);
  1302. }
  1303. EXPORT_SYMBOL(dm_table_event);
  1304. inline sector_t dm_table_get_size(struct dm_table *t)
  1305. {
  1306. return t->num_targets ? (t->highs[t->num_targets - 1] + 1) : 0;
  1307. }
  1308. EXPORT_SYMBOL(dm_table_get_size);
  1309. /*
  1310. * Search the btree for the correct target.
  1311. *
  1312. * Caller should check returned pointer for NULL
  1313. * to trap I/O beyond end of device.
  1314. */
  1315. struct dm_target *dm_table_find_target(struct dm_table *t, sector_t sector)
  1316. {
  1317. unsigned int l, n = 0, k = 0;
  1318. sector_t *node;
  1319. if (unlikely(sector >= dm_table_get_size(t)))
  1320. return NULL;
  1321. for (l = 0; l < t->depth; l++) {
  1322. n = get_child(n, k);
  1323. node = get_node(t, l, n);
  1324. for (k = 0; k < KEYS_PER_NODE; k++)
  1325. if (node[k] >= sector)
  1326. break;
  1327. }
  1328. return &t->targets[(KEYS_PER_NODE * n) + k];
  1329. }
  1330. /*
  1331. * type->iterate_devices() should be called when the sanity check needs to
  1332. * iterate and check all underlying data devices. iterate_devices() will
  1333. * iterate all underlying data devices until it encounters a non-zero return
  1334. * code, returned by whether the input iterate_devices_callout_fn, or
  1335. * iterate_devices() itself internally.
  1336. *
  1337. * For some target type (e.g. dm-stripe), one call of iterate_devices() may
  1338. * iterate multiple underlying devices internally, in which case a non-zero
  1339. * return code returned by iterate_devices_callout_fn will stop the iteration
  1340. * in advance.
  1341. *
  1342. * Cases requiring _any_ underlying device supporting some kind of attribute,
  1343. * should use the iteration structure like dm_table_any_dev_attr(), or call
  1344. * it directly. @func should handle semantics of positive examples, e.g.
  1345. * capable of something.
  1346. *
  1347. * Cases requiring _all_ underlying devices supporting some kind of attribute,
  1348. * should use the iteration structure like dm_table_supports_nowait() or
  1349. * dm_table_supports_discards(). Or introduce dm_table_all_devs_attr() that
  1350. * uses an @anti_func that handle semantics of counter examples, e.g. not
  1351. * capable of something. So: return !dm_table_any_dev_attr(t, anti_func, data);
  1352. */
  1353. static bool dm_table_any_dev_attr(struct dm_table *t,
  1354. iterate_devices_callout_fn func, void *data)
  1355. {
  1356. for (unsigned int i = 0; i < t->num_targets; i++) {
  1357. struct dm_target *ti = dm_table_get_target(t, i);
  1358. if (ti->type->iterate_devices &&
  1359. ti->type->iterate_devices(ti, func, data))
  1360. return true;
  1361. }
  1362. return false;
  1363. }
  1364. static int count_device(struct dm_target *ti, struct dm_dev *dev,
  1365. sector_t start, sector_t len, void *data)
  1366. {
  1367. unsigned int *num_devices = data;
  1368. (*num_devices)++;
  1369. return 0;
  1370. }
  1371. /*
  1372. * Check whether a table has no data devices attached using each
  1373. * target's iterate_devices method.
  1374. * Returns false if the result is unknown because a target doesn't
  1375. * support iterate_devices.
  1376. */
  1377. bool dm_table_has_no_data_devices(struct dm_table *t)
  1378. {
  1379. for (unsigned int i = 0; i < t->num_targets; i++) {
  1380. struct dm_target *ti = dm_table_get_target(t, i);
  1381. unsigned int num_devices = 0;
  1382. if (!ti->type->iterate_devices)
  1383. return false;
  1384. ti->type->iterate_devices(ti, count_device, &num_devices);
  1385. if (num_devices)
  1386. return false;
  1387. }
  1388. return true;
  1389. }
  1390. bool dm_table_is_wildcard(struct dm_table *t)
  1391. {
  1392. for (unsigned int i = 0; i < t->num_targets; i++) {
  1393. struct dm_target *ti = dm_table_get_target(t, i);
  1394. if (!dm_target_is_wildcard(ti->type))
  1395. return false;
  1396. }
  1397. return true;
  1398. }
  1399. static int device_not_zoned(struct dm_target *ti, struct dm_dev *dev,
  1400. sector_t start, sector_t len, void *data)
  1401. {
  1402. bool *zoned = data;
  1403. return bdev_is_zoned(dev->bdev) != *zoned;
  1404. }
  1405. static int device_is_zoned_model(struct dm_target *ti, struct dm_dev *dev,
  1406. sector_t start, sector_t len, void *data)
  1407. {
  1408. return bdev_is_zoned(dev->bdev);
  1409. }
  1410. /*
  1411. * Check the device zoned model based on the target feature flag. If the target
  1412. * has the DM_TARGET_ZONED_HM feature flag set, host-managed zoned devices are
  1413. * also accepted but all devices must have the same zoned model. If the target
  1414. * has the DM_TARGET_MIXED_ZONED_MODEL feature set, the devices can have any
  1415. * zoned model with all zoned devices having the same zone size.
  1416. */
  1417. static bool dm_table_supports_zoned(struct dm_table *t, bool zoned)
  1418. {
  1419. for (unsigned int i = 0; i < t->num_targets; i++) {
  1420. struct dm_target *ti = dm_table_get_target(t, i);
  1421. /*
  1422. * For the wildcard target (dm-error), if we do not have a
  1423. * backing device, we must always return false. If we have a
  1424. * backing device, the result must depend on checking zoned
  1425. * model, like for any other target. So for this, check directly
  1426. * if the target backing device is zoned as we get "false" when
  1427. * dm-error was set without a backing device.
  1428. */
  1429. if (dm_target_is_wildcard(ti->type) &&
  1430. !ti->type->iterate_devices(ti, device_is_zoned_model, NULL))
  1431. return false;
  1432. if (dm_target_supports_zoned_hm(ti->type)) {
  1433. if (!ti->type->iterate_devices ||
  1434. ti->type->iterate_devices(ti, device_not_zoned,
  1435. &zoned))
  1436. return false;
  1437. } else if (!dm_target_supports_mixed_zoned_model(ti->type)) {
  1438. if (zoned)
  1439. return false;
  1440. }
  1441. }
  1442. return true;
  1443. }
  1444. static int device_not_matches_zone_sectors(struct dm_target *ti, struct dm_dev *dev,
  1445. sector_t start, sector_t len, void *data)
  1446. {
  1447. unsigned int *zone_sectors = data;
  1448. if (!bdev_is_zoned(dev->bdev))
  1449. return 0;
  1450. return bdev_zone_sectors(dev->bdev) != *zone_sectors;
  1451. }
  1452. /*
  1453. * Check consistency of zoned model and zone sectors across all targets. For
  1454. * zone sectors, if the destination device is a zoned block device, it shall
  1455. * have the specified zone_sectors.
  1456. */
  1457. static int validate_hardware_zoned(struct dm_table *t, bool zoned,
  1458. unsigned int zone_sectors)
  1459. {
  1460. if (!zoned)
  1461. return 0;
  1462. if (!dm_table_supports_zoned(t, zoned)) {
  1463. DMERR("%s: zoned model is not consistent across all devices",
  1464. dm_device_name(t->md));
  1465. return -EINVAL;
  1466. }
  1467. /* Check zone size validity and compatibility */
  1468. if (!zone_sectors || !is_power_of_2(zone_sectors))
  1469. return -EINVAL;
  1470. if (dm_table_any_dev_attr(t, device_not_matches_zone_sectors, &zone_sectors)) {
  1471. DMERR("%s: zone sectors is not consistent across all zoned devices",
  1472. dm_device_name(t->md));
  1473. return -EINVAL;
  1474. }
  1475. return 0;
  1476. }
/*
 * Establish the new table's queue_limits and validate them.
 *
 * @limits is reset to the stacking defaults, then each target's limits
 * (its devices' limits plus the target's io_hints) are computed and stacked
 * into it. t->integrity_supported is updated along the way.
 *
 * Returns -EINVAL if a device area is invalid for its target or the zoned
 * validation fails; otherwise returns the result of
 * validate_hardware_logical_block_alignment().
 */
int dm_calculate_queue_limits(struct dm_table *t,
			      struct queue_limits *limits)
{
	struct queue_limits ti_limits;
	unsigned int zone_sectors = 0;
	bool zoned = false;

	dm_set_stacking_limits(limits);

	/* Integrity is supported only if every target passes it through. */
	t->integrity_supported = true;
	for (unsigned int i = 0; i < t->num_targets; i++) {
		struct dm_target *ti = dm_table_get_target(t, i);

		if (!dm_target_passes_integrity(ti->type))
			t->integrity_supported = false;
	}

	for (unsigned int i = 0; i < t->num_targets; i++) {
		struct dm_target *ti = dm_table_get_target(t, i);

		/* Each target starts from fresh stacking defaults. */
		dm_set_stacking_limits(&ti_limits);

		if (!ti->type->iterate_devices) {
			/* Set I/O hints portion of queue limits */
			if (ti->type->io_hints)
				ti->type->io_hints(ti, &ti_limits);
			goto combine_limits;
		}

		/*
		 * Combine queue limits of all the devices this target uses.
		 */
		ti->type->iterate_devices(ti, dm_set_device_limits,
					  &ti_limits);

		if (!zoned && (ti_limits.features & BLK_FEAT_ZONED)) {
			/*
			 * After stacking all limits, validate all devices
			 * in table support this zoned model and zone sectors.
			 */
			zoned = (ti_limits.features & BLK_FEAT_ZONED);
			zone_sectors = ti_limits.chunk_sectors;
		}

		/* Set I/O hints portion of queue limits */
		if (ti->type->io_hints)
			ti->type->io_hints(ti, &ti_limits);

		/*
		 * Check each device area is consistent with the target's
		 * overall queue limits.
		 */
		if (ti->type->iterate_devices(ti, device_area_is_invalid,
					      &ti_limits))
			return -EINVAL;

combine_limits:
		/*
		 * Merge this target's queue limits into the overall limits
		 * for the table.
		 */
		if (blk_stack_limits(limits, &ti_limits, 0) < 0)
			DMWARN("%s: adding target device (start sect %llu len %llu) "
			       "caused an alignment inconsistency",
			       dm_device_name(t->md),
			       (unsigned long long) ti->begin,
			       (unsigned long long) ti->len);

		/* Stacking failure only disables integrity; it is not an error. */
		if (t->integrity_supported ||
		    dm_target_has_integrity(ti->type)) {
			if (!queue_limits_stack_integrity(limits, &ti_limits)) {
				DMWARN("%s: adding target device (start sect %llu len %llu) "
				       "disabled integrity support due to incompatibility",
				       dm_device_name(t->md),
				       (unsigned long long) ti->begin,
				       (unsigned long long) ti->len);
				t->integrity_supported = false;
			}
		}
	}

	/*
	 * Verify that the zoned model and zone sectors, as determined before
	 * any .io_hints override, are the same across all devices in the table.
	 * - this is especially relevant if .io_hints is emulating a disk-managed
	 *   zoned model on host-managed zoned block devices.
	 * BUT...
	 */
	if (limits->features & BLK_FEAT_ZONED) {
		/*
		 * ...IF the above limits stacking determined a zoned model
		 * validate that all of the table's devices conform to it.
		 */
		zoned = limits->features & BLK_FEAT_ZONED;
		zone_sectors = limits->chunk_sectors;
	}
	if (validate_hardware_zoned(t, zoned, zone_sectors))
		return -EINVAL;

	return validate_hardware_logical_block_alignment(t, limits);
}
  1567. /*
  1568. * Check if a target requires flush support even if none of the underlying
  1569. * devices need it (e.g. to persist target-specific metadata).
  1570. */
  1571. static bool dm_table_supports_flush(struct dm_table *t)
  1572. {
  1573. for (unsigned int i = 0; i < t->num_targets; i++) {
  1574. struct dm_target *ti = dm_table_get_target(t, i);
  1575. if (ti->num_flush_bios && ti->flush_supported)
  1576. return true;
  1577. }
  1578. return false;
  1579. }
  1580. static int device_dax_write_cache_enabled(struct dm_target *ti,
  1581. struct dm_dev *dev, sector_t start,
  1582. sector_t len, void *data)
  1583. {
  1584. struct dax_device *dax_dev = dev->dax_dev;
  1585. if (!dax_dev)
  1586. return false;
  1587. if (dax_write_cache_enabled(dax_dev))
  1588. return true;
  1589. return false;
  1590. }
  1591. static int device_not_write_zeroes_capable(struct dm_target *ti, struct dm_dev *dev,
  1592. sector_t start, sector_t len, void *data)
  1593. {
  1594. struct request_queue *q = bdev_get_queue(dev->bdev);
  1595. int b;
  1596. mutex_lock(&q->limits_lock);
  1597. b = !q->limits.max_write_zeroes_sectors;
  1598. mutex_unlock(&q->limits_lock);
  1599. return b;
  1600. }
  1601. static bool dm_table_supports_write_zeroes(struct dm_table *t)
  1602. {
  1603. for (unsigned int i = 0; i < t->num_targets; i++) {
  1604. struct dm_target *ti = dm_table_get_target(t, i);
  1605. if (!ti->num_write_zeroes_bios)
  1606. return false;
  1607. if (!ti->type->iterate_devices ||
  1608. ti->type->iterate_devices(ti, device_not_write_zeroes_capable, NULL))
  1609. return false;
  1610. }
  1611. return true;
  1612. }
  1613. static bool dm_table_supports_nowait(struct dm_table *t)
  1614. {
  1615. for (unsigned int i = 0; i < t->num_targets; i++) {
  1616. struct dm_target *ti = dm_table_get_target(t, i);
  1617. if (!dm_target_supports_nowait(ti->type))
  1618. return false;
  1619. }
  1620. return true;
  1621. }
  1622. static int device_not_discard_capable(struct dm_target *ti, struct dm_dev *dev,
  1623. sector_t start, sector_t len, void *data)
  1624. {
  1625. return !bdev_max_discard_sectors(dev->bdev);
  1626. }
  1627. static bool dm_table_supports_discards(struct dm_table *t)
  1628. {
  1629. for (unsigned int i = 0; i < t->num_targets; i++) {
  1630. struct dm_target *ti = dm_table_get_target(t, i);
  1631. if (!ti->num_discard_bios)
  1632. return false;
  1633. /*
  1634. * Either the target provides discard support (as implied by setting
  1635. * 'discards_supported') or it relies on _all_ data devices having
  1636. * discard support.
  1637. */
  1638. if (!ti->discards_supported &&
  1639. (!ti->type->iterate_devices ||
  1640. ti->type->iterate_devices(ti, device_not_discard_capable, NULL)))
  1641. return false;
  1642. }
  1643. return true;
  1644. }
  1645. static int device_not_secure_erase_capable(struct dm_target *ti,
  1646. struct dm_dev *dev, sector_t start,
  1647. sector_t len, void *data)
  1648. {
  1649. return !bdev_max_secure_erase_sectors(dev->bdev);
  1650. }
  1651. static bool dm_table_supports_secure_erase(struct dm_table *t)
  1652. {
  1653. for (unsigned int i = 0; i < t->num_targets; i++) {
  1654. struct dm_target *ti = dm_table_get_target(t, i);
  1655. if (!ti->num_secure_erase_bios)
  1656. return false;
  1657. if (!ti->type->iterate_devices ||
  1658. ti->type->iterate_devices(ti, device_not_secure_erase_capable, NULL))
  1659. return false;
  1660. }
  1661. return true;
  1662. }
  1663. static int device_not_atomic_write_capable(struct dm_target *ti,
  1664. struct dm_dev *dev, sector_t start,
  1665. sector_t len, void *data)
  1666. {
  1667. return !bdev_can_atomic_write(dev->bdev);
  1668. }
  1669. static bool dm_table_supports_atomic_writes(struct dm_table *t)
  1670. {
  1671. for (unsigned int i = 0; i < t->num_targets; i++) {
  1672. struct dm_target *ti = dm_table_get_target(t, i);
  1673. if (!dm_target_supports_atomic_writes(ti->type))
  1674. return false;
  1675. if (!ti->type->iterate_devices)
  1676. return false;
  1677. if (ti->type->iterate_devices(ti,
  1678. device_not_atomic_write_capable, NULL)) {
  1679. return false;
  1680. }
  1681. }
  1682. return true;
  1683. }
  1684. bool dm_table_supports_size_change(struct dm_table *t, sector_t old_size,
  1685. sector_t new_size)
  1686. {
  1687. if (IS_ENABLED(CONFIG_BLK_DEV_ZONED) && dm_has_zone_plugs(t->md) &&
  1688. old_size != new_size) {
  1689. DMWARN("%s: device has zone write plug resources. "
  1690. "Cannot change size",
  1691. dm_device_name(t->md));
  1692. return false;
  1693. }
  1694. return true;
  1695. }
  1696. /*
  1697. * This function will be skipped by noflush reloads of immutable request
  1698. * based devices (dm-mpath).
  1699. */
  1700. int dm_table_set_restrictions(struct dm_table *t, struct request_queue *q,
  1701. struct queue_limits *limits)
  1702. {
  1703. int r;
  1704. struct queue_limits old_limits;
  1705. if (!dm_table_supports_nowait(t))
  1706. limits->features &= ~BLK_FEAT_NOWAIT;
  1707. /*
  1708. * The current polling impementation does not support request based
  1709. * stacking.
  1710. */
  1711. if (!__table_type_bio_based(t->type))
  1712. limits->features &= ~BLK_FEAT_POLL;
  1713. if (!dm_table_supports_discards(t)) {
  1714. limits->max_hw_discard_sectors = 0;
  1715. limits->discard_granularity = 0;
  1716. limits->discard_alignment = 0;
  1717. }
  1718. if (!dm_table_supports_write_zeroes(t)) {
  1719. limits->max_write_zeroes_sectors = 0;
  1720. limits->max_hw_wzeroes_unmap_sectors = 0;
  1721. }
  1722. if (!dm_table_supports_secure_erase(t))
  1723. limits->max_secure_erase_sectors = 0;
  1724. if (dm_table_supports_flush(t))
  1725. limits->features |= BLK_FEAT_WRITE_CACHE | BLK_FEAT_FUA;
  1726. if (dm_table_supports_dax(t, device_not_dax_capable))
  1727. limits->features |= BLK_FEAT_DAX;
  1728. else
  1729. limits->features &= ~BLK_FEAT_DAX;
  1730. /* For a zoned table, setup the zone related queue attributes. */
  1731. if (IS_ENABLED(CONFIG_BLK_DEV_ZONED)) {
  1732. if (limits->features & BLK_FEAT_ZONED) {
  1733. r = dm_set_zones_restrictions(t, q, limits);
  1734. if (r)
  1735. return r;
  1736. } else if (dm_has_zone_plugs(t->md)) {
  1737. DMWARN("%s: device has zone write plug resources. "
  1738. "Cannot switch to non-zoned table.",
  1739. dm_device_name(t->md));
  1740. return -EINVAL;
  1741. }
  1742. }
  1743. if (dm_table_supports_atomic_writes(t))
  1744. limits->features |= BLK_FEAT_ATOMIC_WRITES;
  1745. old_limits = queue_limits_start_update(q);
  1746. r = queue_limits_commit_update(q, limits);
  1747. if (r)
  1748. return r;
  1749. /*
  1750. * Now that the limits are set, check the zones mapped by the table
  1751. * and setup the resources for zone append emulation if necessary.
  1752. */
  1753. if (IS_ENABLED(CONFIG_BLK_DEV_ZONED) &&
  1754. (limits->features & BLK_FEAT_ZONED)) {
  1755. r = dm_revalidate_zones(t, q);
  1756. if (r) {
  1757. queue_limits_set(q, &old_limits);
  1758. return r;
  1759. }
  1760. }
  1761. if (IS_ENABLED(CONFIG_BLK_DEV_ZONED))
  1762. dm_finalize_zone_settings(t, limits);
  1763. if (dm_table_supports_dax(t, device_not_dax_synchronous_capable))
  1764. set_dax_synchronous(t->md->dax_dev);
  1765. if (dm_table_any_dev_attr(t, device_dax_write_cache_enabled, NULL))
  1766. dax_write_cache(t->md->dax_dev, true);
  1767. dm_update_crypto_profile(q, t);
  1768. return 0;
  1769. }
  1770. struct list_head *dm_table_get_devices(struct dm_table *t)
  1771. {
  1772. return &t->devices;
  1773. }
  1774. blk_mode_t dm_table_get_mode(struct dm_table *t)
  1775. {
  1776. return t->mode;
  1777. }
  1778. EXPORT_SYMBOL(dm_table_get_mode);
  /* Selects which per-target suspend hook suspend_targets() invokes. */
  enum suspend_mode {
  	PRESUSPEND,		/* call the target type's presuspend hook */
  	PRESUSPEND_UNDO,	/* call the target type's presuspend_undo hook */
  	POSTSUSPEND,		/* call the target type's postsuspend hook */
  };
  1784. static void suspend_targets(struct dm_table *t, enum suspend_mode mode)
  1785. {
  1786. lockdep_assert_held(&t->md->suspend_lock);
  1787. for (unsigned int i = 0; i < t->num_targets; i++) {
  1788. struct dm_target *ti = dm_table_get_target(t, i);
  1789. switch (mode) {
  1790. case PRESUSPEND:
  1791. if (ti->type->presuspend)
  1792. ti->type->presuspend(ti);
  1793. break;
  1794. case PRESUSPEND_UNDO:
  1795. if (ti->type->presuspend_undo)
  1796. ti->type->presuspend_undo(ti);
  1797. break;
  1798. case POSTSUSPEND:
  1799. if (ti->type->postsuspend)
  1800. ti->type->postsuspend(ti);
  1801. break;
  1802. }
  1803. }
  1804. }
  1805. void dm_table_presuspend_targets(struct dm_table *t)
  1806. {
  1807. if (!t)
  1808. return;
  1809. suspend_targets(t, PRESUSPEND);
  1810. }
  1811. void dm_table_presuspend_undo_targets(struct dm_table *t)
  1812. {
  1813. if (!t)
  1814. return;
  1815. suspend_targets(t, PRESUSPEND_UNDO);
  1816. }
  1817. void dm_table_postsuspend_targets(struct dm_table *t)
  1818. {
  1819. if (!t)
  1820. return;
  1821. suspend_targets(t, POSTSUSPEND);
  1822. }
  1823. int dm_table_resume_targets(struct dm_table *t)
  1824. {
  1825. unsigned int i;
  1826. int r = 0;
  1827. lockdep_assert_held(&t->md->suspend_lock);
  1828. for (i = 0; i < t->num_targets; i++) {
  1829. struct dm_target *ti = dm_table_get_target(t, i);
  1830. if (!ti->type->preresume)
  1831. continue;
  1832. r = ti->type->preresume(ti);
  1833. if (r) {
  1834. DMERR("%s: %s: preresume failed, error = %d",
  1835. dm_device_name(t->md), ti->type->name, r);
  1836. return r;
  1837. }
  1838. }
  1839. for (i = 0; i < t->num_targets; i++) {
  1840. struct dm_target *ti = dm_table_get_target(t, i);
  1841. if (ti->type->resume)
  1842. ti->type->resume(ti);
  1843. }
  1844. return 0;
  1845. }
  1846. struct mapped_device *dm_table_get_md(struct dm_table *t)
  1847. {
  1848. return t->md;
  1849. }
  1850. EXPORT_SYMBOL(dm_table_get_md);
  1851. const char *dm_table_device_name(struct dm_table *t)
  1852. {
  1853. return dm_device_name(t->md);
  1854. }
  1855. EXPORT_SYMBOL_GPL(dm_table_device_name);
  1856. void dm_table_run_md_queue_async(struct dm_table *t)
  1857. {
  1858. if (!dm_table_request_based(t))
  1859. return;
  1860. if (t->md->queue)
  1861. blk_mq_run_hw_queues(t->md->queue, true);
  1862. }
  1863. EXPORT_SYMBOL(dm_table_run_md_queue_async);