blocklayout.c 11 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429
  1. // SPDX-License-Identifier: GPL-2.0
  2. /*
  3. * Copyright (c) 2014-2016 Christoph Hellwig.
  4. */
  5. #include <linux/exportfs.h>
  6. #include <linux/iomap.h>
  7. #include <linux/slab.h>
  8. #include <linux/pr.h>
  9. #include <linux/nfsd/debug.h>
  10. #include "blocklayoutxdr.h"
  11. #include "pnfs.h"
  12. #include "filecache.h"
  13. #include "vfs.h"
  14. #include "trace.h"
  15. #define NFSDDBG_FACILITY NFSDDBG_PNFS
  16. /*
  17. * Get an extent from the file system that starts at offset or below
  18. * and may be shorter than the requested length.
  19. */
  20. static __be32
  21. nfsd4_block_map_extent(struct inode *inode, const struct svc_fh *fhp,
  22. u64 offset, u64 length, u32 iomode, u64 minlength,
  23. struct pnfs_block_extent *bex)
  24. {
  25. struct super_block *sb = inode->i_sb;
  26. struct iomap iomap;
  27. u32 device_generation = 0;
  28. int error;
  29. error = sb->s_export_op->map_blocks(inode, offset, length, &iomap,
  30. iomode != IOMODE_READ, &device_generation);
  31. if (error) {
  32. if (error == -ENXIO)
  33. return nfserr_layoutunavailable;
  34. return nfserrno(error);
  35. }
  36. switch (iomap.type) {
  37. case IOMAP_MAPPED:
  38. if (iomode == IOMODE_READ)
  39. bex->es = PNFS_BLOCK_READ_DATA;
  40. else
  41. bex->es = PNFS_BLOCK_READWRITE_DATA;
  42. bex->soff = iomap.addr;
  43. break;
  44. case IOMAP_UNWRITTEN:
  45. if (iomode & IOMODE_RW) {
  46. /*
  47. * Crack monkey special case from section 2.3.1.
  48. */
  49. if (minlength == 0) {
  50. dprintk("pnfsd: no soup for you!\n");
  51. return nfserr_layoutunavailable;
  52. }
  53. bex->es = PNFS_BLOCK_INVALID_DATA;
  54. bex->soff = iomap.addr;
  55. break;
  56. }
  57. fallthrough;
  58. case IOMAP_HOLE:
  59. if (iomode == IOMODE_READ) {
  60. bex->es = PNFS_BLOCK_NONE_DATA;
  61. break;
  62. }
  63. fallthrough;
  64. case IOMAP_DELALLOC:
  65. default:
  66. WARN(1, "pnfsd: filesystem returned %d extent\n", iomap.type);
  67. return nfserr_layoutunavailable;
  68. }
  69. error = nfsd4_set_deviceid(&bex->vol_id, fhp, device_generation);
  70. if (error)
  71. return nfserrno(error);
  72. bex->foff = iomap.offset;
  73. bex->len = iomap.length;
  74. return nfs_ok;
  75. }
  76. static __be32
  77. nfsd4_block_proc_layoutget(struct svc_rqst *rqstp, struct inode *inode,
  78. const struct svc_fh *fhp, struct nfsd4_layoutget *args)
  79. {
  80. struct nfsd4_layout_seg *seg = &args->lg_seg;
  81. struct pnfs_block_layout *bl;
  82. struct pnfs_block_extent *first_bex, *last_bex;
  83. u64 offset = seg->offset, length = seg->length;
  84. u32 i, nr_extents_max, block_size = i_blocksize(inode);
  85. __be32 nfserr;
  86. if (locks_in_grace(SVC_NET(rqstp)))
  87. return nfserr_grace;
  88. nfserr = nfserr_layoutunavailable;
  89. if (seg->offset & (block_size - 1)) {
  90. dprintk("pnfsd: I/O misaligned\n");
  91. goto out_error;
  92. }
  93. /*
  94. * RFC 8881, section 3.3.17:
  95. * The layout4 data type defines a layout for a file.
  96. *
  97. * RFC 8881, section 18.43.3:
  98. * The loga_maxcount field specifies the maximum layout size
  99. * (in bytes) that the client can handle. If the size of the
  100. * layout structure exceeds the size specified by maxcount,
  101. * the metadata server will return the NFS4ERR_TOOSMALL error.
  102. */
  103. nfserr = nfserr_toosmall;
  104. if (args->lg_maxcount < PNFS_BLOCK_LAYOUT4_SIZE +
  105. PNFS_BLOCK_EXTENT_SIZE)
  106. goto out_error;
  107. /*
  108. * Limit the maximum layout size to avoid allocating
  109. * a large buffer on the server for each layout request.
  110. */
  111. nr_extents_max = (min(args->lg_maxcount, PAGE_SIZE) -
  112. PNFS_BLOCK_LAYOUT4_SIZE) / PNFS_BLOCK_EXTENT_SIZE;
  113. /*
  114. * Some clients barf on non-zero block numbers for NONE or INVALID
  115. * layouts, so make sure to zero the whole structure.
  116. */
  117. nfserr = nfserrno(-ENOMEM);
  118. bl = kzalloc_flex(*bl, extents, nr_extents_max);
  119. if (!bl)
  120. goto out_error;
  121. bl->nr_extents = nr_extents_max;
  122. args->lg_content = bl;
  123. for (i = 0; i < bl->nr_extents; i++) {
  124. struct pnfs_block_extent *bex = bl->extents + i;
  125. u64 bex_length;
  126. nfserr = nfsd4_block_map_extent(inode, fhp, offset, length,
  127. seg->iomode, args->lg_minlength, bex);
  128. if (nfserr != nfs_ok)
  129. goto out_error;
  130. bex_length = bex->len - (offset - bex->foff);
  131. if (bex_length >= length) {
  132. bl->nr_extents = i + 1;
  133. break;
  134. }
  135. offset = bex->foff + bex->len;
  136. length -= bex_length;
  137. }
  138. first_bex = bl->extents;
  139. last_bex = bl->extents + bl->nr_extents - 1;
  140. nfserr = nfserr_layoutunavailable;
  141. length = last_bex->foff + last_bex->len - seg->offset;
  142. if (length < args->lg_minlength) {
  143. dprintk("pnfsd: extent smaller than minlength\n");
  144. goto out_error;
  145. }
  146. seg->offset = first_bex->foff;
  147. seg->length = last_bex->foff - first_bex->foff + last_bex->len;
  148. return nfs_ok;
  149. out_error:
  150. seg->length = 0;
  151. return nfserr;
  152. }
  153. static __be32
  154. nfsd4_block_commit_blocks(struct inode *inode, struct nfsd4_layoutcommit *lcp,
  155. struct iomap *iomaps, int nr_iomaps)
  156. {
  157. struct timespec64 mtime = inode_get_mtime(inode);
  158. struct iattr iattr = { .ia_valid = 0 };
  159. int error;
  160. if (lcp->lc_mtime.tv_nsec == UTIME_NOW ||
  161. timespec64_compare(&lcp->lc_mtime, &mtime) < 0)
  162. lcp->lc_mtime = current_time(inode);
  163. iattr.ia_valid |= ATTR_ATIME | ATTR_CTIME | ATTR_MTIME;
  164. iattr.ia_atime = iattr.ia_ctime = iattr.ia_mtime = lcp->lc_mtime;
  165. if (lcp->lc_size_chg) {
  166. iattr.ia_valid |= ATTR_SIZE;
  167. iattr.ia_size = lcp->lc_newsize;
  168. }
  169. error = inode->i_sb->s_export_op->commit_blocks(inode, iomaps,
  170. nr_iomaps, &iattr);
  171. kfree(iomaps);
  172. return nfserrno(error);
  173. }
  174. #ifdef CONFIG_NFSD_BLOCKLAYOUT
  175. static int
  176. nfsd4_block_get_device_info_simple(struct super_block *sb,
  177. struct nfsd4_getdeviceinfo *gdp)
  178. {
  179. struct pnfs_block_deviceaddr *dev;
  180. struct pnfs_block_volume *b;
  181. dev = kzalloc_flex(*dev, volumes, 1);
  182. if (!dev)
  183. return -ENOMEM;
  184. gdp->gd_device = dev;
  185. dev->nr_volumes = 1;
  186. b = &dev->volumes[0];
  187. b->type = PNFS_BLOCK_VOLUME_SIMPLE;
  188. b->simple.sig_len = PNFS_BLOCK_UUID_LEN;
  189. return sb->s_export_op->get_uuid(sb, b->simple.sig, &b->simple.sig_len,
  190. &b->simple.offset);
  191. }
  192. static __be32
  193. nfsd4_block_proc_getdeviceinfo(struct super_block *sb,
  194. struct svc_rqst *rqstp,
  195. struct nfs4_client *clp,
  196. struct nfsd4_getdeviceinfo *gdp)
  197. {
  198. if (bdev_is_partition(sb->s_bdev))
  199. return nfserr_inval;
  200. return nfserrno(nfsd4_block_get_device_info_simple(sb, gdp));
  201. }
  202. static __be32
  203. nfsd4_block_proc_layoutcommit(struct inode *inode, struct svc_rqst *rqstp,
  204. struct nfsd4_layoutcommit *lcp)
  205. {
  206. struct iomap *iomaps;
  207. int nr_iomaps;
  208. __be32 nfserr;
  209. rqstp->rq_arg = lcp->lc_up_layout;
  210. svcxdr_init_decode(rqstp);
  211. nfserr = nfsd4_block_decode_layoutupdate(&rqstp->rq_arg_stream,
  212. &iomaps, &nr_iomaps, i_blocksize(inode));
  213. if (nfserr != nfs_ok)
  214. return nfserr;
  215. return nfsd4_block_commit_blocks(inode, lcp, iomaps, nr_iomaps);
  216. }
  217. const struct nfsd4_layout_ops bl_layout_ops = {
  218. /*
  219. * Pretend that we send notification to the client. This is a blatant
  220. * lie to force recent Linux clients to cache our device IDs.
  221. * We rarely ever change the device ID, so the harm of leaking deviceids
  222. * for a while isn't too bad. Unfortunately RFC5661 is a complete mess
  223. * in this regard, but I filed errata 4119 for this a while ago, and
  224. * hopefully the Linux client will eventually start caching deviceids
  225. * without this again.
  226. */
  227. .notify_types =
  228. NOTIFY_DEVICEID4_DELETE | NOTIFY_DEVICEID4_CHANGE,
  229. .proc_getdeviceinfo = nfsd4_block_proc_getdeviceinfo,
  230. .encode_getdeviceinfo = nfsd4_block_encode_getdeviceinfo,
  231. .proc_layoutget = nfsd4_block_proc_layoutget,
  232. .encode_layoutget = nfsd4_block_encode_layoutget,
  233. .proc_layoutcommit = nfsd4_block_proc_layoutcommit,
  234. };
  235. #endif /* CONFIG_NFSD_BLOCKLAYOUT */
  236. #ifdef CONFIG_NFSD_SCSILAYOUT
  237. #define NFSD_MDS_PR_KEY 0x0100000000000000ULL
  238. /*
  239. * We use the client ID as a unique key for the reservations.
  240. * This allows us to easily fence a client when recalls fail.
  241. */
  242. static u64 nfsd4_scsi_pr_key(struct nfs4_client *clp)
  243. {
  244. return ((u64)clp->cl_clientid.cl_boot << 32) | clp->cl_clientid.cl_id;
  245. }
  246. static const u8 designator_types[] = {
  247. PS_DESIGNATOR_EUI64,
  248. PS_DESIGNATOR_NAA,
  249. };
  250. static int
  251. nfsd4_block_get_unique_id(struct gendisk *disk, struct pnfs_block_volume *b)
  252. {
  253. int ret, i;
  254. for (i = 0; i < ARRAY_SIZE(designator_types); i++) {
  255. u8 type = designator_types[i];
  256. ret = disk->fops->get_unique_id(disk, b->scsi.designator, type);
  257. if (ret > 0) {
  258. b->scsi.code_set = PS_CODE_SET_BINARY;
  259. b->scsi.designator_type = type;
  260. b->scsi.designator_len = ret;
  261. return 0;
  262. }
  263. }
  264. return -EINVAL;
  265. }
  266. static int
  267. nfsd4_block_get_device_info_scsi(struct super_block *sb,
  268. struct nfs4_client *clp,
  269. struct nfsd4_getdeviceinfo *gdp)
  270. {
  271. struct pnfs_block_deviceaddr *dev;
  272. struct pnfs_block_volume *b;
  273. const struct pr_ops *ops;
  274. int ret;
  275. dev = kzalloc_flex(*dev, volumes, 1);
  276. if (!dev)
  277. return -ENOMEM;
  278. gdp->gd_device = dev;
  279. dev->nr_volumes = 1;
  280. b = &dev->volumes[0];
  281. b->type = PNFS_BLOCK_VOLUME_SCSI;
  282. b->scsi.pr_key = nfsd4_scsi_pr_key(clp);
  283. ret = nfsd4_block_get_unique_id(sb->s_bdev->bd_disk, b);
  284. if (ret < 0)
  285. goto out_free_dev;
  286. ret = -EINVAL;
  287. ops = sb->s_bdev->bd_disk->fops->pr_ops;
  288. if (!ops) {
  289. pr_err("pNFS: device %s does not support PRs.\n",
  290. sb->s_id);
  291. goto out_free_dev;
  292. }
  293. ret = ops->pr_register(sb->s_bdev, 0, NFSD_MDS_PR_KEY, true);
  294. if (ret) {
  295. pr_err("pNFS: failed to register key for device %s.\n",
  296. sb->s_id);
  297. goto out_free_dev;
  298. }
  299. ret = ops->pr_reserve(sb->s_bdev, NFSD_MDS_PR_KEY,
  300. PR_EXCLUSIVE_ACCESS_REG_ONLY, 0);
  301. if (ret) {
  302. pr_err("pNFS: failed to reserve device %s.\n",
  303. sb->s_id);
  304. goto out_free_dev;
  305. }
  306. return 0;
  307. out_free_dev:
  308. kfree(dev);
  309. gdp->gd_device = NULL;
  310. return ret;
  311. }
  312. static __be32
  313. nfsd4_scsi_proc_getdeviceinfo(struct super_block *sb,
  314. struct svc_rqst *rqstp,
  315. struct nfs4_client *clp,
  316. struct nfsd4_getdeviceinfo *gdp)
  317. {
  318. if (bdev_is_partition(sb->s_bdev))
  319. return nfserr_inval;
  320. return nfserrno(nfsd4_block_get_device_info_scsi(sb, clp, gdp));
  321. }
  322. static __be32
  323. nfsd4_scsi_proc_layoutcommit(struct inode *inode, struct svc_rqst *rqstp,
  324. struct nfsd4_layoutcommit *lcp)
  325. {
  326. struct iomap *iomaps;
  327. int nr_iomaps;
  328. __be32 nfserr;
  329. rqstp->rq_arg = lcp->lc_up_layout;
  330. svcxdr_init_decode(rqstp);
  331. nfserr = nfsd4_scsi_decode_layoutupdate(&rqstp->rq_arg_stream,
  332. &iomaps, &nr_iomaps, i_blocksize(inode));
  333. if (nfserr != nfs_ok)
  334. return nfserr;
  335. return nfsd4_block_commit_blocks(inode, lcp, iomaps, nr_iomaps);
  336. }
  337. static void
  338. nfsd4_scsi_fence_client(struct nfs4_layout_stateid *ls, struct nfsd_file *file)
  339. {
  340. struct nfs4_client *clp = ls->ls_stid.sc_client;
  341. struct block_device *bdev = file->nf_file->f_path.mnt->mnt_sb->s_bdev;
  342. int status;
  343. status = bdev->bd_disk->fops->pr_ops->pr_preempt(bdev, NFSD_MDS_PR_KEY,
  344. nfsd4_scsi_pr_key(clp),
  345. PR_EXCLUSIVE_ACCESS_REG_ONLY, true);
  346. trace_nfsd_pnfs_fence(clp, bdev->bd_disk->disk_name, status);
  347. }
  348. const struct nfsd4_layout_ops scsi_layout_ops = {
  349. /*
  350. * Pretend that we send notification to the client. This is a blatant
  351. * lie to force recent Linux clients to cache our device IDs.
  352. * We rarely ever change the device ID, so the harm of leaking deviceids
  353. * for a while isn't too bad. Unfortunately RFC5661 is a complete mess
  354. * in this regard, but I filed errata 4119 for this a while ago, and
  355. * hopefully the Linux client will eventually start caching deviceids
  356. * without this again.
  357. */
  358. .notify_types =
  359. NOTIFY_DEVICEID4_DELETE | NOTIFY_DEVICEID4_CHANGE,
  360. .proc_getdeviceinfo = nfsd4_scsi_proc_getdeviceinfo,
  361. .encode_getdeviceinfo = nfsd4_block_encode_getdeviceinfo,
  362. .proc_layoutget = nfsd4_block_proc_layoutget,
  363. .encode_layoutget = nfsd4_block_encode_layoutget,
  364. .proc_layoutcommit = nfsd4_scsi_proc_layoutcommit,
  365. .fence_client = nfsd4_scsi_fence_client,
  366. };
  367. #endif /* CONFIG_NFSD_SCSILAYOUT */