localio.c 29 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935936937938939940941942943944945946947948949950951952953954955956957958959960961962963964965966967968969970971972973974975976977978979980981982983984985986987988989990991992993994995996997998999100010011002100310041005100610071008100910101011101210131014101510161017101810191020102110221023102410251026102710281029103010311032103310341035103610371038103910401041104210431044104510461047104810491050105110521053105410551056105710581059106010611062106310641065106610671068106910701071107210731074107510761077107810791080108110821083108410851086108710881089109010911092109310941095109610971098109911001101110211031104110511061107
  1. // SPDX-License-Identifier: GPL-2.0-only
  2. /*
  3. * NFS client support for local clients to bypass network stack
  4. *
  5. * Copyright (C) 2014 Weston Andros Adamson <dros@primarydata.com>
  6. * Copyright (C) 2019 Trond Myklebust <trond.myklebust@hammerspace.com>
  7. * Copyright (C) 2024 Mike Snitzer <snitzer@hammerspace.com>
  8. * Copyright (C) 2024 NeilBrown <neilb@suse.de>
  9. */
  10. #include <linux/module.h>
  11. #include <linux/errno.h>
  12. #include <linux/vfs.h>
  13. #include <linux/file.h>
  14. #include <linux/inet.h>
  15. #include <linux/sunrpc/addr.h>
  16. #include <linux/inetdevice.h>
  17. #include <net/addrconf.h>
  18. #include <linux/nfs_common.h>
  19. #include <linux/nfslocalio.h>
  20. #include <linux/bvec.h>
  21. #include <linux/nfs.h>
  22. #include <linux/nfs_fs.h>
  23. #include <linux/nfs_xdr.h>
  24. #include "internal.h"
  25. #include "pnfs.h"
  26. #include "nfstrace.h"
  /* Debug facility selected for this file (NFSDBG_VFS). */
  #define NFSDBG_FACILITY		NFSDBG_VFS

  /*
   * Capacity of the per-request iov_iter array in struct nfs_local_kiocb:
   * one slot each for a misaligned start, a DIO-aligned middle and a
   * misaligned end.
   */
  #define NFSLOCAL_MAX_IOS	3
  29. struct nfs_local_kiocb {
  30. struct kiocb kiocb;
  31. struct bio_vec *bvec;
  32. struct nfs_pgio_header *hdr;
  33. struct work_struct work;
  34. void (*aio_complete_work)(struct work_struct *);
  35. struct nfsd_file *localio;
  36. /* Begin mostly DIO-specific members */
  37. size_t end_len;
  38. short int end_iter_index;
  39. atomic_t n_iters;
  40. struct iov_iter iters[NFSLOCAL_MAX_IOS];
  41. bool iter_is_dio_aligned[NFSLOCAL_MAX_IOS];
  42. /* End mostly DIO-specific members */
  43. };
  44. struct nfs_local_fsync_ctx {
  45. struct nfsd_file *localio;
  46. struct nfs_commit_data *data;
  47. struct work_struct work;
  48. struct completion *done;
  49. };
  50. static bool localio_enabled __read_mostly = true;
  51. module_param(localio_enabled, bool, 0644);
  /*
   * Forward declarations: nfs_local_pgio_restart() re-issues I/O through
   * these before their definitions appear.
   */
  static void
  nfs_local_do_read(struct nfs_local_kiocb *iocb,
  		  const struct rpc_call_ops *call_ops);
  static void
  nfs_local_do_write(struct nfs_local_kiocb *iocb,
  		   const struct rpc_call_ops *call_ops);
  56. static inline bool nfs_client_is_local(const struct nfs_client *clp)
  57. {
  58. return !!rcu_access_pointer(clp->cl_uuid.net);
  59. }
  60. bool nfs_server_is_local(const struct nfs_client *clp)
  61. {
  62. return nfs_client_is_local(clp) && localio_enabled;
  63. }
  64. EXPORT_SYMBOL_GPL(nfs_server_is_local);
  65. /*
  66. * UUID_IS_LOCAL XDR functions
  67. */
  68. static void localio_xdr_enc_uuidargs(struct rpc_rqst *req,
  69. struct xdr_stream *xdr,
  70. const void *data)
  71. {
  72. const u8 *uuid = data;
  73. encode_opaque_fixed(xdr, uuid, UUID_SIZE);
  74. }
  /*
   * Decode the UUID_IS_LOCAL reply. The procedure returns void, so there
   * is nothing to pull from @xdr and decoding always succeeds (0).
   */
  static int localio_xdr_dec_uuidres(struct rpc_rqst *req,
  				   struct xdr_stream *xdr,
  				   void *result)
  {
  	/* void return */
  	return 0;
  }
  82. static const struct rpc_procinfo nfs_localio_procedures[] = {
  83. [LOCALIOPROC_UUID_IS_LOCAL] = {
  84. .p_proc = LOCALIOPROC_UUID_IS_LOCAL,
  85. .p_encode = localio_xdr_enc_uuidargs,
  86. .p_decode = localio_xdr_dec_uuidres,
  87. .p_arglen = XDR_QUADLEN(UUID_SIZE),
  88. .p_replen = 0,
  89. .p_statidx = LOCALIOPROC_UUID_IS_LOCAL,
  90. .p_name = "UUID_IS_LOCAL",
  91. },
  92. };
  93. static unsigned int nfs_localio_counts[ARRAY_SIZE(nfs_localio_procedures)];
  94. static const struct rpc_version nfslocalio_version1 = {
  95. .number = 1,
  96. .nrprocs = ARRAY_SIZE(nfs_localio_procedures),
  97. .procs = nfs_localio_procedures,
  98. .counts = nfs_localio_counts,
  99. };
  100. static const struct rpc_version *nfslocalio_version[] = {
  101. [1] = &nfslocalio_version1,
  102. };
  103. extern const struct rpc_program nfslocalio_program;
  104. static struct rpc_stat nfslocalio_rpcstat = { &nfslocalio_program };
  105. const struct rpc_program nfslocalio_program = {
  106. .name = "nfslocalio",
  107. .number = NFS_LOCALIO_PROGRAM,
  108. .nrvers = ARRAY_SIZE(nfslocalio_version),
  109. .version = nfslocalio_version,
  110. .stats = &nfslocalio_rpcstat,
  111. };
  112. /*
  113. * nfs_init_localioclient - Initialise an NFS localio client connection
  114. */
  115. static struct rpc_clnt *nfs_init_localioclient(struct nfs_client *clp)
  116. {
  117. struct rpc_clnt *rpcclient_localio;
  118. rpcclient_localio = rpc_bind_new_program(clp->cl_rpcclient,
  119. &nfslocalio_program, 1);
  120. dprintk_rcu("%s: server (%s) %s NFS LOCALIO.\n",
  121. __func__, rpc_peeraddr2str(clp->cl_rpcclient, RPC_DISPLAY_ADDR),
  122. (IS_ERR(rpcclient_localio) ? "does not support" : "supports"));
  123. return rpcclient_localio;
  124. }
  125. static bool nfs_server_uuid_is_local(struct nfs_client *clp)
  126. {
  127. u8 uuid[UUID_SIZE];
  128. struct rpc_message msg = {
  129. .rpc_argp = &uuid,
  130. };
  131. struct rpc_clnt *rpcclient_localio;
  132. int status;
  133. rpcclient_localio = nfs_init_localioclient(clp);
  134. if (IS_ERR(rpcclient_localio))
  135. return false;
  136. export_uuid(uuid, &clp->cl_uuid.uuid);
  137. msg.rpc_proc = &nfs_localio_procedures[LOCALIOPROC_UUID_IS_LOCAL];
  138. status = rpc_call_sync(rpcclient_localio, &msg, 0);
  139. dprintk("%s: NFS reply UUID_IS_LOCAL: status=%d\n",
  140. __func__, status);
  141. rpc_shutdown_client(rpcclient_localio);
  142. /* Server is only local if it initialized required struct members */
  143. if (status || !rcu_access_pointer(clp->cl_uuid.net) || !clp->cl_uuid.dom)
  144. return false;
  145. return true;
  146. }
  147. /*
  148. * nfs_local_probe - probe local i/o support for an nfs_server and nfs_client
  149. * - called after alloc_client and init_client (so cl_rpcclient exists)
  150. * - this function is idempotent, it can be called for old or new clients
  151. */
  152. static void nfs_local_probe(struct nfs_client *clp)
  153. {
  154. /* Disallow localio if disabled via sysfs or AUTH_SYS isn't used */
  155. if (!localio_enabled ||
  156. clp->cl_rpcclient->cl_auth->au_flavor != RPC_AUTH_UNIX) {
  157. nfs_localio_disable_client(clp);
  158. return;
  159. }
  160. if (nfs_client_is_local(clp))
  161. return;
  162. if (!nfs_uuid_begin(&clp->cl_uuid))
  163. return;
  164. if (nfs_server_uuid_is_local(clp))
  165. nfs_localio_enable_client(clp);
  166. nfs_uuid_end(&clp->cl_uuid);
  167. }
  168. void nfs_local_probe_async_work(struct work_struct *work)
  169. {
  170. struct nfs_client *clp =
  171. container_of(work, struct nfs_client, cl_local_probe_work);
  172. if (!refcount_inc_not_zero(&clp->cl_count))
  173. return;
  174. nfs_local_probe(clp);
  175. nfs_put_client(clp);
  176. }
  177. void nfs_local_probe_async(struct nfs_client *clp)
  178. {
  179. queue_work(nfsiod_workqueue, &clp->cl_local_probe_work);
  180. }
  181. EXPORT_SYMBOL_GPL(nfs_local_probe_async);
  182. static inline void nfs_local_file_put(struct nfsd_file *localio)
  183. {
  184. /* nfs_to_nfsd_file_put_local() expects an __rcu pointer
  185. * but we have a __kernel pointer. It is always safe
  186. * to cast a __kernel pointer to an __rcu pointer
  187. * because the cast only weakens what is known about the pointer.
  188. */
  189. struct nfsd_file __rcu *nf = (struct nfsd_file __rcu*) localio;
  190. nfs_to_nfsd_file_put_local(&nf);
  191. }
  192. /*
  193. * __nfs_local_open_fh - open a local filehandle in terms of nfsd_file.
  194. *
  195. * Returns a pointer to a struct nfsd_file or ERR_PTR.
  196. * Caller must release returned nfsd_file with nfs_to_nfsd_file_put_local().
  197. */
  198. static struct nfsd_file *
  199. __nfs_local_open_fh(struct nfs_client *clp, const struct cred *cred,
  200. struct nfs_fh *fh, struct nfs_file_localio *nfl,
  201. struct nfsd_file __rcu **pnf,
  202. const fmode_t mode)
  203. {
  204. int status = 0;
  205. struct nfsd_file *localio;
  206. localio = nfs_open_local_fh(&clp->cl_uuid, clp->cl_rpcclient,
  207. cred, fh, nfl, pnf, mode);
  208. if (IS_ERR(localio)) {
  209. status = PTR_ERR(localio);
  210. switch (status) {
  211. case -ENOMEM:
  212. case -ENXIO:
  213. case -ENOENT:
  214. /* Revalidate localio */
  215. nfs_localio_disable_client(clp);
  216. nfs_local_probe(clp);
  217. }
  218. }
  219. trace_nfs_local_open_fh(fh, mode, status);
  220. return localio;
  221. }
  222. /*
  223. * nfs_local_open_fh - open a local filehandle in terms of nfsd_file.
  224. * First checking if the open nfsd_file is already cached, otherwise
  225. * must __nfs_local_open_fh and insert the nfsd_file in nfs_file_localio.
  226. *
  227. * Returns a pointer to a struct nfsd_file or NULL.
  228. */
  229. struct nfsd_file *
  230. nfs_local_open_fh(struct nfs_client *clp, const struct cred *cred,
  231. struct nfs_fh *fh, struct nfs_file_localio *nfl,
  232. const fmode_t mode)
  233. {
  234. struct nfsd_file *nf, __rcu **pnf;
  235. if (!nfs_server_is_local(clp))
  236. return NULL;
  237. if (mode & ~(FMODE_READ | FMODE_WRITE))
  238. return NULL;
  239. if (mode & FMODE_WRITE)
  240. pnf = &nfl->rw_file;
  241. else
  242. pnf = &nfl->ro_file;
  243. nf = __nfs_local_open_fh(clp, cred, fh, nfl, pnf, mode);
  244. if (IS_ERR(nf))
  245. return NULL;
  246. return nf;
  247. }
  248. EXPORT_SYMBOL_GPL(nfs_local_open_fh);
  249. /*
  250. * Ensure all page cache allocations are done from GFP_NOFS context to
  251. * prevent direct reclaim recursion back into NFS via nfs_writepages.
  252. */
  253. static void
  254. nfs_local_mapping_set_gfp_nofs_context(struct address_space *m)
  255. {
  256. gfp_t gfp_mask = mapping_gfp_mask(m);
  257. mapping_set_gfp_mask(m, (gfp_mask & ~(__GFP_FS)));
  258. }
  259. static void
  260. nfs_local_iocb_free(struct nfs_local_kiocb *iocb)
  261. {
  262. kfree(iocb->bvec);
  263. kfree(iocb);
  264. }
  265. static struct nfs_local_kiocb *
  266. nfs_local_iocb_alloc(struct nfs_pgio_header *hdr,
  267. struct file *file, gfp_t flags)
  268. {
  269. struct nfs_local_kiocb *iocb;
  270. iocb = kzalloc_obj(*iocb, flags);
  271. if (iocb == NULL)
  272. return NULL;
  273. iocb->bvec = kmalloc_objs(struct bio_vec, hdr->page_array.npages, flags);
  274. if (iocb->bvec == NULL) {
  275. kfree(iocb);
  276. return NULL;
  277. }
  278. nfs_local_mapping_set_gfp_nofs_context(file->f_mapping);
  279. init_sync_kiocb(&iocb->kiocb, file);
  280. iocb->hdr = hdr;
  281. iocb->kiocb.ki_pos = hdr->args.offset;
  282. iocb->kiocb.ki_flags &= ~IOCB_APPEND;
  283. iocb->kiocb.ki_complete = NULL;
  284. iocb->aio_complete_work = NULL;
  285. iocb->end_iter_index = -1;
  286. return iocb;
  287. }
  288. static bool
  289. nfs_is_local_dio_possible(struct nfs_local_kiocb *iocb, int rw,
  290. size_t len, struct nfs_local_dio *local_dio)
  291. {
  292. struct nfs_pgio_header *hdr = iocb->hdr;
  293. loff_t offset = hdr->args.offset;
  294. u32 nf_dio_mem_align, nf_dio_offset_align, nf_dio_read_offset_align;
  295. loff_t start_end, orig_end, middle_end;
  296. nfs_to->nfsd_file_dio_alignment(iocb->localio, &nf_dio_mem_align,
  297. &nf_dio_offset_align, &nf_dio_read_offset_align);
  298. if (rw == ITER_DEST)
  299. nf_dio_offset_align = nf_dio_read_offset_align;
  300. if (unlikely(!nf_dio_mem_align || !nf_dio_offset_align))
  301. return false;
  302. if (unlikely(len < nf_dio_offset_align))
  303. return false;
  304. local_dio->mem_align = nf_dio_mem_align;
  305. local_dio->offset_align = nf_dio_offset_align;
  306. start_end = round_up(offset, nf_dio_offset_align);
  307. orig_end = offset + len;
  308. middle_end = round_down(orig_end, nf_dio_offset_align);
  309. local_dio->middle_offset = start_end;
  310. local_dio->end_offset = middle_end;
  311. local_dio->start_len = start_end - offset;
  312. local_dio->middle_len = middle_end - start_end;
  313. local_dio->end_len = orig_end - middle_end;
  314. if (rw == ITER_DEST)
  315. trace_nfs_local_dio_read(hdr->inode, offset, len, local_dio);
  316. else
  317. trace_nfs_local_dio_write(hdr->inode, offset, len, local_dio);
  318. return true;
  319. }
  320. static bool nfs_iov_iter_aligned_bvec(const struct iov_iter *i,
  321. unsigned int addr_mask, unsigned int len_mask)
  322. {
  323. const struct bio_vec *bvec = i->bvec;
  324. size_t skip = i->iov_offset;
  325. size_t size = i->count;
  326. if (size & len_mask)
  327. return false;
  328. do {
  329. size_t len = bvec->bv_len;
  330. if (len > size)
  331. len = size;
  332. if ((unsigned long)(bvec->bv_offset + skip) & addr_mask)
  333. return false;
  334. bvec++;
  335. size -= len;
  336. skip = 0;
  337. } while (size);
  338. return true;
  339. }
  340. static void
  341. nfs_local_iter_setup(struct iov_iter *iter, int rw, struct bio_vec *bvec,
  342. unsigned int nvecs, unsigned long total,
  343. size_t start, size_t len)
  344. {
  345. iov_iter_bvec(iter, rw, bvec, nvecs, total);
  346. if (start)
  347. iov_iter_advance(iter, start);
  348. iov_iter_truncate(iter, len);
  349. }
  350. /*
  351. * Setup as many as 3 iov_iter based on extents described by @local_dio.
  352. * Returns the number of iov_iter that were setup.
  353. */
  354. static int
  355. nfs_local_iters_setup_dio(struct nfs_local_kiocb *iocb, int rw,
  356. unsigned int nvecs, unsigned long total,
  357. struct nfs_local_dio *local_dio)
  358. {
  359. int n_iters = 0;
  360. struct iov_iter *iters = iocb->iters;
  361. /* Setup misaligned start? */
  362. if (local_dio->start_len) {
  363. nfs_local_iter_setup(&iters[n_iters], rw, iocb->bvec,
  364. nvecs, total, 0, local_dio->start_len);
  365. ++n_iters;
  366. }
  367. /*
  368. * Setup DIO-aligned middle, if there is no misaligned end (below)
  369. * then AIO completion is used, see nfs_local_call_{read,write}
  370. */
  371. nfs_local_iter_setup(&iters[n_iters], rw, iocb->bvec, nvecs,
  372. total, local_dio->start_len, local_dio->middle_len);
  373. iocb->iter_is_dio_aligned[n_iters] =
  374. nfs_iov_iter_aligned_bvec(&iters[n_iters],
  375. local_dio->mem_align-1, local_dio->offset_align-1);
  376. if (unlikely(!iocb->iter_is_dio_aligned[n_iters])) {
  377. trace_nfs_local_dio_misaligned(iocb->hdr->inode,
  378. local_dio->start_len, local_dio->middle_len, local_dio);
  379. return 0; /* no DIO-aligned IO possible */
  380. }
  381. iocb->end_iter_index = n_iters;
  382. ++n_iters;
  383. /* Setup misaligned end? */
  384. if (local_dio->end_len) {
  385. nfs_local_iter_setup(&iters[n_iters], rw, iocb->bvec,
  386. nvecs, total, local_dio->start_len +
  387. local_dio->middle_len, local_dio->end_len);
  388. iocb->end_iter_index = n_iters;
  389. ++n_iters;
  390. }
  391. atomic_set(&iocb->n_iters, n_iters);
  392. return n_iters;
  393. }
  394. static noinline_for_stack void
  395. nfs_local_iters_init(struct nfs_local_kiocb *iocb, int rw)
  396. {
  397. struct nfs_pgio_header *hdr = iocb->hdr;
  398. struct page **pagevec = hdr->page_array.pagevec;
  399. unsigned long v, total;
  400. unsigned int base;
  401. size_t len;
  402. v = 0;
  403. total = hdr->args.count;
  404. base = hdr->args.pgbase;
  405. pagevec += base >> PAGE_SHIFT;
  406. base &= ~PAGE_MASK;
  407. while (total && v < hdr->page_array.npages) {
  408. len = min_t(size_t, total, PAGE_SIZE - base);
  409. bvec_set_page(&iocb->bvec[v], *pagevec, len, base);
  410. total -= len;
  411. ++pagevec;
  412. ++v;
  413. base = 0;
  414. }
  415. len = hdr->args.count - total;
  416. /*
  417. * For each iocb, iocb->n_iters is always at least 1 and we always
  418. * end io after first nfs_local_pgio_done call unless misaligned DIO.
  419. */
  420. atomic_set(&iocb->n_iters, 1);
  421. if (test_bit(NFS_IOHDR_ODIRECT, &hdr->flags)) {
  422. struct nfs_local_dio local_dio;
  423. if (nfs_is_local_dio_possible(iocb, rw, len, &local_dio) &&
  424. nfs_local_iters_setup_dio(iocb, rw, v, len, &local_dio) != 0) {
  425. /* Ensure DIO WRITE's IO on stable storage upon completion */
  426. if (rw == ITER_SOURCE)
  427. iocb->kiocb.ki_flags |= IOCB_DSYNC|IOCB_SYNC;
  428. return; /* is DIO-aligned */
  429. }
  430. }
  431. /* Use buffered IO */
  432. iov_iter_bvec(&iocb->iters[0], rw, iocb->bvec, v, len);
  433. }
  434. static void
  435. nfs_local_hdr_release(struct nfs_pgio_header *hdr,
  436. const struct rpc_call_ops *call_ops)
  437. {
  438. call_ops->rpc_call_done(&hdr->task, hdr);
  439. call_ops->rpc_release(hdr);
  440. }
  441. static void
  442. nfs_local_pgio_init(struct nfs_pgio_header *hdr,
  443. const struct rpc_call_ops *call_ops)
  444. {
  445. hdr->task.tk_ops = call_ops;
  446. if (!hdr->task.tk_start)
  447. hdr->task.tk_start = ktime_get();
  448. }
  449. static bool nfs_local_pgio_done(struct nfs_local_kiocb *iocb, long status)
  450. {
  451. struct nfs_pgio_header *hdr = iocb->hdr;
  452. /* Must handle partial completions */
  453. if (status >= 0) {
  454. hdr->res.count += status;
  455. /* @hdr was initialized to 0 (zeroed during allocation) */
  456. if (hdr->task.tk_status == 0)
  457. hdr->res.op_status = NFS4_OK;
  458. } else {
  459. hdr->res.op_status = nfs_localio_errno_to_nfs4_stat(status);
  460. hdr->task.tk_status = status;
  461. }
  462. BUG_ON(atomic_read(&iocb->n_iters) <= 0);
  463. return atomic_dec_and_test(&iocb->n_iters);
  464. }
  465. static void
  466. nfs_local_iocb_release(struct nfs_local_kiocb *iocb)
  467. {
  468. nfs_local_file_put(iocb->localio);
  469. nfs_local_iocb_free(iocb);
  470. }
  471. static void nfs_local_pgio_restart(struct nfs_local_kiocb *iocb,
  472. struct nfs_pgio_header *hdr)
  473. {
  474. int status = 0;
  475. iocb->kiocb.ki_pos = hdr->args.offset;
  476. iocb->kiocb.ki_flags &= ~(IOCB_DSYNC | IOCB_SYNC | IOCB_DIRECT);
  477. iocb->kiocb.ki_complete = NULL;
  478. iocb->aio_complete_work = NULL;
  479. iocb->end_iter_index = -1;
  480. switch (hdr->rw_mode) {
  481. case FMODE_READ:
  482. nfs_local_iters_init(iocb, ITER_DEST);
  483. nfs_local_do_read(iocb, hdr->task.tk_ops);
  484. break;
  485. case FMODE_WRITE:
  486. nfs_local_iters_init(iocb, ITER_SOURCE);
  487. nfs_local_do_write(iocb, hdr->task.tk_ops);
  488. break;
  489. default:
  490. status = -EOPNOTSUPP;
  491. }
  492. if (unlikely(status != 0)) {
  493. nfs_local_iocb_release(iocb);
  494. hdr->task.tk_status = status;
  495. nfs_local_hdr_release(hdr, hdr->task.tk_ops);
  496. }
  497. }
  498. static void nfs_local_pgio_release(struct nfs_local_kiocb *iocb)
  499. {
  500. struct nfs_pgio_header *hdr = iocb->hdr;
  501. struct rpc_task *task = &hdr->task;
  502. task->tk_action = NULL;
  503. task->tk_ops->rpc_call_done(task, hdr);
  504. if (task->tk_action == NULL) {
  505. nfs_local_iocb_release(iocb);
  506. task->tk_ops->rpc_release(hdr);
  507. } else
  508. nfs_local_pgio_restart(iocb, hdr);
  509. }
  510. /*
  511. * Complete the I/O from iocb->kiocb.ki_complete()
  512. *
  513. * Note that this function can be called from a bottom half context,
  514. * hence we need to queue the rpc_call_done() etc to a workqueue
  515. */
  516. static inline void nfs_local_pgio_aio_complete(struct nfs_local_kiocb *iocb)
  517. {
  518. INIT_WORK(&iocb->work, iocb->aio_complete_work);
  519. queue_work(nfsiod_workqueue, &iocb->work);
  520. }
  521. static void nfs_local_read_done(struct nfs_local_kiocb *iocb)
  522. {
  523. struct nfs_pgio_header *hdr = iocb->hdr;
  524. struct file *filp = iocb->kiocb.ki_filp;
  525. long status = hdr->task.tk_status;
  526. if ((iocb->kiocb.ki_flags & IOCB_DIRECT) && status == -EINVAL) {
  527. /* Underlying FS will return -EINVAL if misaligned DIO is attempted. */
  528. pr_info_ratelimited("nfs: Unexpected direct I/O read alignment failure\n");
  529. }
  530. /*
  531. * Must clear replen otherwise NFSv3 data corruption will occur
  532. * if/when switching from LOCALIO back to using normal RPC.
  533. */
  534. hdr->res.replen = 0;
  535. /* nfs_readpage_result() handles short read */
  536. if (hdr->args.offset + hdr->res.count >= i_size_read(file_inode(filp)))
  537. hdr->res.eof = true;
  538. dprintk("%s: read %ld bytes eof %d.\n", __func__,
  539. status > 0 ? status : 0, hdr->res.eof);
  540. }
  /*
   * Common tail of every read completion path: fix up the read result,
   * then hand the request to nfs_local_pgio_release(), which finishes or
   * restarts it.
   */
  static inline void nfs_local_read_iocb_done(struct nfs_local_kiocb *iocb)
  {
  	nfs_local_read_done(iocb);

  	/* @iocb must not be touched after this call */
  	nfs_local_pgio_release(iocb);
  }
  546. static void nfs_local_read_aio_complete_work(struct work_struct *work)
  547. {
  548. struct nfs_local_kiocb *iocb =
  549. container_of(work, struct nfs_local_kiocb, work);
  550. nfs_local_read_iocb_done(iocb);
  551. }
  552. static void nfs_local_read_aio_complete(struct kiocb *kiocb, long ret)
  553. {
  554. struct nfs_local_kiocb *iocb =
  555. container_of(kiocb, struct nfs_local_kiocb, kiocb);
  556. /* AIO completion of DIO read should always be last to complete */
  557. if (unlikely(!nfs_local_pgio_done(iocb, ret)))
  558. return;
  559. nfs_local_pgio_aio_complete(iocb); /* Calls nfs_local_read_aio_complete_work */
  560. }
  561. static void nfs_local_call_read(struct work_struct *work)
  562. {
  563. struct nfs_local_kiocb *iocb =
  564. container_of(work, struct nfs_local_kiocb, work);
  565. struct file *filp = iocb->kiocb.ki_filp;
  566. ssize_t status;
  567. int n_iters;
  568. n_iters = atomic_read(&iocb->n_iters);
  569. for (int i = 0; i < n_iters ; i++) {
  570. if (iocb->iter_is_dio_aligned[i]) {
  571. iocb->kiocb.ki_flags |= IOCB_DIRECT;
  572. /* Only use AIO completion if DIO-aligned segment is last */
  573. if (i == iocb->end_iter_index) {
  574. iocb->kiocb.ki_complete = nfs_local_read_aio_complete;
  575. iocb->aio_complete_work = nfs_local_read_aio_complete_work;
  576. }
  577. } else
  578. iocb->kiocb.ki_flags &= ~IOCB_DIRECT;
  579. scoped_with_creds(filp->f_cred)
  580. status = filp->f_op->read_iter(&iocb->kiocb, &iocb->iters[i]);
  581. if (status == -EIOCBQUEUED)
  582. continue;
  583. /* Break on completion, errors, or short reads */
  584. if (nfs_local_pgio_done(iocb, status) || status < 0 ||
  585. (size_t)status < iov_iter_count(&iocb->iters[i])) {
  586. nfs_local_read_iocb_done(iocb);
  587. break;
  588. }
  589. }
  590. }
  591. static void nfs_local_do_read(struct nfs_local_kiocb *iocb,
  592. const struct rpc_call_ops *call_ops)
  593. {
  594. struct nfs_pgio_header *hdr = iocb->hdr;
  595. dprintk("%s: vfs_read count=%u pos=%llu\n",
  596. __func__, hdr->args.count, hdr->args.offset);
  597. nfs_local_pgio_init(hdr, call_ops);
  598. hdr->res.eof = false;
  599. INIT_WORK(&iocb->work, nfs_local_call_read);
  600. queue_work(nfslocaliod_workqueue, &iocb->work);
  601. }
  602. static void
  603. nfs_copy_boot_verifier(struct nfs_write_verifier *verifier, struct inode *inode)
  604. {
  605. struct nfs_client *clp = NFS_SERVER(inode)->nfs_client;
  606. u32 *verf = (u32 *)verifier->data;
  607. unsigned int seq;
  608. do {
  609. seq = read_seqbegin(&clp->cl_boot_lock);
  610. verf[0] = (u32)clp->cl_nfssvc_boot.tv_sec;
  611. verf[1] = (u32)clp->cl_nfssvc_boot.tv_nsec;
  612. } while (read_seqretry(&clp->cl_boot_lock, seq));
  613. }
  614. static void
  615. nfs_reset_boot_verifier(struct inode *inode)
  616. {
  617. struct nfs_client *clp = NFS_SERVER(inode)->nfs_client;
  618. write_seqlock(&clp->cl_boot_lock);
  619. ktime_get_real_ts64(&clp->cl_nfssvc_boot);
  620. write_sequnlock(&clp->cl_boot_lock);
  621. }
  622. static void
  623. nfs_set_local_verifier(struct inode *inode,
  624. struct nfs_writeverf *verf,
  625. enum nfs3_stable_how how)
  626. {
  627. nfs_copy_boot_verifier(&verf->verifier, inode);
  628. verf->committed = how;
  629. }
  630. /* Factored out from fs/nfsd/vfs.h:fh_getattr() */
  631. static int __vfs_getattr(const struct path *p, struct kstat *stat, int version)
  632. {
  633. u32 request_mask = STATX_BASIC_STATS;
  634. if (version == 4)
  635. request_mask |= (STATX_BTIME | STATX_CHANGE_COOKIE);
  636. return vfs_getattr(p, stat, request_mask, AT_STATX_SYNC_AS_STAT);
  637. }
  638. /* Copied from fs/nfsd/nfsfh.c:nfsd4_change_attribute() */
  639. static u64 __nfsd4_change_attribute(const struct kstat *stat,
  640. const struct inode *inode)
  641. {
  642. u64 chattr;
  643. if (stat->result_mask & STATX_CHANGE_COOKIE) {
  644. chattr = stat->change_cookie;
  645. if (S_ISREG(inode->i_mode) &&
  646. !(stat->attributes & STATX_ATTR_CHANGE_MONOTONIC)) {
  647. chattr += (u64)stat->ctime.tv_sec << 30;
  648. chattr += stat->ctime.tv_nsec;
  649. }
  650. } else {
  651. chattr = time_to_chattr(&stat->ctime);
  652. }
  653. return chattr;
  654. }
  655. static void nfs_local_vfs_getattr(struct nfs_local_kiocb *iocb)
  656. {
  657. struct kstat stat;
  658. struct file *filp = iocb->kiocb.ki_filp;
  659. struct nfs_pgio_header *hdr = iocb->hdr;
  660. struct nfs_fattr *fattr = hdr->res.fattr;
  661. int version = NFS_PROTO(hdr->inode)->version;
  662. if (unlikely(!fattr) || __vfs_getattr(&filp->f_path, &stat, version))
  663. return;
  664. fattr->valid = (NFS_ATTR_FATTR_FILEID |
  665. NFS_ATTR_FATTR_CHANGE |
  666. NFS_ATTR_FATTR_SIZE |
  667. NFS_ATTR_FATTR_ATIME |
  668. NFS_ATTR_FATTR_MTIME |
  669. NFS_ATTR_FATTR_CTIME |
  670. NFS_ATTR_FATTR_SPACE_USED);
  671. fattr->fileid = stat.ino;
  672. fattr->size = stat.size;
  673. fattr->atime = stat.atime;
  674. fattr->mtime = stat.mtime;
  675. fattr->ctime = stat.ctime;
  676. if (version == 4) {
  677. fattr->change_attr =
  678. __nfsd4_change_attribute(&stat, file_inode(filp));
  679. } else
  680. fattr->change_attr = nfs_timespec_to_change_attr(&fattr->ctime);
  681. fattr->du.nfs3.used = stat.blocks << 9;
  682. }
  683. static void nfs_local_write_done(struct nfs_local_kiocb *iocb)
  684. {
  685. struct nfs_pgio_header *hdr = iocb->hdr;
  686. long status = hdr->task.tk_status;
  687. dprintk("%s: wrote %ld bytes.\n", __func__, status > 0 ? status : 0);
  688. if ((iocb->kiocb.ki_flags & IOCB_DIRECT) && status == -EINVAL) {
  689. /* Underlying FS will return -EINVAL if misaligned DIO is attempted. */
  690. pr_info_ratelimited("nfs: Unexpected direct I/O write alignment failure\n");
  691. }
  692. if (status < 0)
  693. nfs_reset_boot_verifier(hdr->inode);
  694. }
  /*
   * Common tail of every write completion path: process the write
   * result, refresh the post-op attributes, then hand the request to
   * nfs_local_pgio_release(), which finishes or restarts it.
   */
  static inline void nfs_local_write_iocb_done(struct nfs_local_kiocb *iocb)
  {
  	nfs_local_write_done(iocb);
  	nfs_local_vfs_getattr(iocb);

  	/* @iocb must not be touched after this call */
  	nfs_local_pgio_release(iocb);
  }
  701. static void nfs_local_write_aio_complete_work(struct work_struct *work)
  702. {
  703. struct nfs_local_kiocb *iocb =
  704. container_of(work, struct nfs_local_kiocb, work);
  705. nfs_local_write_iocb_done(iocb);
  706. }
  707. static void nfs_local_write_aio_complete(struct kiocb *kiocb, long ret)
  708. {
  709. struct nfs_local_kiocb *iocb =
  710. container_of(kiocb, struct nfs_local_kiocb, kiocb);
  711. /* AIO completion of DIO write should always be last to complete */
  712. if (unlikely(!nfs_local_pgio_done(iocb, ret)))
  713. return;
  714. nfs_local_pgio_aio_complete(iocb); /* Calls nfs_local_write_aio_complete_work */
  715. }
  716. static void nfs_local_call_write(struct work_struct *work)
  717. {
  718. struct nfs_local_kiocb *iocb =
  719. container_of(work, struct nfs_local_kiocb, work);
  720. struct file *filp = iocb->kiocb.ki_filp;
  721. unsigned long old_flags = current->flags;
  722. ssize_t status;
  723. int n_iters;
  724. current->flags |= PF_LOCAL_THROTTLE | PF_MEMALLOC_NOIO;
  725. file_start_write(filp);
  726. n_iters = atomic_read(&iocb->n_iters);
  727. for (int i = 0; i < n_iters ; i++) {
  728. if (iocb->iter_is_dio_aligned[i]) {
  729. iocb->kiocb.ki_flags |= IOCB_DIRECT;
  730. /* Only use AIO completion if DIO-aligned segment is last */
  731. if (i == iocb->end_iter_index) {
  732. iocb->kiocb.ki_complete = nfs_local_write_aio_complete;
  733. iocb->aio_complete_work = nfs_local_write_aio_complete_work;
  734. }
  735. } else
  736. iocb->kiocb.ki_flags &= ~IOCB_DIRECT;
  737. scoped_with_creds(filp->f_cred)
  738. status = filp->f_op->write_iter(&iocb->kiocb, &iocb->iters[i]);
  739. if (status == -EIOCBQUEUED)
  740. continue;
  741. /* Break on completion, errors, or short writes */
  742. if (nfs_local_pgio_done(iocb, status) || status < 0 ||
  743. (size_t)status < iov_iter_count(&iocb->iters[i])) {
  744. nfs_local_write_iocb_done(iocb);
  745. break;
  746. }
  747. }
  748. file_end_write(filp);
  749. current->flags = old_flags;
  750. }
  751. static void nfs_local_do_write(struct nfs_local_kiocb *iocb,
  752. const struct rpc_call_ops *call_ops)
  753. {
  754. struct nfs_pgio_header *hdr = iocb->hdr;
  755. dprintk("%s: vfs_write count=%u pos=%llu %s\n",
  756. __func__, hdr->args.count, hdr->args.offset,
  757. (hdr->args.stable == NFS_UNSTABLE) ? "unstable" : "stable");
  758. switch (hdr->args.stable) {
  759. default:
  760. break;
  761. case NFS_DATA_SYNC:
  762. iocb->kiocb.ki_flags |= IOCB_DSYNC;
  763. break;
  764. case NFS_FILE_SYNC:
  765. iocb->kiocb.ki_flags |= IOCB_DSYNC|IOCB_SYNC;
  766. }
  767. nfs_local_pgio_init(hdr, call_ops);
  768. nfs_set_local_verifier(hdr->inode, hdr->res.verf, hdr->args.stable);
  769. INIT_WORK(&iocb->work, nfs_local_call_write);
  770. queue_work(nfslocaliod_workqueue, &iocb->work);
  771. }
  772. static struct nfs_local_kiocb *
  773. nfs_local_iocb_init(struct nfs_pgio_header *hdr, struct nfsd_file *localio)
  774. {
  775. struct file *file = nfs_to->nfsd_file_file(localio);
  776. struct nfs_local_kiocb *iocb;
  777. gfp_t gfp_mask;
  778. int rw;
  779. if (hdr->rw_mode & FMODE_READ) {
  780. if (!file->f_op->read_iter)
  781. return ERR_PTR(-EOPNOTSUPP);
  782. gfp_mask = GFP_KERNEL;
  783. rw = ITER_DEST;
  784. } else {
  785. if (!file->f_op->write_iter)
  786. return ERR_PTR(-EOPNOTSUPP);
  787. gfp_mask = GFP_NOIO;
  788. rw = ITER_SOURCE;
  789. }
  790. iocb = nfs_local_iocb_alloc(hdr, file, gfp_mask);
  791. if (iocb == NULL)
  792. return ERR_PTR(-ENOMEM);
  793. iocb->hdr = hdr;
  794. iocb->localio = localio;
  795. nfs_local_iters_init(iocb, rw);
  796. return iocb;
  797. }
  798. int nfs_local_doio(struct nfs_client *clp, struct nfsd_file *localio,
  799. struct nfs_pgio_header *hdr,
  800. const struct rpc_call_ops *call_ops)
  801. {
  802. struct nfs_local_kiocb *iocb;
  803. int status = 0;
  804. if (!hdr->args.count)
  805. return 0;
  806. iocb = nfs_local_iocb_init(hdr, localio);
  807. if (IS_ERR(iocb))
  808. return PTR_ERR(iocb);
  809. switch (hdr->rw_mode) {
  810. case FMODE_READ:
  811. nfs_local_do_read(iocb, call_ops);
  812. break;
  813. case FMODE_WRITE:
  814. nfs_local_do_write(iocb, call_ops);
  815. break;
  816. default:
  817. dprintk("%s: invalid mode: %d\n", __func__,
  818. hdr->rw_mode);
  819. status = -EOPNOTSUPP;
  820. }
  821. if (unlikely(status != 0)) {
  822. nfs_local_iocb_release(iocb);
  823. hdr->task.tk_status = status;
  824. nfs_local_hdr_release(hdr, call_ops);
  825. }
  826. return status;
  827. }
  828. static void
  829. nfs_local_init_commit(struct nfs_commit_data *data,
  830. const struct rpc_call_ops *call_ops)
  831. {
  832. data->task.tk_ops = call_ops;
  833. }
  834. static int
  835. nfs_local_run_commit(struct file *filp, struct nfs_commit_data *data)
  836. {
  837. loff_t start = data->args.offset;
  838. loff_t end = LLONG_MAX;
  839. if (data->args.count > 0) {
  840. end = start + data->args.count - 1;
  841. if (end < start)
  842. end = LLONG_MAX;
  843. }
  844. nfs_local_mapping_set_gfp_nofs_context(filp->f_mapping);
  845. dprintk("%s: commit %llu - %llu\n", __func__, start, end);
  846. return vfs_fsync_range(filp, start, end, 0);
  847. }
  848. static void
  849. nfs_local_commit_done(struct nfs_commit_data *data, int status)
  850. {
  851. if (status >= 0) {
  852. nfs_set_local_verifier(data->inode,
  853. data->res.verf,
  854. NFS_FILE_SYNC);
  855. data->res.op_status = NFS4_OK;
  856. data->task.tk_status = 0;
  857. } else {
  858. nfs_reset_boot_verifier(data->inode);
  859. data->res.op_status = nfs_localio_errno_to_nfs4_stat(status);
  860. data->task.tk_status = status;
  861. }
  862. }
  863. static void
  864. nfs_local_release_commit_data(struct nfsd_file *localio,
  865. struct nfs_commit_data *data,
  866. const struct rpc_call_ops *call_ops)
  867. {
  868. nfs_local_file_put(localio);
  869. call_ops->rpc_call_done(&data->task, data);
  870. call_ops->rpc_release(data);
  871. }
  872. static void
  873. nfs_local_fsync_ctx_free(struct nfs_local_fsync_ctx *ctx)
  874. {
  875. nfs_local_release_commit_data(ctx->localio, ctx->data,
  876. ctx->data->task.tk_ops);
  877. kfree(ctx);
  878. }
  879. static void
  880. nfs_local_fsync_work(struct work_struct *work)
  881. {
  882. unsigned long old_flags = current->flags;
  883. struct nfs_local_fsync_ctx *ctx;
  884. int status;
  885. ctx = container_of(work, struct nfs_local_fsync_ctx, work);
  886. current->flags |= PF_LOCAL_THROTTLE | PF_MEMALLOC_NOIO;
  887. status = nfs_local_run_commit(nfs_to->nfsd_file_file(ctx->localio),
  888. ctx->data);
  889. nfs_local_commit_done(ctx->data, status);
  890. if (ctx->done != NULL)
  891. complete(ctx->done);
  892. nfs_local_fsync_ctx_free(ctx);
  893. current->flags = old_flags;
  894. }
  895. static struct nfs_local_fsync_ctx *
  896. nfs_local_fsync_ctx_alloc(struct nfs_commit_data *data,
  897. struct nfsd_file *localio, gfp_t flags)
  898. {
  899. struct nfs_local_fsync_ctx *ctx = kmalloc_obj(*ctx, flags);
  900. if (ctx != NULL) {
  901. ctx->localio = localio;
  902. ctx->data = data;
  903. INIT_WORK(&ctx->work, nfs_local_fsync_work);
  904. ctx->done = NULL;
  905. }
  906. return ctx;
  907. }
  908. int nfs_local_commit(struct nfsd_file *localio,
  909. struct nfs_commit_data *data,
  910. const struct rpc_call_ops *call_ops, int how)
  911. {
  912. struct nfs_local_fsync_ctx *ctx;
  913. ctx = nfs_local_fsync_ctx_alloc(data, localio, GFP_NOIO);
  914. if (!ctx) {
  915. nfs_local_commit_done(data, -ENOMEM);
  916. nfs_local_release_commit_data(localio, data, call_ops);
  917. return -ENOMEM;
  918. }
  919. nfs_local_init_commit(data, call_ops);
  920. if (how & FLUSH_SYNC) {
  921. DECLARE_COMPLETION_ONSTACK(done);
  922. ctx->done = &done;
  923. queue_work(nfslocaliod_workqueue, &ctx->work);
  924. wait_for_completion(&done);
  925. } else
  926. queue_work(nfslocaliod_workqueue, &ctx->work);
  927. return 0;
  928. }