| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702
703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935936937938939940941942943944945946947948949950951952953954955956957958959960961962963964965966967968969970971972973974975976977978979980981982983984985986987988989990991992993994995996997998999100010011002100310041005100610071008100910101011101210131014101510161017101810191020102110221023102410251026102710281029103010311032103310341035103610371038103910401041104210431044104510461047104810491050105110521053105410551056105710581059106010611062106310641065106610671068106910701071107210731074107510761077107810791080108110821083108410851086108710881089109010911092109310941095109610971098109911001101110211031104110511061107 |
- // SPDX-License-Identifier: GPL-2.0-only
- /*
- * NFS client support for local clients to bypass network stack
- *
- * Copyright (C) 2014 Weston Andros Adamson <dros@primarydata.com>
- * Copyright (C) 2019 Trond Myklebust <trond.myklebust@hammerspace.com>
- * Copyright (C) 2024 Mike Snitzer <snitzer@hammerspace.com>
- * Copyright (C) 2024 NeilBrown <neilb@suse.de>
- */
- #include <linux/module.h>
- #include <linux/errno.h>
- #include <linux/vfs.h>
- #include <linux/file.h>
- #include <linux/inet.h>
- #include <linux/sunrpc/addr.h>
- #include <linux/inetdevice.h>
- #include <net/addrconf.h>
- #include <linux/nfs_common.h>
- #include <linux/nfslocalio.h>
- #include <linux/bvec.h>
- #include <linux/nfs.h>
- #include <linux/nfs_fs.h>
- #include <linux/nfs_xdr.h>
- #include "internal.h"
- #include "pnfs.h"
- #include "nfstrace.h"
/* Debug facility selected for this file. */
#define NFSDBG_FACILITY		NFSDBG_VFS

/* Number of iov_iter slots (and alignment flags) in each nfs_local_kiocb. */
#define NFSLOCAL_MAX_IOS	3
- struct nfs_local_kiocb {
- struct kiocb kiocb;
- struct bio_vec *bvec;
- struct nfs_pgio_header *hdr;
- struct work_struct work;
- void (*aio_complete_work)(struct work_struct *);
- struct nfsd_file *localio;
- /* Begin mostly DIO-specific members */
- size_t end_len;
- short int end_iter_index;
- atomic_t n_iters;
- struct iov_iter iters[NFSLOCAL_MAX_IOS];
- bool iter_is_dio_aligned[NFSLOCAL_MAX_IOS];
- /* End mostly DIO-specific members */
- };
- struct nfs_local_fsync_ctx {
- struct nfsd_file *localio;
- struct nfs_commit_data *data;
- struct work_struct work;
- struct completion *done;
- };
- static bool localio_enabled __read_mostly = true;
- module_param(localio_enabled, bool, 0644);
/*
 * Forward declarations: nfs_local_pgio_restart() calls these before
 * their definitions appear.
 */
static void nfs_local_do_read(struct nfs_local_kiocb *iocb,
			      const struct rpc_call_ops *call_ops);
static void nfs_local_do_write(struct nfs_local_kiocb *iocb,
			       const struct rpc_call_ops *call_ops);
- static inline bool nfs_client_is_local(const struct nfs_client *clp)
- {
- return !!rcu_access_pointer(clp->cl_uuid.net);
- }
- bool nfs_server_is_local(const struct nfs_client *clp)
- {
- return nfs_client_is_local(clp) && localio_enabled;
- }
- EXPORT_SYMBOL_GPL(nfs_server_is_local);
- /*
- * UUID_IS_LOCAL XDR functions
- */
- static void localio_xdr_enc_uuidargs(struct rpc_rqst *req,
- struct xdr_stream *xdr,
- const void *data)
- {
- const u8 *uuid = data;
- encode_opaque_fixed(xdr, uuid, UUID_SIZE);
- }
/* Decode the reply: the procedure returns void, so nothing is read. */
static int localio_xdr_dec_uuidres(struct rpc_rqst *req,
				   struct xdr_stream *xdr,
				   void *result)
{
	return 0;	/* void return */
}
- static const struct rpc_procinfo nfs_localio_procedures[] = {
- [LOCALIOPROC_UUID_IS_LOCAL] = {
- .p_proc = LOCALIOPROC_UUID_IS_LOCAL,
- .p_encode = localio_xdr_enc_uuidargs,
- .p_decode = localio_xdr_dec_uuidres,
- .p_arglen = XDR_QUADLEN(UUID_SIZE),
- .p_replen = 0,
- .p_statidx = LOCALIOPROC_UUID_IS_LOCAL,
- .p_name = "UUID_IS_LOCAL",
- },
- };
- static unsigned int nfs_localio_counts[ARRAY_SIZE(nfs_localio_procedures)];
- static const struct rpc_version nfslocalio_version1 = {
- .number = 1,
- .nrprocs = ARRAY_SIZE(nfs_localio_procedures),
- .procs = nfs_localio_procedures,
- .counts = nfs_localio_counts,
- };
- static const struct rpc_version *nfslocalio_version[] = {
- [1] = &nfslocalio_version1,
- };
- extern const struct rpc_program nfslocalio_program;
- static struct rpc_stat nfslocalio_rpcstat = { &nfslocalio_program };
- const struct rpc_program nfslocalio_program = {
- .name = "nfslocalio",
- .number = NFS_LOCALIO_PROGRAM,
- .nrvers = ARRAY_SIZE(nfslocalio_version),
- .version = nfslocalio_version,
- .stats = &nfslocalio_rpcstat,
- };
- /*
- * nfs_init_localioclient - Initialise an NFS localio client connection
- */
- static struct rpc_clnt *nfs_init_localioclient(struct nfs_client *clp)
- {
- struct rpc_clnt *rpcclient_localio;
- rpcclient_localio = rpc_bind_new_program(clp->cl_rpcclient,
- &nfslocalio_program, 1);
- dprintk_rcu("%s: server (%s) %s NFS LOCALIO.\n",
- __func__, rpc_peeraddr2str(clp->cl_rpcclient, RPC_DISPLAY_ADDR),
- (IS_ERR(rpcclient_localio) ? "does not support" : "supports"));
- return rpcclient_localio;
- }
- static bool nfs_server_uuid_is_local(struct nfs_client *clp)
- {
- u8 uuid[UUID_SIZE];
- struct rpc_message msg = {
- .rpc_argp = &uuid,
- };
- struct rpc_clnt *rpcclient_localio;
- int status;
- rpcclient_localio = nfs_init_localioclient(clp);
- if (IS_ERR(rpcclient_localio))
- return false;
- export_uuid(uuid, &clp->cl_uuid.uuid);
- msg.rpc_proc = &nfs_localio_procedures[LOCALIOPROC_UUID_IS_LOCAL];
- status = rpc_call_sync(rpcclient_localio, &msg, 0);
- dprintk("%s: NFS reply UUID_IS_LOCAL: status=%d\n",
- __func__, status);
- rpc_shutdown_client(rpcclient_localio);
- /* Server is only local if it initialized required struct members */
- if (status || !rcu_access_pointer(clp->cl_uuid.net) || !clp->cl_uuid.dom)
- return false;
- return true;
- }
- /*
- * nfs_local_probe - probe local i/o support for an nfs_server and nfs_client
- * - called after alloc_client and init_client (so cl_rpcclient exists)
- * - this function is idempotent, it can be called for old or new clients
- */
- static void nfs_local_probe(struct nfs_client *clp)
- {
- /* Disallow localio if disabled via sysfs or AUTH_SYS isn't used */
- if (!localio_enabled ||
- clp->cl_rpcclient->cl_auth->au_flavor != RPC_AUTH_UNIX) {
- nfs_localio_disable_client(clp);
- return;
- }
- if (nfs_client_is_local(clp))
- return;
- if (!nfs_uuid_begin(&clp->cl_uuid))
- return;
- if (nfs_server_uuid_is_local(clp))
- nfs_localio_enable_client(clp);
- nfs_uuid_end(&clp->cl_uuid);
- }
- void nfs_local_probe_async_work(struct work_struct *work)
- {
- struct nfs_client *clp =
- container_of(work, struct nfs_client, cl_local_probe_work);
- if (!refcount_inc_not_zero(&clp->cl_count))
- return;
- nfs_local_probe(clp);
- nfs_put_client(clp);
- }
- void nfs_local_probe_async(struct nfs_client *clp)
- {
- queue_work(nfsiod_workqueue, &clp->cl_local_probe_work);
- }
- EXPORT_SYMBOL_GPL(nfs_local_probe_async);
- static inline void nfs_local_file_put(struct nfsd_file *localio)
- {
- /* nfs_to_nfsd_file_put_local() expects an __rcu pointer
- * but we have a __kernel pointer. It is always safe
- * to cast a __kernel pointer to an __rcu pointer
- * because the cast only weakens what is known about the pointer.
- */
- struct nfsd_file __rcu *nf = (struct nfsd_file __rcu*) localio;
- nfs_to_nfsd_file_put_local(&nf);
- }
- /*
- * __nfs_local_open_fh - open a local filehandle in terms of nfsd_file.
- *
- * Returns a pointer to a struct nfsd_file or ERR_PTR.
- * Caller must release returned nfsd_file with nfs_to_nfsd_file_put_local().
- */
- static struct nfsd_file *
- __nfs_local_open_fh(struct nfs_client *clp, const struct cred *cred,
- struct nfs_fh *fh, struct nfs_file_localio *nfl,
- struct nfsd_file __rcu **pnf,
- const fmode_t mode)
- {
- int status = 0;
- struct nfsd_file *localio;
- localio = nfs_open_local_fh(&clp->cl_uuid, clp->cl_rpcclient,
- cred, fh, nfl, pnf, mode);
- if (IS_ERR(localio)) {
- status = PTR_ERR(localio);
- switch (status) {
- case -ENOMEM:
- case -ENXIO:
- case -ENOENT:
- /* Revalidate localio */
- nfs_localio_disable_client(clp);
- nfs_local_probe(clp);
- }
- }
- trace_nfs_local_open_fh(fh, mode, status);
- return localio;
- }
- /*
- * nfs_local_open_fh - open a local filehandle in terms of nfsd_file.
- * First checking if the open nfsd_file is already cached, otherwise
- * must __nfs_local_open_fh and insert the nfsd_file in nfs_file_localio.
- *
- * Returns a pointer to a struct nfsd_file or NULL.
- */
- struct nfsd_file *
- nfs_local_open_fh(struct nfs_client *clp, const struct cred *cred,
- struct nfs_fh *fh, struct nfs_file_localio *nfl,
- const fmode_t mode)
- {
- struct nfsd_file *nf, __rcu **pnf;
- if (!nfs_server_is_local(clp))
- return NULL;
- if (mode & ~(FMODE_READ | FMODE_WRITE))
- return NULL;
- if (mode & FMODE_WRITE)
- pnf = &nfl->rw_file;
- else
- pnf = &nfl->ro_file;
- nf = __nfs_local_open_fh(clp, cred, fh, nfl, pnf, mode);
- if (IS_ERR(nf))
- return NULL;
- return nf;
- }
- EXPORT_SYMBOL_GPL(nfs_local_open_fh);
- /*
- * Ensure all page cache allocations are done from GFP_NOFS context to
- * prevent direct reclaim recursion back into NFS via nfs_writepages.
- */
- static void
- nfs_local_mapping_set_gfp_nofs_context(struct address_space *m)
- {
- gfp_t gfp_mask = mapping_gfp_mask(m);
- mapping_set_gfp_mask(m, (gfp_mask & ~(__GFP_FS)));
- }
- static void
- nfs_local_iocb_free(struct nfs_local_kiocb *iocb)
- {
- kfree(iocb->bvec);
- kfree(iocb);
- }
- static struct nfs_local_kiocb *
- nfs_local_iocb_alloc(struct nfs_pgio_header *hdr,
- struct file *file, gfp_t flags)
- {
- struct nfs_local_kiocb *iocb;
- iocb = kzalloc_obj(*iocb, flags);
- if (iocb == NULL)
- return NULL;
- iocb->bvec = kmalloc_objs(struct bio_vec, hdr->page_array.npages, flags);
- if (iocb->bvec == NULL) {
- kfree(iocb);
- return NULL;
- }
- nfs_local_mapping_set_gfp_nofs_context(file->f_mapping);
- init_sync_kiocb(&iocb->kiocb, file);
- iocb->hdr = hdr;
- iocb->kiocb.ki_pos = hdr->args.offset;
- iocb->kiocb.ki_flags &= ~IOCB_APPEND;
- iocb->kiocb.ki_complete = NULL;
- iocb->aio_complete_work = NULL;
- iocb->end_iter_index = -1;
- return iocb;
- }
- static bool
- nfs_is_local_dio_possible(struct nfs_local_kiocb *iocb, int rw,
- size_t len, struct nfs_local_dio *local_dio)
- {
- struct nfs_pgio_header *hdr = iocb->hdr;
- loff_t offset = hdr->args.offset;
- u32 nf_dio_mem_align, nf_dio_offset_align, nf_dio_read_offset_align;
- loff_t start_end, orig_end, middle_end;
- nfs_to->nfsd_file_dio_alignment(iocb->localio, &nf_dio_mem_align,
- &nf_dio_offset_align, &nf_dio_read_offset_align);
- if (rw == ITER_DEST)
- nf_dio_offset_align = nf_dio_read_offset_align;
- if (unlikely(!nf_dio_mem_align || !nf_dio_offset_align))
- return false;
- if (unlikely(len < nf_dio_offset_align))
- return false;
- local_dio->mem_align = nf_dio_mem_align;
- local_dio->offset_align = nf_dio_offset_align;
- start_end = round_up(offset, nf_dio_offset_align);
- orig_end = offset + len;
- middle_end = round_down(orig_end, nf_dio_offset_align);
- local_dio->middle_offset = start_end;
- local_dio->end_offset = middle_end;
- local_dio->start_len = start_end - offset;
- local_dio->middle_len = middle_end - start_end;
- local_dio->end_len = orig_end - middle_end;
- if (rw == ITER_DEST)
- trace_nfs_local_dio_read(hdr->inode, offset, len, local_dio);
- else
- trace_nfs_local_dio_write(hdr->inode, offset, len, local_dio);
- return true;
- }
- static bool nfs_iov_iter_aligned_bvec(const struct iov_iter *i,
- unsigned int addr_mask, unsigned int len_mask)
- {
- const struct bio_vec *bvec = i->bvec;
- size_t skip = i->iov_offset;
- size_t size = i->count;
- if (size & len_mask)
- return false;
- do {
- size_t len = bvec->bv_len;
- if (len > size)
- len = size;
- if ((unsigned long)(bvec->bv_offset + skip) & addr_mask)
- return false;
- bvec++;
- size -= len;
- skip = 0;
- } while (size);
- return true;
- }
- static void
- nfs_local_iter_setup(struct iov_iter *iter, int rw, struct bio_vec *bvec,
- unsigned int nvecs, unsigned long total,
- size_t start, size_t len)
- {
- iov_iter_bvec(iter, rw, bvec, nvecs, total);
- if (start)
- iov_iter_advance(iter, start);
- iov_iter_truncate(iter, len);
- }
- /*
- * Setup as many as 3 iov_iter based on extents described by @local_dio.
- * Returns the number of iov_iter that were setup.
- */
- static int
- nfs_local_iters_setup_dio(struct nfs_local_kiocb *iocb, int rw,
- unsigned int nvecs, unsigned long total,
- struct nfs_local_dio *local_dio)
- {
- int n_iters = 0;
- struct iov_iter *iters = iocb->iters;
- /* Setup misaligned start? */
- if (local_dio->start_len) {
- nfs_local_iter_setup(&iters[n_iters], rw, iocb->bvec,
- nvecs, total, 0, local_dio->start_len);
- ++n_iters;
- }
- /*
- * Setup DIO-aligned middle, if there is no misaligned end (below)
- * then AIO completion is used, see nfs_local_call_{read,write}
- */
- nfs_local_iter_setup(&iters[n_iters], rw, iocb->bvec, nvecs,
- total, local_dio->start_len, local_dio->middle_len);
- iocb->iter_is_dio_aligned[n_iters] =
- nfs_iov_iter_aligned_bvec(&iters[n_iters],
- local_dio->mem_align-1, local_dio->offset_align-1);
- if (unlikely(!iocb->iter_is_dio_aligned[n_iters])) {
- trace_nfs_local_dio_misaligned(iocb->hdr->inode,
- local_dio->start_len, local_dio->middle_len, local_dio);
- return 0; /* no DIO-aligned IO possible */
- }
- iocb->end_iter_index = n_iters;
- ++n_iters;
- /* Setup misaligned end? */
- if (local_dio->end_len) {
- nfs_local_iter_setup(&iters[n_iters], rw, iocb->bvec,
- nvecs, total, local_dio->start_len +
- local_dio->middle_len, local_dio->end_len);
- iocb->end_iter_index = n_iters;
- ++n_iters;
- }
- atomic_set(&iocb->n_iters, n_iters);
- return n_iters;
- }
- static noinline_for_stack void
- nfs_local_iters_init(struct nfs_local_kiocb *iocb, int rw)
- {
- struct nfs_pgio_header *hdr = iocb->hdr;
- struct page **pagevec = hdr->page_array.pagevec;
- unsigned long v, total;
- unsigned int base;
- size_t len;
- v = 0;
- total = hdr->args.count;
- base = hdr->args.pgbase;
- pagevec += base >> PAGE_SHIFT;
- base &= ~PAGE_MASK;
- while (total && v < hdr->page_array.npages) {
- len = min_t(size_t, total, PAGE_SIZE - base);
- bvec_set_page(&iocb->bvec[v], *pagevec, len, base);
- total -= len;
- ++pagevec;
- ++v;
- base = 0;
- }
- len = hdr->args.count - total;
- /*
- * For each iocb, iocb->n_iters is always at least 1 and we always
- * end io after first nfs_local_pgio_done call unless misaligned DIO.
- */
- atomic_set(&iocb->n_iters, 1);
- if (test_bit(NFS_IOHDR_ODIRECT, &hdr->flags)) {
- struct nfs_local_dio local_dio;
- if (nfs_is_local_dio_possible(iocb, rw, len, &local_dio) &&
- nfs_local_iters_setup_dio(iocb, rw, v, len, &local_dio) != 0) {
- /* Ensure DIO WRITE's IO on stable storage upon completion */
- if (rw == ITER_SOURCE)
- iocb->kiocb.ki_flags |= IOCB_DSYNC|IOCB_SYNC;
- return; /* is DIO-aligned */
- }
- }
- /* Use buffered IO */
- iov_iter_bvec(&iocb->iters[0], rw, iocb->bvec, v, len);
- }
- static void
- nfs_local_hdr_release(struct nfs_pgio_header *hdr,
- const struct rpc_call_ops *call_ops)
- {
- call_ops->rpc_call_done(&hdr->task, hdr);
- call_ops->rpc_release(hdr);
- }
- static void
- nfs_local_pgio_init(struct nfs_pgio_header *hdr,
- const struct rpc_call_ops *call_ops)
- {
- hdr->task.tk_ops = call_ops;
- if (!hdr->task.tk_start)
- hdr->task.tk_start = ktime_get();
- }
- static bool nfs_local_pgio_done(struct nfs_local_kiocb *iocb, long status)
- {
- struct nfs_pgio_header *hdr = iocb->hdr;
- /* Must handle partial completions */
- if (status >= 0) {
- hdr->res.count += status;
- /* @hdr was initialized to 0 (zeroed during allocation) */
- if (hdr->task.tk_status == 0)
- hdr->res.op_status = NFS4_OK;
- } else {
- hdr->res.op_status = nfs_localio_errno_to_nfs4_stat(status);
- hdr->task.tk_status = status;
- }
- BUG_ON(atomic_read(&iocb->n_iters) <= 0);
- return atomic_dec_and_test(&iocb->n_iters);
- }
- static void
- nfs_local_iocb_release(struct nfs_local_kiocb *iocb)
- {
- nfs_local_file_put(iocb->localio);
- nfs_local_iocb_free(iocb);
- }
- static void nfs_local_pgio_restart(struct nfs_local_kiocb *iocb,
- struct nfs_pgio_header *hdr)
- {
- int status = 0;
- iocb->kiocb.ki_pos = hdr->args.offset;
- iocb->kiocb.ki_flags &= ~(IOCB_DSYNC | IOCB_SYNC | IOCB_DIRECT);
- iocb->kiocb.ki_complete = NULL;
- iocb->aio_complete_work = NULL;
- iocb->end_iter_index = -1;
- switch (hdr->rw_mode) {
- case FMODE_READ:
- nfs_local_iters_init(iocb, ITER_DEST);
- nfs_local_do_read(iocb, hdr->task.tk_ops);
- break;
- case FMODE_WRITE:
- nfs_local_iters_init(iocb, ITER_SOURCE);
- nfs_local_do_write(iocb, hdr->task.tk_ops);
- break;
- default:
- status = -EOPNOTSUPP;
- }
- if (unlikely(status != 0)) {
- nfs_local_iocb_release(iocb);
- hdr->task.tk_status = status;
- nfs_local_hdr_release(hdr, hdr->task.tk_ops);
- }
- }
- static void nfs_local_pgio_release(struct nfs_local_kiocb *iocb)
- {
- struct nfs_pgio_header *hdr = iocb->hdr;
- struct rpc_task *task = &hdr->task;
- task->tk_action = NULL;
- task->tk_ops->rpc_call_done(task, hdr);
- if (task->tk_action == NULL) {
- nfs_local_iocb_release(iocb);
- task->tk_ops->rpc_release(hdr);
- } else
- nfs_local_pgio_restart(iocb, hdr);
- }
- /*
- * Complete the I/O from iocb->kiocb.ki_complete()
- *
- * Note that this function can be called from a bottom half context,
- * hence we need to queue the rpc_call_done() etc to a workqueue
- */
- static inline void nfs_local_pgio_aio_complete(struct nfs_local_kiocb *iocb)
- {
- INIT_WORK(&iocb->work, iocb->aio_complete_work);
- queue_work(nfsiod_workqueue, &iocb->work);
- }
- static void nfs_local_read_done(struct nfs_local_kiocb *iocb)
- {
- struct nfs_pgio_header *hdr = iocb->hdr;
- struct file *filp = iocb->kiocb.ki_filp;
- long status = hdr->task.tk_status;
- if ((iocb->kiocb.ki_flags & IOCB_DIRECT) && status == -EINVAL) {
- /* Underlying FS will return -EINVAL if misaligned DIO is attempted. */
- pr_info_ratelimited("nfs: Unexpected direct I/O read alignment failure\n");
- }
- /*
- * Must clear replen otherwise NFSv3 data corruption will occur
- * if/when switching from LOCALIO back to using normal RPC.
- */
- hdr->res.replen = 0;
- /* nfs_readpage_result() handles short read */
- if (hdr->args.offset + hdr->res.count >= i_size_read(file_inode(filp)))
- hdr->res.eof = true;
- dprintk("%s: read %ld bytes eof %d.\n", __func__,
- status > 0 ? status : 0, hdr->res.eof);
- }
/* Final step of a read: fix up the result, then complete or restart. */
static inline void nfs_local_read_iocb_done(struct nfs_local_kiocb *iocb)
{
	nfs_local_read_done(iocb);
	/* May free @iocb or reissue the I/O on it. */
	nfs_local_pgio_release(iocb);
}
- static void nfs_local_read_aio_complete_work(struct work_struct *work)
- {
- struct nfs_local_kiocb *iocb =
- container_of(work, struct nfs_local_kiocb, work);
- nfs_local_read_iocb_done(iocb);
- }
- static void nfs_local_read_aio_complete(struct kiocb *kiocb, long ret)
- {
- struct nfs_local_kiocb *iocb =
- container_of(kiocb, struct nfs_local_kiocb, kiocb);
- /* AIO completion of DIO read should always be last to complete */
- if (unlikely(!nfs_local_pgio_done(iocb, ret)))
- return;
- nfs_local_pgio_aio_complete(iocb); /* Calls nfs_local_read_aio_complete_work */
- }
- static void nfs_local_call_read(struct work_struct *work)
- {
- struct nfs_local_kiocb *iocb =
- container_of(work, struct nfs_local_kiocb, work);
- struct file *filp = iocb->kiocb.ki_filp;
- ssize_t status;
- int n_iters;
- n_iters = atomic_read(&iocb->n_iters);
- for (int i = 0; i < n_iters ; i++) {
- if (iocb->iter_is_dio_aligned[i]) {
- iocb->kiocb.ki_flags |= IOCB_DIRECT;
- /* Only use AIO completion if DIO-aligned segment is last */
- if (i == iocb->end_iter_index) {
- iocb->kiocb.ki_complete = nfs_local_read_aio_complete;
- iocb->aio_complete_work = nfs_local_read_aio_complete_work;
- }
- } else
- iocb->kiocb.ki_flags &= ~IOCB_DIRECT;
- scoped_with_creds(filp->f_cred)
- status = filp->f_op->read_iter(&iocb->kiocb, &iocb->iters[i]);
- if (status == -EIOCBQUEUED)
- continue;
- /* Break on completion, errors, or short reads */
- if (nfs_local_pgio_done(iocb, status) || status < 0 ||
- (size_t)status < iov_iter_count(&iocb->iters[i])) {
- nfs_local_read_iocb_done(iocb);
- break;
- }
- }
- }
- static void nfs_local_do_read(struct nfs_local_kiocb *iocb,
- const struct rpc_call_ops *call_ops)
- {
- struct nfs_pgio_header *hdr = iocb->hdr;
- dprintk("%s: vfs_read count=%u pos=%llu\n",
- __func__, hdr->args.count, hdr->args.offset);
- nfs_local_pgio_init(hdr, call_ops);
- hdr->res.eof = false;
- INIT_WORK(&iocb->work, nfs_local_call_read);
- queue_work(nfslocaliod_workqueue, &iocb->work);
- }
- static void
- nfs_copy_boot_verifier(struct nfs_write_verifier *verifier, struct inode *inode)
- {
- struct nfs_client *clp = NFS_SERVER(inode)->nfs_client;
- u32 *verf = (u32 *)verifier->data;
- unsigned int seq;
- do {
- seq = read_seqbegin(&clp->cl_boot_lock);
- verf[0] = (u32)clp->cl_nfssvc_boot.tv_sec;
- verf[1] = (u32)clp->cl_nfssvc_boot.tv_nsec;
- } while (read_seqretry(&clp->cl_boot_lock, seq));
- }
- static void
- nfs_reset_boot_verifier(struct inode *inode)
- {
- struct nfs_client *clp = NFS_SERVER(inode)->nfs_client;
- write_seqlock(&clp->cl_boot_lock);
- ktime_get_real_ts64(&clp->cl_nfssvc_boot);
- write_sequnlock(&clp->cl_boot_lock);
- }
- static void
- nfs_set_local_verifier(struct inode *inode,
- struct nfs_writeverf *verf,
- enum nfs3_stable_how how)
- {
- nfs_copy_boot_verifier(&verf->verifier, inode);
- verf->committed = how;
- }
- /* Factored out from fs/nfsd/vfs.h:fh_getattr() */
- static int __vfs_getattr(const struct path *p, struct kstat *stat, int version)
- {
- u32 request_mask = STATX_BASIC_STATS;
- if (version == 4)
- request_mask |= (STATX_BTIME | STATX_CHANGE_COOKIE);
- return vfs_getattr(p, stat, request_mask, AT_STATX_SYNC_AS_STAT);
- }
- /* Copied from fs/nfsd/nfsfh.c:nfsd4_change_attribute() */
- static u64 __nfsd4_change_attribute(const struct kstat *stat,
- const struct inode *inode)
- {
- u64 chattr;
- if (stat->result_mask & STATX_CHANGE_COOKIE) {
- chattr = stat->change_cookie;
- if (S_ISREG(inode->i_mode) &&
- !(stat->attributes & STATX_ATTR_CHANGE_MONOTONIC)) {
- chattr += (u64)stat->ctime.tv_sec << 30;
- chattr += stat->ctime.tv_nsec;
- }
- } else {
- chattr = time_to_chattr(&stat->ctime);
- }
- return chattr;
- }
- static void nfs_local_vfs_getattr(struct nfs_local_kiocb *iocb)
- {
- struct kstat stat;
- struct file *filp = iocb->kiocb.ki_filp;
- struct nfs_pgio_header *hdr = iocb->hdr;
- struct nfs_fattr *fattr = hdr->res.fattr;
- int version = NFS_PROTO(hdr->inode)->version;
- if (unlikely(!fattr) || __vfs_getattr(&filp->f_path, &stat, version))
- return;
- fattr->valid = (NFS_ATTR_FATTR_FILEID |
- NFS_ATTR_FATTR_CHANGE |
- NFS_ATTR_FATTR_SIZE |
- NFS_ATTR_FATTR_ATIME |
- NFS_ATTR_FATTR_MTIME |
- NFS_ATTR_FATTR_CTIME |
- NFS_ATTR_FATTR_SPACE_USED);
- fattr->fileid = stat.ino;
- fattr->size = stat.size;
- fattr->atime = stat.atime;
- fattr->mtime = stat.mtime;
- fattr->ctime = stat.ctime;
- if (version == 4) {
- fattr->change_attr =
- __nfsd4_change_attribute(&stat, file_inode(filp));
- } else
- fattr->change_attr = nfs_timespec_to_change_attr(&fattr->ctime);
- fattr->du.nfs3.used = stat.blocks << 9;
- }
- static void nfs_local_write_done(struct nfs_local_kiocb *iocb)
- {
- struct nfs_pgio_header *hdr = iocb->hdr;
- long status = hdr->task.tk_status;
- dprintk("%s: wrote %ld bytes.\n", __func__, status > 0 ? status : 0);
- if ((iocb->kiocb.ki_flags & IOCB_DIRECT) && status == -EINVAL) {
- /* Underlying FS will return -EINVAL if misaligned DIO is attempted. */
- pr_info_ratelimited("nfs: Unexpected direct I/O write alignment failure\n");
- }
- if (status < 0)
- nfs_reset_boot_verifier(hdr->inode);
- }
/*
 * Final step of a write: post-process the result, refresh the file
 * attributes in the header, then complete or restart the request.
 */
static inline void nfs_local_write_iocb_done(struct nfs_local_kiocb *iocb)
{
	nfs_local_write_done(iocb);
	nfs_local_vfs_getattr(iocb);
	/* May free @iocb or reissue the I/O on it. */
	nfs_local_pgio_release(iocb);
}
- static void nfs_local_write_aio_complete_work(struct work_struct *work)
- {
- struct nfs_local_kiocb *iocb =
- container_of(work, struct nfs_local_kiocb, work);
- nfs_local_write_iocb_done(iocb);
- }
- static void nfs_local_write_aio_complete(struct kiocb *kiocb, long ret)
- {
- struct nfs_local_kiocb *iocb =
- container_of(kiocb, struct nfs_local_kiocb, kiocb);
- /* AIO completion of DIO write should always be last to complete */
- if (unlikely(!nfs_local_pgio_done(iocb, ret)))
- return;
- nfs_local_pgio_aio_complete(iocb); /* Calls nfs_local_write_aio_complete_work */
- }
- static void nfs_local_call_write(struct work_struct *work)
- {
- struct nfs_local_kiocb *iocb =
- container_of(work, struct nfs_local_kiocb, work);
- struct file *filp = iocb->kiocb.ki_filp;
- unsigned long old_flags = current->flags;
- ssize_t status;
- int n_iters;
- current->flags |= PF_LOCAL_THROTTLE | PF_MEMALLOC_NOIO;
- file_start_write(filp);
- n_iters = atomic_read(&iocb->n_iters);
- for (int i = 0; i < n_iters ; i++) {
- if (iocb->iter_is_dio_aligned[i]) {
- iocb->kiocb.ki_flags |= IOCB_DIRECT;
- /* Only use AIO completion if DIO-aligned segment is last */
- if (i == iocb->end_iter_index) {
- iocb->kiocb.ki_complete = nfs_local_write_aio_complete;
- iocb->aio_complete_work = nfs_local_write_aio_complete_work;
- }
- } else
- iocb->kiocb.ki_flags &= ~IOCB_DIRECT;
- scoped_with_creds(filp->f_cred)
- status = filp->f_op->write_iter(&iocb->kiocb, &iocb->iters[i]);
- if (status == -EIOCBQUEUED)
- continue;
- /* Break on completion, errors, or short writes */
- if (nfs_local_pgio_done(iocb, status) || status < 0 ||
- (size_t)status < iov_iter_count(&iocb->iters[i])) {
- nfs_local_write_iocb_done(iocb);
- break;
- }
- }
- file_end_write(filp);
- current->flags = old_flags;
- }
- static void nfs_local_do_write(struct nfs_local_kiocb *iocb,
- const struct rpc_call_ops *call_ops)
- {
- struct nfs_pgio_header *hdr = iocb->hdr;
- dprintk("%s: vfs_write count=%u pos=%llu %s\n",
- __func__, hdr->args.count, hdr->args.offset,
- (hdr->args.stable == NFS_UNSTABLE) ? "unstable" : "stable");
- switch (hdr->args.stable) {
- default:
- break;
- case NFS_DATA_SYNC:
- iocb->kiocb.ki_flags |= IOCB_DSYNC;
- break;
- case NFS_FILE_SYNC:
- iocb->kiocb.ki_flags |= IOCB_DSYNC|IOCB_SYNC;
- }
- nfs_local_pgio_init(hdr, call_ops);
- nfs_set_local_verifier(hdr->inode, hdr->res.verf, hdr->args.stable);
- INIT_WORK(&iocb->work, nfs_local_call_write);
- queue_work(nfslocaliod_workqueue, &iocb->work);
- }
- static struct nfs_local_kiocb *
- nfs_local_iocb_init(struct nfs_pgio_header *hdr, struct nfsd_file *localio)
- {
- struct file *file = nfs_to->nfsd_file_file(localio);
- struct nfs_local_kiocb *iocb;
- gfp_t gfp_mask;
- int rw;
- if (hdr->rw_mode & FMODE_READ) {
- if (!file->f_op->read_iter)
- return ERR_PTR(-EOPNOTSUPP);
- gfp_mask = GFP_KERNEL;
- rw = ITER_DEST;
- } else {
- if (!file->f_op->write_iter)
- return ERR_PTR(-EOPNOTSUPP);
- gfp_mask = GFP_NOIO;
- rw = ITER_SOURCE;
- }
- iocb = nfs_local_iocb_alloc(hdr, file, gfp_mask);
- if (iocb == NULL)
- return ERR_PTR(-ENOMEM);
- iocb->hdr = hdr;
- iocb->localio = localio;
- nfs_local_iters_init(iocb, rw);
- return iocb;
- }
- int nfs_local_doio(struct nfs_client *clp, struct nfsd_file *localio,
- struct nfs_pgio_header *hdr,
- const struct rpc_call_ops *call_ops)
- {
- struct nfs_local_kiocb *iocb;
- int status = 0;
- if (!hdr->args.count)
- return 0;
- iocb = nfs_local_iocb_init(hdr, localio);
- if (IS_ERR(iocb))
- return PTR_ERR(iocb);
- switch (hdr->rw_mode) {
- case FMODE_READ:
- nfs_local_do_read(iocb, call_ops);
- break;
- case FMODE_WRITE:
- nfs_local_do_write(iocb, call_ops);
- break;
- default:
- dprintk("%s: invalid mode: %d\n", __func__,
- hdr->rw_mode);
- status = -EOPNOTSUPP;
- }
- if (unlikely(status != 0)) {
- nfs_local_iocb_release(iocb);
- hdr->task.tk_status = status;
- nfs_local_hdr_release(hdr, call_ops);
- }
- return status;
- }
- static void
- nfs_local_init_commit(struct nfs_commit_data *data,
- const struct rpc_call_ops *call_ops)
- {
- data->task.tk_ops = call_ops;
- }
- static int
- nfs_local_run_commit(struct file *filp, struct nfs_commit_data *data)
- {
- loff_t start = data->args.offset;
- loff_t end = LLONG_MAX;
- if (data->args.count > 0) {
- end = start + data->args.count - 1;
- if (end < start)
- end = LLONG_MAX;
- }
- nfs_local_mapping_set_gfp_nofs_context(filp->f_mapping);
- dprintk("%s: commit %llu - %llu\n", __func__, start, end);
- return vfs_fsync_range(filp, start, end, 0);
- }
- static void
- nfs_local_commit_done(struct nfs_commit_data *data, int status)
- {
- if (status >= 0) {
- nfs_set_local_verifier(data->inode,
- data->res.verf,
- NFS_FILE_SYNC);
- data->res.op_status = NFS4_OK;
- data->task.tk_status = 0;
- } else {
- nfs_reset_boot_verifier(data->inode);
- data->res.op_status = nfs_localio_errno_to_nfs4_stat(status);
- data->task.tk_status = status;
- }
- }
- static void
- nfs_local_release_commit_data(struct nfsd_file *localio,
- struct nfs_commit_data *data,
- const struct rpc_call_ops *call_ops)
- {
- nfs_local_file_put(localio);
- call_ops->rpc_call_done(&data->task, data);
- call_ops->rpc_release(data);
- }
- static void
- nfs_local_fsync_ctx_free(struct nfs_local_fsync_ctx *ctx)
- {
- nfs_local_release_commit_data(ctx->localio, ctx->data,
- ctx->data->task.tk_ops);
- kfree(ctx);
- }
- static void
- nfs_local_fsync_work(struct work_struct *work)
- {
- unsigned long old_flags = current->flags;
- struct nfs_local_fsync_ctx *ctx;
- int status;
- ctx = container_of(work, struct nfs_local_fsync_ctx, work);
- current->flags |= PF_LOCAL_THROTTLE | PF_MEMALLOC_NOIO;
- status = nfs_local_run_commit(nfs_to->nfsd_file_file(ctx->localio),
- ctx->data);
- nfs_local_commit_done(ctx->data, status);
- if (ctx->done != NULL)
- complete(ctx->done);
- nfs_local_fsync_ctx_free(ctx);
- current->flags = old_flags;
- }
- static struct nfs_local_fsync_ctx *
- nfs_local_fsync_ctx_alloc(struct nfs_commit_data *data,
- struct nfsd_file *localio, gfp_t flags)
- {
- struct nfs_local_fsync_ctx *ctx = kmalloc_obj(*ctx, flags);
- if (ctx != NULL) {
- ctx->localio = localio;
- ctx->data = data;
- INIT_WORK(&ctx->work, nfs_local_fsync_work);
- ctx->done = NULL;
- }
- return ctx;
- }
- int nfs_local_commit(struct nfsd_file *localio,
- struct nfs_commit_data *data,
- const struct rpc_call_ops *call_ops, int how)
- {
- struct nfs_local_fsync_ctx *ctx;
- ctx = nfs_local_fsync_ctx_alloc(data, localio, GFP_NOIO);
- if (!ctx) {
- nfs_local_commit_done(data, -ENOMEM);
- nfs_local_release_commit_data(localio, data, call_ops);
- return -ENOMEM;
- }
- nfs_local_init_commit(data, call_ops);
- if (how & FLUSH_SYNC) {
- DECLARE_COMPLETION_ONSTACK(done);
- ctx->done = &done;
- queue_work(nfslocaliod_workqueue, &ctx->work);
- wait_for_completion(&done);
- } else
- queue_work(nfslocaliod_workqueue, &ctx->work);
- return 0;
- }
|