| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595 |
- // SPDX-License-Identifier: GPL-2.0-only
- /* Network filesystem read subrequest result collection, assessment and
- * retrying.
- *
- * Copyright (C) 2024 Red Hat, Inc. All Rights Reserved.
- * Written by David Howells (dhowells@redhat.com)
- */
- #include <linux/export.h>
- #include <linux/fs.h>
- #include <linux/mm.h>
- #include <linux/pagemap.h>
- #include <linux/slab.h>
- #include <linux/task_io_accounting_ops.h>
- #include "internal.h"
- /* Notes made in the collector */
- #define HIT_PENDING 0x01 /* A front op was still pending */
- #define MADE_PROGRESS 0x04 /* Made progress cleaning up a stream or the folio set */
- #define BUFFERED 0x08 /* The pagecache needs cleaning up */
- #define NEED_RETRY 0x10 /* A front op requests retrying */
- #define COPY_TO_CACHE 0x40 /* Need to copy subrequest to cache */
- #define ABANDON_SREQ 0x80 /* Need to abandon untransferred part of subrequest */
- /*
- * Clear the unread part of an I/O request.
- */
- static void netfs_clear_unread(struct netfs_io_subrequest *subreq)
- {
- netfs_reset_iter(subreq);
- WARN_ON_ONCE(subreq->len - subreq->transferred != iov_iter_count(&subreq->io_iter));
- iov_iter_zero(iov_iter_count(&subreq->io_iter), &subreq->io_iter);
- if (subreq->start + subreq->transferred >= subreq->rreq->i_size)
- __set_bit(NETFS_SREQ_HIT_EOF, &subreq->flags);
- }
- /*
- * Flush, mark and unlock a folio that's now completely read. If we want to
- * cache the folio, we set the group to NETFS_FOLIO_COPY_TO_CACHE, mark it
- * dirty and let writeback handle it.
- */
- static void netfs_unlock_read_folio(struct netfs_io_request *rreq,
- struct folio_queue *folioq,
- int slot)
- {
- struct netfs_folio *finfo;
- struct folio *folio = folioq_folio(folioq, slot);
- if (unlikely(folio_pos(folio) < rreq->abandon_to)) {
- trace_netfs_folio(folio, netfs_folio_trace_abandon);
- goto just_unlock;
- }
- flush_dcache_folio(folio);
- folio_mark_uptodate(folio);
- if (!test_bit(NETFS_RREQ_USE_PGPRIV2, &rreq->flags)) {
- finfo = netfs_folio_info(folio);
- if (finfo) {
- trace_netfs_folio(folio, netfs_folio_trace_filled_gaps);
- if (finfo->netfs_group)
- folio_change_private(folio, finfo->netfs_group);
- else
- folio_detach_private(folio);
- kfree(finfo);
- }
- if (test_bit(NETFS_RREQ_FOLIO_COPY_TO_CACHE, &rreq->flags)) {
- if (!WARN_ON_ONCE(folio_get_private(folio) != NULL)) {
- trace_netfs_folio(folio, netfs_folio_trace_copy_to_cache);
- folio_attach_private(folio, NETFS_FOLIO_COPY_TO_CACHE);
- folio_mark_dirty(folio);
- }
- } else {
- trace_netfs_folio(folio, netfs_folio_trace_read_done);
- }
- folioq_clear(folioq, slot);
- } else {
- // TODO: Use of PG_private_2 is deprecated.
- if (test_bit(NETFS_RREQ_FOLIO_COPY_TO_CACHE, &rreq->flags))
- netfs_pgpriv2_copy_to_cache(rreq, folio);
- }
- just_unlock:
- if (folio->index == rreq->no_unlock_folio &&
- test_bit(NETFS_RREQ_NO_UNLOCK_FOLIO, &rreq->flags)) {
- _debug("no unlock");
- } else {
- trace_netfs_folio(folio, netfs_folio_trace_read_unlock);
- folio_unlock(folio);
- }
- folioq_clear(folioq, slot);
- }
- /*
- * Unlock any folios we've finished with.
- */
- static void netfs_read_unlock_folios(struct netfs_io_request *rreq,
- unsigned int *notes)
- {
- struct folio_queue *folioq = rreq->buffer.tail;
- unsigned long long collected_to = rreq->collected_to;
- unsigned int slot = rreq->buffer.first_tail_slot;
- if (rreq->cleaned_to >= rreq->collected_to)
- return;
- // TODO: Begin decryption
- if (slot >= folioq_nr_slots(folioq)) {
- folioq = rolling_buffer_delete_spent(&rreq->buffer);
- if (!folioq) {
- rreq->front_folio_order = 0;
- return;
- }
- slot = 0;
- }
- for (;;) {
- struct folio *folio;
- unsigned long long fpos, fend;
- unsigned int order;
- size_t fsize;
- if (*notes & COPY_TO_CACHE)
- set_bit(NETFS_RREQ_FOLIO_COPY_TO_CACHE, &rreq->flags);
- folio = folioq_folio(folioq, slot);
- if (WARN_ONCE(!folio_test_locked(folio),
- "R=%08x: folio %lx is not locked\n",
- rreq->debug_id, folio->index))
- trace_netfs_folio(folio, netfs_folio_trace_not_locked);
- order = folioq_folio_order(folioq, slot);
- rreq->front_folio_order = order;
- fsize = PAGE_SIZE << order;
- fpos = folio_pos(folio);
- fend = fpos + fsize;
- trace_netfs_collect_folio(rreq, folio, fend, collected_to);
- /* Unlock any folio we've transferred all of. */
- if (collected_to < fend)
- break;
- netfs_unlock_read_folio(rreq, folioq, slot);
- WRITE_ONCE(rreq->cleaned_to, fpos + fsize);
- *notes |= MADE_PROGRESS;
- clear_bit(NETFS_RREQ_FOLIO_COPY_TO_CACHE, &rreq->flags);
- /* Clean up the head folioq. If we clear an entire folioq, then
- * we can get rid of it provided it's not also the tail folioq
- * being filled by the issuer.
- */
- folioq_clear(folioq, slot);
- slot++;
- if (slot >= folioq_nr_slots(folioq)) {
- folioq = rolling_buffer_delete_spent(&rreq->buffer);
- if (!folioq)
- goto done;
- slot = 0;
- trace_netfs_folioq(folioq, netfs_trace_folioq_read_progress);
- }
- if (fpos + fsize >= collected_to)
- break;
- }
- rreq->buffer.tail = folioq;
- done:
- rreq->buffer.first_tail_slot = slot;
- }
- /*
- * Collect and assess the results of various read subrequests. We may need to
- * retry some of the results.
- *
- * Note that we have a sequence of subrequests, which may be drawing on
- * different sources and may or may not be the same size or starting position
- * and may not even correspond in boundary alignment.
- */
- static void netfs_collect_read_results(struct netfs_io_request *rreq)
- {
- struct netfs_io_subrequest *front, *remove;
- struct netfs_io_stream *stream = &rreq->io_streams[0];
- unsigned int notes;
- _enter("%llx-%llx", rreq->start, rreq->start + rreq->len);
- trace_netfs_rreq(rreq, netfs_rreq_trace_collect);
- trace_netfs_collect(rreq);
- reassess:
- if (rreq->origin == NETFS_READAHEAD ||
- rreq->origin == NETFS_READPAGE ||
- rreq->origin == NETFS_READ_FOR_WRITE)
- notes = BUFFERED;
- else
- notes = 0;
- /* Remove completed subrequests from the front of the stream and
- * advance the completion point. We stop when we hit something that's
- * in progress. The issuer thread may be adding stuff to the tail
- * whilst we're doing this.
- */
- front = list_first_entry_or_null(&stream->subrequests,
- struct netfs_io_subrequest, rreq_link);
- while (front) {
- size_t transferred;
- trace_netfs_collect_sreq(rreq, front);
- _debug("sreq [%x] %llx %zx/%zx",
- front->debug_index, front->start, front->transferred, front->len);
- if (stream->collected_to < front->start) {
- trace_netfs_collect_gap(rreq, stream, front->start, 'F');
- stream->collected_to = front->start;
- }
- if (netfs_check_subreq_in_progress(front))
- notes |= HIT_PENDING;
- smp_rmb(); /* Read counters after IN_PROGRESS flag. */
- transferred = READ_ONCE(front->transferred);
- /* If we can now collect the next folio, do so. We don't want
- * to defer this as we have to decide whether we need to copy
- * to the cache or not, and that may differ between adjacent
- * subreqs.
- */
- if (notes & BUFFERED) {
- size_t fsize = PAGE_SIZE << rreq->front_folio_order;
- /* Clear the tail of a short read. */
- if (!(notes & HIT_PENDING) &&
- front->error == 0 &&
- transferred < front->len &&
- (test_bit(NETFS_SREQ_HIT_EOF, &front->flags) ||
- test_bit(NETFS_SREQ_CLEAR_TAIL, &front->flags))) {
- netfs_clear_unread(front);
- transferred = front->transferred = front->len;
- trace_netfs_sreq(front, netfs_sreq_trace_clear);
- }
- stream->collected_to = front->start + transferred;
- rreq->collected_to = stream->collected_to;
- if (test_bit(NETFS_SREQ_COPY_TO_CACHE, &front->flags))
- notes |= COPY_TO_CACHE;
- if (test_bit(NETFS_SREQ_FAILED, &front->flags)) {
- rreq->abandon_to = front->start + front->len;
- front->transferred = front->len;
- transferred = front->len;
- trace_netfs_rreq(rreq, netfs_rreq_trace_set_abandon);
- }
- if (front->start + transferred >= rreq->cleaned_to + fsize ||
- test_bit(NETFS_SREQ_HIT_EOF, &front->flags))
- netfs_read_unlock_folios(rreq, ¬es);
- } else {
- stream->collected_to = front->start + transferred;
- rreq->collected_to = stream->collected_to;
- }
- /* Stall if the front is still undergoing I/O. */
- if (notes & HIT_PENDING)
- break;
- if (test_bit(NETFS_SREQ_FAILED, &front->flags)) {
- if (!stream->failed) {
- stream->error = front->error;
- rreq->error = front->error;
- set_bit(NETFS_RREQ_FAILED, &rreq->flags);
- stream->failed = true;
- }
- notes |= MADE_PROGRESS | ABANDON_SREQ;
- } else if (test_bit(NETFS_SREQ_NEED_RETRY, &front->flags)) {
- stream->need_retry = true;
- notes |= NEED_RETRY | MADE_PROGRESS;
- break;
- } else if (test_bit(NETFS_RREQ_SHORT_TRANSFER, &rreq->flags)) {
- notes |= MADE_PROGRESS;
- } else {
- if (!stream->failed) {
- stream->transferred += transferred;
- stream->transferred_valid = true;
- }
- if (front->transferred < front->len)
- set_bit(NETFS_RREQ_SHORT_TRANSFER, &rreq->flags);
- notes |= MADE_PROGRESS;
- }
- /* Remove if completely consumed. */
- stream->source = front->source;
- spin_lock(&rreq->lock);
- remove = front;
- trace_netfs_sreq(front,
- notes & ABANDON_SREQ ?
- netfs_sreq_trace_abandoned : netfs_sreq_trace_consumed);
- list_del_init(&front->rreq_link);
- front = list_first_entry_or_null(&stream->subrequests,
- struct netfs_io_subrequest, rreq_link);
- spin_unlock(&rreq->lock);
- netfs_put_subrequest(remove,
- notes & ABANDON_SREQ ?
- netfs_sreq_trace_put_abandon :
- netfs_sreq_trace_put_done);
- }
- trace_netfs_collect_stream(rreq, stream);
- trace_netfs_collect_state(rreq, rreq->collected_to, notes);
- if (!(notes & BUFFERED))
- rreq->cleaned_to = rreq->collected_to;
- if (notes & NEED_RETRY)
- goto need_retry;
- if (notes & MADE_PROGRESS) {
- netfs_wake_rreq_flag(rreq, NETFS_RREQ_PAUSE, netfs_rreq_trace_unpause);
- //cond_resched();
- goto reassess;
- }
- out:
- _leave(" = %x", notes);
- return;
- need_retry:
- /* Okay... We're going to have to retry parts of the stream. Note
- * that any partially completed op will have had any wholly transferred
- * folios removed from it.
- */
- _debug("retry");
- netfs_retry_reads(rreq);
- goto out;
- }
- /*
- * Do page flushing and suchlike after DIO.
- */
- static void netfs_rreq_assess_dio(struct netfs_io_request *rreq)
- {
- unsigned int i;
- if (rreq->origin == NETFS_UNBUFFERED_READ ||
- rreq->origin == NETFS_DIO_READ) {
- for (i = 0; i < rreq->direct_bv_count; i++) {
- flush_dcache_page(rreq->direct_bv[i].bv_page);
- // TODO: cifs marks pages in the destination buffer
- // dirty under some circumstances after a read. Do we
- // need to do that too?
- set_page_dirty(rreq->direct_bv[i].bv_page);
- }
- }
- if (rreq->iocb) {
- rreq->iocb->ki_pos += rreq->transferred;
- if (rreq->iocb->ki_complete) {
- trace_netfs_rreq(rreq, netfs_rreq_trace_ki_complete);
- rreq->iocb->ki_complete(
- rreq->iocb, rreq->error ? rreq->error : rreq->transferred);
- }
- }
- if (rreq->netfs_ops->done)
- rreq->netfs_ops->done(rreq);
- if (rreq->origin == NETFS_UNBUFFERED_READ ||
- rreq->origin == NETFS_DIO_READ)
- inode_dio_end(rreq->inode);
- }
- /*
- * Do processing after reading a monolithic single object.
- */
- static void netfs_rreq_assess_single(struct netfs_io_request *rreq)
- {
- struct netfs_io_stream *stream = &rreq->io_streams[0];
- if (!rreq->error && stream->source == NETFS_DOWNLOAD_FROM_SERVER &&
- fscache_resources_valid(&rreq->cache_resources)) {
- trace_netfs_rreq(rreq, netfs_rreq_trace_dirty);
- netfs_single_mark_inode_dirty(rreq->inode);
- }
- if (rreq->iocb) {
- rreq->iocb->ki_pos += rreq->transferred;
- if (rreq->iocb->ki_complete) {
- trace_netfs_rreq(rreq, netfs_rreq_trace_ki_complete);
- rreq->iocb->ki_complete(
- rreq->iocb, rreq->error ? rreq->error : rreq->transferred);
- }
- }
- if (rreq->netfs_ops->done)
- rreq->netfs_ops->done(rreq);
- }
- /*
- * Perform the collection of subrequests and folios.
- *
- * Note that we're in normal kernel thread context at this point, possibly
- * running on a workqueue.
- */
- bool netfs_read_collection(struct netfs_io_request *rreq)
- {
- struct netfs_io_stream *stream = &rreq->io_streams[0];
- netfs_collect_read_results(rreq);
- /* We're done when the app thread has finished posting subreqs and the
- * queue is empty.
- */
- if (!test_bit(NETFS_RREQ_ALL_QUEUED, &rreq->flags))
- return false;
- smp_rmb(); /* Read ALL_QUEUED before subreq lists. */
- if (!list_empty(&stream->subrequests))
- return false;
- /* Okay, declare that all I/O is complete. */
- rreq->transferred = stream->transferred;
- trace_netfs_rreq(rreq, netfs_rreq_trace_complete);
- //netfs_rreq_is_still_valid(rreq);
- switch (rreq->origin) {
- case NETFS_UNBUFFERED_READ:
- case NETFS_DIO_READ:
- case NETFS_READ_GAPS:
- netfs_rreq_assess_dio(rreq);
- break;
- case NETFS_READ_SINGLE:
- netfs_rreq_assess_single(rreq);
- break;
- default:
- break;
- }
- task_io_account_read(rreq->transferred);
- netfs_wake_rreq_flag(rreq, NETFS_RREQ_IN_PROGRESS, netfs_rreq_trace_wake_ip);
- /* As we cleared NETFS_RREQ_IN_PROGRESS, we acquired its ref. */
- trace_netfs_rreq(rreq, netfs_rreq_trace_done);
- netfs_clear_subrequests(rreq);
- netfs_unlock_abandoned_read_pages(rreq);
- if (unlikely(rreq->copy_to_cache))
- netfs_pgpriv2_end_copy_to_cache(rreq);
- return true;
- }
- void netfs_read_collection_worker(struct work_struct *work)
- {
- struct netfs_io_request *rreq = container_of(work, struct netfs_io_request, work);
- netfs_see_request(rreq, netfs_rreq_trace_see_work);
- if (netfs_check_rreq_in_progress(rreq)) {
- if (netfs_read_collection(rreq))
- /* Drop the ref from the IN_PROGRESS flag. */
- netfs_put_request(rreq, netfs_rreq_trace_put_work_ip);
- else
- netfs_see_request(rreq, netfs_rreq_trace_see_work_complete);
- }
- }
- /**
- * netfs_read_subreq_progress - Note progress of a read operation.
- * @subreq: The read request that has terminated.
- *
- * This tells the read side of netfs lib that a contributory I/O operation has
- * made some progress and that it may be possible to unlock some folios.
- *
- * Before calling, the filesystem should update subreq->transferred to track
- * the amount of data copied into the output buffer.
- */
- void netfs_read_subreq_progress(struct netfs_io_subrequest *subreq)
- {
- struct netfs_io_request *rreq = subreq->rreq;
- struct netfs_io_stream *stream = &rreq->io_streams[0];
- size_t fsize = PAGE_SIZE << rreq->front_folio_order;
- trace_netfs_sreq(subreq, netfs_sreq_trace_progress);
- /* If we are at the head of the queue, wake up the collector,
- * getting a ref to it if we were the ones to do so.
- */
- if (subreq->start + subreq->transferred > rreq->cleaned_to + fsize &&
- (rreq->origin == NETFS_READAHEAD ||
- rreq->origin == NETFS_READPAGE ||
- rreq->origin == NETFS_READ_FOR_WRITE) &&
- list_is_first(&subreq->rreq_link, &stream->subrequests)
- ) {
- __set_bit(NETFS_SREQ_MADE_PROGRESS, &subreq->flags);
- netfs_wake_collector(rreq);
- }
- }
- EXPORT_SYMBOL(netfs_read_subreq_progress);
- /**
- * netfs_read_subreq_terminated - Note the termination of an I/O operation.
- * @subreq: The I/O request that has terminated.
- *
- * This tells the read helper that a contributory I/O operation has terminated,
- * one way or another, and that it should integrate the results.
- *
- * The caller indicates the outcome of the operation through @subreq->error,
- * supplying 0 to indicate a successful or retryable transfer (if
- * NETFS_SREQ_NEED_RETRY is set) or a negative error code. The helper will
- * look after reissuing I/O operations as appropriate and writing downloaded
- * data to the cache.
- *
- * Before calling, the filesystem should update subreq->transferred to track
- * the amount of data copied into the output buffer.
- */
- void netfs_read_subreq_terminated(struct netfs_io_subrequest *subreq)
- {
- struct netfs_io_request *rreq = subreq->rreq;
- switch (subreq->source) {
- case NETFS_READ_FROM_CACHE:
- netfs_stat(&netfs_n_rh_read_done);
- break;
- case NETFS_DOWNLOAD_FROM_SERVER:
- netfs_stat(&netfs_n_rh_download_done);
- break;
- default:
- break;
- }
- /* Deal with retry requests, short reads and errors. If we retry
- * but don't make progress, we abandon the attempt.
- */
- if (!subreq->error && subreq->transferred < subreq->len) {
- if (test_bit(NETFS_SREQ_HIT_EOF, &subreq->flags)) {
- trace_netfs_sreq(subreq, netfs_sreq_trace_hit_eof);
- } else if (test_bit(NETFS_SREQ_CLEAR_TAIL, &subreq->flags)) {
- trace_netfs_sreq(subreq, netfs_sreq_trace_need_clear);
- } else if (test_bit(NETFS_SREQ_NEED_RETRY, &subreq->flags)) {
- trace_netfs_sreq(subreq, netfs_sreq_trace_need_retry);
- } else if (test_bit(NETFS_SREQ_MADE_PROGRESS, &subreq->flags)) {
- __set_bit(NETFS_SREQ_NEED_RETRY, &subreq->flags);
- trace_netfs_sreq(subreq, netfs_sreq_trace_partial_read);
- } else {
- __set_bit(NETFS_SREQ_FAILED, &subreq->flags);
- subreq->error = -ENODATA;
- trace_netfs_sreq(subreq, netfs_sreq_trace_short);
- }
- }
- /* If need retry is set, error should not matter unless we hit too many
- * retries. Pause the generation of new subreqs
- */
- if (test_bit(NETFS_SREQ_NEED_RETRY, &subreq->flags)) {
- trace_netfs_rreq(rreq, netfs_rreq_trace_set_pause);
- set_bit(NETFS_RREQ_PAUSE, &rreq->flags);
- goto skip_error_checks;
- }
- if (unlikely(subreq->error < 0)) {
- trace_netfs_failure(rreq, subreq, subreq->error, netfs_fail_read);
- if (subreq->source == NETFS_READ_FROM_CACHE) {
- netfs_stat(&netfs_n_rh_read_failed);
- __set_bit(NETFS_SREQ_NEED_RETRY, &subreq->flags);
- } else {
- netfs_stat(&netfs_n_rh_download_failed);
- __set_bit(NETFS_SREQ_FAILED, &subreq->flags);
- }
- trace_netfs_rreq(rreq, netfs_rreq_trace_set_pause);
- set_bit(NETFS_RREQ_PAUSE, &rreq->flags);
- }
- skip_error_checks:
- trace_netfs_sreq(subreq, netfs_sreq_trace_terminated);
- netfs_subreq_clear_in_progress(subreq);
- netfs_put_subrequest(subreq, netfs_sreq_trace_put_terminated);
- }
- EXPORT_SYMBOL(netfs_read_subreq_terminated);
- /*
- * Handle termination of a read from the cache.
- */
- void netfs_cache_read_terminated(void *priv, ssize_t transferred_or_error)
- {
- struct netfs_io_subrequest *subreq = priv;
- if (transferred_or_error > 0) {
- subreq->error = 0;
- if (transferred_or_error > 0) {
- subreq->transferred += transferred_or_error;
- __set_bit(NETFS_SREQ_MADE_PROGRESS, &subreq->flags);
- }
- } else {
- subreq->error = transferred_or_error;
- }
- netfs_read_subreq_terminated(subreq);
- }
|