| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335 |
- /*
- * Copyright (c) 2005 Topspin Communications. All rights reserved.
- * Copyright (c) 2005 Cisco Systems. All rights reserved.
- * Copyright (c) 2005 Mellanox Technologies. All rights reserved.
- * Copyright (c) 2020 Intel Corporation. All rights reserved.
- *
- * This software is available to you under a choice of one of two
- * licenses. You may choose to be licensed under the terms of the GNU
- * General Public License (GPL) Version 2, available from the file
- * COPYING in the main directory of this source tree, or the
- * OpenIB.org BSD license below:
- *
- * Redistribution and use in source and binary forms, with or
- * without modification, are permitted provided that the following
- * conditions are met:
- *
- * - Redistributions of source code must retain the above
- * copyright notice, this list of conditions and the following
- * disclaimer.
- *
- * - Redistributions in binary form must reproduce the above
- * copyright notice, this list of conditions and the following
- * disclaimer in the documentation and/or other materials
- * provided with the distribution.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
- * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
- * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
- #include <linux/mm.h>
- #include <linux/dma-mapping.h>
- #include <linux/sched/signal.h>
- #include <linux/sched/mm.h>
- #include <linux/export.h>
- #include <linux/slab.h>
- #include <linux/pagemap.h>
- #include <linux/count_zeros.h>
- #include <rdma/ib_umem_odp.h>
- #include "uverbs.h"
- #define RESCHED_LOOP_CNT_THRESHOLD 0x1000
- static void __ib_umem_release(struct ib_device *dev, struct ib_umem *umem, int dirty)
- {
- bool make_dirty = umem->writable && dirty;
- struct scatterlist *sg;
- unsigned int i;
- if (dirty)
- ib_dma_unmap_sgtable_attrs(dev, &umem->sgt_append.sgt,
- DMA_BIDIRECTIONAL,
- DMA_ATTR_REQUIRE_COHERENT);
- for_each_sgtable_sg(&umem->sgt_append.sgt, sg, i) {
- unpin_user_page_range_dirty_lock(sg_page(sg),
- DIV_ROUND_UP(sg->length, PAGE_SIZE), make_dirty);
- if (i && !(i % RESCHED_LOOP_CNT_THRESHOLD))
- cond_resched();
- }
- sg_free_append_table(&umem->sgt_append);
- }
- /**
- * ib_umem_find_best_pgsz - Find best HW page size to use for this MR
- *
- * @umem: umem struct
- * @pgsz_bitmap: bitmap of HW supported page sizes
- * @virt: IOVA
- *
- * This helper is intended for HW that support multiple page
- * sizes but can do only a single page size in an MR.
- *
- * Returns 0 if the umem requires page sizes not supported by
- * the driver to be mapped. Drivers always supporting PAGE_SIZE
- * or smaller will never see a 0 result.
- */
- unsigned long ib_umem_find_best_pgsz(struct ib_umem *umem,
- unsigned long pgsz_bitmap,
- unsigned long virt)
- {
- unsigned long curr_len = 0;
- dma_addr_t curr_base = ~0;
- unsigned long va, pgoff;
- struct scatterlist *sg;
- dma_addr_t mask;
- dma_addr_t end;
- int i;
- umem->iova = va = virt;
- if (umem->is_odp) {
- unsigned int page_size = BIT(to_ib_umem_odp(umem)->page_shift);
- /* ODP must always be self consistent. */
- if (!(pgsz_bitmap & page_size))
- return 0;
- return page_size;
- }
- /* The best result is the smallest page size that results in the minimum
- * number of required pages. Compute the largest page size that could
- * work based on VA address bits that don't change.
- */
- mask = pgsz_bitmap &
- GENMASK(BITS_PER_LONG - 1,
- bits_per((umem->length - 1 + virt) ^ virt));
- /* offset into first SGL */
- pgoff = umem->address & ~PAGE_MASK;
- for_each_sgtable_dma_sg(&umem->sgt_append.sgt, sg, i) {
- /* If the current entry is physically contiguous with the previous
- * one, no need to take its start addresses into consideration.
- */
- if (check_add_overflow(curr_base, curr_len, &end) ||
- end != sg_dma_address(sg)) {
- curr_base = sg_dma_address(sg);
- curr_len = 0;
- /* Reduce max page size if VA/PA bits differ */
- mask |= (curr_base + pgoff) ^ va;
- /* The alignment of any VA matching a discontinuity point
- * in the physical memory sets the maximum possible page
- * size as this must be a starting point of a new page that
- * needs to be aligned.
- */
- if (i != 0)
- mask |= va;
- }
- curr_len += sg_dma_len(sg);
- va += sg_dma_len(sg) - pgoff;
- pgoff = 0;
- }
- /* The mask accumulates 1's in each position where the VA and physical
- * address differ, thus the length of trailing 0 is the largest page
- * size that can pass the VA through to the physical.
- */
- if (mask)
- pgsz_bitmap &= GENMASK(count_trailing_zeros(mask), 0);
- return pgsz_bitmap ? rounddown_pow_of_two(pgsz_bitmap) : 0;
- }
- EXPORT_SYMBOL(ib_umem_find_best_pgsz);
- /**
- * ib_umem_get - Pin and DMA map userspace memory.
- *
- * @device: IB device to connect UMEM
- * @addr: userspace virtual address to start at
- * @size: length of region to pin
- * @access: IB_ACCESS_xxx flags for memory being pinned
- */
- struct ib_umem *ib_umem_get(struct ib_device *device, unsigned long addr,
- size_t size, int access)
- {
- struct ib_umem *umem;
- struct page **page_list;
- unsigned long lock_limit;
- unsigned long new_pinned;
- unsigned long cur_base;
- unsigned long dma_attr = DMA_ATTR_REQUIRE_COHERENT;
- struct mm_struct *mm;
- unsigned long npages;
- int pinned, ret;
- unsigned int gup_flags = FOLL_LONGTERM;
- /*
- * If the combination of the addr and size requested for this memory
- * region causes an integer overflow, return error.
- */
- if (((addr + size) < addr) ||
- PAGE_ALIGN(addr + size) < (addr + size))
- return ERR_PTR(-EINVAL);
- if (!can_do_mlock())
- return ERR_PTR(-EPERM);
- if (access & IB_ACCESS_ON_DEMAND)
- return ERR_PTR(-EOPNOTSUPP);
- umem = kzalloc_obj(*umem);
- if (!umem)
- return ERR_PTR(-ENOMEM);
- umem->ibdev = device;
- umem->length = size;
- umem->address = addr;
- /*
- * Drivers should call ib_umem_find_best_pgsz() to set the iova
- * correctly.
- */
- umem->iova = addr;
- umem->writable = ib_access_writable(access);
- umem->owning_mm = mm = current->mm;
- mmgrab(mm);
- page_list = (struct page **) __get_free_page(GFP_KERNEL);
- if (!page_list) {
- ret = -ENOMEM;
- goto umem_kfree;
- }
- npages = ib_umem_num_pages(umem);
- if (npages == 0 || npages > UINT_MAX) {
- ret = -EINVAL;
- goto out;
- }
- lock_limit = rlimit(RLIMIT_MEMLOCK) >> PAGE_SHIFT;
- new_pinned = atomic64_add_return(npages, &mm->pinned_vm);
- if (new_pinned > lock_limit && !capable(CAP_IPC_LOCK)) {
- atomic64_sub(npages, &mm->pinned_vm);
- ret = -ENOMEM;
- goto out;
- }
- cur_base = addr & PAGE_MASK;
- if (umem->writable)
- gup_flags |= FOLL_WRITE;
- while (npages) {
- cond_resched();
- pinned = pin_user_pages_fast(cur_base,
- min_t(unsigned long, npages,
- PAGE_SIZE /
- sizeof(struct page *)),
- gup_flags, page_list);
- if (pinned < 0) {
- ret = pinned;
- goto umem_release;
- }
- cur_base += pinned * PAGE_SIZE;
- npages -= pinned;
- ret = sg_alloc_append_table_from_pages(
- &umem->sgt_append, page_list, pinned, 0,
- pinned << PAGE_SHIFT, ib_dma_max_seg_size(device),
- npages, GFP_KERNEL);
- if (ret) {
- unpin_user_pages_dirty_lock(page_list, pinned, 0);
- goto umem_release;
- }
- }
- if (access & IB_ACCESS_RELAXED_ORDERING)
- dma_attr |= DMA_ATTR_WEAK_ORDERING;
- ret = ib_dma_map_sgtable_attrs(device, &umem->sgt_append.sgt,
- DMA_BIDIRECTIONAL, dma_attr);
- if (ret)
- goto umem_release;
- goto out;
- umem_release:
- __ib_umem_release(device, umem, 0);
- atomic64_sub(ib_umem_num_pages(umem), &mm->pinned_vm);
- out:
- free_page((unsigned long) page_list);
- umem_kfree:
- if (ret) {
- mmdrop(umem->owning_mm);
- kfree(umem);
- }
- return ret ? ERR_PTR(ret) : umem;
- }
- EXPORT_SYMBOL(ib_umem_get);
- /**
- * ib_umem_release - release memory pinned with ib_umem_get
- * @umem: umem struct to release
- */
- void ib_umem_release(struct ib_umem *umem)
- {
- if (!umem)
- return;
- if (umem->is_dmabuf)
- return ib_umem_dmabuf_release(to_ib_umem_dmabuf(umem));
- if (umem->is_odp)
- return ib_umem_odp_release(to_ib_umem_odp(umem));
- __ib_umem_release(umem->ibdev, umem, 1);
- atomic64_sub(ib_umem_num_pages(umem), &umem->owning_mm->pinned_vm);
- mmdrop(umem->owning_mm);
- kfree(umem);
- }
- EXPORT_SYMBOL(ib_umem_release);
- /*
- * Copy from the given ib_umem's pages to the given buffer.
- *
- * umem - the umem to copy from
- * offset - offset to start copying from
- * dst - destination buffer
- * length - buffer length
- *
- * Returns 0 on success, or an error code.
- */
- int ib_umem_copy_from(void *dst, struct ib_umem *umem, size_t offset,
- size_t length)
- {
- size_t end = offset + length;
- int ret;
- if (offset > umem->length || length > umem->length - offset) {
- pr_err("%s not in range. offset: %zd umem length: %zd end: %zd\n",
- __func__, offset, umem->length, end);
- return -EINVAL;
- }
- ret = sg_pcopy_to_buffer(umem->sgt_append.sgt.sgl,
- umem->sgt_append.sgt.orig_nents, dst, length,
- offset + ib_umem_offset(umem));
- if (ret < 0)
- return ret;
- else if (ret != length)
- return -EINVAL;
- else
- return 0;
- }
- EXPORT_SYMBOL(ib_umem_copy_from);
|