625 lines
15 KiB
C
625 lines
15 KiB
C
// SPDX-License-Identifier: GPL-2.0-only
|
|
/*
|
|
* MMU-based software IOTLB.
|
|
*
|
|
* Copyright (C) 2020-2021 Bytedance Inc. and/or its affiliates. All rights reserved.
|
|
*
|
|
* Author: Xie Yongji <xieyongji@bytedance.com>
|
|
*
|
|
*/
|
|
|
|
#include <linux/slab.h>
|
|
#include <linux/file.h>
|
|
#include <linux/anon_inodes.h>
|
|
#include <linux/highmem.h>
|
|
#include <linux/vmalloc.h>
|
|
#include <linux/vdpa.h>
|
|
|
|
#include "iova_domain.h"
|
|
|
|
static int vduse_iotlb_add_range(struct vduse_iova_domain *domain,
|
|
u64 start, u64 last,
|
|
u64 addr, unsigned int perm,
|
|
struct file *file, u64 offset)
|
|
{
|
|
struct vdpa_map_file *map_file;
|
|
int ret;
|
|
|
|
map_file = kmalloc(sizeof(*map_file), GFP_ATOMIC);
|
|
if (!map_file)
|
|
return -ENOMEM;
|
|
|
|
map_file->file = get_file(file);
|
|
map_file->offset = offset;
|
|
|
|
ret = vhost_iotlb_add_range_ctx(domain->iotlb, start, last,
|
|
addr, perm, map_file);
|
|
if (ret) {
|
|
fput(map_file->file);
|
|
kfree(map_file);
|
|
return ret;
|
|
}
|
|
return 0;
|
|
}
|
|
|
|
static void vduse_iotlb_del_range(struct vduse_iova_domain *domain,
|
|
u64 start, u64 last)
|
|
{
|
|
struct vdpa_map_file *map_file;
|
|
struct vhost_iotlb_map *map;
|
|
|
|
while ((map = vhost_iotlb_itree_first(domain->iotlb, start, last))) {
|
|
map_file = (struct vdpa_map_file *)map->opaque;
|
|
fput(map_file->file);
|
|
kfree(map_file);
|
|
vhost_iotlb_map_free(domain->iotlb, map);
|
|
}
|
|
}
|
|
|
|
int vduse_domain_set_map(struct vduse_iova_domain *domain,
|
|
struct vhost_iotlb *iotlb)
|
|
{
|
|
struct vdpa_map_file *map_file;
|
|
struct vhost_iotlb_map *map;
|
|
u64 start = 0ULL, last = ULLONG_MAX;
|
|
int ret;
|
|
|
|
spin_lock(&domain->iotlb_lock);
|
|
vduse_iotlb_del_range(domain, start, last);
|
|
|
|
for (map = vhost_iotlb_itree_first(iotlb, start, last); map;
|
|
map = vhost_iotlb_itree_next(map, start, last)) {
|
|
map_file = (struct vdpa_map_file *)map->opaque;
|
|
ret = vduse_iotlb_add_range(domain, map->start, map->last,
|
|
map->addr, map->perm,
|
|
map_file->file,
|
|
map_file->offset);
|
|
if (ret)
|
|
goto err;
|
|
}
|
|
spin_unlock(&domain->iotlb_lock);
|
|
|
|
return 0;
|
|
err:
|
|
vduse_iotlb_del_range(domain, start, last);
|
|
spin_unlock(&domain->iotlb_lock);
|
|
return ret;
|
|
}
|
|
|
|
void vduse_domain_clear_map(struct vduse_iova_domain *domain,
|
|
struct vhost_iotlb *iotlb)
|
|
{
|
|
struct vhost_iotlb_map *map;
|
|
u64 start = 0ULL, last = ULLONG_MAX;
|
|
|
|
spin_lock(&domain->iotlb_lock);
|
|
for (map = vhost_iotlb_itree_first(iotlb, start, last); map;
|
|
map = vhost_iotlb_itree_next(map, start, last)) {
|
|
vduse_iotlb_del_range(domain, map->start, map->last);
|
|
}
|
|
spin_unlock(&domain->iotlb_lock);
|
|
}
|
|
|
|
static int vduse_domain_map_bounce_page(struct vduse_iova_domain *domain,
|
|
u64 iova, u64 size, u64 paddr)
|
|
{
|
|
struct vduse_bounce_map *map;
|
|
u64 last = iova + size - 1;
|
|
|
|
while (iova <= last) {
|
|
map = &domain->bounce_maps[iova >> PAGE_SHIFT];
|
|
if (!map->bounce_page) {
|
|
map->bounce_page = alloc_page(GFP_ATOMIC);
|
|
if (!map->bounce_page)
|
|
return -ENOMEM;
|
|
}
|
|
map->orig_phys = paddr;
|
|
paddr += PAGE_SIZE;
|
|
iova += PAGE_SIZE;
|
|
}
|
|
return 0;
|
|
}
|
|
|
|
static void vduse_domain_unmap_bounce_page(struct vduse_iova_domain *domain,
|
|
u64 iova, u64 size)
|
|
{
|
|
struct vduse_bounce_map *map;
|
|
u64 last = iova + size - 1;
|
|
|
|
while (iova <= last) {
|
|
map = &domain->bounce_maps[iova >> PAGE_SHIFT];
|
|
map->orig_phys = INVALID_PHYS_ADDR;
|
|
iova += PAGE_SIZE;
|
|
}
|
|
}
|
|
|
|
static void do_bounce(phys_addr_t orig, void *addr, size_t size,
|
|
enum dma_data_direction dir)
|
|
{
|
|
unsigned long pfn = PFN_DOWN(orig);
|
|
unsigned int offset = offset_in_page(orig);
|
|
struct page *page;
|
|
unsigned int sz = 0;
|
|
|
|
while (size) {
|
|
sz = min_t(size_t, PAGE_SIZE - offset, size);
|
|
|
|
page = pfn_to_page(pfn);
|
|
if (dir == DMA_TO_DEVICE)
|
|
memcpy_from_page(addr, page, offset, sz);
|
|
else
|
|
memcpy_to_page(page, offset, addr, sz);
|
|
|
|
size -= sz;
|
|
pfn++;
|
|
addr += sz;
|
|
offset = 0;
|
|
}
|
|
}
|
|
|
|
static void vduse_domain_bounce(struct vduse_iova_domain *domain,
|
|
dma_addr_t iova, size_t size,
|
|
enum dma_data_direction dir)
|
|
{
|
|
struct vduse_bounce_map *map;
|
|
unsigned int offset;
|
|
void *addr;
|
|
size_t sz;
|
|
|
|
if (iova >= domain->bounce_size)
|
|
return;
|
|
|
|
while (size) {
|
|
map = &domain->bounce_maps[iova >> PAGE_SHIFT];
|
|
offset = offset_in_page(iova);
|
|
sz = min_t(size_t, PAGE_SIZE - offset, size);
|
|
|
|
if (WARN_ON(!map->bounce_page ||
|
|
map->orig_phys == INVALID_PHYS_ADDR))
|
|
return;
|
|
|
|
addr = kmap_local_page(map->bounce_page);
|
|
do_bounce(map->orig_phys + offset, addr + offset, sz, dir);
|
|
kunmap_local(addr);
|
|
size -= sz;
|
|
iova += sz;
|
|
}
|
|
}
|
|
|
|
static struct page *
|
|
vduse_domain_get_coherent_page(struct vduse_iova_domain *domain, u64 iova)
|
|
{
|
|
u64 start = iova & PAGE_MASK;
|
|
u64 last = start + PAGE_SIZE - 1;
|
|
struct vhost_iotlb_map *map;
|
|
struct page *page = NULL;
|
|
|
|
spin_lock(&domain->iotlb_lock);
|
|
map = vhost_iotlb_itree_first(domain->iotlb, start, last);
|
|
if (!map)
|
|
goto out;
|
|
|
|
page = pfn_to_page((map->addr + iova - map->start) >> PAGE_SHIFT);
|
|
get_page(page);
|
|
out:
|
|
spin_unlock(&domain->iotlb_lock);
|
|
|
|
return page;
|
|
}
|
|
|
|
static struct page *
|
|
vduse_domain_get_bounce_page(struct vduse_iova_domain *domain, u64 iova)
|
|
{
|
|
struct vduse_bounce_map *map;
|
|
struct page *page = NULL;
|
|
|
|
read_lock(&domain->bounce_lock);
|
|
map = &domain->bounce_maps[iova >> PAGE_SHIFT];
|
|
if (domain->user_bounce_pages || !map->bounce_page)
|
|
goto out;
|
|
|
|
page = map->bounce_page;
|
|
get_page(page);
|
|
out:
|
|
read_unlock(&domain->bounce_lock);
|
|
|
|
return page;
|
|
}
|
|
|
|
static void
|
|
vduse_domain_free_kernel_bounce_pages(struct vduse_iova_domain *domain)
|
|
{
|
|
struct vduse_bounce_map *map;
|
|
unsigned long pfn, bounce_pfns;
|
|
|
|
bounce_pfns = domain->bounce_size >> PAGE_SHIFT;
|
|
|
|
for (pfn = 0; pfn < bounce_pfns; pfn++) {
|
|
map = &domain->bounce_maps[pfn];
|
|
if (WARN_ON(map->orig_phys != INVALID_PHYS_ADDR))
|
|
continue;
|
|
|
|
if (!map->bounce_page)
|
|
continue;
|
|
|
|
__free_page(map->bounce_page);
|
|
map->bounce_page = NULL;
|
|
}
|
|
}
|
|
|
|
int vduse_domain_add_user_bounce_pages(struct vduse_iova_domain *domain,
|
|
struct page **pages, int count)
|
|
{
|
|
struct vduse_bounce_map *map;
|
|
int i, ret;
|
|
|
|
/* Now we don't support partial mapping */
|
|
if (count != (domain->bounce_size >> PAGE_SHIFT))
|
|
return -EINVAL;
|
|
|
|
write_lock(&domain->bounce_lock);
|
|
ret = -EEXIST;
|
|
if (domain->user_bounce_pages)
|
|
goto out;
|
|
|
|
for (i = 0; i < count; i++) {
|
|
map = &domain->bounce_maps[i];
|
|
if (map->bounce_page) {
|
|
/* Copy kernel page to user page if it's in use */
|
|
if (map->orig_phys != INVALID_PHYS_ADDR)
|
|
memcpy_to_page(pages[i], 0,
|
|
page_address(map->bounce_page),
|
|
PAGE_SIZE);
|
|
__free_page(map->bounce_page);
|
|
}
|
|
map->bounce_page = pages[i];
|
|
get_page(pages[i]);
|
|
}
|
|
domain->user_bounce_pages = true;
|
|
ret = 0;
|
|
out:
|
|
write_unlock(&domain->bounce_lock);
|
|
|
|
return ret;
|
|
}
|
|
|
|
void vduse_domain_remove_user_bounce_pages(struct vduse_iova_domain *domain)
|
|
{
|
|
struct vduse_bounce_map *map;
|
|
unsigned long i, count;
|
|
|
|
write_lock(&domain->bounce_lock);
|
|
if (!domain->user_bounce_pages)
|
|
goto out;
|
|
|
|
count = domain->bounce_size >> PAGE_SHIFT;
|
|
for (i = 0; i < count; i++) {
|
|
struct page *page = NULL;
|
|
|
|
map = &domain->bounce_maps[i];
|
|
if (WARN_ON(!map->bounce_page))
|
|
continue;
|
|
|
|
/* Copy user page to kernel page if it's in use */
|
|
if (map->orig_phys != INVALID_PHYS_ADDR) {
|
|
page = alloc_page(GFP_ATOMIC | __GFP_NOFAIL);
|
|
memcpy_from_page(page_address(page),
|
|
map->bounce_page, 0, PAGE_SIZE);
|
|
}
|
|
put_page(map->bounce_page);
|
|
map->bounce_page = page;
|
|
}
|
|
domain->user_bounce_pages = false;
|
|
out:
|
|
write_unlock(&domain->bounce_lock);
|
|
}
|
|
|
|
void vduse_domain_reset_bounce_map(struct vduse_iova_domain *domain)
|
|
{
|
|
if (!domain->bounce_map)
|
|
return;
|
|
|
|
spin_lock(&domain->iotlb_lock);
|
|
if (!domain->bounce_map)
|
|
goto unlock;
|
|
|
|
vduse_iotlb_del_range(domain, 0, domain->bounce_size - 1);
|
|
domain->bounce_map = 0;
|
|
unlock:
|
|
spin_unlock(&domain->iotlb_lock);
|
|
}
|
|
|
|
static int vduse_domain_init_bounce_map(struct vduse_iova_domain *domain)
|
|
{
|
|
int ret = 0;
|
|
|
|
if (domain->bounce_map)
|
|
return 0;
|
|
|
|
spin_lock(&domain->iotlb_lock);
|
|
if (domain->bounce_map)
|
|
goto unlock;
|
|
|
|
ret = vduse_iotlb_add_range(domain, 0, domain->bounce_size - 1,
|
|
0, VHOST_MAP_RW, domain->file, 0);
|
|
if (ret)
|
|
goto unlock;
|
|
|
|
domain->bounce_map = 1;
|
|
unlock:
|
|
spin_unlock(&domain->iotlb_lock);
|
|
return ret;
|
|
}
|
|
|
|
static dma_addr_t
|
|
vduse_domain_alloc_iova(struct iova_domain *iovad,
|
|
unsigned long size, unsigned long limit)
|
|
{
|
|
unsigned long shift = iova_shift(iovad);
|
|
unsigned long iova_len = iova_align(iovad, size) >> shift;
|
|
unsigned long iova_pfn;
|
|
|
|
iova_pfn = alloc_iova_fast(iovad, iova_len, limit >> shift, true);
|
|
|
|
return (dma_addr_t)iova_pfn << shift;
|
|
}
|
|
|
|
static void vduse_domain_free_iova(struct iova_domain *iovad,
|
|
dma_addr_t iova, size_t size)
|
|
{
|
|
unsigned long shift = iova_shift(iovad);
|
|
unsigned long iova_len = iova_align(iovad, size) >> shift;
|
|
|
|
free_iova_fast(iovad, iova >> shift, iova_len);
|
|
}
|
|
|
|
dma_addr_t vduse_domain_map_page(struct vduse_iova_domain *domain,
|
|
struct page *page, unsigned long offset,
|
|
size_t size, enum dma_data_direction dir,
|
|
unsigned long attrs)
|
|
{
|
|
struct iova_domain *iovad = &domain->stream_iovad;
|
|
unsigned long limit = domain->bounce_size - 1;
|
|
phys_addr_t pa = page_to_phys(page) + offset;
|
|
dma_addr_t iova = vduse_domain_alloc_iova(iovad, size, limit);
|
|
|
|
if (!iova)
|
|
return DMA_MAPPING_ERROR;
|
|
|
|
if (vduse_domain_init_bounce_map(domain))
|
|
goto err;
|
|
|
|
read_lock(&domain->bounce_lock);
|
|
if (vduse_domain_map_bounce_page(domain, (u64)iova, (u64)size, pa))
|
|
goto err_unlock;
|
|
|
|
if (dir == DMA_TO_DEVICE || dir == DMA_BIDIRECTIONAL)
|
|
vduse_domain_bounce(domain, iova, size, DMA_TO_DEVICE);
|
|
|
|
read_unlock(&domain->bounce_lock);
|
|
|
|
return iova;
|
|
err_unlock:
|
|
read_unlock(&domain->bounce_lock);
|
|
err:
|
|
vduse_domain_free_iova(iovad, iova, size);
|
|
return DMA_MAPPING_ERROR;
|
|
}
|
|
|
|
void vduse_domain_unmap_page(struct vduse_iova_domain *domain,
|
|
dma_addr_t dma_addr, size_t size,
|
|
enum dma_data_direction dir, unsigned long attrs)
|
|
{
|
|
struct iova_domain *iovad = &domain->stream_iovad;
|
|
|
|
read_lock(&domain->bounce_lock);
|
|
if (dir == DMA_FROM_DEVICE || dir == DMA_BIDIRECTIONAL)
|
|
vduse_domain_bounce(domain, dma_addr, size, DMA_FROM_DEVICE);
|
|
|
|
vduse_domain_unmap_bounce_page(domain, (u64)dma_addr, (u64)size);
|
|
read_unlock(&domain->bounce_lock);
|
|
vduse_domain_free_iova(iovad, dma_addr, size);
|
|
}
|
|
|
|
void *vduse_domain_alloc_coherent(struct vduse_iova_domain *domain,
|
|
size_t size, dma_addr_t *dma_addr,
|
|
gfp_t flag, unsigned long attrs)
|
|
{
|
|
struct iova_domain *iovad = &domain->consistent_iovad;
|
|
unsigned long limit = domain->iova_limit;
|
|
dma_addr_t iova = vduse_domain_alloc_iova(iovad, size, limit);
|
|
void *orig = alloc_pages_exact(size, flag);
|
|
|
|
if (!iova || !orig)
|
|
goto err;
|
|
|
|
spin_lock(&domain->iotlb_lock);
|
|
if (vduse_iotlb_add_range(domain, (u64)iova, (u64)iova + size - 1,
|
|
virt_to_phys(orig), VHOST_MAP_RW,
|
|
domain->file, (u64)iova)) {
|
|
spin_unlock(&domain->iotlb_lock);
|
|
goto err;
|
|
}
|
|
spin_unlock(&domain->iotlb_lock);
|
|
|
|
*dma_addr = iova;
|
|
|
|
return orig;
|
|
err:
|
|
*dma_addr = DMA_MAPPING_ERROR;
|
|
if (orig)
|
|
free_pages_exact(orig, size);
|
|
if (iova)
|
|
vduse_domain_free_iova(iovad, iova, size);
|
|
|
|
return NULL;
|
|
}
|
|
|
|
void vduse_domain_free_coherent(struct vduse_iova_domain *domain, size_t size,
|
|
void *vaddr, dma_addr_t dma_addr,
|
|
unsigned long attrs)
|
|
{
|
|
struct iova_domain *iovad = &domain->consistent_iovad;
|
|
struct vhost_iotlb_map *map;
|
|
struct vdpa_map_file *map_file;
|
|
phys_addr_t pa;
|
|
|
|
spin_lock(&domain->iotlb_lock);
|
|
map = vhost_iotlb_itree_first(domain->iotlb, (u64)dma_addr,
|
|
(u64)dma_addr + size - 1);
|
|
if (WARN_ON(!map)) {
|
|
spin_unlock(&domain->iotlb_lock);
|
|
return;
|
|
}
|
|
map_file = (struct vdpa_map_file *)map->opaque;
|
|
fput(map_file->file);
|
|
kfree(map_file);
|
|
pa = map->addr;
|
|
vhost_iotlb_map_free(domain->iotlb, map);
|
|
spin_unlock(&domain->iotlb_lock);
|
|
|
|
vduse_domain_free_iova(iovad, dma_addr, size);
|
|
free_pages_exact(phys_to_virt(pa), size);
|
|
}
|
|
|
|
static vm_fault_t vduse_domain_mmap_fault(struct vm_fault *vmf)
|
|
{
|
|
struct vduse_iova_domain *domain = vmf->vma->vm_private_data;
|
|
unsigned long iova = vmf->pgoff << PAGE_SHIFT;
|
|
struct page *page;
|
|
|
|
if (!domain)
|
|
return VM_FAULT_SIGBUS;
|
|
|
|
if (iova < domain->bounce_size)
|
|
page = vduse_domain_get_bounce_page(domain, iova);
|
|
else
|
|
page = vduse_domain_get_coherent_page(domain, iova);
|
|
|
|
if (!page)
|
|
return VM_FAULT_SIGBUS;
|
|
|
|
vmf->page = page;
|
|
|
|
return 0;
|
|
}
|
|
|
|
static const struct vm_operations_struct vduse_domain_mmap_ops = {
|
|
.fault = vduse_domain_mmap_fault,
|
|
};
|
|
|
|
static int vduse_domain_mmap(struct file *file, struct vm_area_struct *vma)
|
|
{
|
|
struct vduse_iova_domain *domain = file->private_data;
|
|
|
|
vma->vm_flags |= VM_DONTDUMP | VM_DONTEXPAND;
|
|
vma->vm_private_data = domain;
|
|
vma->vm_ops = &vduse_domain_mmap_ops;
|
|
|
|
return 0;
|
|
}
|
|
|
|
static int vduse_domain_release(struct inode *inode, struct file *file)
|
|
{
|
|
struct vduse_iova_domain *domain = file->private_data;
|
|
|
|
spin_lock(&domain->iotlb_lock);
|
|
vduse_iotlb_del_range(domain, 0, ULLONG_MAX);
|
|
vduse_domain_remove_user_bounce_pages(domain);
|
|
vduse_domain_free_kernel_bounce_pages(domain);
|
|
spin_unlock(&domain->iotlb_lock);
|
|
put_iova_domain(&domain->stream_iovad);
|
|
put_iova_domain(&domain->consistent_iovad);
|
|
vhost_iotlb_free(domain->iotlb);
|
|
vfree(domain->bounce_maps);
|
|
kfree(domain);
|
|
|
|
return 0;
|
|
}
|
|
|
|
static const struct file_operations vduse_domain_fops = {
|
|
.owner = THIS_MODULE,
|
|
.mmap = vduse_domain_mmap,
|
|
.release = vduse_domain_release,
|
|
};
|
|
|
|
void vduse_domain_destroy(struct vduse_iova_domain *domain)
|
|
{
|
|
fput(domain->file);
|
|
}
|
|
|
|
struct vduse_iova_domain *
|
|
vduse_domain_create(unsigned long iova_limit, size_t bounce_size)
|
|
{
|
|
struct vduse_iova_domain *domain;
|
|
struct file *file;
|
|
struct vduse_bounce_map *map;
|
|
unsigned long pfn, bounce_pfns;
|
|
int ret;
|
|
|
|
bounce_pfns = PAGE_ALIGN(bounce_size) >> PAGE_SHIFT;
|
|
if (iova_limit <= bounce_size)
|
|
return NULL;
|
|
|
|
domain = kzalloc(sizeof(*domain), GFP_KERNEL);
|
|
if (!domain)
|
|
return NULL;
|
|
|
|
domain->iotlb = vhost_iotlb_alloc(0, 0);
|
|
if (!domain->iotlb)
|
|
goto err_iotlb;
|
|
|
|
domain->iova_limit = iova_limit;
|
|
domain->bounce_size = PAGE_ALIGN(bounce_size);
|
|
domain->bounce_maps = vzalloc(bounce_pfns *
|
|
sizeof(struct vduse_bounce_map));
|
|
if (!domain->bounce_maps)
|
|
goto err_map;
|
|
|
|
for (pfn = 0; pfn < bounce_pfns; pfn++) {
|
|
map = &domain->bounce_maps[pfn];
|
|
map->orig_phys = INVALID_PHYS_ADDR;
|
|
}
|
|
file = anon_inode_getfile("[vduse-domain]", &vduse_domain_fops,
|
|
domain, O_RDWR);
|
|
if (IS_ERR(file))
|
|
goto err_file;
|
|
|
|
domain->file = file;
|
|
rwlock_init(&domain->bounce_lock);
|
|
spin_lock_init(&domain->iotlb_lock);
|
|
init_iova_domain(&domain->stream_iovad,
|
|
PAGE_SIZE, IOVA_START_PFN);
|
|
ret = iova_domain_init_rcaches(&domain->stream_iovad);
|
|
if (ret)
|
|
goto err_iovad_stream;
|
|
init_iova_domain(&domain->consistent_iovad,
|
|
PAGE_SIZE, bounce_pfns);
|
|
ret = iova_domain_init_rcaches(&domain->consistent_iovad);
|
|
if (ret)
|
|
goto err_iovad_consistent;
|
|
|
|
return domain;
|
|
err_iovad_consistent:
|
|
put_iova_domain(&domain->stream_iovad);
|
|
err_iovad_stream:
|
|
fput(file);
|
|
err_file:
|
|
vfree(domain->bounce_maps);
|
|
err_map:
|
|
vhost_iotlb_free(domain->iotlb);
|
|
err_iotlb:
|
|
kfree(domain);
|
|
return NULL;
|
|
}
|
|
|
|
int vduse_domain_init(void)
|
|
{
|
|
return iova_cache_get();
|
|
}
|
|
|
|
void vduse_domain_exit(void)
|
|
{
|
|
iova_cache_put();
|
|
}
|