// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
/*
 * Copyright (c) 2021-2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved
 */

#include "cmd.h"

enum { CQ_OK = 0, CQ_EMPTY = -1, CQ_POLL_ERR = -2 };

static int mlx5vf_cmd_get_vhca_id(struct mlx5_core_dev *mdev, u16 function_id,
				  u16 *vhca_id);
static void
_mlx5vf_free_page_tracker_resources(struct mlx5vf_pci_core_device *mvdev);

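/*
 * Suspend/resume wrappers: issue SUSPEND_VHCA / RESUME_VHCA against this
 * VF's VHCA. op_mod is supplied by the caller and forwarded unchanged to
 * select the exact transition. Both require state_mutex and refuse with
 * -ENOTCONN once the PF has detached the VF (see mlx5fv_vf_event()).
 */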
int mlx5vf_cmd_suspend_vhca(struct mlx5vf_pci_core_device *mvdev, u16 op_mod)
{
	u32 out[MLX5_ST_SZ_DW(suspend_vhca_out)] = {};
	u32 in[MLX5_ST_SZ_DW(suspend_vhca_in)] = {};

	lockdep_assert_held(&mvdev->state_mutex);
	if (mvdev->mdev_detach)
		return -ENOTCONN;

	MLX5_SET(suspend_vhca_in, in, opcode, MLX5_CMD_OP_SUSPEND_VHCA);
	MLX5_SET(suspend_vhca_in, in, vhca_id, mvdev->vhca_id);
	MLX5_SET(suspend_vhca_in, in, op_mod, op_mod);

	return mlx5_cmd_exec_inout(mvdev->mdev, suspend_vhca, in, out);
}

int mlx5vf_cmd_resume_vhca(struct mlx5vf_pci_core_device *mvdev, u16 op_mod)
{
	u32 out[MLX5_ST_SZ_DW(resume_vhca_out)] = {};
	u32 in[MLX5_ST_SZ_DW(resume_vhca_in)] = {};

	lockdep_assert_held(&mvdev->state_mutex);
	if (mvdev->mdev_detach)
		return -ENOTCONN;

	MLX5_SET(resume_vhca_in, in, opcode, MLX5_CMD_OP_RESUME_VHCA);
	MLX5_SET(resume_vhca_in, in, vhca_id, mvdev->vhca_id);
	MLX5_SET(resume_vhca_in, in, op_mod, op_mod);

	return mlx5_cmd_exec_inout(mvdev->mdev, resume_vhca, in, out);
}

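/*
 * Ask the device how much buffer space the VHCA migration state image
 * needs; the device-reported required_umem_size is returned through
 * @state_size so the caller can size the save/load buffer.
 */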
int mlx5vf_cmd_query_vhca_migration_state(struct mlx5vf_pci_core_device *mvdev,
					  size_t *state_size)
{
	u32 out[MLX5_ST_SZ_DW(query_vhca_migration_state_out)] = {};
	u32 in[MLX5_ST_SZ_DW(query_vhca_migration_state_in)] = {};
	int ret;

	lockdep_assert_held(&mvdev->state_mutex);
	if (mvdev->mdev_detach)
		return -ENOTCONN;

	MLX5_SET(query_vhca_migration_state_in, in, opcode,
		 MLX5_CMD_OP_QUERY_VHCA_MIGRATION_STATE);
	MLX5_SET(query_vhca_migration_state_in, in, vhca_id, mvdev->vhca_id);
	MLX5_SET(query_vhca_migration_state_in, in, op_mod, 0);

	ret = mlx5_cmd_exec_inout(mvdev->mdev, query_vhca_migration_state, in,
				  out);
	if (ret)
		return ret;

	*state_size = MLX5_GET(query_vhca_migration_state_out, out,
			       required_umem_size);
	return 0;
}

static void set_tracker_error(struct mlx5vf_pci_core_device *mvdev)
{
	/* Mark the tracker under an error and wake it up if it's running */
	mvdev->tracker.is_err = true;
	complete(&mvdev->tracker_comp);
}

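/*
 * SR-IOV blocking notifier: tracks whether the PF still backs this VF.
 * mdev_detach is flipped under state_mutex so the command wrappers above
 * can refuse to touch the device; on VF disable the migration state is
 * torn down first, outside the lock, via mlx5vf_cmd_close_migratable().
 */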
static int mlx5fv_vf_event(struct notifier_block *nb,
			   unsigned long event, void *data)
{
	struct mlx5vf_pci_core_device *mvdev =
		container_of(nb, struct mlx5vf_pci_core_device, nb);

	switch (event) {
	case MLX5_PF_NOTIFY_ENABLE_VF:
		mutex_lock(&mvdev->state_mutex);
		mvdev->mdev_detach = false;
		mlx5vf_state_mutex_unlock(mvdev);
		break;
	case MLX5_PF_NOTIFY_DISABLE_VF:
		mlx5vf_cmd_close_migratable(mvdev);
		mutex_lock(&mvdev->state_mutex);
		mvdev->mdev_detach = true;
		mlx5vf_state_mutex_unlock(mvdev);
		break;
	default:
		break;
	}

	return 0;
}

void mlx5vf_cmd_close_migratable(struct mlx5vf_pci_core_device *mvdev)
{
	if (!mvdev->migrate_cap)
		return;

	/* Must be done outside the lock to let it progress */
	set_tracker_error(mvdev);
	mutex_lock(&mvdev->state_mutex);
	mlx5vf_disable_fds(mvdev);
	_mlx5vf_free_page_tracker_resources(mvdev);
	mlx5vf_state_mutex_unlock(mvdev);
}

void mlx5vf_cmd_remove_migratable(struct mlx5vf_pci_core_device *mvdev)
{
	if (!mvdev->migrate_cap)
		return;

	mlx5_sriov_blocking_notifier_unregister(mvdev->mdev, mvdev->vf_id,
						&mvdev->nb);
	destroy_workqueue(mvdev->cb_wq);
}

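/*
 * Probe-time setup: decide whether this VF exposes VFIO migration.
 * Requires the migration HCA cap, a resolvable VHCA id and an ordered
 * workqueue for the save-completion work; dirty-page logging ops are
 * only wired up when the adv_virtualization cap is present. Any failure
 * leaves migrate_cap clear, i.e. the device is simply not migratable.
 */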
void mlx5vf_cmd_set_migratable(struct mlx5vf_pci_core_device *mvdev,
			       const struct vfio_migration_ops *mig_ops,
			       const struct vfio_log_ops *log_ops)
{
	struct pci_dev *pdev = mvdev->core_device.pdev;
	int ret;

	if (!pdev->is_virtfn)
		return;

	mvdev->mdev = mlx5_vf_get_core_dev(pdev);
	if (!mvdev->mdev)
		return;

	if (!MLX5_CAP_GEN(mvdev->mdev, migration))
		goto end;

	mvdev->vf_id = pci_iov_vf_id(pdev);
	if (mvdev->vf_id < 0)
		goto end;

	if (mlx5vf_cmd_get_vhca_id(mvdev->mdev, mvdev->vf_id + 1,
				   &mvdev->vhca_id))
		goto end;

	mvdev->cb_wq = alloc_ordered_workqueue("mlx5vf_wq", 0);
	if (!mvdev->cb_wq)
		goto end;

	mutex_init(&mvdev->state_mutex);
	spin_lock_init(&mvdev->reset_lock);
	mvdev->nb.notifier_call = mlx5fv_vf_event;
	ret = mlx5_sriov_blocking_notifier_register(mvdev->mdev, mvdev->vf_id,
						    &mvdev->nb);
	if (ret) {
		destroy_workqueue(mvdev->cb_wq);
		goto end;
	}

	mvdev->migrate_cap = 1;
	mvdev->core_device.vdev.migration_flags =
		VFIO_MIGRATION_STOP_COPY |
		VFIO_MIGRATION_P2P;
	mvdev->core_device.vdev.mig_ops = mig_ops;
	init_completion(&mvdev->tracker_comp);
	if (MLX5_CAP_GEN(mvdev->mdev, adv_virtualization))
		mvdev->core_device.vdev.log_ops = log_ops;

end:
	mlx5_vf_put_core_dev(mvdev->mdev);
}

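/*
 * Resolve the VHCA id of another function via QUERY_HCA_CAP with
 * other_function set; @function_id is the VF's function index (the
 * caller above passes vf_id + 1).
 */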
static int mlx5vf_cmd_get_vhca_id(struct mlx5_core_dev *mdev, u16 function_id,
				  u16 *vhca_id)
{
	u32 in[MLX5_ST_SZ_DW(query_hca_cap_in)] = {};
	int out_size;
	void *out;
	int ret;

	out_size = MLX5_ST_SZ_BYTES(query_hca_cap_out);
	out = kzalloc(out_size, GFP_KERNEL);
	if (!out)
		return -ENOMEM;

	MLX5_SET(query_hca_cap_in, in, opcode, MLX5_CMD_OP_QUERY_HCA_CAP);
	MLX5_SET(query_hca_cap_in, in, other_function, 1);
	MLX5_SET(query_hca_cap_in, in, function_id, function_id);
	MLX5_SET(query_hca_cap_in, in, op_mod,
		 MLX5_SET_HCA_CAP_OP_MOD_GENERAL_DEVICE << 1 |
		 HCA_CAP_OPMOD_GET_CUR);

	ret = mlx5_cmd_exec_inout(mdev, query_hca_cap, in, out);
	if (ret)
		goto err_exec;

	*vhca_id = MLX5_GET(query_hca_cap_out, out,
			    capability.cmd_hca_cap.vhca_id);

err_exec:
	kfree(out);
	return ret;
}

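/*
 * Build an MTT-based mkey over either the migration file's scatter
 * table (@migf) or the tracker's receive buffer (@recv_buf); exactly
 * one of the two is expected to be set. Local and remote read/write
 * are all enabled since the same helper serves both the save (device
 * writes) and load (device reads) directions.
 */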
static int _create_mkey(struct mlx5_core_dev *mdev, u32 pdn,
			struct mlx5_vf_migration_file *migf,
			struct mlx5_vhca_recv_buf *recv_buf,
			u32 *mkey)
{
	size_t npages = migf ? DIV_ROUND_UP(migf->total_length, PAGE_SIZE) :
				recv_buf->npages;
	int err = 0, inlen;
	__be64 *mtt;
	void *mkc;
	u32 *in;

	inlen = MLX5_ST_SZ_BYTES(create_mkey_in) +
		sizeof(*mtt) * round_up(npages, 2);

	in = kvzalloc(inlen, GFP_KERNEL);
	if (!in)
		return -ENOMEM;

	MLX5_SET(create_mkey_in, in, translations_octword_actual_size,
		 DIV_ROUND_UP(npages, 2));
	mtt = (__be64 *)MLX5_ADDR_OF(create_mkey_in, in, klm_pas_mtt);

	if (migf) {
		struct sg_dma_page_iter dma_iter;

		for_each_sgtable_dma_page(&migf->table.sgt, &dma_iter, 0)
			*mtt++ = cpu_to_be64(sg_page_iter_dma_address(&dma_iter));
	} else {
		int i;

		for (i = 0; i < npages; i++)
			*mtt++ = cpu_to_be64(recv_buf->dma_addrs[i]);
	}

	mkc = MLX5_ADDR_OF(create_mkey_in, in, memory_key_mkey_entry);
	MLX5_SET(mkc, mkc, access_mode_1_0, MLX5_MKC_ACCESS_MODE_MTT);
	MLX5_SET(mkc, mkc, lr, 1);
	MLX5_SET(mkc, mkc, lw, 1);
	MLX5_SET(mkc, mkc, rr, 1);
	MLX5_SET(mkc, mkc, rw, 1);
	MLX5_SET(mkc, mkc, pd, pdn);
	MLX5_SET(mkc, mkc, bsf_octword_size, 0);
	MLX5_SET(mkc, mkc, qpn, 0xffffff);
	MLX5_SET(mkc, mkc, log_page_size, PAGE_SHIFT);
	MLX5_SET(mkc, mkc, translations_octword_size, DIV_ROUND_UP(npages, 2));
	MLX5_SET64(mkc, mkc, len,
		   migf ? migf->total_length : (npages * PAGE_SIZE));
	err = mlx5_core_create_mkey(mdev, mkey, in, inlen);
	kvfree(in);
	return err;
}

void mlx5vf_mig_file_cleanup_cb(struct work_struct *_work)
{
	struct mlx5vf_async_data *async_data = container_of(_work,
		struct mlx5vf_async_data, work);
	struct mlx5_vf_migration_file *migf = container_of(async_data,
		struct mlx5_vf_migration_file, async_data);
	struct mlx5_core_dev *mdev = migf->mvdev->mdev;

	mutex_lock(&migf->lock);
	if (async_data->status) {
		migf->is_err = true;
		wake_up_interruptible(&migf->poll_wait);
	}
	mutex_unlock(&migf->lock);

	mlx5_core_destroy_mkey(mdev, async_data->mkey);
	dma_unmap_sgtable(mdev->device, &migf->table.sgt, DMA_FROM_DEVICE, 0);
	mlx5_core_dealloc_pd(mdev, async_data->pdn);
	kvfree(async_data->out);
	fput(migf->filp);
}

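/*
 * Completion callback for the asynchronous SAVE_VHCA_STATE command.
 * This runs from the command-interface completion path, so it only
 * records the actual image size (or the error status) and defers the
 * real teardown to mlx5vf_mig_file_cleanup_cb() on the ordered
 * workqueue.
 */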
static void mlx5vf_save_callback(int status, struct mlx5_async_work *context)
{
	struct mlx5vf_async_data *async_data = container_of(context,
		struct mlx5vf_async_data, cb_work);
	struct mlx5_vf_migration_file *migf = container_of(async_data,
		struct mlx5_vf_migration_file, async_data);

	if (!status) {
		WRITE_ONCE(migf->total_length,
			   MLX5_GET(save_vhca_state_out, async_data->out,
				    actual_image_size));
		wake_up_interruptible(&migf->poll_wait);
	}

	/*
	 * The error and the cleanup flows can't run from an
	 * interrupt context
	 */
	async_data->status = status;
	queue_work(migf->mvdev->cb_wq, &async_data->work);
}

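/*
 * Kick off an asynchronous SAVE_VHCA_STATE: DMA-map the migration
 * file's pages, cover them with an mkey and let the device write the
 * state image into them. On success, the references taken here (PD,
 * mkey, DMA mapping, file) are released by the cleanup work once the
 * callback fires; on failure they are unwound inline.
 */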
int mlx5vf_cmd_save_vhca_state(struct mlx5vf_pci_core_device *mvdev,
			       struct mlx5_vf_migration_file *migf)
{
	u32 out_size = MLX5_ST_SZ_BYTES(save_vhca_state_out);
	u32 in[MLX5_ST_SZ_DW(save_vhca_state_in)] = {};
	struct mlx5vf_async_data *async_data;
	struct mlx5_core_dev *mdev;
	u32 pdn, mkey;
	int err;

	lockdep_assert_held(&mvdev->state_mutex);
	if (mvdev->mdev_detach)
		return -ENOTCONN;

	mdev = mvdev->mdev;
	err = mlx5_core_alloc_pd(mdev, &pdn);
	if (err)
		return err;

	err = dma_map_sgtable(mdev->device, &migf->table.sgt, DMA_FROM_DEVICE,
			      0);
	if (err)
		goto err_dma_map;

	err = _create_mkey(mdev, pdn, migf, NULL, &mkey);
	if (err)
		goto err_create_mkey;

	MLX5_SET(save_vhca_state_in, in, opcode,
		 MLX5_CMD_OP_SAVE_VHCA_STATE);
	MLX5_SET(save_vhca_state_in, in, op_mod, 0);
	MLX5_SET(save_vhca_state_in, in, vhca_id, mvdev->vhca_id);
	MLX5_SET(save_vhca_state_in, in, mkey, mkey);
	MLX5_SET(save_vhca_state_in, in, size, migf->total_length);

	async_data = &migf->async_data;
	async_data->out = kvzalloc(out_size, GFP_KERNEL);
	if (!async_data->out) {
		err = -ENOMEM;
		goto err_out;
	}

	/* no data exists till the callback comes back */
	migf->total_length = 0;
	get_file(migf->filp);
	async_data->mkey = mkey;
	async_data->pdn = pdn;
	err = mlx5_cmd_exec_cb(&migf->async_ctx, in, sizeof(in),
			       async_data->out,
			       out_size, mlx5vf_save_callback,
			       &async_data->cb_work);
	if (err)
		goto err_exec;

	return 0;

err_exec:
	fput(migf->filp);
	kvfree(async_data->out);
err_out:
	mlx5_core_destroy_mkey(mdev, mkey);
err_create_mkey:
	dma_unmap_sgtable(mdev->device, &migf->table.sgt, DMA_FROM_DEVICE, 0);
err_dma_map:
	mlx5_core_dealloc_pd(mdev, pdn);
	return err;
}

int mlx5vf_cmd_load_vhca_state(struct mlx5vf_pci_core_device *mvdev,
			       struct mlx5_vf_migration_file *migf)
{
	struct mlx5_core_dev *mdev;
	u32 out[MLX5_ST_SZ_DW(save_vhca_state_out)] = {};
	u32 in[MLX5_ST_SZ_DW(save_vhca_state_in)] = {};
	u32 pdn, mkey;
	int err;

	lockdep_assert_held(&mvdev->state_mutex);
	if (mvdev->mdev_detach)
		return -ENOTCONN;

	mutex_lock(&migf->lock);
	if (!migf->total_length) {
		err = -EINVAL;
		goto end;
	}

	mdev = mvdev->mdev;
	err = mlx5_core_alloc_pd(mdev, &pdn);
	if (err)
		goto end;

	err = dma_map_sgtable(mdev->device, &migf->table.sgt, DMA_TO_DEVICE, 0);
	if (err)
		goto err_reg;

	err = _create_mkey(mdev, pdn, migf, NULL, &mkey);
	if (err)
		goto err_mkey;

	MLX5_SET(load_vhca_state_in, in, opcode,
		 MLX5_CMD_OP_LOAD_VHCA_STATE);
	MLX5_SET(load_vhca_state_in, in, op_mod, 0);
	MLX5_SET(load_vhca_state_in, in, vhca_id, mvdev->vhca_id);
	MLX5_SET(load_vhca_state_in, in, mkey, mkey);
	MLX5_SET(load_vhca_state_in, in, size, migf->total_length);

	err = mlx5_cmd_exec_inout(mdev, load_vhca_state, in, out);

	mlx5_core_destroy_mkey(mdev, mkey);
err_mkey:
	dma_unmap_sgtable(mdev->device, &migf->table.sgt, DMA_TO_DEVICE, 0);
err_reg:
	mlx5_core_dealloc_pd(mdev, pdn);
end:
	mutex_unlock(&migf->lock);
	return err;
}

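/*
 * Shrink an interval tree to at most @req_nodes ranges by repeatedly
 * merging the pair of neighbouring ranges separated by the smallest
 * gap. Merging only grows the covered area, so dirty reporting stays
 * correct, just potentially coarser. req_nodes == 1 collapses
 * everything into the first range in a single pass.
 */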
static void combine_ranges(struct rb_root_cached *root, u32 cur_nodes,
			   u32 req_nodes)
{
	struct interval_tree_node *prev, *curr, *comb_start, *comb_end;
	unsigned long min_gap;
	unsigned long curr_gap;

	/* Special shortcut when a single range is required */
	if (req_nodes == 1) {
		unsigned long last;

		curr = comb_start = interval_tree_iter_first(root, 0, ULONG_MAX);
		while (curr) {
			last = curr->last;
			prev = curr;
			curr = interval_tree_iter_next(curr, 0, ULONG_MAX);
			if (prev != comb_start)
				interval_tree_remove(prev, root);
		}
		comb_start->last = last;
		return;
	}

	/* Combine ranges which have the smallest gap */
	while (cur_nodes > req_nodes) {
		prev = NULL;
		min_gap = ULONG_MAX;
		curr = interval_tree_iter_first(root, 0, ULONG_MAX);
		while (curr) {
			if (prev) {
				curr_gap = curr->start - prev->last;
				if (curr_gap < min_gap) {
					min_gap = curr_gap;
					comb_start = prev;
					comb_end = curr;
				}
			}
			prev = curr;
			curr = interval_tree_iter_next(curr, 0, ULONG_MAX);
		}
		comb_start->last = comb_end->last;
		interval_tree_remove(comb_end, root);
		cur_nodes--;
	}
}

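/*
 * Create the firmware page-tracker object. The requested ranges are
 * first folded down to the device's pg_track_max_num_range limit, then
 * serialized after the object context as an array of page_track_range
 * records. The total tracked length must fit within the device's
 * min/max log_addr_space caps, otherwise the tracker is refused with
 * -EOPNOTSUPP.
 */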
static int mlx5vf_create_tracker(struct mlx5_core_dev *mdev,
				 struct mlx5vf_pci_core_device *mvdev,
				 struct rb_root_cached *ranges, u32 nnodes)
{
	int max_num_range =
		MLX5_CAP_ADV_VIRTUALIZATION(mdev, pg_track_max_num_range);
	struct mlx5_vhca_page_tracker *tracker = &mvdev->tracker;
	int record_size = MLX5_ST_SZ_BYTES(page_track_range);
	u32 out[MLX5_ST_SZ_DW(general_obj_out_cmd_hdr)] = {};
	struct interval_tree_node *node = NULL;
	u64 total_ranges_len = 0;
	u32 num_ranges = nnodes;
	u8 log_addr_space_size;
	void *range_list_ptr;
	void *obj_context;
	void *cmd_hdr;
	int inlen;
	void *in;
	int err;
	int i;

	if (num_ranges > max_num_range) {
		combine_ranges(ranges, nnodes, max_num_range);
		num_ranges = max_num_range;
	}

	inlen = MLX5_ST_SZ_BYTES(create_page_track_obj_in) +
			record_size * num_ranges;
	in = kzalloc(inlen, GFP_KERNEL);
	if (!in)
		return -ENOMEM;

	cmd_hdr = MLX5_ADDR_OF(create_page_track_obj_in, in,
			       general_obj_in_cmd_hdr);
	MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, opcode,
		 MLX5_CMD_OP_CREATE_GENERAL_OBJECT);
	MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, obj_type,
		 MLX5_OBJ_TYPE_PAGE_TRACK);
	obj_context = MLX5_ADDR_OF(create_page_track_obj_in, in, obj_context);
	MLX5_SET(page_track, obj_context, vhca_id, mvdev->vhca_id);
	MLX5_SET(page_track, obj_context, track_type, 1);
	MLX5_SET(page_track, obj_context, log_page_size,
		 ilog2(tracker->host_qp->tracked_page_size));
	MLX5_SET(page_track, obj_context, log_msg_size,
		 ilog2(tracker->host_qp->max_msg_size));
	MLX5_SET(page_track, obj_context, reporting_qpn, tracker->fw_qp->qpn);
	MLX5_SET(page_track, obj_context, num_ranges, num_ranges);

	range_list_ptr = MLX5_ADDR_OF(page_track, obj_context, track_range);
	node = interval_tree_iter_first(ranges, 0, ULONG_MAX);
	for (i = 0; i < num_ranges; i++) {
		void *addr_range_i_base = range_list_ptr + record_size * i;
		unsigned long length = node->last - node->start;

		MLX5_SET64(page_track_range, addr_range_i_base, start_address,
			   node->start);
		MLX5_SET64(page_track_range, addr_range_i_base, length, length);
		total_ranges_len += length;
		node = interval_tree_iter_next(node, 0, ULONG_MAX);
	}

	WARN_ON(node);
	log_addr_space_size = ilog2(total_ranges_len);
	if (log_addr_space_size <
	    (MLX5_CAP_ADV_VIRTUALIZATION(mdev, pg_track_log_min_addr_space)) ||
	    log_addr_space_size >
	    (MLX5_CAP_ADV_VIRTUALIZATION(mdev, pg_track_log_max_addr_space))) {
		err = -EOPNOTSUPP;
		goto out;
	}

	MLX5_SET(page_track, obj_context, log_addr_space_size,
		 log_addr_space_size);
	err = mlx5_cmd_exec(mdev, in, inlen, out, sizeof(out));
	if (err)
		goto out;

	tracker->id = MLX5_GET(general_obj_out_cmd_hdr, out, obj_id);
out:
	kfree(in);
	return err;
}

static int mlx5vf_cmd_destroy_tracker(struct mlx5_core_dev *mdev,
				      u32 tracker_id)
{
	u32 in[MLX5_ST_SZ_DW(general_obj_in_cmd_hdr)] = {};
	u32 out[MLX5_ST_SZ_DW(general_obj_out_cmd_hdr)] = {};

	MLX5_SET(general_obj_in_cmd_hdr, in, opcode, MLX5_CMD_OP_DESTROY_GENERAL_OBJECT);
	MLX5_SET(general_obj_in_cmd_hdr, in, obj_type, MLX5_OBJ_TYPE_PAGE_TRACK);
	MLX5_SET(general_obj_in_cmd_hdr, in, obj_id, tracker_id);

	return mlx5_cmd_exec(mdev, in, sizeof(in), out, sizeof(out));
}

static int mlx5vf_cmd_modify_tracker(struct mlx5_core_dev *mdev,
				     u32 tracker_id, unsigned long iova,
				     unsigned long length, u32 tracker_state)
{
	u32 in[MLX5_ST_SZ_DW(modify_page_track_obj_in)] = {};
	u32 out[MLX5_ST_SZ_DW(general_obj_out_cmd_hdr)] = {};
	void *obj_context;
	void *cmd_hdr;

	cmd_hdr = MLX5_ADDR_OF(modify_page_track_obj_in, in, general_obj_in_cmd_hdr);
	MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, opcode, MLX5_CMD_OP_MODIFY_GENERAL_OBJECT);
	MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, obj_type, MLX5_OBJ_TYPE_PAGE_TRACK);
	MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, obj_id, tracker_id);

	obj_context = MLX5_ADDR_OF(modify_page_track_obj_in, in, obj_context);
	MLX5_SET64(page_track, obj_context, modify_field_select, 0x3);
	MLX5_SET64(page_track, obj_context, range_start_address, iova);
	MLX5_SET64(page_track, obj_context, length, length);
	MLX5_SET(page_track, obj_context, state, tracker_state);

	return mlx5_cmd_exec(mdev, in, sizeof(in), out, sizeof(out));
}

static int alloc_cq_frag_buf(struct mlx5_core_dev *mdev,
			     struct mlx5_vhca_cq_buf *buf, int nent,
			     int cqe_size)
{
	struct mlx5_frag_buf *frag_buf = &buf->frag_buf;
	u8 log_wq_stride = 6 + (cqe_size == 128 ? 1 : 0);
	u8 log_wq_sz = ilog2(cqe_size);
	int err;

	err = mlx5_frag_buf_alloc_node(mdev, nent * cqe_size, frag_buf,
				       mdev->priv.numa_node);
	if (err)
		return err;

	mlx5_init_fbc(frag_buf->frags, log_wq_stride, log_wq_sz, &buf->fbc);
	buf->cqe_size = cqe_size;
	buf->nent = nent;
	return 0;
}

static void init_cq_frag_buf(struct mlx5_vhca_cq_buf *buf)
{
	struct mlx5_cqe64 *cqe64;
	void *cqe;
	int i;

	for (i = 0; i < buf->nent; i++) {
		cqe = mlx5_frag_buf_get_wqe(&buf->fbc, i);
		cqe64 = buf->cqe_size == 64 ? cqe : cqe + 64;
		cqe64->op_own = MLX5_CQE_INVALID << 4;
	}
}

static void mlx5vf_destroy_cq(struct mlx5_core_dev *mdev,
			      struct mlx5_vhca_cq *cq)
{
	mlx5_core_destroy_cq(mdev, &cq->mcq);
	mlx5_frag_buf_free(mdev, &cq->buf.frag_buf);
	mlx5_db_free(mdev, &cq->db);
}

static void mlx5vf_cq_event(struct mlx5_core_cq *mcq, enum mlx5_event type)
{
	if (type != MLX5_EVENT_TYPE_CQ_ERROR)
		return;

	set_tracker_error(container_of(mcq, struct mlx5vf_pci_core_device,
				       tracker.cq.mcq));
}

static int mlx5vf_event_notifier(struct notifier_block *nb, unsigned long type,
				 void *data)
{
	struct mlx5_vhca_page_tracker *tracker =
		mlx5_nb_cof(nb, struct mlx5_vhca_page_tracker, nb);
	struct mlx5vf_pci_core_device *mvdev = container_of(
		tracker, struct mlx5vf_pci_core_device, tracker);
	struct mlx5_eqe *eqe = data;
	u8 event_type = (u8)type;
	u8 queue_type;
	int qp_num;

	switch (event_type) {
	case MLX5_EVENT_TYPE_WQ_CATAS_ERROR:
	case MLX5_EVENT_TYPE_WQ_ACCESS_ERROR:
	case MLX5_EVENT_TYPE_WQ_INVAL_REQ_ERROR:
		queue_type = eqe->data.qp_srq.type;
		if (queue_type != MLX5_EVENT_QUEUE_TYPE_QP)
			break;
		qp_num = be32_to_cpu(eqe->data.qp_srq.qp_srq_n) & 0xffffff;
		if (qp_num != tracker->host_qp->qpn &&
		    qp_num != tracker->fw_qp->qpn)
			break;
		set_tracker_error(mvdev);
		break;
	default:
		break;
	}

	return NOTIFY_OK;
}

static void mlx5vf_cq_complete(struct mlx5_core_cq *mcq,
			       struct mlx5_eqe *eqe)
{
	struct mlx5vf_pci_core_device *mvdev =
		container_of(mcq, struct mlx5vf_pci_core_device,
			     tracker.cq.mcq);

	complete(&mvdev->tracker_comp);
}

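/*
 * Create the completion queue on which dirty-page report completions
 * arrive. The CQE size follows the cache line size (64 or 128 bytes),
 * and the CQ is armed right away so the first report raises an event;
 * completions merely kick tracker_comp, while the actual CQE
 * consumption happens in mlx5vf_tracker_read_and_clear().
 */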
static int mlx5vf_create_cq(struct mlx5_core_dev *mdev,
			    struct mlx5_vhca_page_tracker *tracker,
			    size_t ncqe)
{
	int cqe_size = cache_line_size() == 128 ? 128 : 64;
	u32 out[MLX5_ST_SZ_DW(create_cq_out)];
	struct mlx5_vhca_cq *cq;
	int inlen, err, eqn;
	void *cqc, *in;
	__be64 *pas;
	int vector;

	cq = &tracker->cq;
	ncqe = roundup_pow_of_two(ncqe);
	err = mlx5_db_alloc_node(mdev, &cq->db, mdev->priv.numa_node);
	if (err)
		return err;

	cq->ncqe = ncqe;
	cq->mcq.set_ci_db = cq->db.db;
	cq->mcq.arm_db = cq->db.db + 1;
	cq->mcq.cqe_sz = cqe_size;
	err = alloc_cq_frag_buf(mdev, &cq->buf, ncqe, cqe_size);
	if (err)
		goto err_db_free;

	init_cq_frag_buf(&cq->buf);
	inlen = MLX5_ST_SZ_BYTES(create_cq_in) +
		MLX5_FLD_SZ_BYTES(create_cq_in, pas[0]) *
		cq->buf.frag_buf.npages;
	in = kvzalloc(inlen, GFP_KERNEL);
	if (!in) {
		err = -ENOMEM;
		goto err_buff;
	}

	vector = raw_smp_processor_id() % mlx5_comp_vectors_count(mdev);
	err = mlx5_vector2eqn(mdev, vector, &eqn);
	if (err)
		goto err_vec;

	cqc = MLX5_ADDR_OF(create_cq_in, in, cq_context);
	MLX5_SET(cqc, cqc, log_cq_size, ilog2(ncqe));
	MLX5_SET(cqc, cqc, c_eqn_or_apu_element, eqn);
	MLX5_SET(cqc, cqc, uar_page, tracker->uar->index);
	MLX5_SET(cqc, cqc, log_page_size, cq->buf.frag_buf.page_shift -
		 MLX5_ADAPTER_PAGE_SHIFT);
	MLX5_SET64(cqc, cqc, dbr_addr, cq->db.dma);
	pas = (__be64 *)MLX5_ADDR_OF(create_cq_in, in, pas);
	mlx5_fill_page_frag_array(&cq->buf.frag_buf, pas);
	cq->mcq.comp = mlx5vf_cq_complete;
	cq->mcq.event = mlx5vf_cq_event;
	err = mlx5_core_create_cq(mdev, &cq->mcq, in, inlen, out, sizeof(out));
	if (err)
		goto err_vec;

	mlx5_cq_arm(&cq->mcq, MLX5_CQ_DB_REQ_NOT, tracker->uar->map,
		    cq->mcq.cons_index);
	kvfree(in);
	return 0;

err_vec:
	kvfree(in);
err_buff:
	mlx5_frag_buf_free(mdev, &cq->buf.frag_buf);
err_db_free:
	mlx5_db_free(mdev, &cq->db);
	return err;
}

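/*
 * Create one of the tracker's two RC QPs. With max_recv_wr != 0 this
 * is the host QP: it gets an RQ (no SQ) sized to receive the device's
 * dirty-page reports. With max_recv_wr == 0 it is the firmware-side
 * QP, created with a zero-length RQ purely as a peer for the device
 * to send from.
 */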
static struct mlx5_vhca_qp *
mlx5vf_create_rc_qp(struct mlx5_core_dev *mdev,
		    struct mlx5_vhca_page_tracker *tracker, u32 max_recv_wr)
{
	u32 out[MLX5_ST_SZ_DW(create_qp_out)] = {};
	struct mlx5_vhca_qp *qp;
	u8 log_rq_stride;
	u8 log_rq_sz;
	void *qpc;
	int inlen;
	void *in;
	int err;

	qp = kzalloc(sizeof(*qp), GFP_KERNEL);
	if (!qp)
		return ERR_PTR(-ENOMEM);

	qp->rq.wqe_cnt = roundup_pow_of_two(max_recv_wr);
	log_rq_stride = ilog2(MLX5_SEND_WQE_DS);
	log_rq_sz = ilog2(qp->rq.wqe_cnt);
	err = mlx5_db_alloc_node(mdev, &qp->db, mdev->priv.numa_node);
	if (err)
		goto err_free;

	if (max_recv_wr) {
		err = mlx5_frag_buf_alloc_node(mdev,
			wq_get_byte_sz(log_rq_sz, log_rq_stride),
			&qp->buf, mdev->priv.numa_node);
		if (err)
			goto err_db_free;
		mlx5_init_fbc(qp->buf.frags, log_rq_stride, log_rq_sz, &qp->rq.fbc);
	}

	qp->rq.db = &qp->db.db[MLX5_RCV_DBR];
	inlen = MLX5_ST_SZ_BYTES(create_qp_in) +
		MLX5_FLD_SZ_BYTES(create_qp_in, pas[0]) *
		qp->buf.npages;
	in = kvzalloc(inlen, GFP_KERNEL);
	if (!in) {
		err = -ENOMEM;
		goto err_in;
	}

	qpc = MLX5_ADDR_OF(create_qp_in, in, qpc);
	MLX5_SET(qpc, qpc, st, MLX5_QP_ST_RC);
	MLX5_SET(qpc, qpc, pm_state, MLX5_QP_PM_MIGRATED);
	MLX5_SET(qpc, qpc, pd, tracker->pdn);
	MLX5_SET(qpc, qpc, uar_page, tracker->uar->index);
	MLX5_SET(qpc, qpc, log_page_size,
		 qp->buf.page_shift - MLX5_ADAPTER_PAGE_SHIFT);
	MLX5_SET(qpc, qpc, ts_format, mlx5_get_qp_default_ts(mdev));
	if (MLX5_CAP_GEN(mdev, cqe_version) == 1)
		MLX5_SET(qpc, qpc, user_index, 0xFFFFFF);
	MLX5_SET(qpc, qpc, no_sq, 1);
	if (max_recv_wr) {
		MLX5_SET(qpc, qpc, cqn_rcv, tracker->cq.mcq.cqn);
		MLX5_SET(qpc, qpc, log_rq_stride, log_rq_stride - 4);
		MLX5_SET(qpc, qpc, log_rq_size, log_rq_sz);
		MLX5_SET(qpc, qpc, rq_type, MLX5_NON_ZERO_RQ);
		MLX5_SET64(qpc, qpc, dbr_addr, qp->db.dma);
		mlx5_fill_page_frag_array(&qp->buf,
					  (__be64 *)MLX5_ADDR_OF(create_qp_in,
								 in, pas));
	} else {
		MLX5_SET(qpc, qpc, rq_type, MLX5_ZERO_LEN_RQ);
	}

	MLX5_SET(create_qp_in, in, opcode, MLX5_CMD_OP_CREATE_QP);
	err = mlx5_cmd_exec(mdev, in, inlen, out, sizeof(out));
	kvfree(in);
	if (err)
		goto err_in;

	qp->qpn = MLX5_GET(create_qp_out, out, qpn);
	return qp;

err_in:
	if (max_recv_wr)
		mlx5_frag_buf_free(mdev, &qp->buf);
err_db_free:
	mlx5_db_free(mdev, &qp->db);
err_free:
	kfree(qp);
	return ERR_PTR(err);
}

static void mlx5vf_post_recv(struct mlx5_vhca_qp *qp)
{
	struct mlx5_wqe_data_seg *data;
	unsigned int ix;

	WARN_ON(qp->rq.pc - qp->rq.cc >= qp->rq.wqe_cnt);
	ix = qp->rq.pc & (qp->rq.wqe_cnt - 1);
	data = mlx5_frag_buf_get_wqe(&qp->rq.fbc, ix);
	data->byte_count = cpu_to_be32(qp->max_msg_size);
	data->lkey = cpu_to_be32(qp->recv_buf.mkey);
	data->addr = cpu_to_be64(qp->recv_buf.next_rq_offset);
	qp->rq.pc++;
	/* Make sure that descriptors are written before doorbell record. */
	dma_wmb();
	*qp->rq.db = cpu_to_be32(qp->rq.pc & 0xffff);
}

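/*
 * Drive a freshly created QP through RST->INIT->RTR and, for the
 * firmware-side QP only, on to RTS (the host QP is receive-only and
 * stops at RTR). The two tracker QPs are connected loopback-style to
 * each other via @remote_qpn; the host QP also has its whole RQ posted
 * while still in INIT so reports can land as soon as traffic starts.
 */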
static int mlx5vf_activate_qp(struct mlx5_core_dev *mdev,
			      struct mlx5_vhca_qp *qp, u32 remote_qpn,
			      bool host_qp)
{
	u32 init_in[MLX5_ST_SZ_DW(rst2init_qp_in)] = {};
	u32 rtr_in[MLX5_ST_SZ_DW(init2rtr_qp_in)] = {};
	u32 rts_in[MLX5_ST_SZ_DW(rtr2rts_qp_in)] = {};
	void *qpc;
	int ret;

	/* Init */
	qpc = MLX5_ADDR_OF(rst2init_qp_in, init_in, qpc);
	MLX5_SET(qpc, qpc, primary_address_path.vhca_port_num, 1);
	MLX5_SET(qpc, qpc, pm_state, MLX5_QPC_PM_STATE_MIGRATED);
	MLX5_SET(qpc, qpc, rre, 1);
	MLX5_SET(qpc, qpc, rwe, 1);
	MLX5_SET(rst2init_qp_in, init_in, opcode, MLX5_CMD_OP_RST2INIT_QP);
	MLX5_SET(rst2init_qp_in, init_in, qpn, qp->qpn);
	ret = mlx5_cmd_exec_in(mdev, rst2init_qp, init_in);
	if (ret)
		return ret;

	if (host_qp) {
		struct mlx5_vhca_recv_buf *recv_buf = &qp->recv_buf;
		int i;

		for (i = 0; i < qp->rq.wqe_cnt; i++) {
			mlx5vf_post_recv(qp);
			recv_buf->next_rq_offset += qp->max_msg_size;
		}
	}

	/* RTR */
	qpc = MLX5_ADDR_OF(init2rtr_qp_in, rtr_in, qpc);
	MLX5_SET(init2rtr_qp_in, rtr_in, qpn, qp->qpn);
	MLX5_SET(qpc, qpc, mtu, IB_MTU_4096);
	MLX5_SET(qpc, qpc, log_msg_max, MLX5_CAP_GEN(mdev, log_max_msg));
	MLX5_SET(qpc, qpc, remote_qpn, remote_qpn);
	MLX5_SET(qpc, qpc, primary_address_path.vhca_port_num, 1);
	MLX5_SET(qpc, qpc, primary_address_path.fl, 1);
	MLX5_SET(qpc, qpc, min_rnr_nak, 1);
	MLX5_SET(init2rtr_qp_in, rtr_in, opcode, MLX5_CMD_OP_INIT2RTR_QP);
	MLX5_SET(init2rtr_qp_in, rtr_in, qpn, qp->qpn);
	ret = mlx5_cmd_exec_in(mdev, init2rtr_qp, rtr_in);
	if (ret || host_qp)
		return ret;

	/* RTS */
	qpc = MLX5_ADDR_OF(rtr2rts_qp_in, rts_in, qpc);
	MLX5_SET(rtr2rts_qp_in, rts_in, qpn, qp->qpn);
	MLX5_SET(qpc, qpc, retry_count, 7);
	MLX5_SET(qpc, qpc, rnr_retry, 7); /* Infinite retry if RNR NACK */
	MLX5_SET(qpc, qpc, primary_address_path.ack_timeout, 0x8); /* ~1ms */
	MLX5_SET(rtr2rts_qp_in, rts_in, opcode, MLX5_CMD_OP_RTR2RTS_QP);
	MLX5_SET(rtr2rts_qp_in, rts_in, qpn, qp->qpn);

	return mlx5_cmd_exec_in(mdev, rtr2rts_qp, rts_in);
}

static void mlx5vf_destroy_qp(struct mlx5_core_dev *mdev,
			      struct mlx5_vhca_qp *qp)
{
	u32 in[MLX5_ST_SZ_DW(destroy_qp_in)] = {};

	MLX5_SET(destroy_qp_in, in, opcode, MLX5_CMD_OP_DESTROY_QP);
	MLX5_SET(destroy_qp_in, in, qpn, qp->qpn);
	mlx5_cmd_exec_in(mdev, destroy_qp, in);

	mlx5_frag_buf_free(mdev, &qp->buf);
	mlx5_db_free(mdev, &qp->db);
	kfree(qp);
}

static void free_recv_pages(struct mlx5_vhca_recv_buf *recv_buf)
{
	int i;

	/* Undo alloc_pages_bulk_array() */
	for (i = 0; i < recv_buf->npages; i++)
		__free_page(recv_buf->page_list[i]);

	kvfree(recv_buf->page_list);
}

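/*
 * The receive buffer backing the host QP's RQ: plain pages allocated
 * in bulk, individually DMA-mapped towards the device, and exposed
 * through the mkey built by _create_mkey() in
 * mlx5vf_alloc_qp_recv_resources() below.
 */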
static int alloc_recv_pages(struct mlx5_vhca_recv_buf *recv_buf,
			    unsigned int npages)
{
	unsigned int filled = 0, done = 0;
	int i;

	recv_buf->page_list = kvcalloc(npages, sizeof(*recv_buf->page_list),
				       GFP_KERNEL);
	if (!recv_buf->page_list)
		return -ENOMEM;

	for (;;) {
		filled = alloc_pages_bulk_array(GFP_KERNEL, npages - done,
						recv_buf->page_list + done);
		if (!filled)
			goto err;

		done += filled;
		if (done == npages)
			break;
	}

	recv_buf->npages = npages;
	return 0;

err:
	for (i = 0; i < npages; i++) {
		if (recv_buf->page_list[i])
			__free_page(recv_buf->page_list[i]);
	}

	kvfree(recv_buf->page_list);
	return -ENOMEM;
}

static int register_dma_recv_pages(struct mlx5_core_dev *mdev,
				   struct mlx5_vhca_recv_buf *recv_buf)
{
	int i, j;

	recv_buf->dma_addrs = kvcalloc(recv_buf->npages,
				       sizeof(*recv_buf->dma_addrs),
				       GFP_KERNEL);
	if (!recv_buf->dma_addrs)
		return -ENOMEM;

	for (i = 0; i < recv_buf->npages; i++) {
		recv_buf->dma_addrs[i] = dma_map_page(mdev->device,
						      recv_buf->page_list[i],
						      0, PAGE_SIZE,
						      DMA_FROM_DEVICE);
		if (dma_mapping_error(mdev->device, recv_buf->dma_addrs[i]))
			goto error;
	}
	return 0;

error:
	for (j = 0; j < i; j++)
		dma_unmap_single(mdev->device, recv_buf->dma_addrs[j],
				 PAGE_SIZE, DMA_FROM_DEVICE);

	kvfree(recv_buf->dma_addrs);
	return -ENOMEM;
}

static void unregister_dma_recv_pages(struct mlx5_core_dev *mdev,
				      struct mlx5_vhca_recv_buf *recv_buf)
{
	int i;

	for (i = 0; i < recv_buf->npages; i++)
		dma_unmap_single(mdev->device, recv_buf->dma_addrs[i],
				 PAGE_SIZE, DMA_FROM_DEVICE);

	kvfree(recv_buf->dma_addrs);
}

static void mlx5vf_free_qp_recv_resources(struct mlx5_core_dev *mdev,
					  struct mlx5_vhca_qp *qp)
{
	struct mlx5_vhca_recv_buf *recv_buf = &qp->recv_buf;

	mlx5_core_destroy_mkey(mdev, recv_buf->mkey);
	unregister_dma_recv_pages(mdev, recv_buf);
	free_recv_pages(&qp->recv_buf);
}

static int mlx5vf_alloc_qp_recv_resources(struct mlx5_core_dev *mdev,
					  struct mlx5_vhca_qp *qp, u32 pdn,
					  u64 rq_size)
{
	unsigned int npages = DIV_ROUND_UP_ULL(rq_size, PAGE_SIZE);
	struct mlx5_vhca_recv_buf *recv_buf = &qp->recv_buf;
	int err;

	err = alloc_recv_pages(recv_buf, npages);
	if (err < 0)
		return err;

	err = register_dma_recv_pages(mdev, recv_buf);
	if (err)
		goto end;

	err = _create_mkey(mdev, pdn, NULL, recv_buf, &recv_buf->mkey);
	if (err)
		goto err_create_mkey;

	return 0;

err_create_mkey:
	unregister_dma_recv_pages(mdev, recv_buf);
end:
	free_recv_pages(recv_buf);
	return err;
}

static void
_mlx5vf_free_page_tracker_resources(struct mlx5vf_pci_core_device *mvdev)
{
	struct mlx5_vhca_page_tracker *tracker = &mvdev->tracker;
	struct mlx5_core_dev *mdev = mvdev->mdev;

	lockdep_assert_held(&mvdev->state_mutex);

	if (!mvdev->log_active)
		return;

	WARN_ON(mvdev->mdev_detach);

	mlx5_eq_notifier_unregister(mdev, &tracker->nb);
	mlx5vf_cmd_destroy_tracker(mdev, tracker->id);
	mlx5vf_destroy_qp(mdev, tracker->fw_qp);
	mlx5vf_free_qp_recv_resources(mdev, tracker->host_qp);
	mlx5vf_destroy_qp(mdev, tracker->host_qp);
	mlx5vf_destroy_cq(mdev, &tracker->cq);
	mlx5_core_dealloc_pd(mdev, tracker->pdn);
	mlx5_put_uars_page(mdev, tracker->uar);
	mvdev->log_active = false;
}

int mlx5vf_stop_page_tracker(struct vfio_device *vdev)
{
	struct mlx5vf_pci_core_device *mvdev = container_of(
		vdev, struct mlx5vf_pci_core_device, core_device.vdev);

	mutex_lock(&mvdev->state_mutex);
	if (!mvdev->log_active)
		goto end;

	_mlx5vf_free_page_tracker_resources(mvdev);
	mvdev->log_active = false;
end:
	mlx5vf_state_mutex_unlock(mvdev);
	return 0;
}

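/*
 * Start dirty-page tracking (wired up through log_ops in
 * mlx5vf_cmd_set_migratable()). Brings up the whole pipeline: UAR and
 * PD, a CQ sized for a 2MB report RQ, the host and firmware QPs
 * connected to each other, the receive buffer, and finally the
 * firmware tracker object plus an EQ notifier for QP/CQ errors. The
 * tracked page size is clamped to the device's
 * pg_track_log_min/max_page_size caps and reported back via @page_size.
 */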
int mlx5vf_start_page_tracker(struct vfio_device *vdev,
			      struct rb_root_cached *ranges, u32 nnodes,
			      u64 *page_size)
{
	struct mlx5vf_pci_core_device *mvdev = container_of(
		vdev, struct mlx5vf_pci_core_device, core_device.vdev);
	struct mlx5_vhca_page_tracker *tracker = &mvdev->tracker;
	u8 log_tracked_page = ilog2(*page_size);
	struct mlx5_vhca_qp *host_qp;
	struct mlx5_vhca_qp *fw_qp;
	struct mlx5_core_dev *mdev;
	u32 max_msg_size = PAGE_SIZE;
	u64 rq_size = SZ_2M;
	u32 max_recv_wr;
	int err;

	mutex_lock(&mvdev->state_mutex);
	if (mvdev->mdev_detach) {
		err = -ENOTCONN;
		goto end;
	}

	if (mvdev->log_active) {
		err = -EINVAL;
		goto end;
	}

	mdev = mvdev->mdev;
	memset(tracker, 0, sizeof(*tracker));
	tracker->uar = mlx5_get_uars_page(mdev);
	if (IS_ERR(tracker->uar)) {
		err = PTR_ERR(tracker->uar);
		goto end;
	}

	err = mlx5_core_alloc_pd(mdev, &tracker->pdn);
	if (err)
		goto err_uar;

	max_recv_wr = DIV_ROUND_UP_ULL(rq_size, max_msg_size);
	err = mlx5vf_create_cq(mdev, tracker, max_recv_wr);
	if (err)
		goto err_dealloc_pd;

	host_qp = mlx5vf_create_rc_qp(mdev, tracker, max_recv_wr);
	if (IS_ERR(host_qp)) {
		err = PTR_ERR(host_qp);
		goto err_cq;
	}

	host_qp->max_msg_size = max_msg_size;
	if (log_tracked_page < MLX5_CAP_ADV_VIRTUALIZATION(mdev,
				pg_track_log_min_page_size)) {
		log_tracked_page = MLX5_CAP_ADV_VIRTUALIZATION(mdev,
				pg_track_log_min_page_size);
	} else if (log_tracked_page > MLX5_CAP_ADV_VIRTUALIZATION(mdev,
				pg_track_log_max_page_size)) {
		log_tracked_page = MLX5_CAP_ADV_VIRTUALIZATION(mdev,
				pg_track_log_max_page_size);
	}

	host_qp->tracked_page_size = (1ULL << log_tracked_page);
	err = mlx5vf_alloc_qp_recv_resources(mdev, host_qp, tracker->pdn,
					     rq_size);
	if (err)
		goto err_host_qp;

	fw_qp = mlx5vf_create_rc_qp(mdev, tracker, 0);
	if (IS_ERR(fw_qp)) {
		err = PTR_ERR(fw_qp);
		goto err_recv_resources;
	}

	err = mlx5vf_activate_qp(mdev, host_qp, fw_qp->qpn, true);
	if (err)
		goto err_activate;

	err = mlx5vf_activate_qp(mdev, fw_qp, host_qp->qpn, false);
	if (err)
		goto err_activate;

	tracker->host_qp = host_qp;
	tracker->fw_qp = fw_qp;
	err = mlx5vf_create_tracker(mdev, mvdev, ranges, nnodes);
	if (err)
		goto err_activate;

	MLX5_NB_INIT(&tracker->nb, mlx5vf_event_notifier, NOTIFY_ANY);
	mlx5_eq_notifier_register(mdev, &tracker->nb);
	*page_size = host_qp->tracked_page_size;
	mvdev->log_active = true;
	mlx5vf_state_mutex_unlock(mvdev);
	return 0;

err_activate:
	mlx5vf_destroy_qp(mdev, fw_qp);
err_recv_resources:
	mlx5vf_free_qp_recv_resources(mdev, host_qp);
err_host_qp:
	mlx5vf_destroy_qp(mdev, host_qp);
err_cq:
	mlx5vf_destroy_cq(mdev, &tracker->cq);
err_dealloc_pd:
	mlx5_core_dealloc_pd(mdev, tracker->pdn);
err_uar:
	mlx5_put_uars_page(mdev, tracker->uar);
end:
	mlx5vf_state_mutex_unlock(mvdev);
	return err;
}

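/*
 * Decode one dirty-page report message: each page_track_report_entry
 * carries a 64-bit IOVA split into low/high halves, and every entry
 * marks one tracked page in the caller's iova_bitmap.
 */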
static void
set_report_output(u32 size, int index, struct mlx5_vhca_qp *qp,
		  struct iova_bitmap *dirty)
{
	u32 entry_size = MLX5_ST_SZ_BYTES(page_track_report_entry);
	u32 nent = size / entry_size;
	struct page *page;
	u64 addr;
	u64 *buf;
	int i;

	if (WARN_ON(index >= qp->recv_buf.npages ||
		    (nent > qp->max_msg_size / entry_size)))
		return;

	page = qp->recv_buf.page_list[index];
	buf = kmap_local_page(page);
	for (i = 0; i < nent; i++) {
		addr = MLX5_GET(page_track_report_entry, buf + i,
				dirty_address_low);
		addr |= (u64)MLX5_GET(page_track_report_entry, buf + i,
				      dirty_address_high) << 32;
		iova_bitmap_set(dirty, addr, qp->tracked_page_size);
	}
	kunmap_local(buf);
}

static void
mlx5vf_rq_cqe(struct mlx5_vhca_qp *qp, struct mlx5_cqe64 *cqe,
	      struct iova_bitmap *dirty, int *tracker_status)
{
	u32 size;
	int ix;

	qp->rq.cc++;
	*tracker_status = be32_to_cpu(cqe->immediate) >> 28;
	size = be32_to_cpu(cqe->byte_cnt);
	ix = be16_to_cpu(cqe->wqe_counter) & (qp->rq.wqe_cnt - 1);

	/* zero length CQE, no data */
	WARN_ON(!size && *tracker_status == MLX5_PAGE_TRACK_STATE_REPORTING);
	if (size)
		set_report_output(size, ix, qp, dirty);

	qp->recv_buf.next_rq_offset = ix * qp->max_msg_size;
	mlx5vf_post_recv(qp);
}

static void *get_cqe(struct mlx5_vhca_cq *cq, int n)
{
	return mlx5_frag_buf_get_wqe(&cq->buf.fbc, n);
}

static struct mlx5_cqe64 *get_sw_cqe(struct mlx5_vhca_cq *cq, int n)
{
	void *cqe = get_cqe(cq, n & (cq->ncqe - 1));
	struct mlx5_cqe64 *cqe64;

	cqe64 = (cq->mcq.cqe_sz == 64) ? cqe : cqe + 64;

	if (likely(get_cqe_opcode(cqe64) != MLX5_CQE_INVALID) &&
	    !((cqe64->op_own & MLX5_CQE_OWNER_MASK) ^ !!(n & (cq->ncqe)))) {
		return cqe64;
	} else {
		return NULL;
	}
}

static int
mlx5vf_cq_poll_one(struct mlx5_vhca_cq *cq, struct mlx5_vhca_qp *qp,
		   struct iova_bitmap *dirty, int *tracker_status)
{
	struct mlx5_cqe64 *cqe;
	u8 opcode;

	cqe = get_sw_cqe(cq, cq->mcq.cons_index);
	if (!cqe)
		return CQ_EMPTY;

	++cq->mcq.cons_index;
	/*
	 * Make sure we read CQ entry contents after we've checked the
	 * ownership bit.
	 */
	rmb();
	opcode = get_cqe_opcode(cqe);
	switch (opcode) {
	case MLX5_CQE_RESP_SEND_IMM:
		mlx5vf_rq_cqe(qp, cqe, dirty, tracker_status);
		return CQ_OK;
	default:
		return CQ_POLL_ERR;
	}
}

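/*
 * Read-and-clear entry point for dirty-page logging: switch the
 * tracker to REPORTING for the given range, then consume report CQEs
 * into @dirty until the device leaves the REPORTING state. The
 * arm-then-repoll sequence before sleeping on tracker_comp closes the
 * race between the emptiness check and the CQ event.
 */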
int mlx5vf_tracker_read_and_clear(struct vfio_device *vdev, unsigned long iova,
				  unsigned long length,
				  struct iova_bitmap *dirty)
{
	struct mlx5vf_pci_core_device *mvdev = container_of(
		vdev, struct mlx5vf_pci_core_device, core_device.vdev);
	struct mlx5_vhca_page_tracker *tracker = &mvdev->tracker;
	struct mlx5_vhca_cq *cq = &tracker->cq;
	struct mlx5_core_dev *mdev;
	int poll_err, err;

	mutex_lock(&mvdev->state_mutex);
	if (!mvdev->log_active) {
		err = -EINVAL;
		goto end;
	}

	if (mvdev->mdev_detach) {
		err = -ENOTCONN;
		goto end;
	}

	mdev = mvdev->mdev;
	err = mlx5vf_cmd_modify_tracker(mdev, tracker->id, iova, length,
					MLX5_PAGE_TRACK_STATE_REPORTING);
	if (err)
		goto end;

	tracker->status = MLX5_PAGE_TRACK_STATE_REPORTING;
	while (tracker->status == MLX5_PAGE_TRACK_STATE_REPORTING &&
	       !tracker->is_err) {
		poll_err = mlx5vf_cq_poll_one(cq, tracker->host_qp, dirty,
					      &tracker->status);
		if (poll_err == CQ_EMPTY) {
			mlx5_cq_arm(&cq->mcq, MLX5_CQ_DB_REQ_NOT, tracker->uar->map,
				    cq->mcq.cons_index);
			poll_err = mlx5vf_cq_poll_one(cq, tracker->host_qp,
						      dirty, &tracker->status);
			if (poll_err == CQ_EMPTY) {
				wait_for_completion(&mvdev->tracker_comp);
				continue;
			}
		}
		if (poll_err == CQ_POLL_ERR) {
			err = -EIO;
			goto end;
		}
		mlx5_cq_set_ci(&cq->mcq);
	}

	if (tracker->status == MLX5_PAGE_TRACK_STATE_ERROR)
		tracker->is_err = true;

	if (tracker->is_err)
		err = -EIO;
end:
	mlx5vf_state_mutex_unlock(mvdev);
	return err;
}