};
struct mlx5_vdpa_query_mr {
- SLIST_ENTRY(mlx5_vdpa_query_mr) next;
union {
struct ibv_mr *mr;
struct mlx5_devx_obj *mkey;
#define MLX5_VDPA_MAX_C_THRD 256
#define MLX5_VDPA_MAX_TASKS_PER_THRD 4096
#define MLX5_VDPA_TASKS_PER_DEV 64
+#define MLX5_VDPA_MAX_MRS 0xFFFF
+
+/* vDPA task types. */
+enum mlx5_vdpa_task_type {
+ MLX5_VDPA_TASK_REG_MR = 1,
+};
/* Generic task information and size must be multiple of 4B. */
struct mlx5_vdpa_task {
struct mlx5_vdpa_priv *priv;
+ enum mlx5_vdpa_task_type type;
uint32_t *remaining_cnt;
uint32_t *err_cnt;
uint32_t idx;
};
extern struct mlx5_vdpa_conf_thread_mng conf_thread_mng;
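+/* Guest memory information used for memory region registration. */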
+struct mlx5_vdpa_vmem_info {
+ struct rte_vhost_memory *vmem;
+ uint32_t entries_num;
+ uint64_t gcd;
+ uint64_t size;
+ uint8_t mode;
+};
+
struct mlx5_vdpa_virtq {
SLIST_ENTRY(mlx5_vdpa_virtq) next;
uint8_t enable;
struct mlx5_hca_vdpa_attr caps;
uint32_t gpa_mkey_index;
struct ibv_mr *null_mr;
- struct rte_vhost_memory *vmem;
+ struct mlx5_vdpa_vmem_info vmem_info;
struct mlx5dv_devx_event_channel *eventc;
struct mlx5dv_devx_event_channel *err_chnl;
struct mlx5_uar uar;
uint8_t num_lag_ports;
uint64_t features; /* Negotiated features. */
uint16_t log_max_rqt_size;
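+ /* Index of the last conf thread used for round-robin task assignment. */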
+ uint16_t last_c_thrd_idx;
+ uint16_t num_mrs; /* Number of memory regions. */
struct mlx5_vdpa_steer steer;
struct mlx5dv_var *var;
void *virtq_db_addr;
struct mlx5_pmd_wrapped_mr lm_mr;
- SLIST_HEAD(mr_list, mlx5_vdpa_query_mr) mr_list;
+ struct mlx5_vdpa_query_mr **mrs;
struct mlx5_vdpa_virtq virtqs[];
};
bool
mlx5_vdpa_task_add(struct mlx5_vdpa_priv *priv,
uint32_t thrd_idx,
- uint32_t num);
+ enum mlx5_vdpa_task_type task_type,
+ uint32_t *bulk_refcnt, uint32_t *bulk_err_cnt,
+ void **task_data, uint32_t num);
+int
+mlx5_vdpa_register_mr(struct mlx5_vdpa_priv *priv, uint32_t idx);
+bool
+mlx5_vdpa_c_thread_wait_bulk_tasks_done(uint32_t *remaining_cnt,
+ uint32_t *err_cnt, uint32_t sleep_time);
#endif /* RTE_PMD_MLX5_VDPA_H_ */
bool
mlx5_vdpa_task_add(struct mlx5_vdpa_priv *priv,
uint32_t thrd_idx,
- uint32_t num)
+ enum mlx5_vdpa_task_type task_type,
+ uint32_t *remaining_cnt, uint32_t *err_cnt,
+ void **task_data, uint32_t num)
{
struct rte_ring *rng = conf_thread_mng.cthrd[thrd_idx].rng;
struct mlx5_vdpa_task task[MLX5_VDPA_TASKS_PER_DEV];
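+ /* Task data is interpreted as one 32-bit index per task. */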
+ uint32_t *data = (uint32_t *)task_data;
uint32_t i;
MLX5_ASSERT(num <= MLX5_VDPA_TASKS_PER_DEV);
for (i = 0 ; i < num; i++) {
task[i].priv = priv;
- /* To be added later. */
+ task[i].type = task_type;
+ task[i].remaining_cnt = remaining_cnt;
+ task[i].err_cnt = err_cnt;
+ task[i].idx = data[i];
}
if (!mlx5_vdpa_c_thrd_ring_enqueue_bulk(rng, (void **)&task, num, NULL))
return -1;
return 0;
}
+bool
+mlx5_vdpa_c_thread_wait_bulk_tasks_done(uint32_t *remaining_cnt,
+ uint32_t *err_cnt, uint32_t sleep_time)
+{
+ /* Wait for all tasks to complete. */
+ while (__atomic_load_n(remaining_cnt,
+ __ATOMIC_RELAXED) != 0) {
+ rte_delay_us_sleep(sleep_time);
+ }
+ if (__atomic_load_n(err_cnt,
+ __ATOMIC_RELAXED)) {
+ DRV_LOG(ERR, "Tasks completed with errors.");
+ return true;
+ }
+ return false;
+}
+
static void *
mlx5_vdpa_c_thread_handle(void *arg)
{
struct rte_ring *rng;
uint32_t thrd_idx;
uint32_t task_num;
+ int ret;
for (thrd_idx = 0; thrd_idx < multhrd->max_thrds;
thrd_idx++)
&multhrd->cthrd[thrd_idx].c_cond,
&multhrd->cthrd_lock);
pthread_mutex_unlock(&multhrd->cthrd_lock);
+ continue;
}
priv = task.priv;
if (priv == NULL)
continue;
- __atomic_fetch_sub(task.remaining_cnt,
+ switch (task.type) {
+ case MLX5_VDPA_TASK_REG_MR:
+ ret = mlx5_vdpa_register_mr(priv, task.idx);
+ if (ret) {
+ DRV_LOG(ERR,
+ "Failed to register mr %d.", task.idx);
+ __atomic_fetch_add(task.err_cnt, 1,
+ __ATOMIC_RELAXED);
+ }
+ break;
+ default:
+ DRV_LOG(ERR, "Invalid vdpa task type %d.",
+ task.type);
+ break;
+ }
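+ /* Decrement the remaining counter even on failure so waiters are not stuck. */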
+ if (task.remaining_cnt)
+ __atomic_fetch_sub(task.remaining_cnt,
1, __ATOMIC_RELAXED);
- /* To be added later. */
}
return NULL;
}
void
mlx5_vdpa_mem_dereg(struct mlx5_vdpa_priv *priv)
{
+ struct mlx5_vdpa_query_mr *mrs =
+ (struct mlx5_vdpa_query_mr *)priv->mrs;
struct mlx5_vdpa_query_mr *entry;
- struct mlx5_vdpa_query_mr *next;
+ int i;
- entry = SLIST_FIRST(&priv->mr_list);
- while (entry) {
- next = SLIST_NEXT(entry, next);
- if (entry->is_indirect)
- claim_zero(mlx5_devx_cmd_destroy(entry->mkey));
- else
- claim_zero(mlx5_glue->dereg_mr(entry->mr));
- SLIST_REMOVE(&priv->mr_list, entry, mlx5_vdpa_query_mr, next);
- rte_free(entry);
- entry = next;
+ if (priv->mrs) {
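+ /* Destroy in reverse order: the indirect mkey (last entry) first. */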
+ for (i = priv->num_mrs - 1; i >= 0; i--) {
+ entry = &mrs[i];
+ if (entry->is_indirect) {
+ if (entry->mkey)
+ claim_zero(
+ mlx5_devx_cmd_destroy(entry->mkey));
+ } else {
+ if (entry->mr)
+ claim_zero(
+ mlx5_glue->dereg_mr(entry->mr));
+ }
+ }
+ rte_free(priv->mrs);
+ priv->mrs = NULL;
+ priv->num_mrs = 0;
}
- SLIST_INIT(&priv->mr_list);
- if (priv->vmem) {
- free(priv->vmem);
- priv->vmem = NULL;
+ if (priv->vmem_info.vmem) {
+ free(priv->vmem_info.vmem);
+ priv->vmem_info.vmem = NULL;
}
+ priv->gpa_mkey_index = 0;
}
static int
#define KLM_SIZE_MAX_ALIGN(sz) ((sz) > MLX5_MAX_KLM_BYTE_COUNT ? \
MLX5_MAX_KLM_BYTE_COUNT : (sz))
-/*
- * The target here is to group all the physical memory regions of the
- * virtio device in one indirect mkey.
- * For KLM Fixed Buffer Size mode (HW find the translation entry in one
- * read according to the guest physical address):
- * All the sub-direct mkeys of it must be in the same size, hence, each
- * one of them should be in the GCD size of all the virtio memory
- * regions and the holes between them.
- * For KLM mode (each entry may be in different size so HW must iterate
- * the entries):
- * Each virtio memory region and each hole between them have one entry,
- * just need to cover the maximum allowed size(2G) by splitting entries
- * which their associated memory regions are bigger than 2G.
- * It means that each virtio memory region may be mapped to more than
- * one direct mkey in the 2 modes.
- * All the holes of invalid memory between the virtio memory regions
- * will be mapped to the null memory region for security.
- */
-int
-mlx5_vdpa_mem_register(struct mlx5_vdpa_priv *priv)
+static int
+mlx5_vdpa_create_indirect_mkey(struct mlx5_vdpa_priv *priv)
{
struct mlx5_devx_mkey_attr mkey_attr;
- struct mlx5_vdpa_query_mr *entry = NULL;
- struct rte_vhost_mem_region *reg = NULL;
- uint8_t mode = 0;
- uint32_t entries_num = 0;
- uint32_t i;
- uint64_t gcd = 0;
+ struct mlx5_vdpa_query_mr *mrs =
+ (struct mlx5_vdpa_query_mr *)priv->mrs;
+ struct mlx5_vdpa_query_mr *entry;
+ struct rte_vhost_mem_region *reg;
+ uint8_t mode = priv->vmem_info.mode;
+ uint32_t entries_num = priv->vmem_info.entries_num;
+ struct rte_vhost_memory *mem = priv->vmem_info.vmem;
+ struct mlx5_klm klm_array[entries_num];
+ uint64_t gcd = priv->vmem_info.gcd;
+ int ret = -rte_errno;
uint64_t klm_size;
- uint64_t mem_size;
- uint64_t k;
int klm_index = 0;
- int ret;
- struct rte_vhost_memory *mem = mlx5_vdpa_vhost_mem_regions_prepare
- (priv->vid, &mode, &mem_size, &gcd, &entries_num);
- struct mlx5_klm klm_array[entries_num];
+ uint64_t k;
+ uint32_t i;
- if (!mem)
- return -rte_errno;
- if (priv->vmem != NULL) {
- if (mlx5_vdpa_mem_cmp(mem, priv->vmem) == 0) {
- /* VM memory not changed, reuse resources. */
- free(mem);
- return 0;
- }
- mlx5_vdpa_mem_dereg(priv);
- }
- priv->vmem = mem;
+ /* Build KLM entries for the regions and the holes between them. */
for (i = 0; i < mem->nregions; i++) {
+ entry = &mrs[i];
reg = &mem->regions[i];
- entry = rte_zmalloc(__func__, sizeof(*entry), 0);
- if (!entry) {
- ret = -ENOMEM;
- DRV_LOG(ERR, "Failed to allocate mem entry memory.");
- goto error;
- }
- entry->mr = mlx5_glue->reg_mr_iova(priv->cdev->pd,
- (void *)(uintptr_t)(reg->host_user_addr),
- reg->size, reg->guest_phys_addr,
- IBV_ACCESS_LOCAL_WRITE);
- if (!entry->mr) {
- DRV_LOG(ERR, "Failed to create direct Mkey.");
- ret = -rte_errno;
- goto error;
- }
- entry->is_indirect = 0;
if (i > 0) {
uint64_t sadd;
uint64_t empty_region_sz = reg->guest_phys_addr -
klm_array[klm_index].address = reg->guest_phys_addr + k;
klm_index++;
}
- SLIST_INSERT_HEAD(&priv->mr_list, entry, next);
}
memset(&mkey_attr, 0, sizeof(mkey_attr));
mkey_attr.addr = (uintptr_t)(mem->regions[0].guest_phys_addr);
- mkey_attr.size = mem_size;
+ mkey_attr.size = priv->vmem_info.size;
mkey_attr.pd = priv->cdev->pdn;
mkey_attr.umem_id = 0;
/* Must be zero for KLM mode. */
mkey_attr.pg_access = 0;
mkey_attr.klm_array = klm_array;
mkey_attr.klm_num = klm_index;
- entry = rte_zmalloc(__func__, sizeof(*entry), 0);
- if (!entry) {
- DRV_LOG(ERR, "Failed to allocate memory for indirect entry.");
- ret = -ENOMEM;
- goto error;
- }
+ entry = &mrs[mem->nregions];
entry->mkey = mlx5_devx_cmd_mkey_create(priv->cdev->ctx, &mkey_attr);
if (!entry->mkey) {
DRV_LOG(ERR, "Failed to create indirect Mkey.");
- ret = -rte_errno;
- goto error;
+ ret = -rte_errno;
+ return ret;
}
entry->is_indirect = 1;
- SLIST_INSERT_HEAD(&priv->mr_list, entry, next);
priv->gpa_mkey_index = entry->mkey->id;
return 0;
+}
+
+/*
+ * The target here is to group all the physical memory regions of the
+ * virtio device in one indirect mkey.
+ * For KLM Fixed Buffer Size mode (HW find the translation entry in one
+ * read according to the guest physical address):
+ * All the sub-direct mkeys of it must be in the same size, hence, each
+ * one of them should be in the GCD size of all the virtio memory
+ * regions and the holes between them.
+ * For KLM mode (each entry may be in different size so HW must iterate
+ * the entries):
+ * Each virtio memory region and each hole between them have one entry,
+ * just need to cover the maximum allowed size(2G) by splitting entries
+ * which their associated memory regions are bigger than 2G.
+ * It means that each virtio memory region may be mapped to more than
+ * one direct mkey in the 2 modes.
+ * All the holes of invalid memory between the virtio memory regions
+ * will be mapped to the null memory region for security.
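+ * For example, two regions of 1G and 512M separated by a 256M hole
+ * have a 256M GCD, so Fixed Buffer Size mode needs 4 + 1 + 2 = 7
+ * entries, while KLM mode needs only 3 (region, hole, region).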
+ */
+int
+mlx5_vdpa_mem_register(struct mlx5_vdpa_priv *priv)
+{
+ void *mrs;
+ uint8_t mode = 0;
+ int ret = -rte_errno;
+ uint32_t i, thrd_idx, data[1];
+ uint32_t remaining_cnt = 0, err_cnt = 0, task_num = 0;
+ struct rte_vhost_memory *mem = mlx5_vdpa_vhost_mem_regions_prepare
+ (priv->vid, &mode, &priv->vmem_info.size,
+ &priv->vmem_info.gcd, &priv->vmem_info.entries_num);
+
+ if (!mem)
+ return -rte_errno;
+ if (priv->vmem_info.vmem != NULL) {
+ if (mlx5_vdpa_mem_cmp(mem, priv->vmem_info.vmem) == 0) {
+ /* VM memory not changed, reuse resources. */
+ free(mem);
+ return 0;
+ }
+ mlx5_vdpa_mem_dereg(priv);
+ }
+ priv->vmem_info.vmem = mem;
+ priv->vmem_info.mode = mode;
+ priv->num_mrs = mem->nregions;
+ if (!priv->num_mrs || priv->num_mrs >= MLX5_VDPA_MAX_MRS) {
+ DRV_LOG(ERR,
+ "Invalid number of memory regions.");
+ goto error;
+ }
+ /* The last one is indirect mkey entry. */
+ priv->num_mrs++;
+ mrs = rte_zmalloc("mlx5 vDPA memory regions",
+ sizeof(struct mlx5_vdpa_query_mr) * priv->num_mrs, 0);
+ priv->mrs = mrs;
+ if (!priv->mrs) {
+ DRV_LOG(ERR, "Failed to allocate private memory regions.");
+ goto error;
+ }
+ if (priv->use_c_thread) {
+ uint32_t main_task_idx[mem->nregions];
+
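+ /*
+ * Spread the regions across the configuration threads; every
+ * (max_thrds + 1)-th region is kept for the main thread.
+ */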
+ for (i = 0; i < mem->nregions; i++) {
+ thrd_idx = i % (conf_thread_mng.max_thrds + 1);
+ if (!thrd_idx) {
+ main_task_idx[task_num] = i;
+ task_num++;
+ continue;
+ }
+ thrd_idx = priv->last_c_thrd_idx + 1;
+ if (thrd_idx >= conf_thread_mng.max_thrds)
+ thrd_idx = 0;
+ priv->last_c_thrd_idx = thrd_idx;
+ data[0] = i;
+ if (mlx5_vdpa_task_add(priv, thrd_idx,
+ MLX5_VDPA_TASK_REG_MR,
+ &remaining_cnt, &err_cnt,
+ (void **)&data, 1)) {
+ DRV_LOG(ERR,
+ "Failed to add task for mem region %d.", i);
+ main_task_idx[task_num] = i;
+ task_num++;
+ }
+ }
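+ /*
+ * Register in the main thread the regions kept for it and any
+ * whose task could not be enqueued.
+ */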
+ for (i = 0; i < task_num; i++) {
+ ret = mlx5_vdpa_register_mr(priv,
+ main_task_idx[i]);
+ if (ret) {
+ DRV_LOG(ERR,
+ "Failed to register mem region %d.", main_task_idx[i]);
+ goto error;
+ }
+ }
+ if (mlx5_vdpa_c_thread_wait_bulk_tasks_done(&remaining_cnt,
+ &err_cnt, 100)) {
+ DRV_LOG(ERR,
+ "Failed to wait for mem region registration tasks.");
+ goto error;
+ }
+ } else {
+ for (i = 0; i < mem->nregions; i++) {
+ ret = mlx5_vdpa_register_mr(priv, i);
+ if (ret) {
+ DRV_LOG(ERR,
+ "Failed to register mem region %d.", i);
+ goto error;
+ }
+ }
+ }
+ ret = mlx5_vdpa_create_indirect_mkey(priv);
+ if (ret) {
+ DRV_LOG(ERR, "Failed to create indirect mkey.");
+ goto error;
+ }
+ return 0;
error:
- rte_free(entry);
mlx5_vdpa_mem_dereg(priv);
rte_errno = -ret;
return ret;
}
+
+int
+mlx5_vdpa_register_mr(struct mlx5_vdpa_priv *priv, uint32_t idx)
+{
+ struct rte_vhost_memory *mem = priv->vmem_info.vmem;
+ struct mlx5_vdpa_query_mr *mrs =
+ (struct mlx5_vdpa_query_mr *)priv->mrs;
+ struct mlx5_vdpa_query_mr *entry;
+ struct rte_vhost_mem_region *reg;
+ int ret;
+
+ reg = &mem->regions[idx];
+ entry = &mrs[idx];
+ entry->mr = mlx5_glue->reg_mr_iova
+ (priv->cdev->pd,
+ (void *)(uintptr_t)(reg->host_user_addr),
+ reg->size, reg->guest_phys_addr,
+ IBV_ACCESS_LOCAL_WRITE);
+ if (!entry->mr) {
+ DRV_LOG(ERR, "Failed to create direct Mkey.");
+ ret = -rte_errno;
+ return ret;
+ }
+ entry->is_indirect = 0;
+ return 0;
+}