1 /* SPDX-License-Identifier: BSD-3-Clause
2 * Copyright 2019 Mellanox Technologies, Ltd
7 #include <sys/eventfd.h>
9 #include <rte_malloc.h>
10 #include <rte_errno.h>
13 #include <mlx5_common.h>
15 #include "mlx5_vdpa_utils.h"
16 #include "mlx5_vdpa.h"
/*
 * Interrupt callback for a virtq kick fd; cb_arg is the struct
 * mlx5_vdpa_virtq the callback was registered with.
 *
 * NOTE(review): this excerpt has gaps (the embedded line numbers skip), so
 * the comments here describe only the statements that are visible.
 *
 * Visible flow:
 *  - test whether the interrupt fd is negative (the action taken is not
 *    visible),
 *  - read from the kick fd and log an error if the read failed,
 *  - ring the doorbell by writing the virtq index to priv->virtq_db_addr,
 *  - if the notifier state is DISABLED, ask vhost to enable the host
 *    notifier and record the resulting state.
 */
20 mlx5_vdpa_virtq_handler(void *cb_arg)
22 struct mlx5_vdpa_virtq *virtq = cb_arg;
23 struct mlx5_vdpa_priv *priv = virtq->priv;
27 if (rte_intr_fd_get(virtq->intr_handle) < 0)
31 nbytes = read(rte_intr_fd_get(virtq->intr_handle), &buf,
/* EWOULDBLOCK is one of the errno values tested after the read; the rest of
 * the condition is not visible here. */
35 errno == EWOULDBLOCK ||
38 DRV_LOG(ERR, "Failed to read kickfd of virtq %d: %s",
39 virtq->index, strerror(errno));
/* Doorbell: the virtq index is written to the mapped doorbell address. */
43 rte_write32(virtq->index, priv->virtq_db_addr);
44 if (virtq->notifier_state == MLX5_VDPA_NOTIFIER_STATE_DISABLED) {
/* A non-zero return from the notifier control call moves the state to ERR;
 * the ENABLED assignment below is presumably the else branch - TODO confirm. */
45 if (rte_vhost_host_notifier_ctrl(priv->vid, virtq->index, true))
46 virtq->notifier_state = MLX5_VDPA_NOTIFIER_STATE_ERR;
48 virtq->notifier_state =
49 MLX5_VDPA_NOTIFIER_STATE_ENABLED;
50 DRV_LOG(INFO, "Virtq %u notifier state is %s.", virtq->index,
51 virtq->notifier_state ==
52 MLX5_VDPA_NOTIFIER_STATE_ENABLED ? "enabled" :
55 DRV_LOG(DEBUG, "Ring virtq %u doorbell.", virtq->index);
/*
 * Tear down the resources attached to one virtq.
 *
 * NOTE(review): this excerpt has gaps (the embedded line numbers skip), so
 * some guards and declarations are not visible.
 *
 * Visible order of operations:
 *  1. if the interrupt fd is not -1, unregister mlx5_vdpa_virtq_handler,
 *     retrying while the call keeps returning -EAGAIN and retries remain,
 *     then set the fd to -1;
 *  2. free the interrupt instance;
 *  3. stop the virtq and destroy its DevX object;
 *  4. deregister and free every umem, then zero the umems array;
 *  5. destroy the event QP and reset the notifier state to DISABLED.
 */
59 mlx5_vdpa_virtq_unset(struct mlx5_vdpa_virtq *virtq)
62 int retries = MLX5_VDPA_INTR_RETRIES;
65 if (rte_intr_fd_get(virtq->intr_handle) != -1) {
/* Loop is bounded by MLX5_VDPA_INTR_RETRIES; the initial value of ret is
 * declared on a line that is not visible here. */
66 while (retries-- && ret == -EAGAIN) {
67 ret = rte_intr_callback_unregister(virtq->intr_handle,
68 mlx5_vdpa_virtq_handler,
71 DRV_LOG(DEBUG, "Try again to unregister fd %d "
72 "of virtq %d interrupt, retries = %d.",
73 rte_intr_fd_get(virtq->intr_handle),
74 (int)virtq->index, retries);
/* Pause between unregister attempts. */
76 usleep(MLX5_VDPA_INTR_RETRIES_USEC);
79 rte_intr_fd_set(virtq->intr_handle, -1);
81 rte_intr_instance_free(virtq->intr_handle);
83 ret = mlx5_vdpa_virtq_stop(virtq->priv, virtq->index);
85 DRV_LOG(WARNING, "Failed to stop virtq %d.",
87 claim_zero(mlx5_devx_cmd_destroy(virtq->virtq));
/* Release each umem: deregister the DevX object, then free its buffer. */
90 for (i = 0; i < RTE_DIM(virtq->umems); ++i) {
91 if (virtq->umems[i].obj)
92 claim_zero(mlx5_glue->devx_umem_dereg
93 (virtq->umems[i].obj));
94 if (virtq->umems[i].buf)
95 rte_free(virtq->umems[i].buf);
/* Clear the stale obj/buf pointers released above. */
97 memset(&virtq->umems, 0, sizeof(virtq->umems));
99 mlx5_vdpa_event_qp_destroy(&virtq->eqp);
100 virtq->notifier_state = MLX5_VDPA_NOTIFIER_STATE_DISABLED;
/*
 * Release all virtqs of a device together with the shared objects they use.
 *
 * NOTE(review): this excerpt has gaps (the embedded line numbers skip); the
 * guards around some destroy calls are not visible.
 *
 * Visible steps: unset every virtq in priv->virtqs[0..nr_virtqs) and destroy
 * its counters object, destroy each per-LAG-port TIS and clear its slot,
 * destroy the transport domain, unmap the doorbell mapping if one exists,
 * and finally zero the virtqs array.
 */
105 mlx5_vdpa_virtqs_release(struct mlx5_vdpa_priv *priv)
108 struct mlx5_vdpa_virtq *virtq;
110 for (i = 0; i < priv->nr_virtqs; i++) {
111 virtq = &priv->virtqs[i];
112 mlx5_vdpa_virtq_unset(virtq);
114 claim_zero(mlx5_devx_cmd_destroy(virtq->counters));
116 for (i = 0; i < priv->num_lag_ports; i++) {
118 claim_zero(mlx5_devx_cmd_destroy(priv->tiss[i]));
119 priv->tiss[i] = NULL;
123 claim_zero(mlx5_devx_cmd_destroy(priv->td));
/* Unmap with the same length (priv->var->length) used when mapping. */
126 if (priv->virtq_db_addr) {
127 claim_zero(munmap(priv->virtq_db_addr, priv->var->length));
128 priv->virtq_db_addr = NULL;
/* Wipe the per-virtq state for the nr_virtqs entries handled above. */
131 memset(priv->virtqs, 0, sizeof(*virtq) * priv->nr_virtqs);
/*
 * Change the state of a virtq through a DevX modify command.
 *
 * state: non-zero requests MLX5_VIRTQ_STATE_RDY, zero requests
 *        MLX5_VIRTQ_STATE_SUSPEND.
 * Returns whatever mlx5_devx_cmd_modify_virtq() returns.
 *
 * NOTE(review): the end of the attr initializer is not visible in this
 * excerpt.
 */
136 mlx5_vdpa_virtq_modify(struct mlx5_vdpa_virtq *virtq, int state)
138 struct mlx5_devx_virtq_attr attr = {
139 .type = MLX5_VIRTQ_MODIFY_TYPE_STATE,
140 .state = state ? MLX5_VIRTQ_STATE_RDY :
141 MLX5_VIRTQ_STATE_SUSPEND,
142 .queue_index = virtq->index,
145 return mlx5_devx_cmd_modify_virtq(virtq->virtq, &attr);
/*
 * Stop the virtq at position index in priv->virtqs.
 *
 * Visible flow: request the SUSPEND state via mlx5_vdpa_virtq_modify(virtq,
 * 0), mark the virtq as stopped, log, and return the result of
 * mlx5_vdpa_virtq_query() for the same index.
 *
 * NOTE(review): the handling of a failed modify call is on lines that are
 * not visible in this excerpt.
 */
149 mlx5_vdpa_virtq_stop(struct mlx5_vdpa_priv *priv, int index)
151 struct mlx5_vdpa_virtq *virtq = &priv->virtqs[index];
156 ret = mlx5_vdpa_virtq_modify(virtq, 0);
159 virtq->stopped = true;
160 DRV_LOG(DEBUG, "vid %u virtq %u was stopped.", priv->vid, index);
161 return mlx5_vdpa_virtq_query(priv, index);
/*
 * Query the hardware state of a virtq and push its ring indexes to vhost.
 *
 * Visible flow: query the DevX virtq object into attr (logging an error on
 * failure), log the hardware available/used indexes, pass them to
 * rte_vhost_set_vring_base() (logging an error on failure), and emit a
 * warning with attr.error_type when the queue state is
 * MLX5_VIRTQ_STATE_ERROR.
 *
 * NOTE(review): return statements are on lines that are not visible in this
 * excerpt.
 */
165 mlx5_vdpa_virtq_query(struct mlx5_vdpa_priv *priv, int index)
167 struct mlx5_devx_virtq_attr attr = {0};
168 struct mlx5_vdpa_virtq *virtq = &priv->virtqs[index];
171 if (mlx5_devx_cmd_query_virtq(virtq->virtq, &attr)) {
172 DRV_LOG(ERR, "Failed to query virtq %d.", index);
175 DRV_LOG(INFO, "Query vid %d vring %d: hw_available_idx=%d, "
176 "hw_used_index=%d", priv->vid, index,
177 attr.hw_available_index, attr.hw_used_index);
/* The last argument of this call is on a line that is not visible. */
178 ret = rte_vhost_set_vring_base(priv->vid, index,
179 attr.hw_available_index,
182 DRV_LOG(ERR, "Failed to set virtq %d base.", index);
185 if (attr.state == MLX5_VIRTQ_STATE_ERROR)
186 DRV_LOG(WARNING, "vid %d vring %d hw error=%hhu",
187 priv->vid, index, attr.error_type);
/*
 * Translate a host virtual address (hva) to a guest physical address using
 * the vhost memory region table.
 *
 * Each region covers [host_user_addr, host_user_addr + size); when hva falls
 * inside one, the result is the offset into that region added to the
 * region's guest_phys_addr.
 *
 * NOTE(review): the initial value of gpa and the return statement are on
 * lines that are not visible in this excerpt, so the value produced for an
 * unmapped hva cannot be stated from here.
 */
192 mlx5_vdpa_hva_to_gpa(struct rte_vhost_memory *mem, uint64_t hva)
194 struct rte_vhost_mem_region *reg;
198 for (i = 0; i < mem->nregions; i++) {
199 reg = &mem->regions[i];
200 if (hva >= reg->host_user_addr &&
201 hva < reg->host_user_addr + reg->size) {
202 gpa = hva - reg->host_user_addr + reg->guest_phys_addr;
/*
 * Create and start the hardware virtq for position index in priv->virtqs.
 *
 * NOTE(review): this excerpt has gaps (the embedded line numbers skip);
 * error-path jumps, return statements and several call arguments are not
 * visible, so the comments describe the visible statements only.
 *
 * Visible stages:
 *  1. fetch the vhost vring and derive the DevX attributes from the
 *     negotiated feature bits;
 *  2. choose the event mode and create event QPs when QP mode is selected;
 *  3. create the queue counters object if the capability is reported and
 *     the virtq has none yet;
 *  4. allocate and register the umems;
 *  5. for split rings, translate the desc/used/avail ring addresses to GPA;
 *  6. read the vring base indexes, fill the remaining attributes and create
 *     the DevX virtq object;
 *  7. enable guest notification, move the queue to ready and ring the
 *     doorbell;
 *  8. allocate the interrupt handle and register the kick fd callback;
 *  9. subscribe to the virtq error event and send an initial call
 *     notification.
 * The final visible statement calls mlx5_vdpa_virtq_unset(); presumably it
 * belongs to the error path - TODO confirm.
 */
210 mlx5_vdpa_virtq_setup(struct mlx5_vdpa_priv *priv, int index)
212 struct mlx5_vdpa_virtq *virtq = &priv->virtqs[index];
213 struct rte_vhost_vring vq;
214 struct mlx5_devx_virtq_attr attr = {0};
218 uint16_t last_avail_idx;
219 uint16_t last_used_idx;
220 uint16_t event_num = MLX5_EVENT_TYPE_OBJECT_CHANGE;
223 ret = rte_vhost_get_vhost_vring(priv->vid, index, &vq);
226 virtq->index = index;
227 virtq->vq_size = vq.size;
/* Each attribute below mirrors one negotiated virtio feature bit. */
228 attr.tso_ipv4 = !!(priv->features & (1ULL << VIRTIO_NET_F_HOST_TSO4));
229 attr.tso_ipv6 = !!(priv->features & (1ULL << VIRTIO_NET_F_HOST_TSO6));
230 attr.tx_csum = !!(priv->features & (1ULL << VIRTIO_NET_F_CSUM));
231 attr.rx_csum = !!(priv->features & (1ULL << VIRTIO_NET_F_GUEST_CSUM));
232 attr.virtio_version_1_0 = !!(priv->features & (1ULL <<
233 VIRTIO_F_VERSION_1));
234 attr.type = (priv->features & (1ULL << VIRTIO_F_RING_PACKED)) ?
235 MLX5_VIRTQ_TYPE_PACKED : MLX5_VIRTQ_TYPE_SPLIT;
237 * No need event QPs creation when the guest in poll mode or when the
238 * capability allows it.
240 attr.event_mode = vq.callfd != -1 || !(priv->caps.event_mode & (1 <<
241 MLX5_VIRTQ_EVENT_MODE_NO_MSIX)) ?
242 MLX5_VIRTQ_EVENT_MODE_QP :
243 MLX5_VIRTQ_EVENT_MODE_NO_MSIX;
244 if (attr.event_mode == MLX5_VIRTQ_EVENT_MODE_QP) {
245 ret = mlx5_vdpa_event_qp_create(priv, vq.size, vq.callfd,
248 DRV_LOG(ERR, "Failed to create event QPs for virtq %d.",
252 attr.qp_id = virtq->eqp.fw_qp->id;
254 DRV_LOG(INFO, "Virtq %d is, for sure, working by poll mode, no"
255 " need event QPs and event mechanism.", index);
/* An existing counters object is reused; one is created only when absent. */
257 if (priv->caps.queue_counters_valid) {
258 if (!virtq->counters)
259 virtq->counters = mlx5_devx_cmd_create_virtio_q_counters
261 if (!virtq->counters) {
262 DRV_LOG(ERR, "Failed to create virtq couners for virtq"
266 attr.counters_obj_id = virtq->counters->id;
268 /* Setup 3 UMEMs for each virtq. */
269 for (i = 0; i < RTE_DIM(virtq->umems); ++i) {
/* umem size is linear in the ring size: caps.umems[i].a * size + b. */
270 virtq->umems[i].size = priv->caps.umems[i].a * vq.size +
271 priv->caps.umems[i].b;
272 virtq->umems[i].buf = rte_zmalloc(__func__,
273 virtq->umems[i].size, 4096);
274 if (!virtq->umems[i].buf) {
275 DRV_LOG(ERR, "Cannot allocate umem %d memory for virtq"
279 virtq->umems[i].obj = mlx5_glue->devx_umem_reg(priv->cdev->ctx,
281 virtq->umems[i].size,
282 IBV_ACCESS_LOCAL_WRITE);
283 if (!virtq->umems[i].obj) {
284 DRV_LOG(ERR, "Failed to register umem %d for virtq %u.",
288 attr.umems[i].id = virtq->umems[i].obj->umem_id;
289 attr.umems[i].offset = 0;
290 attr.umems[i].size = virtq->umems[i].size;
/* Split rings only: the three ring addresses are passed as GPAs. */
292 if (attr.type == MLX5_VIRTQ_TYPE_SPLIT) {
293 gpa = mlx5_vdpa_hva_to_gpa(priv->vmem,
294 (uint64_t)(uintptr_t)vq.desc);
296 DRV_LOG(ERR, "Failed to get descriptor ring GPA.");
299 attr.desc_addr = gpa;
300 gpa = mlx5_vdpa_hva_to_gpa(priv->vmem,
301 (uint64_t)(uintptr_t)vq.used);
303 DRV_LOG(ERR, "Failed to get GPA for used ring.");
306 attr.used_addr = gpa;
307 gpa = mlx5_vdpa_hva_to_gpa(priv->vmem,
308 (uint64_t)(uintptr_t)vq.avail);
310 DRV_LOG(ERR, "Failed to get GPA for available ring.");
313 attr.available_addr = gpa;
315 ret = rte_vhost_get_vring_base(priv->vid, index, &last_avail_idx,
320 DRV_LOG(WARNING, "Couldn't get vring base, idx are set to 0");
322 DRV_LOG(INFO, "vid %d: Init last_avail_idx=%d, last_used_idx=%d for "
323 "virtq %d.", priv->vid, last_avail_idx,
324 last_used_idx, index);
326 attr.hw_available_index = last_avail_idx;
327 attr.hw_used_index = last_used_idx;
328 attr.q_size = vq.size;
329 attr.mkey = priv->gpa_mkey_index;
/* TIS selection: queue pair number (index / 2) modulo the LAG port count. */
330 attr.tis_id = priv->tiss[(index / 2) % priv->num_lag_ports]->id;
331 attr.queue_index = index;
332 attr.pd = priv->cdev->pdn;
333 attr.hw_latency_mode = priv->hw_latency_mode;
334 attr.hw_max_latency_us = priv->hw_max_latency_us;
335 attr.hw_max_pending_comp = priv->hw_max_pending_comp;
336 virtq->virtq = mlx5_devx_cmd_create_virtq(priv->cdev->ctx, &attr);
340 claim_zero(rte_vhost_enable_guest_notification(priv->vid, index, 1));
/* Move the freshly created queue to the ready state. */
341 if (mlx5_vdpa_virtq_modify(virtq, 1))
344 rte_write32(virtq->index, priv->virtq_db_addr);
345 /* Setup doorbell mapping. */
347 rte_intr_instance_alloc(RTE_INTR_INSTANCE_F_SHARED);
348 if (virtq->intr_handle == NULL) {
349 DRV_LOG(ERR, "Fail to allocate intr_handle");
353 if (rte_intr_fd_set(virtq->intr_handle, vq.kickfd))
/* A kick fd of -1 only produces a warning here. */
356 if (rte_intr_fd_get(virtq->intr_handle) == -1) {
357 DRV_LOG(WARNING, "Virtq %d kickfd is invalid.", index);
359 if (rte_intr_type_set(virtq->intr_handle, RTE_INTR_HANDLE_EXT))
362 if (rte_intr_callback_register(virtq->intr_handle,
363 mlx5_vdpa_virtq_handler,
/* On registration failure the fd is reset to -1 before logging. */
365 rte_intr_fd_set(virtq->intr_handle, -1);
366 DRV_LOG(ERR, "Failed to register virtq %d interrupt.",
370 DRV_LOG(DEBUG, "Register fd %d interrupt for virtq %d.",
371 rte_intr_fd_get(virtq->intr_handle),
375 /* Subscribe virtq error event. */
/* Cookie layout: virtq version in the upper 32 bits, index added below. */
377 cookie = ((uint64_t)virtq->version << 32) + index;
378 ret = mlx5_glue->devx_subscribe_devx_event(priv->err_chnl,
383 DRV_LOG(ERR, "Failed to subscribe device %d virtq %d error event.",
388 virtq->stopped = false;
389 /* Initial notification to ask Qemu handling completed buffers. */
390 if (virtq->eqp.cq.callfd != -1)
391 eventfd_write(virtq->eqp.cq.callfd, (eventfd_t)1);
392 DRV_LOG(DEBUG, "vid %u virtq %u was created successfully.", priv->vid,
396 mlx5_vdpa_virtq_unset(virtq);
/*
 * Check the negotiated virtio features in priv->features against the
 * capabilities in priv->caps.
 *
 * For each of PACKED ring, HOST_TSO4, HOST_TSO6, CSUM, GUEST_CSUM and
 * VERSION_1: if the feature bit is set but the matching capability is not
 * reported, an error is logged.
 *
 * NOTE(review): the statements following each log call and the final return
 * are on lines that are not visible in this excerpt; the only visible caller
 * treats a non-zero result as failure.
 */
401 mlx5_vdpa_features_validate(struct mlx5_vdpa_priv *priv)
403 if (priv->features & (1ULL << VIRTIO_F_RING_PACKED)) {
/* The queue-type capability is a bit mask indexed by virtq type. */
404 if (!(priv->caps.virtio_queue_type & (1 <<
405 MLX5_VIRTQ_TYPE_PACKED))) {
406 DRV_LOG(ERR, "Failed to configur PACKED mode for vdev "
407 "%d - it was not reported by HW/driver"
408 " capability.", priv->vid);
412 if (priv->features & (1ULL << VIRTIO_NET_F_HOST_TSO4)) {
413 if (!priv->caps.tso_ipv4) {
414 DRV_LOG(ERR, "Failed to enable TSO4 for vdev %d - TSO4"
415 " was not reported by HW/driver capability.",
420 if (priv->features & (1ULL << VIRTIO_NET_F_HOST_TSO6)) {
421 if (!priv->caps.tso_ipv6) {
422 DRV_LOG(ERR, "Failed to enable TSO6 for vdev %d - TSO6"
423 " was not reported by HW/driver capability.",
428 if (priv->features & (1ULL << VIRTIO_NET_F_CSUM)) {
429 if (!priv->caps.tx_csum) {
430 DRV_LOG(ERR, "Failed to enable CSUM for vdev %d - CSUM"
431 " was not reported by HW/driver capability.",
436 if (priv->features & (1ULL << VIRTIO_NET_F_GUEST_CSUM)) {
437 if (!priv->caps.rx_csum) {
438 DRV_LOG(ERR, "Failed to enable GUEST CSUM for vdev %d"
439 " GUEST CSUM was not reported by HW/driver "
440 "capability.", priv->vid);
444 if (priv->features & (1ULL << VIRTIO_F_VERSION_1)) {
445 if (!priv->caps.virtio_version_1_0) {
446 DRV_LOG(ERR, "Failed to enable version 1 for vdev %d "
447 "version 1 was not reported by HW/driver"
448 " capability.", priv->vid);
/*
 * Prepare the shared objects of a device and set up every enabled virtq.
 *
 * NOTE(review): this excerpt has gaps (the embedded line numbers skip);
 * return statements and error-path jumps are not visible.
 *
 * Visible stages:
 *  1. read and validate the negotiated features; CSUM is forced on when a
 *     TSO feature is negotiated without it;
 *  2. reject a vring count above twice caps.max_num_virtio_queues;
 *  3. mmap the doorbell region described by priv->var;
 *  4. create the transport domain and one TIS per LAG port;
 *  5. record nr_virtqs and call mlx5_vdpa_virtq_setup() for each enabled
 *     virtq.
 * The final visible statement calls mlx5_vdpa_virtqs_release(); presumably
 * it belongs to the error path - TODO confirm.
 */
456 mlx5_vdpa_virtqs_prepare(struct mlx5_vdpa_priv *priv)
458 struct mlx5_devx_tis_attr tis_attr = {0};
459 struct ibv_context *ctx = priv->cdev->ctx;
461 uint16_t nr_vring = rte_vhost_get_vring_num(priv->vid);
462 int ret = rte_vhost_get_negotiated_features(priv->vid, &priv->features);
464 if (ret || mlx5_vdpa_features_validate(priv)) {
465 DRV_LOG(ERR, "Failed to configure negotiated features.");
468 if ((priv->features & (1ULL << VIRTIO_NET_F_CSUM)) == 0 &&
469 ((priv->features & (1ULL << VIRTIO_NET_F_HOST_TSO4)) > 0 ||
470 (priv->features & (1ULL << VIRTIO_NET_F_HOST_TSO6)) > 0)) {
471 /* Packet may be corrupted if TSO is enabled without CSUM. */
472 DRV_LOG(INFO, "TSO is enabled without CSUM, force CSUM.");
473 priv->features |= (1ULL << VIRTIO_NET_F_CSUM);
/* The limit is expressed in vrings, i.e. twice max_num_virtio_queues. */
475 if (nr_vring > priv->caps.max_num_virtio_queues * 2) {
476 DRV_LOG(ERR, "Do not support more than %d virtqs(%d).",
477 (int)priv->caps.max_num_virtio_queues * 2,
481 /* Always map the entire page. */
482 priv->virtq_db_addr = mmap(NULL, priv->var->length, PROT_READ |
483 PROT_WRITE, MAP_SHARED, ctx->cmd_fd,
484 priv->var->mmap_off);
/* MAP_FAILED is normalised to NULL so later code can test the pointer. */
485 if (priv->virtq_db_addr == MAP_FAILED) {
486 DRV_LOG(ERR, "Failed to map doorbell page %u.", errno);
487 priv->virtq_db_addr = NULL;
490 DRV_LOG(DEBUG, "VAR address of doorbell mapping is %p.",
491 priv->virtq_db_addr);
493 priv->td = mlx5_devx_cmd_create_td(ctx);
495 DRV_LOG(ERR, "Failed to create transport domain.");
498 tis_attr.transport_domain = priv->td->id;
499 for (i = 0; i < priv->num_lag_ports; i++) {
500 /* 0 is auto affinity, non-zero value to propose port. */
501 tis_attr.lag_tx_port_affinity = i + 1;
502 priv->tiss[i] = mlx5_devx_cmd_create_tis(ctx, &tis_attr);
503 if (!priv->tiss[i]) {
504 DRV_LOG(ERR, "Failed to create TIS %u.", i);
508 priv->nr_virtqs = nr_vring;
/* Only virtqs already flagged as enabled are set up here. */
509 for (i = 0; i < nr_vring; i++)
510 if (priv->virtqs[i].enable && mlx5_vdpa_virtq_setup(priv, i))
514 mlx5_vdpa_virtqs_release(priv);
/*
 * Compare the current vhost vring of a virtq with the values recorded in
 * the driver's virtq structure.
 *
 * Visible comparisons: ring size against virtq->vq_size, kick fd against
 * the fd held by the interrupt handle, and call fd against the event CQ's
 * callfd when a CQ object exists (otherwise the call fd is tested against
 * -1).
 *
 * NOTE(review): the value returned by each branch is on lines that are not
 * visible in this excerpt.
 */
519 mlx5_vdpa_virtq_is_modified(struct mlx5_vdpa_priv *priv,
520 struct mlx5_vdpa_virtq *virtq)
522 struct rte_vhost_vring vq;
523 int ret = rte_vhost_get_vhost_vring(priv->vid, virtq->index, &vq);
527 if (vq.size != virtq->vq_size || vq.kickfd !=
528 rte_intr_fd_get(virtq->intr_handle))
530 if (virtq->eqp.cq.cq_obj.cq) {
531 if (vq.callfd != virtq->eqp.cq.callfd)
533 } else if (vq.callfd != -1) {
/*
 * Enable or disable the virtq at position index in priv->virtqs.
 *
 * NOTE(review): this excerpt has gaps (the embedded line numbers skip);
 * return statements and some conditions are not visible.
 *
 * Visible flow:
 *  - when the device is not configured yet, only the enable flag is
 *    recorded;
 *  - when the requested state equals the current one, the virtq is checked
 *    for modification via mlx5_vdpa_virtq_is_modified();
 *  - steering is updated for receive queues and the virtq is unset;
 *  - the virtq is set up again and steering is updated again for receive
 *    queues.
 */
540 mlx5_vdpa_virtq_enable(struct mlx5_vdpa_priv *priv, int index, int enable)
542 struct mlx5_vdpa_virtq *virtq = &priv->virtqs[index];
545 DRV_LOG(INFO, "Update virtq %d status %sable -> %sable.", index,
546 virtq->enable ? "en" : "dis", enable ? "en" : "dis");
547 if (!priv->configured) {
548 virtq->enable = !!enable;
/* enable is normalised with !! before comparing with the stored flag. */
551 if (virtq->enable == !!enable) {
554 ret = mlx5_vdpa_virtq_is_modified(priv, virtq);
556 DRV_LOG(ERR, "Virtq %d modify check failed.", index);
561 DRV_LOG(INFO, "Virtq %d was modified, recreate it.", index);
/* A steering update failure on this path is logged as a warning. */
565 if (is_virtq_recvq(virtq->index, priv->nr_virtqs)) {
566 ret = mlx5_vdpa_steer_update(priv);
568 DRV_LOG(WARNING, "Failed to disable steering "
569 "for virtq %d.", index);
571 mlx5_vdpa_virtq_unset(virtq);
574 ret = mlx5_vdpa_virtq_setup(priv, index);
576 DRV_LOG(ERR, "Failed to setup virtq %d.", index);
580 if (is_virtq_recvq(virtq->index, priv->nr_virtqs)) {
581 ret = mlx5_vdpa_steer_update(priv);
583 DRV_LOG(WARNING, "Failed to enable steering "
584 "for virtq %d.", index);
/*
 * Read the hardware counters of virtq qid into the caller's stats array.
 *
 * stats: output array with room for n entries.
 * n:     capacity of stats; the number of entries considered is
 *        min(n, MLX5_VDPA_STATS_MAX).
 *
 * Each reported value is the hardware counter minus the matching field of
 * virtq->reset.
 *
 * NOTE(review): this excerpt has gaps (the embedded line numbers skip). The
 * statement guarded by each `if (ret == <id>)` is not visible; presumably it
 * returns early once the caller's array is full - TODO confirm.
 */
591 mlx5_vdpa_virtq_stats_get(struct mlx5_vdpa_priv *priv, int qid,
592 struct rte_vdpa_stat *stats, unsigned int n)
594 struct mlx5_vdpa_virtq *virtq = &priv->virtqs[qid];
595 struct mlx5_devx_virtio_q_couners_attr attr = {0};
/* Without a counters object there is nothing to query. */
598 if (!virtq->counters) {
599 DRV_LOG(ERR, "Failed to read virtq %d statistics - virtq "
603 ret = mlx5_devx_cmd_query_virtio_q_counters(virtq->counters, &attr);
605 DRV_LOG(ERR, "Failed to read virtq %d stats from HW.", qid);
608 ret = (int)RTE_MIN(n, (unsigned int)MLX5_VDPA_STATS_MAX);
609 if (ret == MLX5_VDPA_STATS_RECEIVED_DESCRIPTORS)
611 stats[MLX5_VDPA_STATS_RECEIVED_DESCRIPTORS] = (struct rte_vdpa_stat) {
612 .id = MLX5_VDPA_STATS_RECEIVED_DESCRIPTORS,
613 .value = attr.received_desc - virtq->reset.received_desc,
615 if (ret == MLX5_VDPA_STATS_COMPLETED_DESCRIPTORS)
617 stats[MLX5_VDPA_STATS_COMPLETED_DESCRIPTORS] = (struct rte_vdpa_stat) {
618 .id = MLX5_VDPA_STATS_COMPLETED_DESCRIPTORS,
619 .value = attr.completed_desc - virtq->reset.completed_desc,
621 if (ret == MLX5_VDPA_STATS_BAD_DESCRIPTOR_ERRORS)
623 stats[MLX5_VDPA_STATS_BAD_DESCRIPTOR_ERRORS] = (struct rte_vdpa_stat) {
624 .id = MLX5_VDPA_STATS_BAD_DESCRIPTOR_ERRORS,
625 .value = attr.bad_desc_errors - virtq->reset.bad_desc_errors,
627 if (ret == MLX5_VDPA_STATS_EXCEED_MAX_CHAIN)
629 stats[MLX5_VDPA_STATS_EXCEED_MAX_CHAIN] = (struct rte_vdpa_stat) {
630 .id = MLX5_VDPA_STATS_EXCEED_MAX_CHAIN,
631 .value = attr.exceed_max_chain - virtq->reset.exceed_max_chain,
633 if (ret == MLX5_VDPA_STATS_INVALID_BUFFER)
635 stats[MLX5_VDPA_STATS_INVALID_BUFFER] = (struct rte_vdpa_stat) {
636 .id = MLX5_VDPA_STATS_INVALID_BUFFER,
637 .value = attr.invalid_buffer - virtq->reset.invalid_buffer,
639 if (ret == MLX5_VDPA_STATS_COMPLETION_ERRORS)
641 stats[MLX5_VDPA_STATS_COMPLETION_ERRORS] = (struct rte_vdpa_stat) {
642 .id = MLX5_VDPA_STATS_COMPLETION_ERRORS,
643 .value = attr.error_cqes - virtq->reset.error_cqes,
/*
 * Reset the statistics of virtq qid.
 *
 * Visible flow: log an error when the virtq has no counters object,
 * otherwise query the hardware counters and log an error if that query
 * fails.
 *
 * NOTE(review): the destination of the query, the return statements and the
 * end of this function are not visible in this excerpt. The "reset stats"
 * log text suggests the query result is stored as the new baseline -
 * TODO confirm.
 */
649 mlx5_vdpa_virtq_stats_reset(struct mlx5_vdpa_priv *priv, int qid)
651 struct mlx5_vdpa_virtq *virtq = &priv->virtqs[qid];
654 if (!virtq->counters) {
655 DRV_LOG(ERR, "Failed to read virtq %d statistics - virtq "
659 ret = mlx5_devx_cmd_query_virtio_q_counters(virtq->counters,
662 DRV_LOG(ERR, "Failed to read virtq %d reset stats from HW.",