1 /* SPDX-License-Identifier: BSD-3-Clause
2 * Copyright 2019 Mellanox Technologies, Ltd
7 #include <sys/eventfd.h>
9 #include <rte_malloc.h>
10 #include <rte_errno.h>
13 #include <mlx5_common.h>
15 #include "mlx5_vdpa_utils.h"
16 #include "mlx5_vdpa.h"
/*
 * Interrupt callback for a virtq kick fd; mlx5_vdpa_virtq_setup() installs
 * it with rte_intr_callback_register().
 * cb_arg is the struct mlx5_vdpa_virtq the fd belongs to.
 * The visible steps are: read the fd, write the virtq index to the doorbell
 * address, and, while the notifier is still marked disabled, ask vhost to
 * turn the host notifier on for this queue.
 */
20 mlx5_vdpa_virtq_handler(void *cb_arg)
22 struct mlx5_vdpa_virtq *virtq = cb_arg;
23 struct mlx5_vdpa_priv *priv = virtq->priv;
/* Consume the pending kick: read 8 bytes from the kick fd. */
28 nbytes = read(virtq->intr_handle.fd, &buf, 8);
31 errno == EWOULDBLOCK ||
34 DRV_LOG(ERR, "Failed to read kickfd of virtq %d: %s",
35 virtq->index, strerror(errno));
/* Ring the doorbell: write the virtq index to the mapped doorbell address. */
39 rte_write32(virtq->index, priv->virtq_db_addr);
/*
 * Only attempted while the state is DISABLED. A non-zero return from
 * rte_vhost_host_notifier_ctrl() records the ERR state.
 * NOTE(review): the ENABLED assignment below is presumably the else
 * branch; the line between the two assignments is not visible here.
 */
40 if (virtq->notifier_state == MLX5_VDPA_NOTIFIER_STATE_DISABLED) {
41 if (rte_vhost_host_notifier_ctrl(priv->vid, virtq->index, true))
42 virtq->notifier_state = MLX5_VDPA_NOTIFIER_STATE_ERR;
44 virtq->notifier_state =
45 MLX5_VDPA_NOTIFIER_STATE_ENABLED;
46 DRV_LOG(INFO, "Virtq %u notifier state is %s.", virtq->index,
47 virtq->notifier_state ==
48 MLX5_VDPA_NOTIFIER_STATE_ENABLED ? "enabled" :
51 DRV_LOG(DEBUG, "Ring virtq %u doorbell.", virtq->index);
/*
 * Tear down the resources of one virtq.
 * Visible steps, in order: unregister the kick fd interrupt callback,
 * stop the virtq and destroy its DevX object, deregister and free the
 * UMEM buffers, destroy the event QP resources, and reset the notifier
 * state to DISABLED.
 */
55 mlx5_vdpa_virtq_unset(struct mlx5_vdpa_virtq *virtq)
58 int retries = MLX5_VDPA_INTR_RETRIES;
/* An fd of -1 means no interrupt callback is registered for this virtq. */
61 if (virtq->intr_handle.fd != -1) {
/*
 * Keep retrying the unregister while it returns -EAGAIN, for at most
 * MLX5_VDPA_INTR_RETRIES attempts, sleeping
 * MLX5_VDPA_INTR_RETRIES_USEC microseconds between attempts.
 */
62 while (retries-- && ret == -EAGAIN) {
63 ret = rte_intr_callback_unregister(&virtq->intr_handle,
64 mlx5_vdpa_virtq_handler,
67 DRV_LOG(DEBUG, "Try again to unregister fd %d "
68 "of virtq %d interrupt, retries = %d.",
69 virtq->intr_handle.fd,
70 (int)virtq->index, retries);
71 usleep(MLX5_VDPA_INTR_RETRIES_USEC);
/* Mark the kick fd as no longer registered. */
74 virtq->intr_handle.fd = -1;
/* Stop the HW queue before destroying its DevX object; a stop failure is only logged. */
77 ret = mlx5_vdpa_virtq_stop(virtq->priv, virtq->index);
79 DRV_LOG(WARNING, "Failed to stop virtq %d.",
81 claim_zero(mlx5_devx_cmd_destroy(virtq->virtq));
/* For each UMEM: deregister the DevX umem object, then free its buffer. */
84 for (i = 0; i < RTE_DIM(virtq->umems); ++i) {
85 if (virtq->umems[i].obj)
86 claim_zero(mlx5_glue->devx_umem_dereg
87 (virtq->umems[i].obj));
88 if (virtq->umems[i].buf)
89 rte_free(virtq->umems[i].buf);
/* Zero the whole UMEM array so no released pointer or size is left behind. */
91 memset(&virtq->umems, 0, sizeof(virtq->umems));
93 mlx5_vdpa_event_qp_destroy(&virtq->eqp);
94 virtq->notifier_state = MLX5_VDPA_NOTIFIER_STATE_DISABLED;
/*
 * Release every virtq-related resource held by the device private data.
 * Visible steps: unset each of the priv->nr_virtqs virtqs and destroy its
 * counters object, destroy the per-LAG-port TIS objects and the transport
 * domain, unmap the doorbell mapping, and zero the virtq array.
 * NOTE(review): the guards around some destroy calls are not visible here.
 */
99 mlx5_vdpa_virtqs_release(struct mlx5_vdpa_priv *priv)
102 struct mlx5_vdpa_virtq *virtq;
104 for (i = 0; i < priv->nr_virtqs; i++) {
105 virtq = &priv->virtqs[i];
106 mlx5_vdpa_virtq_unset(virtq);
108 claim_zero(mlx5_devx_cmd_destroy(virtq->counters));
/* One TIS per LAG port; clear each slot after destroying it. */
110 for (i = 0; i < priv->num_lag_ports; i++) {
112 claim_zero(mlx5_devx_cmd_destroy(priv->tiss[i]));
113 priv->tiss[i] = NULL;
117 claim_zero(mlx5_devx_cmd_destroy(priv->td));
/* Undo the doorbell mmap() over the same length, priv->var->length. */
120 if (priv->virtq_db_addr) {
121 claim_zero(munmap(priv->virtq_db_addr, priv->var->length));
122 priv->virtq_db_addr = NULL;
/* Zero the first nr_virtqs entries of the virtq array. */
125 memset(priv->virtqs, 0, sizeof(*virtq) * priv->nr_virtqs);
/*
 * Change the state of a virtq's DevX object.
 * state: non-zero requests MLX5_VIRTQ_STATE_RDY, zero requests
 *        MLX5_VIRTQ_STATE_SUSPEND.
 * Returns whatever mlx5_devx_cmd_modify_virtq() returns.
 */
130 mlx5_vdpa_virtq_modify(struct mlx5_vdpa_virtq *virtq, int state)
132 struct mlx5_devx_virtq_attr attr = {
133 .type = MLX5_VIRTQ_MODIFY_TYPE_STATE,
134 .state = state ? MLX5_VIRTQ_STATE_RDY :
135 MLX5_VIRTQ_STATE_SUSPEND,
136 .queue_index = virtq->index,
139 return mlx5_devx_cmd_modify_virtq(virtq->virtq, &attr);
/*
 * Stop virtq `index`: request the suspend state via
 * mlx5_vdpa_virtq_modify(virtq, 0), mark the virtq as stopped, then
 * return the result of mlx5_vdpa_virtq_query().
 * NOTE(review): how a failing modify call is handled is not visible here.
 */
143 mlx5_vdpa_virtq_stop(struct mlx5_vdpa_priv *priv, int index)
145 struct mlx5_vdpa_virtq *virtq = &priv->virtqs[index];
150 ret = mlx5_vdpa_virtq_modify(virtq, 0);
153 virtq->stopped = true;
154 DRV_LOG(DEBUG, "vid %u virtq %u was stopped.", priv->vid, index);
155 return mlx5_vdpa_virtq_query(priv, index);
/*
 * Query the DevX attributes of virtq `index` and hand the HW ring indexes
 * to vhost through rte_vhost_set_vring_base().
 * A queue reported in MLX5_VIRTQ_STATE_ERROR only produces a warning log.
 * NOTE(review): the return statements are not visible here.
 */
159 mlx5_vdpa_virtq_query(struct mlx5_vdpa_priv *priv, int index)
161 struct mlx5_devx_virtq_attr attr = {0};
162 struct mlx5_vdpa_virtq *virtq = &priv->virtqs[index];
165 if (mlx5_devx_cmd_query_virtq(virtq->virtq, &attr)) {
166 DRV_LOG(ERR, "Failed to query virtq %d.", index);
169 DRV_LOG(INFO, "Query vid %d vring %d: hw_available_idx=%d, "
170 "hw_used_index=%d", priv->vid, index,
171 attr.hw_available_index, attr.hw_used_index);
/* Pass the HW available index on to vhost (the last argument is not visible here). */
172 ret = rte_vhost_set_vring_base(priv->vid, index,
173 attr.hw_available_index,
176 DRV_LOG(ERR, "Failed to set virtq %d base.", index);
179 if (attr.state == MLX5_VIRTQ_STATE_ERROR)
180 DRV_LOG(WARNING, "vid %d vring %d hw error=%hhu",
181 priv->vid, index, attr.error_type);
/*
 * Translate a host virtual address into a guest physical address.
 * Scans the regions of `mem` for one whose range
 * [host_user_addr, host_user_addr + size) contains `hva`, and computes
 * gpa = hva - host_user_addr + guest_phys_addr for it.
 * NOTE(review): the value returned when no region matches is not visible
 * here.
 */
186 mlx5_vdpa_hva_to_gpa(struct rte_vhost_memory *mem, uint64_t hva)
188 struct rte_vhost_mem_region *reg;
192 for (i = 0; i < mem->nregions; i++) {
193 reg = &mem->regions[i];
194 if (hva >= reg->host_user_addr &&
195 hva < reg->host_user_addr + reg->size) {
196 gpa = hva - reg->host_user_addr + reg->guest_phys_addr;
/*
 * Create and start the HW virtq `index` of the device.
 * Visible sequence: fetch the vring description from vhost, fill the DevX
 * virtq attributes from the negotiated features and device capabilities,
 * create event QP resources and a counters object when applicable,
 * allocate and register the UMEMs, translate the ring addresses for split
 * queues, create the DevX virtq and move it to the ready state, register
 * the kick fd interrupt handler, and subscribe to the virtq error event.
 * NOTE(review): the call to mlx5_vdpa_virtq_unset() on the last line is
 * presumably the error path; the labels and return statements are not
 * visible here.
 */
204 mlx5_vdpa_virtq_setup(struct mlx5_vdpa_priv *priv, int index)
206 struct mlx5_vdpa_virtq *virtq = &priv->virtqs[index];
207 struct rte_vhost_vring vq;
208 struct mlx5_devx_virtq_attr attr = {0};
212 uint16_t last_avail_idx;
213 uint16_t last_used_idx;
214 uint16_t event_num = MLX5_EVENT_TYPE_OBJECT_CHANGE;
217 ret = rte_vhost_get_vhost_vring(priv->vid, index, &vq);
220 virtq->index = index;
221 virtq->vq_size = vq.size;
/* Each offload attribute mirrors one negotiated virtio feature bit. */
222 attr.tso_ipv4 = !!(priv->features & (1ULL << VIRTIO_NET_F_HOST_TSO4));
223 attr.tso_ipv6 = !!(priv->features & (1ULL << VIRTIO_NET_F_HOST_TSO6));
224 attr.tx_csum = !!(priv->features & (1ULL << VIRTIO_NET_F_CSUM));
225 attr.rx_csum = !!(priv->features & (1ULL << VIRTIO_NET_F_GUEST_CSUM));
226 attr.virtio_version_1_0 = !!(priv->features & (1ULL <<
227 VIRTIO_F_VERSION_1));
228 attr.type = (priv->features & (1ULL << VIRTIO_F_RING_PACKED)) ?
229 MLX5_VIRTQ_TYPE_PACKED : MLX5_VIRTQ_TYPE_SPLIT;
231 * No need event QPs creation when the guest in poll mode or when the
232 * capability allows it.
234 attr.event_mode = vq.callfd != -1 || !(priv->caps.event_mode & (1 <<
235 MLX5_VIRTQ_EVENT_MODE_NO_MSIX)) ?
236 MLX5_VIRTQ_EVENT_MODE_QP :
237 MLX5_VIRTQ_EVENT_MODE_NO_MSIX;
/*
 * The expression above parses as (callfd != -1 || !no_msix_cap) ? QP :
 * NO_MSIX. NO_MSIX is therefore chosen only when callfd is -1 AND the
 * NO_MSIX capability bit is set; every other case uses event QPs.
 */
238 if (attr.event_mode == MLX5_VIRTQ_EVENT_MODE_QP) {
239 ret = mlx5_vdpa_event_qp_create(priv, vq.size, vq.callfd,
242 DRV_LOG(ERR, "Failed to create event QPs for virtq %d.",
246 attr.qp_id = virtq->eqp.fw_qp->id;
248 DRV_LOG(INFO, "Virtq %d is, for sure, working by poll mode, no"
249 " need event QPs and event mechanism.", index);
/* A counters object is created only when none exists yet for this virtq. */
251 if (priv->caps.queue_counters_valid) {
252 if (!virtq->counters)
253 virtq->counters = mlx5_devx_cmd_create_virtio_q_counters
255 if (!virtq->counters) {
256 DRV_LOG(ERR, "Failed to create virtq couners for virtq"
260 attr.counters_obj_id = virtq->counters->id;
262 /* Setup 3 UMEMs for each virtq. */
263 for (i = 0; i < RTE_DIM(virtq->umems); ++i) {
/* UMEM size is linear in the queue size: caps.umems[i].a * vq.size + caps.umems[i].b. */
264 virtq->umems[i].size = priv->caps.umems[i].a * vq.size +
265 priv->caps.umems[i].b;
/* Zeroed buffer, allocated with an alignment argument of 4096. */
266 virtq->umems[i].buf = rte_zmalloc(__func__,
267 virtq->umems[i].size, 4096);
268 if (!virtq->umems[i].buf) {
269 DRV_LOG(ERR, "Cannot allocate umem %d memory for virtq"
273 virtq->umems[i].obj = mlx5_glue->devx_umem_reg(priv->cdev->ctx,
275 virtq->umems[i].size,
276 IBV_ACCESS_LOCAL_WRITE);
277 if (!virtq->umems[i].obj) {
278 DRV_LOG(ERR, "Failed to register umem %d for virtq %u.",
282 attr.umems[i].id = virtq->umems[i].obj->umem_id;
283 attr.umems[i].offset = 0;
284 attr.umems[i].size = virtq->umems[i].size;
/*
 * Split rings only: translate the descriptor, used and available ring
 * host virtual addresses to guest physical addresses.
 */
286 if (attr.type == MLX5_VIRTQ_TYPE_SPLIT) {
287 gpa = mlx5_vdpa_hva_to_gpa(priv->vmem,
288 (uint64_t)(uintptr_t)vq.desc);
290 DRV_LOG(ERR, "Failed to get descriptor ring GPA.");
293 attr.desc_addr = gpa;
294 gpa = mlx5_vdpa_hva_to_gpa(priv->vmem,
295 (uint64_t)(uintptr_t)vq.used);
297 DRV_LOG(ERR, "Failed to get GPA for used ring.");
300 attr.used_addr = gpa;
301 gpa = mlx5_vdpa_hva_to_gpa(priv->vmem,
302 (uint64_t)(uintptr_t)vq.avail);
304 DRV_LOG(ERR, "Failed to get GPA for available ring.");
307 attr.available_addr = gpa;
/* Seed the HW ring indexes from the vring base kept by vhost. */
309 ret = rte_vhost_get_vring_base(priv->vid, index, &last_avail_idx,
314 DRV_LOG(WARNING, "Couldn't get vring base, idx are set to 0");
316 DRV_LOG(INFO, "vid %d: Init last_avail_idx=%d, last_used_idx=%d for "
317 "virtq %d.", priv->vid, last_avail_idx,
318 last_used_idx, index);
320 attr.hw_available_index = last_avail_idx;
321 attr.hw_used_index = last_used_idx;
322 attr.q_size = vq.size;
323 attr.mkey = priv->gpa_mkey_index;
/* TIS selection: index / 2, taken modulo the number of LAG ports. */
324 attr.tis_id = priv->tiss[(index / 2) % priv->num_lag_ports]->id;
325 attr.queue_index = index;
326 attr.pd = priv->cdev->pdn;
327 attr.hw_latency_mode = priv->hw_latency_mode;
328 attr.hw_max_latency_us = priv->hw_max_latency_us;
329 attr.hw_max_pending_comp = priv->hw_max_pending_comp;
330 virtq->virtq = mlx5_devx_cmd_create_virtq(priv->cdev->ctx, &attr);
334 claim_zero(rte_vhost_enable_guest_notification(priv->vid, index, 1));
/* Move the new virtq to the ready state, then write its index to the doorbell address. */
335 if (mlx5_vdpa_virtq_modify(virtq, 1))
338 rte_write32(virtq->index, priv->virtq_db_addr);
339 /* Setup doorbell mapping. */
340 virtq->intr_handle.fd = vq.kickfd;
341 if (virtq->intr_handle.fd == -1) {
342 DRV_LOG(WARNING, "Virtq %d kickfd is invalid.", index);
344 virtq->intr_handle.type = RTE_INTR_HANDLE_EXT;
/* On registration failure the fd is reset to -1, the value mlx5_vdpa_virtq_unset() checks before unregistering. */
345 if (rte_intr_callback_register(&virtq->intr_handle,
346 mlx5_vdpa_virtq_handler,
348 virtq->intr_handle.fd = -1;
349 DRV_LOG(ERR, "Failed to register virtq %d interrupt.",
353 DRV_LOG(DEBUG, "Register fd %d interrupt for virtq %d.",
354 virtq->intr_handle.fd, index);
357 /* Subscribe virtq error event. */
/* Event cookie: virtq->version in the upper 32 bits, plus the queue index. */
359 cookie = ((uint64_t)virtq->version << 32) + index;
360 ret = mlx5_glue->devx_subscribe_devx_event(priv->err_chnl,
365 DRV_LOG(ERR, "Failed to subscribe device %d virtq %d error event.",
370 virtq->stopped = false;
371 /* Initial notification to ask Qemu handling completed buffers. */
372 if (virtq->eqp.cq.callfd != -1)
373 eventfd_write(virtq->eqp.cq.callfd, (eventfd_t)1);
374 DRV_LOG(DEBUG, "vid %u virtq %u was created successfully.", priv->vid,
378 mlx5_vdpa_virtq_unset(virtq);
/*
 * Check the negotiated virtio features in priv->features against the
 * capabilities in priv->caps.
 * Each feature bit that is set (packed ring, TSO4, TSO6, CSUM, GUEST CSUM,
 * version 1) must have its capability reported; a missing capability is
 * logged as an error.
 * NOTE(review): the return statements are not visible here.
 */
383 mlx5_vdpa_features_validate(struct mlx5_vdpa_priv *priv)
/* Packed ring needs the PACKED bit in the virtio_queue_type capability mask. */
385 if (priv->features & (1ULL << VIRTIO_F_RING_PACKED)) {
386 if (!(priv->caps.virtio_queue_type & (1 <<
387 MLX5_VIRTQ_TYPE_PACKED))) {
388 DRV_LOG(ERR, "Failed to configur PACKED mode for vdev "
389 "%d - it was not reported by HW/driver"
390 " capability.", priv->vid);
394 if (priv->features & (1ULL << VIRTIO_NET_F_HOST_TSO4)) {
395 if (!priv->caps.tso_ipv4) {
396 DRV_LOG(ERR, "Failed to enable TSO4 for vdev %d - TSO4"
397 " was not reported by HW/driver capability.",
402 if (priv->features & (1ULL << VIRTIO_NET_F_HOST_TSO6)) {
403 if (!priv->caps.tso_ipv6) {
404 DRV_LOG(ERR, "Failed to enable TSO6 for vdev %d - TSO6"
405 " was not reported by HW/driver capability.",
410 if (priv->features & (1ULL << VIRTIO_NET_F_CSUM)) {
411 if (!priv->caps.tx_csum) {
412 DRV_LOG(ERR, "Failed to enable CSUM for vdev %d - CSUM"
413 " was not reported by HW/driver capability.",
418 if (priv->features & (1ULL << VIRTIO_NET_F_GUEST_CSUM)) {
419 if (!priv->caps.rx_csum) {
420 DRV_LOG(ERR, "Failed to enable GUEST CSUM for vdev %d"
421 " GUEST CSUM was not reported by HW/driver "
422 "capability.", priv->vid);
426 if (priv->features & (1ULL << VIRTIO_F_VERSION_1)) {
427 if (!priv->caps.virtio_version_1_0) {
428 DRV_LOG(ERR, "Failed to enable version 1 for vdev %d "
429 "version 1 was not reported by HW/driver"
430 " capability.", priv->vid);
/*
 * Prepare all virtqs of the device.
 * Visible sequence: read and validate the negotiated features, force CSUM
 * when TSO was negotiated without it, check the vring count against the
 * device limit, map the doorbell page, create the transport domain and
 * one TIS per LAG port, then set up every virtq that is flagged enabled.
 * NOTE(review): the call to mlx5_vdpa_virtqs_release() on the last line is
 * presumably the error path; the labels and return statements are not
 * visible here.
 */
438 mlx5_vdpa_virtqs_prepare(struct mlx5_vdpa_priv *priv)
440 struct mlx5_devx_tis_attr tis_attr = {0};
441 struct ibv_context *ctx = priv->cdev->ctx;
443 uint16_t nr_vring = rte_vhost_get_vring_num(priv->vid);
/* The negotiated feature bits are stored in priv->features for later use. */
444 int ret = rte_vhost_get_negotiated_features(priv->vid, &priv->features);
446 if (ret || mlx5_vdpa_features_validate(priv)) {
447 DRV_LOG(ERR, "Failed to configure negotiated features.");
450 if ((priv->features & (1ULL << VIRTIO_NET_F_CSUM)) == 0 &&
451 ((priv->features & (1ULL << VIRTIO_NET_F_HOST_TSO4)) > 0 ||
452 (priv->features & (1ULL << VIRTIO_NET_F_HOST_TSO6)) > 0)) {
453 /* Packet may be corrupted if TSO is enabled without CSUM. */
454 DRV_LOG(INFO, "TSO is enabled without CSUM, force CSUM.");
455 priv->features |= (1ULL << VIRTIO_NET_F_CSUM);
/* The vring count may not exceed twice caps.max_num_virtio_queues. */
457 if (nr_vring > priv->caps.max_num_virtio_queues * 2) {
458 DRV_LOG(ERR, "Do not support more than %d virtqs(%d).",
459 (int)priv->caps.max_num_virtio_queues * 2,
463 /* Always map the entire page. */
464 priv->virtq_db_addr = mmap(NULL, priv->var->length, PROT_READ |
465 PROT_WRITE, MAP_SHARED, ctx->cmd_fd,
466 priv->var->mmap_off);
/* Replace MAP_FAILED with NULL, the value mlx5_vdpa_virtqs_release() tests before unmapping. */
467 if (priv->virtq_db_addr == MAP_FAILED) {
468 DRV_LOG(ERR, "Failed to map doorbell page %u.", errno);
469 priv->virtq_db_addr = NULL;
472 DRV_LOG(DEBUG, "VAR address of doorbell mapping is %p.",
473 priv->virtq_db_addr);
475 priv->td = mlx5_devx_cmd_create_td(ctx);
477 DRV_LOG(ERR, "Failed to create transport domain.");
/* Every TIS is created in the transport domain created above. */
480 tis_attr.transport_domain = priv->td->id;
481 for (i = 0; i < priv->num_lag_ports; i++) {
482 /* 0 is auto affinity, non-zero value to propose port. */
483 tis_attr.lag_tx_port_affinity = i + 1;
484 priv->tiss[i] = mlx5_devx_cmd_create_tis(ctx, &tis_attr);
485 if (!priv->tiss[i]) {
486 DRV_LOG(ERR, "Failed to create TIS %u.", i);
490 priv->nr_virtqs = nr_vring;
/* Only virtqs flagged as enabled are set up here. */
491 for (i = 0; i < nr_vring; i++)
492 if (priv->virtqs[i].enable && mlx5_vdpa_virtq_setup(priv, i))
496 mlx5_vdpa_virtqs_release(priv);
/*
 * Compare the vring parameters currently reported by vhost (size, kickfd,
 * callfd) with the values cached in `virtq`.
 * The callfd is compared with the cached one when a CQ object exists;
 * when it does not, a callfd other than -1 is checked for instead.
 * NOTE(review): the returned values are not visible here.
 */
501 mlx5_vdpa_virtq_is_modified(struct mlx5_vdpa_priv *priv,
502 struct mlx5_vdpa_virtq *virtq)
504 struct rte_vhost_vring vq;
505 int ret = rte_vhost_get_vhost_vring(priv->vid, virtq->index, &vq);
509 if (vq.size != virtq->vq_size || vq.kickfd != virtq->intr_handle.fd)
511 if (virtq->eqp.cq.cq_obj.cq) {
512 if (vq.callfd != virtq->eqp.cq.callfd)
514 } else if (vq.callfd != -1) {
/*
 * Update the enable state of virtq `index`.
 * While the device is not configured, only the requested state is stored
 * in virtq->enable. Otherwise the visible flow checks whether the vring
 * was modified, updates steering for receive queues, unsets the virtq,
 * sets it up again and updates steering once more.
 * NOTE(review): the exact conditions joining these steps, and the return
 * statements, are not visible here.
 */
521 mlx5_vdpa_virtq_enable(struct mlx5_vdpa_priv *priv, int index, int enable)
523 struct mlx5_vdpa_virtq *virtq = &priv->virtqs[index];
526 DRV_LOG(INFO, "Update virtq %d status %sable -> %sable.", index,
527 virtq->enable ? "en" : "dis", enable ? "en" : "dis");
/* Device not configured yet: only store the flag, normalised to 0 or 1. */
528 if (!priv->configured) {
529 virtq->enable = !!enable;
/* Same state requested again: check whether the vring parameters changed in the meantime. */
532 if (virtq->enable == !!enable) {
535 ret = mlx5_vdpa_virtq_is_modified(priv, virtq);
537 DRV_LOG(ERR, "Virtq %d modify check failed.", index);
542 DRV_LOG(INFO, "Virtq %d was modified, recreate it.", index);
/* For a receive queue, steering is updated before the virtq is unset; a failure is only a warning. */
546 if (is_virtq_recvq(virtq->index, priv->nr_virtqs)) {
547 ret = mlx5_vdpa_steer_update(priv);
549 DRV_LOG(WARNING, "Failed to disable steering "
550 "for virtq %d.", index);
552 mlx5_vdpa_virtq_unset(virtq);
555 ret = mlx5_vdpa_virtq_setup(priv, index);
557 DRV_LOG(ERR, "Failed to setup virtq %d.", index);
/* For a receive queue, steering is updated again after setup; a failure is only a warning. */
561 if (is_virtq_recvq(virtq->index, priv->nr_virtqs)) {
562 ret = mlx5_vdpa_steer_update(priv);
564 DRV_LOG(WARNING, "Failed to enable steering "
565 "for virtq %d.", index);
/*
 * Read the statistics of virtq `qid` into `stats`, which has room for `n`
 * entries.
 * The HW counters are queried once. Each reported value is the HW counter
 * minus the matching field of virtq->reset. `ret` is the smaller of n and
 * MLX5_VDPA_STATS_MAX.
 * NOTE(review): the statement guarded by each `if (ret == ...)` is not
 * visible here. It is presumably an early return of `ret`, so that only
 * the first `ret` entries get written - confirm.
 */
572 mlx5_vdpa_virtq_stats_get(struct mlx5_vdpa_priv *priv, int qid,
573 struct rte_vdpa_stat *stats, unsigned int n)
575 struct mlx5_vdpa_virtq *virtq = &priv->virtqs[qid];
576 struct mlx5_devx_virtio_q_couners_attr attr = {0};
/* Without a counters object there is nothing to read. */
579 if (!virtq->counters) {
580 DRV_LOG(ERR, "Failed to read virtq %d statistics - virtq "
584 ret = mlx5_devx_cmd_query_virtio_q_counters(virtq->counters, &attr);
586 DRV_LOG(ERR, "Failed to read virtq %d stats from HW.", qid);
/* Limit the number of reported entries to the stats this driver defines. */
589 ret = (int)RTE_MIN(n, (unsigned int)MLX5_VDPA_STATS_MAX);
590 if (ret == MLX5_VDPA_STATS_RECEIVED_DESCRIPTORS)
592 stats[MLX5_VDPA_STATS_RECEIVED_DESCRIPTORS] = (struct rte_vdpa_stat) {
593 .id = MLX5_VDPA_STATS_RECEIVED_DESCRIPTORS,
594 .value = attr.received_desc - virtq->reset.received_desc,
596 if (ret == MLX5_VDPA_STATS_COMPLETED_DESCRIPTORS)
598 stats[MLX5_VDPA_STATS_COMPLETED_DESCRIPTORS] = (struct rte_vdpa_stat) {
599 .id = MLX5_VDPA_STATS_COMPLETED_DESCRIPTORS,
600 .value = attr.completed_desc - virtq->reset.completed_desc,
602 if (ret == MLX5_VDPA_STATS_BAD_DESCRIPTOR_ERRORS)
604 stats[MLX5_VDPA_STATS_BAD_DESCRIPTOR_ERRORS] = (struct rte_vdpa_stat) {
605 .id = MLX5_VDPA_STATS_BAD_DESCRIPTOR_ERRORS,
606 .value = attr.bad_desc_errors - virtq->reset.bad_desc_errors,
608 if (ret == MLX5_VDPA_STATS_EXCEED_MAX_CHAIN)
610 stats[MLX5_VDPA_STATS_EXCEED_MAX_CHAIN] = (struct rte_vdpa_stat) {
611 .id = MLX5_VDPA_STATS_EXCEED_MAX_CHAIN,
612 .value = attr.exceed_max_chain - virtq->reset.exceed_max_chain,
614 if (ret == MLX5_VDPA_STATS_INVALID_BUFFER)
616 stats[MLX5_VDPA_STATS_INVALID_BUFFER] = (struct rte_vdpa_stat) {
617 .id = MLX5_VDPA_STATS_INVALID_BUFFER,
618 .value = attr.invalid_buffer - virtq->reset.invalid_buffer,
620 if (ret == MLX5_VDPA_STATS_COMPLETION_ERRORS)
622 stats[MLX5_VDPA_STATS_COMPLETION_ERRORS] = (struct rte_vdpa_stat) {
623 .id = MLX5_VDPA_STATS_COMPLETION_ERRORS,
624 .value = attr.error_cqes - virtq->reset.error_cqes,
/*
 * Reset the statistics of virtq `qid`.
 * Requires the virtq to have a counters object, then queries the HW
 * counters.
 * NOTE(review): the destination of the query is not visible here. It is
 * presumably virtq->reset, the baseline that mlx5_vdpa_virtq_stats_get()
 * subtracts from each counter - confirm. The end of the function is not
 * visible either.
 */
630 mlx5_vdpa_virtq_stats_reset(struct mlx5_vdpa_priv *priv, int qid)
632 struct mlx5_vdpa_virtq *virtq = &priv->virtqs[qid];
635 if (!virtq->counters) {
636 DRV_LOG(ERR, "Failed to read virtq %d statistics - virtq "
640 ret = mlx5_devx_cmd_query_virtio_q_counters(virtq->counters,
643 DRV_LOG(ERR, "Failed to read virtq %d reset stats from HW.",