1 /* SPDX-License-Identifier: BSD-3-Clause
2 * Copyright 2019 Mellanox Technologies, Ltd
6 #include <sys/eventfd.h>
8 #include <rte_malloc.h>
12 #include <mlx5_common.h>
14 #include "mlx5_vdpa_utils.h"
15 #include "mlx5_vdpa.h"
/*
 * Interrupt callback for a virtq kick fd.
 *
 * cb_arg is the struct mlx5_vdpa_virtq the callback was registered with.
 * The visible statements read the kick fd, write the queue index to the
 * device doorbell register and, if the host notifier of the queue is still
 * disabled, ask vhost to enable it.
 *
 * NOTE(review): this listing elides some source lines (braces, local
 * declarations, return statements); the comments below describe only what
 * the visible lines establish.
 */
19 mlx5_vdpa_virtq_kick_handler(void *cb_arg)
21 struct mlx5_vdpa_virtq *virtq = cb_arg;
22 struct mlx5_vdpa_priv *priv = virtq->priv;
/*
 * Device not configured and queue disabled: log and (presumably) skip.
 * NOTE(review): the branch is taken only when BOTH conditions hold -
 * confirm that '&&' rather than '||' is intended.
 */
27 if (priv->state != MLX5_VDPA_STATE_CONFIGURED && !virtq->enable) {
28 DRV_LOG(ERR, "device %d queue %d down, skip kick handling",
29 priv->vid, virtq->index);
/* Guard against an unset (negative) kick fd. */
32 if (rte_intr_fd_get(virtq->intr_handle) < 0)
/*
 * At most 3 attempts to read the kick fd. Only the EWOULDBLOCK part of the
 * errno test is visible here; presumably transient errors are retried and
 * any other failure is logged - TODO confirm against the full source.
 */
34 for (retry = 0; retry < 3; ++retry) {
35 nbytes = read(rte_intr_fd_get(virtq->intr_handle), &buf,
39 errno == EWOULDBLOCK ||
42 DRV_LOG(ERR, "Failed to read kickfd of virtq %d: %s",
43 virtq->index, strerror(errno));
/* Ring the doorbell: the queue index is written to the doorbell address. */
49 rte_write32(virtq->index, priv->virtq_db_addr);
/* Same down-check as above, repeated after the doorbell write. */
50 if (priv->state != MLX5_VDPA_STATE_CONFIGURED && !virtq->enable) {
51 DRV_LOG(ERR, "device %d queue %d down, skip kick handling",
52 priv->vid, virtq->index);
/*
 * Notifier still disabled: request vhost to enable the host notifier.
 * A non-zero return marks the notifier state as ERR; the ENABLED
 * assignment below is presumably the else branch (the 'else' line is not
 * shown). The resulting state is logged.
 */
55 if (virtq->notifier_state == MLX5_VDPA_NOTIFIER_STATE_DISABLED) {
56 if (rte_vhost_host_notifier_ctrl(priv->vid, virtq->index, true))
57 virtq->notifier_state = MLX5_VDPA_NOTIFIER_STATE_ERR;
59 virtq->notifier_state =
60 MLX5_VDPA_NOTIFIER_STATE_ENABLED;
61 DRV_LOG(INFO, "Virtq %u notifier state is %s.", virtq->index,
62 virtq->notifier_state ==
63 MLX5_VDPA_NOTIFIER_STATE_ENABLED ? "enabled" :
66 DRV_LOG(DEBUG, "Ring virtq %u doorbell.", virtq->index);
69 /* Release cached VQ resources. */
/*
 * Walks every slot of priv->virtqs (max_num_virtio_queues * 2 entries) and,
 * for each UMEM of each slot, deregisters the DevX umem object if present,
 * frees the backing buffer if present, and resets the recorded size to 0.
 * Pointers are set to NULL after release so a repeated call finds nothing
 * to free.
 */
71 mlx5_vdpa_virtqs_cleanup(struct mlx5_vdpa_priv *priv)
75 for (i = 0; i < priv->caps.max_num_virtio_queues * 2; i++) {
76 struct mlx5_vdpa_virtq *virtq = &priv->virtqs[i];
78 for (j = 0; j < RTE_DIM(virtq->umems); ++j) {
/* Deregister the umem object before releasing its buffer. */
79 if (virtq->umems[j].obj) {
80 claim_zero(mlx5_glue->devx_umem_dereg
81 (virtq->umems[j].obj));
82 virtq->umems[j].obj = NULL;
84 if (virtq->umems[j].buf) {
85 rte_free(virtq->umems[j].buf);
86 virtq->umems[j].buf = NULL;
88 virtq->umems[j].size = 0;
/*
 * Tear down the runtime state of one virtq: unregister the kick interrupt
 * callback, free the interrupt handle, stop and destroy the DevX virtq
 * object, destroy the event QP and reset the notifier state to DISABLED.
 * The UMEMs and counters are not touched by the visible statements.
 */
94 mlx5_vdpa_virtq_unset(struct mlx5_vdpa_virtq *virtq)
/*
 * Only when a kick fd is set (>= 0): keep retrying the unregistration while
 * it returns -EAGAIN, sleeping MLX5_VDPA_INTR_RETRIES_USEC between tries,
 * then mark the fd as unset (-1).
 * NOTE(review): the loop only runs if 'ret' starts out as -EAGAIN; its
 * declaration is on a line not shown - TODO confirm.
 */
98 if (rte_intr_fd_get(virtq->intr_handle) >= 0) {
99 while (ret == -EAGAIN) {
100 ret = rte_intr_callback_unregister(virtq->intr_handle,
101 mlx5_vdpa_virtq_kick_handler, virtq);
102 if (ret == -EAGAIN) {
103 DRV_LOG(DEBUG, "Try again to unregister fd %d of virtq %hu interrupt",
104 rte_intr_fd_get(virtq->intr_handle),
106 usleep(MLX5_VDPA_INTR_RETRIES_USEC);
109 rte_intr_fd_set(virtq->intr_handle, -1);
/*
 * NOTE(review): no reset of virtq->intr_handle is visible after this free;
 * confirm the pointer is cleared or re-allocated before any later
 * rte_intr_fd_get() on it.
 */
111 rte_intr_instance_free(virtq->intr_handle);
/*
 * Stop the hardware queue (a failure is only logged as a warning), then
 * destroy the DevX virtq object. The guarding conditions are on lines not
 * shown.
 */
113 ret = mlx5_vdpa_virtq_stop(virtq->priv, virtq->index);
115 DRV_LOG(WARNING, "Failed to stop virtq %d.",
117 claim_zero(mlx5_devx_cmd_destroy(virtq->virtq));
/* The event QP is destroyed only if its firmware QP was created. */
120 if (virtq->eqp.fw_qp)
121 mlx5_vdpa_event_qp_destroy(&virtq->eqp);
122 virtq->notifier_state = MLX5_VDPA_NOTIFIER_STATE_DISABLED;
/*
 * Release all active virtqs of the device: for each of the first
 * priv->nr_virtqs slots, unset the virtq and destroy its counters object.
 * NOTE(review): the guard around the counters destroy and any trailing
 * resets of priv fields are on lines not shown here.
 */
127 mlx5_vdpa_virtqs_release(struct mlx5_vdpa_priv *priv)
130 struct mlx5_vdpa_virtq *virtq;
132 for (i = 0; i < priv->nr_virtqs; i++) {
133 virtq = &priv->virtqs[i];
134 mlx5_vdpa_virtq_unset(virtq);
136 claim_zero(mlx5_devx_cmd_destroy(virtq->counters));
/*
 * Change the hardware state of a virtq.
 *
 * A non-zero 'state' requests MLX5_VIRTQ_STATE_RDY, zero requests
 * MLX5_VIRTQ_STATE_SUSPEND. Returns whatever
 * mlx5_devx_cmd_modify_virtq() returns.
 */
143 mlx5_vdpa_virtq_modify(struct mlx5_vdpa_virtq *virtq, int state)
/* Only the state field is modified (MODIFY_TYPE_STATE). */
145 struct mlx5_devx_virtq_attr attr = {
146 .type = MLX5_VIRTQ_MODIFY_TYPE_STATE,
147 .state = state ? MLX5_VIRTQ_STATE_RDY :
148 MLX5_VIRTQ_STATE_SUSPEND,
149 .queue_index = virtq->index,
152 return mlx5_devx_cmd_modify_virtq(virtq->virtq, &attr);
/*
 * Suspend virtq 'index' of the device and synchronise its ring indexes
 * back to vhost.
 *
 * The queue is moved to the suspended state via mlx5_vdpa_virtq_modify(),
 * flagged as stopped, and the return value is that of
 * mlx5_vdpa_virtq_query(). Early-exit checks and the handling of a failed
 * modify are on lines not shown here.
 */
156 mlx5_vdpa_virtq_stop(struct mlx5_vdpa_priv *priv, int index)
158 struct mlx5_vdpa_virtq *virtq = &priv->virtqs[index];
/* state == 0 selects MLX5_VIRTQ_STATE_SUSPEND. */
163 ret = mlx5_vdpa_virtq_modify(virtq, 0);
166 virtq->stopped = true;
167 DRV_LOG(DEBUG, "vid %u virtq %u was stopped.", priv->vid, index);
168 return mlx5_vdpa_virtq_query(priv, index);
/*
 * Query the hardware state of virtq 'index' and hand the hardware ring
 * indexes to vhost through rte_vhost_set_vring_base(). A hardware error
 * state is reported with a warning.
 * Return statements are on lines not shown; failures of the query and of
 * the base update are each logged as errors.
 */
172 mlx5_vdpa_virtq_query(struct mlx5_vdpa_priv *priv, int index)
174 struct mlx5_devx_virtq_attr attr = {0};
175 struct mlx5_vdpa_virtq *virtq = &priv->virtqs[index];
/* A non-zero return from the DevX query is treated as failure. */
178 if (mlx5_devx_cmd_query_virtq(virtq->virtq, &attr)) {
179 DRV_LOG(ERR, "Failed to query virtq %d.", index);
182 DRV_LOG(INFO, "Query vid %d vring %d: hw_available_idx=%d, "
183 "hw_used_index=%d", priv->vid, index,
184 attr.hw_available_index, attr.hw_used_index);
/* The remaining argument of this call is on a line not shown. */
185 ret = rte_vhost_set_vring_base(priv->vid, index,
186 attr.hw_available_index,
189 DRV_LOG(ERR, "Failed to set virtq %d base.", index);
/* The error state is only reported; no visible statement acts on it. */
192 if (attr.state == MLX5_VIRTQ_STATE_ERROR)
193 DRV_LOG(WARNING, "vid %d vring %d hw error=%hhu",
194 priv->vid, index, attr.error_type);
/*
 * Translate a host virtual address into a guest physical address using the
 * vhost memory table 'mem'.
 *
 * The regions are scanned in order; when 'hva' lies inside
 * [host_user_addr, host_user_addr + size) of a region, the GPA is the
 * offset of 'hva' within that region added to the region's guest_phys_addr.
 * The initial value of 'gpa', the loop exit and the return statement are on
 * lines not shown (callers log a failure when the translation does not
 * succeed).
 */
199 mlx5_vdpa_hva_to_gpa(struct rte_vhost_memory *mem, uint64_t hva)
201 struct rte_vhost_mem_region *reg;
205 for (i = 0; i < mem->nregions; i++) {
206 reg = &mem->regions[i];
207 if (hva >= reg->host_user_addr &&
208 hva < reg->host_user_addr + reg->size) {
209 gpa = hva - reg->host_user_addr + reg->guest_phys_addr;
/*
 * Create and start the hardware virtq backing vring 'index' of the device.
 *
 * Visible steps, in order: read the vring from vhost, derive the creation
 * attributes from the negotiated features, create the event QP and the
 * counters object when needed, prepare the per-queue UMEMs, translate the
 * ring addresses (split rings), restore the ring indexes, create the DevX
 * virtq object, move it to the ready state, ring the doorbell, hook the
 * kick fd into the interrupt framework and subscribe to the error event.
 *
 * NOTE(review): this listing elides some source lines (error checks,
 * 'goto'/'return' statements, braces); the final mlx5_vdpa_virtq_unset()
 * call is presumably the common error path - TODO confirm.
 */
217 mlx5_vdpa_virtq_setup(struct mlx5_vdpa_priv *priv, int index)
219 struct mlx5_vdpa_virtq *virtq = &priv->virtqs[index];
220 struct rte_vhost_vring vq;
221 struct mlx5_devx_virtq_attr attr = {0};
225 uint16_t last_avail_idx;
226 uint16_t last_used_idx;
227 uint16_t event_num = MLX5_EVENT_TYPE_OBJECT_CHANGE;
/* Fetch the vring description (size, fds, ring addresses) from vhost. */
230 ret = rte_vhost_get_vhost_vring(priv->vid, index, &vq);
235 virtq->index = index;
236 virtq->vq_size = vq.size;
/* Each negotiated virtio feature bit becomes a 0/1 creation attribute. */
237 attr.tso_ipv4 = !!(priv->features & (1ULL << VIRTIO_NET_F_HOST_TSO4));
238 attr.tso_ipv6 = !!(priv->features & (1ULL << VIRTIO_NET_F_HOST_TSO6));
239 attr.tx_csum = !!(priv->features & (1ULL << VIRTIO_NET_F_CSUM));
240 attr.rx_csum = !!(priv->features & (1ULL << VIRTIO_NET_F_GUEST_CSUM));
241 attr.virtio_version_1_0 = !!(priv->features & (1ULL <<
242 VIRTIO_F_VERSION_1));
/* Packed ring layout when VIRTIO_F_RING_PACKED was negotiated, else split. */
243 attr.type = (priv->features & (1ULL << VIRTIO_F_RING_PACKED)) ?
244 MLX5_VIRTQ_TYPE_PACKED : MLX5_VIRTQ_TYPE_SPLIT;
246 * No need event QPs creation when the guest in poll mode or when the
247 * capability allows it.
/*
 * Event mode is QP when the vring has a call fd, or when the NO_MSIX bit is
 * absent from the event_mode capability; otherwise NO_MSIX is used.
 */
249 attr.event_mode = vq.callfd != -1 || !(priv->caps.event_mode & (1 <<
250 MLX5_VIRTQ_EVENT_MODE_NO_MSIX)) ?
251 MLX5_VIRTQ_EVENT_MODE_QP :
252 MLX5_VIRTQ_EVENT_MODE_NO_MSIX;
/*
 * QP mode: create the event QP for this queue and pass the firmware QP id
 * to the virtq. The INFO message below belongs to the non-QP case
 * (presumably an else branch on a line not shown).
 */
253 if (attr.event_mode == MLX5_VIRTQ_EVENT_MODE_QP) {
254 ret = mlx5_vdpa_event_qp_create(priv, vq.size, vq.callfd,
257 DRV_LOG(ERR, "Failed to create event QPs for virtq %d.",
261 attr.qp_id = virtq->eqp.fw_qp->id;
263 DRV_LOG(INFO, "Virtq %d is, for sure, working by poll mode, no"
264 " need event QPs and event mechanism.", index);
/*
 * Counters: an existing counters object is reused, otherwise one is
 * created; its id is attached to the virtq.
 */
266 if (priv->caps.queue_counters_valid) {
267 if (!virtq->counters)
268 virtq->counters = mlx5_devx_cmd_create_virtio_q_counters
270 if (!virtq->counters) {
271 DRV_LOG(ERR, "Failed to create virtq couners for virtq"
275 attr.counters_obj_id = virtq->counters->id;
277 /* Setup 3 UMEMs for each virtq. */
278 for (i = 0; i < RTE_DIM(virtq->umems); ++i) {
281 struct mlx5dv_devx_umem *obj;
/* Required size is linear in the ring size: a * vq.size + b. */
283 size = priv->caps.umems[i].a * vq.size + priv->caps.umems[i].b;
/* Same size and still registered: just zero the existing buffer. */
284 if (virtq->umems[i].size == size &&
285 virtq->umems[i].obj != NULL) {
286 /* Reuse registered memory. */
287 memset(virtq->umems[i].buf, 0, size);
/*
 * Otherwise drop whatever is cached for this UMEM and build a new one:
 * zeroed allocation with 4096 alignment, registered as a DevX umem with
 * local write access.
 */
290 if (virtq->umems[i].obj)
291 claim_zero(mlx5_glue->devx_umem_dereg
292 (virtq->umems[i].obj));
293 if (virtq->umems[i].buf)
294 rte_free(virtq->umems[i].buf);
295 virtq->umems[i].size = 0;
296 virtq->umems[i].obj = NULL;
297 virtq->umems[i].buf = NULL;
298 buf = rte_zmalloc(__func__, size, 4096);
300 DRV_LOG(ERR, "Cannot allocate umem %d memory for virtq"
304 obj = mlx5_glue->devx_umem_reg(priv->cdev->ctx, buf, size,
305 IBV_ACCESS_LOCAL_WRITE);
307 DRV_LOG(ERR, "Failed to register umem %d for virtq %u.",
/* The cache entry is filled only after both steps above. */
311 virtq->umems[i].size = size;
312 virtq->umems[i].buf = buf;
313 virtq->umems[i].obj = obj;
315 attr.umems[i].id = virtq->umems[i].obj->umem_id;
316 attr.umems[i].offset = 0;
317 attr.umems[i].size = virtq->umems[i].size;
/*
 * Split ring: the descriptor, used and available ring addresses reported by
 * vhost are host virtual addresses and are translated to guest physical
 * addresses through priv->vmem.
 */
319 if (attr.type == MLX5_VIRTQ_TYPE_SPLIT) {
320 gpa = mlx5_vdpa_hva_to_gpa(priv->vmem,
321 (uint64_t)(uintptr_t)vq.desc);
323 DRV_LOG(ERR, "Failed to get descriptor ring GPA.");
326 attr.desc_addr = gpa;
327 gpa = mlx5_vdpa_hva_to_gpa(priv->vmem,
328 (uint64_t)(uintptr_t)vq.used);
330 DRV_LOG(ERR, "Failed to get GPA for used ring.");
333 attr.used_addr = gpa;
334 gpa = mlx5_vdpa_hva_to_gpa(priv->vmem,
335 (uint64_t)(uintptr_t)vq.avail);
337 DRV_LOG(ERR, "Failed to get GPA for available ring.");
340 attr.available_addr = gpa;
/*
 * Start the hardware from the ring indexes known to vhost. Per the warning
 * text, a failed lookup falls back to index 0 (the assignment itself is on
 * lines not shown).
 */
342 ret = rte_vhost_get_vring_base(priv->vid, index, &last_avail_idx,
347 DRV_LOG(WARNING, "Couldn't get vring base, idx are set to 0");
349 DRV_LOG(INFO, "vid %d: Init last_avail_idx=%d, last_used_idx=%d for "
350 "virtq %d.", priv->vid, last_avail_idx,
351 last_used_idx, index);
353 attr.hw_available_index = last_avail_idx;
354 attr.hw_used_index = last_used_idx;
355 attr.q_size = vq.size;
356 attr.mkey = priv->gpa_mkey_index;
/* TIS selection: (index / 2) modulo the number of LAG ports. */
357 attr.tis_id = priv->tiss[(index / 2) % priv->num_lag_ports]->id;
358 attr.queue_index = index;
359 attr.pd = priv->cdev->pdn;
360 attr.hw_latency_mode = priv->hw_latency_mode;
361 attr.hw_max_latency_us = priv->hw_max_latency_us;
362 attr.hw_max_pending_comp = priv->hw_max_pending_comp;
363 virtq->virtq = mlx5_devx_cmd_create_virtq(priv->cdev->ctx, &attr);
/*
 * Enable guest notifications, move the new queue to the ready state and
 * write its index to the doorbell register.
 */
367 claim_zero(rte_vhost_enable_guest_notification(priv->vid, index, 1));
368 if (mlx5_vdpa_virtq_modify(virtq, 1))
371 rte_write32(virtq->index, priv->virtq_db_addr);
372 /* Setup doorbell mapping. */
/*
 * A fresh interrupt handle is allocated and loaded with the vring kick fd.
 * An fd of -1 only produces a warning; the type setup and callback
 * registration below presumably apply to a valid fd (the 'else' is not
 * shown). On registration failure the fd is reset to -1.
 */
374 rte_intr_instance_alloc(RTE_INTR_INSTANCE_F_SHARED);
375 if (virtq->intr_handle == NULL) {
376 DRV_LOG(ERR, "Fail to allocate intr_handle");
380 if (rte_intr_fd_set(virtq->intr_handle, vq.kickfd))
383 if (rte_intr_fd_get(virtq->intr_handle) == -1) {
384 DRV_LOG(WARNING, "Virtq %d kickfd is invalid.", index);
386 if (rte_intr_type_set(virtq->intr_handle, RTE_INTR_HANDLE_EXT))
389 if (rte_intr_callback_register(virtq->intr_handle,
390 mlx5_vdpa_virtq_kick_handler,
392 rte_intr_fd_set(virtq->intr_handle, -1);
393 DRV_LOG(ERR, "Failed to register virtq %d interrupt.",
397 DRV_LOG(DEBUG, "Register fd %d interrupt for virtq %d.",
398 rte_intr_fd_get(virtq->intr_handle),
402 /* Subscribe virtq error event. */
/* Cookie layout: virtq version in the upper 32 bits, queue index below. */
404 cookie = ((uint64_t)virtq->version << 32) + index;
405 ret = mlx5_glue->devx_subscribe_devx_event(priv->err_chnl,
410 DRV_LOG(ERR, "Failed to subscribe device %d virtq %d error event.",
415 virtq->stopped = false;
416 /* Initial notification to ask Qemu handling completed buffers. */
417 if (virtq->eqp.cq.callfd != -1)
418 eventfd_write(virtq->eqp.cq.callfd, (eventfd_t)1);
419 DRV_LOG(DEBUG, "vid %u virtq %u was created successfully.", priv->vid,
/* Presumably the error path: undo whatever was set up above. */
423 mlx5_vdpa_virtq_unset(virtq);
/*
 * Check that every negotiated virtio feature the driver cares about is
 * backed by a matching capability in priv->caps.
 *
 * For each of PACKED ring, HOST_TSO4, HOST_TSO6, CSUM, GUEST_CSUM and
 * VERSION_1, a negotiated bit without the capability is logged as an
 * error. The return statements are on lines not shown; the caller treats a
 * non-zero result as failure.
 */
428 mlx5_vdpa_features_validate(struct mlx5_vdpa_priv *priv)
/* Packed rings need the PACKED bit in the virtio_queue_type capability. */
430 if (priv->features & (1ULL << VIRTIO_F_RING_PACKED)) {
431 if (!(priv->caps.virtio_queue_type & (1 <<
432 MLX5_VIRTQ_TYPE_PACKED))) {
433 DRV_LOG(ERR, "Failed to configure PACKED mode for vdev "
434 "%d - it was not reported by HW/driver"
435 " capability.", priv->vid);
439 if (priv->features & (1ULL << VIRTIO_NET_F_HOST_TSO4)) {
440 if (!priv->caps.tso_ipv4) {
441 DRV_LOG(ERR, "Failed to enable TSO4 for vdev %d - TSO4"
442 " was not reported by HW/driver capability.",
447 if (priv->features & (1ULL << VIRTIO_NET_F_HOST_TSO6)) {
448 if (!priv->caps.tso_ipv6) {
449 DRV_LOG(ERR, "Failed to enable TSO6 for vdev %d - TSO6"
450 " was not reported by HW/driver capability.",
/* VIRTIO_NET_F_CSUM maps to the tx_csum capability. */
455 if (priv->features & (1ULL << VIRTIO_NET_F_CSUM)) {
456 if (!priv->caps.tx_csum) {
457 DRV_LOG(ERR, "Failed to enable CSUM for vdev %d - CSUM"
458 " was not reported by HW/driver capability.",
/* VIRTIO_NET_F_GUEST_CSUM maps to the rx_csum capability. */
463 if (priv->features & (1ULL << VIRTIO_NET_F_GUEST_CSUM)) {
464 if (!priv->caps.rx_csum) {
465 DRV_LOG(ERR, "Failed to enable GUEST CSUM for vdev %d"
466 " GUEST CSUM was not reported by HW/driver "
467 "capability.", priv->vid);
471 if (priv->features & (1ULL << VIRTIO_F_VERSION_1)) {
472 if (!priv->caps.virtio_version_1_0) {
473 DRV_LOG(ERR, "Failed to enable version 1 for vdev %d "
474 "version 1 was not reported by HW/driver"
475 " capability.", priv->vid);
/*
 * Prepare all virtqs of the device: read and validate the negotiated
 * features, check the vring count against the device limit, then set up
 * every vring whose 'enable' flag is set.
 * The trailing mlx5_vdpa_virtqs_release() is presumably the error path
 * (the label and return statements are on lines not shown).
 */
483 mlx5_vdpa_virtqs_prepare(struct mlx5_vdpa_priv *priv)
486 uint16_t nr_vring = rte_vhost_get_vring_num(priv->vid);
487 int ret = rte_vhost_get_negotiated_features(priv->vid, &priv->features);
/* Fails if the features cannot be read or do not pass validation. */
489 if (ret || mlx5_vdpa_features_validate(priv)) {
490 DRV_LOG(ERR, "Failed to configure negotiated features.");
/* TSO4 or TSO6 negotiated without CSUM: the CSUM bit is forced on. */
493 if ((priv->features & (1ULL << VIRTIO_NET_F_CSUM)) == 0 &&
494 ((priv->features & (1ULL << VIRTIO_NET_F_HOST_TSO4)) > 0 ||
495 (priv->features & (1ULL << VIRTIO_NET_F_HOST_TSO6)) > 0)) {
496 /* Packet may be corrupted if TSO is enabled without CSUM. */
497 DRV_LOG(INFO, "TSO is enabled without CSUM, force CSUM.");
498 priv->features |= (1ULL << VIRTIO_NET_F_CSUM);
/* The limit is twice max_num_virtio_queues. */
500 if (nr_vring > priv->caps.max_num_virtio_queues * 2) {
501 DRV_LOG(ERR, "Do not support more than %d virtqs(%d).",
502 (int)priv->caps.max_num_virtio_queues * 2,
506 priv->nr_virtqs = nr_vring;
/* Disabled vrings are skipped; a non-zero setup result is a failure. */
507 for (i = 0; i < nr_vring; i++)
508 if (priv->virtqs[i].enable && mlx5_vdpa_virtq_setup(priv, i))
512 mlx5_vdpa_virtqs_release(priv);
/*
 * Compare the vring currently reported by vhost with the parameters cached
 * in 'virtq' at setup time: ring size, kick fd and call fd.
 * The values returned for "modified", "unchanged" and "lookup failed" are
 * on lines not shown here.
 */
517 mlx5_vdpa_virtq_is_modified(struct mlx5_vdpa_priv *priv,
518 struct mlx5_vdpa_virtq *virtq)
520 struct rte_vhost_vring vq;
521 int ret = rte_vhost_get_vhost_vring(priv->vid, virtq->index, &vq);
/* Size or kick fd differs from what the interrupt handle holds. */
525 if (vq.size != virtq->vq_size || vq.kickfd !=
526 rte_intr_fd_get(virtq->intr_handle))
/*
 * With an event CQ in place the call fd must match the cached one; without
 * a CQ, any call fd other than -1 is checked by the else-if branch.
 */
528 if (virtq->eqp.cq.cq_obj.cq) {
529 if (vq.callfd != virtq->eqp.cq.callfd)
531 } else if (vq.callfd != -1) {
/*
 * Apply an enable/disable request from vhost to virtq 'index'.
 *
 * While the device is only in the PROBED state the flag is just recorded.
 * Otherwise the visible statements check whether an unchanged request
 * still needs a re-creation (vring parameters modified), tear the queue
 * down via mlx5_vdpa_virtq_unset(), set it up again via
 * mlx5_vdpa_virtq_setup(), and refresh steering for receive queues.
 *
 * NOTE(review): several conditions and returns are on lines not shown, so
 * the exact branch structure cannot be confirmed from this listing.
 */
538 mlx5_vdpa_virtq_enable(struct mlx5_vdpa_priv *priv, int index, int enable)
540 struct mlx5_vdpa_virtq *virtq = &priv->virtqs[index];
543 DRV_LOG(INFO, "Update virtq %d status %sable -> %sable.", index,
544 virtq->enable ? "en" : "dis", enable ? "en" : "dis");
/* Only the flag is stored in the PROBED state ('enable' normalised to 0/1). */
545 if (priv->state == MLX5_VDPA_STATE_PROBED) {
546 virtq->enable = !!enable;
/*
 * Request equals the current state: the queue is compared against the
 * current vring and, per the INFO message, re-created if it was modified.
 */
549 if (virtq->enable == !!enable) {
552 ret = mlx5_vdpa_virtq_is_modified(priv, virtq);
554 DRV_LOG(ERR, "Virtq %d modify check failed.", index);
559 DRV_LOG(INFO, "Virtq %d was modified, recreate it.", index);
/*
 * Tear-down: steering is updated for receive queues (a failure is only a
 * warning) before the queue is unset.
 */
563 if (is_virtq_recvq(virtq->index, priv->nr_virtqs)) {
564 ret = mlx5_vdpa_steer_update(priv);
566 DRV_LOG(WARNING, "Failed to disable steering "
567 "for virtq %d.", index);
569 mlx5_vdpa_virtq_unset(virtq);
/* Bring-up: set the queue up, then update steering for receive queues. */
572 ret = mlx5_vdpa_virtq_setup(priv, index);
574 DRV_LOG(ERR, "Failed to setup virtq %d.", index);
578 if (is_virtq_recvq(virtq->index, priv->nr_virtqs)) {
579 ret = mlx5_vdpa_steer_update(priv);
581 DRV_LOG(WARNING, "Failed to enable steering "
582 "for virtq %d.", index);
/*
 * Read the hardware counters of virtq 'qid' into 'stats'.
 *
 * 'n' is the capacity of 'stats'; the number of entries considered is
 * min(n, MLX5_VDPA_STATS_MAX). Each value is the hardware counter minus
 * the snapshot kept in virtq->reset.
 * A missing counters object or a failed hardware query is logged as an
 * error; the returned values are on lines not shown here.
 */
589 mlx5_vdpa_virtq_stats_get(struct mlx5_vdpa_priv *priv, int qid,
590 struct rte_vdpa_stat *stats, unsigned int n)
592 struct mlx5_vdpa_virtq *virtq = &priv->virtqs[qid];
593 struct mlx5_devx_virtio_q_couners_attr attr = {0};
596 if (!virtq->counters) {
597 DRV_LOG(ERR, "Failed to read virtq %d statistics - virtq "
601 ret = mlx5_devx_cmd_query_virtio_q_counters(virtq->counters, &attr);
603 DRV_LOG(ERR, "Failed to read virtq %d stats from HW.", qid);
/*
 * Before each entry is written, 'ret' is compared with that entry's id;
 * presumably the function returns at that point so that no more than 'ret'
 * entries are filled (the statement after each test is not shown).
 */
606 ret = (int)RTE_MIN(n, (unsigned int)MLX5_VDPA_STATS_MAX);
607 if (ret == MLX5_VDPA_STATS_RECEIVED_DESCRIPTORS)
609 stats[MLX5_VDPA_STATS_RECEIVED_DESCRIPTORS] = (struct rte_vdpa_stat) {
610 .id = MLX5_VDPA_STATS_RECEIVED_DESCRIPTORS,
611 .value = attr.received_desc - virtq->reset.received_desc,
613 if (ret == MLX5_VDPA_STATS_COMPLETED_DESCRIPTORS)
615 stats[MLX5_VDPA_STATS_COMPLETED_DESCRIPTORS] = (struct rte_vdpa_stat) {
616 .id = MLX5_VDPA_STATS_COMPLETED_DESCRIPTORS,
617 .value = attr.completed_desc - virtq->reset.completed_desc,
619 if (ret == MLX5_VDPA_STATS_BAD_DESCRIPTOR_ERRORS)
621 stats[MLX5_VDPA_STATS_BAD_DESCRIPTOR_ERRORS] = (struct rte_vdpa_stat) {
622 .id = MLX5_VDPA_STATS_BAD_DESCRIPTOR_ERRORS,
623 .value = attr.bad_desc_errors - virtq->reset.bad_desc_errors,
625 if (ret == MLX5_VDPA_STATS_EXCEED_MAX_CHAIN)
627 stats[MLX5_VDPA_STATS_EXCEED_MAX_CHAIN] = (struct rte_vdpa_stat) {
628 .id = MLX5_VDPA_STATS_EXCEED_MAX_CHAIN,
629 .value = attr.exceed_max_chain - virtq->reset.exceed_max_chain,
631 if (ret == MLX5_VDPA_STATS_INVALID_BUFFER)
633 stats[MLX5_VDPA_STATS_INVALID_BUFFER] = (struct rte_vdpa_stat) {
634 .id = MLX5_VDPA_STATS_INVALID_BUFFER,
635 .value = attr.invalid_buffer - virtq->reset.invalid_buffer,
637 if (ret == MLX5_VDPA_STATS_COMPLETION_ERRORS)
639 stats[MLX5_VDPA_STATS_COMPLETION_ERRORS] = (struct rte_vdpa_stat) {
640 .id = MLX5_VDPA_STATS_COMPLETION_ERRORS,
641 .value = attr.error_cqes - virtq->reset.error_cqes,
647 mlx5_vdpa_virtq_stats_reset(struct mlx5_vdpa_priv *priv, int qid)
649 struct mlx5_vdpa_virtq *virtq = &priv->virtqs[qid];
652 if (!virtq->counters) {
653 DRV_LOG(ERR, "Failed to read virtq %d statistics - virtq "
657 ret = mlx5_devx_cmd_query_virtio_q_counters(virtq->counters,
660 DRV_LOG(ERR, "Failed to read virtq %d reset stats from HW.",