/* SPDX-License-Identifier: BSD-3-Clause
 * Copyright 2019 Mellanox Technologies, Ltd
 */
#include <unistd.h>
#include <stdint.h>
#include <sched.h>
#include <fcntl.h>
#include <errno.h>
#include <pthread.h>
#include <sys/eventfd.h>

#include <rte_malloc.h>
#include <rte_memory.h>
#include <rte_errno.h>
#include <rte_lcore.h>
#include <rte_atomic.h>
#include <rte_common.h>
#include <rte_io.h>
#include <rte_alarm.h>

#include <mlx5_common.h>
#include <mlx5_common_os.h>
#include <mlx5_common_devx.h>
#include <mlx5_glue.h>

#include "mlx5_vdpa_utils.h"
#include "mlx5_vdpa.h"

#define MLX5_VDPA_ERROR_TIME_SEC 3u

void
mlx5_vdpa_event_qp_global_release(struct mlx5_vdpa_priv *priv)
{
	mlx5_devx_uar_release(&priv->uar);
#ifdef HAVE_IBV_DEVX_EVENT
	if (priv->eventc) {
		mlx5_os_devx_destroy_event_channel(priv->eventc);
		priv->eventc = NULL;
	}
#endif
}

/* Prepare all the global resources for all the event objects. */
static int
mlx5_vdpa_event_qp_global_prepare(struct mlx5_vdpa_priv *priv)
{
	if (priv->eventc)
		return 0;
	priv->eventc = mlx5_os_devx_create_event_channel(priv->cdev->ctx,
			MLX5DV_DEVX_CREATE_EVENT_CHANNEL_FLAGS_OMIT_EV_DATA);
	if (!priv->eventc) {
		rte_errno = errno;
		DRV_LOG(ERR, "Failed to create event channel %d.", rte_errno);
		goto error;
	}
	if (mlx5_devx_uar_prepare(priv->cdev, &priv->uar) != 0) {
		DRV_LOG(ERR, "Failed to allocate UAR.");
		goto error;
	}
	return 0;
error:
	mlx5_vdpa_event_qp_global_release(priv);
	return -1;
}
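
/*
 * Explanatory note (added by the editor, not upstream text): the channel is
 * created with OMIT_EV_DATA, so each event carries only the 64-bit cookie
 * given at subscription time, which is all mlx5_vdpa_event_wait() needs to
 * recover the CQ pointer. The prepare/release pair is shared by all event
 * QPs of the device, and prepare returns early once the channel exists.
 */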

static void
mlx5_vdpa_cq_destroy(struct mlx5_vdpa_cq *cq)
{
	mlx5_devx_cq_destroy(&cq->cq_obj);
	memset(cq, 0, sizeof(*cq));
}

static inline void __rte_unused
mlx5_vdpa_cq_arm(struct mlx5_vdpa_priv *priv, struct mlx5_vdpa_cq *cq)
{
	uint32_t arm_sn = cq->arm_sn << MLX5_CQ_SQN_OFFSET;
	uint32_t cq_ci = cq->cq_ci & MLX5_CI_MASK;
	uint32_t doorbell_hi = arm_sn | MLX5_CQ_DBR_CMD_ALL | cq_ci;
	uint64_t doorbell = ((uint64_t)doorbell_hi << 32) | cq->cq_obj.cq->id;
	uint64_t db_be = rte_cpu_to_be_64(doorbell);

	mlx5_doorbell_ring(&priv->uar.cq_db, db_be, doorbell_hi,
			   &cq->cq_obj.db_rec[MLX5_CQ_ARM_DB], 0);
	cq->arm_sn++;
	cq->armed = 1;
}
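
/*
 * Explanatory note (added by the editor, not upstream text): the 64-bit arm
 * doorbell packs the "high" word (arm sequence number, command bits and the
 * masked consumer index) above the CQ number, e.g. with arm_sn = 1,
 * cq_ci = 5 and CQ id 0x42 the value is
 * (((1 << MLX5_CQ_SQN_OFFSET) | MLX5_CQ_DBR_CMD_ALL | 5) << 32) | 0x42.
 * Once armed, HW raises a single event on the channel for the next CQE and
 * the CQ stays silent until it is re-armed.
 */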

static int
mlx5_vdpa_cq_create(struct mlx5_vdpa_priv *priv, uint16_t log_desc_n,
		    int callfd, struct mlx5_vdpa_cq *cq)
{
	struct mlx5_devx_cq_attr attr = {
		.use_first_only = 1,
		.uar_page_id = mlx5_os_get_devx_uar_page_id(priv->uar.obj),
	};
	uint16_t event_nums[1] = {0};
	int ret;

	ret = mlx5_devx_cq_create(priv->cdev->ctx, &cq->cq_obj, log_desc_n,
				  &attr, SOCKET_ID_ANY);
	if (ret)
		goto error;
	cq->cq_ci = 0;
	cq->log_desc_n = log_desc_n;
	rte_spinlock_init(&cq->sl);
	/* Subscribe CQ event to the event channel controlled by the driver. */
	ret = mlx5_os_devx_subscribe_devx_event(priv->eventc,
						cq->cq_obj.cq->obj,
						sizeof(event_nums), event_nums,
						(uint64_t)(uintptr_t)cq);
	if (ret) {
		DRV_LOG(ERR, "Failed to subscribe CQE event.");
		rte_errno = errno;
		goto error;
	}
	cq->callfd = callfd;
	/* Init CQ to all ones so it is in HW ownership at start. */
	cq->cq_obj.cqes[0].op_own = MLX5_CQE_OWNER_MASK;
	cq->cq_obj.cqes[0].wqe_counter = rte_cpu_to_be_16(UINT16_MAX);
	/* First arming. */
	mlx5_vdpa_cq_arm(priv, cq);
	return 0;
error:
	mlx5_vdpa_cq_destroy(cq);
	return -1;
}
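
/*
 * Explanatory note (added by the editor, not upstream text): with
 * use_first_only set, HW collapses all CQEs of this CQ into entry 0.
 * Writing MLX5_CQE_OWNER_MASK and a wqe_counter of UINT16_MAX into that
 * entry hands it to HW with a counter one step behind cq_ci == 0, so the
 * first real completion produces a delta of exactly 1 in
 * mlx5_vdpa_cq_poll().
 */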

static inline uint32_t
mlx5_vdpa_cq_poll(struct mlx5_vdpa_cq *cq)
{
	struct mlx5_vdpa_event_qp *eqp =
				container_of(cq, struct mlx5_vdpa_event_qp, cq);
	const unsigned int cq_size = 1 << cq->log_desc_n;
	union {
		struct {
			uint16_t wqe_counter;
			uint8_t rsvd5;
			uint8_t op_own;
		};
		uint32_t word;
	} last_word;
	uint16_t next_wqe_counter = cq->cq_ci;
	uint16_t cur_wqe_counter;
	uint16_t comp;

	last_word.word = rte_read32(&cq->cq_obj.cqes[0].wqe_counter);
	cur_wqe_counter = rte_be_to_cpu_16(last_word.wqe_counter);
	comp = cur_wqe_counter + (uint16_t)1 - next_wqe_counter;
	if (comp) {
		cq->cq_ci += comp;
		MLX5_ASSERT(MLX5_CQE_OPCODE(last_word.op_own) !=
			    MLX5_CQE_INVALID);
		if (unlikely(!(MLX5_CQE_OPCODE(last_word.op_own) ==
			       MLX5_CQE_RESP_ERR ||
			       MLX5_CQE_OPCODE(last_word.op_own) ==
			       MLX5_CQE_REQ_ERR)))
			cq->errors++;
		rte_io_wmb();
		/* Ring CQ doorbell record. */
		cq->cq_obj.db_rec[0] = rte_cpu_to_be_32(cq->cq_ci);
		rte_io_wmb();
		/* Ring SW QP doorbell record. */
		eqp->sw_qp.db_rec[0] = rte_cpu_to_be_32(cq->cq_ci + cq_size);
	}
	return comp;
}
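
/*
 * Explanatory note (added by the editor, not upstream text): the poll reads
 * only CQE 0 and derives the number of new completions from the 16-bit
 * wqe_counter delta, e.g. with cq_ci == 3 and a CQE carrying
 * wqe_counter == 7 there are 7 + 1 - 3 = 5 new completions. The arithmetic
 * is done in uint16_t, so the initial UINT16_MAX counter against cq_ci == 0
 * yields 0 and counter wraparound stays correct.
 */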

static void
mlx5_vdpa_arm_all_cqs(struct mlx5_vdpa_priv *priv)
{
	struct mlx5_vdpa_cq *cq;
	int i;

	for (i = 0; i < priv->nr_virtqs; i++) {
		cq = &priv->virtqs[i].eqp.cq;
		if (cq->cq_obj.cq && !cq->armed)
			mlx5_vdpa_cq_arm(priv, cq);
	}
}

static void
mlx5_vdpa_timer_sleep(struct mlx5_vdpa_priv *priv, uint32_t max)
{
	if (priv->event_mode == MLX5_VDPA_EVENT_MODE_DYNAMIC_TIMER) {
		switch (max) {
		case 0:
			priv->timer_delay_us += priv->event_us;
			break;
		case 1:
			break;
		default:
			priv->timer_delay_us /= max;
			break;
		}
	}
	if (priv->timer_delay_us)
		usleep(priv->timer_delay_us);
	else
		/* Give up the CPU to improve polling threads scheduling. */
		sched_yield();
}
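
/*
 * Explanatory note (added by the editor, not upstream text): in
 * dynamic-timer mode the polling period adapts to traffic: an idle
 * iteration (max == 0) stretches the delay by event_us, while a busy one
 * (max > 1) divides it, so e.g. a 100us delay drops to 25us after a poll
 * that found 4 completions on some queue. A zero delay degenerates into a
 * sched_yield() loop.
 */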

/* Notify the virtio device of new traffic on a specific virtq. */
static uint32_t
mlx5_vdpa_queue_complete(struct mlx5_vdpa_cq *cq)
{
	uint32_t comp = 0;

	if (cq->cq_obj.cq) {
		comp = mlx5_vdpa_cq_poll(cq);
		if (comp) {
			if (cq->callfd != -1)
				eventfd_write(cq->callfd, (eventfd_t)1);
			cq->armed = 0;
		}
	}
	return comp;
}

/* Notify the virtio device of new traffic on any virtq. */
static uint32_t
mlx5_vdpa_queues_complete(struct mlx5_vdpa_priv *priv)
{
	uint32_t max = 0;
	int i;

	for (i = 0; i < priv->nr_virtqs; i++) {
		struct mlx5_vdpa_cq *cq = &priv->virtqs[i].eqp.cq;
		uint32_t comp = mlx5_vdpa_queue_complete(cq);

		if (comp > max)
			max = comp;
	}
	return max;
}

/* Wait on the event channel for a completion event from any CQ. */
static struct mlx5_vdpa_cq *
mlx5_vdpa_event_wait(struct mlx5_vdpa_priv *priv __rte_unused)
{
#ifdef HAVE_IBV_DEVX_EVENT
	union {
		struct mlx5dv_devx_async_event_hdr event_resp;
		uint8_t buf[sizeof(struct mlx5dv_devx_async_event_hdr) + 128];
	} out;
	int ret = mlx5_glue->devx_get_event(priv->eventc, &out.event_resp,
					    sizeof(out.buf));

	if (ret >= 0)
		return (struct mlx5_vdpa_cq *)(uintptr_t)out.event_resp.cookie;
	DRV_LOG(INFO, "Got error in devx_get_event, ret = %d, errno = %d.",
		ret, errno);
#endif
	return NULL;
}
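
/*
 * Explanatory note (added by the editor, not upstream text): the cookie
 * returned in the event header is the value registered in
 * mlx5_vdpa_cq_create(), i.e. the CQ pointer itself, so the waiter maps an
 * event back to its mlx5_vdpa_cq without any lookup table. The union merely
 * sizes the read buffer for the header plus event payload.
 */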

static void *
mlx5_vdpa_event_handle(void *arg)
{
	struct mlx5_vdpa_priv *priv = arg;
	struct mlx5_vdpa_cq *cq;
	uint32_t max;

	switch (priv->event_mode) {
	case MLX5_VDPA_EVENT_MODE_DYNAMIC_TIMER:
	case MLX5_VDPA_EVENT_MODE_FIXED_TIMER:
		priv->timer_delay_us = priv->event_us;
		while (1) {
			pthread_mutex_lock(&priv->vq_config_lock);
			max = mlx5_vdpa_queues_complete(priv);
			if (max == 0 && priv->no_traffic_counter++ >=
			    priv->no_traffic_max) {
				DRV_LOG(DEBUG, "Device %s traffic was stopped.",
					priv->vdev->device->name);
				mlx5_vdpa_arm_all_cqs(priv);
				do {
					pthread_mutex_unlock
						(&priv->vq_config_lock);
					cq = mlx5_vdpa_event_wait(priv);
					pthread_mutex_lock
						(&priv->vq_config_lock);
					if (cq == NULL ||
					    mlx5_vdpa_queue_complete(cq) > 0)
						break;
				} while (1);
				priv->timer_delay_us = priv->event_us;
				priv->no_traffic_counter = 0;
			} else if (max != 0) {
				priv->no_traffic_counter = 0;
			}
			pthread_mutex_unlock(&priv->vq_config_lock);
			mlx5_vdpa_timer_sleep(priv, max);
		}
		return NULL;
	case MLX5_VDPA_EVENT_MODE_ONLY_INTERRUPT:
		do {
			cq = mlx5_vdpa_event_wait(priv);
			if (cq != NULL) {
				pthread_mutex_lock(&priv->vq_config_lock);
				if (mlx5_vdpa_queue_complete(cq) > 0)
					mlx5_vdpa_cq_arm(priv, cq);
				pthread_mutex_unlock(&priv->vq_config_lock);
			}
		} while (1);
	default:
		return NULL;
	}
}
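
/*
 * Explanatory note (added by the editor, not upstream text): this thread
 * implements the event modes. In the timer modes it polls all CQs under
 * vq_config_lock and, after no_traffic_max consecutive idle polls, arms
 * every CQ and blocks in mlx5_vdpa_event_wait() until HW reports traffic
 * again. In interrupt-only mode there is no timer at all: each CQ event is
 * completed and the CQ is immediately re-armed.
 */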

static void
mlx5_vdpa_err_interrupt_handler(void *cb_arg __rte_unused)
{
#ifdef HAVE_IBV_DEVX_EVENT
	struct mlx5_vdpa_priv *priv = cb_arg;
	union {
		struct mlx5dv_devx_async_event_hdr event_resp;
		uint8_t buf[sizeof(struct mlx5dv_devx_async_event_hdr) + 128];
	} out;
	uint32_t vq_index, i, version;
	struct mlx5_vdpa_virtq *virtq;
	uint64_t sec;

	pthread_mutex_lock(&priv->vq_config_lock);
	while (mlx5_glue->devx_get_event(priv->err_chnl, &out.event_resp,
					 sizeof(out.buf)) >=
				      (ssize_t)sizeof(out.event_resp.cookie)) {
		vq_index = out.event_resp.cookie & UINT32_MAX;
		version = out.event_resp.cookie >> 32;
		if (vq_index >= priv->nr_virtqs) {
			DRV_LOG(ERR, "Invalid device %s error event virtq %d.",
				priv->vdev->device->name, vq_index);
			continue;
		}
		virtq = &priv->virtqs[vq_index];
		if (!virtq->enable || virtq->version != version)
			continue;
		if (rte_rdtsc() / rte_get_tsc_hz() < MLX5_VDPA_ERROR_TIME_SEC)
			continue;
		virtq->stopped = true;
		/* Query error info. */
		if (mlx5_vdpa_virtq_query(priv, vq_index))
			goto log;
		/* Disable the virtq. */
		if (mlx5_vdpa_virtq_enable(priv, vq_index, 0)) {
			DRV_LOG(ERR, "Failed to disable virtq %d.", vq_index);
			goto log;
		}
		/* Retry if errors happen less than N times in 3 seconds. */
		sec = (rte_rdtsc() - virtq->err_time[0]) / rte_get_tsc_hz();
		if (sec > MLX5_VDPA_ERROR_TIME_SEC) {
			/* Retry. */
			if (mlx5_vdpa_virtq_enable(priv, vq_index, 1))
				DRV_LOG(ERR, "Failed to enable virtq %d.",
					vq_index);
			else
				DRV_LOG(WARNING, "Recover virtq %d: %u.",
					vq_index, ++virtq->n_retry);
		} else {
			/* Retry timeout, give up. */
			DRV_LOG(ERR, "Device %s virtq %d failed to recover.",
				priv->vdev->device->name, vq_index);
		}
log:
		/* Shift the current time into the end of the error time log. */
		for (i = 1; i < RTE_DIM(virtq->err_time); i++)
			virtq->err_time[i - 1] = virtq->err_time[i];
		virtq->err_time[RTE_DIM(virtq->err_time) - 1] = rte_rdtsc();
	}
	pthread_mutex_unlock(&priv->vq_config_lock);
#endif
}
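
/*
 * Explanatory note (added by the editor, not upstream text): err_time is a
 * small FIFO of TSC stamps of the latest errors on the virtq. Recovery is
 * retried only while the oldest logged error is older than
 * MLX5_VDPA_ERROR_TIME_SEC, i.e. while errors arrive more slowly than
 * RTE_DIM(err_time) occurrences per window; a denser error burst makes the
 * driver give up re-enabling the queue.
 */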

int
mlx5_vdpa_err_event_setup(struct mlx5_vdpa_priv *priv)
{
	int ret;
	int flags;

	/* Set up the device event channel. */
	priv->err_chnl = mlx5_glue->devx_create_event_channel(priv->cdev->ctx,
							      0);
	if (!priv->err_chnl) {
		rte_errno = errno;
		DRV_LOG(ERR, "Failed to create device event channel %d.",
			rte_errno);
		goto error;
	}
	flags = fcntl(priv->err_chnl->fd, F_GETFL);
	ret = fcntl(priv->err_chnl->fd, F_SETFL, flags | O_NONBLOCK);
	if (ret) {
		rte_errno = errno;
		DRV_LOG(ERR, "Failed to change device event channel FD.");
		goto error;
	}
	if (rte_intr_fd_set(priv->err_intr_handle, priv->err_chnl->fd))
		goto error;

	if (rte_intr_type_set(priv->err_intr_handle, RTE_INTR_HANDLE_EXT))
		goto error;

	if (rte_intr_callback_register(priv->err_intr_handle,
				       mlx5_vdpa_err_interrupt_handler,
				       priv)) {
		rte_intr_fd_set(priv->err_intr_handle, 0);
		DRV_LOG(ERR, "Failed to register error interrupt for device %d.",
			priv->vid);
		goto error;
	}
	DRV_LOG(DEBUG, "Registered error interrupt for device %d.",
		priv->vid);
	return 0;
error:
	mlx5_vdpa_err_event_unset(priv);
	return -1;
}
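
/*
 * Explanatory note (added by the editor, not upstream text): the channel FD
 * is switched to O_NONBLOCK because the interrupt handler drains it in a
 * loop; registering it as RTE_INTR_HANDLE_EXT makes the EAL interrupt thread
 * call mlx5_vdpa_err_interrupt_handler() whenever the FD becomes readable.
 */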

void
mlx5_vdpa_err_event_unset(struct mlx5_vdpa_priv *priv)
{
	int retries = MLX5_VDPA_INTR_RETRIES;
	int ret = -EAGAIN;

	if (!rte_intr_fd_get(priv->err_intr_handle))
		return;
	while (retries-- && ret == -EAGAIN) {
		ret = rte_intr_callback_unregister(priv->err_intr_handle,
					     mlx5_vdpa_err_interrupt_handler,
					     priv);
		if (ret == -EAGAIN) {
			DRV_LOG(DEBUG, "Try again to unregister fd %d "
				"of error interrupt, retries = %d.",
				rte_intr_fd_get(priv->err_intr_handle),
				retries);
			rte_pause();
		}
	}
	if (priv->err_chnl) {
#ifdef HAVE_IBV_DEVX_EVENT
		union {
			struct mlx5dv_devx_async_event_hdr event_resp;
			uint8_t buf[sizeof(struct mlx5dv_devx_async_event_hdr) +
				    128];
		} out;

		/* Clean all pending events. */
		while (mlx5_glue->devx_get_event(priv->err_chnl,
		       &out.event_resp, sizeof(out.buf)) >=
		       (ssize_t)sizeof(out.event_resp.cookie))
			;
#endif
		mlx5_glue->devx_destroy_event_channel(priv->err_chnl);
		priv->err_chnl = NULL;
	}
}

int
mlx5_vdpa_cqe_event_setup(struct mlx5_vdpa_priv *priv)
{
	int ret;
	rte_cpuset_t cpuset;
	pthread_attr_t attr;
	char name[16];
	const struct sched_param sp = {
		.sched_priority = sched_get_priority_max(SCHED_RR),
	};

	if (!priv->eventc)
		/* All virtqs are in poll mode. */
		return 0;
	pthread_attr_init(&attr);
	ret = pthread_attr_setschedpolicy(&attr, SCHED_RR);
	if (ret) {
		DRV_LOG(ERR, "Failed to set thread sched policy = RR.");
		return -1;
	}
	ret = pthread_attr_setschedparam(&attr, &sp);
	if (ret) {
		DRV_LOG(ERR, "Failed to set thread priority.");
		return -1;
	}
	ret = pthread_create(&priv->timer_tid, &attr, mlx5_vdpa_event_handle,
			     (void *)priv);
	if (ret) {
		DRV_LOG(ERR, "Failed to create timer thread.");
		return -1;
	}
	CPU_ZERO(&cpuset);
	if (priv->event_core != -1)
		CPU_SET(priv->event_core, &cpuset);
	else
		cpuset = rte_lcore_cpuset(rte_get_main_lcore());
	ret = pthread_setaffinity_np(priv->timer_tid, sizeof(cpuset), &cpuset);
	if (ret) {
		DRV_LOG(ERR, "Failed to set thread affinity.");
		return -1;
	}
	snprintf(name, sizeof(name), "vDPA-mlx5-%d", priv->vid);
	ret = rte_thread_setname(priv->timer_tid, name);
	if (ret)
		DRV_LOG(DEBUG, "Cannot set timer thread name.");
	return 0;
}

void
mlx5_vdpa_cqe_event_unset(struct mlx5_vdpa_priv *priv)
{
	void *status;

	if (priv->timer_tid) {
		pthread_cancel(priv->timer_tid);
		pthread_join(priv->timer_tid, &status);
	}
	priv->timer_tid = 0;
}

void
mlx5_vdpa_event_qp_destroy(struct mlx5_vdpa_event_qp *eqp)
{
	mlx5_devx_qp_destroy(&eqp->sw_qp);
	if (eqp->fw_qp)
		claim_zero(mlx5_devx_cmd_destroy(eqp->fw_qp));
	mlx5_vdpa_cq_destroy(&eqp->cq);
	memset(eqp, 0, sizeof(*eqp));
}

static int
mlx5_vdpa_qps2rts(struct mlx5_vdpa_event_qp *eqp)
{
	if (mlx5_devx_cmd_modify_qp_state(eqp->fw_qp, MLX5_CMD_OP_RST2INIT_QP,
					  eqp->sw_qp.qp->id)) {
		DRV_LOG(ERR, "Failed to modify FW QP to INIT state(%u).",
			rte_errno);
		return -1;
	}
	if (mlx5_devx_cmd_modify_qp_state(eqp->sw_qp.qp,
					  MLX5_CMD_OP_RST2INIT_QP,
					  eqp->fw_qp->id)) {
		DRV_LOG(ERR, "Failed to modify SW QP to INIT state(%u).",
			rte_errno);
		return -1;
	}
	if (mlx5_devx_cmd_modify_qp_state(eqp->fw_qp, MLX5_CMD_OP_INIT2RTR_QP,
					  eqp->sw_qp.qp->id)) {
		DRV_LOG(ERR, "Failed to modify FW QP to RTR state(%u).",
			rte_errno);
		return -1;
	}
	if (mlx5_devx_cmd_modify_qp_state(eqp->sw_qp.qp,
					  MLX5_CMD_OP_INIT2RTR_QP,
					  eqp->fw_qp->id)) {
		DRV_LOG(ERR, "Failed to modify SW QP to RTR state(%u).",
			rte_errno);
		return -1;
	}
	if (mlx5_devx_cmd_modify_qp_state(eqp->fw_qp, MLX5_CMD_OP_RTR2RTS_QP,
					  eqp->sw_qp.qp->id)) {
		DRV_LOG(ERR, "Failed to modify FW QP to RTS state(%u).",
			rte_errno);
		return -1;
	}
	if (mlx5_devx_cmd_modify_qp_state(eqp->sw_qp.qp, MLX5_CMD_OP_RTR2RTS_QP,
					  eqp->fw_qp->id)) {
		DRV_LOG(ERR, "Failed to modify SW QP to RTS state(%u).",
			rte_errno);
		return -1;
	}
	return 0;
}
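
/*
 * Explanatory note (added by the editor, not upstream text): the FW QP and
 * the SW QP form a loopback pair, so each one is walked through
 * RST -> INIT -> RTR -> RTS with the peer's QP number as the remote side,
 * mirroring a regular RC connection setup done entirely via DevX commands.
 */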

int
mlx5_vdpa_event_qp_create(struct mlx5_vdpa_priv *priv, uint16_t desc_n,
			  int callfd, struct mlx5_vdpa_event_qp *eqp)
{
	struct mlx5_devx_qp_attr attr = {0};
	uint16_t log_desc_n = rte_log2_u32(desc_n);
	int ret;

	if (mlx5_vdpa_event_qp_global_prepare(priv))
		return -1;
	if (mlx5_vdpa_cq_create(priv, log_desc_n, callfd, &eqp->cq))
		return -1;
	attr.pd = priv->cdev->pdn;
	attr.ts_format =
		mlx5_ts_format_conv(priv->cdev->config.hca_attr.qp_ts_format);
	eqp->fw_qp = mlx5_devx_cmd_create_qp(priv->cdev->ctx, &attr);
	if (!eqp->fw_qp) {
		DRV_LOG(ERR, "Failed to create FW QP(%u).", rte_errno);
		goto error;
	}
	attr.uar_index = mlx5_os_get_devx_uar_page_id(priv->uar.obj);
	attr.cqn = eqp->cq.cq_obj.cq->id;
	attr.num_of_receive_wqes = RTE_BIT32(log_desc_n);
	attr.log_rq_stride = rte_log2_u32(MLX5_WSEG_SIZE);
	attr.num_of_send_wqbbs = 0; /* No SQ needed. */
	attr.ts_format =
		mlx5_ts_format_conv(priv->cdev->config.hca_attr.qp_ts_format);
	ret = mlx5_devx_qp_create(priv->cdev->ctx, &(eqp->sw_qp),
				  attr.num_of_receive_wqes *
				  MLX5_WSEG_SIZE, &attr, SOCKET_ID_ANY);
	if (ret) {
		DRV_LOG(ERR, "Failed to create SW QP(%u).", rte_errno);
		goto error;
	}
	if (mlx5_vdpa_qps2rts(eqp))
		goto error;
	/* First ringing. */
	rte_write32(rte_cpu_to_be_32(RTE_BIT32(log_desc_n)),
		    &eqp->sw_qp.db_rec[0]);
	return 0;
error:
	mlx5_vdpa_event_qp_destroy(eqp);
	return -1;
}
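
/*
 * Explanatory note (added by the editor, not upstream text): the event QP
 * has no SQ (num_of_send_wqbbs = 0) and an RQ of single-WQE-segment strides;
 * it exists only so that HW can generate completions on the paired CQ that
 * wake the event thread. The final rte_write32() primes the RQ doorbell
 * record with the full ring size, handing every receive slot to HW from the
 * start.
 */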