vdpa/mlx5: default polling mode delay time to zero
[dpdk.git] drivers/vdpa/mlx5/mlx5_vdpa_event.c
1 /* SPDX-License-Identifier: BSD-3-Clause
2  * Copyright 2019 Mellanox Technologies, Ltd
3  */
4 #include <unistd.h>
5 #include <stdint.h>
6 #include <fcntl.h>
7 #include <sys/eventfd.h>
8
9 #include <rte_malloc.h>
10 #include <rte_errno.h>
11 #include <rte_lcore.h>
12 #include <rte_atomic.h>
13 #include <rte_common.h>
14 #include <rte_io.h>
15 #include <rte_alarm.h>
16
17 #include <mlx5_common.h>
18 #include <mlx5_common_os.h>
19 #include <mlx5_glue.h>
20
21 #include "mlx5_vdpa_utils.h"
22 #include "mlx5_vdpa.h"
23
24
25 #define MLX5_VDPA_ERROR_TIME_SEC 3u
26
27 void
28 mlx5_vdpa_event_qp_global_release(struct mlx5_vdpa_priv *priv)
29 {
30         if (priv->uar) {
31                 mlx5_glue->devx_free_uar(priv->uar);
32                 priv->uar = NULL;
33         }
34 #ifdef HAVE_IBV_DEVX_EVENT
35         if (priv->eventc) {
36                 union {
37                         struct mlx5dv_devx_async_event_hdr event_resp;
38                         uint8_t buf[sizeof(struct mlx5dv_devx_async_event_hdr)
39                                                                          + 128];
40                 } out;
41
42                 /* Clean all pending events. */
43                 while (mlx5_glue->devx_get_event(priv->eventc, &out.event_resp,
44                        sizeof(out.buf)) >=
45                        (ssize_t)sizeof(out.event_resp.cookie))
46                         ;
47                 mlx5_os_devx_destroy_event_channel(priv->eventc);
48                 priv->eventc = NULL;
49         }
50 #endif
51         priv->eqn = 0;
52 }
53
54 /* Prepare all the global resources for all the event objects. */
55 static int
56 mlx5_vdpa_event_qp_global_prepare(struct mlx5_vdpa_priv *priv)
57 {
58         int flags, ret;
59
60         if (priv->eventc)
61                 return 0;
62         if (mlx5_glue->devx_query_eqn(priv->ctx, 0, &priv->eqn)) {
63                 rte_errno = errno;
64                 DRV_LOG(ERR, "Failed to query EQ number %d.", rte_errno);
65                 return -1;
66         }
67         priv->eventc = mlx5_os_devx_create_event_channel(priv->ctx,
68                            MLX5DV_DEVX_CREATE_EVENT_CHANNEL_FLAGS_OMIT_EV_DATA);
69         if (!priv->eventc) {
70                 rte_errno = errno;
71                 DRV_LOG(ERR, "Failed to create event channel %d.",
72                         rte_errno);
73                 goto error;
74         }
75         flags = fcntl(priv->eventc->fd, F_GETFL);
76         ret = fcntl(priv->eventc->fd, F_SETFL, flags | O_NONBLOCK);
77         if (ret) {
78                 DRV_LOG(ERR, "Failed to change event channel FD.");
79                 goto error;
80         }
81         /*
82          * This PMD always issues a write memory barrier for UAR
83          * register writes, so it is safe to allocate the UAR with any
84          * memory mapping type.
85          */
86         priv->uar = mlx5_devx_alloc_uar(priv->ctx, -1);
87         if (!priv->uar) {
88                 rte_errno = errno;
89                 DRV_LOG(ERR, "Failed to allocate UAR.");
90                 goto error;
91         }
92         return 0;
93 error:
94         mlx5_vdpa_event_qp_global_release(priv);
95         return -1;
96 }
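/*
 * The global resources prepared above are shared by all event QPs of the
 * device: the EQ number queried via DevX, a non-blocking DEVX event channel
 * used to deliver CQ completion events, and a UAR page used to ring CQ
 * doorbells. They are created once (the function is a no-op when
 * priv->eventc already exists) and torn down together in
 * mlx5_vdpa_event_qp_global_release().
 */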
97
98 static void
99 mlx5_vdpa_cq_destroy(struct mlx5_vdpa_cq *cq)
100 {
101         if (cq->cq)
102                 claim_zero(mlx5_devx_cmd_destroy(cq->cq));
103         if (cq->umem_obj)
104                 claim_zero(mlx5_glue->devx_umem_dereg(cq->umem_obj));
105         if (cq->umem_buf)
106                 rte_free((void *)(uintptr_t)cq->umem_buf);
107         memset(cq, 0, sizeof(*cq));
108 }
109
110 static inline void __rte_unused
111 mlx5_vdpa_cq_arm(struct mlx5_vdpa_priv *priv, struct mlx5_vdpa_cq *cq)
112 {
113         uint32_t arm_sn = cq->arm_sn << MLX5_CQ_SQN_OFFSET;
114         uint32_t cq_ci = cq->cq_ci & MLX5_CI_MASK;
115         uint32_t doorbell_hi = arm_sn | MLX5_CQ_DBR_CMD_ALL | cq_ci;
116         uint64_t doorbell = ((uint64_t)doorbell_hi << 32) | cq->cq->id;
117         uint64_t db_be = rte_cpu_to_be_64(doorbell);
118         uint32_t *addr = RTE_PTR_ADD(priv->uar->base_addr, MLX5_CQ_DOORBELL);
119
120         rte_io_wmb();
121         cq->db_rec[MLX5_CQ_ARM_DB] = rte_cpu_to_be_32(doorbell_hi);
122         rte_wmb();
123 #ifdef RTE_ARCH_64
124         *(uint64_t *)addr = db_be;
125 #else
126         *(uint32_t *)addr = db_be;
127         rte_io_wmb();
128         *((uint32_t *)addr + 1) = db_be >> 32;
129 #endif
130         cq->arm_sn++;
131         cq->armed = 1;
132 }
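/*
 * Illustrative doorbell layout for the arming above: the high 32 bits hold
 * the arm sequence number (shifted by MLX5_CQ_SQN_OFFSET), the
 * MLX5_CQ_DBR_CMD_ALL command and the masked consumer index; the low 32 bits
 * hold the CQ id. The doorbell record is updated first, then the 64-bit value
 * is written to the UAR (a single store on 64-bit hosts, two ordered 32-bit
 * stores otherwise).
 */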
133
134 static int
135 mlx5_vdpa_cq_create(struct mlx5_vdpa_priv *priv, uint16_t log_desc_n,
136                     int callfd, struct mlx5_vdpa_cq *cq)
137 {
138         struct mlx5_devx_cq_attr attr = {0};
139         size_t pgsize = sysconf(_SC_PAGESIZE);
140         uint32_t umem_size;
141         uint16_t event_nums[1] = {0};
142         uint16_t cq_size = 1 << log_desc_n;
143         int ret;
144
145         cq->log_desc_n = log_desc_n;
146         umem_size = sizeof(struct mlx5_cqe) * cq_size + sizeof(*cq->db_rec) * 2;
147         cq->umem_buf = rte_zmalloc(__func__, umem_size, 4096);
148         if (!cq->umem_buf) {
149                 DRV_LOG(ERR, "Failed to allocate memory for CQ.");
150                 rte_errno = ENOMEM;
151                 return -ENOMEM;
152         }
153         cq->umem_obj = mlx5_glue->devx_umem_reg(priv->ctx,
154                                                 (void *)(uintptr_t)cq->umem_buf,
155                                                 umem_size,
156                                                 IBV_ACCESS_LOCAL_WRITE);
157         if (!cq->umem_obj) {
158                 DRV_LOG(ERR, "Failed to register umem for CQ.");
159                 goto error;
160         }
161         attr.q_umem_valid = 1;
162         attr.db_umem_valid = 1;
163         attr.use_first_only = 1;
164         attr.overrun_ignore = 0;
165         attr.uar_page_id = priv->uar->page_id;
166         attr.q_umem_id = cq->umem_obj->umem_id;
167         attr.q_umem_offset = 0;
168         attr.db_umem_id = cq->umem_obj->umem_id;
169         attr.db_umem_offset = sizeof(struct mlx5_cqe) * cq_size;
170         attr.eqn = priv->eqn;
171         attr.log_cq_size = log_desc_n;
172         attr.log_page_size = rte_log2_u32(pgsize);
173         cq->cq = mlx5_devx_cmd_create_cq(priv->ctx, &attr);
174         if (!cq->cq)
175                 goto error;
176         cq->db_rec = RTE_PTR_ADD(cq->umem_buf, (uintptr_t)attr.db_umem_offset);
177         cq->cq_ci = 0;
178         rte_spinlock_init(&cq->sl);
179         /* Subscribe CQ event to the event channel controlled by the driver. */
180         ret = mlx5_os_devx_subscribe_devx_event(priv->eventc, cq->cq->obj,
181                                                    sizeof(event_nums),
182                                                    event_nums,
183                                                    (uint64_t)(uintptr_t)cq);
184         if (ret) {
185                 DRV_LOG(ERR, "Failed to subscribe CQE event.");
186                 rte_errno = errno;
187                 goto error;
188         }
189         cq->callfd = callfd;
190         /* Initialize the CQ to all ones so HW owns it at start. */
191         cq->cqes[0].op_own = MLX5_CQE_OWNER_MASK;
192         cq->cqes[0].wqe_counter = rte_cpu_to_be_16(UINT16_MAX);
193         /* First arming. */
194         mlx5_vdpa_cq_arm(priv, cq);
195         return 0;
196 error:
197         mlx5_vdpa_cq_destroy(cq);
198         return -1;
199 }
200
201 static inline uint32_t
202 mlx5_vdpa_cq_poll(struct mlx5_vdpa_cq *cq)
203 {
204         struct mlx5_vdpa_event_qp *eqp =
205                                 container_of(cq, struct mlx5_vdpa_event_qp, cq);
206         const unsigned int cq_size = 1 << cq->log_desc_n;
207         union {
208                 struct {
209                         uint16_t wqe_counter;
210                         uint8_t rsvd5;
211                         uint8_t op_own;
212                 };
213                 uint32_t word;
214         } last_word;
215         uint16_t next_wqe_counter = cq->cq_ci;
216         uint16_t cur_wqe_counter;
217         uint16_t comp;
218
219         last_word.word = rte_read32(&cq->cqes[0].wqe_counter);
220         cur_wqe_counter = rte_be_to_cpu_16(last_word.wqe_counter);
221         comp = cur_wqe_counter + (uint16_t)1 - next_wqe_counter;
222         if (comp) {
223                 cq->cq_ci += comp;
224                 MLX5_ASSERT(MLX5_CQE_OPCODE(last_word.op_own) !=
225                             MLX5_CQE_INVALID);
226                 if (unlikely(MLX5_CQE_OPCODE(last_word.op_own) ==
227                              MLX5_CQE_RESP_ERR ||
228                              MLX5_CQE_OPCODE(last_word.op_own) ==
229                              MLX5_CQE_REQ_ERR))
230                         cq->errors++;
231                 rte_io_wmb();
232                 /* Ring CQ doorbell record. */
233                 cq->db_rec[0] = rte_cpu_to_be_32(cq->cq_ci);
234                 rte_io_wmb();
235                 /* Ring SW QP doorbell record. */
236                 eqp->db_rec[0] = rte_cpu_to_be_32(cq->cq_ci + cq_size);
237         }
238         return comp;
239 }
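/*
 * The CQ is created with use_first_only set, so the device keeps rewriting
 * CQE[0] and polling only needs to read its wqe_counter. The number of new
 * completions is computed with 16-bit wraparound arithmetic: for example,
 * if cq_ci is 0xfffe and the CQE reports wqe_counter 0x0001, then
 * comp = 0x0001 + 1 - 0xfffe = 4 completions.
 */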
240
241 static void
242 mlx5_vdpa_arm_all_cqs(struct mlx5_vdpa_priv *priv)
243 {
244         struct mlx5_vdpa_cq *cq;
245         int i;
246
247         for (i = 0; i < priv->nr_virtqs; i++) {
248                 cq = &priv->virtqs[i].eqp.cq;
249                 if (cq->cq && !cq->armed)
250                         mlx5_vdpa_cq_arm(priv, cq);
251         }
252 }
253
254 static void
255 mlx5_vdpa_timer_sleep(struct mlx5_vdpa_priv *priv, uint32_t max)
256 {
257         if (priv->event_mode == MLX5_VDPA_EVENT_MODE_DYNAMIC_TIMER) {
258                 switch (max) {
259                 case 0:
260                         priv->timer_delay_us += priv->event_us;
261                         break;
262                 case 1:
263                         break;
264                 default:
265                         priv->timer_delay_us /= max;
266                         break;
267                 }
268         }
269         if (priv->timer_delay_us)
270                 usleep(priv->timer_delay_us);
271 }
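/*
 * In the dynamic timer mode the polling delay adapts to the load: a poll
 * round with no completions (max == 0) lengthens the delay by event_us,
 * exactly one completion keeps it unchanged, and more than one shortens it
 * by dividing it by max. For example, with a 100us delay, a round that finds
 * 4 completions drops the delay to 25us, while an idle round with
 * event_us = 10 raises it to 110us. A delay of zero means no sleep at all,
 * i.e. busy polling.
 */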
272
273 static void *
274 mlx5_vdpa_poll_handle(void *arg)
275 {
276         struct mlx5_vdpa_priv *priv = arg;
277         int i;
278         struct mlx5_vdpa_cq *cq;
279         uint32_t max;
280         uint64_t current_tic;
281
282         pthread_mutex_lock(&priv->timer_lock);
283         while (!priv->timer_on)
284                 pthread_cond_wait(&priv->timer_cond, &priv->timer_lock);
285         pthread_mutex_unlock(&priv->timer_lock);
286         priv->timer_delay_us = priv->event_mode ==
287                                             MLX5_VDPA_EVENT_MODE_DYNAMIC_TIMER ?
288                                               MLX5_VDPA_DEFAULT_TIMER_DELAY_US :
289                                                                  priv->event_us;
290         while (1) {
291                 max = 0;
292                 pthread_mutex_lock(&priv->vq_config_lock);
293                 for (i = 0; i < priv->nr_virtqs; i++) {
294                         cq = &priv->virtqs[i].eqp.cq;
295                         if (cq->cq && !cq->armed) {
296                                 uint32_t comp = mlx5_vdpa_cq_poll(cq);
297
298                                 if (comp) {
299                                         /* Notify guest of consumed descriptors. */
300                                         if (cq->callfd != -1)
301                                                 eventfd_write(cq->callfd,
302                                                               (eventfd_t)1);
303                                         if (comp > max)
304                                                 max = comp;
305                                 }
306                         }
307                 }
308                 current_tic = rte_rdtsc();
309                 if (!max) {
310                         /* No traffic? Stop the timer and fall back to interrupts. */
311                         if (current_tic - priv->last_traffic_tic >=
312                             rte_get_timer_hz() * priv->no_traffic_time_s) {
313                                 DRV_LOG(DEBUG, "Device %s traffic was stopped.",
314                                         priv->vdev->device->name);
315                                 mlx5_vdpa_arm_all_cqs(priv);
316                                 pthread_mutex_unlock(&priv->vq_config_lock);
317                                 pthread_mutex_lock(&priv->timer_lock);
318                                 priv->timer_on = 0;
319                                 while (!priv->timer_on)
320                                         pthread_cond_wait(&priv->timer_cond,
321                                                           &priv->timer_lock);
322                                 pthread_mutex_unlock(&priv->timer_lock);
323                                 priv->timer_delay_us = priv->event_mode ==
324                                             MLX5_VDPA_EVENT_MODE_DYNAMIC_TIMER ?
325                                               MLX5_VDPA_DEFAULT_TIMER_DELAY_US :
326                                                                  priv->event_us;
327                                 continue;
328                         }
329                 } else {
330                         priv->last_traffic_tic = current_tic;
331                 }
332                 pthread_mutex_unlock(&priv->vq_config_lock);
333                 mlx5_vdpa_timer_sleep(priv, max);
334         }
335         return NULL;
336 }
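/*
 * Timer thread life cycle: the thread sleeps on timer_cond until the first
 * completion event turns the timer on, then polls every CQ under
 * vq_config_lock, forwards completions to the guest through callfd, and
 * adapts its delay via mlx5_vdpa_timer_sleep(). When no completion is seen
 * for no_traffic_time_s seconds, all CQs are re-armed, the timer is turned
 * off and the thread waits on the condition variable until the interrupt
 * handler wakes it up again.
 */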
337
338 static void
339 mlx5_vdpa_interrupt_handler(void *cb_arg)
340 {
341         struct mlx5_vdpa_priv *priv = cb_arg;
342 #ifdef HAVE_IBV_DEVX_EVENT
343         union {
344                 struct mlx5dv_devx_async_event_hdr event_resp;
345                 uint8_t buf[sizeof(struct mlx5dv_devx_async_event_hdr) + 128];
346         } out;
347
348         pthread_mutex_lock(&priv->vq_config_lock);
349         while (mlx5_glue->devx_get_event(priv->eventc, &out.event_resp,
350                                          sizeof(out.buf)) >=
351                                        (ssize_t)sizeof(out.event_resp.cookie)) {
352                 struct mlx5_vdpa_cq *cq = (struct mlx5_vdpa_cq *)
353                                                (uintptr_t)out.event_resp.cookie;
354                 struct mlx5_vdpa_event_qp *eqp = container_of(cq,
355                                                  struct mlx5_vdpa_event_qp, cq);
356                 struct mlx5_vdpa_virtq *virtq = container_of(eqp,
357                                                    struct mlx5_vdpa_virtq, eqp);
358
359                 if (!virtq->enable)
360                         continue;
361                 mlx5_vdpa_cq_poll(cq);
362                 /* Notify guest of consumed descriptors. */
363                 if (cq->callfd != -1)
364                         eventfd_write(cq->callfd, (eventfd_t)1);
365                 if (priv->event_mode == MLX5_VDPA_EVENT_MODE_ONLY_INTERRUPT) {
366                         mlx5_vdpa_cq_arm(priv, cq);
367                         pthread_mutex_unlock(&priv->vq_config_lock);
368                         return;
369                 }
370                 /* Don't arm again - timer will take control. */
371                 DRV_LOG(DEBUG, "Device %s virtq %d cq %d event was captured."
372                         " Timer is %s, cq ci is %u.",
373                         priv->vdev->device->name,
374                         (int)virtq->index, cq->cq->id,
375                         priv->timer_on ? "on" : "off", cq->cq_ci);
376                 cq->armed = 0;
377         }
378 #endif
379
380         /* Traffic detected: make sure timer is on. */
381         priv->last_traffic_tic = rte_rdtsc();
382         pthread_mutex_lock(&priv->timer_lock);
383         if (!priv->timer_on) {
384                 priv->timer_on = 1;
385                 pthread_cond_signal(&priv->timer_cond);
386         }
387         pthread_mutex_unlock(&priv->timer_lock);
388         pthread_mutex_unlock(&priv->vq_config_lock);
389 }
390
391 static void
392 mlx5_vdpa_err_interrupt_handler(void *cb_arg __rte_unused)
393 {
394 #ifdef HAVE_IBV_DEVX_EVENT
395         struct mlx5_vdpa_priv *priv = cb_arg;
396         union {
397                 struct mlx5dv_devx_async_event_hdr event_resp;
398                 uint8_t buf[sizeof(struct mlx5dv_devx_async_event_hdr) + 128];
399         } out;
400         uint32_t vq_index, i, version;
401         struct mlx5_vdpa_virtq *virtq;
402         uint64_t sec;
403
404         pthread_mutex_lock(&priv->vq_config_lock);
405         while (mlx5_glue->devx_get_event(priv->err_chnl, &out.event_resp,
406                                          sizeof(out.buf)) >=
407                                        (ssize_t)sizeof(out.event_resp.cookie)) {
408                 vq_index = out.event_resp.cookie & UINT32_MAX;
409                 version = out.event_resp.cookie >> 32;
410                 if (vq_index >= priv->nr_virtqs) {
411                         DRV_LOG(ERR, "Invalid device %s error event virtq %d.",
412                                 priv->vdev->device->name, vq_index);
413                         continue;
414                 }
415                 virtq = &priv->virtqs[vq_index];
416                 if (!virtq->enable || virtq->version != version)
417                         continue;
418                 if (rte_rdtsc() / rte_get_tsc_hz() < MLX5_VDPA_ERROR_TIME_SEC)
419                         continue;
420                 virtq->stopped = true;
421                 /* Query error info. */
422                 if (mlx5_vdpa_virtq_query(priv, vq_index))
423                         goto log;
424                 /* Disable vq. */
425                 if (mlx5_vdpa_virtq_enable(priv, vq_index, 0)) {
426                         DRV_LOG(ERR, "Failed to disable virtq %d.", vq_index);
427                         goto log;
428                 }
429                 /* Retry if error happens less than N times in 3 seconds. */
430                 sec = (rte_rdtsc() - virtq->err_time[0]) / rte_get_tsc_hz();
431                 if (sec > MLX5_VDPA_ERROR_TIME_SEC) {
432                         /* Retry. */
433                         if (mlx5_vdpa_virtq_enable(priv, vq_index, 1))
434                                 DRV_LOG(ERR, "Failed to enable virtq %d.",
435                                         vq_index);
436                         else
437                                 DRV_LOG(WARNING, "Recover virtq %d: %u.",
438                                         vq_index, ++virtq->n_retry);
439                 } else {
440                         /* Retry timeout, give up. */
441                         DRV_LOG(ERR, "Device %s virtq %d failed to recover.",
442                                 priv->vdev->device->name, vq_index);
443                 }
444 log:
445                 /* Shift the error time log and append the current time. */
446                 for (i = 1; i < RTE_DIM(virtq->err_time); i++)
447                         virtq->err_time[i - 1] = virtq->err_time[i];
448                 virtq->err_time[RTE_DIM(virtq->err_time) - 1] = rte_rdtsc();
449         }
450         pthread_mutex_unlock(&priv->vq_config_lock);
451 #endif
452 }
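/*
 * Error recovery policy sketch: virtq->err_time[] is a sliding log of the
 * last RTE_DIM(err_time) error timestamps. On a new error the virtq is
 * stopped, queried and disabled; it is re-enabled (recovered) only if the
 * oldest logged error is more than MLX5_VDPA_ERROR_TIME_SEC seconds old,
 * i.e. errors are not arriving faster than the log depth per 3 seconds.
 * Otherwise the virtq is left disabled and the failure is reported.
 */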
453
454 int
455 mlx5_vdpa_err_event_setup(struct mlx5_vdpa_priv *priv)
456 {
457         int ret;
458         int flags;
459
460         /* Setup device event channel. */
461         priv->err_chnl = mlx5_glue->devx_create_event_channel(priv->ctx, 0);
462         if (!priv->err_chnl) {
463                 rte_errno = errno;
464                 DRV_LOG(ERR, "Failed to create device event channel %d.",
465                         rte_errno);
466                 goto error;
467         }
468         flags = fcntl(priv->err_chnl->fd, F_GETFL);
469         ret = fcntl(priv->err_chnl->fd, F_SETFL, flags | O_NONBLOCK);
470         if (ret) {
471                 DRV_LOG(ERR, "Failed to change device event channel FD.");
472                 goto error;
473         }
474         priv->err_intr_handle.fd = priv->err_chnl->fd;
475         priv->err_intr_handle.type = RTE_INTR_HANDLE_EXT;
476         if (rte_intr_callback_register(&priv->err_intr_handle,
477                                        mlx5_vdpa_err_interrupt_handler,
478                                        priv)) {
479                 priv->err_intr_handle.fd = 0;
480                 DRV_LOG(ERR, "Failed to register error interrupt for device %d.",
481                         priv->vid);
482                 goto error;
483         } else {
484                 DRV_LOG(DEBUG, "Registered error interrupt for device %d.",
485                         priv->vid);
486         }
487         return 0;
488 error:
489         mlx5_vdpa_err_event_unset(priv);
490         return -1;
491 }
492
493 void
494 mlx5_vdpa_err_event_unset(struct mlx5_vdpa_priv *priv)
495 {
496         int retries = MLX5_VDPA_INTR_RETRIES;
497         int ret = -EAGAIN;
498
499         if (!priv->err_intr_handle.fd)
500                 return;
501         while (retries-- && ret == -EAGAIN) {
502                 ret = rte_intr_callback_unregister(&priv->err_intr_handle,
503                                             mlx5_vdpa_err_interrupt_handler,
504                                             priv);
505                 if (ret == -EAGAIN) {
506                         DRV_LOG(DEBUG, "Try again to unregister fd %d "
507                                 "of error interrupt, retries = %d.",
508                                 priv->err_intr_handle.fd, retries);
509                         rte_pause();
510                 }
511         }
512         memset(&priv->err_intr_handle, 0, sizeof(priv->err_intr_handle));
513         if (priv->err_chnl) {
514 #ifdef HAVE_IBV_DEVX_EVENT
515                 union {
516                         struct mlx5dv_devx_async_event_hdr event_resp;
517                         uint8_t buf[sizeof(struct mlx5dv_devx_async_event_hdr) +
518                                     128];
519                 } out;
520
521                 /* Clean all pending events. */
522                 while (mlx5_glue->devx_get_event(priv->err_chnl,
523                        &out.event_resp, sizeof(out.buf)) >=
524                        (ssize_t)sizeof(out.event_resp.cookie))
525                         ;
526 #endif
527                 mlx5_glue->devx_destroy_event_channel(priv->err_chnl);
528                 priv->err_chnl = NULL;
529         }
530 }
531
532 int
533 mlx5_vdpa_cqe_event_setup(struct mlx5_vdpa_priv *priv)
534 {
535         int ret;
536
537         if (!priv->eventc)
538                 /* All virtqs are in poll mode. */
539                 return 0;
540         if (priv->event_mode != MLX5_VDPA_EVENT_MODE_ONLY_INTERRUPT) {
541                 pthread_mutex_init(&priv->timer_lock, NULL);
542                 pthread_cond_init(&priv->timer_cond, NULL);
543                 priv->timer_on = 0;
544                 ret = pthread_create(&priv->timer_tid, NULL,
545                                      mlx5_vdpa_poll_handle, (void *)priv);
546                 if (ret) {
547                         DRV_LOG(ERR, "Failed to create timer thread.");
548                         return -1;
549                 }
550         }
551         priv->intr_handle.fd = priv->eventc->fd;
552         priv->intr_handle.type = RTE_INTR_HANDLE_EXT;
553         if (rte_intr_callback_register(&priv->intr_handle,
554                                        mlx5_vdpa_interrupt_handler, priv)) {
555                 priv->intr_handle.fd = 0;
556                 DRV_LOG(ERR, "Failed to register CQE interrupt %d.", rte_errno);
557                 goto error;
558         }
559         return 0;
560 error:
561         mlx5_vdpa_cqe_event_unset(priv);
562         return -1;
563 }
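/*
 * CQE event setup: unless every virtq runs in pure polling mode (no event
 * channel was created), a timer thread is spawned for the timer-based event
 * modes and the event channel fd is registered with the interrupt thread so
 * that mlx5_vdpa_interrupt_handler() runs on CQ completion events.
 */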
564
565 void
566 mlx5_vdpa_cqe_event_unset(struct mlx5_vdpa_priv *priv)
567 {
568         int retries = MLX5_VDPA_INTR_RETRIES;
569         int ret = -EAGAIN;
570         void *status;
571
572         if (priv->intr_handle.fd) {
573                 while (retries-- && ret == -EAGAIN) {
574                         ret = rte_intr_callback_unregister(&priv->intr_handle,
575                                                     mlx5_vdpa_interrupt_handler,
576                                                     priv);
577                         if (ret == -EAGAIN) {
578                                 DRV_LOG(DEBUG, "Try again to unregister fd %d "
579                                         "of CQ interrupt, retries = %d.",
580                                         priv->intr_handle.fd, retries);
581                                 rte_pause();
582                         }
583                 }
584                 memset(&priv->intr_handle, 0, sizeof(priv->intr_handle));
585         }
586         if (priv->timer_tid) {
587                 pthread_cancel(priv->timer_tid);
588                 pthread_join(priv->timer_tid, &status);
589         }
590         priv->timer_tid = 0;
591 }
592
593 void
594 mlx5_vdpa_event_qp_destroy(struct mlx5_vdpa_event_qp *eqp)
595 {
596         if (eqp->sw_qp)
597                 claim_zero(mlx5_devx_cmd_destroy(eqp->sw_qp));
598         if (eqp->umem_obj)
599                 claim_zero(mlx5_glue->devx_umem_dereg(eqp->umem_obj));
600         if (eqp->umem_buf)
601                 rte_free(eqp->umem_buf);
602         if (eqp->fw_qp)
603                 claim_zero(mlx5_devx_cmd_destroy(eqp->fw_qp));
604         mlx5_vdpa_cq_destroy(&eqp->cq);
605         memset(eqp, 0, sizeof(*eqp));
606 }
607
608 static int
609 mlx5_vdpa_qps2rts(struct mlx5_vdpa_event_qp *eqp)
610 {
611         if (mlx5_devx_cmd_modify_qp_state(eqp->fw_qp, MLX5_CMD_OP_RST2INIT_QP,
612                                           eqp->sw_qp->id)) {
613                 DRV_LOG(ERR, "Failed to modify FW QP to INIT state(%u).",
614                         rte_errno);
615                 return -1;
616         }
617         if (mlx5_devx_cmd_modify_qp_state(eqp->sw_qp, MLX5_CMD_OP_RST2INIT_QP,
618                                           eqp->fw_qp->id)) {
619                 DRV_LOG(ERR, "Failed to modify SW QP to INIT state(%u).",
620                         rte_errno);
621                 return -1;
622         }
623         if (mlx5_devx_cmd_modify_qp_state(eqp->fw_qp, MLX5_CMD_OP_INIT2RTR_QP,
624                                           eqp->sw_qp->id)) {
625                 DRV_LOG(ERR, "Failed to modify FW QP to RTR state(%u).",
626                         rte_errno);
627                 return -1;
628         }
629         if (mlx5_devx_cmd_modify_qp_state(eqp->sw_qp, MLX5_CMD_OP_INIT2RTR_QP,
630                                           eqp->fw_qp->id)) {
631                 DRV_LOG(ERR, "Failed to modify SW QP to RTR state(%u).",
632                         rte_errno);
633                 return -1;
634         }
635         if (mlx5_devx_cmd_modify_qp_state(eqp->fw_qp, MLX5_CMD_OP_RTR2RTS_QP,
636                                           eqp->sw_qp->id)) {
637                 DRV_LOG(ERR, "Failed to modify FW QP to RTS state(%u).",
638                         rte_errno);
639                 return -1;
640         }
641         if (mlx5_devx_cmd_modify_qp_state(eqp->sw_qp, MLX5_CMD_OP_RTR2RTS_QP,
642                                           eqp->fw_qp->id)) {
643                 DRV_LOG(ERR, "Failed to modify SW QP to RTS state(%u).",
644                         rte_errno);
645                 return -1;
646         }
647         return 0;
648 }
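/*
 * Both QPs of the loopback pair are moved through the standard
 * RST -> INIT -> RTR -> RTS sequence, each transition passing the peer QP
 * number, so the FW QP and the SW QP end up connected to each other and
 * ready to operate.
 */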
649
650 int
651 mlx5_vdpa_event_qp_create(struct mlx5_vdpa_priv *priv, uint16_t desc_n,
652                           int callfd, struct mlx5_vdpa_event_qp *eqp)
653 {
654         struct mlx5_devx_qp_attr attr = {0};
655         uint16_t log_desc_n = rte_log2_u32(desc_n);
656         uint32_t umem_size = (1 << log_desc_n) * MLX5_WSEG_SIZE +
657                                                        sizeof(*eqp->db_rec) * 2;
658
659         if (mlx5_vdpa_event_qp_global_prepare(priv))
660                 return -1;
661         if (mlx5_vdpa_cq_create(priv, log_desc_n, callfd, &eqp->cq))
662                 return -1;
663         attr.pd = priv->pdn;
664         eqp->fw_qp = mlx5_devx_cmd_create_qp(priv->ctx, &attr);
665         if (!eqp->fw_qp) {
666                 DRV_LOG(ERR, "Failed to create FW QP(%u).", rte_errno);
667                 goto error;
668         }
669         eqp->umem_buf = rte_zmalloc(__func__, umem_size, 4096);
670         if (!eqp->umem_buf) {
671                 DRV_LOG(ERR, "Failed to allocate memory for SW QP.");
672                 rte_errno = ENOMEM;
673                 goto error;
674         }
675         eqp->umem_obj = mlx5_glue->devx_umem_reg(priv->ctx,
676                                                (void *)(uintptr_t)eqp->umem_buf,
677                                                umem_size,
678                                                IBV_ACCESS_LOCAL_WRITE);
679         if (!eqp->umem_obj) {
680                 DRV_LOG(ERR, "Failed to register umem for SW QP.");
681                 goto error;
682         }
683         attr.uar_index = priv->uar->page_id;
684         attr.cqn = eqp->cq.cq->id;
685         attr.log_page_size = rte_log2_u32(sysconf(_SC_PAGESIZE));
686         attr.rq_size = 1 << log_desc_n;
687         attr.log_rq_stride = rte_log2_u32(MLX5_WSEG_SIZE);
688         attr.sq_size = 0; /* No SQ is needed. */
689         attr.dbr_umem_valid = 1;
690         attr.wq_umem_id = eqp->umem_obj->umem_id;
691         attr.wq_umem_offset = 0;
692         attr.dbr_umem_id = eqp->umem_obj->umem_id;
693         attr.dbr_address = (1 << log_desc_n) * MLX5_WSEG_SIZE;
694         eqp->sw_qp = mlx5_devx_cmd_create_qp(priv->ctx, &attr);
695         if (!eqp->sw_qp) {
696                 DRV_LOG(ERR, "Failed to create SW QP(%u).", rte_errno);
697                 goto error;
698         }
699         eqp->db_rec = RTE_PTR_ADD(eqp->umem_buf, (uintptr_t)attr.dbr_address);
700         if (mlx5_vdpa_qps2rts(eqp))
701                 goto error;
702         /* First ringing. */
703         rte_write32(rte_cpu_to_be_32(1 << log_desc_n), &eqp->db_rec[0]);
704         return 0;
705 error:
706         mlx5_vdpa_event_qp_destroy(eqp);
707         return -1;
708 }
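/*
 * A minimal usage sketch, assuming the rest of the driver is initialized:
 * for every virtq the driver calls
 *     mlx5_vdpa_event_qp_create(priv, desc_n, callfd, &virtq->eqp);
 * which prepares the global event resources, creates the CQ and the FW/SW
 * QP pair, connects them and rings the initial RQ doorbell; the pair is
 * released later with mlx5_vdpa_event_qp_destroy(&virtq->eqp).
 */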