vdpa/mlx5: add task ring for multi-thread management
drivers/vdpa/mlx5/mlx5_vdpa_event.c
/* SPDX-License-Identifier: BSD-3-Clause
 * Copyright 2019 Mellanox Technologies, Ltd
 */
#include <unistd.h>
#include <stdint.h>
#include <sched.h>
#include <fcntl.h>
#include <sys/eventfd.h>

#include <rte_malloc.h>
#include <rte_memory.h>
#include <rte_errno.h>
#include <rte_lcore.h>
#include <rte_atomic.h>
#include <rte_common.h>
#include <rte_io.h>
#include <rte_alarm.h>

#include <mlx5_common.h>
#include <mlx5_common_os.h>
#include <mlx5_common_devx.h>
#include <mlx5_glue.h>

#include "mlx5_vdpa_utils.h"
#include "mlx5_vdpa.h"


#define MLX5_VDPA_ERROR_TIME_SEC 3u

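/* Release the global event channel and UAR resources shared by all event queues. */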
void
mlx5_vdpa_event_qp_global_release(struct mlx5_vdpa_priv *priv)
{
        mlx5_devx_uar_release(&priv->uar);
#ifdef HAVE_IBV_DEVX_EVENT
        if (priv->eventc) {
                mlx5_os_devx_destroy_event_channel(priv->eventc);
                priv->eventc = NULL;
        }
#endif
}

/* Prepare all the global resources for all the event objects. */
int
mlx5_vdpa_event_qp_global_prepare(struct mlx5_vdpa_priv *priv)
{
        priv->eventc = mlx5_os_devx_create_event_channel(priv->cdev->ctx,
                           MLX5DV_DEVX_CREATE_EVENT_CHANNEL_FLAGS_OMIT_EV_DATA);
        if (!priv->eventc) {
                rte_errno = errno;
                DRV_LOG(ERR, "Failed to create event channel %d.",
                        rte_errno);
                goto error;
        }
        if (mlx5_devx_uar_prepare(priv->cdev, &priv->uar) != 0) {
                DRV_LOG(ERR, "Failed to allocate UAR.");
                goto error;
        }
        return 0;
error:
        mlx5_vdpa_event_qp_global_release(priv);
        return -1;
}

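/* Destroy the DevX CQ and reset the per-virtq CQ context. */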
static void
mlx5_vdpa_cq_destroy(struct mlx5_vdpa_cq *cq)
{
        mlx5_devx_cq_destroy(&cq->cq_obj);
        memset(cq, 0, sizeof(*cq));
}

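/* Arm the CQ doorbell so that HW raises an event on the next completion. */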
static inline void __rte_unused
mlx5_vdpa_cq_arm(struct mlx5_vdpa_priv *priv, struct mlx5_vdpa_cq *cq)
{
        uint32_t arm_sn = cq->arm_sn << MLX5_CQ_SQN_OFFSET;
        uint32_t cq_ci = cq->cq_ci & MLX5_CI_MASK;
        uint32_t doorbell_hi = arm_sn | MLX5_CQ_DBR_CMD_ALL | cq_ci;
        uint64_t doorbell = ((uint64_t)doorbell_hi << 32) | cq->cq_obj.cq->id;
        uint64_t db_be = rte_cpu_to_be_64(doorbell);

        mlx5_doorbell_ring(&priv->uar.cq_db, db_be, doorbell_hi,
                           &cq->cq_obj.db_rec[MLX5_CQ_ARM_DB], 0);
        cq->arm_sn++;
        cq->armed = 1;
}

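/* Create the CQ of an event QP and subscribe its completion events to the driver event channel. */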
static int
mlx5_vdpa_cq_create(struct mlx5_vdpa_priv *priv, uint16_t log_desc_n,
                int callfd, struct mlx5_vdpa_virtq *virtq)
{
        struct mlx5_devx_cq_attr attr = {
                .use_first_only = 1,
                .uar_page_id = mlx5_os_get_devx_uar_page_id(priv->uar.obj),
        };
        struct mlx5_vdpa_cq *cq = &virtq->eqp.cq;
        uint16_t event_nums[1] = {0};
        int ret;

        ret = mlx5_devx_cq_create(priv->cdev->ctx, &cq->cq_obj, log_desc_n,
                                  &attr, SOCKET_ID_ANY);
        if (ret)
                goto error;
        cq->cq_ci = 0;
        cq->log_desc_n = log_desc_n;
        rte_spinlock_init(&cq->sl);
        /* Subscribe CQ event to the event channel controlled by the driver. */
        ret = mlx5_glue->devx_subscribe_devx_event(priv->eventc,
                                                   cq->cq_obj.cq->obj,
                                                   sizeof(event_nums),
                                                   event_nums,
                                                   (uint64_t)(uintptr_t)virtq);
        if (ret) {
                DRV_LOG(ERR, "Failed to subscribe CQE event.");
                rte_errno = errno;
                goto error;
        }
        cq->callfd = callfd;
        /* Initialize the CQE to all ones so that HW owns it at the start. */
        cq->cq_obj.cqes[0].op_own = MLX5_CQE_OWNER_MASK;
        cq->cq_obj.cqes[0].wqe_counter = rte_cpu_to_be_16(UINT16_MAX);
        /* First arming. */
        mlx5_vdpa_cq_arm(priv, cq);
        return 0;
error:
        mlx5_vdpa_cq_destroy(cq);
        return -1;
}

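/*
 * Poll the CQ for new completions. Update the CQ consumer index and the
 * SW QP producer index accordingly, and return the number of new completions.
 */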
static inline uint32_t
mlx5_vdpa_cq_poll(struct mlx5_vdpa_cq *cq)
{
        struct mlx5_vdpa_event_qp *eqp =
                                container_of(cq, struct mlx5_vdpa_event_qp, cq);
        const unsigned int cq_size = 1 << cq->log_desc_n;
        union {
                struct {
                        uint16_t wqe_counter;
                        uint8_t rsvd5;
                        uint8_t op_own;
                };
                uint32_t word;
        } last_word;
        uint16_t next_wqe_counter = eqp->qp_pi;
        uint16_t cur_wqe_counter;
        uint16_t comp;

        last_word.word = rte_read32(&cq->cq_obj.cqes[0].wqe_counter);
        cur_wqe_counter = rte_be_to_cpu_16(last_word.wqe_counter);
        comp = cur_wqe_counter + (uint16_t)1 - next_wqe_counter;
        if (comp) {
                cq->cq_ci += comp;
                MLX5_ASSERT(MLX5_CQE_OPCODE(last_word.op_own) !=
                            MLX5_CQE_INVALID);
                if (unlikely(!(MLX5_CQE_OPCODE(last_word.op_own) ==
                               MLX5_CQE_RESP_ERR ||
                               MLX5_CQE_OPCODE(last_word.op_own) ==
                               MLX5_CQE_REQ_ERR)))
                        cq->errors++;
                rte_io_wmb();
                /* Ring CQ doorbell record. */
                cq->cq_obj.db_rec[0] = rte_cpu_to_be_32(cq->cq_ci);
                eqp->qp_pi += comp;
                rte_io_wmb();
                /* Ring SW QP doorbell record. */
                eqp->sw_qp.db_rec[0] = rte_cpu_to_be_32(eqp->qp_pi + cq_size);
        }
        return comp;
}

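/* Re-arm the CQs of all configured virtqs so that further completions generate events. */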
static void
mlx5_vdpa_arm_all_cqs(struct mlx5_vdpa_priv *priv)
{
        struct mlx5_vdpa_virtq *virtq;
        struct mlx5_vdpa_cq *cq;
        int i;

        for (i = 0; i < priv->nr_virtqs; i++) {
                virtq = &priv->virtqs[i];
                pthread_mutex_lock(&virtq->virtq_lock);
                cq = &priv->virtqs[i].eqp.cq;
                if (cq->cq_obj.cq && !cq->armed)
                        mlx5_vdpa_cq_arm(priv, cq);
                pthread_mutex_unlock(&virtq->virtq_lock);
        }
}

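/*
 * Sleep between polling iterations. In dynamic timer mode the delay grows
 * when the queues are idle and shrinks when traffic increases.
 */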
static void
mlx5_vdpa_timer_sleep(struct mlx5_vdpa_priv *priv, uint32_t max)
{
        if (priv->event_mode == MLX5_VDPA_EVENT_MODE_DYNAMIC_TIMER) {
                switch (max) {
                case 0:
                        priv->timer_delay_us += priv->event_us;
                        break;
                case 1:
                        break;
                default:
                        priv->timer_delay_us /= max;
                        break;
                }
        }
        if (priv->timer_delay_us)
                usleep(priv->timer_delay_us);
        else
                /* Give up the CPU to improve polling thread scheduling. */
                sched_yield();
}

/* Notify the virtio device about new traffic on a specific virtq. */
static uint32_t
mlx5_vdpa_queue_complete(struct mlx5_vdpa_cq *cq)
{
        uint32_t comp = 0;

        if (cq->cq_obj.cq) {
                comp = mlx5_vdpa_cq_poll(cq);
                if (comp) {
                        if (cq->callfd != -1)
                                eventfd_write(cq->callfd, (eventfd_t)1);
                        cq->armed = 0;
                }
        }
        return comp;
}

/* Notify the virtio device about new traffic on any virtq. */
static uint32_t
mlx5_vdpa_queues_complete(struct mlx5_vdpa_priv *priv)
{
        struct mlx5_vdpa_virtq *virtq;
        struct mlx5_vdpa_cq *cq;
        uint32_t max = 0;
        uint32_t comp;
        int i;

        for (i = 0; i < priv->nr_virtqs; i++) {
                virtq = &priv->virtqs[i];
                pthread_mutex_lock(&virtq->virtq_lock);
                cq = &virtq->eqp.cq;
                comp = mlx5_vdpa_queue_complete(cq);
                pthread_mutex_unlock(&virtq->virtq_lock);
                if (comp > max)
                        max = comp;
        }
        return max;
}

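/* Drain all CQs: complete pending events, reset the SW QP producer index and re-arm the CQs. */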
void
mlx5_vdpa_drain_cq(struct mlx5_vdpa_priv *priv)
{
        unsigned int i;

        for (i = 0; i < priv->caps.max_num_virtio_queues * 2; i++) {
                struct mlx5_vdpa_cq *cq = &priv->virtqs[i].eqp.cq;

                mlx5_vdpa_queue_complete(cq);
                if (cq->cq_obj.cq) {
                        cq->cq_obj.cqes[0].wqe_counter =
                                rte_cpu_to_be_16(UINT16_MAX);
                        priv->virtqs[i].eqp.qp_pi = 0;
                        if (!cq->armed)
                                mlx5_vdpa_cq_arm(priv, cq);
                }
        }
}

/* Wait on the event channel for a completion event on any CQ. */
static struct mlx5_vdpa_virtq *
mlx5_vdpa_event_wait(struct mlx5_vdpa_priv *priv __rte_unused)
{
#ifdef HAVE_IBV_DEVX_EVENT
        union {
                struct mlx5dv_devx_async_event_hdr event_resp;
                uint8_t buf[sizeof(struct mlx5dv_devx_async_event_hdr) + 128];
        } out;
        int ret = mlx5_glue->devx_get_event(priv->eventc, &out.event_resp,
                                            sizeof(out.buf));

        if (ret >= 0)
                return (struct mlx5_vdpa_virtq *)
                                (uintptr_t)out.event_resp.cookie;
        DRV_LOG(INFO, "Got error in devx_get_event, ret = %d, errno = %d.",
                ret, errno);
#endif
        return NULL;
}

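/*
 * Event/timer thread body. In the timer modes it polls all virtqs and falls
 * back to waiting on CQ events when no traffic is seen; in interrupt-only
 * mode it waits on CQ events and re-arms the CQ after each completion.
 */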
static void *
mlx5_vdpa_event_handle(void *arg)
{
        struct mlx5_vdpa_priv *priv = arg;
        struct mlx5_vdpa_virtq *virtq;
        uint32_t max;

        switch (priv->event_mode) {
        case MLX5_VDPA_EVENT_MODE_DYNAMIC_TIMER:
        case MLX5_VDPA_EVENT_MODE_FIXED_TIMER:
                priv->timer_delay_us = priv->event_us;
                while (1) {
                        max = mlx5_vdpa_queues_complete(priv);
                        if (max == 0 && priv->no_traffic_counter++ >=
                            priv->no_traffic_max) {
                                DRV_LOG(DEBUG, "Device %s traffic was stopped.",
                                        priv->vdev->device->name);
                                mlx5_vdpa_arm_all_cqs(priv);
                                do {
                                        virtq = mlx5_vdpa_event_wait(priv);
                                        if (virtq == NULL)
                                                break;
                                        pthread_mutex_lock(
                                                &virtq->virtq_lock);
                                        if (mlx5_vdpa_queue_complete(
                                                &virtq->eqp.cq) > 0) {
                                                pthread_mutex_unlock(
                                                        &virtq->virtq_lock);
                                                break;
                                        }
                                        pthread_mutex_unlock(
                                                &virtq->virtq_lock);
                                } while (1);
                                priv->timer_delay_us = priv->event_us;
                                priv->no_traffic_counter = 0;
                        } else if (max != 0) {
                                priv->no_traffic_counter = 0;
                        }
                        mlx5_vdpa_timer_sleep(priv, max);
                }
                return NULL;
        case MLX5_VDPA_EVENT_MODE_ONLY_INTERRUPT:
                do {
                        virtq = mlx5_vdpa_event_wait(priv);
                        if (virtq != NULL) {
                                pthread_mutex_lock(&virtq->virtq_lock);
                                if (mlx5_vdpa_queue_complete(
                                        &virtq->eqp.cq) > 0)
                                        mlx5_vdpa_cq_arm(priv, &virtq->eqp.cq);
                                pthread_mutex_unlock(&virtq->virtq_lock);
                        }
                } while (1);
                return NULL;
        default:
                return NULL;
        }
}

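/*
 * Error interrupt handler: drain virtq error events from the device error
 * channel, query the failing virtq and try to recover it by disabling and
 * re-enabling it, giving up when errors repeat too quickly.
 */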
static void
mlx5_vdpa_err_interrupt_handler(void *cb_arg __rte_unused)
{
#ifdef HAVE_IBV_DEVX_EVENT
        struct mlx5_vdpa_priv *priv = cb_arg;
        union {
                struct mlx5dv_devx_async_event_hdr event_resp;
                uint8_t buf[sizeof(struct mlx5dv_devx_async_event_hdr) + 128];
        } out;
        uint32_t vq_index, i, version;
        struct mlx5_vdpa_virtq *virtq;
        uint64_t sec;

        while (mlx5_glue->devx_get_event(priv->err_chnl, &out.event_resp,
                                         sizeof(out.buf)) >=
                                       (ssize_t)sizeof(out.event_resp.cookie)) {
                vq_index = out.event_resp.cookie & UINT32_MAX;
                version = out.event_resp.cookie >> 32;
                if (vq_index >= priv->nr_virtqs) {
                        DRV_LOG(ERR, "Invalid device %s error event virtq %d.",
                                priv->vdev->device->name, vq_index);
                        continue;
                }
                virtq = &priv->virtqs[vq_index];
                pthread_mutex_lock(&virtq->virtq_lock);
                if (!virtq->enable || virtq->version != version)
                        goto unlock;
                if (rte_rdtsc() / rte_get_tsc_hz() < MLX5_VDPA_ERROR_TIME_SEC)
                        goto unlock;
                virtq->stopped = true;
                /* Query error info. */
                if (mlx5_vdpa_virtq_query(priv, vq_index))
                        goto log;
                /* Disable vq. */
                if (mlx5_vdpa_virtq_enable(priv, vq_index, 0)) {
                        DRV_LOG(ERR, "Failed to disable virtq %d.", vq_index);
                        goto log;
                }
                /* Retry if errors happen less than N times in 3 seconds. */
                sec = (rte_rdtsc() - virtq->err_time[0]) / rte_get_tsc_hz();
                if (sec > MLX5_VDPA_ERROR_TIME_SEC) {
                        /* Retry. */
                        if (mlx5_vdpa_virtq_enable(priv, vq_index, 1))
                                DRV_LOG(ERR, "Failed to enable virtq %d.",
                                        vq_index);
                        else
                                DRV_LOG(WARNING, "Recover virtq %d: %u.",
                                        vq_index, ++virtq->n_retry);
                } else {
                        /* Errors are repeating too quickly, give up. */
                        DRV_LOG(ERR, "Device %s virtq %d failed to recover.",
                                priv->vdev->device->name, vq_index);
                }
log:
                /* Shift the error time log and record the current time at its end. */
                for (i = 1; i < RTE_DIM(virtq->err_time); i++)
                        virtq->err_time[i - 1] = virtq->err_time[i];
                virtq->err_time[RTE_DIM(virtq->err_time) - 1] = rte_rdtsc();
unlock:
                pthread_mutex_unlock(&virtq->virtq_lock);
        }
#endif
}

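/* Set up the device error event channel and register its FD with the error interrupt handler. */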
int
mlx5_vdpa_err_event_setup(struct mlx5_vdpa_priv *priv)
{
        int ret;
        int flags;

        /* Setup device event channel. */
        priv->err_chnl = mlx5_glue->devx_create_event_channel(priv->cdev->ctx,
                                                              0);
        if (!priv->err_chnl) {
                rte_errno = errno;
                DRV_LOG(ERR, "Failed to create device event channel %d.",
                        rte_errno);
                goto error;
        }
        flags = fcntl(priv->err_chnl->fd, F_GETFL);
        ret = fcntl(priv->err_chnl->fd, F_SETFL, flags | O_NONBLOCK);
        if (ret) {
                rte_errno = errno;
                DRV_LOG(ERR, "Failed to change device event channel FD.");
                goto error;
        }
        priv->err_intr_handle =
                rte_intr_instance_alloc(RTE_INTR_INSTANCE_F_SHARED);
        if (priv->err_intr_handle == NULL) {
                DRV_LOG(ERR, "Failed to allocate intr_handle.");
                goto error;
        }
        if (rte_intr_fd_set(priv->err_intr_handle, priv->err_chnl->fd))
                goto error;

        if (rte_intr_type_set(priv->err_intr_handle, RTE_INTR_HANDLE_EXT))
                goto error;

        ret = rte_intr_callback_register(priv->err_intr_handle,
                                         mlx5_vdpa_err_interrupt_handler,
                                         priv);
        if (ret != 0) {
                rte_intr_fd_set(priv->err_intr_handle, 0);
                DRV_LOG(ERR, "Failed to register error interrupt for device %d.",
                        priv->vid);
                rte_errno = -ret;
                goto error;
        } else {
                DRV_LOG(DEBUG, "Registered error interrupt for device %d.",
                        priv->vid);
        }
        return 0;
error:
        mlx5_vdpa_err_event_unset(priv);
        return -1;
}

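/* Unregister the error interrupt callback, drain pending events and destroy the error event channel. */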
void
mlx5_vdpa_err_event_unset(struct mlx5_vdpa_priv *priv)
{
        int retries = MLX5_VDPA_INTR_RETRIES;
        int ret = -EAGAIN;

        if (!rte_intr_fd_get(priv->err_intr_handle))
                return;
        while (retries-- && ret == -EAGAIN) {
                ret = rte_intr_callback_unregister(priv->err_intr_handle,
                                            mlx5_vdpa_err_interrupt_handler,
                                            priv);
                if (ret == -EAGAIN) {
                        DRV_LOG(DEBUG, "Try again to unregister fd %d "
                                "of error interrupt, retries = %d.",
                                rte_intr_fd_get(priv->err_intr_handle),
                                retries);
                        rte_pause();
                }
        }
        if (priv->err_chnl) {
#ifdef HAVE_IBV_DEVX_EVENT
                union {
                        struct mlx5dv_devx_async_event_hdr event_resp;
                        uint8_t buf[sizeof(struct mlx5dv_devx_async_event_hdr) +
                                    128];
                } out;

                /* Clean all pending events. */
                while (mlx5_glue->devx_get_event(priv->err_chnl,
                       &out.event_resp, sizeof(out.buf)) >=
                       (ssize_t)sizeof(out.event_resp.cookie))
                        ;
#endif
                mlx5_glue->devx_destroy_event_channel(priv->err_chnl);
                priv->err_chnl = NULL;
        }
        rte_intr_instance_free(priv->err_intr_handle);
}

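/* Create the CQE event thread with real-time (SCHED_RR) priority and pin it to the configured core. */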
int
mlx5_vdpa_cqe_event_setup(struct mlx5_vdpa_priv *priv)
{
        int ret;
        rte_cpuset_t cpuset;
        pthread_attr_t attr;
        char name[16];
        const struct sched_param sp = {
                .sched_priority = sched_get_priority_max(SCHED_RR) - 1,
        };

        if (!priv->eventc)
                /* All virtqs are in poll mode. */
                return 0;
        pthread_attr_init(&attr);
        ret = pthread_attr_setschedpolicy(&attr, SCHED_RR);
        if (ret) {
                DRV_LOG(ERR, "Failed to set thread sched policy = RR.");
                return -1;
        }
        ret = pthread_attr_setschedparam(&attr, &sp);
        if (ret) {
                DRV_LOG(ERR, "Failed to set thread priority.");
                return -1;
        }
        ret = pthread_create(&priv->timer_tid, &attr, mlx5_vdpa_event_handle,
                             (void *)priv);
        if (ret) {
                DRV_LOG(ERR, "Failed to create timer thread.");
                return -1;
        }
        CPU_ZERO(&cpuset);
        if (priv->event_core != -1)
                CPU_SET(priv->event_core, &cpuset);
        else
                cpuset = rte_lcore_cpuset(rte_get_main_lcore());
        ret = pthread_setaffinity_np(priv->timer_tid, sizeof(cpuset), &cpuset);
        if (ret) {
                DRV_LOG(ERR, "Failed to set thread affinity.");
                return -1;
        }
        snprintf(name, sizeof(name), "vDPA-mlx5-%d", priv->vid);
        ret = rte_thread_setname(priv->timer_tid, name);
        if (ret)
                DRV_LOG(DEBUG, "Cannot set timer thread name.");
        return 0;
}

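/* Cancel the event thread and reinitialize the per-virtq locks it may have held. */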
void
mlx5_vdpa_cqe_event_unset(struct mlx5_vdpa_priv *priv)
{
        struct mlx5_vdpa_virtq *virtq;
        void *status;
        int i;

        if (priv->timer_tid) {
                pthread_cancel(priv->timer_tid);
                pthread_join(priv->timer_tid, &status);
                /*
                 * The mutex may stay locked after the event thread is
                 * cancelled, so reinitialize it.
                 */
                for (i = 0; i < priv->nr_virtqs; i++) {
                        virtq = &priv->virtqs[i];
                        pthread_mutex_init(&virtq->virtq_lock, NULL);
                }
        }
        priv->timer_tid = 0;
}

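/* Destroy the SW QP, the FW QP and the CQ of an event QP. */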
void
mlx5_vdpa_event_qp_destroy(struct mlx5_vdpa_event_qp *eqp)
{
        mlx5_devx_qp_destroy(&eqp->sw_qp);
        if (eqp->fw_qp)
                claim_zero(mlx5_devx_cmd_destroy(eqp->fw_qp));
        mlx5_vdpa_cq_destroy(&eqp->cq);
        memset(eqp, 0, sizeof(*eqp));
}

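/* Move the FW and SW QP pair through RST->INIT->RTR->RTS, each QP connected to the other as its peer. */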
static int
mlx5_vdpa_qps2rts(struct mlx5_vdpa_event_qp *eqp)
{
        if (mlx5_devx_cmd_modify_qp_state(eqp->fw_qp, MLX5_CMD_OP_RST2INIT_QP,
                                          eqp->sw_qp.qp->id)) {
                DRV_LOG(ERR, "Failed to modify FW QP to INIT state(%u).",
                        rte_errno);
                return -1;
        }
        if (mlx5_devx_cmd_modify_qp_state(eqp->sw_qp.qp,
                        MLX5_CMD_OP_RST2INIT_QP, eqp->fw_qp->id)) {
                DRV_LOG(ERR, "Failed to modify SW QP to INIT state(%u).",
                        rte_errno);
                return -1;
        }
        if (mlx5_devx_cmd_modify_qp_state(eqp->fw_qp, MLX5_CMD_OP_INIT2RTR_QP,
                                          eqp->sw_qp.qp->id)) {
                DRV_LOG(ERR, "Failed to modify FW QP to RTR state(%u).",
                        rte_errno);
                return -1;
        }
        if (mlx5_devx_cmd_modify_qp_state(eqp->sw_qp.qp,
                        MLX5_CMD_OP_INIT2RTR_QP, eqp->fw_qp->id)) {
                DRV_LOG(ERR, "Failed to modify SW QP to RTR state(%u).",
                        rte_errno);
                return -1;
        }
        if (mlx5_devx_cmd_modify_qp_state(eqp->fw_qp, MLX5_CMD_OP_RTR2RTS_QP,
                                          eqp->sw_qp.qp->id)) {
                DRV_LOG(ERR, "Failed to modify FW QP to RTS state(%u).",
                        rte_errno);
                return -1;
        }
        if (mlx5_devx_cmd_modify_qp_state(eqp->sw_qp.qp, MLX5_CMD_OP_RTR2RTS_QP,
                                          eqp->fw_qp->id)) {
                DRV_LOG(ERR, "Failed to modify SW QP to RTS state(%u).",
                        rte_errno);
                return -1;
        }
        return 0;
}

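/* Reset both QPs to the RST state and then bring them back up to RTS. */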
static int
mlx5_vdpa_qps2rst2rts(struct mlx5_vdpa_event_qp *eqp)
{
        if (mlx5_devx_cmd_modify_qp_state(eqp->fw_qp, MLX5_CMD_OP_QP_2RST,
                                          eqp->sw_qp.qp->id)) {
                DRV_LOG(ERR, "Failed to modify FW QP to RST state(%u).",
                        rte_errno);
                return -1;
        }
        if (mlx5_devx_cmd_modify_qp_state(eqp->sw_qp.qp,
                        MLX5_CMD_OP_QP_2RST, eqp->fw_qp->id)) {
                DRV_LOG(ERR, "Failed to modify SW QP to RST state(%u).",
                        rte_errno);
                return -1;
        }
        return mlx5_vdpa_qps2rts(eqp);
}

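/*
 * Prepare the event QP of a virtq: reuse the existing CQ/QP pair when the
 * requested size matches, otherwise create the CQ, the FW QP and the SW QP
 * and move them to the RTS state.
 */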
int
mlx5_vdpa_event_qp_prepare(struct mlx5_vdpa_priv *priv, uint16_t desc_n,
        int callfd, struct mlx5_vdpa_virtq *virtq)
{
        struct mlx5_vdpa_event_qp *eqp = &virtq->eqp;
        struct mlx5_devx_qp_attr attr = {0};
        uint16_t log_desc_n = rte_log2_u32(desc_n);
        uint32_t ret;

        if (eqp->cq.cq_obj.cq != NULL && log_desc_n == eqp->cq.log_desc_n) {
                /* Reuse existing resources. */
                eqp->cq.callfd = callfd;
                /* FW will set the event QP to error state on queue destroy. */
                if (!mlx5_vdpa_qps2rst2rts(eqp)) {
                        rte_write32(rte_cpu_to_be_32(RTE_BIT32(log_desc_n)),
                                        &eqp->sw_qp.db_rec[0]);
                        return 0;
                }
        }
        if (eqp->fw_qp)
                mlx5_vdpa_event_qp_destroy(eqp);
        if (mlx5_vdpa_cq_create(priv, log_desc_n, callfd, virtq) ||
                !eqp->cq.cq_obj.cq)
                return -1;
        attr.pd = priv->cdev->pdn;
        attr.ts_format =
                mlx5_ts_format_conv(priv->cdev->config.hca_attr.qp_ts_format);
        eqp->fw_qp = mlx5_devx_cmd_create_qp(priv->cdev->ctx, &attr);
        if (!eqp->fw_qp) {
                DRV_LOG(ERR, "Failed to create FW QP(%u).", rte_errno);
                goto error;
        }
        attr.uar_index = mlx5_os_get_devx_uar_page_id(priv->uar.obj);
        attr.cqn = eqp->cq.cq_obj.cq->id;
        attr.num_of_receive_wqes = RTE_BIT32(log_desc_n);
        attr.log_rq_stride = rte_log2_u32(MLX5_WSEG_SIZE);
        attr.num_of_send_wqbbs = 0; /* No SQ is needed. */
        attr.ts_format =
                mlx5_ts_format_conv(priv->cdev->config.hca_attr.qp_ts_format);
        ret = mlx5_devx_qp_create(priv->cdev->ctx, &(eqp->sw_qp),
                                  attr.num_of_receive_wqes * MLX5_WSEG_SIZE,
                                  &attr, SOCKET_ID_ANY);
        if (ret) {
                DRV_LOG(ERR, "Failed to create SW QP(%u).", rte_errno);
                goto error;
        }
        if (mlx5_vdpa_qps2rts(eqp))
                goto error;
        eqp->qp_pi = 0;
        /* First doorbell ring. */
        if (eqp->sw_qp.db_rec)
                rte_write32(rte_cpu_to_be_32(RTE_BIT32(log_desc_n)),
                        &eqp->sw_qp.db_rec[0]);
        return 0;
error:
        mlx5_vdpa_event_qp_destroy(eqp);
        return -1;
}