vdpa/mlx5: fix event queue number query
dpdk.git: drivers/vdpa/mlx5/mlx5_vdpa_event.c
/* SPDX-License-Identifier: BSD-3-Clause
 * Copyright 2019 Mellanox Technologies, Ltd
 */
#include <unistd.h>
#include <stdint.h>
#include <fcntl.h>
#include <sys/eventfd.h>

#include <rte_malloc.h>
#include <rte_errno.h>
#include <rte_lcore.h>
#include <rte_atomic.h>
#include <rte_common.h>
#include <rte_io.h>
#include <rte_alarm.h>

#include <mlx5_common.h>

#include "mlx5_vdpa_utils.h"
#include "mlx5_vdpa.h"

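/*
 * Release the global event resources: drain any events still pending on
 * the DevX event channel before destroying it, then free the UAR and
 * reset the cached EQ number.
 */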
void
mlx5_vdpa_event_qp_global_release(struct mlx5_vdpa_priv *priv)
{
        if (priv->uar) {
                mlx5_glue->devx_free_uar(priv->uar);
                priv->uar = NULL;
        }
#ifdef HAVE_IBV_DEVX_EVENT
        if (priv->eventc) {
                union {
                        struct mlx5dv_devx_async_event_hdr event_resp;
                        uint8_t buf[sizeof(struct mlx5dv_devx_async_event_hdr)
                                                                         + 128];
                } out;

                /* Clean all pending events. */
                while (mlx5_glue->devx_get_event(priv->eventc, &out.event_resp,
                       sizeof(out.buf)) >=
                       (ssize_t)sizeof(out.event_resp.cookie))
                        ;
                mlx5_glue->devx_destroy_event_channel(priv->eventc);
                priv->eventc = NULL;
        }
#endif
        priv->eqn = 0;
}

/* Prepare all the global resources for all the event objects. */
static int
mlx5_vdpa_event_qp_global_prepare(struct mlx5_vdpa_priv *priv)
{
        if (priv->eventc)
                return 0;
        /* Query the EQ number using vector 0, the default completion vector. */
        if (mlx5_glue->devx_query_eqn(priv->ctx, 0, &priv->eqn)) {
                rte_errno = errno;
                DRV_LOG(ERR, "Failed to query EQ number %d.", rte_errno);
                return -1;
        }
        priv->eventc = mlx5_glue->devx_create_event_channel(priv->ctx,
                           MLX5DV_DEVX_CREATE_EVENT_CHANNEL_FLAGS_OMIT_EV_DATA);
        if (!priv->eventc) {
                rte_errno = errno;
                DRV_LOG(ERR, "Failed to create event channel %d.",
                        rte_errno);
                goto error;
        }
        priv->uar = mlx5_glue->devx_alloc_uar(priv->ctx, 0);
        if (!priv->uar) {
                rte_errno = errno;
                DRV_LOG(ERR, "Failed to allocate UAR.");
                goto error;
        }
        return 0;
error:
        mlx5_vdpa_event_qp_global_release(priv);
        return -1;
}

static void
mlx5_vdpa_cq_destroy(struct mlx5_vdpa_cq *cq)
{
        if (cq->cq)
                claim_zero(mlx5_devx_cmd_destroy(cq->cq));
        if (cq->umem_obj)
                claim_zero(mlx5_glue->devx_umem_dereg(cq->umem_obj));
        if (cq->umem_buf)
                rte_free((void *)(uintptr_t)cq->umem_buf);
        memset(cq, 0, sizeof(*cq));
}

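/*
 * Arm the CQ to get an event on the next completion: update the arm
 * sequence number and consumer index in the CQ doorbell record, then
 * ring the UAR doorbell with the same value combined with the CQ ID.
 * On 32-bit architectures the 64-bit UAR write is split into two
 * ordered 32-bit writes.
 */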
static inline void __rte_unused
mlx5_vdpa_cq_arm(struct mlx5_vdpa_priv *priv, struct mlx5_vdpa_cq *cq)
{
        uint32_t arm_sn = cq->arm_sn << MLX5_CQ_SQN_OFFSET;
        uint32_t cq_ci = cq->cq_ci & MLX5_CI_MASK;
        uint32_t doorbell_hi = arm_sn | MLX5_CQ_DBR_CMD_ALL | cq_ci;
        uint64_t doorbell = ((uint64_t)doorbell_hi << 32) | cq->cq->id;
        uint64_t db_be = rte_cpu_to_be_64(doorbell);
        uint32_t *addr = RTE_PTR_ADD(priv->uar->base_addr, MLX5_CQ_DOORBELL);

        rte_io_wmb();
        cq->db_rec[MLX5_CQ_ARM_DB] = rte_cpu_to_be_32(doorbell_hi);
        rte_wmb();
#ifdef RTE_ARCH_64
        *(uint64_t *)addr = db_be;
#else
        *(uint32_t *)addr = db_be;
        rte_io_wmb();
        *((uint32_t *)addr + 1) = db_be >> 32;
#endif
        cq->arm_sn++;
        cq->armed = 1;
}

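/*
 * Create a CQ on registered user memory: the umem holds the CQE array
 * followed by the two doorbell records, and the CQ is subscribed to the
 * global event channel using the CQ pointer as the event cookie.
 */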
static int
mlx5_vdpa_cq_create(struct mlx5_vdpa_priv *priv, uint16_t log_desc_n,
                    int callfd, struct mlx5_vdpa_cq *cq)
{
        struct mlx5_devx_cq_attr attr = {0};
        size_t pgsize = sysconf(_SC_PAGESIZE);
        uint32_t umem_size;
        uint16_t event_nums[1] = {0};
        uint16_t cq_size = 1 << log_desc_n;
        int ret;

        cq->log_desc_n = log_desc_n;
        umem_size = sizeof(struct mlx5_cqe) * cq_size + sizeof(*cq->db_rec) * 2;
        cq->umem_buf = rte_zmalloc(__func__, umem_size, 4096);
        if (!cq->umem_buf) {
                DRV_LOG(ERR, "Failed to allocate memory for CQ.");
                rte_errno = ENOMEM;
                return -ENOMEM;
        }
        cq->umem_obj = mlx5_glue->devx_umem_reg(priv->ctx,
                                                (void *)(uintptr_t)cq->umem_buf,
                                                umem_size,
                                                IBV_ACCESS_LOCAL_WRITE);
        if (!cq->umem_obj) {
                DRV_LOG(ERR, "Failed to register umem for CQ.");
                goto error;
        }
        attr.q_umem_valid = 1;
        attr.db_umem_valid = 1;
        attr.use_first_only = 1;
        attr.overrun_ignore = 0;
        attr.uar_page_id = priv->uar->page_id;
        attr.q_umem_id = cq->umem_obj->umem_id;
        attr.q_umem_offset = 0;
        attr.db_umem_id = cq->umem_obj->umem_id;
        attr.db_umem_offset = sizeof(struct mlx5_cqe) * cq_size;
        attr.eqn = priv->eqn;
        attr.log_cq_size = log_desc_n;
        attr.log_page_size = rte_log2_u32(pgsize);
        cq->cq = mlx5_devx_cmd_create_cq(priv->ctx, &attr);
        if (!cq->cq)
                goto error;
        cq->db_rec = RTE_PTR_ADD(cq->umem_buf, (uintptr_t)attr.db_umem_offset);
        cq->cq_ci = 0;
        rte_spinlock_init(&cq->sl);
        /* Subscribe CQ event to the event channel controlled by the driver. */
        ret = mlx5_glue->devx_subscribe_devx_event(priv->eventc, cq->cq->obj,
                                                   sizeof(event_nums),
                                                   event_nums,
                                                   (uint64_t)(uintptr_t)cq);
        if (ret) {
                DRV_LOG(ERR, "Failed to subscribe CQE event.");
                rte_errno = errno;
                goto error;
        }
        cq->callfd = callfd;
        /* Set the CQE ownership bits so that HW owns the CQ at the start. */
        cq->cqes[0].op_own = MLX5_CQE_OWNER_MASK;
        cq->cqes[0].wqe_counter = rte_cpu_to_be_16(cq_size - 1);
        /* First arming. */
        mlx5_vdpa_cq_arm(priv, cq);
        return 0;
error:
        mlx5_vdpa_cq_destroy(cq);
        return -1;
}

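/*
 * Poll a CQ created with use_first_only set: HW overwrites only the
 * first CQE, so the number of new completions is derived from the
 * wqe_counter delta instead of walking the CQE array. The counter and
 * the ownership/opcode byte are read in one 32-bit access so that they
 * stay consistent with each other.
 */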
static inline uint32_t
mlx5_vdpa_cq_poll(struct mlx5_vdpa_cq *cq)
{
        struct mlx5_vdpa_event_qp *eqp =
                                container_of(cq, struct mlx5_vdpa_event_qp, cq);
        const unsigned int cq_size = 1 << cq->log_desc_n;
        const unsigned int cq_mask = cq_size - 1;
        union {
                struct {
                        uint16_t wqe_counter;
                        uint8_t rsvd5;
                        uint8_t op_own;
                };
                uint32_t word;
        } last_word;
        uint16_t next_wqe_counter = cq->cq_ci & cq_mask;
        uint16_t cur_wqe_counter;
        uint16_t comp;

        last_word.word = rte_read32(&cq->cqes[0].wqe_counter);
        cur_wqe_counter = rte_be_to_cpu_16(last_word.wqe_counter);
        comp = (cur_wqe_counter + 1u - next_wqe_counter) & cq_mask;
        if (comp) {
                cq->cq_ci += comp;
                MLX5_ASSERT(!!(cq->cq_ci & cq_size) ==
                            MLX5_CQE_OWNER(last_word.op_own));
                MLX5_ASSERT(MLX5_CQE_OPCODE(last_word.op_own) !=
                            MLX5_CQE_INVALID);
                /* Count error completions only. */
                if (unlikely(MLX5_CQE_OPCODE(last_word.op_own) ==
                             MLX5_CQE_RESP_ERR ||
                             MLX5_CQE_OPCODE(last_word.op_own) ==
                             MLX5_CQE_REQ_ERR))
                        cq->errors++;
                rte_io_wmb();
                /* Ring CQ doorbell record. */
                cq->db_rec[0] = rte_cpu_to_be_32(cq->cq_ci);
                rte_io_wmb();
                /* Ring SW QP doorbell record. */
                eqp->db_rec[0] = rte_cpu_to_be_32(cq->cq_ci + cq_size);
        }
        return comp;
}

static void
mlx5_vdpa_arm_all_cqs(struct mlx5_vdpa_priv *priv)
{
        struct mlx5_vdpa_cq *cq;
        int i;

        for (i = 0; i < priv->nr_virtqs; i++) {
                cq = &priv->virtqs[i].eqp.cq;
                if (cq->cq && !cq->armed)
                        mlx5_vdpa_cq_arm(priv, cq);
        }
}

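/*
 * Adjust the polling period in the dynamic timer mode: back off by
 * event_us when nothing completed, keep the current period for a single
 * completion, and shrink it proportionally under higher load.
 */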
static void
mlx5_vdpa_timer_sleep(struct mlx5_vdpa_priv *priv, uint32_t max)
{
        if (priv->event_mode == MLX5_VDPA_EVENT_MODE_DYNAMIC_TIMER) {
                switch (max) {
                case 0:
                        priv->timer_delay_us += priv->event_us;
                        break;
                case 1:
                        break;
                default:
                        priv->timer_delay_us /= max;
                        break;
                }
        }
        usleep(priv->timer_delay_us);
}

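/*
 * Timer thread: blocks until the timer is switched on, then polls all
 * virtq CQs in a loop and notifies the guest on completions. When no
 * traffic is seen for no_traffic_time_s seconds, it re-arms all CQs,
 * turns the timer off and goes back to waiting on the condition
 * variable, leaving event detection to the interrupt handler.
 */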
static void *
mlx5_vdpa_poll_handle(void *arg)
{
        struct mlx5_vdpa_priv *priv = arg;
        int i;
        struct mlx5_vdpa_cq *cq;
        uint32_t max;
        uint64_t current_tic;

        pthread_mutex_lock(&priv->timer_lock);
        while (!priv->timer_on)
                pthread_cond_wait(&priv->timer_cond, &priv->timer_lock);
        pthread_mutex_unlock(&priv->timer_lock);
        priv->timer_delay_us = priv->event_mode ==
                                            MLX5_VDPA_EVENT_MODE_DYNAMIC_TIMER ?
                                              MLX5_VDPA_DEFAULT_TIMER_DELAY_US :
                                                                 priv->event_us;
        while (1) {
                max = 0;
                for (i = 0; i < priv->nr_virtqs; i++) {
                        cq = &priv->virtqs[i].eqp.cq;
                        if (cq->cq && !cq->armed) {
                                uint32_t comp = mlx5_vdpa_cq_poll(cq);

                                if (comp) {
                                        /* Notify guest of consumed descs. */
                                        if (cq->callfd != -1)
                                                eventfd_write(cq->callfd,
                                                              (eventfd_t)1);
                                        if (comp > max)
                                                max = comp;
                                }
                        }
                }
                current_tic = rte_rdtsc();
                if (!max) {
                        /* No traffic? Stop the timer and move to interrupts. */
                        if (current_tic - priv->last_traffic_tic >=
                            rte_get_timer_hz() * priv->no_traffic_time_s) {
                                DRV_LOG(DEBUG, "Device %s traffic was stopped.",
                                        priv->vdev->device->name);
                                mlx5_vdpa_arm_all_cqs(priv);
                                pthread_mutex_lock(&priv->timer_lock);
                                priv->timer_on = 0;
                                while (!priv->timer_on)
                                        pthread_cond_wait(&priv->timer_cond,
                                                          &priv->timer_lock);
                                pthread_mutex_unlock(&priv->timer_lock);
                                priv->timer_delay_us = priv->event_mode ==
                                            MLX5_VDPA_EVENT_MODE_DYNAMIC_TIMER ?
                                              MLX5_VDPA_DEFAULT_TIMER_DELAY_US :
                                                                 priv->event_us;
                                continue;
                        }
                } else {
                        priv->last_traffic_tic = current_tic;
                }
                mlx5_vdpa_timer_sleep(priv, max);
        }
        return NULL;
}

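/*
 * Event channel interrupt handler: drains all pending CQ events, polls
 * each signaled CQ and notifies the guest. In pure interrupt mode the
 * CQ is re-armed immediately; in the timer modes arming is left to the
 * timer thread, which is woken up at the end of the handler.
 */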
static void
mlx5_vdpa_interrupt_handler(void *cb_arg)
{
        struct mlx5_vdpa_priv *priv = cb_arg;
#ifdef HAVE_IBV_DEVX_EVENT
        union {
                struct mlx5dv_devx_async_event_hdr event_resp;
                uint8_t buf[sizeof(struct mlx5dv_devx_async_event_hdr) + 128];
        } out;

        while (mlx5_glue->devx_get_event(priv->eventc, &out.event_resp,
                                         sizeof(out.buf)) >=
                                       (ssize_t)sizeof(out.event_resp.cookie)) {
                struct mlx5_vdpa_cq *cq = (struct mlx5_vdpa_cq *)
                                               (uintptr_t)out.event_resp.cookie;
                struct mlx5_vdpa_event_qp *eqp = container_of(cq,
                                                 struct mlx5_vdpa_event_qp, cq);
                struct mlx5_vdpa_virtq *virtq = container_of(eqp,
                                                   struct mlx5_vdpa_virtq, eqp);

                mlx5_vdpa_cq_poll(cq);
                /* Notify guest of consumed descs. */
                if (cq->callfd != -1)
                        eventfd_write(cq->callfd, (eventfd_t)1);
                if (priv->event_mode == MLX5_VDPA_EVENT_MODE_ONLY_INTERRUPT) {
                        mlx5_vdpa_cq_arm(priv, cq);
                        return;
                }
                /* Don't arm again - the timer will take control. */
                DRV_LOG(DEBUG, "Device %s virtq %d cq %d event was captured."
                        " Timer is %s, cq ci is %u.",
                        priv->vdev->device->name,
                        (int)virtq->index, cq->cq->id,
                        priv->timer_on ? "on" : "off", cq->cq_ci);
                cq->armed = 0;
        }
#endif

        /* Traffic detected: make sure the timer is on. */
        priv->last_traffic_tic = rte_rdtsc();
        pthread_mutex_lock(&priv->timer_lock);
        if (!priv->timer_on) {
                priv->timer_on = 1;
                pthread_cond_signal(&priv->timer_cond);
        }
        pthread_mutex_unlock(&priv->timer_lock);
}

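/*
 * Set up the CQE event flow: start the timer thread when a timer mode
 * is configured, switch the event channel FD to non-blocking mode and
 * register it as an external interrupt handler.
 */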
int
mlx5_vdpa_cqe_event_setup(struct mlx5_vdpa_priv *priv)
{
        int flags;
        int ret;

        if (!priv->eventc)
                /* All virtqs are in poll mode. */
                return 0;
        if (priv->event_mode != MLX5_VDPA_EVENT_MODE_ONLY_INTERRUPT) {
                pthread_mutex_init(&priv->timer_lock, NULL);
                pthread_cond_init(&priv->timer_cond, NULL);
                priv->timer_on = 0;
                ret = pthread_create(&priv->timer_tid, NULL,
                                     mlx5_vdpa_poll_handle, (void *)priv);
                if (ret) {
                        DRV_LOG(ERR, "Failed to create timer thread.");
                        return -1;
                }
        }
        flags = fcntl(priv->eventc->fd, F_GETFL);
        ret = fcntl(priv->eventc->fd, F_SETFL, flags | O_NONBLOCK);
        if (ret) {
                DRV_LOG(ERR, "Failed to change event channel FD.");
                goto error;
        }
        priv->intr_handle.fd = priv->eventc->fd;
        priv->intr_handle.type = RTE_INTR_HANDLE_EXT;
        if (rte_intr_callback_register(&priv->intr_handle,
                                       mlx5_vdpa_interrupt_handler, priv)) {
                priv->intr_handle.fd = 0;
                DRV_LOG(ERR, "Failed to register CQE interrupt %d.", rte_errno);
                goto error;
        }
        return 0;
error:
        mlx5_vdpa_cqe_event_unset(priv);
        return -1;
}

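/*
 * Tear down the CQE event flow: callback unregistration can return
 * -EAGAIN while the handler is still running, so it is retried a
 * bounded number of times before the timer thread is cancelled.
 */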
void
mlx5_vdpa_cqe_event_unset(struct mlx5_vdpa_priv *priv)
{
        int retries = MLX5_VDPA_INTR_RETRIES;
        int ret = -EAGAIN;
        void *status;

        if (priv->intr_handle.fd) {
                while (retries-- && ret == -EAGAIN) {
                        ret = rte_intr_callback_unregister(&priv->intr_handle,
                                                    mlx5_vdpa_interrupt_handler,
                                                    priv);
                        if (ret == -EAGAIN) {
                                DRV_LOG(DEBUG, "Try again to unregister fd %d "
                                        "of CQ interrupt, retries = %d.",
                                        priv->intr_handle.fd, retries);
                                rte_pause();
                        }
                }
                memset(&priv->intr_handle, 0, sizeof(priv->intr_handle));
        }
        if (priv->timer_tid) {
                pthread_cancel(priv->timer_tid);
                pthread_join(priv->timer_tid, &status);
        }
        priv->timer_tid = 0;
}

void
mlx5_vdpa_event_qp_destroy(struct mlx5_vdpa_event_qp *eqp)
{
        if (eqp->sw_qp)
                claim_zero(mlx5_devx_cmd_destroy(eqp->sw_qp));
        if (eqp->umem_obj)
                claim_zero(mlx5_glue->devx_umem_dereg(eqp->umem_obj));
        if (eqp->umem_buf)
                rte_free(eqp->umem_buf);
        if (eqp->fw_qp)
                claim_zero(mlx5_devx_cmd_destroy(eqp->fw_qp));
        mlx5_vdpa_cq_destroy(&eqp->cq);
        memset(eqp, 0, sizeof(*eqp));
}

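/*
 * Move both QPs through the standard RST->INIT->RTR->RTS state machine.
 * Each QP is given the other's QP number as its remote, cross-connecting
 * the FW QP and the SW QP into a loopback pair.
 */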
static int
mlx5_vdpa_qps2rts(struct mlx5_vdpa_event_qp *eqp)
{
        if (mlx5_devx_cmd_modify_qp_state(eqp->fw_qp, MLX5_CMD_OP_RST2INIT_QP,
                                          eqp->sw_qp->id)) {
                DRV_LOG(ERR, "Failed to modify FW QP to INIT state(%u).",
                        rte_errno);
                return -1;
        }
        if (mlx5_devx_cmd_modify_qp_state(eqp->sw_qp, MLX5_CMD_OP_RST2INIT_QP,
                                          eqp->fw_qp->id)) {
                DRV_LOG(ERR, "Failed to modify SW QP to INIT state(%u).",
                        rte_errno);
                return -1;
        }
        if (mlx5_devx_cmd_modify_qp_state(eqp->fw_qp, MLX5_CMD_OP_INIT2RTR_QP,
                                          eqp->sw_qp->id)) {
                DRV_LOG(ERR, "Failed to modify FW QP to RTR state(%u).",
                        rte_errno);
                return -1;
        }
        if (mlx5_devx_cmd_modify_qp_state(eqp->sw_qp, MLX5_CMD_OP_INIT2RTR_QP,
                                          eqp->fw_qp->id)) {
                DRV_LOG(ERR, "Failed to modify SW QP to RTR state(%u).",
                        rte_errno);
                return -1;
        }
        if (mlx5_devx_cmd_modify_qp_state(eqp->fw_qp, MLX5_CMD_OP_RTR2RTS_QP,
                                          eqp->sw_qp->id)) {
                DRV_LOG(ERR, "Failed to modify FW QP to RTS state(%u).",
                        rte_errno);
                return -1;
        }
        if (mlx5_devx_cmd_modify_qp_state(eqp->sw_qp, MLX5_CMD_OP_RTR2RTS_QP,
                                          eqp->fw_qp->id)) {
                DRV_LOG(ERR, "Failed to modify SW QP to RTS state(%u).",
                        rte_errno);
                return -1;
        }
        return 0;
}

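/*
 * Create an event QP pair over the CQ: a FW-owned QP and a SW QP whose
 * umem holds the RQ WQEs followed by the doorbell record (no SQ is
 * used). After moving the pair to RTS, the RQ doorbell is rung with the
 * full queue size, making all receive entries available to HW.
 */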
int
mlx5_vdpa_event_qp_create(struct mlx5_vdpa_priv *priv, uint16_t desc_n,
                          int callfd, struct mlx5_vdpa_event_qp *eqp)
{
        struct mlx5_devx_qp_attr attr = {0};
        uint16_t log_desc_n = rte_log2_u32(desc_n);
        uint32_t umem_size = (1 << log_desc_n) * MLX5_WSEG_SIZE +
                                                       sizeof(*eqp->db_rec) * 2;

        if (mlx5_vdpa_event_qp_global_prepare(priv))
                return -1;
        if (mlx5_vdpa_cq_create(priv, log_desc_n, callfd, &eqp->cq))
                return -1;
        attr.pd = priv->pdn;
        eqp->fw_qp = mlx5_devx_cmd_create_qp(priv->ctx, &attr);
        if (!eqp->fw_qp) {
                DRV_LOG(ERR, "Failed to create FW QP(%u).", rte_errno);
                goto error;
        }
        eqp->umem_buf = rte_zmalloc(__func__, umem_size, 4096);
        if (!eqp->umem_buf) {
                DRV_LOG(ERR, "Failed to allocate memory for SW QP.");
                rte_errno = ENOMEM;
                goto error;
        }
        eqp->umem_obj = mlx5_glue->devx_umem_reg(priv->ctx,
                                               (void *)(uintptr_t)eqp->umem_buf,
                                               umem_size,
                                               IBV_ACCESS_LOCAL_WRITE);
        if (!eqp->umem_obj) {
                DRV_LOG(ERR, "Failed to register umem for SW QP.");
                goto error;
        }
        attr.uar_index = priv->uar->page_id;
        attr.cqn = eqp->cq.cq->id;
        attr.log_page_size = rte_log2_u32(sysconf(_SC_PAGESIZE));
        attr.rq_size = 1 << log_desc_n;
        attr.log_rq_stride = rte_log2_u32(MLX5_WSEG_SIZE);
        attr.sq_size = 0; /* No SQ is needed. */
        attr.dbr_umem_valid = 1;
        attr.wq_umem_id = eqp->umem_obj->umem_id;
        attr.wq_umem_offset = 0;
        attr.dbr_umem_id = eqp->umem_obj->umem_id;
        attr.dbr_address = (1 << log_desc_n) * MLX5_WSEG_SIZE;
        eqp->sw_qp = mlx5_devx_cmd_create_qp(priv->ctx, &attr);
        if (!eqp->sw_qp) {
                DRV_LOG(ERR, "Failed to create SW QP(%u).", rte_errno);
                goto error;
        }
        eqp->db_rec = RTE_PTR_ADD(eqp->umem_buf, (uintptr_t)attr.dbr_address);
        if (mlx5_vdpa_qps2rts(eqp))
                goto error;
        /* First doorbell ring. */
        rte_write32(rte_cpu_to_be_32(1 << log_desc_n), &eqp->db_rec[0]);
        return 0;
error:
        mlx5_vdpa_event_qp_destroy(eqp);
        return -1;
}