net/ice/base: fix null pointer dereferences for parser
[dpdk.git] / drivers / net / mlx5 / mlx5_flow_aso.c
1 /* SPDX-License-Identifier: BSD-3-Clause
2  * Copyright 2020 Mellanox Technologies, Ltd
3  */
4 #include <mlx5_prm.h>
5 #include <rte_malloc.h>
6 #include <rte_cycles.h>
7 #include <rte_eal_paging.h>
8
9 #include <mlx5_malloc.h>
10 #include <mlx5_common_os.h>
11 #include <mlx5_common_devx.h>
12
13 #include "mlx5.h"
14 #include "mlx5_flow.h"
15
16 /**
17  * Destroy Completion Queue used for ASO access.
18  *
19  * @param[in] cq
20  *   ASO CQ to destroy.
21  */
22 static void
23 mlx5_aso_cq_destroy(struct mlx5_aso_cq *cq)
24 {
25         if (cq->cq_obj.cq)
26                 mlx5_devx_cq_destroy(&cq->cq_obj);
27         memset(cq, 0, sizeof(*cq));
28 }
29
30 /**
31  * Create Completion Queue used for ASO access.
32  *
33  * @param[in] ctx
34  *   Context returned from mlx5 open_device() glue function.
35  * @param[in/out] cq
36  *   Pointer to CQ to create.
37  * @param[in] log_desc_n
38  *   Log of number of descriptors in queue.
39  * @param[in] socket
40  *   Socket to use for allocation.
41  * @param[in] uar_page_id
42  *   UAR page ID to use.
43  *
44  * @return
45  *   0 on success, a negative errno value otherwise and rte_errno is set.
46  */
47 static int
48 mlx5_aso_cq_create(void *ctx, struct mlx5_aso_cq *cq, uint16_t log_desc_n,
49                    int socket, int uar_page_id)
50 {
51         struct mlx5_devx_cq_attr attr = {
52                 .uar_page_id = uar_page_id,
53         };
54
55         cq->log_desc_n = log_desc_n;
56         cq->cq_ci = 0;
57         return mlx5_devx_cq_create(ctx, &cq->cq_obj, log_desc_n, &attr, socket);
58 }
59
60 /**
61  * Free MR resources.
62  *
63  * @param[in] sh
64  *   Pointer to shared device context.
65  * @param[in] mr
66  *   MR to free.
67  */
68 static void
69 mlx5_aso_dereg_mr(struct mlx5_dev_ctx_shared *sh, struct mlx5_pmd_mr *mr)
70 {
71         void *addr = mr->addr;
72
73         sh->share_cache.dereg_mr_cb(mr);
74         mlx5_free(addr);
75         memset(mr, 0, sizeof(*mr));
76 }
77
78 /**
79  * Register Memory Region.
80  *
81  * @param[in] sh
82  *   Pointer to shared device context.
83  * @param[in] length
84  *   Size of MR buffer.
85  * @param[in/out] mr
86  *   Pointer to MR to create.
87  * @param[in] socket
88  *   Socket to use for allocation.
89  *
90  * @return
91  *   0 on success, a negative errno value otherwise and rte_errno is set.
92  */
93 static int
94 mlx5_aso_reg_mr(struct mlx5_dev_ctx_shared *sh, size_t length,
95                 struct mlx5_pmd_mr *mr, int socket)
96 {
97
98         int ret;
99
100         mr->addr = mlx5_malloc(MLX5_MEM_RTE | MLX5_MEM_ZERO, length, 4096,
101                                socket);
102         if (!mr->addr) {
103                 DRV_LOG(ERR, "Failed to create ASO bits mem for MR.");
104                 return -1;
105         }
106         ret = sh->share_cache.reg_mr_cb(sh->pd, mr->addr, length, mr);
107         if (ret) {
108                 DRV_LOG(ERR, "Failed to create direct Mkey.");
109                 mlx5_free(mr->addr);
110                 return -1;
111         }
112         return 0;
113 }
114
115 /**
116  * Destroy Send Queue used for ASO access.
117  *
118  * @param[in] sq
119  *   ASO SQ to destroy.
120  */
121 static void
122 mlx5_aso_destroy_sq(struct mlx5_aso_sq *sq)
123 {
124         mlx5_devx_sq_destroy(&sq->sq_obj);
125         mlx5_aso_cq_destroy(&sq->cq);
126         memset(sq, 0, sizeof(*sq));
127 }
128
129 /**
130  * Initialize Send Queue used for ASO access.
131  *
132  * @param[in] sq
133  *   ASO SQ to initialize.
134  */
135 static void
136 mlx5_aso_age_init_sq(struct mlx5_aso_sq *sq)
137 {
138         volatile struct mlx5_aso_wqe *restrict wqe;
139         int i;
140         int size = 1 << sq->log_desc_n;
141         uint64_t addr;
142
143         /* All the next fields state should stay constant. */
144         for (i = 0, wqe = &sq->sq_obj.aso_wqes[0]; i < size; ++i, ++wqe) {
145                 wqe->general_cseg.sq_ds = rte_cpu_to_be_32((sq->sqn << 8) |
146                                                           (sizeof(*wqe) >> 4));
147                 wqe->aso_cseg.lkey = rte_cpu_to_be_32(sq->mr.lkey);
148                 addr = (uint64_t)((uint64_t *)sq->mr.addr + i *
149                                             MLX5_ASO_AGE_ACTIONS_PER_POOL / 64);
150                 wqe->aso_cseg.va_h = rte_cpu_to_be_32((uint32_t)(addr >> 32));
151                 wqe->aso_cseg.va_l_r = rte_cpu_to_be_32((uint32_t)addr | 1u);
152                 wqe->aso_cseg.operand_masks = rte_cpu_to_be_32
153                         (0u |
154                          (ASO_OPER_LOGICAL_OR << ASO_CSEG_COND_OPER_OFFSET) |
155                          (ASO_OP_ALWAYS_TRUE << ASO_CSEG_COND_1_OPER_OFFSET) |
156                          (ASO_OP_ALWAYS_TRUE << ASO_CSEG_COND_0_OPER_OFFSET) |
157                          (BYTEWISE_64BYTE << ASO_CSEG_DATA_MASK_MODE_OFFSET));
158                 wqe->aso_cseg.data_mask = RTE_BE64(UINT64_MAX);
159         }
160 }
161
162 /**
163  * Initialize Send Queue used for ASO flow meter access.
164  *
165  * @param[in] sq
166  *   ASO SQ to initialize.
167  */
168 static void
169 mlx5_aso_mtr_init_sq(struct mlx5_aso_sq *sq)
170 {
171         volatile struct mlx5_aso_wqe *restrict wqe;
172         int i;
173         int size = 1 << sq->log_desc_n;
174
175         /* All the next fields state should stay constant. */
176         for (i = 0, wqe = &sq->sq_obj.aso_wqes[0]; i < size; ++i, ++wqe) {
177                 wqe->general_cseg.sq_ds = rte_cpu_to_be_32((sq->sqn << 8) |
178                                                           (sizeof(*wqe) >> 4));
179                 wqe->aso_cseg.operand_masks = RTE_BE32(0u |
180                          (ASO_OPER_LOGICAL_OR << ASO_CSEG_COND_OPER_OFFSET) |
181                          (ASO_OP_ALWAYS_TRUE << ASO_CSEG_COND_1_OPER_OFFSET) |
182                          (ASO_OP_ALWAYS_TRUE << ASO_CSEG_COND_0_OPER_OFFSET) |
183                          (BYTEWISE_64BYTE << ASO_CSEG_DATA_MASK_MODE_OFFSET));
184                 wqe->general_cseg.flags = RTE_BE32(MLX5_COMP_ALWAYS <<
185                                                          MLX5_COMP_MODE_OFFSET);
186         }
187 }
188
189 /*
190  * Initialize Send Queue used for ASO connection tracking.
191  *
192  * @param[in] sq
193  *   ASO SQ to initialize.
194  */
195 static void
196 mlx5_aso_ct_init_sq(struct mlx5_aso_sq *sq)
197 {
198         volatile struct mlx5_aso_wqe *restrict wqe;
199         int i;
200         int size = 1 << sq->log_desc_n;
201         uint64_t addr;
202
203         /* All the next fields state should stay constant. */
204         for (i = 0, wqe = &sq->sq_obj.aso_wqes[0]; i < size; ++i, ++wqe) {
205                 wqe->general_cseg.sq_ds = rte_cpu_to_be_32((sq->sqn << 8) |
206                                                           (sizeof(*wqe) >> 4));
207                 /* One unique MR for the query data. */
208                 wqe->aso_cseg.lkey = rte_cpu_to_be_32(sq->mr.lkey);
209                 /* Magic number 64 represents the length of a ASO CT obj. */
210                 addr = (uint64_t)((uintptr_t)sq->mr.addr + i * 64);
211                 wqe->aso_cseg.va_h = rte_cpu_to_be_32((uint32_t)(addr >> 32));
212                 wqe->aso_cseg.va_l_r = rte_cpu_to_be_32((uint32_t)addr | 1u);
213                 /*
214                  * The values of operand_masks are different for modify
215                  * and query.
216                  * And data_mask may be different for each modification. In
217                  * query, it could be zero and ignored.
218                  * CQE generation is always needed, in order to decide when
219                  * it is available to create the flow or read the data.
220                  */
221                 wqe->general_cseg.flags = RTE_BE32(MLX5_COMP_ALWAYS <<
222                                                    MLX5_COMP_MODE_OFFSET);
223         }
224 }
225
226 /**
227  * Create Send Queue used for ASO access.
228  *
229  * @param[in] ctx
230  *   Context returned from mlx5 open_device() glue function.
231  * @param[in/out] sq
232  *   Pointer to SQ to create.
233  * @param[in] socket
234  *   Socket to use for allocation.
235  * @param[in] uar
236  *   User Access Region object.
237  * @param[in] pdn
238  *   Protection Domain number to use.
239  * @param[in] log_desc_n
240  *   Log of number of descriptors in queue.
241  * @param[in] ts_format
242  *   timestamp format supported by the queue.
243  *
244  * @return
245  *   0 on success, a negative errno value otherwise and rte_errno is set.
246  */
247 static int
248 mlx5_aso_sq_create(void *ctx, struct mlx5_aso_sq *sq, int socket, void *uar,
249                    uint32_t pdn, uint16_t log_desc_n, uint32_t ts_format)
250 {
251         struct mlx5_devx_create_sq_attr attr = {
252                 .user_index = 0xFFFF,
253                 .wq_attr = (struct mlx5_devx_wq_attr){
254                         .pd = pdn,
255                         .uar_page = mlx5_os_get_devx_uar_page_id(uar),
256                 },
257                 .ts_format = mlx5_ts_format_conv(ts_format),
258         };
259         struct mlx5_devx_modify_sq_attr modify_attr = {
260                 .state = MLX5_SQC_STATE_RDY,
261         };
262         uint16_t log_wqbb_n;
263         int ret;
264
265         if (mlx5_aso_cq_create(ctx, &sq->cq, log_desc_n, socket,
266                                mlx5_os_get_devx_uar_page_id(uar)))
267                 goto error;
268         sq->log_desc_n = log_desc_n;
269         attr.cqn = sq->cq.cq_obj.cq->id;
270         /* for mlx5_aso_wqe that is twice the size of mlx5_wqe */
271         log_wqbb_n = log_desc_n + 1;
272         ret = mlx5_devx_sq_create(ctx, &sq->sq_obj, log_wqbb_n, &attr, socket);
273         if (ret) {
274                 DRV_LOG(ERR, "Can't create SQ object.");
275                 rte_errno = ENOMEM;
276                 goto error;
277         }
278         ret = mlx5_devx_cmd_modify_sq(sq->sq_obj.sq, &modify_attr);
279         if (ret) {
280                 DRV_LOG(ERR, "Can't change SQ state to ready.");
281                 rte_errno = ENOMEM;
282                 goto error;
283         }
284         sq->pi = 0;
285         sq->head = 0;
286         sq->tail = 0;
287         sq->sqn = sq->sq_obj.sq->id;
288         sq->uar_addr = mlx5_os_get_devx_uar_reg_addr(uar);
289         rte_spinlock_init(&sq->sqsl);
290         return 0;
291 error:
292         mlx5_aso_destroy_sq(sq);
293         return -1;
294 }
295
296 /**
297  * API to create and initialize Send Queue used for ASO access.
298  *
299  * @param[in] sh
300  *   Pointer to shared device context.
301  * @param[in] aso_opc_mod
302  *   Mode of ASO feature.
303  *
304  * @return
305  *   0 on success, a negative errno value otherwise and rte_errno is set.
306  */
307 int
308 mlx5_aso_queue_init(struct mlx5_dev_ctx_shared *sh,
309                     enum mlx5_access_aso_opc_mod aso_opc_mod)
310 {
311         uint32_t sq_desc_n = 1 << MLX5_ASO_QUEUE_LOG_DESC;
312
313         switch (aso_opc_mod) {
314         case ASO_OPC_MOD_FLOW_HIT:
315                 if (mlx5_aso_reg_mr(sh, (MLX5_ASO_AGE_ACTIONS_PER_POOL / 8) *
316                                     sq_desc_n, &sh->aso_age_mng->aso_sq.mr, 0))
317                         return -1;
318                 if (mlx5_aso_sq_create(sh->ctx, &sh->aso_age_mng->aso_sq, 0,
319                                   sh->tx_uar, sh->pdn, MLX5_ASO_QUEUE_LOG_DESC,
320                                   sh->sq_ts_format)) {
321                         mlx5_aso_dereg_mr(sh, &sh->aso_age_mng->aso_sq.mr);
322                         return -1;
323                 }
324                 mlx5_aso_age_init_sq(&sh->aso_age_mng->aso_sq);
325                 break;
326         case ASO_OPC_MOD_POLICER:
327                 if (mlx5_aso_sq_create(sh->ctx, &sh->mtrmng->pools_mng.sq, 0,
328                                   sh->tx_uar, sh->pdn, MLX5_ASO_QUEUE_LOG_DESC,
329                                   sh->sq_ts_format))
330                         return -1;
331                 mlx5_aso_mtr_init_sq(&sh->mtrmng->pools_mng.sq);
332                 break;
333         case ASO_OPC_MOD_CONNECTION_TRACKING:
334                 /* 64B per object for query. */
335                 if (mlx5_aso_reg_mr(sh, 64 * sq_desc_n,
336                                     &sh->ct_mng->aso_sq.mr, 0))
337                         return -1;
338                 if (mlx5_aso_sq_create(sh->ctx, &sh->ct_mng->aso_sq, 0,
339                                 sh->tx_uar, sh->pdn, MLX5_ASO_QUEUE_LOG_DESC,
340                                 sh->sq_ts_format)) {
341                         mlx5_aso_dereg_mr(sh, &sh->ct_mng->aso_sq.mr);
342                         return -1;
343                 }
344                 mlx5_aso_ct_init_sq(&sh->ct_mng->aso_sq);
345                 break;
346         default:
347                 DRV_LOG(ERR, "Unknown ASO operation mode");
348                 return -1;
349         }
350         return 0;
351 }
352
353 /**
354  * API to destroy Send Queue used for ASO access.
355  *
356  * @param[in] sh
357  *   Pointer to shared device context.
358  * @param[in] aso_opc_mod
359  *   Mode of ASO feature.
360  */
361 void
362 mlx5_aso_queue_uninit(struct mlx5_dev_ctx_shared *sh,
363                       enum mlx5_access_aso_opc_mod aso_opc_mod)
364 {
365         struct mlx5_aso_sq *sq;
366
367         switch (aso_opc_mod) {
368         case ASO_OPC_MOD_FLOW_HIT:
369                 mlx5_aso_dereg_mr(sh, &sh->aso_age_mng->aso_sq.mr);
370                 sq = &sh->aso_age_mng->aso_sq;
371                 break;
372         case ASO_OPC_MOD_POLICER:
373                 sq = &sh->mtrmng->pools_mng.sq;
374                 break;
375         case ASO_OPC_MOD_CONNECTION_TRACKING:
376                 mlx5_aso_dereg_mr(sh, &sh->ct_mng->aso_sq.mr);
377                 sq = &sh->ct_mng->aso_sq;
378                 break;
379         default:
380                 DRV_LOG(ERR, "Unknown ASO operation mode");
381                 return;
382         }
383         mlx5_aso_destroy_sq(sq);
384 }
385
386 /**
387  * Write a burst of WQEs to ASO SQ.
388  *
389  * @param[in] mng
390  *   ASO management data, contains the SQ.
391  * @param[in] n
392  *   Index of the last valid pool.
393  *
394  * @return
395  *   Number of WQEs in burst.
396  */
397 static uint16_t
398 mlx5_aso_sq_enqueue_burst(struct mlx5_aso_age_mng *mng, uint16_t n)
399 {
400         volatile struct mlx5_aso_wqe *wqe;
401         struct mlx5_aso_sq *sq = &mng->aso_sq;
402         struct mlx5_aso_age_pool *pool;
403         uint16_t size = 1 << sq->log_desc_n;
404         uint16_t mask = size - 1;
405         uint16_t max;
406         uint16_t start_head = sq->head;
407
408         max = RTE_MIN(size - (uint16_t)(sq->head - sq->tail), n - sq->next);
409         if (unlikely(!max))
410                 return 0;
411         sq->elts[start_head & mask].burst_size = max;
412         do {
413                 wqe = &sq->sq_obj.aso_wqes[sq->head & mask];
414                 rte_prefetch0(&sq->sq_obj.aso_wqes[(sq->head + 1) & mask]);
415                 /* Fill next WQE. */
416                 rte_spinlock_lock(&mng->resize_sl);
417                 pool = mng->pools[sq->next];
418                 rte_spinlock_unlock(&mng->resize_sl);
419                 sq->elts[sq->head & mask].pool = pool;
420                 wqe->general_cseg.misc =
421                                 rte_cpu_to_be_32(((struct mlx5_devx_obj *)
422                                                  (pool->flow_hit_aso_obj))->id);
423                 wqe->general_cseg.flags = RTE_BE32(MLX5_COMP_ONLY_FIRST_ERR <<
424                                                          MLX5_COMP_MODE_OFFSET);
425                 wqe->general_cseg.opcode = rte_cpu_to_be_32
426                                                 (MLX5_OPCODE_ACCESS_ASO |
427                                                  (ASO_OPC_MOD_FLOW_HIT <<
428                                                   WQE_CSEG_OPC_MOD_OFFSET) |
429                                                  (sq->pi <<
430                                                   WQE_CSEG_WQE_INDEX_OFFSET));
431                 sq->pi += 2; /* Each WQE contains 2 WQEBB's. */
432                 sq->head++;
433                 sq->next++;
434                 max--;
435         } while (max);
436         wqe->general_cseg.flags = RTE_BE32(MLX5_COMP_ALWAYS <<
437                                                          MLX5_COMP_MODE_OFFSET);
438         rte_io_wmb();
439         sq->sq_obj.db_rec[MLX5_SND_DBR] = rte_cpu_to_be_32(sq->pi);
440         rte_wmb();
441         *sq->uar_addr = *(volatile uint64_t *)wqe; /* Assume 64 bit ARCH.*/
442         rte_wmb();
443         return sq->elts[start_head & mask].burst_size;
444 }
445
446 /**
447  * Debug utility function. Dump contents of error CQE and WQE.
448  *
449  * @param[in] cqe
450  *   Error CQE to dump.
451  * @param[in] wqe
452  *   Error WQE to dump.
453  */
454 static void
455 mlx5_aso_dump_err_objs(volatile uint32_t *cqe, volatile uint32_t *wqe)
456 {
457         int i;
458
459         DRV_LOG(ERR, "Error cqe:");
460         for (i = 0; i < 16; i += 4)
461                 DRV_LOG(ERR, "%08X %08X %08X %08X", cqe[i], cqe[i + 1],
462                         cqe[i + 2], cqe[i + 3]);
463         DRV_LOG(ERR, "\nError wqe:");
464         for (i = 0; i < (int)sizeof(struct mlx5_aso_wqe) / 4; i += 4)
465                 DRV_LOG(ERR, "%08X %08X %08X %08X", wqe[i], wqe[i + 1],
466                         wqe[i + 2], wqe[i + 3]);
467 }
468
469 /**
470  * Handle case of error CQE.
471  *
472  * @param[in] sq
473  *   ASO SQ to use.
474  */
475 static void
476 mlx5_aso_cqe_err_handle(struct mlx5_aso_sq *sq)
477 {
478         struct mlx5_aso_cq *cq = &sq->cq;
479         uint32_t idx = cq->cq_ci & ((1 << cq->log_desc_n) - 1);
480         volatile struct mlx5_err_cqe *cqe =
481                         (volatile struct mlx5_err_cqe *)&cq->cq_obj.cqes[idx];
482
483         cq->errors++;
484         idx = rte_be_to_cpu_16(cqe->wqe_counter) & (1u << sq->log_desc_n);
485         mlx5_aso_dump_err_objs((volatile uint32_t *)cqe,
486                                (volatile uint32_t *)&sq->sq_obj.aso_wqes[idx]);
487 }
488
489 /**
490  * Update ASO objects upon completion.
491  *
492  * @param[in] sh
493  *   Shared device context.
494  * @param[in] n
495  *   Number of completed ASO objects.
496  */
497 static void
498 mlx5_aso_age_action_update(struct mlx5_dev_ctx_shared *sh, uint16_t n)
499 {
500         struct mlx5_aso_age_mng *mng = sh->aso_age_mng;
501         struct mlx5_aso_sq *sq = &mng->aso_sq;
502         struct mlx5_age_info *age_info;
503         const uint16_t size = 1 << sq->log_desc_n;
504         const uint16_t mask = size - 1;
505         const uint64_t curr = MLX5_CURR_TIME_SEC;
506         uint16_t expected = AGE_CANDIDATE;
507         uint16_t i;
508
509         for (i = 0; i < n; ++i) {
510                 uint16_t idx = (sq->tail + i) & mask;
511                 struct mlx5_aso_age_pool *pool = sq->elts[idx].pool;
512                 uint64_t diff = curr - pool->time_of_last_age_check;
513                 uint64_t *addr = sq->mr.addr;
514                 int j;
515
516                 addr += idx * MLX5_ASO_AGE_ACTIONS_PER_POOL / 64;
517                 pool->time_of_last_age_check = curr;
518                 for (j = 0; j < MLX5_ASO_AGE_ACTIONS_PER_POOL; j++) {
519                         struct mlx5_aso_age_action *act = &pool->actions[j];
520                         struct mlx5_age_param *ap = &act->age_params;
521                         uint8_t byte;
522                         uint8_t offset;
523                         uint8_t *u8addr;
524                         uint8_t hit;
525
526                         if (__atomic_load_n(&ap->state, __ATOMIC_RELAXED) !=
527                                             AGE_CANDIDATE)
528                                 continue;
529                         byte = 63 - (j / 8);
530                         offset = j % 8;
531                         u8addr = (uint8_t *)addr;
532                         hit = (u8addr[byte] >> offset) & 0x1;
533                         if (hit) {
534                                 __atomic_store_n(&ap->sec_since_last_hit, 0,
535                                                  __ATOMIC_RELAXED);
536                         } else {
537                                 struct mlx5_priv *priv;
538
539                                 __atomic_fetch_add(&ap->sec_since_last_hit,
540                                                    diff, __ATOMIC_RELAXED);
541                                 /* If timeout passed add to aged-out list. */
542                                 if (ap->sec_since_last_hit <= ap->timeout)
543                                         continue;
544                                 priv =
545                                 rte_eth_devices[ap->port_id].data->dev_private;
546                                 age_info = GET_PORT_AGE_INFO(priv);
547                                 rte_spinlock_lock(&age_info->aged_sl);
548                                 if (__atomic_compare_exchange_n(&ap->state,
549                                                                 &expected,
550                                                                 AGE_TMOUT,
551                                                                 false,
552                                                                __ATOMIC_RELAXED,
553                                                             __ATOMIC_RELAXED)) {
554                                         LIST_INSERT_HEAD(&age_info->aged_aso,
555                                                          act, next);
556                                         MLX5_AGE_SET(age_info,
557                                                      MLX5_AGE_EVENT_NEW);
558                                 }
559                                 rte_spinlock_unlock(&age_info->aged_sl);
560                         }
561                 }
562         }
563         mlx5_age_event_prepare(sh);
564 }
565
566 /**
567  * Handle completions from WQEs sent to ASO SQ.
568  *
569  * @param[in] sh
570  *   Shared device context.
571  *
572  * @return
573  *   Number of CQEs handled.
574  */
575 static uint16_t
576 mlx5_aso_completion_handle(struct mlx5_dev_ctx_shared *sh)
577 {
578         struct mlx5_aso_age_mng *mng = sh->aso_age_mng;
579         struct mlx5_aso_sq *sq = &mng->aso_sq;
580         struct mlx5_aso_cq *cq = &sq->cq;
581         volatile struct mlx5_cqe *restrict cqe;
582         const unsigned int cq_size = 1 << cq->log_desc_n;
583         const unsigned int mask = cq_size - 1;
584         uint32_t idx;
585         uint32_t next_idx = cq->cq_ci & mask;
586         const uint16_t max = (uint16_t)(sq->head - sq->tail);
587         uint16_t i = 0;
588         int ret;
589         if (unlikely(!max))
590                 return 0;
591         do {
592                 idx = next_idx;
593                 next_idx = (cq->cq_ci + 1) & mask;
594                 rte_prefetch0(&cq->cq_obj.cqes[next_idx]);
595                 cqe = &cq->cq_obj.cqes[idx];
596                 ret = check_cqe(cqe, cq_size, cq->cq_ci);
597                 /*
598                  * Be sure owner read is done before any other cookie field or
599                  * opaque field.
600                  */
601                 rte_io_rmb();
602                 if (unlikely(ret != MLX5_CQE_STATUS_SW_OWN)) {
603                         if (likely(ret == MLX5_CQE_STATUS_HW_OWN))
604                                 break;
605                         mlx5_aso_cqe_err_handle(sq);
606                 } else {
607                         i += sq->elts[(sq->tail + i) & mask].burst_size;
608                 }
609                 cq->cq_ci++;
610         } while (1);
611         if (likely(i)) {
612                 mlx5_aso_age_action_update(sh, i);
613                 sq->tail += i;
614                 rte_io_wmb();
615                 cq->cq_obj.db_rec[0] = rte_cpu_to_be_32(cq->cq_ci);
616         }
617         return i;
618 }
619
620 /**
621  * Periodically read CQEs and send WQEs to ASO SQ.
622  *
623  * @param[in] arg
624  *   Shared device context containing the ASO SQ.
625  */
626 static void
627 mlx5_flow_aso_alarm(void *arg)
628 {
629         struct mlx5_dev_ctx_shared *sh = arg;
630         struct mlx5_aso_sq *sq = &sh->aso_age_mng->aso_sq;
631         uint32_t us = 100u;
632         uint16_t n;
633
634         rte_spinlock_lock(&sh->aso_age_mng->resize_sl);
635         n = sh->aso_age_mng->next;
636         rte_spinlock_unlock(&sh->aso_age_mng->resize_sl);
637         mlx5_aso_completion_handle(sh);
638         if (sq->next == n) {
639                 /* End of loop: wait 1 second. */
640                 us = US_PER_S;
641                 sq->next = 0;
642         }
643         mlx5_aso_sq_enqueue_burst(sh->aso_age_mng, n);
644         if (rte_eal_alarm_set(us, mlx5_flow_aso_alarm, sh))
645                 DRV_LOG(ERR, "Cannot reinitialize aso alarm.");
646 }
647
648 /**
649  * API to start ASO access using ASO SQ.
650  *
651  * @param[in] sh
652  *   Pointer to shared device context.
653  *
654  * @return
655  *   0 on success, a negative errno value otherwise and rte_errno is set.
656  */
657 int
658 mlx5_aso_flow_hit_queue_poll_start(struct mlx5_dev_ctx_shared *sh)
659 {
660         if (rte_eal_alarm_set(US_PER_S, mlx5_flow_aso_alarm, sh)) {
661                 DRV_LOG(ERR, "Cannot reinitialize ASO age alarm.");
662                 return -rte_errno;
663         }
664         return 0;
665 }
666
667 /**
668  * API to stop ASO access using ASO SQ.
669  *
670  * @param[in] sh
671  *   Pointer to shared device context.
672  *
673  * @return
674  *   0 on success, a negative errno value otherwise and rte_errno is set.
675  */
676 int
677 mlx5_aso_flow_hit_queue_poll_stop(struct mlx5_dev_ctx_shared *sh)
678 {
679         int retries = 1024;
680
681         if (!sh->aso_age_mng->aso_sq.sq_obj.sq)
682                 return -EINVAL;
683         rte_errno = 0;
684         while (--retries) {
685                 rte_eal_alarm_cancel(mlx5_flow_aso_alarm, sh);
686                 if (rte_errno != EINPROGRESS)
687                         break;
688                 rte_pause();
689         }
690         return -rte_errno;
691 }
692
693 static uint16_t
694 mlx5_aso_mtr_sq_enqueue_single(struct mlx5_aso_sq *sq,
695                 struct mlx5_aso_mtr *aso_mtr)
696 {
697         volatile struct mlx5_aso_wqe *wqe = NULL;
698         struct mlx5_flow_meter_info *fm = NULL;
699         struct mlx5_flow_meter_profile *fmp;
700         uint16_t size = 1 << sq->log_desc_n;
701         uint16_t mask = size - 1;
702         uint16_t res;
703         uint32_t dseg_idx = 0;
704         struct mlx5_aso_mtr_pool *pool = NULL;
705
706         rte_spinlock_lock(&sq->sqsl);
707         res = size - (uint16_t)(sq->head - sq->tail);
708         if (unlikely(!res)) {
709                 DRV_LOG(ERR, "Fail: SQ is full and no free WQE to send");
710                 rte_spinlock_unlock(&sq->sqsl);
711                 return 0;
712         }
713         wqe = &sq->sq_obj.aso_wqes[sq->head & mask];
714         rte_prefetch0(&sq->sq_obj.aso_wqes[(sq->head + 1) & mask]);
715         /* Fill next WQE. */
716         fm = &aso_mtr->fm;
717         sq->elts[sq->head & mask].mtr = aso_mtr;
718         pool = container_of(aso_mtr, struct mlx5_aso_mtr_pool,
719                         mtrs[aso_mtr->offset]);
720         wqe->general_cseg.misc = rte_cpu_to_be_32(pool->devx_obj->id +
721                         (aso_mtr->offset >> 1));
722         wqe->general_cseg.opcode = rte_cpu_to_be_32(MLX5_OPCODE_ACCESS_ASO |
723                         (ASO_OPC_MOD_POLICER <<
724                         WQE_CSEG_OPC_MOD_OFFSET) |
725                         sq->pi << WQE_CSEG_WQE_INDEX_OFFSET);
726         /* There are 2 meters in one ASO cache line. */
727         dseg_idx = aso_mtr->offset & 0x1;
728         wqe->aso_cseg.data_mask =
729                 RTE_BE64(MLX5_IFC_FLOW_METER_PARAM_MASK << (32 * !dseg_idx));
730         if (fm->is_enable) {
731                 wqe->aso_dseg.mtrs[dseg_idx].cbs_cir =
732                         fm->profile->srtcm_prm.cbs_cir;
733                 wqe->aso_dseg.mtrs[dseg_idx].ebs_eir =
734                         fm->profile->srtcm_prm.ebs_eir;
735         } else {
736                 wqe->aso_dseg.mtrs[dseg_idx].cbs_cir =
737                         RTE_BE32(MLX5_IFC_FLOW_METER_DISABLE_CBS_CIR_VAL);
738                 wqe->aso_dseg.mtrs[dseg_idx].ebs_eir = 0;
739         }
740         fmp = fm->profile;
741         if (fmp->profile.packet_mode)
742                 wqe->aso_dseg.mtrs[dseg_idx].v_bo_sc_bbog_mm =
743                                 RTE_BE32((1 << ASO_DSEG_VALID_OFFSET) |
744                                 (MLX5_FLOW_COLOR_GREEN << ASO_DSEG_SC_OFFSET) |
745                                 (MLX5_METER_MODE_PKT << ASO_DSEG_MTR_MODE));
746         else
747                 wqe->aso_dseg.mtrs[dseg_idx].v_bo_sc_bbog_mm =
748                                 RTE_BE32((1 << ASO_DSEG_VALID_OFFSET) |
749                                 (MLX5_FLOW_COLOR_GREEN << ASO_DSEG_SC_OFFSET));
750         switch (fmp->profile.alg) {
751         case RTE_MTR_SRTCM_RFC2697:
752                 /* Only needed for RFC2697. */
753                 if (fm->profile->srtcm_prm.ebs_eir)
754                         wqe->aso_dseg.mtrs[dseg_idx].v_bo_sc_bbog_mm |=
755                                         RTE_BE32(1 << ASO_DSEG_BO_OFFSET);
756                 break;
757         case RTE_MTR_TRTCM_RFC2698:
758                 wqe->aso_dseg.mtrs[dseg_idx].v_bo_sc_bbog_mm |=
759                                 RTE_BE32(1 << ASO_DSEG_BBOG_OFFSET);
760                 break;
761         case RTE_MTR_TRTCM_RFC4115:
762         default:
763                 break;
764         }
765         /*
766          * Note:
767          * Due to software performance reason, the token fields will not be
768          * set when posting the WQE to ASO SQ. It will be filled by the HW
769          * automatically.
770          */
771         sq->head++;
772         sq->pi += 2;/* Each WQE contains 2 WQEBB's. */
773         rte_io_wmb();
774         sq->sq_obj.db_rec[MLX5_SND_DBR] = rte_cpu_to_be_32(sq->pi);
775         rte_wmb();
776         *sq->uar_addr = *(volatile uint64_t *)wqe; /* Assume 64 bit ARCH. */
777         rte_wmb();
778         rte_spinlock_unlock(&sq->sqsl);
779         return 1;
780 }
781
782 static void
783 mlx5_aso_mtrs_status_update(struct mlx5_aso_sq *sq, uint16_t aso_mtrs_nums)
784 {
785         uint16_t size = 1 << sq->log_desc_n;
786         uint16_t mask = size - 1;
787         uint16_t i;
788         struct mlx5_aso_mtr *aso_mtr = NULL;
789         uint8_t exp_state = ASO_METER_WAIT;
790
791         for (i = 0; i < aso_mtrs_nums; ++i) {
792                 aso_mtr = sq->elts[(sq->tail + i) & mask].mtr;
793                 MLX5_ASSERT(aso_mtr);
794                 (void)__atomic_compare_exchange_n(&aso_mtr->state,
795                                 &exp_state, ASO_METER_READY,
796                                 false, __ATOMIC_RELAXED, __ATOMIC_RELAXED);
797         }
798 }
799
800 static void
801 mlx5_aso_mtr_completion_handle(struct mlx5_aso_sq *sq)
802 {
803         struct mlx5_aso_cq *cq = &sq->cq;
804         volatile struct mlx5_cqe *restrict cqe;
805         const unsigned int cq_size = 1 << cq->log_desc_n;
806         const unsigned int mask = cq_size - 1;
807         uint32_t idx;
808         uint32_t next_idx = cq->cq_ci & mask;
809         uint16_t max;
810         uint16_t n = 0;
811         int ret;
812
813         rte_spinlock_lock(&sq->sqsl);
814         max = (uint16_t)(sq->head - sq->tail);
815         if (unlikely(!max)) {
816                 rte_spinlock_unlock(&sq->sqsl);
817                 return;
818         }
819         do {
820                 idx = next_idx;
821                 next_idx = (cq->cq_ci + 1) & mask;
822                 rte_prefetch0(&cq->cq_obj.cqes[next_idx]);
823                 cqe = &cq->cq_obj.cqes[idx];
824                 ret = check_cqe(cqe, cq_size, cq->cq_ci);
825                 /*
826                  * Be sure owner read is done before any other cookie field or
827                  * opaque field.
828                  */
829                 rte_io_rmb();
830                 if (ret != MLX5_CQE_STATUS_SW_OWN) {
831                         if (likely(ret == MLX5_CQE_STATUS_HW_OWN))
832                                 break;
833                         mlx5_aso_cqe_err_handle(sq);
834                 } else {
835                         n++;
836                 }
837                 cq->cq_ci++;
838         } while (1);
839         if (likely(n)) {
840                 mlx5_aso_mtrs_status_update(sq, n);
841                 sq->tail += n;
842                 rte_io_wmb();
843                 cq->cq_obj.db_rec[0] = rte_cpu_to_be_32(cq->cq_ci);
844         }
845         rte_spinlock_unlock(&sq->sqsl);
846 }
847
848 /**
849  * Update meter parameter by send WQE.
850  *
851  * @param[in] dev
852  *   Pointer to Ethernet device.
853  * @param[in] priv
854  *   Pointer to mlx5 private data structure.
855  * @param[in] fm
856  *   Pointer to flow meter to be modified.
857  *
858  * @return
859  *   0 on success, a negative errno value otherwise and rte_errno is set.
860  */
861 int
862 mlx5_aso_meter_update_by_wqe(struct mlx5_dev_ctx_shared *sh,
863                         struct mlx5_aso_mtr *mtr)
864 {
865         struct mlx5_aso_sq *sq = &sh->mtrmng->pools_mng.sq;
866         uint32_t poll_wqe_times = MLX5_MTR_POLL_WQE_CQE_TIMES;
867
868         do {
869                 mlx5_aso_mtr_completion_handle(sq);
870                 if (mlx5_aso_mtr_sq_enqueue_single(sq, mtr))
871                         return 0;
872                 /* Waiting for wqe resource. */
873                 rte_delay_us_sleep(MLX5_ASO_WQE_CQE_RESPONSE_DELAY);
874         } while (--poll_wqe_times);
875         DRV_LOG(ERR, "Fail to send WQE for ASO meter offset %d",
876                         mtr->offset);
877         return -1;
878 }
879
880 /**
881  * Wait for meter to be ready.
882  *
883  * @param[in] dev
884  *   Pointer to Ethernet device.
885  * @param[in] priv
886  *   Pointer to mlx5 private data structure.
887  * @param[in] fm
888  *   Pointer to flow meter to be modified.
889  *
890  * @return
891  *   0 on success, a negative errno value otherwise and rte_errno is set.
892  */
893 int
894 mlx5_aso_mtr_wait(struct mlx5_dev_ctx_shared *sh,
895                         struct mlx5_aso_mtr *mtr)
896 {
897         struct mlx5_aso_sq *sq = &sh->mtrmng->pools_mng.sq;
898         uint32_t poll_cqe_times = MLX5_MTR_POLL_WQE_CQE_TIMES;
899
900         if (__atomic_load_n(&mtr->state, __ATOMIC_RELAXED) ==
901                                             ASO_METER_READY)
902                 return 0;
903         do {
904                 mlx5_aso_mtr_completion_handle(sq);
905                 if (__atomic_load_n(&mtr->state, __ATOMIC_RELAXED) ==
906                                             ASO_METER_READY)
907                         return 0;
908                 /* Waiting for CQE ready. */
909                 rte_delay_us_sleep(MLX5_ASO_WQE_CQE_RESPONSE_DELAY);
910         } while (--poll_cqe_times);
911         DRV_LOG(ERR, "Fail to poll CQE ready for ASO meter offset %d",
912                         mtr->offset);
913         return -1;
914 }
915
916 /*
917  * Post a WQE to the ASO CT SQ to modify the context.
918  *
919  * @param[in] mng
920  *   Pointer to the CT pools management structure.
921  * @param[in] ct
922  *   Pointer to the generic CT structure related to the context.
923  * @param[in] profile
924  *   Pointer to configuration profile.
925  *
926  * @return
927  *   1 on success (WQE number), 0 on failure.
928  */
929 static uint16_t
930 mlx5_aso_ct_sq_enqueue_single(struct mlx5_aso_ct_pools_mng *mng,
931                               struct mlx5_aso_ct_action *ct,
932                               const struct rte_flow_action_conntrack *profile)
933 {
934         volatile struct mlx5_aso_wqe *wqe = NULL;
935         struct mlx5_aso_sq *sq = &mng->aso_sq;
936         uint16_t size = 1 << sq->log_desc_n;
937         uint16_t mask = size - 1;
938         uint16_t res;
939         struct mlx5_aso_ct_pool *pool;
940         void *desg;
941         void *orig_dir;
942         void *reply_dir;
943
944         rte_spinlock_lock(&sq->sqsl);
945         /* Prevent other threads to update the index. */
946         res = size - (uint16_t)(sq->head - sq->tail);
947         if (unlikely(!res)) {
948                 rte_spinlock_unlock(&sq->sqsl);
949                 DRV_LOG(ERR, "Fail: SQ is full and no free WQE to send");
950                 return 0;
951         }
952         wqe = &sq->sq_obj.aso_wqes[sq->head & mask];
953         rte_prefetch0(&sq->sq_obj.aso_wqes[(sq->head + 1) & mask]);
954         /* Fill next WQE. */
955         MLX5_ASO_CT_UPDATE_STATE(ct, ASO_CONNTRACK_WAIT);
956         sq->elts[sq->head & mask].ct = ct;
957         sq->elts[sq->head & mask].query_data = NULL;
958         pool = container_of(ct, struct mlx5_aso_ct_pool, actions[ct->offset]);
959         /* Each WQE will have a single CT object. */
960         wqe->general_cseg.misc = rte_cpu_to_be_32(pool->devx_obj->id +
961                                                   ct->offset);
962         wqe->general_cseg.opcode = rte_cpu_to_be_32(MLX5_OPCODE_ACCESS_ASO |
963                         (ASO_OPC_MOD_CONNECTION_TRACKING <<
964                          WQE_CSEG_OPC_MOD_OFFSET) |
965                         sq->pi << WQE_CSEG_WQE_INDEX_OFFSET);
966         wqe->aso_cseg.operand_masks = rte_cpu_to_be_32
967                         (0u |
968                          (ASO_OPER_LOGICAL_OR << ASO_CSEG_COND_OPER_OFFSET) |
969                          (ASO_OP_ALWAYS_TRUE << ASO_CSEG_COND_1_OPER_OFFSET) |
970                          (ASO_OP_ALWAYS_TRUE << ASO_CSEG_COND_0_OPER_OFFSET) |
971                          (BYTEWISE_64BYTE << ASO_CSEG_DATA_MASK_MODE_OFFSET));
972         wqe->aso_cseg.data_mask = UINT64_MAX;
973         /* To make compiler happy. */
974         desg = (void *)(uintptr_t)wqe->aso_dseg.data;
975         MLX5_SET(conn_track_aso, desg, valid, 1);
976         MLX5_SET(conn_track_aso, desg, state, profile->state);
977         MLX5_SET(conn_track_aso, desg, freeze_track, !profile->enable);
978         MLX5_SET(conn_track_aso, desg, connection_assured,
979                  profile->live_connection);
980         MLX5_SET(conn_track_aso, desg, sack_permitted, profile->selective_ack);
981         MLX5_SET(conn_track_aso, desg, challenged_acked,
982                  profile->challenge_ack_passed);
983         /* Heartbeat, retransmission_counter, retranmission_limit_exceeded: 0 */
984         MLX5_SET(conn_track_aso, desg, heartbeat, 0);
985         MLX5_SET(conn_track_aso, desg, max_ack_window,
986                  profile->max_ack_window);
987         MLX5_SET(conn_track_aso, desg, retransmission_counter, 0);
988         MLX5_SET(conn_track_aso, desg, retranmission_limit_exceeded, 0);
989         MLX5_SET(conn_track_aso, desg, retranmission_limit,
990                  profile->retransmission_limit);
991         MLX5_SET(conn_track_aso, desg, reply_direction_tcp_scale,
992                  profile->reply_dir.scale);
993         MLX5_SET(conn_track_aso, desg, reply_direction_tcp_close_initiated,
994                  profile->reply_dir.close_initiated);
995         /* Both directions will use the same liberal mode. */
996         MLX5_SET(conn_track_aso, desg, reply_direction_tcp_liberal_enabled,
997                  profile->liberal_mode);
998         MLX5_SET(conn_track_aso, desg, reply_direction_tcp_data_unacked,
999                  profile->reply_dir.data_unacked);
1000         MLX5_SET(conn_track_aso, desg, reply_direction_tcp_max_ack,
1001                  profile->reply_dir.last_ack_seen);
1002         MLX5_SET(conn_track_aso, desg, original_direction_tcp_scale,
1003                  profile->original_dir.scale);
1004         MLX5_SET(conn_track_aso, desg, original_direction_tcp_close_initiated,
1005                  profile->original_dir.close_initiated);
1006         MLX5_SET(conn_track_aso, desg, original_direction_tcp_liberal_enabled,
1007                  profile->liberal_mode);
1008         MLX5_SET(conn_track_aso, desg, original_direction_tcp_data_unacked,
1009                  profile->original_dir.data_unacked);
1010         MLX5_SET(conn_track_aso, desg, original_direction_tcp_max_ack,
1011                  profile->original_dir.last_ack_seen);
1012         MLX5_SET(conn_track_aso, desg, last_win, profile->last_window);
1013         MLX5_SET(conn_track_aso, desg, last_dir, profile->last_direction);
1014         MLX5_SET(conn_track_aso, desg, last_index, profile->last_index);
1015         MLX5_SET(conn_track_aso, desg, last_seq, profile->last_seq);
1016         MLX5_SET(conn_track_aso, desg, last_ack, profile->last_ack);
1017         MLX5_SET(conn_track_aso, desg, last_end, profile->last_end);
1018         orig_dir = MLX5_ADDR_OF(conn_track_aso, desg, original_dir);
1019         MLX5_SET(tcp_window_params, orig_dir, sent_end,
1020                  profile->original_dir.sent_end);
1021         MLX5_SET(tcp_window_params, orig_dir, reply_end,
1022                  profile->original_dir.reply_end);
1023         MLX5_SET(tcp_window_params, orig_dir, max_win,
1024                  profile->original_dir.max_win);
1025         MLX5_SET(tcp_window_params, orig_dir, max_ack,
1026                  profile->original_dir.max_ack);
1027         reply_dir = MLX5_ADDR_OF(conn_track_aso, desg, reply_dir);
1028         MLX5_SET(tcp_window_params, reply_dir, sent_end,
1029                  profile->reply_dir.sent_end);
1030         MLX5_SET(tcp_window_params, reply_dir, reply_end,
1031                  profile->reply_dir.reply_end);
1032         MLX5_SET(tcp_window_params, reply_dir, max_win,
1033                  profile->reply_dir.max_win);
1034         MLX5_SET(tcp_window_params, reply_dir, max_ack,
1035                  profile->reply_dir.max_ack);
1036         sq->head++;
1037         sq->pi += 2; /* Each WQE contains 2 WQEBB's. */
1038         rte_io_wmb();
1039         sq->sq_obj.db_rec[MLX5_SND_DBR] = rte_cpu_to_be_32(sq->pi);
1040         rte_wmb();
1041         *sq->uar_addr = *(volatile uint64_t *)wqe; /* Assume 64 bit ARCH. */
1042         rte_wmb();
1043         rte_spinlock_unlock(&sq->sqsl);
1044         return 1;
1045 }
1046
1047 /*
1048  * Update the status field of CTs to indicate ready to be used by flows.
1049  * A continuous number of CTs since last update.
1050  *
1051  * @param[in] sq
1052  *   Pointer to ASO CT SQ.
1053  * @param[in] num
1054  *   Number of CT structures to be updated.
1055  *
1056  * @return
1057  *   0 on success, a negative value.
1058  */
1059 static void
1060 mlx5_aso_ct_status_update(struct mlx5_aso_sq *sq, uint16_t num)
1061 {
1062         uint16_t size = 1 << sq->log_desc_n;
1063         uint16_t mask = size - 1;
1064         uint16_t i;
1065         struct mlx5_aso_ct_action *ct = NULL;
1066         uint16_t idx;
1067
1068         for (i = 0; i < num; i++) {
1069                 idx = (uint16_t)((sq->tail + i) & mask);
1070                 ct = sq->elts[idx].ct;
1071                 MLX5_ASSERT(ct);
1072                 MLX5_ASO_CT_UPDATE_STATE(ct, ASO_CONNTRACK_READY);
1073                 if (sq->elts[idx].query_data)
1074                         rte_memcpy(sq->elts[idx].query_data,
1075                                    (char *)((uintptr_t)sq->mr.addr + idx * 64),
1076                                    64);
1077         }
1078 }
1079
1080 /*
1081  * Post a WQE to the ASO CT SQ to query the current context.
1082  *
1083  * @param[in] mng
1084  *   Pointer to the CT pools management structure.
1085  * @param[in] ct
1086  *   Pointer to the generic CT structure related to the context.
1087  * @param[in] data
1088  *   Pointer to data area to be filled.
1089  *
1090  * @return
1091  *   1 on success (WQE number), 0 on failure.
1092  */
1093 static int
1094 mlx5_aso_ct_sq_query_single(struct mlx5_aso_ct_pools_mng *mng,
1095                             struct mlx5_aso_ct_action *ct, char *data)
1096 {
1097         volatile struct mlx5_aso_wqe *wqe = NULL;
1098         struct mlx5_aso_sq *sq = &mng->aso_sq;
1099         uint16_t size = 1 << sq->log_desc_n;
1100         uint16_t mask = size - 1;
1101         uint16_t res;
1102         uint16_t wqe_idx;
1103         struct mlx5_aso_ct_pool *pool;
1104         enum mlx5_aso_ct_state state =
1105                                 __atomic_load_n(&ct->state, __ATOMIC_RELAXED);
1106
1107         if (state == ASO_CONNTRACK_FREE) {
1108                 DRV_LOG(ERR, "Fail: No context to query");
1109                 return -1;
1110         } else if (state == ASO_CONNTRACK_WAIT) {
1111                 return 0;
1112         }
1113         rte_spinlock_lock(&sq->sqsl);
1114         res = size - (uint16_t)(sq->head - sq->tail);
1115         if (unlikely(!res)) {
1116                 rte_spinlock_unlock(&sq->sqsl);
1117                 DRV_LOG(ERR, "Fail: SQ is full and no free WQE to send");
1118                 return 0;
1119         }
1120         MLX5_ASO_CT_UPDATE_STATE(ct, ASO_CONNTRACK_QUERY);
1121         wqe = &sq->sq_obj.aso_wqes[sq->head & mask];
1122         /* Confirm the location and address of the prefetch instruction. */
1123         rte_prefetch0(&sq->sq_obj.aso_wqes[(sq->head + 1) & mask]);
1124         /* Fill next WQE. */
1125         wqe_idx = sq->head & mask;
1126         sq->elts[wqe_idx].ct = ct;
1127         sq->elts[wqe_idx].query_data = data;
1128         pool = container_of(ct, struct mlx5_aso_ct_pool, actions[ct->offset]);
1129         /* Each WQE will have a single CT object. */
1130         wqe->general_cseg.misc = rte_cpu_to_be_32(pool->devx_obj->id +
1131                                                   ct->offset);
1132         wqe->general_cseg.opcode = rte_cpu_to_be_32(MLX5_OPCODE_ACCESS_ASO |
1133                         (ASO_OPC_MOD_CONNECTION_TRACKING <<
1134                          WQE_CSEG_OPC_MOD_OFFSET) |
1135                         sq->pi << WQE_CSEG_WQE_INDEX_OFFSET);
1136         /*
1137          * There is no write request is required.
1138          * ASO_OPER_LOGICAL_AND and ASO_OP_ALWAYS_FALSE are both 0.
1139          * "BYTEWISE_64BYTE" is needed for a whole context.
1140          * Set to 0 directly to reduce an endian swap. (Modify should rewrite.)
1141          * "data_mask" is ignored.
1142          * Buffer address was already filled during initialization.
1143          */
1144         wqe->aso_cseg.operand_masks = rte_cpu_to_be_32(BYTEWISE_64BYTE <<
1145                                         ASO_CSEG_DATA_MASK_MODE_OFFSET);
1146         wqe->aso_cseg.data_mask = 0;
1147         sq->head++;
1148         /*
1149          * Each WQE contains 2 WQEBB's, even though
1150          * data segment is not used in this case.
1151          */
1152         sq->pi += 2;
1153         rte_io_wmb();
1154         sq->sq_obj.db_rec[MLX5_SND_DBR] = rte_cpu_to_be_32(sq->pi);
1155         rte_wmb();
1156         *sq->uar_addr = *(volatile uint64_t *)wqe; /* Assume 64 bit ARCH. */
1157         rte_wmb();
1158         rte_spinlock_unlock(&sq->sqsl);
1159         return 1;
1160 }
1161
1162 /*
1163  * Handle completions from WQEs sent to ASO CT.
1164  *
1165  * @param[in] mng
1166  *   Pointer to the CT pools management structure.
1167  */
1168 static void
1169 mlx5_aso_ct_completion_handle(struct mlx5_aso_ct_pools_mng *mng)
1170 {
1171         struct mlx5_aso_sq *sq = &mng->aso_sq;
1172         struct mlx5_aso_cq *cq = &sq->cq;
1173         volatile struct mlx5_cqe *restrict cqe;
1174         const uint32_t cq_size = 1 << cq->log_desc_n;
1175         const uint32_t mask = cq_size - 1;
1176         uint32_t idx;
1177         uint32_t next_idx;
1178         uint16_t max;
1179         uint16_t n = 0;
1180         int ret;
1181
1182         rte_spinlock_lock(&sq->sqsl);
1183         max = (uint16_t)(sq->head - sq->tail);
1184         if (unlikely(!max)) {
1185                 rte_spinlock_unlock(&sq->sqsl);
1186                 return;
1187         }
1188         next_idx = cq->cq_ci & mask;
1189         do {
1190                 idx = next_idx;
1191                 next_idx = (cq->cq_ci + 1) & mask;
1192                 /* Need to confirm the position of the prefetch. */
1193                 rte_prefetch0(&cq->cq_obj.cqes[next_idx]);
1194                 cqe = &cq->cq_obj.cqes[idx];
1195                 ret = check_cqe(cqe, cq_size, cq->cq_ci);
1196                 /*
1197                  * Be sure owner read is done before any other cookie field or
1198                  * opaque field.
1199                  */
1200                 rte_io_rmb();
1201                 if (unlikely(ret != MLX5_CQE_STATUS_SW_OWN)) {
1202                         if (likely(ret == MLX5_CQE_STATUS_HW_OWN))
1203                                 break;
1204                         mlx5_aso_cqe_err_handle(sq);
1205                 } else {
1206                         n++;
1207                 }
1208                 cq->cq_ci++;
1209         } while (1);
1210         if (likely(n)) {
1211                 mlx5_aso_ct_status_update(sq, n);
1212                 sq->tail += n;
1213                 rte_io_wmb();
1214                 cq->cq_obj.db_rec[0] = rte_cpu_to_be_32(cq->cq_ci);
1215         }
1216         rte_spinlock_unlock(&sq->sqsl);
1217 }
1218
1219 /*
1220  * Update connection tracking ASO context by sending WQE.
1221  *
1222  * @param[in] sh
1223  *   Pointer to mlx5_dev_ctx_shared object.
1224  * @param[in] ct
1225  *   Pointer to connection tracking offload object.
1226  * @param[in] profile
1227  *   Pointer to connection tracking TCP parameter.
1228  *
1229  * @return
1230  *   0 on success, -1 on failure.
1231  */
1232 int
1233 mlx5_aso_ct_update_by_wqe(struct mlx5_dev_ctx_shared *sh,
1234                           struct mlx5_aso_ct_action *ct,
1235                           const struct rte_flow_action_conntrack *profile)
1236 {
1237         struct mlx5_aso_ct_pools_mng *mng = sh->ct_mng;
1238         uint32_t poll_wqe_times = MLX5_CT_POLL_WQE_CQE_TIMES;
1239         struct mlx5_aso_ct_pool *pool;
1240
1241         MLX5_ASSERT(ct);
1242         do {
1243                 mlx5_aso_ct_completion_handle(mng);
1244                 if (mlx5_aso_ct_sq_enqueue_single(mng, ct, profile))
1245                         return 0;
1246                 /* Waiting for wqe resource. */
1247                 rte_delay_us_sleep(10u);
1248         } while (--poll_wqe_times);
1249         pool = container_of(ct, struct mlx5_aso_ct_pool, actions[ct->offset]);
1250         DRV_LOG(ERR, "Fail to send WQE for ASO CT %d in pool %d",
1251                 ct->offset, pool->index);
1252         return -1;
1253 }
1254
1255 /*
1256  * The routine is used to wait for WQE completion to continue with queried data.
1257  *
1258  * @param[in] sh
1259  *   Pointer to mlx5_dev_ctx_shared object.
1260  * @param[in] ct
1261  *   Pointer to connection tracking offload object.
1262  *
1263  * @return
1264  *   0 on success, -1 on failure.
1265  */
1266 int
1267 mlx5_aso_ct_wait_ready(struct mlx5_dev_ctx_shared *sh,
1268                        struct mlx5_aso_ct_action *ct)
1269 {
1270         struct mlx5_aso_ct_pools_mng *mng = sh->ct_mng;
1271         uint32_t poll_cqe_times = MLX5_CT_POLL_WQE_CQE_TIMES;
1272         struct mlx5_aso_ct_pool *pool;
1273
1274         if (__atomic_load_n(&ct->state, __ATOMIC_RELAXED) ==
1275             ASO_CONNTRACK_READY)
1276                 return 0;
1277         do {
1278                 mlx5_aso_ct_completion_handle(mng);
1279                 if (__atomic_load_n(&ct->state, __ATOMIC_RELAXED) ==
1280                     ASO_CONNTRACK_READY)
1281                         return 0;
1282                 /* Waiting for CQE ready, consider should block or sleep. */
1283                 rte_delay_us_sleep(MLX5_ASO_WQE_CQE_RESPONSE_DELAY);
1284         } while (--poll_cqe_times);
1285         pool = container_of(ct, struct mlx5_aso_ct_pool, actions[ct->offset]);
1286         DRV_LOG(ERR, "Fail to poll CQE for ASO CT %d in pool %d",
1287                 ct->offset, pool->index);
1288         return -1;
1289 }
1290
1291 /*
1292  * Convert the hardware conntrack data format into the profile.
1293  *
1294  * @param[in] profile
1295  *   Pointer to conntrack profile to be filled after query.
1296  * @param[in] wdata
1297  *   Pointer to data fetched from hardware.
1298  */
1299 static inline void
1300 mlx5_aso_ct_obj_analyze(struct rte_flow_action_conntrack *profile,
1301                         char *wdata)
1302 {
1303         void *o_dir = MLX5_ADDR_OF(conn_track_aso, wdata, original_dir);
1304         void *r_dir = MLX5_ADDR_OF(conn_track_aso, wdata, reply_dir);
1305
1306         /* MLX5_GET16 should be taken into consideration. */
1307         profile->state = (enum rte_flow_conntrack_state)
1308                          MLX5_GET(conn_track_aso, wdata, state);
1309         profile->enable = !MLX5_GET(conn_track_aso, wdata, freeze_track);
1310         profile->selective_ack = MLX5_GET(conn_track_aso, wdata,
1311                                           sack_permitted);
1312         profile->live_connection = MLX5_GET(conn_track_aso, wdata,
1313                                             connection_assured);
1314         profile->challenge_ack_passed = MLX5_GET(conn_track_aso, wdata,
1315                                                  challenged_acked);
1316         profile->max_ack_window = MLX5_GET(conn_track_aso, wdata,
1317                                            max_ack_window);
1318         profile->retransmission_limit = MLX5_GET(conn_track_aso, wdata,
1319                                                  retranmission_limit);
1320         profile->last_window = MLX5_GET(conn_track_aso, wdata, last_win);
1321         profile->last_direction = MLX5_GET(conn_track_aso, wdata, last_dir);
1322         profile->last_index = (enum rte_flow_conntrack_tcp_last_index)
1323                               MLX5_GET(conn_track_aso, wdata, last_index);
1324         profile->last_seq = MLX5_GET(conn_track_aso, wdata, last_seq);
1325         profile->last_ack = MLX5_GET(conn_track_aso, wdata, last_ack);
1326         profile->last_end = MLX5_GET(conn_track_aso, wdata, last_end);
1327         profile->liberal_mode = MLX5_GET(conn_track_aso, wdata,
1328                                 reply_direction_tcp_liberal_enabled) |
1329                                 MLX5_GET(conn_track_aso, wdata,
1330                                 original_direction_tcp_liberal_enabled);
1331         /* No liberal in the RTE structure profile. */
1332         profile->reply_dir.scale = MLX5_GET(conn_track_aso, wdata,
1333                                             reply_direction_tcp_scale);
1334         profile->reply_dir.close_initiated = MLX5_GET(conn_track_aso, wdata,
1335                                         reply_direction_tcp_close_initiated);
1336         profile->reply_dir.data_unacked = MLX5_GET(conn_track_aso, wdata,
1337                                         reply_direction_tcp_data_unacked);
1338         profile->reply_dir.last_ack_seen = MLX5_GET(conn_track_aso, wdata,
1339                                         reply_direction_tcp_max_ack);
1340         profile->reply_dir.sent_end = MLX5_GET(tcp_window_params,
1341                                                r_dir, sent_end);
1342         profile->reply_dir.reply_end = MLX5_GET(tcp_window_params,
1343                                                 r_dir, reply_end);
1344         profile->reply_dir.max_win = MLX5_GET(tcp_window_params,
1345                                               r_dir, max_win);
1346         profile->reply_dir.max_ack = MLX5_GET(tcp_window_params,
1347                                               r_dir, max_ack);
1348         profile->original_dir.scale = MLX5_GET(conn_track_aso, wdata,
1349                                                original_direction_tcp_scale);
1350         profile->original_dir.close_initiated = MLX5_GET(conn_track_aso, wdata,
1351                                         original_direction_tcp_close_initiated);
1352         profile->original_dir.data_unacked = MLX5_GET(conn_track_aso, wdata,
1353                                         original_direction_tcp_data_unacked);
1354         profile->original_dir.last_ack_seen = MLX5_GET(conn_track_aso, wdata,
1355                                         original_direction_tcp_max_ack);
1356         profile->original_dir.sent_end = MLX5_GET(tcp_window_params,
1357                                                   o_dir, sent_end);
1358         profile->original_dir.reply_end = MLX5_GET(tcp_window_params,
1359                                                    o_dir, reply_end);
1360         profile->original_dir.max_win = MLX5_GET(tcp_window_params,
1361                                                  o_dir, max_win);
1362         profile->original_dir.max_ack = MLX5_GET(tcp_window_params,
1363                                                  o_dir, max_ack);
1364 }
1365
1366 /*
1367  * Query connection tracking information parameter by send WQE.
1368  *
1369  * @param[in] dev
1370  *   Pointer to Ethernet device.
1371  * @param[in] ct
1372  *   Pointer to connection tracking offload object.
1373  * @param[out] profile
1374  *   Pointer to connection tracking TCP information.
1375  *
1376  * @return
1377  *   0 on success, -1 on failure.
1378  */
1379 int
1380 mlx5_aso_ct_query_by_wqe(struct mlx5_dev_ctx_shared *sh,
1381                          struct mlx5_aso_ct_action *ct,
1382                          struct rte_flow_action_conntrack *profile)
1383 {
1384         struct mlx5_aso_ct_pools_mng *mng = sh->ct_mng;
1385         uint32_t poll_wqe_times = MLX5_CT_POLL_WQE_CQE_TIMES;
1386         struct mlx5_aso_ct_pool *pool;
1387         char out_data[64 * 2];
1388         int ret;
1389
1390         MLX5_ASSERT(ct);
1391         do {
1392                 mlx5_aso_ct_completion_handle(mng);
1393                 ret = mlx5_aso_ct_sq_query_single(mng, ct, out_data);
1394                 if (ret < 0)
1395                         return ret;
1396                 else if (ret > 0)
1397                         goto data_handle;
1398                 /* Waiting for wqe resource or state. */
1399                 else
1400                         rte_delay_us_sleep(10u);
1401         } while (--poll_wqe_times);
1402         pool = container_of(ct, struct mlx5_aso_ct_pool, actions[ct->offset]);
1403         DRV_LOG(ERR, "Fail to send WQE for ASO CT %d in pool %d",
1404                 ct->offset, pool->index);
1405         return -1;
1406 data_handle:
1407         ret = mlx5_aso_ct_wait_ready(sh, ct);
1408         if (!ret)
1409                 mlx5_aso_ct_obj_analyze(profile, out_data);
1410         return ret;
1411 }
1412
1413 /*
1414  * Make sure the conntrack context is synchronized with hardware before
1415  * creating a flow rule that uses it.
1416  *
1417  * @param[in] sh
1418  *   Pointer to shared device context.
1419  * @param[in] ct
1420  *   Pointer to connection tracking offload object.
1421  *
1422  * @return
1423  *   0 on success, a negative errno value otherwise and rte_errno is set.
1424  */
1425 int
1426 mlx5_aso_ct_available(struct mlx5_dev_ctx_shared *sh,
1427                       struct mlx5_aso_ct_action *ct)
1428 {
1429         struct mlx5_aso_ct_pools_mng *mng = sh->ct_mng;
1430         uint32_t poll_cqe_times = MLX5_CT_POLL_WQE_CQE_TIMES;
1431         enum mlx5_aso_ct_state state =
1432                                 __atomic_load_n(&ct->state, __ATOMIC_RELAXED);
1433
1434         if (state == ASO_CONNTRACK_FREE) {
1435                 rte_errno = ENXIO;
1436                 return -rte_errno;
1437         } else if (state == ASO_CONNTRACK_READY ||
1438                    state == ASO_CONNTRACK_QUERY) {
1439                 return 0;
1440         }
1441         do {
1442                 mlx5_aso_ct_completion_handle(mng);
1443                 state = __atomic_load_n(&ct->state, __ATOMIC_RELAXED);
1444                 if (state == ASO_CONNTRACK_READY ||
1445                     state == ASO_CONNTRACK_QUERY)
1446                         return 0;
1447                 /* Waiting for CQE ready, consider should block or sleep. */
1448                 rte_delay_us_sleep(MLX5_ASO_WQE_CQE_RESPONSE_DELAY);
1449         } while (--poll_cqe_times);
1450         rte_errno = EBUSY;
1451         return -rte_errno;
1452 }