49eec7a6b656df6cfefb91d15c3b37fec99602f1
[dpdk.git] / drivers / net / mlx5 / mlx5_flow_aso.c
1 /* SPDX-License-Identifier: BSD-3-Clause
2  * Copyright 2020 Mellanox Technologies, Ltd
3  */
4 #include <mlx5_prm.h>
5 #include <rte_malloc.h>
6 #include <rte_cycles.h>
7 #include <rte_eal_paging.h>
8
9 #include <mlx5_malloc.h>
10 #include <mlx5_common_os.h>
11 #include <mlx5_common_devx.h>
12
13 #include "mlx5.h"
14 #include "mlx5_flow.h"
15
16 /**
17  * Destroy Completion Queue used for ASO access.
18  *
19  * @param[in] cq
20  *   ASO CQ to destroy.
21  */
22 static void
23 mlx5_aso_cq_destroy(struct mlx5_aso_cq *cq)
24 {
25         if (cq->cq_obj.cq)
26                 mlx5_devx_cq_destroy(&cq->cq_obj);
27         memset(cq, 0, sizeof(*cq));
28 }
29
30 /**
31  * Create Completion Queue used for ASO access.
32  *
33  * @param[in] ctx
34  *   Context returned from mlx5 open_device() glue function.
35  * @param[in/out] cq
36  *   Pointer to CQ to create.
37  * @param[in] log_desc_n
38  *   Log of number of descriptors in queue.
39  * @param[in] socket
40  *   Socket to use for allocation.
41  * @param[in] uar_page_id
42  *   UAR page ID to use.
43  *
44  * @return
45  *   0 on success, a negative errno value otherwise and rte_errno is set.
46  */
47 static int
48 mlx5_aso_cq_create(void *ctx, struct mlx5_aso_cq *cq, uint16_t log_desc_n,
49                    int socket, int uar_page_id)
50 {
51         struct mlx5_devx_cq_attr attr = {
52                 .uar_page_id = uar_page_id,
53         };
54
55         cq->log_desc_n = log_desc_n;
56         cq->cq_ci = 0;
57         return mlx5_devx_cq_create(ctx, &cq->cq_obj, log_desc_n, &attr, socket);
58 }
59
60 /**
61  * Free MR resources.
62  *
63  * @param[in] sh
64  *   Pointer to shared device context.
65  * @param[in] mr
66  *   MR to free.
67  */
68 static void
69 mlx5_aso_dereg_mr(struct mlx5_dev_ctx_shared *sh, struct mlx5_pmd_mr *mr)
70 {
71         void *addr = mr->addr;
72
73         sh->share_cache.dereg_mr_cb(mr);
74         mlx5_free(addr);
75         memset(mr, 0, sizeof(*mr));
76 }
77
78 /**
79  * Register Memory Region.
80  *
81  * @param[in] sh
82  *   Pointer to shared device context.
83  * @param[in] length
84  *   Size of MR buffer.
85  * @param[in/out] mr
86  *   Pointer to MR to create.
87  * @param[in] socket
88  *   Socket to use for allocation.
89  *
90  * @return
91  *   0 on success, a negative errno value otherwise and rte_errno is set.
92  */
93 static int
94 mlx5_aso_reg_mr(struct mlx5_dev_ctx_shared *sh, size_t length,
95                 struct mlx5_pmd_mr *mr, int socket)
96 {
97
98         int ret;
99
100         mr->addr = mlx5_malloc(MLX5_MEM_RTE | MLX5_MEM_ZERO, length, 4096,
101                                socket);
102         if (!mr->addr) {
103                 DRV_LOG(ERR, "Failed to create ASO bits mem for MR.");
104                 return -1;
105         }
106         ret = sh->share_cache.reg_mr_cb(sh->pd, mr->addr, length, mr);
107         if (ret) {
108                 DRV_LOG(ERR, "Failed to create direct Mkey.");
109                 mlx5_free(mr->addr);
110                 return -1;
111         }
112         return 0;
113 }
114
115 /**
116  * Destroy Send Queue used for ASO access.
117  *
118  * @param[in] sq
119  *   ASO SQ to destroy.
120  */
121 static void
122 mlx5_aso_destroy_sq(struct mlx5_aso_sq *sq)
123 {
124         mlx5_devx_sq_destroy(&sq->sq_obj);
125         mlx5_aso_cq_destroy(&sq->cq);
126         memset(sq, 0, sizeof(*sq));
127 }
128
129 /**
130  * Initialize Send Queue used for ASO access.
131  *
132  * @param[in] sq
133  *   ASO SQ to initialize.
134  */
135 static void
136 mlx5_aso_age_init_sq(struct mlx5_aso_sq *sq)
137 {
138         volatile struct mlx5_aso_wqe *restrict wqe;
139         int i;
140         int size = 1 << sq->log_desc_n;
141         uint64_t addr;
142
143         /* All the next fields state should stay constant. */
144         for (i = 0, wqe = &sq->sq_obj.aso_wqes[0]; i < size; ++i, ++wqe) {
145                 wqe->general_cseg.sq_ds = rte_cpu_to_be_32((sq->sqn << 8) |
146                                                           (sizeof(*wqe) >> 4));
147                 wqe->aso_cseg.lkey = rte_cpu_to_be_32(sq->mr.lkey);
148                 addr = (uint64_t)((uint64_t *)sq->mr.addr + i *
149                                             MLX5_ASO_AGE_ACTIONS_PER_POOL / 64);
150                 wqe->aso_cseg.va_h = rte_cpu_to_be_32((uint32_t)(addr >> 32));
151                 wqe->aso_cseg.va_l_r = rte_cpu_to_be_32((uint32_t)addr | 1u);
152                 wqe->aso_cseg.operand_masks = rte_cpu_to_be_32
153                         (0u |
154                          (ASO_OPER_LOGICAL_OR << ASO_CSEG_COND_OPER_OFFSET) |
155                          (ASO_OP_ALWAYS_TRUE << ASO_CSEG_COND_1_OPER_OFFSET) |
156                          (ASO_OP_ALWAYS_TRUE << ASO_CSEG_COND_0_OPER_OFFSET) |
157                          (BYTEWISE_64BYTE << ASO_CSEG_DATA_MASK_MODE_OFFSET));
158                 wqe->aso_cseg.data_mask = RTE_BE64(UINT64_MAX);
159         }
160 }
161
162 /**
163  * Initialize Send Queue used for ASO flow meter access.
164  *
165  * @param[in] sq
166  *   ASO SQ to initialize.
167  */
168 static void
169 mlx5_aso_mtr_init_sq(struct mlx5_aso_sq *sq)
170 {
171         volatile struct mlx5_aso_wqe *restrict wqe;
172         int i;
173         int size = 1 << sq->log_desc_n;
174
175         /* All the next fields state should stay constant. */
176         for (i = 0, wqe = &sq->sq_obj.aso_wqes[0]; i < size; ++i, ++wqe) {
177                 wqe->general_cseg.sq_ds = rte_cpu_to_be_32((sq->sqn << 8) |
178                                                           (sizeof(*wqe) >> 4));
179                 wqe->aso_cseg.operand_masks = RTE_BE32(0u |
180                          (ASO_OPER_LOGICAL_OR << ASO_CSEG_COND_OPER_OFFSET) |
181                          (ASO_OP_ALWAYS_TRUE << ASO_CSEG_COND_1_OPER_OFFSET) |
182                          (ASO_OP_ALWAYS_TRUE << ASO_CSEG_COND_0_OPER_OFFSET) |
183                          (BYTEWISE_64BYTE << ASO_CSEG_DATA_MASK_MODE_OFFSET));
184                 wqe->general_cseg.flags = RTE_BE32(MLX5_COMP_ALWAYS <<
185                                                          MLX5_COMP_MODE_OFFSET);
186         }
187 }
188
189 /*
190  * Initialize Send Queue used for ASO connection tracking.
191  *
192  * @param[in] sq
193  *   ASO SQ to initialize.
194  */
195 static void
196 mlx5_aso_ct_init_sq(struct mlx5_aso_sq *sq)
197 {
198         volatile struct mlx5_aso_wqe *restrict wqe;
199         int i;
200         int size = 1 << sq->log_desc_n;
201         uint64_t addr;
202
203         /* All the next fields state should stay constant. */
204         for (i = 0, wqe = &sq->sq_obj.aso_wqes[0]; i < size; ++i, ++wqe) {
205                 wqe->general_cseg.sq_ds = rte_cpu_to_be_32((sq->sqn << 8) |
206                                                           (sizeof(*wqe) >> 4));
207                 /* One unique MR for the query data. */
208                 wqe->aso_cseg.lkey = rte_cpu_to_be_32(sq->mr.lkey);
209                 /* Magic number 64 represents the length of a ASO CT obj. */
210                 addr = (uint64_t)((uintptr_t)sq->mr.addr + i * 64);
211                 wqe->aso_cseg.va_h = rte_cpu_to_be_32((uint32_t)(addr >> 32));
212                 wqe->aso_cseg.va_l_r = rte_cpu_to_be_32((uint32_t)addr | 1u);
213                 /*
214                  * The values of operand_masks are different for modify
215                  * and query.
216                  * And data_mask may be different for each modification. In
217                  * query, it could be zero and ignored.
218                  * CQE generation is always needed, in order to decide when
219                  * it is available to create the flow or read the data.
220                  */
221                 wqe->general_cseg.flags = RTE_BE32(MLX5_COMP_ALWAYS <<
222                                                    MLX5_COMP_MODE_OFFSET);
223         }
224 }
225
226 /**
227  * Create Send Queue used for ASO access.
228  *
229  * @param[in] ctx
230  *   Context returned from mlx5 open_device() glue function.
231  * @param[in/out] sq
232  *   Pointer to SQ to create.
233  * @param[in] socket
234  *   Socket to use for allocation.
235  * @param[in] uar
236  *   User Access Region object.
237  * @param[in] pdn
238  *   Protection Domain number to use.
239  * @param[in] log_desc_n
240  *   Log of number of descriptors in queue.
241  * @param[in] ts_format
242  *   timestamp format supported by the queue.
243  *
244  * @return
245  *   0 on success, a negative errno value otherwise and rte_errno is set.
246  */
247 static int
248 mlx5_aso_sq_create(void *ctx, struct mlx5_aso_sq *sq, int socket, void *uar,
249                    uint32_t pdn, uint16_t log_desc_n, uint32_t ts_format)
250 {
251         struct mlx5_devx_create_sq_attr attr = {
252                 .user_index = 0xFFFF,
253                 .wq_attr = (struct mlx5_devx_wq_attr){
254                         .pd = pdn,
255                         .uar_page = mlx5_os_get_devx_uar_page_id(uar),
256                 },
257                 .ts_format = mlx5_ts_format_conv(ts_format),
258         };
259         struct mlx5_devx_modify_sq_attr modify_attr = {
260                 .state = MLX5_SQC_STATE_RDY,
261         };
262         uint16_t log_wqbb_n;
263         int ret;
264
265         if (mlx5_aso_cq_create(ctx, &sq->cq, log_desc_n, socket,
266                                mlx5_os_get_devx_uar_page_id(uar)))
267                 goto error;
268         sq->log_desc_n = log_desc_n;
269         attr.cqn = sq->cq.cq_obj.cq->id;
270         /* for mlx5_aso_wqe that is twice the size of mlx5_wqe */
271         log_wqbb_n = log_desc_n + 1;
272         ret = mlx5_devx_sq_create(ctx, &sq->sq_obj, log_wqbb_n, &attr, socket);
273         if (ret) {
274                 DRV_LOG(ERR, "Can't create SQ object.");
275                 rte_errno = ENOMEM;
276                 goto error;
277         }
278         ret = mlx5_devx_cmd_modify_sq(sq->sq_obj.sq, &modify_attr);
279         if (ret) {
280                 DRV_LOG(ERR, "Can't change SQ state to ready.");
281                 rte_errno = ENOMEM;
282                 goto error;
283         }
284         sq->pi = 0;
285         sq->head = 0;
286         sq->tail = 0;
287         sq->sqn = sq->sq_obj.sq->id;
288         sq->uar_addr = mlx5_os_get_devx_uar_reg_addr(uar);
289         rte_spinlock_init(&sq->sqsl);
290         return 0;
291 error:
292         mlx5_aso_destroy_sq(sq);
293         return -1;
294 }
295
296 /**
297  * API to create and initialize Send Queue used for ASO access.
298  *
299  * @param[in] sh
300  *   Pointer to shared device context.
301  * @param[in] aso_opc_mod
302  *   Mode of ASO feature.
303  *
304  * @return
305  *   0 on success, a negative errno value otherwise and rte_errno is set.
306  */
307 int
308 mlx5_aso_queue_init(struct mlx5_dev_ctx_shared *sh,
309                     enum mlx5_access_aso_opc_mod aso_opc_mod)
310 {
311         uint32_t sq_desc_n = 1 << MLX5_ASO_QUEUE_LOG_DESC;
312         struct mlx5_common_device *cdev = sh->cdev;
313
314         switch (aso_opc_mod) {
315         case ASO_OPC_MOD_FLOW_HIT:
316                 if (mlx5_aso_reg_mr(sh, (MLX5_ASO_AGE_ACTIONS_PER_POOL / 8) *
317                                     sq_desc_n, &sh->aso_age_mng->aso_sq.mr, 0))
318                         return -1;
319                 if (mlx5_aso_sq_create(cdev->ctx, &sh->aso_age_mng->aso_sq, 0,
320                                   sh->tx_uar, sh->pdn, MLX5_ASO_QUEUE_LOG_DESC,
321                                   sh->sq_ts_format)) {
322                         mlx5_aso_dereg_mr(sh, &sh->aso_age_mng->aso_sq.mr);
323                         return -1;
324                 }
325                 mlx5_aso_age_init_sq(&sh->aso_age_mng->aso_sq);
326                 break;
327         case ASO_OPC_MOD_POLICER:
328                 if (mlx5_aso_sq_create(cdev->ctx, &sh->mtrmng->pools_mng.sq, 0,
329                                   sh->tx_uar, sh->pdn, MLX5_ASO_QUEUE_LOG_DESC,
330                                   sh->sq_ts_format))
331                         return -1;
332                 mlx5_aso_mtr_init_sq(&sh->mtrmng->pools_mng.sq);
333                 break;
334         case ASO_OPC_MOD_CONNECTION_TRACKING:
335                 /* 64B per object for query. */
336                 if (mlx5_aso_reg_mr(sh, 64 * sq_desc_n,
337                                     &sh->ct_mng->aso_sq.mr, 0))
338                         return -1;
339                 if (mlx5_aso_sq_create(cdev->ctx, &sh->ct_mng->aso_sq, 0,
340                                 sh->tx_uar, sh->pdn, MLX5_ASO_QUEUE_LOG_DESC,
341                                 sh->sq_ts_format)) {
342                         mlx5_aso_dereg_mr(sh, &sh->ct_mng->aso_sq.mr);
343                         return -1;
344                 }
345                 mlx5_aso_ct_init_sq(&sh->ct_mng->aso_sq);
346                 break;
347         default:
348                 DRV_LOG(ERR, "Unknown ASO operation mode");
349                 return -1;
350         }
351         return 0;
352 }
353
354 /**
355  * API to destroy Send Queue used for ASO access.
356  *
357  * @param[in] sh
358  *   Pointer to shared device context.
359  * @param[in] aso_opc_mod
360  *   Mode of ASO feature.
361  */
362 void
363 mlx5_aso_queue_uninit(struct mlx5_dev_ctx_shared *sh,
364                       enum mlx5_access_aso_opc_mod aso_opc_mod)
365 {
366         struct mlx5_aso_sq *sq;
367
368         switch (aso_opc_mod) {
369         case ASO_OPC_MOD_FLOW_HIT:
370                 mlx5_aso_dereg_mr(sh, &sh->aso_age_mng->aso_sq.mr);
371                 sq = &sh->aso_age_mng->aso_sq;
372                 break;
373         case ASO_OPC_MOD_POLICER:
374                 sq = &sh->mtrmng->pools_mng.sq;
375                 break;
376         case ASO_OPC_MOD_CONNECTION_TRACKING:
377                 mlx5_aso_dereg_mr(sh, &sh->ct_mng->aso_sq.mr);
378                 sq = &sh->ct_mng->aso_sq;
379                 break;
380         default:
381                 DRV_LOG(ERR, "Unknown ASO operation mode");
382                 return;
383         }
384         mlx5_aso_destroy_sq(sq);
385 }
386
387 /**
388  * Write a burst of WQEs to ASO SQ.
389  *
390  * @param[in] mng
391  *   ASO management data, contains the SQ.
392  * @param[in] n
393  *   Index of the last valid pool.
394  *
395  * @return
396  *   Number of WQEs in burst.
397  */
398 static uint16_t
399 mlx5_aso_sq_enqueue_burst(struct mlx5_aso_age_mng *mng, uint16_t n)
400 {
401         volatile struct mlx5_aso_wqe *wqe;
402         struct mlx5_aso_sq *sq = &mng->aso_sq;
403         struct mlx5_aso_age_pool *pool;
404         uint16_t size = 1 << sq->log_desc_n;
405         uint16_t mask = size - 1;
406         uint16_t max;
407         uint16_t start_head = sq->head;
408
409         max = RTE_MIN(size - (uint16_t)(sq->head - sq->tail), n - sq->next);
410         if (unlikely(!max))
411                 return 0;
412         sq->elts[start_head & mask].burst_size = max;
413         do {
414                 wqe = &sq->sq_obj.aso_wqes[sq->head & mask];
415                 rte_prefetch0(&sq->sq_obj.aso_wqes[(sq->head + 1) & mask]);
416                 /* Fill next WQE. */
417                 rte_spinlock_lock(&mng->resize_sl);
418                 pool = mng->pools[sq->next];
419                 rte_spinlock_unlock(&mng->resize_sl);
420                 sq->elts[sq->head & mask].pool = pool;
421                 wqe->general_cseg.misc =
422                                 rte_cpu_to_be_32(((struct mlx5_devx_obj *)
423                                                  (pool->flow_hit_aso_obj))->id);
424                 wqe->general_cseg.flags = RTE_BE32(MLX5_COMP_ONLY_FIRST_ERR <<
425                                                          MLX5_COMP_MODE_OFFSET);
426                 wqe->general_cseg.opcode = rte_cpu_to_be_32
427                                                 (MLX5_OPCODE_ACCESS_ASO |
428                                                  (ASO_OPC_MOD_FLOW_HIT <<
429                                                   WQE_CSEG_OPC_MOD_OFFSET) |
430                                                  (sq->pi <<
431                                                   WQE_CSEG_WQE_INDEX_OFFSET));
432                 sq->pi += 2; /* Each WQE contains 2 WQEBB's. */
433                 sq->head++;
434                 sq->next++;
435                 max--;
436         } while (max);
437         wqe->general_cseg.flags = RTE_BE32(MLX5_COMP_ALWAYS <<
438                                                          MLX5_COMP_MODE_OFFSET);
439         rte_io_wmb();
440         sq->sq_obj.db_rec[MLX5_SND_DBR] = rte_cpu_to_be_32(sq->pi);
441         rte_wmb();
442         *sq->uar_addr = *(volatile uint64_t *)wqe; /* Assume 64 bit ARCH.*/
443         rte_wmb();
444         return sq->elts[start_head & mask].burst_size;
445 }
446
447 /**
448  * Debug utility function. Dump contents of error CQE and WQE.
449  *
450  * @param[in] cqe
451  *   Error CQE to dump.
452  * @param[in] wqe
453  *   Error WQE to dump.
454  */
455 static void
456 mlx5_aso_dump_err_objs(volatile uint32_t *cqe, volatile uint32_t *wqe)
457 {
458         int i;
459
460         DRV_LOG(ERR, "Error cqe:");
461         for (i = 0; i < 16; i += 4)
462                 DRV_LOG(ERR, "%08X %08X %08X %08X", cqe[i], cqe[i + 1],
463                         cqe[i + 2], cqe[i + 3]);
464         DRV_LOG(ERR, "\nError wqe:");
465         for (i = 0; i < (int)sizeof(struct mlx5_aso_wqe) / 4; i += 4)
466                 DRV_LOG(ERR, "%08X %08X %08X %08X", wqe[i], wqe[i + 1],
467                         wqe[i + 2], wqe[i + 3]);
468 }
469
470 /**
471  * Handle case of error CQE.
472  *
473  * @param[in] sq
474  *   ASO SQ to use.
475  */
476 static void
477 mlx5_aso_cqe_err_handle(struct mlx5_aso_sq *sq)
478 {
479         struct mlx5_aso_cq *cq = &sq->cq;
480         uint32_t idx = cq->cq_ci & ((1 << cq->log_desc_n) - 1);
481         volatile struct mlx5_err_cqe *cqe =
482                         (volatile struct mlx5_err_cqe *)&cq->cq_obj.cqes[idx];
483
484         cq->errors++;
485         idx = rte_be_to_cpu_16(cqe->wqe_counter) & (1u << sq->log_desc_n);
486         mlx5_aso_dump_err_objs((volatile uint32_t *)cqe,
487                                (volatile uint32_t *)&sq->sq_obj.aso_wqes[idx]);
488 }
489
490 /**
491  * Update ASO objects upon completion.
492  *
493  * @param[in] sh
494  *   Shared device context.
495  * @param[in] n
496  *   Number of completed ASO objects.
497  */
498 static void
499 mlx5_aso_age_action_update(struct mlx5_dev_ctx_shared *sh, uint16_t n)
500 {
501         struct mlx5_aso_age_mng *mng = sh->aso_age_mng;
502         struct mlx5_aso_sq *sq = &mng->aso_sq;
503         struct mlx5_age_info *age_info;
504         const uint16_t size = 1 << sq->log_desc_n;
505         const uint16_t mask = size - 1;
506         const uint64_t curr = MLX5_CURR_TIME_SEC;
507         uint16_t expected = AGE_CANDIDATE;
508         uint16_t i;
509
510         for (i = 0; i < n; ++i) {
511                 uint16_t idx = (sq->tail + i) & mask;
512                 struct mlx5_aso_age_pool *pool = sq->elts[idx].pool;
513                 uint64_t diff = curr - pool->time_of_last_age_check;
514                 uint64_t *addr = sq->mr.addr;
515                 int j;
516
517                 addr += idx * MLX5_ASO_AGE_ACTIONS_PER_POOL / 64;
518                 pool->time_of_last_age_check = curr;
519                 for (j = 0; j < MLX5_ASO_AGE_ACTIONS_PER_POOL; j++) {
520                         struct mlx5_aso_age_action *act = &pool->actions[j];
521                         struct mlx5_age_param *ap = &act->age_params;
522                         uint8_t byte;
523                         uint8_t offset;
524                         uint8_t *u8addr;
525                         uint8_t hit;
526
527                         if (__atomic_load_n(&ap->state, __ATOMIC_RELAXED) !=
528                                             AGE_CANDIDATE)
529                                 continue;
530                         byte = 63 - (j / 8);
531                         offset = j % 8;
532                         u8addr = (uint8_t *)addr;
533                         hit = (u8addr[byte] >> offset) & 0x1;
534                         if (hit) {
535                                 __atomic_store_n(&ap->sec_since_last_hit, 0,
536                                                  __ATOMIC_RELAXED);
537                         } else {
538                                 struct mlx5_priv *priv;
539
540                                 __atomic_fetch_add(&ap->sec_since_last_hit,
541                                                    diff, __ATOMIC_RELAXED);
542                                 /* If timeout passed add to aged-out list. */
543                                 if (ap->sec_since_last_hit <= ap->timeout)
544                                         continue;
545                                 priv =
546                                 rte_eth_devices[ap->port_id].data->dev_private;
547                                 age_info = GET_PORT_AGE_INFO(priv);
548                                 rte_spinlock_lock(&age_info->aged_sl);
549                                 if (__atomic_compare_exchange_n(&ap->state,
550                                                                 &expected,
551                                                                 AGE_TMOUT,
552                                                                 false,
553                                                                __ATOMIC_RELAXED,
554                                                             __ATOMIC_RELAXED)) {
555                                         LIST_INSERT_HEAD(&age_info->aged_aso,
556                                                          act, next);
557                                         MLX5_AGE_SET(age_info,
558                                                      MLX5_AGE_EVENT_NEW);
559                                 }
560                                 rte_spinlock_unlock(&age_info->aged_sl);
561                         }
562                 }
563         }
564         mlx5_age_event_prepare(sh);
565 }
566
567 /**
568  * Handle completions from WQEs sent to ASO SQ.
569  *
570  * @param[in] sh
571  *   Shared device context.
572  *
573  * @return
574  *   Number of CQEs handled.
575  */
576 static uint16_t
577 mlx5_aso_completion_handle(struct mlx5_dev_ctx_shared *sh)
578 {
579         struct mlx5_aso_age_mng *mng = sh->aso_age_mng;
580         struct mlx5_aso_sq *sq = &mng->aso_sq;
581         struct mlx5_aso_cq *cq = &sq->cq;
582         volatile struct mlx5_cqe *restrict cqe;
583         const unsigned int cq_size = 1 << cq->log_desc_n;
584         const unsigned int mask = cq_size - 1;
585         uint32_t idx;
586         uint32_t next_idx = cq->cq_ci & mask;
587         const uint16_t max = (uint16_t)(sq->head - sq->tail);
588         uint16_t i = 0;
589         int ret;
590         if (unlikely(!max))
591                 return 0;
592         do {
593                 idx = next_idx;
594                 next_idx = (cq->cq_ci + 1) & mask;
595                 rte_prefetch0(&cq->cq_obj.cqes[next_idx]);
596                 cqe = &cq->cq_obj.cqes[idx];
597                 ret = check_cqe(cqe, cq_size, cq->cq_ci);
598                 /*
599                  * Be sure owner read is done before any other cookie field or
600                  * opaque field.
601                  */
602                 rte_io_rmb();
603                 if (unlikely(ret != MLX5_CQE_STATUS_SW_OWN)) {
604                         if (likely(ret == MLX5_CQE_STATUS_HW_OWN))
605                                 break;
606                         mlx5_aso_cqe_err_handle(sq);
607                 } else {
608                         i += sq->elts[(sq->tail + i) & mask].burst_size;
609                 }
610                 cq->cq_ci++;
611         } while (1);
612         if (likely(i)) {
613                 mlx5_aso_age_action_update(sh, i);
614                 sq->tail += i;
615                 rte_io_wmb();
616                 cq->cq_obj.db_rec[0] = rte_cpu_to_be_32(cq->cq_ci);
617         }
618         return i;
619 }
620
621 /**
622  * Periodically read CQEs and send WQEs to ASO SQ.
623  *
624  * @param[in] arg
625  *   Shared device context containing the ASO SQ.
626  */
627 static void
628 mlx5_flow_aso_alarm(void *arg)
629 {
630         struct mlx5_dev_ctx_shared *sh = arg;
631         struct mlx5_aso_sq *sq = &sh->aso_age_mng->aso_sq;
632         uint32_t us = 100u;
633         uint16_t n;
634
635         rte_spinlock_lock(&sh->aso_age_mng->resize_sl);
636         n = sh->aso_age_mng->next;
637         rte_spinlock_unlock(&sh->aso_age_mng->resize_sl);
638         mlx5_aso_completion_handle(sh);
639         if (sq->next == n) {
640                 /* End of loop: wait 1 second. */
641                 us = US_PER_S;
642                 sq->next = 0;
643         }
644         mlx5_aso_sq_enqueue_burst(sh->aso_age_mng, n);
645         if (rte_eal_alarm_set(us, mlx5_flow_aso_alarm, sh))
646                 DRV_LOG(ERR, "Cannot reinitialize aso alarm.");
647 }
648
649 /**
650  * API to start ASO access using ASO SQ.
651  *
652  * @param[in] sh
653  *   Pointer to shared device context.
654  *
655  * @return
656  *   0 on success, a negative errno value otherwise and rte_errno is set.
657  */
658 int
659 mlx5_aso_flow_hit_queue_poll_start(struct mlx5_dev_ctx_shared *sh)
660 {
661         if (rte_eal_alarm_set(US_PER_S, mlx5_flow_aso_alarm, sh)) {
662                 DRV_LOG(ERR, "Cannot reinitialize ASO age alarm.");
663                 return -rte_errno;
664         }
665         return 0;
666 }
667
668 /**
669  * API to stop ASO access using ASO SQ.
670  *
671  * @param[in] sh
672  *   Pointer to shared device context.
673  *
674  * @return
675  *   0 on success, a negative errno value otherwise and rte_errno is set.
676  */
677 int
678 mlx5_aso_flow_hit_queue_poll_stop(struct mlx5_dev_ctx_shared *sh)
679 {
680         int retries = 1024;
681
682         if (!sh->aso_age_mng->aso_sq.sq_obj.sq)
683                 return -EINVAL;
684         rte_errno = 0;
685         while (--retries) {
686                 rte_eal_alarm_cancel(mlx5_flow_aso_alarm, sh);
687                 if (rte_errno != EINPROGRESS)
688                         break;
689                 rte_pause();
690         }
691         return -rte_errno;
692 }
693
694 static uint16_t
695 mlx5_aso_mtr_sq_enqueue_single(struct mlx5_aso_sq *sq,
696                 struct mlx5_aso_mtr *aso_mtr)
697 {
698         volatile struct mlx5_aso_wqe *wqe = NULL;
699         struct mlx5_flow_meter_info *fm = NULL;
700         struct mlx5_flow_meter_profile *fmp;
701         uint16_t size = 1 << sq->log_desc_n;
702         uint16_t mask = size - 1;
703         uint16_t res;
704         uint32_t dseg_idx = 0;
705         struct mlx5_aso_mtr_pool *pool = NULL;
706
707         rte_spinlock_lock(&sq->sqsl);
708         res = size - (uint16_t)(sq->head - sq->tail);
709         if (unlikely(!res)) {
710                 DRV_LOG(ERR, "Fail: SQ is full and no free WQE to send");
711                 rte_spinlock_unlock(&sq->sqsl);
712                 return 0;
713         }
714         wqe = &sq->sq_obj.aso_wqes[sq->head & mask];
715         rte_prefetch0(&sq->sq_obj.aso_wqes[(sq->head + 1) & mask]);
716         /* Fill next WQE. */
717         fm = &aso_mtr->fm;
718         sq->elts[sq->head & mask].mtr = aso_mtr;
719         pool = container_of(aso_mtr, struct mlx5_aso_mtr_pool,
720                         mtrs[aso_mtr->offset]);
721         wqe->general_cseg.misc = rte_cpu_to_be_32(pool->devx_obj->id +
722                         (aso_mtr->offset >> 1));
723         wqe->general_cseg.opcode = rte_cpu_to_be_32(MLX5_OPCODE_ACCESS_ASO |
724                         (ASO_OPC_MOD_POLICER <<
725                         WQE_CSEG_OPC_MOD_OFFSET) |
726                         sq->pi << WQE_CSEG_WQE_INDEX_OFFSET);
727         /* There are 2 meters in one ASO cache line. */
728         dseg_idx = aso_mtr->offset & 0x1;
729         wqe->aso_cseg.data_mask =
730                 RTE_BE64(MLX5_IFC_FLOW_METER_PARAM_MASK << (32 * !dseg_idx));
731         if (fm->is_enable) {
732                 wqe->aso_dseg.mtrs[dseg_idx].cbs_cir =
733                         fm->profile->srtcm_prm.cbs_cir;
734                 wqe->aso_dseg.mtrs[dseg_idx].ebs_eir =
735                         fm->profile->srtcm_prm.ebs_eir;
736         } else {
737                 wqe->aso_dseg.mtrs[dseg_idx].cbs_cir =
738                         RTE_BE32(MLX5_IFC_FLOW_METER_DISABLE_CBS_CIR_VAL);
739                 wqe->aso_dseg.mtrs[dseg_idx].ebs_eir = 0;
740         }
741         fmp = fm->profile;
742         if (fmp->profile.packet_mode)
743                 wqe->aso_dseg.mtrs[dseg_idx].v_bo_sc_bbog_mm =
744                                 RTE_BE32((1 << ASO_DSEG_VALID_OFFSET) |
745                                 (MLX5_FLOW_COLOR_GREEN << ASO_DSEG_SC_OFFSET) |
746                                 (MLX5_METER_MODE_PKT << ASO_DSEG_MTR_MODE));
747         else
748                 wqe->aso_dseg.mtrs[dseg_idx].v_bo_sc_bbog_mm =
749                                 RTE_BE32((1 << ASO_DSEG_VALID_OFFSET) |
750                                 (MLX5_FLOW_COLOR_GREEN << ASO_DSEG_SC_OFFSET));
751         switch (fmp->profile.alg) {
752         case RTE_MTR_SRTCM_RFC2697:
753                 /* Only needed for RFC2697. */
754                 if (fm->profile->srtcm_prm.ebs_eir)
755                         wqe->aso_dseg.mtrs[dseg_idx].v_bo_sc_bbog_mm |=
756                                         RTE_BE32(1 << ASO_DSEG_BO_OFFSET);
757                 break;
758         case RTE_MTR_TRTCM_RFC2698:
759                 wqe->aso_dseg.mtrs[dseg_idx].v_bo_sc_bbog_mm |=
760                                 RTE_BE32(1 << ASO_DSEG_BBOG_OFFSET);
761                 break;
762         case RTE_MTR_TRTCM_RFC4115:
763         default:
764                 break;
765         }
766         /*
767          * Note:
768          * Due to software performance reason, the token fields will not be
769          * set when posting the WQE to ASO SQ. It will be filled by the HW
770          * automatically.
771          */
772         sq->head++;
773         sq->pi += 2;/* Each WQE contains 2 WQEBB's. */
774         rte_io_wmb();
775         sq->sq_obj.db_rec[MLX5_SND_DBR] = rte_cpu_to_be_32(sq->pi);
776         rte_wmb();
777         *sq->uar_addr = *(volatile uint64_t *)wqe; /* Assume 64 bit ARCH. */
778         rte_wmb();
779         rte_spinlock_unlock(&sq->sqsl);
780         return 1;
781 }
782
783 static void
784 mlx5_aso_mtrs_status_update(struct mlx5_aso_sq *sq, uint16_t aso_mtrs_nums)
785 {
786         uint16_t size = 1 << sq->log_desc_n;
787         uint16_t mask = size - 1;
788         uint16_t i;
789         struct mlx5_aso_mtr *aso_mtr = NULL;
790         uint8_t exp_state = ASO_METER_WAIT;
791
792         for (i = 0; i < aso_mtrs_nums; ++i) {
793                 aso_mtr = sq->elts[(sq->tail + i) & mask].mtr;
794                 MLX5_ASSERT(aso_mtr);
795                 (void)__atomic_compare_exchange_n(&aso_mtr->state,
796                                 &exp_state, ASO_METER_READY,
797                                 false, __ATOMIC_RELAXED, __ATOMIC_RELAXED);
798         }
799 }
800
801 static void
802 mlx5_aso_mtr_completion_handle(struct mlx5_aso_sq *sq)
803 {
804         struct mlx5_aso_cq *cq = &sq->cq;
805         volatile struct mlx5_cqe *restrict cqe;
806         const unsigned int cq_size = 1 << cq->log_desc_n;
807         const unsigned int mask = cq_size - 1;
808         uint32_t idx;
809         uint32_t next_idx = cq->cq_ci & mask;
810         uint16_t max;
811         uint16_t n = 0;
812         int ret;
813
814         rte_spinlock_lock(&sq->sqsl);
815         max = (uint16_t)(sq->head - sq->tail);
816         if (unlikely(!max)) {
817                 rte_spinlock_unlock(&sq->sqsl);
818                 return;
819         }
820         do {
821                 idx = next_idx;
822                 next_idx = (cq->cq_ci + 1) & mask;
823                 rte_prefetch0(&cq->cq_obj.cqes[next_idx]);
824                 cqe = &cq->cq_obj.cqes[idx];
825                 ret = check_cqe(cqe, cq_size, cq->cq_ci);
826                 /*
827                  * Be sure owner read is done before any other cookie field or
828                  * opaque field.
829                  */
830                 rte_io_rmb();
831                 if (ret != MLX5_CQE_STATUS_SW_OWN) {
832                         if (likely(ret == MLX5_CQE_STATUS_HW_OWN))
833                                 break;
834                         mlx5_aso_cqe_err_handle(sq);
835                 } else {
836                         n++;
837                 }
838                 cq->cq_ci++;
839         } while (1);
840         if (likely(n)) {
841                 mlx5_aso_mtrs_status_update(sq, n);
842                 sq->tail += n;
843                 rte_io_wmb();
844                 cq->cq_obj.db_rec[0] = rte_cpu_to_be_32(cq->cq_ci);
845         }
846         rte_spinlock_unlock(&sq->sqsl);
847 }
848
849 /**
850  * Update meter parameter by send WQE.
851  *
852  * @param[in] dev
853  *   Pointer to Ethernet device.
854  * @param[in] priv
855  *   Pointer to mlx5 private data structure.
856  * @param[in] fm
857  *   Pointer to flow meter to be modified.
858  *
859  * @return
860  *   0 on success, a negative errno value otherwise and rte_errno is set.
861  */
862 int
863 mlx5_aso_meter_update_by_wqe(struct mlx5_dev_ctx_shared *sh,
864                         struct mlx5_aso_mtr *mtr)
865 {
866         struct mlx5_aso_sq *sq = &sh->mtrmng->pools_mng.sq;
867         uint32_t poll_wqe_times = MLX5_MTR_POLL_WQE_CQE_TIMES;
868
869         do {
870                 mlx5_aso_mtr_completion_handle(sq);
871                 if (mlx5_aso_mtr_sq_enqueue_single(sq, mtr))
872                         return 0;
873                 /* Waiting for wqe resource. */
874                 rte_delay_us_sleep(MLX5_ASO_WQE_CQE_RESPONSE_DELAY);
875         } while (--poll_wqe_times);
876         DRV_LOG(ERR, "Fail to send WQE for ASO meter offset %d",
877                         mtr->offset);
878         return -1;
879 }
880
881 /**
882  * Wait for meter to be ready.
883  *
884  * @param[in] dev
885  *   Pointer to Ethernet device.
886  * @param[in] priv
887  *   Pointer to mlx5 private data structure.
888  * @param[in] fm
889  *   Pointer to flow meter to be modified.
890  *
891  * @return
892  *   0 on success, a negative errno value otherwise and rte_errno is set.
893  */
894 int
895 mlx5_aso_mtr_wait(struct mlx5_dev_ctx_shared *sh,
896                         struct mlx5_aso_mtr *mtr)
897 {
898         struct mlx5_aso_sq *sq = &sh->mtrmng->pools_mng.sq;
899         uint32_t poll_cqe_times = MLX5_MTR_POLL_WQE_CQE_TIMES;
900
901         if (__atomic_load_n(&mtr->state, __ATOMIC_RELAXED) ==
902                                             ASO_METER_READY)
903                 return 0;
904         do {
905                 mlx5_aso_mtr_completion_handle(sq);
906                 if (__atomic_load_n(&mtr->state, __ATOMIC_RELAXED) ==
907                                             ASO_METER_READY)
908                         return 0;
909                 /* Waiting for CQE ready. */
910                 rte_delay_us_sleep(MLX5_ASO_WQE_CQE_RESPONSE_DELAY);
911         } while (--poll_cqe_times);
912         DRV_LOG(ERR, "Fail to poll CQE ready for ASO meter offset %d",
913                         mtr->offset);
914         return -1;
915 }
916
917 /*
918  * Post a WQE to the ASO CT SQ to modify the context.
919  *
920  * @param[in] mng
921  *   Pointer to the CT pools management structure.
922  * @param[in] ct
923  *   Pointer to the generic CT structure related to the context.
924  * @param[in] profile
925  *   Pointer to configuration profile.
926  *
927  * @return
928  *   1 on success (WQE number), 0 on failure.
929  */
930 static uint16_t
931 mlx5_aso_ct_sq_enqueue_single(struct mlx5_aso_ct_pools_mng *mng,
932                               struct mlx5_aso_ct_action *ct,
933                               const struct rte_flow_action_conntrack *profile)
934 {
935         volatile struct mlx5_aso_wqe *wqe = NULL;
936         struct mlx5_aso_sq *sq = &mng->aso_sq;
937         uint16_t size = 1 << sq->log_desc_n;
938         uint16_t mask = size - 1;
939         uint16_t res;
940         struct mlx5_aso_ct_pool *pool;
941         void *desg;
942         void *orig_dir;
943         void *reply_dir;
944
945         rte_spinlock_lock(&sq->sqsl);
946         /* Prevent other threads to update the index. */
947         res = size - (uint16_t)(sq->head - sq->tail);
948         if (unlikely(!res)) {
949                 rte_spinlock_unlock(&sq->sqsl);
950                 DRV_LOG(ERR, "Fail: SQ is full and no free WQE to send");
951                 return 0;
952         }
953         wqe = &sq->sq_obj.aso_wqes[sq->head & mask];
954         rte_prefetch0(&sq->sq_obj.aso_wqes[(sq->head + 1) & mask]);
955         /* Fill next WQE. */
956         MLX5_ASO_CT_UPDATE_STATE(ct, ASO_CONNTRACK_WAIT);
957         sq->elts[sq->head & mask].ct = ct;
958         sq->elts[sq->head & mask].query_data = NULL;
959         pool = container_of(ct, struct mlx5_aso_ct_pool, actions[ct->offset]);
960         /* Each WQE will have a single CT object. */
961         wqe->general_cseg.misc = rte_cpu_to_be_32(pool->devx_obj->id +
962                                                   ct->offset);
963         wqe->general_cseg.opcode = rte_cpu_to_be_32(MLX5_OPCODE_ACCESS_ASO |
964                         (ASO_OPC_MOD_CONNECTION_TRACKING <<
965                          WQE_CSEG_OPC_MOD_OFFSET) |
966                         sq->pi << WQE_CSEG_WQE_INDEX_OFFSET);
967         wqe->aso_cseg.operand_masks = rte_cpu_to_be_32
968                         (0u |
969                          (ASO_OPER_LOGICAL_OR << ASO_CSEG_COND_OPER_OFFSET) |
970                          (ASO_OP_ALWAYS_TRUE << ASO_CSEG_COND_1_OPER_OFFSET) |
971                          (ASO_OP_ALWAYS_TRUE << ASO_CSEG_COND_0_OPER_OFFSET) |
972                          (BYTEWISE_64BYTE << ASO_CSEG_DATA_MASK_MODE_OFFSET));
973         wqe->aso_cseg.data_mask = UINT64_MAX;
974         /* To make compiler happy. */
975         desg = (void *)(uintptr_t)wqe->aso_dseg.data;
976         MLX5_SET(conn_track_aso, desg, valid, 1);
977         MLX5_SET(conn_track_aso, desg, state, profile->state);
978         MLX5_SET(conn_track_aso, desg, freeze_track, !profile->enable);
979         MLX5_SET(conn_track_aso, desg, connection_assured,
980                  profile->live_connection);
981         MLX5_SET(conn_track_aso, desg, sack_permitted, profile->selective_ack);
982         MLX5_SET(conn_track_aso, desg, challenged_acked,
983                  profile->challenge_ack_passed);
984         /* Heartbeat, retransmission_counter, retranmission_limit_exceeded: 0 */
985         MLX5_SET(conn_track_aso, desg, heartbeat, 0);
986         MLX5_SET(conn_track_aso, desg, max_ack_window,
987                  profile->max_ack_window);
988         MLX5_SET(conn_track_aso, desg, retransmission_counter, 0);
989         MLX5_SET(conn_track_aso, desg, retranmission_limit_exceeded, 0);
990         MLX5_SET(conn_track_aso, desg, retranmission_limit,
991                  profile->retransmission_limit);
992         MLX5_SET(conn_track_aso, desg, reply_direction_tcp_scale,
993                  profile->reply_dir.scale);
994         MLX5_SET(conn_track_aso, desg, reply_direction_tcp_close_initiated,
995                  profile->reply_dir.close_initiated);
996         /* Both directions will use the same liberal mode. */
997         MLX5_SET(conn_track_aso, desg, reply_direction_tcp_liberal_enabled,
998                  profile->liberal_mode);
999         MLX5_SET(conn_track_aso, desg, reply_direction_tcp_data_unacked,
1000                  profile->reply_dir.data_unacked);
1001         MLX5_SET(conn_track_aso, desg, reply_direction_tcp_max_ack,
1002                  profile->reply_dir.last_ack_seen);
1003         MLX5_SET(conn_track_aso, desg, original_direction_tcp_scale,
1004                  profile->original_dir.scale);
1005         MLX5_SET(conn_track_aso, desg, original_direction_tcp_close_initiated,
1006                  profile->original_dir.close_initiated);
1007         MLX5_SET(conn_track_aso, desg, original_direction_tcp_liberal_enabled,
1008                  profile->liberal_mode);
1009         MLX5_SET(conn_track_aso, desg, original_direction_tcp_data_unacked,
1010                  profile->original_dir.data_unacked);
1011         MLX5_SET(conn_track_aso, desg, original_direction_tcp_max_ack,
1012                  profile->original_dir.last_ack_seen);
1013         MLX5_SET(conn_track_aso, desg, last_win, profile->last_window);
1014         MLX5_SET(conn_track_aso, desg, last_dir, profile->last_direction);
1015         MLX5_SET(conn_track_aso, desg, last_index, profile->last_index);
1016         MLX5_SET(conn_track_aso, desg, last_seq, profile->last_seq);
1017         MLX5_SET(conn_track_aso, desg, last_ack, profile->last_ack);
1018         MLX5_SET(conn_track_aso, desg, last_end, profile->last_end);
1019         orig_dir = MLX5_ADDR_OF(conn_track_aso, desg, original_dir);
1020         MLX5_SET(tcp_window_params, orig_dir, sent_end,
1021                  profile->original_dir.sent_end);
1022         MLX5_SET(tcp_window_params, orig_dir, reply_end,
1023                  profile->original_dir.reply_end);
1024         MLX5_SET(tcp_window_params, orig_dir, max_win,
1025                  profile->original_dir.max_win);
1026         MLX5_SET(tcp_window_params, orig_dir, max_ack,
1027                  profile->original_dir.max_ack);
1028         reply_dir = MLX5_ADDR_OF(conn_track_aso, desg, reply_dir);
1029         MLX5_SET(tcp_window_params, reply_dir, sent_end,
1030                  profile->reply_dir.sent_end);
1031         MLX5_SET(tcp_window_params, reply_dir, reply_end,
1032                  profile->reply_dir.reply_end);
1033         MLX5_SET(tcp_window_params, reply_dir, max_win,
1034                  profile->reply_dir.max_win);
1035         MLX5_SET(tcp_window_params, reply_dir, max_ack,
1036                  profile->reply_dir.max_ack);
1037         sq->head++;
1038         sq->pi += 2; /* Each WQE contains 2 WQEBB's. */
1039         rte_io_wmb();
1040         sq->sq_obj.db_rec[MLX5_SND_DBR] = rte_cpu_to_be_32(sq->pi);
1041         rte_wmb();
1042         *sq->uar_addr = *(volatile uint64_t *)wqe; /* Assume 64 bit ARCH. */
1043         rte_wmb();
1044         rte_spinlock_unlock(&sq->sqsl);
1045         return 1;
1046 }
1047
1048 /*
1049  * Update the status field of CTs to indicate ready to be used by flows.
1050  * A continuous number of CTs since last update.
1051  *
1052  * @param[in] sq
1053  *   Pointer to ASO CT SQ.
1054  * @param[in] num
1055  *   Number of CT structures to be updated.
1056  *
1057  * @return
1058  *   0 on success, a negative value.
1059  */
1060 static void
1061 mlx5_aso_ct_status_update(struct mlx5_aso_sq *sq, uint16_t num)
1062 {
1063         uint16_t size = 1 << sq->log_desc_n;
1064         uint16_t mask = size - 1;
1065         uint16_t i;
1066         struct mlx5_aso_ct_action *ct = NULL;
1067         uint16_t idx;
1068
1069         for (i = 0; i < num; i++) {
1070                 idx = (uint16_t)((sq->tail + i) & mask);
1071                 ct = sq->elts[idx].ct;
1072                 MLX5_ASSERT(ct);
1073                 MLX5_ASO_CT_UPDATE_STATE(ct, ASO_CONNTRACK_READY);
1074                 if (sq->elts[idx].query_data)
1075                         rte_memcpy(sq->elts[idx].query_data,
1076                                    (char *)((uintptr_t)sq->mr.addr + idx * 64),
1077                                    64);
1078         }
1079 }
1080
1081 /*
1082  * Post a WQE to the ASO CT SQ to query the current context.
1083  *
1084  * @param[in] mng
1085  *   Pointer to the CT pools management structure.
1086  * @param[in] ct
1087  *   Pointer to the generic CT structure related to the context.
1088  * @param[in] data
1089  *   Pointer to data area to be filled.
1090  *
1091  * @return
1092  *   1 on success (WQE number), 0 on failure.
1093  */
1094 static int
1095 mlx5_aso_ct_sq_query_single(struct mlx5_aso_ct_pools_mng *mng,
1096                             struct mlx5_aso_ct_action *ct, char *data)
1097 {
1098         volatile struct mlx5_aso_wqe *wqe = NULL;
1099         struct mlx5_aso_sq *sq = &mng->aso_sq;
1100         uint16_t size = 1 << sq->log_desc_n;
1101         uint16_t mask = size - 1;
1102         uint16_t res;
1103         uint16_t wqe_idx;
1104         struct mlx5_aso_ct_pool *pool;
1105         enum mlx5_aso_ct_state state =
1106                                 __atomic_load_n(&ct->state, __ATOMIC_RELAXED);
1107
1108         if (state == ASO_CONNTRACK_FREE) {
1109                 DRV_LOG(ERR, "Fail: No context to query");
1110                 return -1;
1111         } else if (state == ASO_CONNTRACK_WAIT) {
1112                 return 0;
1113         }
1114         rte_spinlock_lock(&sq->sqsl);
1115         res = size - (uint16_t)(sq->head - sq->tail);
1116         if (unlikely(!res)) {
1117                 rte_spinlock_unlock(&sq->sqsl);
1118                 DRV_LOG(ERR, "Fail: SQ is full and no free WQE to send");
1119                 return 0;
1120         }
1121         MLX5_ASO_CT_UPDATE_STATE(ct, ASO_CONNTRACK_QUERY);
1122         wqe = &sq->sq_obj.aso_wqes[sq->head & mask];
1123         /* Confirm the location and address of the prefetch instruction. */
1124         rte_prefetch0(&sq->sq_obj.aso_wqes[(sq->head + 1) & mask]);
1125         /* Fill next WQE. */
1126         wqe_idx = sq->head & mask;
1127         sq->elts[wqe_idx].ct = ct;
1128         sq->elts[wqe_idx].query_data = data;
1129         pool = container_of(ct, struct mlx5_aso_ct_pool, actions[ct->offset]);
1130         /* Each WQE will have a single CT object. */
1131         wqe->general_cseg.misc = rte_cpu_to_be_32(pool->devx_obj->id +
1132                                                   ct->offset);
1133         wqe->general_cseg.opcode = rte_cpu_to_be_32(MLX5_OPCODE_ACCESS_ASO |
1134                         (ASO_OPC_MOD_CONNECTION_TRACKING <<
1135                          WQE_CSEG_OPC_MOD_OFFSET) |
1136                         sq->pi << WQE_CSEG_WQE_INDEX_OFFSET);
1137         /*
1138          * There is no write request is required.
1139          * ASO_OPER_LOGICAL_AND and ASO_OP_ALWAYS_FALSE are both 0.
1140          * "BYTEWISE_64BYTE" is needed for a whole context.
1141          * Set to 0 directly to reduce an endian swap. (Modify should rewrite.)
1142          * "data_mask" is ignored.
1143          * Buffer address was already filled during initialization.
1144          */
1145         wqe->aso_cseg.operand_masks = rte_cpu_to_be_32(BYTEWISE_64BYTE <<
1146                                         ASO_CSEG_DATA_MASK_MODE_OFFSET);
1147         wqe->aso_cseg.data_mask = 0;
1148         sq->head++;
1149         /*
1150          * Each WQE contains 2 WQEBB's, even though
1151          * data segment is not used in this case.
1152          */
1153         sq->pi += 2;
1154         rte_io_wmb();
1155         sq->sq_obj.db_rec[MLX5_SND_DBR] = rte_cpu_to_be_32(sq->pi);
1156         rte_wmb();
1157         *sq->uar_addr = *(volatile uint64_t *)wqe; /* Assume 64 bit ARCH. */
1158         rte_wmb();
1159         rte_spinlock_unlock(&sq->sqsl);
1160         return 1;
1161 }
1162
1163 /*
1164  * Handle completions from WQEs sent to ASO CT.
1165  *
1166  * @param[in] mng
1167  *   Pointer to the CT pools management structure.
1168  */
1169 static void
1170 mlx5_aso_ct_completion_handle(struct mlx5_aso_ct_pools_mng *mng)
1171 {
1172         struct mlx5_aso_sq *sq = &mng->aso_sq;
1173         struct mlx5_aso_cq *cq = &sq->cq;
1174         volatile struct mlx5_cqe *restrict cqe;
1175         const uint32_t cq_size = 1 << cq->log_desc_n;
1176         const uint32_t mask = cq_size - 1;
1177         uint32_t idx;
1178         uint32_t next_idx;
1179         uint16_t max;
1180         uint16_t n = 0;
1181         int ret;
1182
1183         rte_spinlock_lock(&sq->sqsl);
1184         max = (uint16_t)(sq->head - sq->tail);
1185         if (unlikely(!max)) {
1186                 rte_spinlock_unlock(&sq->sqsl);
1187                 return;
1188         }
1189         next_idx = cq->cq_ci & mask;
1190         do {
1191                 idx = next_idx;
1192                 next_idx = (cq->cq_ci + 1) & mask;
1193                 /* Need to confirm the position of the prefetch. */
1194                 rte_prefetch0(&cq->cq_obj.cqes[next_idx]);
1195                 cqe = &cq->cq_obj.cqes[idx];
1196                 ret = check_cqe(cqe, cq_size, cq->cq_ci);
1197                 /*
1198                  * Be sure owner read is done before any other cookie field or
1199                  * opaque field.
1200                  */
1201                 rte_io_rmb();
1202                 if (unlikely(ret != MLX5_CQE_STATUS_SW_OWN)) {
1203                         if (likely(ret == MLX5_CQE_STATUS_HW_OWN))
1204                                 break;
1205                         mlx5_aso_cqe_err_handle(sq);
1206                 } else {
1207                         n++;
1208                 }
1209                 cq->cq_ci++;
1210         } while (1);
1211         if (likely(n)) {
1212                 mlx5_aso_ct_status_update(sq, n);
1213                 sq->tail += n;
1214                 rte_io_wmb();
1215                 cq->cq_obj.db_rec[0] = rte_cpu_to_be_32(cq->cq_ci);
1216         }
1217         rte_spinlock_unlock(&sq->sqsl);
1218 }
1219
1220 /*
1221  * Update connection tracking ASO context by sending WQE.
1222  *
1223  * @param[in] sh
1224  *   Pointer to mlx5_dev_ctx_shared object.
1225  * @param[in] ct
1226  *   Pointer to connection tracking offload object.
1227  * @param[in] profile
1228  *   Pointer to connection tracking TCP parameter.
1229  *
1230  * @return
1231  *   0 on success, -1 on failure.
1232  */
1233 int
1234 mlx5_aso_ct_update_by_wqe(struct mlx5_dev_ctx_shared *sh,
1235                           struct mlx5_aso_ct_action *ct,
1236                           const struct rte_flow_action_conntrack *profile)
1237 {
1238         struct mlx5_aso_ct_pools_mng *mng = sh->ct_mng;
1239         uint32_t poll_wqe_times = MLX5_CT_POLL_WQE_CQE_TIMES;
1240         struct mlx5_aso_ct_pool *pool;
1241
1242         MLX5_ASSERT(ct);
1243         do {
1244                 mlx5_aso_ct_completion_handle(mng);
1245                 if (mlx5_aso_ct_sq_enqueue_single(mng, ct, profile))
1246                         return 0;
1247                 /* Waiting for wqe resource. */
1248                 rte_delay_us_sleep(10u);
1249         } while (--poll_wqe_times);
1250         pool = container_of(ct, struct mlx5_aso_ct_pool, actions[ct->offset]);
1251         DRV_LOG(ERR, "Fail to send WQE for ASO CT %d in pool %d",
1252                 ct->offset, pool->index);
1253         return -1;
1254 }
1255
1256 /*
1257  * The routine is used to wait for WQE completion to continue with queried data.
1258  *
1259  * @param[in] sh
1260  *   Pointer to mlx5_dev_ctx_shared object.
1261  * @param[in] ct
1262  *   Pointer to connection tracking offload object.
1263  *
1264  * @return
1265  *   0 on success, -1 on failure.
1266  */
1267 int
1268 mlx5_aso_ct_wait_ready(struct mlx5_dev_ctx_shared *sh,
1269                        struct mlx5_aso_ct_action *ct)
1270 {
1271         struct mlx5_aso_ct_pools_mng *mng = sh->ct_mng;
1272         uint32_t poll_cqe_times = MLX5_CT_POLL_WQE_CQE_TIMES;
1273         struct mlx5_aso_ct_pool *pool;
1274
1275         if (__atomic_load_n(&ct->state, __ATOMIC_RELAXED) ==
1276             ASO_CONNTRACK_READY)
1277                 return 0;
1278         do {
1279                 mlx5_aso_ct_completion_handle(mng);
1280                 if (__atomic_load_n(&ct->state, __ATOMIC_RELAXED) ==
1281                     ASO_CONNTRACK_READY)
1282                         return 0;
1283                 /* Waiting for CQE ready, consider should block or sleep. */
1284                 rte_delay_us_sleep(MLX5_ASO_WQE_CQE_RESPONSE_DELAY);
1285         } while (--poll_cqe_times);
1286         pool = container_of(ct, struct mlx5_aso_ct_pool, actions[ct->offset]);
1287         DRV_LOG(ERR, "Fail to poll CQE for ASO CT %d in pool %d",
1288                 ct->offset, pool->index);
1289         return -1;
1290 }
1291
1292 /*
1293  * Convert the hardware conntrack data format into the profile.
1294  *
1295  * @param[in] profile
1296  *   Pointer to conntrack profile to be filled after query.
1297  * @param[in] wdata
1298  *   Pointer to data fetched from hardware.
1299  */
1300 static inline void
1301 mlx5_aso_ct_obj_analyze(struct rte_flow_action_conntrack *profile,
1302                         char *wdata)
1303 {
1304         void *o_dir = MLX5_ADDR_OF(conn_track_aso, wdata, original_dir);
1305         void *r_dir = MLX5_ADDR_OF(conn_track_aso, wdata, reply_dir);
1306
1307         /* MLX5_GET16 should be taken into consideration. */
1308         profile->state = (enum rte_flow_conntrack_state)
1309                          MLX5_GET(conn_track_aso, wdata, state);
1310         profile->enable = !MLX5_GET(conn_track_aso, wdata, freeze_track);
1311         profile->selective_ack = MLX5_GET(conn_track_aso, wdata,
1312                                           sack_permitted);
1313         profile->live_connection = MLX5_GET(conn_track_aso, wdata,
1314                                             connection_assured);
1315         profile->challenge_ack_passed = MLX5_GET(conn_track_aso, wdata,
1316                                                  challenged_acked);
1317         profile->max_ack_window = MLX5_GET(conn_track_aso, wdata,
1318                                            max_ack_window);
1319         profile->retransmission_limit = MLX5_GET(conn_track_aso, wdata,
1320                                                  retranmission_limit);
1321         profile->last_window = MLX5_GET(conn_track_aso, wdata, last_win);
1322         profile->last_direction = MLX5_GET(conn_track_aso, wdata, last_dir);
1323         profile->last_index = (enum rte_flow_conntrack_tcp_last_index)
1324                               MLX5_GET(conn_track_aso, wdata, last_index);
1325         profile->last_seq = MLX5_GET(conn_track_aso, wdata, last_seq);
1326         profile->last_ack = MLX5_GET(conn_track_aso, wdata, last_ack);
1327         profile->last_end = MLX5_GET(conn_track_aso, wdata, last_end);
1328         profile->liberal_mode = MLX5_GET(conn_track_aso, wdata,
1329                                 reply_direction_tcp_liberal_enabled) |
1330                                 MLX5_GET(conn_track_aso, wdata,
1331                                 original_direction_tcp_liberal_enabled);
1332         /* No liberal in the RTE structure profile. */
1333         profile->reply_dir.scale = MLX5_GET(conn_track_aso, wdata,
1334                                             reply_direction_tcp_scale);
1335         profile->reply_dir.close_initiated = MLX5_GET(conn_track_aso, wdata,
1336                                         reply_direction_tcp_close_initiated);
1337         profile->reply_dir.data_unacked = MLX5_GET(conn_track_aso, wdata,
1338                                         reply_direction_tcp_data_unacked);
1339         profile->reply_dir.last_ack_seen = MLX5_GET(conn_track_aso, wdata,
1340                                         reply_direction_tcp_max_ack);
1341         profile->reply_dir.sent_end = MLX5_GET(tcp_window_params,
1342                                                r_dir, sent_end);
1343         profile->reply_dir.reply_end = MLX5_GET(tcp_window_params,
1344                                                 r_dir, reply_end);
1345         profile->reply_dir.max_win = MLX5_GET(tcp_window_params,
1346                                               r_dir, max_win);
1347         profile->reply_dir.max_ack = MLX5_GET(tcp_window_params,
1348                                               r_dir, max_ack);
1349         profile->original_dir.scale = MLX5_GET(conn_track_aso, wdata,
1350                                                original_direction_tcp_scale);
1351         profile->original_dir.close_initiated = MLX5_GET(conn_track_aso, wdata,
1352                                         original_direction_tcp_close_initiated);
1353         profile->original_dir.data_unacked = MLX5_GET(conn_track_aso, wdata,
1354                                         original_direction_tcp_data_unacked);
1355         profile->original_dir.last_ack_seen = MLX5_GET(conn_track_aso, wdata,
1356                                         original_direction_tcp_max_ack);
1357         profile->original_dir.sent_end = MLX5_GET(tcp_window_params,
1358                                                   o_dir, sent_end);
1359         profile->original_dir.reply_end = MLX5_GET(tcp_window_params,
1360                                                    o_dir, reply_end);
1361         profile->original_dir.max_win = MLX5_GET(tcp_window_params,
1362                                                  o_dir, max_win);
1363         profile->original_dir.max_ack = MLX5_GET(tcp_window_params,
1364                                                  o_dir, max_ack);
1365 }
1366
1367 /*
1368  * Query connection tracking information parameter by send WQE.
1369  *
1370  * @param[in] dev
1371  *   Pointer to Ethernet device.
1372  * @param[in] ct
1373  *   Pointer to connection tracking offload object.
1374  * @param[out] profile
1375  *   Pointer to connection tracking TCP information.
1376  *
1377  * @return
1378  *   0 on success, -1 on failure.
1379  */
1380 int
1381 mlx5_aso_ct_query_by_wqe(struct mlx5_dev_ctx_shared *sh,
1382                          struct mlx5_aso_ct_action *ct,
1383                          struct rte_flow_action_conntrack *profile)
1384 {
1385         struct mlx5_aso_ct_pools_mng *mng = sh->ct_mng;
1386         uint32_t poll_wqe_times = MLX5_CT_POLL_WQE_CQE_TIMES;
1387         struct mlx5_aso_ct_pool *pool;
1388         char out_data[64 * 2];
1389         int ret;
1390
1391         MLX5_ASSERT(ct);
1392         do {
1393                 mlx5_aso_ct_completion_handle(mng);
1394                 ret = mlx5_aso_ct_sq_query_single(mng, ct, out_data);
1395                 if (ret < 0)
1396                         return ret;
1397                 else if (ret > 0)
1398                         goto data_handle;
1399                 /* Waiting for wqe resource or state. */
1400                 else
1401                         rte_delay_us_sleep(10u);
1402         } while (--poll_wqe_times);
1403         pool = container_of(ct, struct mlx5_aso_ct_pool, actions[ct->offset]);
1404         DRV_LOG(ERR, "Fail to send WQE for ASO CT %d in pool %d",
1405                 ct->offset, pool->index);
1406         return -1;
1407 data_handle:
1408         ret = mlx5_aso_ct_wait_ready(sh, ct);
1409         if (!ret)
1410                 mlx5_aso_ct_obj_analyze(profile, out_data);
1411         return ret;
1412 }
1413
1414 /*
1415  * Make sure the conntrack context is synchronized with hardware before
1416  * creating a flow rule that uses it.
1417  *
1418  * @param[in] sh
1419  *   Pointer to shared device context.
1420  * @param[in] ct
1421  *   Pointer to connection tracking offload object.
1422  *
1423  * @return
1424  *   0 on success, a negative errno value otherwise and rte_errno is set.
1425  */
1426 int
1427 mlx5_aso_ct_available(struct mlx5_dev_ctx_shared *sh,
1428                       struct mlx5_aso_ct_action *ct)
1429 {
1430         struct mlx5_aso_ct_pools_mng *mng = sh->ct_mng;
1431         uint32_t poll_cqe_times = MLX5_CT_POLL_WQE_CQE_TIMES;
1432         enum mlx5_aso_ct_state state =
1433                                 __atomic_load_n(&ct->state, __ATOMIC_RELAXED);
1434
1435         if (state == ASO_CONNTRACK_FREE) {
1436                 rte_errno = ENXIO;
1437                 return -rte_errno;
1438         } else if (state == ASO_CONNTRACK_READY ||
1439                    state == ASO_CONNTRACK_QUERY) {
1440                 return 0;
1441         }
1442         do {
1443                 mlx5_aso_ct_completion_handle(mng);
1444                 state = __atomic_load_n(&ct->state, __ATOMIC_RELAXED);
1445                 if (state == ASO_CONNTRACK_READY ||
1446                     state == ASO_CONNTRACK_QUERY)
1447                         return 0;
1448                 /* Waiting for CQE ready, consider should block or sleep. */
1449                 rte_delay_us_sleep(MLX5_ASO_WQE_CQE_RESPONSE_DELAY);
1450         } while (--poll_cqe_times);
1451         rte_errno = EBUSY;
1452         return -rte_errno;
1453 }