drivers/net/mlx5/mlx5_flow_aso.c
1 /* SPDX-License-Identifier: BSD-3-Clause
2  * Copyright 2020 Mellanox Technologies, Ltd
3  */
4 #include <mlx5_prm.h>
5 #include <rte_malloc.h>
6 #include <rte_cycles.h>
7 #include <rte_eal_paging.h>
8
9 #include <mlx5_malloc.h>
10 #include <mlx5_common_os.h>
11 #include <mlx5_common_devx.h>
12
13 #include "mlx5.h"
14 #include "mlx5_flow.h"
15
16 /**
17  * Destroy Completion Queue used for ASO access.
18  *
19  * @param[in] cq
20  *   ASO CQ to destroy.
21  */
22 static void
23 mlx5_aso_cq_destroy(struct mlx5_aso_cq *cq)
24 {
25         if (cq->cq_obj.cq)
26                 mlx5_devx_cq_destroy(&cq->cq_obj);
27         memset(cq, 0, sizeof(*cq));
28 }
29
30 /**
31  * Create Completion Queue used for ASO access.
32  *
33  * @param[in] ctx
34  *   Context returned from mlx5 open_device() glue function.
35  * @param[in/out] cq
36  *   Pointer to CQ to create.
37  * @param[in] log_desc_n
38  *   Log of number of descriptors in queue.
39  * @param[in] socket
40  *   Socket to use for allocation.
41  * @param[in] uar_page_id
42  *   UAR page ID to use.
43  *
44  * @return
45  *   0 on success, a negative errno value otherwise and rte_errno is set.
46  */
47 static int
48 mlx5_aso_cq_create(void *ctx, struct mlx5_aso_cq *cq, uint16_t log_desc_n,
49                    int socket, int uar_page_id)
50 {
51         struct mlx5_devx_cq_attr attr = {
52                 .uar_page_id = uar_page_id,
53         };
54
55         cq->log_desc_n = log_desc_n;
56         cq->cq_ci = 0;
57         return mlx5_devx_cq_create(ctx, &cq->cq_obj, log_desc_n, &attr, socket);
58 }
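
/*
 * Editor's illustrative sketch (not part of the driver): queue sizes in
 * this file are carried around as log2 values. The descriptor count, the
 * index mask and the cheap wrap-around arithmetic used throughout all
 * derive from log_desc_n. The "example_" name is hypothetical.
 */
static inline uint16_t
example_ring_slot(uint16_t index, uint16_t log_desc_n)
{
	uint16_t size = 1 << log_desc_n; /* Number of descriptors. */
	uint16_t mask = size - 1;        /* Valid since size is a power of 2. */

	return index & mask; /* Equivalent to index % size. */
}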
59
60 /**
61  * Free MR resources.
62  *
63  * @param[in] cdev
64  *   Pointer to the mlx5 common device.
65  * @param[in] mr
66  *   MR to free.
67  */
68 static void
69 mlx5_aso_dereg_mr(struct mlx5_common_device *cdev, struct mlx5_pmd_mr *mr)
70 {
71         void *addr = mr->addr;
72
73         cdev->mr_scache.dereg_mr_cb(mr);
74         mlx5_free(addr);
75         memset(mr, 0, sizeof(*mr));
76 }
77
78 /**
79  * Register Memory Region.
80  *
81  * @param[in] cdev
82  *   Pointer to the mlx5 common device.
83  * @param[in] length
84  *   Size of MR buffer.
85  * @param[in/out] mr
86  *   Pointer to MR to create.
87  * @param[in] socket
88  *   Socket to use for allocation.
89  *
90  * @return
91  *   0 on success, a negative errno value otherwise and rte_errno is set.
92  */
93 static int
94 mlx5_aso_reg_mr(struct mlx5_common_device *cdev, size_t length,
95                 struct mlx5_pmd_mr *mr, int socket)
96 {
97
98         int ret;
99
100         mr->addr = mlx5_malloc(MLX5_MEM_RTE | MLX5_MEM_ZERO, length, 4096,
101                                socket);
102         if (!mr->addr) {
103                 DRV_LOG(ERR, "Failed to create ASO bits mem for MR.");
104                 return -1;
105         }
106         ret = cdev->mr_scache.reg_mr_cb(cdev->pd, mr->addr, length, mr);
107         if (ret) {
108                 DRV_LOG(ERR, "Failed to create direct Mkey.");
109                 mlx5_free(mr->addr);
110                 return -1;
111         }
112         return 0;
113 }
114
115 /**
116  * Destroy Send Queue used for ASO access.
117  *
118  * @param[in] sq
119  *   ASO SQ to destroy.
120  */
121 static void
122 mlx5_aso_destroy_sq(struct mlx5_aso_sq *sq)
123 {
124         mlx5_devx_sq_destroy(&sq->sq_obj);
125         mlx5_aso_cq_destroy(&sq->cq);
126         memset(sq, 0, sizeof(*sq));
127 }
128
129 /**
130  * Initialize Send Queue used for ASO access.
131  *
132  * @param[in] sq
133  *   ASO SQ to initialize.
134  */
135 static void
136 mlx5_aso_age_init_sq(struct mlx5_aso_sq *sq)
137 {
138         volatile struct mlx5_aso_wqe *restrict wqe;
139         int i;
140         int size = 1 << sq->log_desc_n;
141         uint64_t addr;
142
143         /* All the fields set below stay constant for the queue's lifetime. */
144         for (i = 0, wqe = &sq->sq_obj.aso_wqes[0]; i < size; ++i, ++wqe) {
145                 wqe->general_cseg.sq_ds = rte_cpu_to_be_32((sq->sqn << 8) |
146                                                           (sizeof(*wqe) >> 4));
147                 wqe->aso_cseg.lkey = rte_cpu_to_be_32(sq->mr.lkey);
148                 addr = (uint64_t)((uint64_t *)sq->mr.addr + i *
149                                             MLX5_ASO_AGE_ACTIONS_PER_POOL / 64);
150                 wqe->aso_cseg.va_h = rte_cpu_to_be_32((uint32_t)(addr >> 32));
151                 wqe->aso_cseg.va_l_r = rte_cpu_to_be_32((uint32_t)addr | 1u);
152                 wqe->aso_cseg.operand_masks = rte_cpu_to_be_32
153                         (0u |
154                          (ASO_OPER_LOGICAL_OR << ASO_CSEG_COND_OPER_OFFSET) |
155                          (ASO_OP_ALWAYS_TRUE << ASO_CSEG_COND_1_OPER_OFFSET) |
156                          (ASO_OP_ALWAYS_TRUE << ASO_CSEG_COND_0_OPER_OFFSET) |
157                          (BYTEWISE_64BYTE << ASO_CSEG_DATA_MASK_MODE_OFFSET));
158                 wqe->aso_cseg.data_mask = RTE_BE64(UINT64_MAX);
159         }
160 }
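
/*
 * Editor's note with a minimal sketch (not part of the driver): the
 * constant fields above pack several values into big-endian words. sq_ds
 * carries the SQ number in the upper 24 bits and the WQE size in 16-byte
 * units in the low byte; the 64-bit buffer address is split into
 * va_h/va_l_r, with bit 0 of va_l_r used as a flag (set to 1 above),
 * which is possible because each per-WQE chunk is 64-byte aligned and the
 * low address bits are therefore free. Hypothetical helper showing the
 * address split only:
 */
static inline void
example_fill_va(volatile struct mlx5_aso_wqe *wqe, uint64_t addr)
{
	/* Upper half of the buffer address. */
	wqe->aso_cseg.va_h = rte_cpu_to_be_32((uint32_t)(addr >> 32));
	/* Lower half, with bit 0 carrying the flag. */
	wqe->aso_cseg.va_l_r = rte_cpu_to_be_32((uint32_t)addr | 1u);
}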
161
162 /**
163  * Initialize Send Queue used for ASO flow meter access.
164  *
165  * @param[in] sq
166  *   ASO SQ to initialize.
167  */
168 static void
169 mlx5_aso_mtr_init_sq(struct mlx5_aso_sq *sq)
170 {
171         volatile struct mlx5_aso_wqe *restrict wqe;
172         int i;
173         int size = 1 << sq->log_desc_n;
174
175         /* All the fields set below stay constant for the queue's lifetime. */
176         for (i = 0, wqe = &sq->sq_obj.aso_wqes[0]; i < size; ++i, ++wqe) {
177                 wqe->general_cseg.sq_ds = rte_cpu_to_be_32((sq->sqn << 8) |
178                                                           (sizeof(*wqe) >> 4));
179                 wqe->aso_cseg.operand_masks = RTE_BE32(0u |
180                          (ASO_OPER_LOGICAL_OR << ASO_CSEG_COND_OPER_OFFSET) |
181                          (ASO_OP_ALWAYS_TRUE << ASO_CSEG_COND_1_OPER_OFFSET) |
182                          (ASO_OP_ALWAYS_TRUE << ASO_CSEG_COND_0_OPER_OFFSET) |
183                          (BYTEWISE_64BYTE << ASO_CSEG_DATA_MASK_MODE_OFFSET));
184                 wqe->general_cseg.flags = RTE_BE32(MLX5_COMP_ALWAYS <<
185                                                          MLX5_COMP_MODE_OFFSET);
186         }
187 }
188
189 /**
190  * Initialize Send Queue used for ASO connection tracking.
191  *
192  * @param[in] sq
193  *   ASO SQ to initialize.
194  */
195 static void
196 mlx5_aso_ct_init_sq(struct mlx5_aso_sq *sq)
197 {
198         volatile struct mlx5_aso_wqe *restrict wqe;
199         int i;
200         int size = 1 << sq->log_desc_n;
201         uint64_t addr;
202
203         /* All the fields set below stay constant for the queue's lifetime. */
204         for (i = 0, wqe = &sq->sq_obj.aso_wqes[0]; i < size; ++i, ++wqe) {
205                 wqe->general_cseg.sq_ds = rte_cpu_to_be_32((sq->sqn << 8) |
206                                                           (sizeof(*wqe) >> 4));
207                 /* One unique MR for the query data. */
208                 wqe->aso_cseg.lkey = rte_cpu_to_be_32(sq->mr.lkey);
209                 /* Magic number 64 is the length of an ASO CT object. */
210                 addr = (uint64_t)((uintptr_t)sq->mr.addr + i * 64);
211                 wqe->aso_cseg.va_h = rte_cpu_to_be_32((uint32_t)(addr >> 32));
212                 wqe->aso_cseg.va_l_r = rte_cpu_to_be_32((uint32_t)addr | 1u);
213                 /*
214                  * The values of operand_masks are different for modify
215                  * and query.
216                  * And data_mask may be different for each modification. In
217                  * query, it could be zero and ignored.
218                  * CQE generation is always needed, in order to decide when
219                  * it is available to create the flow or read the data.
220                  */
221                 wqe->general_cseg.flags = RTE_BE32(MLX5_COMP_ALWAYS <<
222                                                    MLX5_COMP_MODE_OFFSET);
223         }
224 }
225
226 /**
227  * Create Send Queue used for ASO access.
228  *
229  * @param[in] ctx
230  *   Context returned from mlx5 open_device() glue function.
231  * @param[in/out] sq
232  *   Pointer to SQ to create.
233  * @param[in] socket
234  *   Socket to use for allocation.
235  * @param[in] uar
236  *   User Access Region object.
237  * @param[in] pdn
238  *   Protection Domain number to use.
239  * @param[in] log_desc_n
240  *   Log of number of descriptors in queue.
241  * @param[in] ts_format
242  *   Timestamp format supported by the queue.
243  *
244  * @return
245  *   0 on success, a negative errno value otherwise and rte_errno is set.
246  */
247 static int
248 mlx5_aso_sq_create(void *ctx, struct mlx5_aso_sq *sq, int socket, void *uar,
249                    uint32_t pdn, uint16_t log_desc_n, uint32_t ts_format)
250 {
251         struct mlx5_devx_create_sq_attr attr = {
252                 .user_index = 0xFFFF,
253                 .wq_attr = (struct mlx5_devx_wq_attr){
254                         .pd = pdn,
255                         .uar_page = mlx5_os_get_devx_uar_page_id(uar),
256                 },
257                 .ts_format = mlx5_ts_format_conv(ts_format),
258         };
259         struct mlx5_devx_modify_sq_attr modify_attr = {
260                 .state = MLX5_SQC_STATE_RDY,
261         };
262         uint16_t log_wqbb_n;
263         int ret;
264
265         if (mlx5_aso_cq_create(ctx, &sq->cq, log_desc_n, socket,
266                                mlx5_os_get_devx_uar_page_id(uar)))
267                 goto error;
268         sq->log_desc_n = log_desc_n;
269         attr.cqn = sq->cq.cq_obj.cq->id;
270         /* An mlx5_aso_wqe is twice the size of an mlx5_wqe, hence log + 1. */
271         log_wqbb_n = log_desc_n + 1;
272         ret = mlx5_devx_sq_create(ctx, &sq->sq_obj, log_wqbb_n, &attr, socket);
273         if (ret) {
274                 DRV_LOG(ERR, "Can't create SQ object.");
275                 rte_errno = ENOMEM;
276                 goto error;
277         }
278         ret = mlx5_devx_cmd_modify_sq(sq->sq_obj.sq, &modify_attr);
279         if (ret) {
280                 DRV_LOG(ERR, "Can't change SQ state to ready.");
281                 rte_errno = ENOMEM;
282                 goto error;
283         }
284         sq->pi = 0;
285         sq->head = 0;
286         sq->tail = 0;
287         sq->sqn = sq->sq_obj.sq->id;
288         rte_spinlock_init(&sq->sqsl);
289         return 0;
290 error:
291         mlx5_aso_destroy_sq(sq);
292         return -1;
293 }
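
/*
 * Editor's note (sketch, not part of the driver): a WQEBB (WQE basic
 * block) is 64 bytes and struct mlx5_aso_wqe spans two of them, which is
 * why the SQ above is sized with log_wqbb_n = log_desc_n + 1 and why the
 * producer index is advanced by 2 per posted WQE elsewhere in this file.
 */
static inline uint16_t
example_wqbb_count(uint16_t log_desc_n)
{
	/* Two WQEBBs per ASO WQE: double the descriptor count. */
	return 1 << (log_desc_n + 1);
}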
294
295 /**
296  * API to create and initialize Send Queue used for ASO access.
297  *
298  * @param[in] sh
299  *   Pointer to shared device context.
300  * @param[in] aso_opc_mod
301  *   Mode of ASO feature.
302  *
303  * @return
304  *   0 on success, a negative errno value otherwise and rte_errno is set.
305  */
306 int
307 mlx5_aso_queue_init(struct mlx5_dev_ctx_shared *sh,
308                     enum mlx5_access_aso_opc_mod aso_opc_mod)
309 {
310         uint32_t sq_desc_n = 1 << MLX5_ASO_QUEUE_LOG_DESC;
311         struct mlx5_common_device *cdev = sh->cdev;
312
313         switch (aso_opc_mod) {
314         case ASO_OPC_MOD_FLOW_HIT:
315                 if (mlx5_aso_reg_mr(cdev, (MLX5_ASO_AGE_ACTIONS_PER_POOL / 8) *
316                                     sq_desc_n, &sh->aso_age_mng->aso_sq.mr, 0))
317                         return -1;
318                 if (mlx5_aso_sq_create(cdev->ctx, &sh->aso_age_mng->aso_sq, 0,
319                                        sh->tx_uar.obj, cdev->pdn,
320                                        MLX5_ASO_QUEUE_LOG_DESC,
321                                        cdev->config.hca_attr.sq_ts_format)) {
322                         mlx5_aso_dereg_mr(cdev, &sh->aso_age_mng->aso_sq.mr);
323                         return -1;
324                 }
325                 mlx5_aso_age_init_sq(&sh->aso_age_mng->aso_sq);
326                 break;
327         case ASO_OPC_MOD_POLICER:
328                 if (mlx5_aso_sq_create(cdev->ctx, &sh->mtrmng->pools_mng.sq, 0,
329                                        sh->tx_uar.obj, cdev->pdn,
330                                        MLX5_ASO_QUEUE_LOG_DESC,
331                                        cdev->config.hca_attr.sq_ts_format))
332                         return -1;
333                 mlx5_aso_mtr_init_sq(&sh->mtrmng->pools_mng.sq);
334                 break;
335         case ASO_OPC_MOD_CONNECTION_TRACKING:
336                 /* 64B per object for query. */
337                 if (mlx5_aso_reg_mr(cdev, 64 * sq_desc_n,
338                                     &sh->ct_mng->aso_sq.mr, 0))
339                         return -1;
340                 if (mlx5_aso_sq_create(cdev->ctx, &sh->ct_mng->aso_sq, 0,
341                                        sh->tx_uar.obj, cdev->pdn,
342                                        MLX5_ASO_QUEUE_LOG_DESC,
343                                        cdev->config.hca_attr.sq_ts_format)) {
344                         mlx5_aso_dereg_mr(cdev, &sh->ct_mng->aso_sq.mr);
345                         return -1;
346                 }
347                 mlx5_aso_ct_init_sq(&sh->ct_mng->aso_sq);
348                 break;
349         default:
350                 DRV_LOG(ERR, "Unknown ASO operation mode");
351                 return -1;
352         }
353         return 0;
354 }
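
/*
 * Editor's note (sketch, not part of the driver): the MR sizes used above
 * follow from what each mode reads back per descriptor. Flow-hit aging
 * needs one bit per action, i.e. MLX5_ASO_AGE_ACTIONS_PER_POOL / 8 bytes,
 * while connection tracking reads back a whole 64-byte object. The
 * "example_" helpers are hypothetical.
 */
static inline size_t
example_age_mr_size(size_t sq_desc_n)
{
	return (MLX5_ASO_AGE_ACTIONS_PER_POOL / 8) * sq_desc_n;
}

static inline size_t
example_ct_mr_size(size_t sq_desc_n)
{
	return 64 * sq_desc_n; /* 64B query buffer per CT object. */
}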
355
356 /**
357  * API to destroy Send Queue used for ASO access.
358  *
359  * @param[in] sh
360  *   Pointer to shared device context.
361  * @param[in] aso_opc_mod
362  *   Mode of ASO feature.
363  */
364 void
365 mlx5_aso_queue_uninit(struct mlx5_dev_ctx_shared *sh,
366                       enum mlx5_access_aso_opc_mod aso_opc_mod)
367 {
368         struct mlx5_aso_sq *sq;
369
370         switch (aso_opc_mod) {
371         case ASO_OPC_MOD_FLOW_HIT:
372                 mlx5_aso_dereg_mr(sh->cdev, &sh->aso_age_mng->aso_sq.mr);
373                 sq = &sh->aso_age_mng->aso_sq;
374                 break;
375         case ASO_OPC_MOD_POLICER:
376                 sq = &sh->mtrmng->pools_mng.sq;
377                 break;
378         case ASO_OPC_MOD_CONNECTION_TRACKING:
379                 mlx5_aso_dereg_mr(sh->cdev, &sh->ct_mng->aso_sq.mr);
380                 sq = &sh->ct_mng->aso_sq;
381                 break;
382         default:
383                 DRV_LOG(ERR, "Unknown ASO operation mode");
384                 return;
385         }
386         mlx5_aso_destroy_sq(sq);
387 }
388
389 /**
390  * Write a burst of WQEs to ASO SQ.
391  *
392  * @param[in] sh
393  *   Pointer to shared device context.
394  * @param[in] n
395  *   Index of the last valid pool.
396  *
397  * @return
398  *   Number of WQEs in burst.
399  */
400 static uint16_t
401 mlx5_aso_sq_enqueue_burst(struct mlx5_dev_ctx_shared *sh, uint16_t n)
402 {
403         struct mlx5_aso_age_mng *mng = sh->aso_age_mng;
404         volatile struct mlx5_aso_wqe *wqe;
405         struct mlx5_aso_sq *sq = &mng->aso_sq;
406         struct mlx5_aso_age_pool *pool;
407         uint16_t size = 1 << sq->log_desc_n;
408         uint16_t mask = size - 1;
409         uint16_t max;
410         uint16_t start_head = sq->head;
411
412         max = RTE_MIN(size - (uint16_t)(sq->head - sq->tail), n - sq->next);
413         if (unlikely(!max))
414                 return 0;
415         sq->elts[start_head & mask].burst_size = max;
416         do {
417                 wqe = &sq->sq_obj.aso_wqes[sq->head & mask];
418                 rte_prefetch0(&sq->sq_obj.aso_wqes[(sq->head + 1) & mask]);
419                 /* Fill next WQE. */
420                 rte_rwlock_read_lock(&mng->resize_rwl);
421                 pool = mng->pools[sq->next];
422                 rte_rwlock_read_unlock(&mng->resize_rwl);
423                 sq->elts[sq->head & mask].pool = pool;
424                 wqe->general_cseg.misc =
425                                 rte_cpu_to_be_32(((struct mlx5_devx_obj *)
426                                                  (pool->flow_hit_aso_obj))->id);
427                 wqe->general_cseg.flags = RTE_BE32(MLX5_COMP_ONLY_FIRST_ERR <<
428                                                          MLX5_COMP_MODE_OFFSET);
429                 wqe->general_cseg.opcode = rte_cpu_to_be_32
430                                                 (MLX5_OPCODE_ACCESS_ASO |
431                                                  (ASO_OPC_MOD_FLOW_HIT <<
432                                                   WQE_CSEG_OPC_MOD_OFFSET) |
433                                                  (sq->pi <<
434                                                   WQE_CSEG_WQE_INDEX_OFFSET));
435                 sq->pi += 2; /* Each WQE contains 2 WQEBBs. */
436                 sq->head++;
437                 sq->next++;
438                 max--;
439         } while (max);
440         wqe->general_cseg.flags = RTE_BE32(MLX5_COMP_ALWAYS <<
441                                                          MLX5_COMP_MODE_OFFSET);
442         mlx5_doorbell_ring(&sh->tx_uar.bf_db, *(volatile uint64_t *)wqe,
443                            sq->pi, &sq->sq_obj.db_rec[MLX5_SND_DBR],
444                            !sh->tx_uar.dbnc);
445         return sq->elts[start_head & mask].burst_size;
446 }
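
/*
 * Editor's note (sketch, not part of the driver): head and tail are free
 * running 16-bit counters, so the occupancy is head - tail under unsigned
 * wrap-around and the free space follows directly. Note also the
 * completion strategy above: every WQE in the burst requests a CQE only
 * on error (MLX5_COMP_ONLY_FIRST_ERR) and the last one is patched to
 * MLX5_COMP_ALWAYS, so a single CQE acknowledges the whole burst.
 */
static inline uint16_t
example_sq_free_slots(const struct mlx5_aso_sq *sq)
{
	uint16_t size = 1 << sq->log_desc_n;

	return size - (uint16_t)(sq->head - sq->tail);
}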
447
448 /**
449  * Debug utility function. Dump contents of error CQE and WQE.
450  *
451  * @param[in] cqe
452  *   Error CQE to dump.
453  * @param[in] wqe
454  *   Error WQE to dump.
455  */
456 static void
457 mlx5_aso_dump_err_objs(volatile uint32_t *cqe, volatile uint32_t *wqe)
458 {
459         int i;
460
461         DRV_LOG(ERR, "Error cqe:");
462         for (i = 0; i < 16; i += 4)
463                 DRV_LOG(ERR, "%08X %08X %08X %08X", cqe[i], cqe[i + 1],
464                         cqe[i + 2], cqe[i + 3]);
465         DRV_LOG(ERR, "\nError wqe:");
466         for (i = 0; i < (int)sizeof(struct mlx5_aso_wqe) / 4; i += 4)
467                 DRV_LOG(ERR, "%08X %08X %08X %08X", wqe[i], wqe[i + 1],
468                         wqe[i + 2], wqe[i + 3]);
469 }
470
471 /**
472  * Handle case of error CQE.
473  *
474  * @param[in] sq
475  *   ASO SQ to use.
476  */
477 static void
478 mlx5_aso_cqe_err_handle(struct mlx5_aso_sq *sq)
479 {
480         struct mlx5_aso_cq *cq = &sq->cq;
481         uint32_t idx = cq->cq_ci & ((1 << cq->log_desc_n) - 1);
482         volatile struct mlx5_err_cqe *cqe =
483                         (volatile struct mlx5_err_cqe *)&cq->cq_obj.cqes[idx];
484
485         cq->errors++;
486         idx = rte_be_to_cpu_16(cqe->wqe_counter) & ((1u << sq->log_desc_n) - 1);
487         mlx5_aso_dump_err_objs((volatile uint32_t *)cqe,
488                                (volatile uint32_t *)&sq->sq_obj.aso_wqes[idx]);
489 }
490
491 /**
492  * Update ASO objects upon completion.
493  *
494  * @param[in] sh
495  *   Shared device context.
496  * @param[in] n
497  *   Number of completed ASO objects.
498  */
499 static void
500 mlx5_aso_age_action_update(struct mlx5_dev_ctx_shared *sh, uint16_t n)
501 {
502         struct mlx5_aso_age_mng *mng = sh->aso_age_mng;
503         struct mlx5_aso_sq *sq = &mng->aso_sq;
504         struct mlx5_age_info *age_info;
505         const uint16_t size = 1 << sq->log_desc_n;
506         const uint16_t mask = size - 1;
507         const uint64_t curr = MLX5_CURR_TIME_SEC;
508         uint16_t expected = AGE_CANDIDATE;
509         uint16_t i;
510
511         for (i = 0; i < n; ++i) {
512                 uint16_t idx = (sq->tail + i) & mask;
513                 struct mlx5_aso_age_pool *pool = sq->elts[idx].pool;
514                 uint64_t diff = curr - pool->time_of_last_age_check;
515                 uint64_t *addr = sq->mr.addr;
516                 int j;
517
518                 addr += idx * MLX5_ASO_AGE_ACTIONS_PER_POOL / 64;
519                 pool->time_of_last_age_check = curr;
520                 for (j = 0; j < MLX5_ASO_AGE_ACTIONS_PER_POOL; j++) {
521                         struct mlx5_aso_age_action *act = &pool->actions[j];
522                         struct mlx5_age_param *ap = &act->age_params;
523                         uint8_t byte;
524                         uint8_t offset;
525                         uint8_t *u8addr;
526                         uint8_t hit;
527
528                         if (__atomic_load_n(&ap->state, __ATOMIC_RELAXED) !=
529                                             AGE_CANDIDATE)
530                                 continue;
531                         byte = 63 - (j / 8);
532                         offset = j % 8;
533                         u8addr = (uint8_t *)addr;
534                         hit = (u8addr[byte] >> offset) & 0x1;
535                         if (hit) {
536                                 __atomic_store_n(&ap->sec_since_last_hit, 0,
537                                                  __ATOMIC_RELAXED);
538                         } else {
539                                 struct mlx5_priv *priv;
540
541                                 __atomic_fetch_add(&ap->sec_since_last_hit,
542                                                    diff, __ATOMIC_RELAXED);
543                                 /* If timeout passed add to aged-out list. */
544                                 if (ap->sec_since_last_hit <= ap->timeout)
545                                         continue;
546                                 priv =
547                                 rte_eth_devices[ap->port_id].data->dev_private;
548                                 age_info = GET_PORT_AGE_INFO(priv);
549                                 rte_spinlock_lock(&age_info->aged_sl);
550                                 if (__atomic_compare_exchange_n(&ap->state,
551                                                                 &expected,
552                                                                 AGE_TMOUT,
553                                                                 false,
554                                                                __ATOMIC_RELAXED,
555                                                             __ATOMIC_RELAXED)) {
556                                         LIST_INSERT_HEAD(&age_info->aged_aso,
557                                                          act, next);
558                                         MLX5_AGE_SET(age_info,
559                                                      MLX5_AGE_EVENT_NEW);
560                                 }
561                                 rte_spinlock_unlock(&age_info->aged_sl);
562                         }
563                 }
564         }
565         mlx5_age_event_prepare(sh);
566 }
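
/*
 * Editor's worked example (sketch, not part of the driver): the 64-byte
 * hit bitmap is parsed most significant byte first, hence
 * byte = 63 - j / 8. For action j = 10 that is byte 62, bit offset 2
 * (10 % 8). Hypothetical helper:
 */
static inline uint8_t
example_age_hit_bit(const uint8_t *bitmap64, int j)
{
	uint8_t byte = 63 - (j / 8); /* Bytes are laid out in reverse. */
	uint8_t offset = j % 8;      /* Bit position inside the byte. */

	return (bitmap64[byte] >> offset) & 0x1;
}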
567
568 /**
569  * Handle completions from WQEs sent to ASO SQ.
570  *
571  * @param[in] sh
572  *   Shared device context.
573  *
574  * @return
575  *   Number of CQEs handled.
576  */
577 static uint16_t
578 mlx5_aso_completion_handle(struct mlx5_dev_ctx_shared *sh)
579 {
580         struct mlx5_aso_age_mng *mng = sh->aso_age_mng;
581         struct mlx5_aso_sq *sq = &mng->aso_sq;
582         struct mlx5_aso_cq *cq = &sq->cq;
583         volatile struct mlx5_cqe *restrict cqe;
584         const unsigned int cq_size = 1 << cq->log_desc_n;
585         const unsigned int mask = cq_size - 1;
586         uint32_t idx;
587         uint32_t next_idx = cq->cq_ci & mask;
588         const uint16_t max = (uint16_t)(sq->head - sq->tail);
589         uint16_t i = 0;
590         int ret;
591         if (unlikely(!max))
592                 return 0;
593         do {
594                 idx = next_idx;
595                 next_idx = (cq->cq_ci + 1) & mask;
596                 rte_prefetch0(&cq->cq_obj.cqes[next_idx]);
597                 cqe = &cq->cq_obj.cqes[idx];
598                 ret = check_cqe(cqe, cq_size, cq->cq_ci);
599                 /*
600                  * Be sure owner read is done before any other cookie field or
601                  * opaque field.
602                  */
603                 rte_io_rmb();
604                 if (unlikely(ret != MLX5_CQE_STATUS_SW_OWN)) {
605                         if (likely(ret == MLX5_CQE_STATUS_HW_OWN))
606                                 break;
607                         mlx5_aso_cqe_err_handle(sq);
608                 } else {
609                         i += sq->elts[(sq->tail + i) & mask].burst_size;
610                 }
611                 cq->cq_ci++;
612         } while (1);
613         if (likely(i)) {
614                 mlx5_aso_age_action_update(sh, i);
615                 sq->tail += i;
616                 rte_io_wmb();
617                 cq->cq_obj.db_rec[0] = rte_cpu_to_be_32(cq->cq_ci);
618         }
619         return i;
620 }
621
622 /**
623  * Periodically read CQEs and send WQEs to ASO SQ.
624  *
625  * @param[in] arg
626  *   Shared device context containing the ASO SQ.
627  */
628 static void
629 mlx5_flow_aso_alarm(void *arg)
630 {
631         struct mlx5_dev_ctx_shared *sh = arg;
632         struct mlx5_aso_sq *sq = &sh->aso_age_mng->aso_sq;
633         uint32_t us = 100u;
634         uint16_t n;
635
636         rte_rwlock_read_lock(&sh->aso_age_mng->resize_rwl);
637         n = sh->aso_age_mng->next;
638         rte_rwlock_read_unlock(&sh->aso_age_mng->resize_rwl);
639         mlx5_aso_completion_handle(sh);
640         if (sq->next == n) {
641                 /* End of loop: wait 1 second. */
642                 us = US_PER_S;
643                 sq->next = 0;
644         }
645         mlx5_aso_sq_enqueue_burst(sh, n);
646         if (rte_eal_alarm_set(us, mlx5_flow_aso_alarm, sh))
647                 DRV_LOG(ERR, "Cannot reinitialize aso alarm.");
648 }
649
650 /**
651  * API to start ASO access using ASO SQ.
652  *
653  * @param[in] sh
654  *   Pointer to shared device context.
655  *
656  * @return
657  *   0 on success, a negative errno value otherwise and rte_errno is set.
658  */
659 int
660 mlx5_aso_flow_hit_queue_poll_start(struct mlx5_dev_ctx_shared *sh)
661 {
662         if (rte_eal_alarm_set(US_PER_S, mlx5_flow_aso_alarm, sh)) {
663                 DRV_LOG(ERR, "Cannot reinitialize ASO age alarm.");
664                 return -rte_errno;
665         }
666         return 0;
667 }
668
669 /**
670  * API to stop ASO access using ASO SQ.
671  *
672  * @param[in] sh
673  *   Pointer to shared device context.
674  *
675  * @return
676  *   0 on success, a negative errno value otherwise and rte_errno is set.
677  */
678 int
679 mlx5_aso_flow_hit_queue_poll_stop(struct mlx5_dev_ctx_shared *sh)
680 {
681         int retries = 1024;
682
683         if (!sh->aso_age_mng->aso_sq.sq_obj.sq)
684                 return -EINVAL;
685         rte_errno = 0;
686         while (--retries) {
687                 rte_eal_alarm_cancel(mlx5_flow_aso_alarm, sh);
688                 if (rte_errno != EINPROGRESS)
689                         break;
690                 rte_pause();
691         }
692         return -rte_errno;
693 }
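
/*
 * Editor's usage sketch (hypothetical, not called by the driver): the
 * poll thread is an EAL alarm that re-arms itself, so stopping it must
 * both cancel the alarm and wait out a possibly in-flight callback
 * (EINPROGRESS), as the retry loop above does.
 */
static inline int
example_age_poll_lifecycle(struct mlx5_dev_ctx_shared *sh)
{
	if (mlx5_aso_flow_hit_queue_poll_start(sh))
		return -1;
	/* ... flows with AGE actions are created and aged here ... */
	return mlx5_aso_flow_hit_queue_poll_stop(sh);
}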
694
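/**
 * Post a WQE to the ASO meter SQ to configure the meter parameters.
 *
 * @param[in] sh
 *   Pointer to shared device context.
 * @param[in] sq
 *   Pointer to ASO meter SQ.
 * @param[in] aso_mtr
 *   Pointer to ASO meter to be configured.
 *
 * @return
 *   1 on success (one WQE posted), 0 when the SQ is full.
 */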
695 static uint16_t
696 mlx5_aso_mtr_sq_enqueue_single(struct mlx5_dev_ctx_shared *sh,
697                                struct mlx5_aso_sq *sq,
698                                struct mlx5_aso_mtr *aso_mtr)
699 {
700         volatile struct mlx5_aso_wqe *wqe = NULL;
701         struct mlx5_flow_meter_info *fm = NULL;
702         struct mlx5_flow_meter_profile *fmp;
703         uint16_t size = 1 << sq->log_desc_n;
704         uint16_t mask = size - 1;
705         uint16_t res;
706         uint32_t dseg_idx = 0;
707         struct mlx5_aso_mtr_pool *pool = NULL;
708
709         rte_spinlock_lock(&sq->sqsl);
710         res = size - (uint16_t)(sq->head - sq->tail);
711         if (unlikely(!res)) {
712                 DRV_LOG(ERR, "Fail: SQ is full and no free WQE to send");
713                 rte_spinlock_unlock(&sq->sqsl);
714                 return 0;
715         }
716         wqe = &sq->sq_obj.aso_wqes[sq->head & mask];
717         rte_prefetch0(&sq->sq_obj.aso_wqes[(sq->head + 1) & mask]);
718         /* Fill next WQE. */
719         fm = &aso_mtr->fm;
720         sq->elts[sq->head & mask].mtr = aso_mtr;
721         pool = container_of(aso_mtr, struct mlx5_aso_mtr_pool,
722                         mtrs[aso_mtr->offset]);
723         wqe->general_cseg.misc = rte_cpu_to_be_32(pool->devx_obj->id +
724                         (aso_mtr->offset >> 1));
725         wqe->general_cseg.opcode = rte_cpu_to_be_32(MLX5_OPCODE_ACCESS_ASO |
726                         (ASO_OPC_MOD_POLICER <<
727                         WQE_CSEG_OPC_MOD_OFFSET) |
728                         sq->pi << WQE_CSEG_WQE_INDEX_OFFSET);
729         /* There are 2 meters in one ASO cache line. */
730         dseg_idx = aso_mtr->offset & 0x1;
731         wqe->aso_cseg.data_mask =
732                 RTE_BE64(MLX5_IFC_FLOW_METER_PARAM_MASK << (32 * !dseg_idx));
733         if (fm->is_enable) {
734                 wqe->aso_dseg.mtrs[dseg_idx].cbs_cir =
735                         fm->profile->srtcm_prm.cbs_cir;
736                 wqe->aso_dseg.mtrs[dseg_idx].ebs_eir =
737                         fm->profile->srtcm_prm.ebs_eir;
738         } else {
739                 wqe->aso_dseg.mtrs[dseg_idx].cbs_cir =
740                         RTE_BE32(MLX5_IFC_FLOW_METER_DISABLE_CBS_CIR_VAL);
741                 wqe->aso_dseg.mtrs[dseg_idx].ebs_eir = 0;
742         }
743         fmp = fm->profile;
744         if (fmp->profile.packet_mode)
745                 wqe->aso_dseg.mtrs[dseg_idx].v_bo_sc_bbog_mm =
746                                 RTE_BE32((1 << ASO_DSEG_VALID_OFFSET) |
747                                 (MLX5_FLOW_COLOR_GREEN << ASO_DSEG_SC_OFFSET) |
748                                 (MLX5_METER_MODE_PKT << ASO_DSEG_MTR_MODE));
749         else
750                 wqe->aso_dseg.mtrs[dseg_idx].v_bo_sc_bbog_mm =
751                                 RTE_BE32((1 << ASO_DSEG_VALID_OFFSET) |
752                                 (MLX5_FLOW_COLOR_GREEN << ASO_DSEG_SC_OFFSET));
753         switch (fmp->profile.alg) {
754         case RTE_MTR_SRTCM_RFC2697:
755                 /* Only needed for RFC2697. */
756                 if (fm->profile->srtcm_prm.ebs_eir)
757                         wqe->aso_dseg.mtrs[dseg_idx].v_bo_sc_bbog_mm |=
758                                         RTE_BE32(1 << ASO_DSEG_BO_OFFSET);
759                 break;
760         case RTE_MTR_TRTCM_RFC2698:
761                 wqe->aso_dseg.mtrs[dseg_idx].v_bo_sc_bbog_mm |=
762                                 RTE_BE32(1 << ASO_DSEG_BBOG_OFFSET);
763                 break;
764         case RTE_MTR_TRTCM_RFC4115:
765         default:
766                 break;
767         }
768         /*
769          * Note:
770          * Due to software performance reason, the token fields will not be
771          * set when posting the WQE to ASO SQ. It will be filled by the HW
772          * automatically.
773          */
774         sq->head++;
775         sq->pi += 2; /* Each WQE contains 2 WQEBBs. */
776         mlx5_doorbell_ring(&sh->tx_uar.bf_db, *(volatile uint64_t *)wqe,
777                            sq->pi, &sq->sq_obj.db_rec[MLX5_SND_DBR],
778                            !sh->tx_uar.dbnc);
779         rte_spinlock_unlock(&sq->sqsl);
780         return 1;
781 }
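
/*
 * Editor's note (sketch, not part of the driver): one 64-byte ASO object
 * holds two meters, so bit 0 of the meter offset selects the data-segment
 * half (dseg_idx) while offset >> 1 advances the object id, and the
 * 64-bit data mask is shifted so that only this meter's half is written:
 */
static inline uint64_t
example_mtr_data_mask(uint32_t mtr_offset)
{
	uint32_t dseg_idx = mtr_offset & 0x1; /* Which half of the line. */

	return RTE_BE64(MLX5_IFC_FLOW_METER_PARAM_MASK << (32 * !dseg_idx));
}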
782
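/**
 * Update the state of a batch of ASO meters to ready upon completion.
 *
 * @param[in] sq
 *   Pointer to ASO meter SQ.
 * @param[in] aso_mtrs_nums
 *   Number of completed meters since the last update.
 */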
783 static void
784 mlx5_aso_mtrs_status_update(struct mlx5_aso_sq *sq, uint16_t aso_mtrs_nums)
785 {
786         uint16_t size = 1 << sq->log_desc_n;
787         uint16_t mask = size - 1;
788         uint16_t i;
789         struct mlx5_aso_mtr *aso_mtr = NULL;
790         uint8_t exp_state = ASO_METER_WAIT;
791
792         for (i = 0; i < aso_mtrs_nums; ++i) {
793                 aso_mtr = sq->elts[(sq->tail + i) & mask].mtr;
794                 MLX5_ASSERT(aso_mtr);
795                 (void)__atomic_compare_exchange_n(&aso_mtr->state,
796                                 &exp_state, ASO_METER_READY,
797                                 false, __ATOMIC_RELAXED, __ATOMIC_RELAXED);
798         }
799 }
800
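/**
 * Handle completions from WQEs sent to the ASO meter SQ.
 *
 * @param[in] sq
 *   Pointer to ASO meter SQ.
 */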
801 static void
802 mlx5_aso_mtr_completion_handle(struct mlx5_aso_sq *sq)
803 {
804         struct mlx5_aso_cq *cq = &sq->cq;
805         volatile struct mlx5_cqe *restrict cqe;
806         const unsigned int cq_size = 1 << cq->log_desc_n;
807         const unsigned int mask = cq_size - 1;
808         uint32_t idx;
809         uint32_t next_idx = cq->cq_ci & mask;
810         uint16_t max;
811         uint16_t n = 0;
812         int ret;
813
814         rte_spinlock_lock(&sq->sqsl);
815         max = (uint16_t)(sq->head - sq->tail);
816         if (unlikely(!max)) {
817                 rte_spinlock_unlock(&sq->sqsl);
818                 return;
819         }
820         do {
821                 idx = next_idx;
822                 next_idx = (cq->cq_ci + 1) & mask;
823                 rte_prefetch0(&cq->cq_obj.cqes[next_idx]);
824                 cqe = &cq->cq_obj.cqes[idx];
825                 ret = check_cqe(cqe, cq_size, cq->cq_ci);
826                 /*
827                  * Be sure owner read is done before any other cookie field or
828                  * opaque field.
829                  */
830                 rte_io_rmb();
831                 if (ret != MLX5_CQE_STATUS_SW_OWN) {
832                         if (likely(ret == MLX5_CQE_STATUS_HW_OWN))
833                                 break;
834                         mlx5_aso_cqe_err_handle(sq);
835                 } else {
836                         n++;
837                 }
838                 cq->cq_ci++;
839         } while (1);
840         if (likely(n)) {
841                 mlx5_aso_mtrs_status_update(sq, n);
842                 sq->tail += n;
843                 rte_io_wmb();
844                 cq->cq_obj.db_rec[0] = rte_cpu_to_be_32(cq->cq_ci);
845         }
846         rte_spinlock_unlock(&sq->sqsl);
847 }
848
849 /**
850  * Update the meter parameters by sending a WQE.
851  *
852  * @param[in] sh
853  *   Pointer to shared device context.
854  * @param[in] mtr
855  *   Pointer to ASO meter to be modified.
858  *
859  * @return
860  *   0 on success, a negative errno value otherwise and rte_errno is set.
861  */
862 int
863 mlx5_aso_meter_update_by_wqe(struct mlx5_dev_ctx_shared *sh,
864                         struct mlx5_aso_mtr *mtr)
865 {
866         struct mlx5_aso_sq *sq = &sh->mtrmng->pools_mng.sq;
867         uint32_t poll_wqe_times = MLX5_MTR_POLL_WQE_CQE_TIMES;
868
869         do {
870                 mlx5_aso_mtr_completion_handle(sq);
871                 if (mlx5_aso_mtr_sq_enqueue_single(sh, sq, mtr))
872                         return 0;
873                 /* Waiting for wqe resource. */
874                 rte_delay_us_sleep(MLX5_ASO_WQE_CQE_RESPONSE_DELAY);
875         } while (--poll_wqe_times);
876         DRV_LOG(ERR, "Fail to send WQE for ASO meter offset %d",
877                         mtr->offset);
878         return -1;
879 }
880
881 /**
882  * Wait for meter to be ready.
883  *
884  * @param[in] sh
885  *   Pointer to shared device context.
886  * @param[in] mtr
887  *   Pointer to ASO meter to wait on.
890  *
891  * @return
892  *   0 on success, a negative errno value otherwise and rte_errno is set.
893  */
894 int
895 mlx5_aso_mtr_wait(struct mlx5_dev_ctx_shared *sh,
896                         struct mlx5_aso_mtr *mtr)
897 {
898         struct mlx5_aso_sq *sq = &sh->mtrmng->pools_mng.sq;
899         uint32_t poll_cqe_times = MLX5_MTR_POLL_WQE_CQE_TIMES;
900
901         if (__atomic_load_n(&mtr->state, __ATOMIC_RELAXED) ==
902                                             ASO_METER_READY)
903                 return 0;
904         do {
905                 mlx5_aso_mtr_completion_handle(sq);
906                 if (__atomic_load_n(&mtr->state, __ATOMIC_RELAXED) ==
907                                             ASO_METER_READY)
908                         return 0;
909                 /* Waiting for CQE ready. */
910                 rte_delay_us_sleep(MLX5_ASO_WQE_CQE_RESPONSE_DELAY);
911         } while (--poll_cqe_times);
912         DRV_LOG(ERR, "Fail to poll CQE ready for ASO meter offset %d",
913                         mtr->offset);
914         return -1;
915 }
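
/*
 * Editor's usage sketch (hypothetical, not called by the driver): meter
 * updates are asynchronous; the WQE is posted first and the caller either
 * blocks in mlx5_aso_mtr_wait() or checks the ASO_METER_READY state later.
 */
static inline int
example_mtr_update_sync(struct mlx5_dev_ctx_shared *sh,
			struct mlx5_aso_mtr *mtr)
{
	if (mlx5_aso_meter_update_by_wqe(sh, mtr))
		return -1; /* Could not post the WQE. */
	return mlx5_aso_mtr_wait(sh, mtr); /* Poll until HW completion. */
}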
916
917 /*
918  * Post a WQE to the ASO CT SQ to modify the context.
919  *
920  * @param[in] sh
921  *   Pointer to shared device context.
922  * @param[in] ct
923  *   Pointer to the generic CT structure related to the context.
924  * @param[in] profile
925  *   Pointer to configuration profile.
926  *
927  * @return
928  *   1 on success (WQE number), 0 on failure.
929  */
930 static uint16_t
931 mlx5_aso_ct_sq_enqueue_single(struct mlx5_dev_ctx_shared *sh,
932                               struct mlx5_aso_ct_action *ct,
933                               const struct rte_flow_action_conntrack *profile)
934 {
935         volatile struct mlx5_aso_wqe *wqe = NULL;
936         struct mlx5_aso_sq *sq = &sh->ct_mng->aso_sq;
937         uint16_t size = 1 << sq->log_desc_n;
938         uint16_t mask = size - 1;
939         uint16_t res;
940         struct mlx5_aso_ct_pool *pool;
941         void *desg;
942         void *orig_dir;
943         void *reply_dir;
944
945         rte_spinlock_lock(&sq->sqsl);
946         /* Prevent other threads from updating the index. */
947         res = size - (uint16_t)(sq->head - sq->tail);
948         if (unlikely(!res)) {
949                 rte_spinlock_unlock(&sq->sqsl);
950                 DRV_LOG(ERR, "Fail: SQ is full and no free WQE to send");
951                 return 0;
952         }
953         wqe = &sq->sq_obj.aso_wqes[sq->head & mask];
954         rte_prefetch0(&sq->sq_obj.aso_wqes[(sq->head + 1) & mask]);
955         /* Fill next WQE. */
956         MLX5_ASO_CT_UPDATE_STATE(ct, ASO_CONNTRACK_WAIT);
957         sq->elts[sq->head & mask].ct = ct;
958         sq->elts[sq->head & mask].query_data = NULL;
959         pool = container_of(ct, struct mlx5_aso_ct_pool, actions[ct->offset]);
960         /* Each WQE will have a single CT object. */
961         wqe->general_cseg.misc = rte_cpu_to_be_32(pool->devx_obj->id +
962                                                   ct->offset);
963         wqe->general_cseg.opcode = rte_cpu_to_be_32(MLX5_OPCODE_ACCESS_ASO |
964                         (ASO_OPC_MOD_CONNECTION_TRACKING <<
965                          WQE_CSEG_OPC_MOD_OFFSET) |
966                         sq->pi << WQE_CSEG_WQE_INDEX_OFFSET);
967         wqe->aso_cseg.operand_masks = rte_cpu_to_be_32
968                         (0u |
969                          (ASO_OPER_LOGICAL_OR << ASO_CSEG_COND_OPER_OFFSET) |
970                          (ASO_OP_ALWAYS_TRUE << ASO_CSEG_COND_1_OPER_OFFSET) |
971                          (ASO_OP_ALWAYS_TRUE << ASO_CSEG_COND_0_OPER_OFFSET) |
972                          (BYTEWISE_64BYTE << ASO_CSEG_DATA_MASK_MODE_OFFSET));
973         wqe->aso_cseg.data_mask = UINT64_MAX;
974         /* Cast away volatile to make the compiler happy. */
975         desg = (void *)(uintptr_t)wqe->aso_dseg.data;
976         MLX5_SET(conn_track_aso, desg, valid, 1);
977         MLX5_SET(conn_track_aso, desg, state, profile->state);
978         MLX5_SET(conn_track_aso, desg, freeze_track, !profile->enable);
979         MLX5_SET(conn_track_aso, desg, connection_assured,
980                  profile->live_connection);
981         MLX5_SET(conn_track_aso, desg, sack_permitted, profile->selective_ack);
982         MLX5_SET(conn_track_aso, desg, challenged_acked,
983                  profile->challenge_ack_passed);
984         /* Heartbeat, retransmission_counter, retranmission_limit_exceeded: 0 */
985         MLX5_SET(conn_track_aso, desg, heartbeat, 0);
986         MLX5_SET(conn_track_aso, desg, max_ack_window,
987                  profile->max_ack_window);
988         MLX5_SET(conn_track_aso, desg, retransmission_counter, 0);
989         MLX5_SET(conn_track_aso, desg, retranmission_limit_exceeded, 0);
990         MLX5_SET(conn_track_aso, desg, retranmission_limit,
991                  profile->retransmission_limit);
992         MLX5_SET(conn_track_aso, desg, reply_direction_tcp_scale,
993                  profile->reply_dir.scale);
994         MLX5_SET(conn_track_aso, desg, reply_direction_tcp_close_initiated,
995                  profile->reply_dir.close_initiated);
996         /* Both directions will use the same liberal mode. */
997         MLX5_SET(conn_track_aso, desg, reply_direction_tcp_liberal_enabled,
998                  profile->liberal_mode);
999         MLX5_SET(conn_track_aso, desg, reply_direction_tcp_data_unacked,
1000                  profile->reply_dir.data_unacked);
1001         MLX5_SET(conn_track_aso, desg, reply_direction_tcp_max_ack,
1002                  profile->reply_dir.last_ack_seen);
1003         MLX5_SET(conn_track_aso, desg, original_direction_tcp_scale,
1004                  profile->original_dir.scale);
1005         MLX5_SET(conn_track_aso, desg, original_direction_tcp_close_initiated,
1006                  profile->original_dir.close_initiated);
1007         MLX5_SET(conn_track_aso, desg, original_direction_tcp_liberal_enabled,
1008                  profile->liberal_mode);
1009         MLX5_SET(conn_track_aso, desg, original_direction_tcp_data_unacked,
1010                  profile->original_dir.data_unacked);
1011         MLX5_SET(conn_track_aso, desg, original_direction_tcp_max_ack,
1012                  profile->original_dir.last_ack_seen);
1013         MLX5_SET(conn_track_aso, desg, last_win, profile->last_window);
1014         MLX5_SET(conn_track_aso, desg, last_dir, profile->last_direction);
1015         MLX5_SET(conn_track_aso, desg, last_index, profile->last_index);
1016         MLX5_SET(conn_track_aso, desg, last_seq, profile->last_seq);
1017         MLX5_SET(conn_track_aso, desg, last_ack, profile->last_ack);
1018         MLX5_SET(conn_track_aso, desg, last_end, profile->last_end);
1019         orig_dir = MLX5_ADDR_OF(conn_track_aso, desg, original_dir);
1020         MLX5_SET(tcp_window_params, orig_dir, sent_end,
1021                  profile->original_dir.sent_end);
1022         MLX5_SET(tcp_window_params, orig_dir, reply_end,
1023                  profile->original_dir.reply_end);
1024         MLX5_SET(tcp_window_params, orig_dir, max_win,
1025                  profile->original_dir.max_win);
1026         MLX5_SET(tcp_window_params, orig_dir, max_ack,
1027                  profile->original_dir.max_ack);
1028         reply_dir = MLX5_ADDR_OF(conn_track_aso, desg, reply_dir);
1029         MLX5_SET(tcp_window_params, reply_dir, sent_end,
1030                  profile->reply_dir.sent_end);
1031         MLX5_SET(tcp_window_params, reply_dir, reply_end,
1032                  profile->reply_dir.reply_end);
1033         MLX5_SET(tcp_window_params, reply_dir, max_win,
1034                  profile->reply_dir.max_win);
1035         MLX5_SET(tcp_window_params, reply_dir, max_ack,
1036                  profile->reply_dir.max_ack);
1037         sq->head++;
1038         sq->pi += 2; /* Each WQE contains 2 WQEBBs. */
1039         mlx5_doorbell_ring(&sh->tx_uar.bf_db, *(volatile uint64_t *)wqe,
1040                            sq->pi, &sq->sq_obj.db_rec[MLX5_SND_DBR],
1041                            !sh->tx_uar.dbnc);
1042         rte_spinlock_unlock(&sq->sqsl);
1043         return 1;
1044 }
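
/*
 * Editor's note (sketch, not part of the driver): MLX5_SET() composes one
 * named field of a PRM-described structure into the big-endian data
 * segment, so the whole 64-byte context above is built field by field and
 * then written in one shot thanks to the BYTEWISE_64BYTE mask mode.
 * Hypothetical single-field example:
 */
static inline void
example_ct_set_state(void *desg, uint8_t tcp_state)
{
	MLX5_SET(conn_track_aso, desg, state, tcp_state);
}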
1045
1046 /*
1047  * Update the status field of CTs to indicate they are ready to be used
1048  * by flows. Handles the contiguous batch of CTs completed since the
1049  * last update.
1050  *
1051  * @param[in] sq
1052  *   Pointer to ASO CT SQ.
1053  * @param[in] num
1054  *   Number of CT structures to be updated.
1057  */
1058 static void
1059 mlx5_aso_ct_status_update(struct mlx5_aso_sq *sq, uint16_t num)
1060 {
1061         uint16_t size = 1 << sq->log_desc_n;
1062         uint16_t mask = size - 1;
1063         uint16_t i;
1064         struct mlx5_aso_ct_action *ct = NULL;
1065         uint16_t idx;
1066
1067         for (i = 0; i < num; i++) {
1068                 idx = (uint16_t)((sq->tail + i) & mask);
1069                 ct = sq->elts[idx].ct;
1070                 MLX5_ASSERT(ct);
1071                 MLX5_ASO_CT_UPDATE_STATE(ct, ASO_CONNTRACK_READY);
1072                 if (sq->elts[idx].query_data)
1073                         rte_memcpy(sq->elts[idx].query_data,
1074                                    (char *)((uintptr_t)sq->mr.addr + idx * 64),
1075                                    64);
1076         }
1077 }
1078
1079 /*
1080  * Post a WQE to the ASO CT SQ to query the current context.
1081  *
1082  * @param[in] sh
1083  *   Pointer to shared device context.
1084  * @param[in] ct
1085  *   Pointer to the generic CT structure related to the context.
1086  * @param[in] data
1087  *   Pointer to data area to be filled.
1088  *
1089  * @return
1090  *   1 on success (one WQE posted), 0 if it must be retried, -1 on failure.
1091  */
1092 static int
1093 mlx5_aso_ct_sq_query_single(struct mlx5_dev_ctx_shared *sh,
1094                             struct mlx5_aso_ct_action *ct, char *data)
1095 {
1096         volatile struct mlx5_aso_wqe *wqe = NULL;
1097         struct mlx5_aso_sq *sq = &sh->ct_mng->aso_sq;
1098         uint16_t size = 1 << sq->log_desc_n;
1099         uint16_t mask = size - 1;
1100         uint16_t res;
1101         uint16_t wqe_idx;
1102         struct mlx5_aso_ct_pool *pool;
1103         enum mlx5_aso_ct_state state =
1104                                 __atomic_load_n(&ct->state, __ATOMIC_RELAXED);
1105
1106         if (state == ASO_CONNTRACK_FREE) {
1107                 DRV_LOG(ERR, "Fail: No context to query");
1108                 return -1;
1109         } else if (state == ASO_CONNTRACK_WAIT) {
1110                 return 0;
1111         }
1112         rte_spinlock_lock(&sq->sqsl);
1113         res = size - (uint16_t)(sq->head - sq->tail);
1114         if (unlikely(!res)) {
1115                 rte_spinlock_unlock(&sq->sqsl);
1116                 DRV_LOG(ERR, "Fail: SQ is full and no free WQE to send");
1117                 return 0;
1118         }
1119         MLX5_ASO_CT_UPDATE_STATE(ct, ASO_CONNTRACK_QUERY);
1120         wqe = &sq->sq_obj.aso_wqes[sq->head & mask];
1121         /* Prefetch the next WQE slot to be filled. */
1122         rte_prefetch0(&sq->sq_obj.aso_wqes[(sq->head + 1) & mask]);
1123         /* Fill next WQE. */
1124         wqe_idx = sq->head & mask;
1125         sq->elts[wqe_idx].ct = ct;
1126         sq->elts[wqe_idx].query_data = data;
1127         pool = container_of(ct, struct mlx5_aso_ct_pool, actions[ct->offset]);
1128         /* Each WQE will have a single CT object. */
1129         wqe->general_cseg.misc = rte_cpu_to_be_32(pool->devx_obj->id +
1130                                                   ct->offset);
1131         wqe->general_cseg.opcode = rte_cpu_to_be_32(MLX5_OPCODE_ACCESS_ASO |
1132                         (ASO_OPC_MOD_CONNECTION_TRACKING <<
1133                          WQE_CSEG_OPC_MOD_OFFSET) |
1134                         sq->pi << WQE_CSEG_WQE_INDEX_OFFSET);
1135         /*
1136          * No write request is required.
1137          * ASO_OPER_LOGICAL_AND and ASO_OP_ALWAYS_FALSE are both 0.
1138          * "BYTEWISE_64BYTE" is needed for a whole context.
1139          * Set to 0 directly to avoid an endian swap (modify will rewrite it).
1140          * "data_mask" is ignored.
1141          * Buffer address was already filled during initialization.
1142          */
1143         wqe->aso_cseg.operand_masks = rte_cpu_to_be_32(BYTEWISE_64BYTE <<
1144                                         ASO_CSEG_DATA_MASK_MODE_OFFSET);
1145         wqe->aso_cseg.data_mask = 0;
1146         sq->head++;
1147         /*
1148          * Each WQE contains 2 WQEBBs, even though
1149          * the data segment is not used in this case.
1150          */
1151         sq->pi += 2;
1152         mlx5_doorbell_ring(&sh->tx_uar.bf_db, *(volatile uint64_t *)wqe,
1153                            sq->pi, &sq->sq_obj.db_rec[MLX5_SND_DBR],
1154                            !sh->tx_uar.dbnc);
1155         rte_spinlock_unlock(&sq->sqsl);
1156         return 1;
1157 }
1158
1159 /*
1160  * Handle completions from WQEs sent to ASO CT.
1161  *
1162  * @param[in] mng
1163  *   Pointer to the CT pools management structure.
1164  */
1165 static void
1166 mlx5_aso_ct_completion_handle(struct mlx5_aso_ct_pools_mng *mng)
1167 {
1168         struct mlx5_aso_sq *sq = &mng->aso_sq;
1169         struct mlx5_aso_cq *cq = &sq->cq;
1170         volatile struct mlx5_cqe *restrict cqe;
1171         const uint32_t cq_size = 1 << cq->log_desc_n;
1172         const uint32_t mask = cq_size - 1;
1173         uint32_t idx;
1174         uint32_t next_idx;
1175         uint16_t max;
1176         uint16_t n = 0;
1177         int ret;
1178
1179         rte_spinlock_lock(&sq->sqsl);
1180         max = (uint16_t)(sq->head - sq->tail);
1181         if (unlikely(!max)) {
1182                 rte_spinlock_unlock(&sq->sqsl);
1183                 return;
1184         }
1185         next_idx = cq->cq_ci & mask;
1186         do {
1187                 idx = next_idx;
1188                 next_idx = (cq->cq_ci + 1) & mask;
1189                 /* Prefetch the next CQE. */
1190                 rte_prefetch0(&cq->cq_obj.cqes[next_idx]);
1191                 cqe = &cq->cq_obj.cqes[idx];
1192                 ret = check_cqe(cqe, cq_size, cq->cq_ci);
1193                 /*
1194                  * Be sure owner read is done before any other cookie field or
1195                  * opaque field.
1196                  */
1197                 rte_io_rmb();
1198                 if (unlikely(ret != MLX5_CQE_STATUS_SW_OWN)) {
1199                         if (likely(ret == MLX5_CQE_STATUS_HW_OWN))
1200                                 break;
1201                         mlx5_aso_cqe_err_handle(sq);
1202                 } else {
1203                         n++;
1204                 }
1205                 cq->cq_ci++;
1206         } while (1);
1207         if (likely(n)) {
1208                 mlx5_aso_ct_status_update(sq, n);
1209                 sq->tail += n;
1210                 rte_io_wmb();
1211                 cq->cq_obj.db_rec[0] = rte_cpu_to_be_32(cq->cq_ci);
1212         }
1213         rte_spinlock_unlock(&sq->sqsl);
1214 }
1215
1216 /*
1217  * Update connection tracking ASO context by sending WQE.
1218  *
1219  * @param[in] sh
1220  *   Pointer to mlx5_dev_ctx_shared object.
1221  * @param[in] ct
1222  *   Pointer to connection tracking offload object.
1223  * @param[in] profile
1224  *   Pointer to connection tracking TCP parameter.
1225  *
1226  * @return
1227  *   0 on success, -1 on failure.
1228  */
1229 int
1230 mlx5_aso_ct_update_by_wqe(struct mlx5_dev_ctx_shared *sh,
1231                           struct mlx5_aso_ct_action *ct,
1232                           const struct rte_flow_action_conntrack *profile)
1233 {
1234         uint32_t poll_wqe_times = MLX5_CT_POLL_WQE_CQE_TIMES;
1235         struct mlx5_aso_ct_pool *pool;
1236
1237         MLX5_ASSERT(ct);
1238         do {
1239                 mlx5_aso_ct_completion_handle(sh->ct_mng);
1240                 if (mlx5_aso_ct_sq_enqueue_single(sh, ct, profile))
1241                         return 0;
1242                 /* Waiting for wqe resource. */
1243                 rte_delay_us_sleep(10u);
1244         } while (--poll_wqe_times);
1245         pool = container_of(ct, struct mlx5_aso_ct_pool, actions[ct->offset]);
1246         DRV_LOG(ERR, "Fail to send WQE for ASO CT %d in pool %d",
1247                 ct->offset, pool->index);
1248         return -1;
1249 }
1250
1251 /*
1252  * The routine is used to wait for WQE completion to continue with queried data.
1253  *
1254  * @param[in] sh
1255  *   Pointer to mlx5_dev_ctx_shared object.
1256  * @param[in] ct
1257  *   Pointer to connection tracking offload object.
1258  *
1259  * @return
1260  *   0 on success, -1 on failure.
1261  */
1262 int
1263 mlx5_aso_ct_wait_ready(struct mlx5_dev_ctx_shared *sh,
1264                        struct mlx5_aso_ct_action *ct)
1265 {
1266         struct mlx5_aso_ct_pools_mng *mng = sh->ct_mng;
1267         uint32_t poll_cqe_times = MLX5_CT_POLL_WQE_CQE_TIMES;
1268         struct mlx5_aso_ct_pool *pool;
1269
1270         if (__atomic_load_n(&ct->state, __ATOMIC_RELAXED) ==
1271             ASO_CONNTRACK_READY)
1272                 return 0;
1273         do {
1274                 mlx5_aso_ct_completion_handle(mng);
1275                 if (__atomic_load_n(&ct->state, __ATOMIC_RELAXED) ==
1276                     ASO_CONNTRACK_READY)
1277                         return 0;
1278                 /* Wait for the CQE to be ready; sleep between polls. */
1279                 rte_delay_us_sleep(MLX5_ASO_WQE_CQE_RESPONSE_DELAY);
1280         } while (--poll_cqe_times);
1281         pool = container_of(ct, struct mlx5_aso_ct_pool, actions[ct->offset]);
1282         DRV_LOG(ERR, "Fail to poll CQE for ASO CT %d in pool %d",
1283                 ct->offset, pool->index);
1284         return -1;
1285 }
1286
1287 /*
1288  * Convert the hardware conntrack data format into the profile.
1289  *
1290  * @param[in] profile
1291  *   Pointer to conntrack profile to be filled after query.
1292  * @param[in] wdata
1293  *   Pointer to data fetched from hardware.
1294  */
1295 static inline void
1296 mlx5_aso_ct_obj_analyze(struct rte_flow_action_conntrack *profile,
1297                         char *wdata)
1298 {
1299         void *o_dir = MLX5_ADDR_OF(conn_track_aso, wdata, original_dir);
1300         void *r_dir = MLX5_ADDR_OF(conn_track_aso, wdata, reply_dir);
1301
1302         /* 16-bit fields would require MLX5_GET16 instead of MLX5_GET. */
1303         profile->state = (enum rte_flow_conntrack_state)
1304                          MLX5_GET(conn_track_aso, wdata, state);
1305         profile->enable = !MLX5_GET(conn_track_aso, wdata, freeze_track);
1306         profile->selective_ack = MLX5_GET(conn_track_aso, wdata,
1307                                           sack_permitted);
1308         profile->live_connection = MLX5_GET(conn_track_aso, wdata,
1309                                             connection_assured);
1310         profile->challenge_ack_passed = MLX5_GET(conn_track_aso, wdata,
1311                                                  challenged_acked);
1312         profile->max_ack_window = MLX5_GET(conn_track_aso, wdata,
1313                                            max_ack_window);
1314         profile->retransmission_limit = MLX5_GET(conn_track_aso, wdata,
1315                                                  retranmission_limit);
1316         profile->last_window = MLX5_GET(conn_track_aso, wdata, last_win);
1317         profile->last_direction = MLX5_GET(conn_track_aso, wdata, last_dir);
1318         profile->last_index = (enum rte_flow_conntrack_tcp_last_index)
1319                               MLX5_GET(conn_track_aso, wdata, last_index);
1320         profile->last_seq = MLX5_GET(conn_track_aso, wdata, last_seq);
1321         profile->last_ack = MLX5_GET(conn_track_aso, wdata, last_ack);
1322         profile->last_end = MLX5_GET(conn_track_aso, wdata, last_end);
1323         profile->liberal_mode = MLX5_GET(conn_track_aso, wdata,
1324                                 reply_direction_tcp_liberal_enabled) |
1325                                 MLX5_GET(conn_track_aso, wdata,
1326                                 original_direction_tcp_liberal_enabled);
1327         /* The RTE profile has one liberal flag for both directions. */
1328         profile->reply_dir.scale = MLX5_GET(conn_track_aso, wdata,
1329                                             reply_direction_tcp_scale);
1330         profile->reply_dir.close_initiated = MLX5_GET(conn_track_aso, wdata,
1331                                         reply_direction_tcp_close_initiated);
1332         profile->reply_dir.data_unacked = MLX5_GET(conn_track_aso, wdata,
1333                                         reply_direction_tcp_data_unacked);
1334         profile->reply_dir.last_ack_seen = MLX5_GET(conn_track_aso, wdata,
1335                                         reply_direction_tcp_max_ack);
1336         profile->reply_dir.sent_end = MLX5_GET(tcp_window_params,
1337                                                r_dir, sent_end);
1338         profile->reply_dir.reply_end = MLX5_GET(tcp_window_params,
1339                                                 r_dir, reply_end);
1340         profile->reply_dir.max_win = MLX5_GET(tcp_window_params,
1341                                               r_dir, max_win);
1342         profile->reply_dir.max_ack = MLX5_GET(tcp_window_params,
1343                                               r_dir, max_ack);
1344         profile->original_dir.scale = MLX5_GET(conn_track_aso, wdata,
1345                                                original_direction_tcp_scale);
1346         profile->original_dir.close_initiated = MLX5_GET(conn_track_aso, wdata,
1347                                         original_direction_tcp_close_initiated);
1348         profile->original_dir.data_unacked = MLX5_GET(conn_track_aso, wdata,
1349                                         original_direction_tcp_data_unacked);
1350         profile->original_dir.last_ack_seen = MLX5_GET(conn_track_aso, wdata,
1351                                         original_direction_tcp_max_ack);
1352         profile->original_dir.sent_end = MLX5_GET(tcp_window_params,
1353                                                   o_dir, sent_end);
1354         profile->original_dir.reply_end = MLX5_GET(tcp_window_params,
1355                                                    o_dir, reply_end);
1356         profile->original_dir.max_win = MLX5_GET(tcp_window_params,
1357                                                  o_dir, max_win);
1358         profile->original_dir.max_ack = MLX5_GET(tcp_window_params,
1359                                                  o_dir, max_ack);
1360 }
1361
1362 /*
1363  * Query the connection tracking information by sending a WQE.
1364  *
1365  * @param[in] sh
1366  *   Pointer to mlx5_dev_ctx_shared object.
1367  * @param[in] ct
1368  *   Pointer to connection tracking offload object.
1369  * @param[out] profile
1370  *   Pointer to connection tracking TCP information.
1371  *
1372  * @return
1373  *   0 on success, -1 on failure.
1374  */
1375 int
1376 mlx5_aso_ct_query_by_wqe(struct mlx5_dev_ctx_shared *sh,
1377                          struct mlx5_aso_ct_action *ct,
1378                          struct rte_flow_action_conntrack *profile)
1379 {
1380         uint32_t poll_wqe_times = MLX5_CT_POLL_WQE_CQE_TIMES;
1381         struct mlx5_aso_ct_pool *pool;
1382         char out_data[64 * 2];
1383         int ret;
1384
1385         MLX5_ASSERT(ct);
1386         do {
1387                 mlx5_aso_ct_completion_handle(sh->ct_mng);
1388                 ret = mlx5_aso_ct_sq_query_single(sh, ct, out_data);
1389                 if (ret < 0)
1390                         return ret;
1391                 else if (ret > 0)
1392                         goto data_handle;
1393                 /* Waiting for wqe resource or state. */
1394                 else
1395                         rte_delay_us_sleep(10u);
1396         } while (--poll_wqe_times);
1397         pool = container_of(ct, struct mlx5_aso_ct_pool, actions[ct->offset]);
1398         DRV_LOG(ERR, "Fail to send WQE for ASO CT %d in pool %d",
1399                 ct->offset, pool->index);
1400         return -1;
1401 data_handle:
1402         ret = mlx5_aso_ct_wait_ready(sh, ct);
1403         if (!ret)
1404                 mlx5_aso_ct_obj_analyze(profile, out_data);
1405         return ret;
1406 }
1407
1408 /*
1409  * Make sure the conntrack context is synchronized with hardware before
1410  * creating a flow rule that uses it.
1411  *
1412  * @param[in] sh
1413  *   Pointer to shared device context.
1414  * @param[in] ct
1415  *   Pointer to connection tracking offload object.
1416  *
1417  * @return
1418  *   0 on success, a negative errno value otherwise and rte_errno is set.
1419  */
1420 int
1421 mlx5_aso_ct_available(struct mlx5_dev_ctx_shared *sh,
1422                       struct mlx5_aso_ct_action *ct)
1423 {
1424         struct mlx5_aso_ct_pools_mng *mng = sh->ct_mng;
1425         uint32_t poll_cqe_times = MLX5_CT_POLL_WQE_CQE_TIMES;
1426         enum mlx5_aso_ct_state state =
1427                                 __atomic_load_n(&ct->state, __ATOMIC_RELAXED);
1428
1429         if (state == ASO_CONNTRACK_FREE) {
1430                 rte_errno = ENXIO;
1431                 return -rte_errno;
1432         } else if (state == ASO_CONNTRACK_READY ||
1433                    state == ASO_CONNTRACK_QUERY) {
1434                 return 0;
1435         }
1436         do {
1437                 mlx5_aso_ct_completion_handle(mng);
1438                 state = __atomic_load_n(&ct->state, __ATOMIC_RELAXED);
1439                 if (state == ASO_CONNTRACK_READY ||
1440                     state == ASO_CONNTRACK_QUERY)
1441                         return 0;
1442                 /* Wait for the CQE to be ready; sleep between polls. */
1443                 rte_delay_us_sleep(MLX5_ASO_WQE_CQE_RESPONSE_DELAY);
1444         } while (--poll_cqe_times);
1445         rte_errno = EBUSY;
1446         return -rte_errno;
1447 }
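
/*
 * Editor's usage sketch (hypothetical, not called by the driver): a
 * typical conntrack lifecycle combines the APIs above. Push the profile,
 * make sure hardware is in sync before referencing the action in a flow
 * rule, and optionally read the live state back.
 */
static inline int
example_ct_lifecycle(struct mlx5_dev_ctx_shared *sh,
		     struct mlx5_aso_ct_action *ct,
		     const struct rte_flow_action_conntrack *profile)
{
	struct rte_flow_action_conntrack cur;

	if (mlx5_aso_ct_update_by_wqe(sh, ct, profile))
		return -1; /* WQE could not be posted. */
	if (mlx5_aso_ct_available(sh, ct))
		return -rte_errno; /* HW did not sync in time. */
	return mlx5_aso_ct_query_by_wqe(sh, ct, &cur); /* Read back. */
}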