1 /* SPDX-License-Identifier: BSD-3-Clause
2  * Copyright(c) 2016-2022 Intel Corporation
3  */
4
5 #include <assert.h>
6 #include <errno.h>
7 #include <nmmintrin.h>
8 #include <pthread.h>
9 #include <stdint.h>
10 #include <stdbool.h>
11 #include <stdio.h>
12 #include <string.h>
13 #include <sys/mman.h>
14 #include <fcntl.h>
15
16 #include <rte_common.h>
17 #include <rte_config.h>
18 #include <rte_cycles.h>
19 #include <rte_debug.h>
20 #include <rte_dev.h>
21 #include <rte_errno.h>
22 #include <rte_eventdev.h>
23 #include <eventdev_pmd.h>
24 #include <rte_io.h>
25 #include <rte_kvargs.h>
26 #include <rte_log.h>
27 #include <rte_malloc.h>
28 #include <rte_mbuf.h>
29 #include <rte_power_intrinsics.h>
30 #include <rte_prefetch.h>
31 #include <rte_ring.h>
32 #include <rte_string_fns.h>
33
34 #include "dlb2_priv.h"
35 #include "dlb2_iface.h"
36 #include "dlb2_inline_fns.h"
37
38 /*
39  * Resources exposed to eventdev. Some values overridden at runtime using
40  * values returned by the DLB kernel driver.
41  */
42 #if (RTE_EVENT_MAX_QUEUES_PER_DEV > UINT8_MAX)
43 #error "RTE_EVENT_MAX_QUEUES_PER_DEV cannot fit in member max_event_queues"
44 #endif
45 static struct rte_event_dev_info evdev_dlb2_default_info = {
46         .driver_name = "", /* probe will set */
47         .min_dequeue_timeout_ns = DLB2_MIN_DEQUEUE_TIMEOUT_NS,
48         .max_dequeue_timeout_ns = DLB2_MAX_DEQUEUE_TIMEOUT_NS,
49 #if (RTE_EVENT_MAX_QUEUES_PER_DEV < DLB2_MAX_NUM_LDB_QUEUES)
50         .max_event_queues = RTE_EVENT_MAX_QUEUES_PER_DEV,
51 #else
52         .max_event_queues = DLB2_MAX_NUM_LDB_QUEUES,
53 #endif
54         .max_event_queue_flows = DLB2_MAX_NUM_FLOWS,
55         .max_event_queue_priority_levels = DLB2_QID_PRIORITIES,
56         .max_event_priority_levels = DLB2_QID_PRIORITIES,
57         .max_event_ports = DLB2_MAX_NUM_LDB_PORTS,
58         .max_event_port_dequeue_depth = DLB2_DEFAULT_CQ_DEPTH,
59         .max_event_port_enqueue_depth = DLB2_MAX_ENQUEUE_DEPTH,
60         .max_event_port_links = DLB2_MAX_NUM_QIDS_PER_LDB_CQ,
61         .max_num_events = DLB2_MAX_NUM_LDB_CREDITS,
62         .max_single_link_event_port_queue_pairs =
63                 DLB2_MAX_NUM_DIR_PORTS(DLB2_HW_V2),
64         .event_dev_cap = (RTE_EVENT_DEV_CAP_EVENT_QOS |
65                           RTE_EVENT_DEV_CAP_DISTRIBUTED_SCHED |
66                           RTE_EVENT_DEV_CAP_QUEUE_ALL_TYPES |
67                           RTE_EVENT_DEV_CAP_BURST_MODE |
68                           RTE_EVENT_DEV_CAP_IMPLICIT_RELEASE_DISABLE |
69                           RTE_EVENT_DEV_CAP_RUNTIME_PORT_LINK |
70                           RTE_EVENT_DEV_CAP_MULTIPLE_QUEUE_PORT |
71                           RTE_EVENT_DEV_CAP_MAINTENANCE_FREE),
72 };
73
74 struct process_local_port_data
75 dlb2_port[DLB2_MAX_NUM_PORTS_ALL][DLB2_NUM_PORT_TYPES];
76
77 static void
78 dlb2_free_qe_mem(struct dlb2_port *qm_port)
79 {
80         if (qm_port == NULL)
81                 return;
82
83         rte_free(qm_port->qe4);
84         qm_port->qe4 = NULL;
85
86         rte_free(qm_port->int_arm_qe);
87         qm_port->int_arm_qe = NULL;
88
89         rte_free(qm_port->consume_qe);
90         qm_port->consume_qe = NULL;
91
92         rte_memzone_free(dlb2_port[qm_port->id][PORT_TYPE(qm_port)].mz);
93         dlb2_port[qm_port->id][PORT_TYPE(qm_port)].mz = NULL;
94 }
95
96 /* override defaults with value(s) provided on command line */
97 static void
98 dlb2_init_queue_depth_thresholds(struct dlb2_eventdev *dlb2,
99                                  int *qid_depth_thresholds)
100 {
101         int q;
102
103         for (q = 0; q < DLB2_MAX_NUM_QUEUES(dlb2->version); q++) {
104                 if (qid_depth_thresholds[q] != 0)
105                         dlb2->ev_queues[q].depth_threshold =
106                                 qid_depth_thresholds[q];
107         }
108 }
109
110 /* override defaults with value(s) provided on command line */
111 static void
112 dlb2_init_cq_weight(struct dlb2_eventdev *dlb2, int *cq_weight)
113 {
114         int q;
115
116         for (q = 0; q < DLB2_MAX_NUM_PORTS_ALL; q++)
117                 dlb2->ev_ports[q].cq_weight = cq_weight[q];
118 }
119
120 static int
121 set_cq_weight(const char *key __rte_unused,
122               const char *value,
123               void *opaque)
124 {
125         struct dlb2_cq_weight *cq_weight = opaque;
126         int first, last, weight, i;
127
128         if (value == NULL || opaque == NULL) {
129                 DLB2_LOG_ERR("NULL pointer\n");
130                 return -EINVAL;
131         }
132
133         /* command line override may take one of the following 3 forms:
134          * cq_weight=all:<weight_value> ... all load-balanced ports
135          * cq_weight=portA-portB:<weight_value> ... a range of ports
136          * cq_weight=port:<weight_value> ... just one port
137          */
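        /* Illustrative devarg examples for the forms above (weight values
         * assumed, not exhaustive):
         *   cq_weight=all:64  ... weight 64 on every load-balanced port
         *   cq_weight=0-3:128 ... weight 128 on ports 0 through 3
         *   cq_weight=2:32    ... weight 32 on port 2 only
         */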
138         if (sscanf(value, "all:%d", &weight) == 1) {
139                 first = 0;
140                 last = DLB2_MAX_NUM_LDB_PORTS - 1;
141         } else if (sscanf(value, "%d-%d:%d", &first, &last, &weight) == 3) {
142                 /* we have everything we need */
143         } else if (sscanf(value, "%d:%d", &first, &weight) == 2) {
144                 last = first;
145         } else {
146                 DLB2_LOG_ERR("Error parsing ldb port qe weight devarg. Should be all:val, qid-qid:val, or qid:val\n");
147                 return -EINVAL;
148         }
149
150         if (first > last || first < 0 ||
151                 last >= DLB2_MAX_NUM_LDB_PORTS) {
152                 DLB2_LOG_ERR("Error parsing ldb port qe weight arg, invalid port value\n");
153                 return -EINVAL;
154         }
155
156         if (weight < 0 || weight > DLB2_MAX_CQ_DEPTH_OVERRIDE) {
157                 DLB2_LOG_ERR("Error parsing ldb port qe weight devarg, weight must be between 0 and the max cq depth override\n");
158                 return -EINVAL;
159         }
160
161         for (i = first; i <= last; i++)
162                 cq_weight->limit[i] = weight; /* indexed by port */
163
164         return 0;
165 }
166
167 /* override defaults with value(s) provided on command line */
168 static void
169 dlb2_init_port_cos(struct dlb2_eventdev *dlb2, int *port_cos)
170 {
171         int q;
172
173         for (q = 0; q < DLB2_MAX_NUM_PORTS_ALL; q++) {
174                 dlb2->ev_ports[q].cos_id = port_cos[q];
175                 dlb2->cos_ports[port_cos[q]]++;
176         }
177 }
178
179 static void
180 dlb2_init_cos_bw(struct dlb2_eventdev *dlb2,
181                  struct dlb2_cos_bw *cos_bw)
182 {
183         int q;
184         for (q = 0; q < DLB2_COS_NUM_VALS; q++)
185                 dlb2->cos_bw[q] = cos_bw->val[q];
186
187 }
188
189 static int
190 dlb2_hw_query_resources(struct dlb2_eventdev *dlb2)
191 {
192         struct dlb2_hw_dev *handle = &dlb2->qm_instance;
193         struct dlb2_hw_resource_info *dlb2_info = &handle->info;
194         int num_ldb_ports;
195         int ret;
196
197         /* Query driver resources provisioned for this device */
198
199         ret = dlb2_iface_get_num_resources(handle,
200                                            &dlb2->hw_rsrc_query_results);
201         if (ret) {
202                 DLB2_LOG_ERR("ioctl get dlb2 num resources, err=%d\n", ret);
203                 return ret;
204         }
205
206         /* Complete filling in device resource info returned to evdev app,
207          * overriding any default values.
208          * The capabilities (CAPs) were set at compile time.
209          */
210
211         if (dlb2->max_cq_depth != DLB2_DEFAULT_CQ_DEPTH)
212                 num_ldb_ports = DLB2_MAX_HL_ENTRIES / dlb2->max_cq_depth;
213         else
214                 num_ldb_ports = dlb2->hw_rsrc_query_results.num_ldb_ports;
215
216         evdev_dlb2_default_info.max_event_queues =
217                 dlb2->hw_rsrc_query_results.num_ldb_queues;
218
219         evdev_dlb2_default_info.max_event_ports = num_ldb_ports;
220
221         if (dlb2->version == DLB2_HW_V2_5) {
222                 evdev_dlb2_default_info.max_num_events =
223                         dlb2->hw_rsrc_query_results.num_credits;
224         } else {
225                 evdev_dlb2_default_info.max_num_events =
226                         dlb2->hw_rsrc_query_results.num_ldb_credits;
227         }
228         /* Save off values used when creating the scheduling domain. */
229
230         handle->info.num_sched_domains =
231                 dlb2->hw_rsrc_query_results.num_sched_domains;
232
233         if (dlb2->version == DLB2_HW_V2_5) {
234                 handle->info.hw_rsrc_max.nb_events_limit =
235                         dlb2->hw_rsrc_query_results.num_credits;
236         } else {
237                 handle->info.hw_rsrc_max.nb_events_limit =
238                         dlb2->hw_rsrc_query_results.num_ldb_credits;
239         }
240         handle->info.hw_rsrc_max.num_queues =
241                 dlb2->hw_rsrc_query_results.num_ldb_queues +
242                 dlb2->hw_rsrc_query_results.num_dir_ports;
243
244         handle->info.hw_rsrc_max.num_ldb_queues =
245                 dlb2->hw_rsrc_query_results.num_ldb_queues;
246
247         handle->info.hw_rsrc_max.num_ldb_ports = num_ldb_ports;
248
249         handle->info.hw_rsrc_max.num_dir_ports =
250                 dlb2->hw_rsrc_query_results.num_dir_ports;
251
252         handle->info.hw_rsrc_max.reorder_window_size =
253                 dlb2->hw_rsrc_query_results.num_hist_list_entries;
254
255         rte_memcpy(dlb2_info, &handle->info.hw_rsrc_max, sizeof(*dlb2_info));
256
257         return 0;
258 }
259
260 #define DLB2_BASE_10 10
261
262 static int
263 dlb2_string_to_int(int *result, const char *str)
264 {
265         long ret;
266         char *endptr;
267
268         if (str == NULL || result == NULL)
269                 return -EINVAL;
270
271         errno = 0;
272         ret = strtol(str, &endptr, DLB2_BASE_10);
273         if (errno)
274                 return -errno;
275
276         /* long int and int may be different width for some architectures */
277         if (ret < INT_MIN || ret > INT_MAX || endptr == str)
278                 return -EINVAL;
279
280         *result = ret;
281         return 0;
282 }
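/* Usage sketch (illustrative): for the devarg string "2048",
 * dlb2_string_to_int(&val, "2048") returns 0 and sets val = 2048, while a
 * non-numeric string such as "all" returns -EINVAL because endptr == str
 * after strtol().
 */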
283
284 static int
285 set_numa_node(const char *key __rte_unused, const char *value, void *opaque)
286 {
287         int *socket_id = opaque;
288         int ret;
289
290         ret = dlb2_string_to_int(socket_id, value);
291         if (ret < 0)
292                 return ret;
293
294         if (*socket_id >= RTE_MAX_NUMA_NODES)
295                 return -EINVAL;
296         return 0;
297 }
298
299
300 static int
301 set_max_cq_depth(const char *key __rte_unused,
302                  const char *value,
303                  void *opaque)
304 {
305         int *max_cq_depth = opaque;
306         int ret;
307
308         if (value == NULL || opaque == NULL) {
309                 DLB2_LOG_ERR("NULL pointer\n");
310                 return -EINVAL;
311         }
312
313         ret = dlb2_string_to_int(max_cq_depth, value);
314         if (ret < 0)
315                 return ret;
316
317         if (*max_cq_depth < DLB2_MIN_CQ_DEPTH_OVERRIDE ||
318             *max_cq_depth > DLB2_MAX_CQ_DEPTH_OVERRIDE ||
319             !rte_is_power_of_2(*max_cq_depth)) {
320                 DLB2_LOG_ERR("dlb2: max_cq_depth must be between %d and %d and a power of 2\n",
321                              DLB2_MIN_CQ_DEPTH_OVERRIDE,
322                              DLB2_MAX_CQ_DEPTH_OVERRIDE);
323                 return -EINVAL;
324         }
325
326         return 0;
327 }
328
329 static int
330 set_max_num_events(const char *key __rte_unused,
331                    const char *value,
332                    void *opaque)
333 {
334         int *max_num_events = opaque;
335         int ret;
336
337         if (value == NULL || opaque == NULL) {
338                 DLB2_LOG_ERR("NULL pointer\n");
339                 return -EINVAL;
340         }
341
342         ret = dlb2_string_to_int(max_num_events, value);
343         if (ret < 0)
344                 return ret;
345
346         if (*max_num_events < 0 || *max_num_events >
347                         DLB2_MAX_NUM_LDB_CREDITS) {
348                 DLB2_LOG_ERR("dlb2: max_num_events must be between 0 and %d\n",
349                              DLB2_MAX_NUM_LDB_CREDITS);
350                 return -EINVAL;
351         }
352
353         return 0;
354 }
355
356 static int
357 set_num_dir_credits(const char *key __rte_unused,
358                     const char *value,
359                     void *opaque)
360 {
361         int *num_dir_credits = opaque;
362         int ret;
363
364         if (value == NULL || opaque == NULL) {
365                 DLB2_LOG_ERR("NULL pointer\n");
366                 return -EINVAL;
367         }
368
369         ret = dlb2_string_to_int(num_dir_credits, value);
370         if (ret < 0)
371                 return ret;
372
373         if (*num_dir_credits < 0 ||
374             *num_dir_credits > DLB2_MAX_NUM_DIR_CREDITS(DLB2_HW_V2)) {
375                 DLB2_LOG_ERR("dlb2: num_dir_credits must be between 0 and %d\n",
376                              DLB2_MAX_NUM_DIR_CREDITS(DLB2_HW_V2));
377                 return -EINVAL;
378         }
379
380         return 0;
381 }
382
383 static int
384 set_dev_id(const char *key __rte_unused,
385            const char *value,
386            void *opaque)
387 {
388         int *dev_id = opaque;
389         int ret;
390
391         if (value == NULL || opaque == NULL) {
392                 DLB2_LOG_ERR("NULL pointer\n");
393                 return -EINVAL;
394         }
395
396         ret = dlb2_string_to_int(dev_id, value);
397         if (ret < 0)
398                 return ret;
399
400         return 0;
401 }
402
403 static int
404 set_poll_interval(const char *key __rte_unused,
405         const char *value,
406         void *opaque)
407 {
408         int *poll_interval = opaque;
409         int ret;
410
411         if (value == NULL || opaque == NULL) {
412                 DLB2_LOG_ERR("NULL pointer\n");
413                 return -EINVAL;
414         }
415
416         ret = dlb2_string_to_int(poll_interval, value);
417         if (ret < 0)
418                 return ret;
419
420         return 0;
421 }
422
423 static int
424 set_port_cos(const char *key __rte_unused,
425              const char *value,
426              void *opaque)
427 {
428         struct dlb2_port_cos *port_cos = opaque;
429         int first, last, cos_id, i;
430
431         if (value == NULL || opaque == NULL) {
432                 DLB2_LOG_ERR("NULL pointer\n");
433                 return -EINVAL;
434         }
435
436         /* command line override may take one of the following 3 forms:
437          * port_cos=all:<cos_id> ... all ports
438          * port_cos=port-port:<cos_id> ... a range of ports
439          * port_cos=port:<cos_id> ... just one port
440          */
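        /* Illustrative devarg examples for the forms above (CoS values
         * assumed):
         *   port_cos=all:1 ... every load-balanced port in CoS 1
         *   port_cos=4-7:2 ... ports 4 through 7 in CoS 2
         *   port_cos=0:3   ... port 0 only in CoS 3
         */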
441         if (sscanf(value, "all:%d", &cos_id) == 1) {
442                 first = 0;
443                 last = DLB2_MAX_NUM_LDB_PORTS - 1;
444         } else if (sscanf(value, "%d-%d:%d", &first, &last, &cos_id) == 3) {
445                 /* we have everything we need */
446         } else if (sscanf(value, "%d:%d", &first, &cos_id) == 2) {
447                 last = first;
448         } else {
449                 DLB2_LOG_ERR("Error parsing ldb port port_cos devarg. Should be all:val, port-port:val, or port:val\n");
450                 return -EINVAL;
451         }
452
453         if (first > last || first < 0 ||
454                 last >= DLB2_MAX_NUM_LDB_PORTS) {
455                 DLB2_LOG_ERR("Error parsing ldb port cos_id arg, invalid port value\n");
456                 return -EINVAL;
457         }
458
459         if (cos_id < DLB2_COS_0 || cos_id > DLB2_COS_3) {
460                 DLB2_LOG_ERR("Error parsing ldb port cos_id devarg, must be between 0 and 3\n");
461                 return -EINVAL;
462         }
463
464         for (i = first; i <= last; i++)
465                 port_cos->cos_id[i] = cos_id; /* indexed by port */
466
467         return 0;
468 }
469
470 static int
471 set_cos_bw(const char *key __rte_unused,
472              const char *value,
473              void *opaque)
474 {
475         struct dlb2_cos_bw *cos_bw = opaque;
476
477         if (value == NULL || opaque == NULL) {
478                 DLB2_LOG_ERR("NULL pointer\n");
479                 return -EINVAL;
480         }
481
482         /* format must be %d,%d,%d,%d */
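        /* Illustrative example (values assumed): cos_bw=40,30,20,10 assigns
         * 40/30/20/10 percent of scheduling bandwidth to CoS 0..3; the four
         * values must sum to no more than 100.
         */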
483
484         if (sscanf(value, "%d,%d,%d,%d", &cos_bw->val[0], &cos_bw->val[1],
485                    &cos_bw->val[2], &cos_bw->val[3]) != 4) {
486                 DLB2_LOG_ERR("Error parsing cos bandwidth devarg. Should be bw0,bw1,bw2,bw3 where all values combined are <= 100\n");
487                 return -EINVAL;
488         }
489         if (cos_bw->val[0] + cos_bw->val[1] + cos_bw->val[2] + cos_bw->val[3] > 100) {
490                 DLB2_LOG_ERR("Error parsing cos bandwidth devarg. The four bandwidth values must sum to <= 100\n");
491                 return -EINVAL;
492         }
493
494         return 0;
495 }
496
497 static int
498 set_sw_credit_quanta(const char *key __rte_unused,
499         const char *value,
500         void *opaque)
501 {
502         int *sw_credit_quanta = opaque;
503         int ret;
504
505         if (value == NULL || opaque == NULL) {
506                 DLB2_LOG_ERR("NULL pointer\n");
507                 return -EINVAL;
508         }
509
510         ret = dlb2_string_to_int(sw_credit_quanta, value);
511         if (ret < 0)
512                 return ret;
513
514         if (*sw_credit_quanta <= 0) {
515                 DLB2_LOG_ERR("sw_credit_quanta must be > 0\n");
516                 return -EINVAL;
517         }
518
519         return 0;
520 }
521
522 static int
523 set_hw_credit_quanta(const char *key __rte_unused,
524         const char *value,
525         void *opaque)
526 {
527         int *hw_credit_quanta = opaque;
528         int ret;
529
530         if (value == NULL || opaque == NULL) {
531                 DLB2_LOG_ERR("NULL pointer\n");
532                 return -EINVAL;
533         }
534
535         ret = dlb2_string_to_int(hw_credit_quanta, value);
536         if (ret < 0)
537                 return ret;
538
539         return 0;
540 }
541
542 static int
543 set_default_depth_thresh(const char *key __rte_unused,
544         const char *value,
545         void *opaque)
546 {
547         int *default_depth_thresh = opaque;
548         int ret;
549
550         if (value == NULL || opaque == NULL) {
551                 DLB2_LOG_ERR("NULL pointer\n");
552                 return -EINVAL;
553         }
554
555         ret = dlb2_string_to_int(default_depth_thresh, value);
556         if (ret < 0)
557                 return ret;
558
559         return 0;
560 }
561
562 static int
563 set_vector_opts_enab(const char *key __rte_unused,
564         const char *value,
565         void *opaque)
566 {
567         bool *dlb2_vector_opts_enabled = opaque;
568
569         if (value == NULL || opaque == NULL) {
570                 DLB2_LOG_ERR("NULL pointer\n");
571                 return -EINVAL;
572         }
573
574         if ((*value == 'y') || (*value == 'Y'))
575                 *dlb2_vector_opts_enabled = true;
576         else
577                 *dlb2_vector_opts_enabled = false;
578
579         return 0;
580 }
581
582 static int
583 set_qid_depth_thresh(const char *key __rte_unused,
584                      const char *value,
585                      void *opaque)
586 {
587         struct dlb2_qid_depth_thresholds *qid_thresh = opaque;
588         int first, last, thresh, i;
589
590         if (value == NULL || opaque == NULL) {
591                 DLB2_LOG_ERR("NULL pointer\n");
592                 return -EINVAL;
593         }
594
595         /* command line override may take one of the following 3 forms:
596          * qid_depth_thresh=all:<threshold_value> ... all queues
597          * qid_depth_thresh=qidA-qidB:<threshold_value> ... a range of queues
598          * qid_depth_thresh=qid:<threshold_value> ... just one queue
599          */
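        /* Illustrative devarg examples for the forms above (threshold values
         * assumed):
         *   qid_depth_thresh=all:256 ... threshold 256 on every queue
         *   qid_depth_thresh=0-7:128 ... threshold 128 on queues 0 through 7
         *   qid_depth_thresh=3:64    ... threshold 64 on queue 3 only
         */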
600         if (sscanf(value, "all:%d", &thresh) == 1) {
601                 first = 0;
602                 last = DLB2_MAX_NUM_QUEUES(DLB2_HW_V2) - 1;
603         } else if (sscanf(value, "%d-%d:%d", &first, &last, &thresh) == 3) {
604                 /* we have everything we need */
605         } else if (sscanf(value, "%d:%d", &first, &thresh) == 2) {
606                 last = first;
607         } else {
608                 DLB2_LOG_ERR("Error parsing qid depth devarg. Should be all:val, qid-qid:val, or qid:val\n");
609                 return -EINVAL;
610         }
611
612         if (first > last || first < 0 ||
613                 last >= DLB2_MAX_NUM_QUEUES(DLB2_HW_V2)) {
614                 DLB2_LOG_ERR("Error parsing qid depth devarg, invalid qid value\n");
615                 return -EINVAL;
616         }
617
618         if (thresh < 0 || thresh > DLB2_MAX_QUEUE_DEPTH_THRESHOLD) {
619                 DLB2_LOG_ERR("Error parsing qid depth devarg, threshold > %d\n",
620                              DLB2_MAX_QUEUE_DEPTH_THRESHOLD);
621                 return -EINVAL;
622         }
623
624         for (i = first; i <= last; i++)
625                 qid_thresh->val[i] = thresh; /* indexed by qid */
626
627         return 0;
628 }
629
630 static int
631 set_qid_depth_thresh_v2_5(const char *key __rte_unused,
632                           const char *value,
633                           void *opaque)
634 {
635         struct dlb2_qid_depth_thresholds *qid_thresh = opaque;
636         int first, last, thresh, i;
637
638         if (value == NULL || opaque == NULL) {
639                 DLB2_LOG_ERR("NULL pointer\n");
640                 return -EINVAL;
641         }
642
643         /* command line override may take one of the following 3 forms:
644          * qid_depth_thresh=all:<threshold_value> ... all queues
645          * qid_depth_thresh=qidA-qidB:<threshold_value> ... a range of queues
646          * qid_depth_thresh=qid:<threshold_value> ... just one queue
647          */
648         if (sscanf(value, "all:%d", &thresh) == 1) {
649                 first = 0;
650                 last = DLB2_MAX_NUM_QUEUES(DLB2_HW_V2_5) - 1;
651         } else if (sscanf(value, "%d-%d:%d", &first, &last, &thresh) == 3) {
652                 /* we have everything we need */
653         } else if (sscanf(value, "%d:%d", &first, &thresh) == 2) {
654                 last = first;
655         } else {
656                 DLB2_LOG_ERR("Error parsing qid depth devarg. Should be all:val, qid-qid:val, or qid:val\n");
657                 return -EINVAL;
658         }
659
660         if (first > last || first < 0 ||
661                 last >= DLB2_MAX_NUM_QUEUES(DLB2_HW_V2_5)) {
662                 DLB2_LOG_ERR("Error parsing qid depth devarg, invalid qid value\n");
663                 return -EINVAL;
664         }
665
666         if (thresh < 0 || thresh > DLB2_MAX_QUEUE_DEPTH_THRESHOLD) {
667                 DLB2_LOG_ERR("Error parsing qid depth devarg, threshold > %d\n",
668                              DLB2_MAX_QUEUE_DEPTH_THRESHOLD);
669                 return -EINVAL;
670         }
671
672         for (i = first; i <= last; i++)
673                 qid_thresh->val[i] = thresh; /* indexed by qid */
674
675         return 0;
676 }
677
678 static void
679 dlb2_eventdev_info_get(struct rte_eventdev *dev,
680                        struct rte_event_dev_info *dev_info)
681 {
682         struct dlb2_eventdev *dlb2 = dlb2_pmd_priv(dev);
683         int ret;
684
685         ret = dlb2_hw_query_resources(dlb2);
686         if (ret) {
687                 const struct rte_eventdev_data *data = dev->data;
688
689                 DLB2_LOG_ERR("get resources err=%d, devid=%d\n",
690                              ret, data->dev_id);
691                 /* fn is void, so fall through and return values set up in
692                  * probe
693                  */
694         }
695
696         /* Add num resources currently owned by this domain.
697          * These would become available if the scheduling domain were reset due
698          * to the application recalling eventdev_configure to *reconfigure* the
699          * domain.
700          */
701         evdev_dlb2_default_info.max_event_ports += dlb2->num_ldb_ports;
702         evdev_dlb2_default_info.max_event_queues += dlb2->num_ldb_queues;
703         if (dlb2->version == DLB2_HW_V2_5) {
704                 evdev_dlb2_default_info.max_num_events +=
705                         dlb2->max_credits;
706         } else {
707                 evdev_dlb2_default_info.max_num_events +=
708                         dlb2->max_ldb_credits;
709         }
710         evdev_dlb2_default_info.max_event_queues =
711                 RTE_MIN(evdev_dlb2_default_info.max_event_queues,
712                         RTE_EVENT_MAX_QUEUES_PER_DEV);
713
714         evdev_dlb2_default_info.max_num_events =
715                 RTE_MIN(evdev_dlb2_default_info.max_num_events,
716                         dlb2->max_num_events_override);
717
718         *dev_info = evdev_dlb2_default_info;
719 }
720
721 static int
722 dlb2_hw_create_sched_domain(struct dlb2_eventdev *dlb2,
723                             struct dlb2_hw_dev *handle,
724                             const struct dlb2_hw_rsrcs *resources_asked,
725                             uint8_t device_version)
726 {
727         int ret = 0;
728         uint32_t cos_ports = 0;
729         struct dlb2_create_sched_domain_args *cfg;
730
731         if (resources_asked == NULL) {
732                 DLB2_LOG_ERR("dlb2: dlb2_create NULL parameter\n");
733                 ret = EINVAL;
734                 goto error_exit;
735         }
736
737         /* Map generic qm resources to dlb2 resources */
738         cfg = &handle->cfg.resources;
739
740         /* DIR ports and queues */
741
742         cfg->num_dir_ports = resources_asked->num_dir_ports;
743         if (device_version == DLB2_HW_V2_5)
744                 cfg->num_credits = resources_asked->num_credits;
745         else
746                 cfg->num_dir_credits = resources_asked->num_dir_credits;
747
748         /* LDB queues */
749
750         cfg->num_ldb_queues = resources_asked->num_ldb_queues;
751
752         /* LDB ports */
753
754         /* tally of ports with non default COS */
755         cos_ports = dlb2->cos_ports[1] + dlb2->cos_ports[2] +
756                     dlb2->cos_ports[3];
757
758         if (cos_ports > resources_asked->num_ldb_ports) {
759                 DLB2_LOG_ERR("dlb2: num_ldb_ports < nonzero cos_ports\n");
760                 ret = EINVAL;
761                 goto error_exit;
762         }
763
764         cfg->cos_strict = 0; /* Best effort */
765         cfg->num_cos_ldb_ports[0] = resources_asked->num_ldb_ports - cos_ports;
766         cfg->num_cos_ldb_ports[1] = dlb2->cos_ports[1];
767         cfg->num_cos_ldb_ports[2] = dlb2->cos_ports[2];
768         cfg->num_cos_ldb_ports[3] = dlb2->cos_ports[3];
769
770         if (device_version == DLB2_HW_V2)
771                 cfg->num_ldb_credits = resources_asked->num_ldb_credits;
772
773         cfg->num_atomic_inflights =
774                 DLB2_NUM_ATOMIC_INFLIGHTS_PER_QUEUE *
775                 cfg->num_ldb_queues;
776
777         cfg->num_hist_list_entries = resources_asked->num_ldb_ports *
778                 DLB2_NUM_HIST_LIST_ENTRIES_PER_LDB_PORT;
779
780         if (device_version == DLB2_HW_V2_5) {
781                 DLB2_LOG_DBG("sched domain create - ldb_qs=%d, ldb_ports=%d, dir_ports=%d, atomic_inflights=%d, hist_list_entries=%d, credits=%d\n",
782                              cfg->num_ldb_queues,
783                              resources_asked->num_ldb_ports,
784                              cfg->num_dir_ports,
785                              cfg->num_atomic_inflights,
786                              cfg->num_hist_list_entries,
787                              cfg->num_credits);
788         } else {
789                 DLB2_LOG_DBG("sched domain create - ldb_qs=%d, ldb_ports=%d, dir_ports=%d, atomic_inflights=%d, hist_list_entries=%d, ldb_credits=%d, dir_credits=%d\n",
790                              cfg->num_ldb_queues,
791                              resources_asked->num_ldb_ports,
792                              cfg->num_dir_ports,
793                              cfg->num_atomic_inflights,
794                              cfg->num_hist_list_entries,
795                              cfg->num_ldb_credits,
796                              cfg->num_dir_credits);
797         }
798
799         /* Configure the QM */
800
801         ret = dlb2_iface_sched_domain_create(handle, cfg);
802         if (ret < 0) {
803                 DLB2_LOG_ERR("dlb2: domain create failed, ret = %d, extra status: %s\n",
804                              ret,
805                              dlb2_error_strings[cfg->response.status]);
806
807                 goto error_exit;
808         }
809
810         handle->domain_id = cfg->response.id;
811         handle->cfg.configured = true;
812
813 error_exit:
814
815         return ret;
816 }
817
818 static void
819 dlb2_hw_reset_sched_domain(const struct rte_eventdev *dev, bool reconfig)
820 {
821         struct dlb2_eventdev *dlb2 = dlb2_pmd_priv(dev);
822         enum dlb2_configuration_state config_state;
823         int i, j;
824
825         dlb2_iface_domain_reset(dlb2);
826
827         /* Free all dynamically allocated port memory */
828         for (i = 0; i < dlb2->num_ports; i++)
829                 dlb2_free_qe_mem(&dlb2->ev_ports[i].qm_port);
830
831         /* If reconfiguring, mark the device's queues and ports as "previously
832          * configured." If the user doesn't reconfigure them, the PMD will
833          * reapply their previous configuration when the device is started.
834          */
835         config_state = (reconfig) ? DLB2_PREV_CONFIGURED :
836                 DLB2_NOT_CONFIGURED;
837
838         for (i = 0; i < dlb2->num_ports; i++) {
839                 dlb2->ev_ports[i].qm_port.config_state = config_state;
840                 /* Reset setup_done so ports can be reconfigured */
841                 dlb2->ev_ports[i].setup_done = false;
842                 for (j = 0; j < DLB2_MAX_NUM_QIDS_PER_LDB_CQ; j++)
843                         dlb2->ev_ports[i].link[j].mapped = false;
844         }
845
846         for (i = 0; i < dlb2->num_queues; i++)
847                 dlb2->ev_queues[i].qm_queue.config_state = config_state;
848
849         for (i = 0; i < DLB2_MAX_NUM_QUEUES(DLB2_HW_V2_5); i++)
850                 dlb2->ev_queues[i].setup_done = false;
851
852         dlb2->num_ports = 0;
853         dlb2->num_ldb_ports = 0;
854         dlb2->num_dir_ports = 0;
855         dlb2->num_queues = 0;
856         dlb2->num_ldb_queues = 0;
857         dlb2->num_dir_queues = 0;
858         dlb2->configured = false;
859 }
860
861 /* Note: 1 QM instance per QM device, QM instance/device == event device */
862 static int
863 dlb2_eventdev_configure(const struct rte_eventdev *dev)
864 {
865         struct dlb2_eventdev *dlb2 = dlb2_pmd_priv(dev);
866         struct dlb2_hw_dev *handle = &dlb2->qm_instance;
867         struct dlb2_hw_rsrcs *rsrcs = &handle->info.hw_rsrc_max;
868         const struct rte_eventdev_data *data = dev->data;
869         const struct rte_event_dev_config *config = &data->dev_conf;
870         int ret;
871
872         /* If this eventdev is already configured, we must release the current
873          * scheduling domain before attempting to configure a new one.
874          */
875         if (dlb2->configured) {
876                 dlb2_hw_reset_sched_domain(dev, true);
877                 ret = dlb2_hw_query_resources(dlb2);
878                 if (ret) {
879                         DLB2_LOG_ERR("get resources err=%d, devid=%d\n",
880                                      ret, data->dev_id);
881                         return ret;
882                 }
883         }
884
885         if (config->nb_event_queues > rsrcs->num_queues) {
886                 DLB2_LOG_ERR("nb_event_queues parameter (%d) exceeds the QM device's capabilities (%d).\n",
887                              config->nb_event_queues,
888                              rsrcs->num_queues);
889                 return -EINVAL;
890         }
891         if (config->nb_event_ports > (rsrcs->num_ldb_ports
892                         + rsrcs->num_dir_ports)) {
893                 DLB2_LOG_ERR("nb_event_ports parameter (%d) exceeds the QM device's capabilities (%d).\n",
894                              config->nb_event_ports,
895                              (rsrcs->num_ldb_ports + rsrcs->num_dir_ports));
896                 return -EINVAL;
897         }
898         if (config->nb_events_limit > rsrcs->nb_events_limit) {
899                 DLB2_LOG_ERR("nb_events_limit parameter (%d) exceeds the QM device's capabilities (%d).\n",
900                              config->nb_events_limit,
901                              rsrcs->nb_events_limit);
902                 return -EINVAL;
903         }
904
905         if (config->event_dev_cfg & RTE_EVENT_DEV_CFG_PER_DEQUEUE_TIMEOUT)
906                 dlb2->global_dequeue_wait = false;
907         else {
908                 uint32_t timeout32;
909
910                 dlb2->global_dequeue_wait = true;
911
912                 /* note size mismatch of timeout vals in eventdev lib. */
913                 timeout32 = config->dequeue_timeout_ns;
914
915                 dlb2->global_dequeue_wait_ticks =
916                         timeout32 * (rte_get_timer_hz() / 1E9);
917         }
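        /* Worked example (timer rate assumed): with a 2 GHz timer and
         * dequeue_timeout_ns = 1000, global_dequeue_wait_ticks =
         * 1000 * (2e9 / 1e9) = 2000.
         */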
918
919         /* Does this platform support umonitor/umwait? */
920         if (rte_cpu_get_flag_enabled(RTE_CPUFLAG_WAITPKG))
921                 dlb2->umwait_allowed = true;
922
923         rsrcs->num_dir_ports = config->nb_single_link_event_port_queues;
924         rsrcs->num_ldb_ports  = config->nb_event_ports - rsrcs->num_dir_ports;
925         /* 1 dir queue per dir port */
926         rsrcs->num_ldb_queues = config->nb_event_queues - rsrcs->num_dir_ports;
927
928         if (dlb2->version == DLB2_HW_V2_5) {
929                 rsrcs->num_credits = 0;
930                 if (rsrcs->num_ldb_queues || rsrcs->num_dir_ports)
931                         rsrcs->num_credits = config->nb_events_limit;
932         } else {
933                 /* Scale down nb_events_limit by 2 for directed credits,
934                  * since the directed credit pool is smaller than the load-balanced pool.
935                  */
936                 rsrcs->num_ldb_credits = 0;
937                 rsrcs->num_dir_credits = 0;
938
939                 if (rsrcs->num_ldb_queues)
940                         rsrcs->num_ldb_credits = config->nb_events_limit;
941                 if (rsrcs->num_dir_ports)
942                         rsrcs->num_dir_credits = config->nb_events_limit / 2;
943                 if (dlb2->num_dir_credits_override != -1)
944                         rsrcs->num_dir_credits = dlb2->num_dir_credits_override;
945         }
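        /* Worked example (V2, values assumed): with nb_events_limit = 2048,
         * at least one LDB queue and one DIR port, and no override, the PMD
         * asks for 2048 load-balanced credits and 2048 / 2 = 1024 directed
         * credits.
         */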
946
947         if (dlb2_hw_create_sched_domain(dlb2, handle, rsrcs,
948                                         dlb2->version) < 0) {
949                 DLB2_LOG_ERR("dlb2_hw_create_sched_domain failed\n");
950                 return -ENODEV;
951         }
952
953         dlb2->new_event_limit = config->nb_events_limit;
954         __atomic_store_n(&dlb2->inflights, 0, __ATOMIC_SEQ_CST);
955
956         /* Save number of ports/queues for this event dev */
957         dlb2->num_ports = config->nb_event_ports;
958         dlb2->num_queues = config->nb_event_queues;
959         dlb2->num_dir_ports = rsrcs->num_dir_ports;
960         dlb2->num_ldb_ports = dlb2->num_ports - dlb2->num_dir_ports;
961         dlb2->num_ldb_queues = dlb2->num_queues - dlb2->num_dir_ports;
962         dlb2->num_dir_queues = dlb2->num_dir_ports;
963         if (dlb2->version == DLB2_HW_V2_5) {
964                 dlb2->credit_pool = rsrcs->num_credits;
965                 dlb2->max_credits = rsrcs->num_credits;
966         } else {
967                 dlb2->ldb_credit_pool = rsrcs->num_ldb_credits;
968                 dlb2->max_ldb_credits = rsrcs->num_ldb_credits;
969                 dlb2->dir_credit_pool = rsrcs->num_dir_credits;
970                 dlb2->max_dir_credits = rsrcs->num_dir_credits;
971         }
972
973         dlb2->configured = true;
974
975         return 0;
976 }
977
978 static void
979 dlb2_eventdev_port_default_conf_get(struct rte_eventdev *dev,
980                                     uint8_t port_id,
981                                     struct rte_event_port_conf *port_conf)
982 {
983         RTE_SET_USED(port_id);
984         struct dlb2_eventdev *dlb2 = dlb2_pmd_priv(dev);
985
986         port_conf->new_event_threshold = dlb2->new_event_limit;
987         port_conf->dequeue_depth = 32;
988         port_conf->enqueue_depth = DLB2_MAX_ENQUEUE_DEPTH;
989         port_conf->event_port_cfg = 0;
990 }
991
992 static void
993 dlb2_eventdev_queue_default_conf_get(struct rte_eventdev *dev,
994                                      uint8_t queue_id,
995                                      struct rte_event_queue_conf *queue_conf)
996 {
997         RTE_SET_USED(dev);
998         RTE_SET_USED(queue_id);
999
1000         queue_conf->nb_atomic_flows = 1024;
1001         queue_conf->nb_atomic_order_sequences = 64;
1002         queue_conf->event_queue_cfg = 0;
1003         queue_conf->priority = 0;
1004 }
1005
1006 static int32_t
1007 dlb2_get_sn_allocation(struct dlb2_eventdev *dlb2, int group)
1008 {
1009         struct dlb2_hw_dev *handle = &dlb2->qm_instance;
1010         struct dlb2_get_sn_allocation_args cfg;
1011         int ret;
1012
1013         cfg.group = group;
1014
1015         ret = dlb2_iface_get_sn_allocation(handle, &cfg);
1016         if (ret < 0) {
1017                 DLB2_LOG_ERR("dlb2: get_sn_allocation ret=%d (driver status: %s)\n",
1018                              ret, dlb2_error_strings[cfg.response.status]);
1019                 return ret;
1020         }
1021
1022         return cfg.response.id;
1023 }
1024
1025 static int
1026 dlb2_set_sn_allocation(struct dlb2_eventdev *dlb2, int group, int num)
1027 {
1028         struct dlb2_hw_dev *handle = &dlb2->qm_instance;
1029         struct dlb2_set_sn_allocation_args cfg;
1030         int ret;
1031
1032         cfg.num = num;
1033         cfg.group = group;
1034
1035         ret = dlb2_iface_set_sn_allocation(handle, &cfg);
1036         if (ret < 0) {
1037                 DLB2_LOG_ERR("dlb2: set_sn_allocation ret=%d (driver status: %s)\n",
1038                              ret, dlb2_error_strings[cfg.response.status]);
1039                 return ret;
1040         }
1041
1042         return ret;
1043 }
1044
1045 static int32_t
1046 dlb2_get_sn_occupancy(struct dlb2_eventdev *dlb2, int group)
1047 {
1048         struct dlb2_hw_dev *handle = &dlb2->qm_instance;
1049         struct dlb2_get_sn_occupancy_args cfg;
1050         int ret;
1051
1052         cfg.group = group;
1053
1054         ret = dlb2_iface_get_sn_occupancy(handle, &cfg);
1055         if (ret < 0) {
1056                 DLB2_LOG_ERR("dlb2: get_sn_occupancy ret=%d (driver status: %s)\n",
1057                              ret, dlb2_error_strings[cfg.response.status]);
1058                 return ret;
1059         }
1060
1061         return cfg.response.id;
1062 }
1063
1064 /* Query the current sequence number allocations and, if they conflict with the
1065  * requested LDB queue configuration, attempt to re-allocate sequence numbers.
1066  * This is best-effort; if it fails, the PMD will attempt to configure the
1067  * load-balanced queue and return an error.
1068  */
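/* Illustrative example (numbers assumed): a group currently allocated 64
 * sequence numbers per slot provides DLB2_MAX_LDB_SN_ALLOC / 64 slots. A queue
 * requesting nb_atomic_order_sequences = 64 can share that group while a slot
 * remains free; otherwise a completely unoccupied group is reprogrammed via
 * dlb2_set_sn_allocation().
 */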
1069 static void
1070 dlb2_program_sn_allocation(struct dlb2_eventdev *dlb2,
1071                            const struct rte_event_queue_conf *queue_conf)
1072 {
1073         int grp_occupancy[DLB2_NUM_SN_GROUPS];
1074         int grp_alloc[DLB2_NUM_SN_GROUPS];
1075         int i, sequence_numbers;
1076
1077         sequence_numbers = (int)queue_conf->nb_atomic_order_sequences;
1078
1079         for (i = 0; i < DLB2_NUM_SN_GROUPS; i++) {
1080                 int total_slots;
1081
1082                 grp_alloc[i] = dlb2_get_sn_allocation(dlb2, i);
1083                 if (grp_alloc[i] < 0)
1084                         return;
1085
1086                 total_slots = DLB2_MAX_LDB_SN_ALLOC / grp_alloc[i];
1087
1088                 grp_occupancy[i] = dlb2_get_sn_occupancy(dlb2, i);
1089                 if (grp_occupancy[i] < 0)
1090                         return;
1091
1092                 /* DLB has at least one available slot for the requested
1093                  * sequence numbers, so no further configuration required.
1094                  */
1095                 if (grp_alloc[i] == sequence_numbers &&
1096                     grp_occupancy[i] < total_slots)
1097                         return;
1098         }
1099
1100         /* None of the sequence number groups are configured for the requested
1101          * sequence numbers, so we have to reconfigure one of them. This is
1102          * only possible if a group is not in use.
1103          */
1104         for (i = 0; i < DLB2_NUM_SN_GROUPS; i++) {
1105                 if (grp_occupancy[i] == 0)
1106                         break;
1107         }
1108
1109         if (i == DLB2_NUM_SN_GROUPS) {
1110                 DLB2_LOG_ERR("[%s()] No groups with %d sequence_numbers are available or have free slots\n",
1111                        __func__, sequence_numbers);
1112                 return;
1113         }
1114
1115         /* Attempt to configure slot i with the requested number of sequence
1116          * numbers. Ignore the return value -- if this fails, the error will be
1117          * caught during subsequent queue configuration.
1118          */
1119         dlb2_set_sn_allocation(dlb2, i, sequence_numbers);
1120 }
1121
1122 static int32_t
1123 dlb2_hw_create_ldb_queue(struct dlb2_eventdev *dlb2,
1124                          struct dlb2_eventdev_queue *ev_queue,
1125                          const struct rte_event_queue_conf *evq_conf)
1126 {
1127         struct dlb2_hw_dev *handle = &dlb2->qm_instance;
1128         struct dlb2_queue *queue = &ev_queue->qm_queue;
1129         struct dlb2_create_ldb_queue_args cfg;
1130         int32_t ret;
1131         uint32_t qm_qid;
1132         int sched_type = -1;
1133
1134         if (evq_conf == NULL)
1135                 return -EINVAL;
1136
1137         if (evq_conf->event_queue_cfg & RTE_EVENT_QUEUE_CFG_ALL_TYPES) {
1138                 if (evq_conf->nb_atomic_order_sequences != 0)
1139                         sched_type = RTE_SCHED_TYPE_ORDERED;
1140                 else
1141                         sched_type = RTE_SCHED_TYPE_PARALLEL;
1142         } else
1143                 sched_type = evq_conf->schedule_type;
1144
1145         cfg.num_atomic_inflights = DLB2_NUM_ATOMIC_INFLIGHTS_PER_QUEUE;
1146         cfg.num_sequence_numbers = evq_conf->nb_atomic_order_sequences;
1147         cfg.num_qid_inflights = evq_conf->nb_atomic_order_sequences;
1148
1149         if (sched_type != RTE_SCHED_TYPE_ORDERED) {
1150                 cfg.num_sequence_numbers = 0;
1151                 cfg.num_qid_inflights = 2048;
1152         }
1153
1154         /* App should set this to the number of hardware flows they want, not
1155          * the overall number of flows they're going to use. E.g. if app is
1156          * using 64 flows and sets compression to 64, best-case they'll get
1157          * 64 unique hashed flows in hardware.
1158          */
1159         switch (evq_conf->nb_atomic_flows) {
1160         /* Valid DLB2 compression levels */
1161         case 64:
1162         case 128:
1163         case 256:
1164         case 512:
1165         case (1 * 1024): /* 1K */
1166         case (2 * 1024): /* 2K */
1167         case (4 * 1024): /* 4K */
1168         case (64 * 1024): /* 64K */
1169                 cfg.lock_id_comp_level = evq_conf->nb_atomic_flows;
1170                 break;
1171         default:
1172                 /* Invalid compression level */
1173                 cfg.lock_id_comp_level = 0; /* no compression */
1174         }
1175
1176         if (ev_queue->depth_threshold == 0) {
1177                 cfg.depth_threshold = dlb2->default_depth_thresh;
1178                 ev_queue->depth_threshold =
1179                         dlb2->default_depth_thresh;
1180         } else
1181                 cfg.depth_threshold = ev_queue->depth_threshold;
1182
1183         ret = dlb2_iface_ldb_queue_create(handle, &cfg);
1184         if (ret < 0) {
1185                 DLB2_LOG_ERR("dlb2: create LB event queue error, ret=%d (driver status: %s)\n",
1186                              ret, dlb2_error_strings[cfg.response.status]);
1187                 return -EINVAL;
1188         }
1189
1190         qm_qid = cfg.response.id;
1191
1192         /* Save off queue config for debug, resource lookups, and reconfig */
1193         queue->num_qid_inflights = cfg.num_qid_inflights;
1194         queue->num_atm_inflights = cfg.num_atomic_inflights;
1195
1196         queue->sched_type = sched_type;
1197         queue->config_state = DLB2_CONFIGURED;
1198
1199         DLB2_LOG_DBG("Created LB event queue %d, nb_inflights=%d, nb_seq=%d, qid inflights=%d\n",
1200                      qm_qid,
1201                      cfg.num_atomic_inflights,
1202                      cfg.num_sequence_numbers,
1203                      cfg.num_qid_inflights);
1204
1205         return qm_qid;
1206 }
1207
1208 static int
1209 dlb2_eventdev_ldb_queue_setup(struct rte_eventdev *dev,
1210                               struct dlb2_eventdev_queue *ev_queue,
1211                               const struct rte_event_queue_conf *queue_conf)
1212 {
1213         struct dlb2_eventdev *dlb2 = dlb2_pmd_priv(dev);
1214         int32_t qm_qid;
1215
1216         if (queue_conf->nb_atomic_order_sequences)
1217                 dlb2_program_sn_allocation(dlb2, queue_conf);
1218
1219         qm_qid = dlb2_hw_create_ldb_queue(dlb2, ev_queue, queue_conf);
1220         if (qm_qid < 0) {
1221                 DLB2_LOG_ERR("Failed to create the load-balanced queue\n");
1222
1223                 return qm_qid;
1224         }
1225
1226         dlb2->qm_ldb_to_ev_queue_id[qm_qid] = ev_queue->id;
1227
1228         ev_queue->qm_queue.id = qm_qid;
1229
1230         return 0;
1231 }
1232
1233 static int dlb2_num_dir_queues_setup(struct dlb2_eventdev *dlb2)
1234 {
1235         int i, num = 0;
1236
1237         for (i = 0; i < dlb2->num_queues; i++) {
1238                 if (dlb2->ev_queues[i].setup_done &&
1239                     dlb2->ev_queues[i].qm_queue.is_directed)
1240                         num++;
1241         }
1242
1243         return num;
1244 }
1245
1246 static void
1247 dlb2_queue_link_teardown(struct dlb2_eventdev *dlb2,
1248                          struct dlb2_eventdev_queue *ev_queue)
1249 {
1250         struct dlb2_eventdev_port *ev_port;
1251         int i, j;
1252
1253         for (i = 0; i < dlb2->num_ports; i++) {
1254                 ev_port = &dlb2->ev_ports[i];
1255
1256                 for (j = 0; j < DLB2_MAX_NUM_QIDS_PER_LDB_CQ; j++) {
1257                         if (!ev_port->link[j].valid ||
1258                             ev_port->link[j].queue_id != ev_queue->id)
1259                                 continue;
1260
1261                         ev_port->link[j].valid = false;
1262                         ev_port->num_links--;
1263                 }
1264         }
1265
1266         ev_queue->num_links = 0;
1267 }
1268
1269 static int
1270 dlb2_eventdev_queue_setup(struct rte_eventdev *dev,
1271                           uint8_t ev_qid,
1272                           const struct rte_event_queue_conf *queue_conf)
1273 {
1274         struct dlb2_eventdev *dlb2 = dlb2_pmd_priv(dev);
1275         struct dlb2_eventdev_queue *ev_queue;
1276         int ret;
1277
1278         if (queue_conf == NULL)
1279                 return -EINVAL;
1280
1281         if (ev_qid >= dlb2->num_queues)
1282                 return -EINVAL;
1283
1284         ev_queue = &dlb2->ev_queues[ev_qid];
1285
1286         ev_queue->qm_queue.is_directed = queue_conf->event_queue_cfg &
1287                 RTE_EVENT_QUEUE_CFG_SINGLE_LINK;
1288         ev_queue->id = ev_qid;
1289         ev_queue->conf = *queue_conf;
1290
1291         if (!ev_queue->qm_queue.is_directed) {
1292                 ret = dlb2_eventdev_ldb_queue_setup(dev, ev_queue, queue_conf);
1293         } else {
1294                 /* The directed queue isn't setup until link time, at which
1295                  * point we know its directed port ID. Directed queue setup
1296                  * will only fail if this queue is already setup or there are
1297                  * no directed queues left to configure.
1298                  */
1299                 ret = 0;
1300
1301                 ev_queue->qm_queue.config_state = DLB2_NOT_CONFIGURED;
1302
1303                 if (ev_queue->setup_done ||
1304                     dlb2_num_dir_queues_setup(dlb2) == dlb2->num_dir_queues)
1305                         ret = -EINVAL;
1306         }
1307
1308         /* Tear down pre-existing port->queue links */
1309         if (!ret && dlb2->run_state == DLB2_RUN_STATE_STOPPED)
1310                 dlb2_queue_link_teardown(dlb2, ev_queue);
1311
1312         if (!ret)
1313                 ev_queue->setup_done = true;
1314
1315         return ret;
1316 }
1317
1318 static int
1319 dlb2_init_consume_qe(struct dlb2_port *qm_port, char *mz_name)
1320 {
1321         struct dlb2_cq_pop_qe *qe;
1322
1323         qe = rte_zmalloc(mz_name,
1324                         DLB2_NUM_QES_PER_CACHE_LINE *
1325                                 sizeof(struct dlb2_cq_pop_qe),
1326                         RTE_CACHE_LINE_SIZE);
1327
1328         if (qe == NULL) {
1329                 DLB2_LOG_ERR("dlb2: no memory for consume_qe\n");
1330                 return -ENOMEM;
1331         }
1332         qm_port->consume_qe = qe;
1333
1334         qe->qe_valid = 0;
1335         qe->qe_frag = 0;
1336         qe->qe_comp = 0;
1337         qe->cq_token = 1;
1338         /* Tokens value is 0-based; i.e. '0' returns 1 token, '1' returns 2,
1339          * and so on.
1340          */
1341         qe->tokens = 0; /* set at run time */
1342         qe->meas_lat = 0;
1343         qe->no_dec = 0;
1344         /* Completion IDs are disabled */
1345         qe->cmp_id = 0;
1346
1347         return 0;
1348 }
1349
1350 static int
1351 dlb2_init_int_arm_qe(struct dlb2_port *qm_port, char *mz_name)
1352 {
1353         struct dlb2_enqueue_qe *qe;
1354
1355         qe = rte_zmalloc(mz_name,
1356                         DLB2_NUM_QES_PER_CACHE_LINE *
1357                                 sizeof(struct dlb2_enqueue_qe),
1358                         RTE_CACHE_LINE_SIZE);
1359
1360         if (qe == NULL) {
1361                 DLB2_LOG_ERR("dlb2: no memory for int_arm_qe\n");
1362                 return -ENOMEM;
1363         }
1364         qm_port->int_arm_qe = qe;
1365
1366         /* V2 - INT ARM is CQ_TOKEN + FRAG */
1367         qe->qe_valid = 0;
1368         qe->qe_frag = 1;
1369         qe->qe_comp = 0;
1370         qe->cq_token = 1;
1371         qe->meas_lat = 0;
1372         qe->no_dec = 0;
1373         /* Completion IDs are disabled */
1374         qe->cmp_id = 0;
1375
1376         return 0;
1377 }
1378
1379 static int
1380 dlb2_init_qe_mem(struct dlb2_port *qm_port, char *mz_name)
1381 {
1382         int ret, sz;
1383
1384         sz = DLB2_NUM_QES_PER_CACHE_LINE * sizeof(struct dlb2_enqueue_qe);
1385
1386         qm_port->qe4 = rte_zmalloc(mz_name, sz, RTE_CACHE_LINE_SIZE);
1387
1388         if (qm_port->qe4 == NULL) {
1389                 DLB2_LOG_ERR("dlb2: no qe4 memory\n");
1390                 ret = -ENOMEM;
1391                 goto error_exit;
1392         }
1393
1394         ret = dlb2_init_int_arm_qe(qm_port, mz_name);
1395         if (ret < 0) {
1396                 DLB2_LOG_ERR("dlb2: dlb2_init_int_arm_qe ret=%d\n", ret);
1397                 goto error_exit;
1398         }
1399
1400         ret = dlb2_init_consume_qe(qm_port, mz_name);
1401         if (ret < 0) {
1402                 DLB2_LOG_ERR("dlb2: dlb2_init_consume_qe ret=%d\n", ret);
1403                 goto error_exit;
1404         }
1405
1406         return 0;
1407
1408 error_exit:
1409
1410         dlb2_free_qe_mem(qm_port);
1411
1412         return ret;
1413 }
1414
1415 static inline uint16_t
1416 dlb2_event_enqueue_delayed(void *event_port,
1417                            const struct rte_event events[]);
1418
1419 static inline uint16_t
1420 dlb2_event_enqueue_burst_delayed(void *event_port,
1421                                  const struct rte_event events[],
1422                                  uint16_t num);
1423
1424 static inline uint16_t
1425 dlb2_event_enqueue_new_burst_delayed(void *event_port,
1426                                      const struct rte_event events[],
1427                                      uint16_t num);
1428
1429 static inline uint16_t
1430 dlb2_event_enqueue_forward_burst_delayed(void *event_port,
1431                                          const struct rte_event events[],
1432                                          uint16_t num);
1433
1434 /* Generate the required bitmask for rotate-style expected QE gen bits.
1435  * This requires a pattern of 1's and zeros, starting with expected as
1436  * 1 bits, so when hardware writes 0's they're "new". This requires the
1437  * ring size to be powers of 2 to wrap correctly.
1438  */
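/* Worked example: for cq_depth = 8 the loop below sets bits [8..15],
 * [24..31], [40..47] and [56..63], i.e. both rolling masks start as
 * 0xFF00FF00FF00FF00; cq_depth = 64 is special-cased since one window
 * spans an entire 64-bit mask word.
 */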
1439 static void
1440 dlb2_hw_cq_bitmask_init(struct dlb2_port *qm_port, uint32_t cq_depth)
1441 {
1442         uint64_t cq_build_mask = 0;
1443         uint32_t i;
1444
1445         if (cq_depth > 64)
1446                 return; /* need to fall back to scalar code */
1447
1448         /*
1449          * All zeros in the first u64 and all 1's in the second is the correct
1450          * starting pattern; special-casing == 64 is easier than adapting the loop.
1451          */
1452         if (cq_depth == 64) {
1453                 qm_port->cq_rolling_mask = 0;
1454                 qm_port->cq_rolling_mask_2 = -1;
1455                 return;
1456         }
1457
1458         for (i = 0; i < 64; i += (cq_depth * 2))
1459                 cq_build_mask |= ((1ULL << cq_depth) - 1) << (i + cq_depth);
1460
1461         qm_port->cq_rolling_mask = cq_build_mask;
1462         qm_port->cq_rolling_mask_2 = cq_build_mask;
1463 }
1464
1465 static int
1466 dlb2_hw_create_ldb_port(struct dlb2_eventdev *dlb2,
1467                         struct dlb2_eventdev_port *ev_port,
1468                         uint32_t dequeue_depth,
1469                         uint32_t enqueue_depth)
1470 {
1471         struct dlb2_hw_dev *handle = &dlb2->qm_instance;
1472         struct dlb2_create_ldb_port_args cfg = { {0} };
1473         int ret;
1474         struct dlb2_port *qm_port = NULL;
1475         char mz_name[RTE_MEMZONE_NAMESIZE];
1476         uint32_t qm_port_id;
1477         uint16_t ldb_credit_high_watermark = 0;
1478         uint16_t dir_credit_high_watermark = 0;
1479         uint16_t credit_high_watermark = 0;
1480
1481         if (handle == NULL)
1482                 return -EINVAL;
1483
1484         if (dequeue_depth < DLB2_MIN_CQ_DEPTH) {
1485                 DLB2_LOG_ERR("dlb2: invalid cq depth, must be at least %d\n",
1486                              DLB2_MIN_CQ_DEPTH);
1487                 return -EINVAL;
1488         }
1489
1490         if (dlb2->version == DLB2_HW_V2 && ev_port->cq_weight != 0 &&
1491             ev_port->cq_weight > dequeue_depth) {
1492                 DLB2_LOG_ERR("dlb2: invalid cq weight %d, must not exceed dequeue depth %u\n",
1493                              ev_port->cq_weight, dequeue_depth);
1494                 return -EINVAL;
1495         }
1496
1497         rte_spinlock_lock(&handle->resource_lock);
1498
1499         /* We round up to the next power of 2 if necessary */
1500         cfg.cq_depth = rte_align32pow2(dequeue_depth);
1501         cfg.cq_depth_threshold = 1;
1502
1503         cfg.cq_history_list_size = DLB2_NUM_HIST_LIST_ENTRIES_PER_LDB_PORT;
1504
1505         cfg.cos_id = ev_port->cos_id;
1506         cfg.cos_strict = 0; /* best effort */
1507
1508         /* User controls the LDB high watermark via enqueue depth. The DIR high
1509          * watermark is equal, unless the directed credit pool is too small.
1510          */
1511         if (dlb2->version == DLB2_HW_V2) {
1512                 ldb_credit_high_watermark = enqueue_depth;
1513                 /* If there are no directed ports, the kernel driver will
1514                  * ignore this port's directed credit settings. Don't use
1515                  * enqueue_depth if it would require more directed credits
1516                  * than are available.
1517                  */
1518                 dir_credit_high_watermark =
1519                         RTE_MIN(enqueue_depth,
1520                                 handle->cfg.num_dir_credits / dlb2->num_ports);
1521         } else
1522                 credit_high_watermark = enqueue_depth;
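        /* Illustration (hypothetical numbers): with enqueue_depth = 128,
         * 4 ports and 256 directed credits in the pool, the DIR watermark is
         * RTE_MIN(128, 256 / 4) = 64 while the LDB watermark stays at 128.
         */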
1523
1524         /* Per QM values */
1525
1526         ret = dlb2_iface_ldb_port_create(handle, &cfg,  dlb2->poll_mode);
1527         if (ret < 0) {
1528                 DLB2_LOG_ERR("dlb2: dlb2_ldb_port_create error, ret=%d (driver status: %s)\n",
1529                              ret, dlb2_error_strings[cfg.response.status]);
1530                 goto error_exit;
1531         }
1532
1533         qm_port_id = cfg.response.id;
1534
1535         DLB2_LOG_DBG("dlb2: ev_port %d uses qm LB port %d <<<<<\n",
1536                      ev_port->id, qm_port_id);
1537
1538         qm_port = &ev_port->qm_port;
1539         qm_port->ev_port = ev_port; /* back ptr */
1540         qm_port->dlb2 = dlb2; /* back ptr */
1541         /*
1542          * Allocate and init local qe struct(s).
1543          * Note: MOVDIR64 requires the enqueue QE (qe4) to be aligned.
1544          */
1545
1546         snprintf(mz_name, sizeof(mz_name), "dlb2_ldb_port%d",
1547                  ev_port->id);
1548
1549         ret = dlb2_init_qe_mem(qm_port, mz_name);
1550         if (ret < 0) {
1551                 DLB2_LOG_ERR("dlb2: init_qe_mem failed, ret=%d\n", ret);
1552                 goto error_exit;
1553         }
1554
1555         qm_port->id = qm_port_id;
1556
1557         if (dlb2->version == DLB2_HW_V2) {
1558                 qm_port->cached_ldb_credits = 0;
1559                 qm_port->cached_dir_credits = 0;
1560                 if (ev_port->cq_weight) {
1561                         struct dlb2_enable_cq_weight_args
1562                                 cq_weight_args = { {0} };
1563
1564                         cq_weight_args.port_id = qm_port->id;
1565                         cq_weight_args.limit = ev_port->cq_weight;
1566                         ret = dlb2_iface_enable_cq_weight(handle, &cq_weight_args);
1567                         if (ret < 0) {
1568                                 DLB2_LOG_ERR("dlb2: dlb2_enable_cq_weight error, ret=%d (driver status: %s)\n",
1569                                         ret,
1570                                         dlb2_error_strings[cq_weight_args.response.status]);
1571                                 goto error_exit;
1572                         }
1573                 }
1574                 qm_port->cq_weight = ev_port->cq_weight;
1575         } else {
1576                 qm_port->cached_credits = 0;
1577                 qm_port->cq_weight = 0;
1578         }
1579
1580         /* CQs with depth < 8 use an 8-entry queue, but withhold credits so
1581          * the effective depth is smaller.
1582          */
1583         qm_port->cq_depth = cfg.cq_depth <= 8 ? 8 : cfg.cq_depth;
1584         qm_port->cq_idx = 0;
1585         qm_port->cq_idx_unmasked = 0;
1586
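        /* Sparse CQ mode spaces valid QEs one per 64B cache line, so the CQ
         * index space (and therefore the mask) covers 4x the CQ depth.
         */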
1587         if (dlb2->poll_mode == DLB2_CQ_POLL_MODE_SPARSE)
1588                 qm_port->cq_depth_mask = (qm_port->cq_depth * 4) - 1;
1589         else
1590                 qm_port->cq_depth_mask = qm_port->cq_depth - 1;
1591
1592         qm_port->gen_bit_shift = __builtin_popcount(qm_port->cq_depth_mask);
1593         /* starting value of gen bit - it toggles at wrap time */
1594         qm_port->gen_bit = 1;
1595
1596         dlb2_hw_cq_bitmask_init(qm_port, qm_port->cq_depth);
1597
1598         qm_port->int_armed = false;
1599
1600         /* Save off for later use in info and lookup APIs. */
1601         qm_port->qid_mappings = &dlb2->qm_ldb_to_ev_queue_id[0];
1602
1603         qm_port->dequeue_depth = dequeue_depth;
1604         qm_port->token_pop_thresh = dequeue_depth;
1605
1606         /* The default enqueue functions do not include delayed-pop support for
1607          * performance reasons.
1608          */
1609         if (qm_port->token_pop_mode == DELAYED_POP) {
1610                 dlb2->event_dev->enqueue = dlb2_event_enqueue_delayed;
1611                 dlb2->event_dev->enqueue_burst =
1612                         dlb2_event_enqueue_burst_delayed;
1613                 dlb2->event_dev->enqueue_new_burst =
1614                         dlb2_event_enqueue_new_burst_delayed;
1615                 dlb2->event_dev->enqueue_forward_burst =
1616                         dlb2_event_enqueue_forward_burst_delayed;
1617         }
1618
1619         qm_port->owed_tokens = 0;
1620         qm_port->issued_releases = 0;
1621
1622         /* Save config message too. */
1623         rte_memcpy(&qm_port->cfg.ldb, &cfg, sizeof(qm_port->cfg.ldb));
1624
1625         /* update state */
1626         qm_port->state = PORT_STARTED; /* enabled at create time */
1627         qm_port->config_state = DLB2_CONFIGURED;
1628
1629         if (dlb2->version == DLB2_HW_V2) {
1630                 qm_port->dir_credits = dir_credit_high_watermark;
1631                 qm_port->ldb_credits = ldb_credit_high_watermark;
1632                 qm_port->credit_pool[DLB2_DIR_QUEUE] = &dlb2->dir_credit_pool;
1633                 qm_port->credit_pool[DLB2_LDB_QUEUE] = &dlb2->ldb_credit_pool;
1634
1635                 DLB2_LOG_DBG("dlb2: created ldb port %d, depth = %d, ldb credits=%d, dir credits=%d\n",
1636                              qm_port_id,
1637                              dequeue_depth,
1638                              qm_port->ldb_credits,
1639                              qm_port->dir_credits);
1640         } else {
1641                 qm_port->credits = credit_high_watermark;
1642                 qm_port->credit_pool[DLB2_COMBINED_POOL] = &dlb2->credit_pool;
1643
1644                 DLB2_LOG_DBG("dlb2: created ldb port %d, depth = %d, credits=%d\n",
1645                              qm_port_id,
1646                              dequeue_depth,
1647                              qm_port->credits);
1648         }
1649
1650         qm_port->use_scalar = false;
1651
1652 #if (!defined RTE_ARCH_X86_64)
1653         qm_port->use_scalar = true;
1654 #else
1655         if ((qm_port->cq_depth > 64) ||
1656             (!rte_is_power_of_2(qm_port->cq_depth)) ||
1657             (dlb2->vector_opts_enabled == false))
1658                 qm_port->use_scalar = true;
1659 #endif
1660
1661         rte_spinlock_unlock(&handle->resource_lock);
1662
1663         return 0;
1664
1665 error_exit:
1666
1667         if (qm_port)
1668                 dlb2_free_qe_mem(qm_port);
1669
1670         rte_spinlock_unlock(&handle->resource_lock);
1671
1672         DLB2_LOG_ERR("dlb2: create ldb port failed!\n");
1673
1674         return ret;
1675 }
1676
1677 static void
1678 dlb2_port_link_teardown(struct dlb2_eventdev *dlb2,
1679                         struct dlb2_eventdev_port *ev_port)
1680 {
1681         struct dlb2_eventdev_queue *ev_queue;
1682         int i;
1683
1684         for (i = 0; i < DLB2_MAX_NUM_QIDS_PER_LDB_CQ; i++) {
1685                 if (!ev_port->link[i].valid)
1686                         continue;
1687
1688                 ev_queue = &dlb2->ev_queues[ev_port->link[i].queue_id];
1689
1690                 ev_port->link[i].valid = false;
1691                 ev_port->num_links--;
1692                 ev_queue->num_links--;
1693         }
1694 }
1695
1696 static int
1697 dlb2_hw_create_dir_port(struct dlb2_eventdev *dlb2,
1698                         struct dlb2_eventdev_port *ev_port,
1699                         uint32_t dequeue_depth,
1700                         uint32_t enqueue_depth)
1701 {
1702         struct dlb2_hw_dev *handle = &dlb2->qm_instance;
1703         struct dlb2_create_dir_port_args cfg = { {0} };
1704         int ret;
1705         struct dlb2_port *qm_port = NULL;
1706         char mz_name[RTE_MEMZONE_NAMESIZE];
1707         uint32_t qm_port_id;
1708         uint16_t ldb_credit_high_watermark = 0;
1709         uint16_t dir_credit_high_watermark = 0;
1710         uint16_t credit_high_watermark = 0;
1711
1712         if (dlb2 == NULL || handle == NULL)
1713                 return -EINVAL;
1714
1715         if (dequeue_depth < DLB2_MIN_CQ_DEPTH) {
1716                 DLB2_LOG_ERR("dlb2: invalid dequeue_depth, must be %d-%d\n",
1717                              DLB2_MIN_CQ_DEPTH, DLB2_MAX_INPUT_QUEUE_DEPTH);
1718                 return -EINVAL;
1719         }
1720
1721         if (enqueue_depth < DLB2_MIN_ENQUEUE_DEPTH) {
1722                 DLB2_LOG_ERR("dlb2: invalid enqueue_depth, must be at least %d\n",
1723                              DLB2_MIN_ENQUEUE_DEPTH);
1724                 return -EINVAL;
1725         }
1726
1727         rte_spinlock_lock(&handle->resource_lock);
1728
1729         /* Directed queues are configured at link time. */
1730         cfg.queue_id = -1;
1731
1732         /* We round up to the next power of 2 if necessary */
1733         cfg.cq_depth = rte_align32pow2(dequeue_depth);
1734         cfg.cq_depth_threshold = 1;
1735
1736         /* User controls the LDB high watermark via enqueue depth. The DIR high
1737          * watermark is equal, unless the directed credit pool is too small.
1738          */
1739         if (dlb2->version == DLB2_HW_V2) {
1740                 ldb_credit_high_watermark = enqueue_depth;
1741                 /* Don't use enqueue_depth if it would require more directed
1742                  * credits than are available.
1743                  */
1744                 dir_credit_high_watermark =
1745                         RTE_MIN(enqueue_depth,
1746                                 handle->cfg.num_dir_credits / dlb2->num_ports);
1747         } else
1748                 credit_high_watermark = enqueue_depth;
1749
1750         /* Per QM values */
1751
1752         ret = dlb2_iface_dir_port_create(handle, &cfg,  dlb2->poll_mode);
1753         if (ret < 0) {
1754                 DLB2_LOG_ERR("dlb2: dlb2_dir_port_create error, ret=%d (driver status: %s)\n",
1755                              ret, dlb2_error_strings[cfg.response.status]);
1756                 goto error_exit;
1757         }
1758
1759         qm_port_id = cfg.response.id;
1760
1761         DLB2_LOG_DBG("dlb2: ev_port %d uses qm DIR port %d <<<<<\n",
1762                      ev_port->id, qm_port_id);
1763
1764         qm_port = &ev_port->qm_port;
1765         qm_port->ev_port = ev_port; /* back ptr */
1766         qm_port->dlb2 = dlb2;  /* back ptr */
1767
1768         /*
1769          * Init local qe struct(s).
1770          * Note: MOVDIR64 requires the enqueue QE to be aligned
1771          */
1772
1773         snprintf(mz_name, sizeof(mz_name), "dlb2_dir_port%d",
1774                  ev_port->id);
1775
1776         ret = dlb2_init_qe_mem(qm_port, mz_name);
1777
1778         if (ret < 0) {
1779                 DLB2_LOG_ERR("dlb2: init_qe_mem failed, ret=%d\n", ret);
1780                 goto error_exit;
1781         }
1782
1783         qm_port->id = qm_port_id;
1784
1785         if (dlb2->version == DLB2_HW_V2) {
1786                 qm_port->cached_ldb_credits = 0;
1787                 qm_port->cached_dir_credits = 0;
1788         } else
1789                 qm_port->cached_credits = 0;
1790
1791         /* CQs with depth < 8 use an 8-entry queue, but withhold credits so
1792          * the effective depth is smaller.
1793          */
1794         qm_port->cq_depth = cfg.cq_depth <= 8 ? 8 : cfg.cq_depth;
1795         qm_port->cq_idx = 0;
1796         qm_port->cq_idx_unmasked = 0;
1797
1798         if (dlb2->poll_mode == DLB2_CQ_POLL_MODE_SPARSE)
1799                 qm_port->cq_depth_mask = (cfg.cq_depth * 4) - 1;
1800         else
1801                 qm_port->cq_depth_mask = cfg.cq_depth - 1;
1802
1803         qm_port->gen_bit_shift = __builtin_popcount(qm_port->cq_depth_mask);
1804         /* starting value of gen bit - it toggles at wrap time */
1805         qm_port->gen_bit = 1;
1806         dlb2_hw_cq_bitmask_init(qm_port, qm_port->cq_depth);
1807
1808         qm_port->int_armed = false;
1809
1810         /* Save off for later use in info and lookup APIs. */
1811         qm_port->qid_mappings = &dlb2->qm_dir_to_ev_queue_id[0];
1812
1813         qm_port->dequeue_depth = dequeue_depth;
1814
1815         /* Directed ports are auto-pop, by default. */
1816         qm_port->token_pop_mode = AUTO_POP;
1817         qm_port->owed_tokens = 0;
1818         qm_port->issued_releases = 0;
1819
1820         /* Save config message too. */
1821         rte_memcpy(&qm_port->cfg.dir, &cfg, sizeof(qm_port->cfg.dir));
1822
1823         /* update state */
1824         qm_port->state = PORT_STARTED; /* enabled at create time */
1825         qm_port->config_state = DLB2_CONFIGURED;
1826
1827         if (dlb2->version == DLB2_HW_V2) {
1828                 qm_port->dir_credits = dir_credit_high_watermark;
1829                 qm_port->ldb_credits = ldb_credit_high_watermark;
1830                 qm_port->credit_pool[DLB2_DIR_QUEUE] = &dlb2->dir_credit_pool;
1831                 qm_port->credit_pool[DLB2_LDB_QUEUE] = &dlb2->ldb_credit_pool;
1832
1833                 DLB2_LOG_DBG("dlb2: created dir port %d, depth = %d cr=%d,%d\n",
1834                              qm_port_id,
1835                              dequeue_depth,
1836                              dir_credit_high_watermark,
1837                              ldb_credit_high_watermark);
1838         } else {
1839                 qm_port->credits = credit_high_watermark;
1840                 qm_port->credit_pool[DLB2_COMBINED_POOL] = &dlb2->credit_pool;
1841
1842                 DLB2_LOG_DBG("dlb2: created dir port %d, depth = %d cr=%d\n",
1843                              qm_port_id,
1844                              dequeue_depth,
1845                              credit_high_watermark);
1846         }
1847
1848 #if (!defined RTE_ARCH_X86_64)
1849         qm_port->use_scalar = true;
1850 #else
1851         if ((qm_port->cq_depth > 64) ||
1852             (!rte_is_power_of_2(qm_port->cq_depth)) ||
1853             (dlb2->vector_opts_enabled == false))
1854                 qm_port->use_scalar = true;
1855 #endif
1856
1857         rte_spinlock_unlock(&handle->resource_lock);
1858
1859         return 0;
1860
1861 error_exit:
1862
1863         if (qm_port)
1864                 dlb2_free_qe_mem(qm_port);
1865
1866         rte_spinlock_unlock(&handle->resource_lock);
1867
1868         DLB2_LOG_ERR("dlb2: create dir port failed!\n");
1869
1870         return ret;
1871 }
1872
1873 static int
1874 dlb2_eventdev_port_setup(struct rte_eventdev *dev,
1875                          uint8_t ev_port_id,
1876                          const struct rte_event_port_conf *port_conf)
1877 {
1878         struct dlb2_eventdev *dlb2;
1879         struct dlb2_eventdev_port *ev_port;
1880         int ret;
1881         uint32_t hw_credit_quanta, sw_credit_quanta;
1882
1883         if (dev == NULL || port_conf == NULL) {
1884                 DLB2_LOG_ERR("Null parameter\n");
1885                 return -EINVAL;
1886         }
1887
1888         dlb2 = dlb2_pmd_priv(dev);
1889
1890         if (ev_port_id >= DLB2_MAX_NUM_PORTS(dlb2->version))
1891                 return -EINVAL;
1892
1893         if (port_conf->dequeue_depth >
1894                 evdev_dlb2_default_info.max_event_port_dequeue_depth ||
1895             port_conf->enqueue_depth >
1896                 evdev_dlb2_default_info.max_event_port_enqueue_depth)
1897                 return -EINVAL;
1898
1899         ev_port = &dlb2->ev_ports[ev_port_id];
1900         /* configured? */
1901         if (ev_port->setup_done) {
1902                 DLB2_LOG_ERR("evport %d is already configured\n", ev_port_id);
1903                 return -EINVAL;
1904         }
1905
1906         /* Default for worker ports */
1907         sw_credit_quanta = dlb2->sw_credit_quanta;
1908         hw_credit_quanta = dlb2->hw_credit_quanta;
1909
1910         ev_port->qm_port.is_directed = port_conf->event_port_cfg &
1911                 RTE_EVENT_PORT_CFG_SINGLE_LINK;
1912
1913         /*
1914          * Validate credit config before creating port
1915          */
1916
1921         if (port_conf->event_port_cfg & RTE_EVENT_PORT_CFG_HINT_PRODUCER) {
1922                 /* Producer type ports. Mostly enqueue */
1923                 sw_credit_quanta = DLB2_SW_CREDIT_P_QUANTA_DEFAULT;
1924                 hw_credit_quanta = DLB2_SW_CREDIT_P_BATCH_SZ;
1925         }
1926         if (port_conf->event_port_cfg & RTE_EVENT_PORT_CFG_HINT_CONSUMER) {
1927                 /* Consumer type ports. Mostly dequeue */
1928                 sw_credit_quanta = DLB2_SW_CREDIT_C_QUANTA_DEFAULT;
1929                 hw_credit_quanta = DLB2_SW_CREDIT_C_BATCH_SZ;
1930         }
1931         ev_port->credit_update_quanta = sw_credit_quanta;
1932         ev_port->qm_port.hw_credit_quanta = hw_credit_quanta;
1933
1934         if (port_conf->enqueue_depth > sw_credit_quanta ||
1935             port_conf->enqueue_depth > hw_credit_quanta) {
1936                 DLB2_LOG_ERR("Invalid port config. Enqueue depth %d must be <= credit quanta %d and batch size %d\n",
1937                              port_conf->enqueue_depth,
1938                              sw_credit_quanta,
1939                              hw_credit_quanta);
1940                 return -EINVAL;
1941         }
1942         ev_port->enq_retries = port_conf->enqueue_depth / sw_credit_quanta;
1943
1944         /*
1945          * Create port
1946          */
1947
1948         if (!ev_port->qm_port.is_directed) {
1949                 ret = dlb2_hw_create_ldb_port(dlb2,
1950                                               ev_port,
1951                                               port_conf->dequeue_depth,
1952                                               port_conf->enqueue_depth);
1953                 if (ret < 0) {
1954                         DLB2_LOG_ERR("Failed to create the LDB port, ev_port_id=%d\n",
1955                                      ev_port_id);
1956
1957                         return ret;
1958                 }
1959         } else {
1960                 ret = dlb2_hw_create_dir_port(dlb2,
1961                                               ev_port,
1962                                               port_conf->dequeue_depth,
1963                                               port_conf->enqueue_depth);
1964                 if (ret < 0) {
1965                         DLB2_LOG_ERR("Failed to create the DIR port\n");
1966                         return ret;
1967                 }
1968         }
1969
1970         /* Save off port config for reconfig */
1971         ev_port->conf = *port_conf;
1972
1973         ev_port->id = ev_port_id;
1974         ev_port->enq_configured = true;
1975         ev_port->setup_done = true;
1976         ev_port->inflight_max = port_conf->new_event_threshold;
1977         ev_port->implicit_release = !(port_conf->event_port_cfg &
1978                   RTE_EVENT_PORT_CFG_DISABLE_IMPL_REL);
1979         ev_port->outstanding_releases = 0;
1980         ev_port->inflight_credits = 0;
1981         ev_port->dlb2 = dlb2; /* reverse link */
1982
1983         /* Tear down pre-existing port->queue links */
1984         if (dlb2->run_state == DLB2_RUN_STATE_STOPPED)
1985                 dlb2_port_link_teardown(dlb2, &dlb2->ev_ports[ev_port_id]);
1986
1987         dev->data->ports[ev_port_id] = &dlb2->ev_ports[ev_port_id];
1988
1989         if (rte_cpu_get_flag_enabled(RTE_CPUFLAG_AVX512VL) &&
1990             rte_vect_get_max_simd_bitwidth() >= RTE_VECT_SIMD_512)
1991                 ev_port->qm_port.use_avx512 = true;
1992         else
1993                 ev_port->qm_port.use_avx512 = false;
1994
1995         return 0;
1996 }
1997
1998 static int16_t
1999 dlb2_hw_map_ldb_qid_to_port(struct dlb2_hw_dev *handle,
2000                             uint32_t qm_port_id,
2001                             uint16_t qm_qid,
2002                             uint8_t priority)
2003 {
2004         struct dlb2_map_qid_args cfg;
2005         int32_t ret;
2006
2007         if (handle == NULL)
2008                 return -EINVAL;
2009
2010         /* Build message */
2011         cfg.port_id = qm_port_id;
2012         cfg.qid = qm_qid;
2013         cfg.priority = EV_TO_DLB2_PRIO(priority);
2014
2015         ret = dlb2_iface_map_qid(handle, &cfg);
2016         if (ret < 0) {
2017                 DLB2_LOG_ERR("dlb2: map qid error, ret=%d (driver status: %s)\n",
2018                              ret, dlb2_error_strings[cfg.response.status]);
2019                 DLB2_LOG_ERR("dlb2: grp=%d, qm_port=%d, qm_qid=%d prio=%d\n",
2020                              handle->domain_id, cfg.port_id,
2021                              cfg.qid,
2022                              cfg.priority);
2023         } else {
2024                 DLB2_LOG_DBG("dlb2: mapped queue %d to qm_port %d\n",
2025                              qm_qid, qm_port_id);
2026         }
2027
2028         return ret;
2029 }
2030
2031 static int
2032 dlb2_event_queue_join_ldb(struct dlb2_eventdev *dlb2,
2033                           struct dlb2_eventdev_port *ev_port,
2034                           struct dlb2_eventdev_queue *ev_queue,
2035                           uint8_t priority)
2036 {
2037         int first_avail = -1;
2038         int ret, i;
2039
2040         for (i = 0; i < DLB2_MAX_NUM_QIDS_PER_LDB_CQ; i++) {
2041                 if (ev_port->link[i].valid) {
2042                         if (ev_port->link[i].queue_id == ev_queue->id &&
2043                             ev_port->link[i].priority == priority) {
2044                                 if (ev_port->link[i].mapped)
2045                                         return 0; /* already mapped */
2046                                 first_avail = i;
2047                         }
2048                 } else if (first_avail == -1)
2049                         first_avail = i;
2050         }
2051         if (first_avail == -1) {
2052                 DLB2_LOG_ERR("dlb2: qm_port %d has no available QID slots.\n",
2053                              ev_port->qm_port.id);
2054                 return -EINVAL;
2055         }
2056
2057         ret = dlb2_hw_map_ldb_qid_to_port(&dlb2->qm_instance,
2058                                           ev_port->qm_port.id,
2059                                           ev_queue->qm_queue.id,
2060                                           priority);
2061
2062         if (!ret)
2063                 ev_port->link[first_avail].mapped = true;
2064
2065         return ret;
2066 }
2067
2068 static int32_t
2069 dlb2_hw_create_dir_queue(struct dlb2_eventdev *dlb2,
2070                          struct dlb2_eventdev_queue *ev_queue,
2071                          int32_t qm_port_id)
2072 {
2073         struct dlb2_hw_dev *handle = &dlb2->qm_instance;
2074         struct dlb2_create_dir_queue_args cfg;
2075         int32_t ret;
2076
2077         /* The directed port is always configured before its queue */
2078         cfg.port_id = qm_port_id;
2079
2080         if (ev_queue->depth_threshold == 0) {
2081                 cfg.depth_threshold = dlb2->default_depth_thresh;
2082                 ev_queue->depth_threshold =
2083                         dlb2->default_depth_thresh;
2084         } else
2085                 cfg.depth_threshold = ev_queue->depth_threshold;
2086
2087         ret = dlb2_iface_dir_queue_create(handle, &cfg);
2088         if (ret < 0) {
2089                 DLB2_LOG_ERR("dlb2: create DIR event queue error, ret=%d (driver status: %s)\n",
2090                              ret, dlb2_error_strings[cfg.response.status]);
2091                 return -EINVAL;
2092         }
2093
2094         return cfg.response.id;
2095 }
2096
2097 static int
2098 dlb2_eventdev_dir_queue_setup(struct dlb2_eventdev *dlb2,
2099                               struct dlb2_eventdev_queue *ev_queue,
2100                               struct dlb2_eventdev_port *ev_port)
2101 {
2102         int32_t qm_qid;
2103
2104         qm_qid = dlb2_hw_create_dir_queue(dlb2, ev_queue, ev_port->qm_port.id);
2105
2106         if (qm_qid < 0) {
2107                 DLB2_LOG_ERR("Failed to create the DIR queue\n");
2108                 return qm_qid;
2109         }
2110
2111         dlb2->qm_dir_to_ev_queue_id[qm_qid] = ev_queue->id;
2112
2113         ev_queue->qm_queue.id = qm_qid;
2114
2115         return 0;
2116 }
2117
2118 static int
2119 dlb2_do_port_link(struct rte_eventdev *dev,
2120                   struct dlb2_eventdev_queue *ev_queue,
2121                   struct dlb2_eventdev_port *ev_port,
2122                   uint8_t prio)
2123 {
2124         struct dlb2_eventdev *dlb2 = dlb2_pmd_priv(dev);
2125         int err;
2126
2127         /* Don't link until start time. */
2128         if (dlb2->run_state == DLB2_RUN_STATE_STOPPED)
2129                 return 0;
2130
2131         if (ev_queue->qm_queue.is_directed)
2132                 err = dlb2_eventdev_dir_queue_setup(dlb2, ev_queue, ev_port);
2133         else
2134                 err = dlb2_event_queue_join_ldb(dlb2, ev_port, ev_queue, prio);
2135
2136         if (err) {
2137                 DLB2_LOG_ERR("port link failure for %s ev_q %d, ev_port %d\n",
2138                              ev_queue->qm_queue.is_directed ? "DIR" : "LDB",
2139                              ev_queue->id, ev_port->id);
2140
2141                 rte_errno = err;
2142                 return -1;
2143         }
2144
2145         return 0;
2146 }
2147
2148 static int
2149 dlb2_validate_port_link(struct dlb2_eventdev_port *ev_port,
2150                         uint8_t queue_id,
2151                         bool link_exists,
2152                         int index)
2153 {
2154         struct dlb2_eventdev *dlb2 = ev_port->dlb2;
2155         struct dlb2_eventdev_queue *ev_queue;
2156         bool port_is_dir, queue_is_dir;
2157
2158         if (queue_id > dlb2->num_queues) {
2159                 rte_errno = -EINVAL;
2160                 return -1;
2161         }
2162
2163         ev_queue = &dlb2->ev_queues[queue_id];
2164
2165         if (!ev_queue->setup_done &&
2166             ev_queue->qm_queue.config_state != DLB2_PREV_CONFIGURED) {
2167                 rte_errno = -EINVAL;
2168                 return -1;
2169         }
2170
2171         port_is_dir = ev_port->qm_port.is_directed;
2172         queue_is_dir = ev_queue->qm_queue.is_directed;
2173
2174         if (port_is_dir != queue_is_dir) {
2175                 DLB2_LOG_ERR("%s queue %u can't link to %s port %u\n",
2176                              queue_is_dir ? "DIR" : "LDB", ev_queue->id,
2177                              port_is_dir ? "DIR" : "LDB", ev_port->id);
2178
2179                 rte_errno = -EINVAL;
2180                 return -1;
2181         }
2182
2183         /* Check if there is space for the requested link */
2184         if (!link_exists && index == -1) {
2185                 DLB2_LOG_ERR("no space for new link\n");
2186                 rte_errno = -ENOSPC;
2187                 return -1;
2188         }
2189
2190         /* Check if the directed port is already linked */
2191         if (ev_port->qm_port.is_directed && ev_port->num_links > 0 &&
2192             !link_exists) {
2193                 DLB2_LOG_ERR("Can't link DIR port %d to >1 queues\n",
2194                              ev_port->id);
2195                 rte_errno = -EINVAL;
2196                 return -1;
2197         }
2198
2199         /* Check if the directed queue is already linked */
2200         if (ev_queue->qm_queue.is_directed && ev_queue->num_links > 0 &&
2201             !link_exists) {
2202                 DLB2_LOG_ERR("Can't link DIR queue %d to >1 ports\n",
2203                              ev_queue->id);
2204                 rte_errno = -EINVAL;
2205                 return -1;
2206         }
2207
2208         return 0;
2209 }
2210
2211 static int
2212 dlb2_eventdev_port_link(struct rte_eventdev *dev, void *event_port,
2213                         const uint8_t queues[], const uint8_t priorities[],
2214                         uint16_t nb_links)
2215
2216 {
2217         struct dlb2_eventdev_port *ev_port = event_port;
2218         struct dlb2_eventdev *dlb2;
2219         int i, j;
2220
2221         RTE_SET_USED(dev);
2222
2223         if (ev_port == NULL) {
2224                 DLB2_LOG_ERR("dlb2: evport not setup\n");
2225                 rte_errno = -EINVAL;
2226                 return 0;
2227         }
2228
2229         if (!ev_port->setup_done &&
2230             ev_port->qm_port.config_state != DLB2_PREV_CONFIGURED) {
2231                 DLB2_LOG_ERR("dlb2: evport not setup\n");
2232                 rte_errno = -EINVAL;
2233                 return 0;
2234         }
2235
2236         /* Note: rte_event_port_link() ensures the PMD won't receive a NULL
2237          * queues pointer.
2238          */
2239         if (nb_links == 0) {
2240                 DLB2_LOG_DBG("dlb2: nb_links is 0\n");
2241                 return 0; /* Ignore and return success */
2242         }
2243
2244         dlb2 = ev_port->dlb2;
2245
2246         DLB2_LOG_DBG("Linking %u queues to %s port %d\n",
2247                      nb_links,
2248                      ev_port->qm_port.is_directed ? "DIR" : "LDB",
2249                      ev_port->id);
2250
2251         for (i = 0; i < nb_links; i++) {
2252                 struct dlb2_eventdev_queue *ev_queue;
2253                 uint8_t queue_id, prio;
2254                 bool found = false;
2255                 int index = -1;
2256
2257                 queue_id = queues[i];
2258                 prio = priorities[i];
2259
2260                 /* Check if the link already exists. */
2261                 for (j = 0; j < DLB2_MAX_NUM_QIDS_PER_LDB_CQ; j++)
2262                         if (ev_port->link[j].valid) {
2263                                 if (ev_port->link[j].queue_id == queue_id) {
2264                                         found = true;
2265                                         index = j;
2266                                         break;
2267                                 }
2268                         } else if (index == -1) {
2269                                 index = j;
2270                         }
2271
2272                 /* could not link */
2273                 if (index == -1)
2274                         break;
2275
2276                 /* Check if already linked at the requested priority */
2277                 if (found && ev_port->link[j].priority == prio)
2278                         continue;
2279
2280                 if (dlb2_validate_port_link(ev_port, queue_id, found, index))
2281                         break; /* return index of offending queue */
2282
2283                 ev_queue = &dlb2->ev_queues[queue_id];
2284
2285                 if (dlb2_do_port_link(dev, ev_queue, ev_port, prio))
2286                         break; /* return index of offending queue */
2287
2288                 ev_queue->num_links++;
2289
2290                 ev_port->link[index].queue_id = queue_id;
2291                 ev_port->link[index].priority = prio;
2292                 ev_port->link[index].valid = true;
2293                 /* If the entry already existed, this was just a prio change */
2294                 if (!found)
2295                         ev_port->num_links++;
2296         }
2297         return i;
2298 }
2299
2300 static int16_t
2301 dlb2_hw_unmap_ldb_qid_from_port(struct dlb2_hw_dev *handle,
2302                                 uint32_t qm_port_id,
2303                                 uint16_t qm_qid)
2304 {
2305         struct dlb2_unmap_qid_args cfg;
2306         int32_t ret;
2307
2308         if (handle == NULL)
2309                 return -EINVAL;
2310
2311         cfg.port_id = qm_port_id;
2312         cfg.qid = qm_qid;
2313
2314         ret = dlb2_iface_unmap_qid(handle, &cfg);
2315         if (ret < 0)
2316                 DLB2_LOG_ERR("dlb2: unmap qid error, ret=%d (driver status: %s)\n",
2317                              ret, dlb2_error_strings[cfg.response.status]);
2318
2319         return ret;
2320 }
2321
2322 static int
2323 dlb2_event_queue_detach_ldb(struct dlb2_eventdev *dlb2,
2324                             struct dlb2_eventdev_port *ev_port,
2325                             struct dlb2_eventdev_queue *ev_queue)
2326 {
2327         int ret, i;
2328
2329         /* Don't unlink until start time. */
2330         if (dlb2->run_state == DLB2_RUN_STATE_STOPPED)
2331                 return 0;
2332
2333         for (i = 0; i < DLB2_MAX_NUM_QIDS_PER_LDB_CQ; i++) {
2334                 if (ev_port->link[i].valid &&
2335                     ev_port->link[i].queue_id == ev_queue->id)
2336                         break; /* found */
2337         }
2338
2339         /* This is expected: the eventdev API blindly attempts to unmap all
2340          * queues, including ones that were never linked.
2341          */
2342         if (i == DLB2_MAX_NUM_QIDS_PER_LDB_CQ) {
2343                 DLB2_LOG_DBG("dlb2: ignoring LB QID %d not mapped for qm_port %d.\n",
2344                              ev_queue->qm_queue.id,
2345                              ev_port->qm_port.id);
2346                 return 0;
2347         }
2348
2349         ret = dlb2_hw_unmap_ldb_qid_from_port(&dlb2->qm_instance,
2350                                               ev_port->qm_port.id,
2351                                               ev_queue->qm_queue.id);
2352         if (!ret)
2353                 ev_port->link[i].mapped = false;
2354
2355         return ret;
2356 }
2357
2358 static int
2359 dlb2_eventdev_port_unlink(struct rte_eventdev *dev, void *event_port,
2360                           uint8_t queues[], uint16_t nb_unlinks)
2361 {
2362         struct dlb2_eventdev_port *ev_port = event_port;
2363         struct dlb2_eventdev *dlb2;
2364         int i;
2365
2366         RTE_SET_USED(dev);
2367
2368         if (!ev_port->setup_done) {
2369                 DLB2_LOG_ERR("dlb2: evport %d is not configured\n",
2370                              ev_port->id);
2371                 rte_errno = -EINVAL;
2372                 return 0;
2373         }
2374
2375         if (queues == NULL || nb_unlinks == 0) {
2376                 DLB2_LOG_DBG("dlb2: queues is NULL or nb_unlinks is 0\n");
2377                 return 0; /* Ignore and return success */
2378         }
2379
2380         if (ev_port->qm_port.is_directed) {
2381                 DLB2_LOG_DBG("dlb2: ignore unlink from dir port %d\n",
2382                              ev_port->id);
2383                 rte_errno = 0;
2384                 return nb_unlinks; /* as if success */
2385         }
2386
2387         dlb2 = ev_port->dlb2;
2388
2389         for (i = 0; i < nb_unlinks; i++) {
2390                 struct dlb2_eventdev_queue *ev_queue;
2391                 int ret, j;
2392
2393                 if (queues[i] >= dlb2->num_queues) {
2394                         DLB2_LOG_ERR("dlb2: invalid queue id %d\n", queues[i]);
2395                         rte_errno = -EINVAL;
2396                         return i; /* return index of offending queue */
2397                 }
2398
2399                 ev_queue = &dlb2->ev_queues[queues[i]];
2400
2401                 /* Does a link exist? */
2402                 for (j = 0; j < DLB2_MAX_NUM_QIDS_PER_LDB_CQ; j++)
2403                         if (ev_port->link[j].queue_id == queues[i] &&
2404                             ev_port->link[j].valid)
2405                                 break;
2406
2407                 if (j == DLB2_MAX_NUM_QIDS_PER_LDB_CQ)
2408                         continue;
2409
2410                 ret = dlb2_event_queue_detach_ldb(dlb2, ev_port, ev_queue);
2411                 if (ret) {
2412                         DLB2_LOG_ERR("unlink err=%d for port %d queue %d\n",
2413                                      ret, ev_port->id, queues[i]);
2414                         rte_errno = -ENOENT;
2415                         return i; /* return index of offending queue */
2416                 }
2417
2418                 ev_port->link[j].valid = false;
2419                 ev_port->num_links--;
2420                 ev_queue->num_links--;
2421         }
2422
2423         return nb_unlinks;
2424 }
2425
2426 static int
2427 dlb2_eventdev_port_unlinks_in_progress(struct rte_eventdev *dev,
2428                                        void *event_port)
2429 {
2430         struct dlb2_eventdev_port *ev_port = event_port;
2431         struct dlb2_eventdev *dlb2;
2432         struct dlb2_hw_dev *handle;
2433         struct dlb2_pending_port_unmaps_args cfg;
2434         int ret;
2435
2436         RTE_SET_USED(dev);
2437
2438         if (!ev_port->setup_done) {
2439                 DLB2_LOG_ERR("dlb2: evport %d is not configured\n",
2440                              ev_port->id);
2441                 rte_errno = -EINVAL;
2442                 return 0;
2443         }
2444
2445         cfg.port_id = ev_port->qm_port.id;
2446         dlb2 = ev_port->dlb2;
2447         handle = &dlb2->qm_instance;
2448         ret = dlb2_iface_pending_port_unmaps(handle, &cfg);
2449
2450         if (ret < 0) {
2451                 DLB2_LOG_ERR("dlb2: num_unlinks_in_progress ret=%d (driver status: %s)\n",
2452                              ret, dlb2_error_strings[cfg.response.status]);
2453                 return ret;
2454         }
2455
2456         return cfg.response.id;
2457 }
2458
2459 static int
2460 dlb2_eventdev_reapply_configuration(struct rte_eventdev *dev)
2461 {
2462         struct dlb2_eventdev *dlb2 = dlb2_pmd_priv(dev);
2463         int ret, i;
2464
2465         /* If an event queue or port was previously configured, but hasn't been
2466          * reconfigured, reapply its original configuration.
2467          */
2468         for (i = 0; i < dlb2->num_queues; i++) {
2469                 struct dlb2_eventdev_queue *ev_queue;
2470
2471                 ev_queue = &dlb2->ev_queues[i];
2472
2473                 if (ev_queue->qm_queue.config_state != DLB2_PREV_CONFIGURED)
2474                         continue;
2475
2476                 ret = dlb2_eventdev_queue_setup(dev, i, &ev_queue->conf);
2477                 if (ret < 0) {
2478                         DLB2_LOG_ERR("dlb2: failed to reconfigure queue %d", i);
2479                         return ret;
2480                 }
2481         }
2482
2483         for (i = 0; i < dlb2->num_ports; i++) {
2484                 struct dlb2_eventdev_port *ev_port = &dlb2->ev_ports[i];
2485
2486                 if (ev_port->qm_port.config_state != DLB2_PREV_CONFIGURED)
2487                         continue;
2488
2489                 ret = dlb2_eventdev_port_setup(dev, i, &ev_port->conf);
2490                 if (ret < 0) {
2491                         DLB2_LOG_ERR("dlb2: failed to reconfigure ev_port %d",
2492                                      i);
2493                         return ret;
2494                 }
2495         }
2496
2497         return 0;
2498 }
2499
2500 static int
2501 dlb2_eventdev_apply_port_links(struct rte_eventdev *dev)
2502 {
2503         struct dlb2_eventdev *dlb2 = dlb2_pmd_priv(dev);
2504         int i;
2505
2506         /* Perform requested port->queue links */
2507         for (i = 0; i < dlb2->num_ports; i++) {
2508                 struct dlb2_eventdev_port *ev_port = &dlb2->ev_ports[i];
2509                 int j;
2510
2511                 for (j = 0; j < DLB2_MAX_NUM_QIDS_PER_LDB_CQ; j++) {
2512                         struct dlb2_eventdev_queue *ev_queue;
2513                         uint8_t prio, queue_id;
2514
2515                         if (!ev_port->link[j].valid)
2516                                 continue;
2517
2518                         prio = ev_port->link[j].priority;
2519                         queue_id = ev_port->link[j].queue_id;
2520
2521                         if (dlb2_validate_port_link(ev_port, queue_id, true, j))
2522                                 return -EINVAL;
2523
2524                         ev_queue = &dlb2->ev_queues[queue_id];
2525
2526                         if (dlb2_do_port_link(dev, ev_queue, ev_port, prio))
2527                                 return -EINVAL;
2528                 }
2529         }
2530
2531         return 0;
2532 }
2533
2534 static int
2535 dlb2_eventdev_start(struct rte_eventdev *dev)
2536 {
2537         struct dlb2_eventdev *dlb2 = dlb2_pmd_priv(dev);
2538         struct dlb2_hw_dev *handle = &dlb2->qm_instance;
2539         struct dlb2_start_domain_args cfg;
2540         int ret, i;
2541
2542         rte_spinlock_lock(&dlb2->qm_instance.resource_lock);
2543         if (dlb2->run_state != DLB2_RUN_STATE_STOPPED) {
2544                 DLB2_LOG_ERR("bad state %d for dev_start\n",
2545                              (int)dlb2->run_state);
2546                 rte_spinlock_unlock(&dlb2->qm_instance.resource_lock);
2547                 return -EINVAL;
2548         }
2549         dlb2->run_state = DLB2_RUN_STATE_STARTING;
2550         rte_spinlock_unlock(&dlb2->qm_instance.resource_lock);
2551
2552         /* If the device was configured more than once, some event ports and/or
2553          * queues may need to be reconfigured.
2554          */
2555         ret = dlb2_eventdev_reapply_configuration(dev);
2556         if (ret)
2557                 return ret;
2558
2559         /* The DLB PMD delays port links until the device is started. */
2560         ret = dlb2_eventdev_apply_port_links(dev);
2561         if (ret)
2562                 return ret;
2563
2564         for (i = 0; i < dlb2->num_ports; i++) {
2565                 if (!dlb2->ev_ports[i].setup_done) {
2566                         DLB2_LOG_ERR("dlb2: port %d not setup", i);
2567                         return -ESTALE;
2568                 }
2569         }
2570
2571         for (i = 0; i < dlb2->num_queues; i++) {
2572                 if (dlb2->ev_queues[i].num_links == 0) {
2573                         DLB2_LOG_ERR("dlb2: queue %d is not linked", i);
2574                         return -ENOLINK;
2575                 }
2576         }
2577
2578         ret = dlb2_iface_sched_domain_start(handle, &cfg);
2579         if (ret < 0) {
2580                 DLB2_LOG_ERR("dlb2: sched_domain_start ret=%d (driver status: %s)\n",
2581                              ret, dlb2_error_strings[cfg.response.status]);
2582                 return ret;
2583         }
2584
2585         dlb2->run_state = DLB2_RUN_STATE_STARTED;
2586         DLB2_LOG_DBG("dlb2: sched_domain_start completed OK\n");
2587
2588         return 0;
2589 }
2590
2591 static inline uint32_t
2592 dlb2_port_credits_get(struct dlb2_port *qm_port,
2593                       enum dlb2_hw_queue_types type)
2594 {
2595         uint32_t credits = *qm_port->credit_pool[type];
2596         /* By default hw_credit_quanta is DLB2_SW_CREDIT_BATCH_SZ */
2597         uint32_t batch_size = qm_port->hw_credit_quanta;
2598
2599         if (unlikely(credits < batch_size))
2600                 batch_size = credits;
2601
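        /* Claim up to one batch from the shared pool with a single CAS; if
         * the pool is empty or another port raced and changed it, return 0
         * and let the caller retry on a later enqueue.
         */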
2602         if (likely(credits &&
2603                    __atomic_compare_exchange_n(
2604                         qm_port->credit_pool[type],
2605                         &credits, credits - batch_size, false,
2606                         __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST)))
2607                 return batch_size;
2608         else
2609                 return 0;
2610 }
2611
2612 static inline void
2613 dlb2_replenish_sw_credits(struct dlb2_eventdev *dlb2,
2614                           struct dlb2_eventdev_port *ev_port)
2615 {
2616         uint16_t quanta = ev_port->credit_update_quanta;
2617
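        /* e.g. with a quanta of 32 and 70 locally cached credits, 38 inflight
         * slots are released back to the device-wide counter and 32 credits
         * are kept for future enqueues.
         */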
2618         if (ev_port->inflight_credits >= quanta * 2) {
2619                 /* Replenish credits, saving one quanta for enqueues */
2620                 uint16_t val = ev_port->inflight_credits - quanta;
2621
2622                 __atomic_fetch_sub(&dlb2->inflights, val, __ATOMIC_SEQ_CST);
2623                 ev_port->inflight_credits -= val;
2624         }
2625 }
2626
2627 static inline int
2628 dlb2_check_enqueue_sw_credits(struct dlb2_eventdev *dlb2,
2629                               struct dlb2_eventdev_port *ev_port)
2630 {
2631         uint32_t sw_inflights = __atomic_load_n(&dlb2->inflights,
2632                                                 __ATOMIC_SEQ_CST);
2633         const int num = 1;
2634
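        /* Software credits gate OP_NEW enqueues: they are drawn from the
         * device-wide inflights counter in credit_update_quanta batches, so
         * the atomic add is amortized over many enqueues.
         */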
2635         if (unlikely(ev_port->inflight_max < sw_inflights)) {
2636                 DLB2_INC_STAT(ev_port->stats.traffic.tx_nospc_inflight_max, 1);
2637                 rte_errno = -ENOSPC;
2638                 return 1;
2639         }
2640
2641         if (ev_port->inflight_credits < num) {
2642                 /* check if event enqueue brings ev_port over max threshold */
2643                 uint32_t credit_update_quanta = ev_port->credit_update_quanta;
2644
2645                 if (sw_inflights + credit_update_quanta >
2646                                 dlb2->new_event_limit) {
2647                         DLB2_INC_STAT(
2648                         ev_port->stats.traffic.tx_nospc_new_event_limit,
2649                         1);
2650                         rte_errno = -ENOSPC;
2651                         return 1;
2652                 }
2653
2654                 __atomic_fetch_add(&dlb2->inflights, credit_update_quanta,
2655                                    __ATOMIC_SEQ_CST);
2656                 ev_port->inflight_credits += (credit_update_quanta);
2657
2658                 if (ev_port->inflight_credits < num) {
2659                         DLB2_INC_STAT(
2660                         ev_port->stats.traffic.tx_nospc_inflight_credits,
2661                         1);
2662                         rte_errno = -ENOSPC;
2663                         return 1;
2664                 }
2665         }
2666
2667         return 0;
2668 }
2669
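/* The per-port credit caches below are refilled from the shared pool only
 * when they run dry, so dlb2_port_credits_get() and its atomic access are
 * amortized across many enqueues.
 */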
2670 static inline int
2671 dlb2_check_enqueue_hw_ldb_credits(struct dlb2_port *qm_port)
2672 {
2673         if (unlikely(qm_port->cached_ldb_credits == 0)) {
2674                 qm_port->cached_ldb_credits =
2675                         dlb2_port_credits_get(qm_port,
2676                                               DLB2_LDB_QUEUE);
2677                 if (unlikely(qm_port->cached_ldb_credits == 0)) {
2678                         DLB2_INC_STAT(
2679                         qm_port->ev_port->stats.traffic.tx_nospc_ldb_hw_credits,
2680                         1);
2681                         DLB2_LOG_DBG("ldb credits exhausted\n");
2682                         return 1; /* credits exhausted */
2683                 }
2684         }
2685
2686         return 0;
2687 }
2688
2689 static inline int
2690 dlb2_check_enqueue_hw_dir_credits(struct dlb2_port *qm_port)
2691 {
2692         if (unlikely(qm_port->cached_dir_credits == 0)) {
2693                 qm_port->cached_dir_credits =
2694                         dlb2_port_credits_get(qm_port,
2695                                               DLB2_DIR_QUEUE);
2696                 if (unlikely(qm_port->cached_dir_credits == 0)) {
2697                         DLB2_INC_STAT(
2698                         qm_port->ev_port->stats.traffic.tx_nospc_dir_hw_credits,
2699                         1);
2700                         DLB2_LOG_DBG("dir credits exhausted\n");
2701                         return 1; /* credits exhausted */
2702                 }
2703         }
2704
2705         return 0;
2706 }
2707
2708 static inline int
2709 dlb2_check_enqueue_hw_credits(struct dlb2_port *qm_port)
2710 {
2711         if (unlikely(qm_port->cached_credits == 0)) {
2712                 qm_port->cached_credits =
2713                         dlb2_port_credits_get(qm_port,
2714                                               DLB2_COMBINED_POOL);
2715                 if (unlikely(qm_port->cached_credits == 0)) {
2716                         DLB2_INC_STAT(
2717                         qm_port->ev_port->stats.traffic.tx_nospc_hw_credits, 1);
2718                         DLB2_LOG_DBG("credits exhausted\n");
2719                         return 1; /* credits exhausted */
2720                 }
2721         }
2722
2723         return 0;
2724 }
2725
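/* Submit one 64B line of up to four 16B QEs to the port's producer-port
 * address with a single MOVDIR64B store.
 */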
2726 static __rte_always_inline void
2727 dlb2_pp_write(struct dlb2_enqueue_qe *qe4,
2728               struct process_local_port_data *port_data)
2729 {
2730         dlb2_movdir64b(port_data->pp_addr, qe4);
2731 }
2732
2733 static inline int
2734 dlb2_consume_qe_immediate(struct dlb2_port *qm_port, int num)
2735 {
2736         struct process_local_port_data *port_data;
2737         struct dlb2_cq_pop_qe *qe;
2738
2739         RTE_ASSERT(qm_port->config_state == DLB2_CONFIGURED);
2740
2741         qe = qm_port->consume_qe;
2742
2743         qe->tokens = num - 1;
2744
2745         /* No store fence needed since no pointer is being sent, and CQ token
2746          * pops can be safely reordered with other HCWs.
2747          */
2748         port_data = &dlb2_port[qm_port->id][PORT_TYPE(qm_port)];
2749
2750         dlb2_movntdq_single(port_data->pp_addr, qe);
2751
2752         DLB2_LOG_DBG("dlb2: consume immediate - %d QEs\n", num);
2753
2754         qm_port->owed_tokens = 0;
2755
2756         return 0;
2757 }
2758
2759 static inline void
2760 dlb2_hw_do_enqueue(struct dlb2_port *qm_port,
2761                    bool do_sfence,
2762                    struct process_local_port_data *port_data)
2763 {
2764         /* Since MOVDIR64B is weakly-ordered, use an SFENCE to ensure that
2765          * application writes complete before enqueueing the QE.
2766          */
2767         if (do_sfence)
2768                 rte_wmb();
2769
2770         dlb2_pp_write(qm_port->qe4, port_data);
2771 }
2772
2773 static inline void
2774 dlb2_construct_token_pop_qe(struct dlb2_port *qm_port, int idx)
2775 {
2776         struct dlb2_cq_pop_qe *qe = (void *)qm_port->qe4;
2777         int num = qm_port->owed_tokens;
2778
2779         qe[idx].cmd_byte = DLB2_POP_CMD_BYTE;
2780         qe[idx].tokens = num - 1;
2781
2782         qm_port->owed_tokens = 0;
2783 }
2784
2785 static inline int
2786 dlb2_event_enqueue_prep(struct dlb2_eventdev_port *ev_port,
2787                         struct dlb2_port *qm_port,
2788                         const struct rte_event ev[],
2789                         uint8_t *sched_type,
2790                         uint8_t *queue_id)
2791 {
2792         struct dlb2_eventdev *dlb2 = ev_port->dlb2;
2793         struct dlb2_eventdev_queue *ev_queue;
2794         uint16_t *cached_credits = NULL;
2795         struct dlb2_queue *qm_queue;
2796
2797         ev_queue = &dlb2->ev_queues[ev->queue_id];
2798         qm_queue = &ev_queue->qm_queue;
2799         *queue_id = qm_queue->id;
2800
2801         /* Ignore sched_type and hardware credits on release events */
2802         if (ev->op == RTE_EVENT_OP_RELEASE)
2803                 goto op_check;
2804
2805         if (!qm_queue->is_directed) {
2806                 /* Load balanced destination queue */
2807
2808                 if (dlb2->version == DLB2_HW_V2) {
2809                         if (dlb2_check_enqueue_hw_ldb_credits(qm_port)) {
2810                                 rte_errno = -ENOSPC;
2811                                 return 1;
2812                         }
2813                         cached_credits = &qm_port->cached_ldb_credits;
2814                 } else {
2815                         if (dlb2_check_enqueue_hw_credits(qm_port)) {
2816                                 rte_errno = -ENOSPC;
2817                                 return 1;
2818                         }
2819                         cached_credits = &qm_port->cached_credits;
2820                 }
2821                 switch (ev->sched_type) {
2822                 case RTE_SCHED_TYPE_ORDERED:
2823                         DLB2_LOG_DBG("dlb2: put_qe: RTE_SCHED_TYPE_ORDERED\n");
2824                         if (qm_queue->sched_type != RTE_SCHED_TYPE_ORDERED) {
2825                                 DLB2_LOG_ERR("dlb2: tried to send ordered event to unordered queue %d\n",
2826                                              *queue_id);
2827                                 rte_errno = -EINVAL;
2828                                 return 1;
2829                         }
2830                         *sched_type = DLB2_SCHED_ORDERED;
2831                         break;
2832                 case RTE_SCHED_TYPE_ATOMIC:
2833                         DLB2_LOG_DBG("dlb2: put_qe: RTE_SCHED_TYPE_ATOMIC\n");
2834                         *sched_type = DLB2_SCHED_ATOMIC;
2835                         break;
2836                 case RTE_SCHED_TYPE_PARALLEL:
2837                         DLB2_LOG_DBG("dlb2: put_qe: RTE_SCHED_TYPE_PARALLEL\n");
2838                         if (qm_queue->sched_type == RTE_SCHED_TYPE_ORDERED)
2839                                 *sched_type = DLB2_SCHED_ORDERED;
2840                         else
2841                                 *sched_type = DLB2_SCHED_UNORDERED;
2842                         break;
2843                 default:
2844                         DLB2_LOG_ERR("Unsupported LDB sched type in put_qe\n");
2845                         DLB2_INC_STAT(ev_port->stats.tx_invalid, 1);
2846                         rte_errno = -EINVAL;
2847                         return 1;
2848                 }
2849         } else {
2850                 /* Directed destination queue */
2851
2852                 if (dlb2->version == DLB2_HW_V2) {
2853                         if (dlb2_check_enqueue_hw_dir_credits(qm_port)) {
2854                                 rte_errno = -ENOSPC;
2855                                 return 1;
2856                         }
2857                         cached_credits = &qm_port->cached_dir_credits;
2858                 } else {
2859                         if (dlb2_check_enqueue_hw_credits(qm_port)) {
2860                                 rte_errno = -ENOSPC;
2861                                 return 1;
2862                         }
2863                         cached_credits = &qm_port->cached_credits;
2864                 }
2865                 DLB2_LOG_DBG("dlb2: put_qe: RTE_SCHED_TYPE_DIRECTED\n");
2866
2867                 *sched_type = DLB2_SCHED_DIRECTED;
2868         }
2869
2870 op_check:
2871         switch (ev->op) {
2872         case RTE_EVENT_OP_NEW:
2873                 /* Check that a sw credit is available */
2874                 if (dlb2_check_enqueue_sw_credits(dlb2, ev_port)) {
2875                         rte_errno = -ENOSPC;
2876                         return 1;
2877                 }
2878                 ev_port->inflight_credits--;
2879                 (*cached_credits)--;
2880                 break;
2881         case RTE_EVENT_OP_FORWARD:
2882                 /* Check for outstanding_releases underflow. If this occurs,
2883                  * the application is not using the EVENT_OPs correctly; for
2884                  * example, forwarding or releasing events that were not
2885                  * dequeued.
2886                  */
2887                 RTE_ASSERT(ev_port->outstanding_releases > 0);
2888                 ev_port->outstanding_releases--;
2889                 qm_port->issued_releases++;
2890                 (*cached_credits)--;
2891                 break;
2892         case RTE_EVENT_OP_RELEASE:
2893                 ev_port->inflight_credits++;
2894                 /* Check for outstanding_releases underflow. If this occurs,
2895                  * the application is not using the EVENT_OPs correctly; for
2896                  * example, forwarding or releasing events that were not
2897                  * dequeued.
2898                  */
2899                 RTE_ASSERT(ev_port->outstanding_releases > 0);
2900                 ev_port->outstanding_releases--;
2901                 qm_port->issued_releases++;
2902
2903                 /* Replenish s/w credits if enough are cached */
2904                 dlb2_replenish_sw_credits(dlb2, ev_port);
2905                 break;
2906         }
2907
2908         DLB2_INC_STAT(ev_port->stats.tx_op_cnt[ev->op], 1);
2909         DLB2_INC_STAT(ev_port->stats.traffic.tx_ok, 1);
2910
2911 #ifndef RTE_LIBRTE_PMD_DLB_QUELL_STATS
2912         if (ev->op != RTE_EVENT_OP_RELEASE) {
2913                 DLB2_INC_STAT(ev_port->stats.queue[ev->queue_id].enq_ok, 1);
2914                 DLB2_INC_STAT(ev_port->stats.tx_sched_cnt[*sched_type], 1);
2915         }
2916 #endif
2917
2918         return 0;
2919 }
2920
2921 static inline uint16_t
2922 __dlb2_event_enqueue_burst(void *event_port,
2923                            const struct rte_event events[],
2924                            uint16_t num,
2925                            bool use_delayed)
2926 {
2927         struct dlb2_eventdev_port *ev_port = event_port;
2928         struct dlb2_port *qm_port = &ev_port->qm_port;
2929         struct process_local_port_data *port_data;
2930         int retries = ev_port->enq_retries;
2931         int i;
2932
2933         RTE_ASSERT(ev_port->enq_configured);
2934         RTE_ASSERT(events != NULL);
2935
2936         i = 0;
2937
2938         port_data = &dlb2_port[qm_port->id][PORT_TYPE(qm_port)];
2939
2940         while (i < num) {
2941                 uint8_t sched_types[DLB2_NUM_QES_PER_CACHE_LINE];
2942                 uint8_t queue_ids[DLB2_NUM_QES_PER_CACHE_LINE];
2943                 int pop_offs = 0;
2944                 int j = 0;
2945
2946                 memset(qm_port->qe4,
2947                        0,
2948                        DLB2_NUM_QES_PER_CACHE_LINE *
2949                        sizeof(struct dlb2_enqueue_qe));
2950
2951                 for (; j < DLB2_NUM_QES_PER_CACHE_LINE && (i + j) < num; j++) {
2952                         const struct rte_event *ev = &events[i + j];
2953                         int16_t thresh = qm_port->token_pop_thresh;
2954                         int ret;
2955
2956                         if (use_delayed &&
2957                             qm_port->token_pop_mode == DELAYED_POP &&
2958                             (ev->op == RTE_EVENT_OP_FORWARD ||
2959                              ev->op == RTE_EVENT_OP_RELEASE) &&
2960                             qm_port->issued_releases >= thresh - 1) {
2961                                 /* Insert the token pop QE and break out. This
2962                                  * may result in a partial HCW, but that is
2963                                  * simpler than supporting arbitrary QE
2964                                  * insertion.
2965                                  */
2966                                 dlb2_construct_token_pop_qe(qm_port, j);
2967
2968                                 /* Reset the releases for the next QE batch */
2969                                 qm_port->issued_releases -= thresh;
2970
2971                                 pop_offs = 1;
2972                                 j++;
2973                                 break;
2974                         }
2975
2976                         /*
2977                          * Retry if insufficient credits
2978                          */
2979                         do {
2980                                 ret = dlb2_event_enqueue_prep(ev_port,
2981                                                               qm_port,
2982                                                               ev,
2983                                                               &sched_types[j],
2984                                                               &queue_ids[j]);
2985                         } while ((ret == -ENOSPC) && (retries-- > 0));
2986
2987                         if (ret != 0)
2988                                 break;
2989                 }
2990
2991                 if (j == 0)
2992                         break;
2993
2994                 dlb2_event_build_hcws(qm_port, &events[i], j - pop_offs,
2995                                       sched_types, queue_ids);
2996
2997                 dlb2_hw_do_enqueue(qm_port, i == 0, port_data);
2998
2999                 /* Don't include the token pop QE in the enqueue count */
3000                 i += j - pop_offs;
3001
3002                 /* Don't interpret j < DLB2_NUM_... as out-of-credits if
3003                  * pop_offs != 0
3004                  */
3005                 if (j < DLB2_NUM_QES_PER_CACHE_LINE && pop_offs == 0)
3006                         break;
3007         }
3008
3009         return i;
3010 }
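
/*
 * Illustrative sketch (assumption, not part of the PMD): a caller-side loop
 * around the burst enqueue above. The PMD already retries internally up to
 * 'enq_retries' times on a credit shortage; a partial return with
 * rte_errno == -ENOSPC can still be retried by the application.
 * 'dev_id', 'port_id', 'evs' and 'n' are placeholders.
 *
 *	uint16_t sent = 0;
 *
 *	while (sent < n) {
 *		uint16_t ret = rte_event_enqueue_burst(dev_id, port_id,
 *						       &evs[sent], n - sent);
 *		sent += ret;
 *		if (ret == 0 && rte_errno != -ENOSPC)
 *			break;	// hard failure (e.g. -EINVAL), give up
 *	}
 */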
3011
3012 static uint16_t
3013 dlb2_event_enqueue_burst(void *event_port,
3014                              const struct rte_event events[],
3015                              uint16_t num)
3016 {
3017         return __dlb2_event_enqueue_burst(event_port, events, num, false);
3018 }
3019
3020 static uint16_t
3021 dlb2_event_enqueue_burst_delayed(void *event_port,
3022                                      const struct rte_event events[],
3023                                      uint16_t num)
3024 {
3025         return __dlb2_event_enqueue_burst(event_port, events, num, true);
3026 }
3027
3028 static inline uint16_t
3029 dlb2_event_enqueue(void *event_port,
3030                    const struct rte_event events[])
3031 {
3032         return __dlb2_event_enqueue_burst(event_port, events, 1, false);
3033 }
3034
3035 static inline uint16_t
3036 dlb2_event_enqueue_delayed(void *event_port,
3037                            const struct rte_event events[])
3038 {
3039         return __dlb2_event_enqueue_burst(event_port, events, 1, true);
3040 }
3041
3042 static uint16_t
3043 dlb2_event_enqueue_new_burst(void *event_port,
3044                              const struct rte_event events[],
3045                              uint16_t num)
3046 {
3047         return __dlb2_event_enqueue_burst(event_port, events, num, false);
3048 }
3049
3050 static uint16_t
3051 dlb2_event_enqueue_new_burst_delayed(void *event_port,
3052                                      const struct rte_event events[],
3053                                      uint16_t num)
3054 {
3055         return __dlb2_event_enqueue_burst(event_port, events, num, true);
3056 }
3057
3058 static uint16_t
3059 dlb2_event_enqueue_forward_burst(void *event_port,
3060                                  const struct rte_event events[],
3061                                  uint16_t num)
3062 {
3063         return __dlb2_event_enqueue_burst(event_port, events, num, false);
3064 }
3065
3066 static uint16_t
3067 dlb2_event_enqueue_forward_burst_delayed(void *event_port,
3068                                          const struct rte_event events[],
3069                                          uint16_t num)
3070 {
3071         return __dlb2_event_enqueue_burst(event_port, events, num, true);
3072 }
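
/*
 * Note (hedged): the _new/_forward variants above are thin wrappers because
 * the op-specific work is done per event in dlb2_event_enqueue_prep(). They
 * are intended to back the type-specific public entry points, which an
 * application can use when every event in a burst carries the same op:
 *
 *	// all events already set to RTE_EVENT_OP_NEW
 *	rte_event_enqueue_new_burst(dev_id, port_id, evs, n);
 *
 *	// all events previously dequeued and set to RTE_EVENT_OP_FORWARD
 *	rte_event_enqueue_forward_burst(dev_id, port_id, evs, n);
 */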
3073
3074 static void
3075 dlb2_event_release(struct dlb2_eventdev *dlb2,
3076                    uint8_t port_id,
3077                    int n)
3078 {
3079         struct process_local_port_data *port_data;
3080         struct dlb2_eventdev_port *ev_port;
3081         struct dlb2_port *qm_port;
3082         int i;
3083
3084         if (port_id >= dlb2->num_ports) {
3085                 DLB2_LOG_ERR("Invalid port id %d in dlb2_event_release\n",
3086                              port_id);
3087                 rte_errno = -EINVAL;
3088                 return;
3089         }
3090
3091         ev_port = &dlb2->ev_ports[port_id];
3092         qm_port = &ev_port->qm_port;
3093         port_data = &dlb2_port[qm_port->id][PORT_TYPE(qm_port)];
3094
3095         i = 0;
3096
3097         if (qm_port->is_directed) {
3098                 i = n;
3099                 goto sw_credit_update;
3100         }
3101
3102         while (i < n) {
3103                 int pop_offs = 0;
3104                 int j = 0;
3105
3106                 /* Zero-out QEs */
3107                 _mm_storeu_si128((void *)&qm_port->qe4[0], _mm_setzero_si128());
3108                 _mm_storeu_si128((void *)&qm_port->qe4[1], _mm_setzero_si128());
3109                 _mm_storeu_si128((void *)&qm_port->qe4[2], _mm_setzero_si128());
3110                 _mm_storeu_si128((void *)&qm_port->qe4[3], _mm_setzero_si128());
3111
3112
3113                 for (; j < DLB2_NUM_QES_PER_CACHE_LINE && (i + j) < n; j++) {
3114                         int16_t thresh = qm_port->token_pop_thresh;
3115
3116                         if (qm_port->token_pop_mode == DELAYED_POP &&
3117                             qm_port->issued_releases >= thresh - 1) {
3118                                 /* Insert the token pop QE */
3119                                 dlb2_construct_token_pop_qe(qm_port, j);
3120
3121                                 /* Reset the releases for the next QE batch */
3122                                 qm_port->issued_releases -= thresh;
3123
3124                                 pop_offs = 1;
3125                                 j++;
3126                                 break;
3127                         }
3128
3129                         qm_port->qe4[j].cmd_byte = DLB2_COMP_CMD_BYTE;
3130                         qm_port->issued_releases++;
3131                 }
3132
3133                 dlb2_hw_do_enqueue(qm_port, i == 0, port_data);
3134
3135                 /* Don't include the token pop QE in the release count */
3136                 i += j - pop_offs;
3137         }
3138
3139 sw_credit_update:
3140         /* each release returns one credit */
3141         if (unlikely(!ev_port->outstanding_releases)) {
3142                 DLB2_LOG_ERR("%s: Outstanding releases underflowed.\n",
3143                              __func__);
3144                 return;
3145         }
3146         ev_port->outstanding_releases -= i;
3147         ev_port->inflight_credits += i;
3148
3149         /* Replenish s/w credits if enough releases are performed */
3150         dlb2_replenish_sw_credits(dlb2, ev_port);
3151 }
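
/*
 * Illustrative sketch (assumption): dlb2_event_release() is driven by the
 * implicit-release path in dequeue. An application that prefers explicit
 * releases disables implicit release at port setup and enqueues
 * RTE_EVENT_OP_RELEASE itself. 'dev_id' and 'port_id' are placeholders.
 *
 *	struct rte_event_port_conf pconf;
 *
 *	rte_event_port_default_conf_get(dev_id, port_id, &pconf);
 *	pconf.event_port_cfg |= RTE_EVENT_PORT_CFG_DISABLE_IMPL_REL;
 *	rte_event_port_setup(dev_id, port_id, &pconf);
 *
 *	// later, after a dequeued event 'ev' has been fully processed:
 *	ev.op = RTE_EVENT_OP_RELEASE;
 *	rte_event_enqueue_burst(dev_id, port_id, &ev, 1);
 */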
3152
3153 static inline void
3154 dlb2_port_credits_inc(struct dlb2_port *qm_port, int num)
3155 {
3156         uint32_t batch_size = qm_port->hw_credit_quanta;
3157
3158         /* increment port credits; return a batch to the pool if threshold exceeded */
3159         if (!qm_port->is_directed) {
3160                 if (qm_port->dlb2->version == DLB2_HW_V2) {
3161                         qm_port->cached_ldb_credits += num;
3162                         if (qm_port->cached_ldb_credits >= 2 * batch_size) {
3163                                 __atomic_fetch_add(
3164                                         qm_port->credit_pool[DLB2_LDB_QUEUE],
3165                                         batch_size, __ATOMIC_SEQ_CST);
3166                                 qm_port->cached_ldb_credits -= batch_size;
3167                         }
3168                 } else {
3169                         qm_port->cached_credits += num;
3170                         if (qm_port->cached_credits >= 2 * batch_size) {
3171                                 __atomic_fetch_add(
3172                                       qm_port->credit_pool[DLB2_COMBINED_POOL],
3173                                       batch_size, __ATOMIC_SEQ_CST);
3174                                 qm_port->cached_credits -= batch_size;
3175                         }
3176                 }
3177         } else {
3178                 if (qm_port->dlb2->version == DLB2_HW_V2) {
3179                         qm_port->cached_dir_credits += num;
3180                         if (qm_port->cached_dir_credits >= 2 * batch_size) {
3181                                 __atomic_fetch_add(
3182                                         qm_port->credit_pool[DLB2_DIR_QUEUE],
3183                                         batch_size, __ATOMIC_SEQ_CST);
3184                                 qm_port->cached_dir_credits -= batch_size;
3185                         }
3186                 } else {
3187                         qm_port->cached_credits += num;
3188                         if (qm_port->cached_credits >= 2 * batch_size) {
3189                                 __atomic_fetch_add(
3190                                       qm_port->credit_pool[DLB2_COMBINED_POOL],
3191                                       batch_size, __ATOMIC_SEQ_CST);
3192                                 qm_port->cached_credits -= batch_size;
3193                         }
3194                 }
3195         }
3196 }
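
/*
 * Minimal sketch of the cached-credit pattern used above (assumption: a
 * generic pool/counter pair, not the driver's own types). Credits are
 * accumulated locally and pushed back to the shared pool in 'batch_size'
 * chunks, amortizing the atomic add over many events.
 *
 *	static inline void
 *	credit_return(uint32_t *shared_pool, uint32_t *cached,
 *		      uint32_t num, uint32_t batch_size)
 *	{
 *		*cached += num;
 *		if (*cached >= 2 * batch_size) {
 *			__atomic_fetch_add(shared_pool, batch_size,
 *					   __ATOMIC_SEQ_CST);
 *			*cached -= batch_size;
 *		}
 *	}
 */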
3197
3198 #define CLB_MASK_IDX 0
3199 #define CLB_VAL_IDX 1
3200 static int
3201 dlb2_monitor_callback(const uint64_t val,
3202                 const uint64_t opaque[RTE_POWER_MONITOR_OPAQUE_SZ])
3203 {
3204         /* abort if the value matches */
3205         return (val & opaque[CLB_MASK_IDX]) == opaque[CLB_VAL_IDX] ? -1 : 0;
3206 }
3207
3208 static inline int
3209 dlb2_dequeue_wait(struct dlb2_eventdev *dlb2,
3210                   struct dlb2_eventdev_port *ev_port,
3211                   struct dlb2_port *qm_port,
3212                   uint64_t timeout,
3213                   uint64_t start_ticks)
3214 {
3215         struct process_local_port_data *port_data;
3216         uint64_t elapsed_ticks;
3217
3218         port_data = &dlb2_port[qm_port->id][PORT_TYPE(qm_port)];
3219
3220         elapsed_ticks = rte_get_timer_cycles() - start_ticks;
3221
3222         /* Wait/poll time expired */
3223         if (elapsed_ticks >= timeout) {
3224                 return 1;
3225         } else if (dlb2->umwait_allowed) {
3226                 struct rte_power_monitor_cond pmc;
3227                 volatile struct dlb2_dequeue_qe *cq_base;
3228                 union {
3229                         uint64_t raw_qe[2];
3230                         struct dlb2_dequeue_qe qe;
3231                 } qe_mask;
3232                 uint64_t expected_value;
3233                 volatile uint64_t *monitor_addr;
3234
3235                 qe_mask.qe.cq_gen = 1; /* set mask */
3236
3237                 cq_base = port_data->cq_base;
3238                 monitor_addr = (volatile uint64_t *)(volatile void *)
3239                         &cq_base[qm_port->cq_idx];
3240                 monitor_addr++; /* cq_gen bit is in second 64bit location */
3241
3242                 if (qm_port->gen_bit)
3243                         expected_value = qe_mask.raw_qe[1];
3244                 else
3245                         expected_value = 0;
3246
3247                 pmc.addr = monitor_addr;
3248                 /* store expected value and comparison mask in opaque data */
3249                 pmc.opaque[CLB_VAL_IDX] = expected_value;
3250                 pmc.opaque[CLB_MASK_IDX] = qe_mask.raw_qe[1];
3251                 /* set up callback */
3252                 pmc.fn = dlb2_monitor_callback;
3253                 pmc.size = sizeof(uint64_t);
3254
3255                 rte_power_monitor(&pmc, timeout + start_ticks);
3256
3257                 DLB2_INC_STAT(ev_port->stats.traffic.rx_umonitor_umwait, 1);
3258         } else {
3259                 uint64_t poll_interval = dlb2->poll_interval;
3260                 uint64_t curr_ticks = rte_get_timer_cycles();
3261                 uint64_t init_ticks = curr_ticks;
3262
3263                 while ((curr_ticks - start_ticks < timeout) &&
3264                        (curr_ticks - init_ticks < poll_interval))
3265                         curr_ticks = rte_get_timer_cycles();
3266         }
3267
3268         return 0;
3269 }
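
/*
 * Rough sketch (assumption) of the rte_power_monitor_cond contract relied on
 * above: before arming UMWAIT, the core re-reads *pmc.addr and calls pmc.fn;
 * a non-zero return means the condition is already met and the sleep is
 * skipped. dlb2_monitor_callback() returns -1 when the masked gen bit
 * already holds the expected value, i.e. a new QE has arrived.
 *
 *	uint64_t cur = *pmc.addr;		// second 64b word of the QE
 *	if (pmc.fn(cur, pmc.opaque) != 0)
 *		return;				// QE already present, no wait
 *	// else UMONITOR/UMWAIT on pmc.addr until a write or the deadline
 */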
3270
3271 static __rte_noinline int
3272 dlb2_process_dequeue_qes(struct dlb2_eventdev_port *ev_port,
3273                          struct dlb2_port *qm_port,
3274                          struct rte_event *events,
3275                          struct dlb2_dequeue_qe *qes,
3276                          int cnt)
3277 {
3278         uint8_t *qid_mappings = qm_port->qid_mappings;
3279         int i, num, evq_id;
3280
3281         for (i = 0, num = 0; i < cnt; i++) {
3282                 struct dlb2_dequeue_qe *qe = &qes[i];
3283                 int sched_type_map[DLB2_NUM_HW_SCHED_TYPES] = {
3284                         [DLB2_SCHED_ATOMIC] = RTE_SCHED_TYPE_ATOMIC,
3285                         [DLB2_SCHED_UNORDERED] = RTE_SCHED_TYPE_PARALLEL,
3286                         [DLB2_SCHED_ORDERED] = RTE_SCHED_TYPE_ORDERED,
3287                         [DLB2_SCHED_DIRECTED] = RTE_SCHED_TYPE_ATOMIC,
3288                 };
3289
3290                 /* Fill in event information.
3291                  * Note that flow_id must be embedded in the data by
3292                  * the app, e.g. in the mbuf RSS hash field if the data
3293                  * buffer is an mbuf.
3294                  */
3295                 if (unlikely(qe->error)) {
3296                         DLB2_LOG_ERR("QE error bit ON\n");
3297                         DLB2_INC_STAT(ev_port->stats.traffic.rx_drop, 1);
3298                         dlb2_consume_qe_immediate(qm_port, 1);
3299                         continue; /* Ignore */
3300                 }
3301
3302                 events[num].u64 = qe->data;
3303                 events[num].flow_id = qe->flow_id;
3304                 events[num].priority = DLB2_TO_EV_PRIO((uint8_t)qe->priority);
3305                 events[num].event_type = qe->u.event_type.major;
3306                 events[num].sub_event_type = qe->u.event_type.sub;
3307                 events[num].sched_type = sched_type_map[qe->sched_type];
3308                 events[num].impl_opaque = qe->qid_depth;
3309
3310                 /* qid not preserved for directed queues */
3311                 if (qm_port->is_directed)
3312                         evq_id = ev_port->link[0].queue_id;
3313                 else
3314                         evq_id = qid_mappings[qe->qid];
3315
3316                 events[num].queue_id = evq_id;
3317                 DLB2_INC_STAT(
3318                         ev_port->stats.queue[evq_id].qid_depth[qe->qid_depth],
3319                         1);
3320                 DLB2_INC_STAT(ev_port->stats.rx_sched_cnt[qe->sched_type], 1);
3321                 num++;
3322         }
3323
3324         DLB2_INC_STAT(ev_port->stats.traffic.rx_ok, num);
3325
3326         return num;
3327 }
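
/*
 * Illustrative sketch (assumption): as the note above says, an application
 * that needs the exact flow identity after scheduling should keep it in the
 * event payload rather than rely solely on the QE's flow_id field. With mbuf
 * payloads the RSS hash is a convenient carrier ('m' is a placeholder):
 *
 *	// producer side
 *	ev.mbuf = m;
 *	ev.flow_id = m->hash.rss & 0xfffff;	// rte_event flow_id is 20 bits
 *
 *	// consumer side, after rte_event_dequeue_burst()
 *	uint32_t flow = ev.mbuf->hash.rss;	// authoritative copy
 */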
3328
3329 static inline int
3330 dlb2_process_dequeue_four_qes(struct dlb2_eventdev_port *ev_port,
3331                               struct dlb2_port *qm_port,
3332                               struct rte_event *events,
3333                               struct dlb2_dequeue_qe *qes)
3334 {
3335         int sched_type_map[] = {
3336                 [DLB2_SCHED_ATOMIC] = RTE_SCHED_TYPE_ATOMIC,
3337                 [DLB2_SCHED_UNORDERED] = RTE_SCHED_TYPE_PARALLEL,
3338                 [DLB2_SCHED_ORDERED] = RTE_SCHED_TYPE_ORDERED,
3339                 [DLB2_SCHED_DIRECTED] = RTE_SCHED_TYPE_ATOMIC,
3340         };
3341         const int num_events = DLB2_NUM_QES_PER_CACHE_LINE;
3342         uint8_t *qid_mappings = qm_port->qid_mappings;
3343         __m128i sse_evt[2];
3344
3345         /* In the unlikely case that any of the QE error bits are set, process
3346          * them one at a time.
3347          */
3348         if (unlikely(qes[0].error || qes[1].error ||
3349                      qes[2].error || qes[3].error))
3350                 return dlb2_process_dequeue_qes(ev_port, qm_port, events,
3351                                                  qes, num_events);
3352
3353         events[0].u64 = qes[0].data;
3354         events[1].u64 = qes[1].data;
3355         events[2].u64 = qes[2].data;
3356         events[3].u64 = qes[3].data;
3357
3358         /* Construct the metadata portion of two struct rte_events
3359          * in one 128b SSE register. Event metadata is constructed in the SSE
3360          * registers like so:
3361          * sse_evt[0][63:0]:   event[0]'s metadata
3362          * sse_evt[0][127:64]: event[1]'s metadata
3363          * sse_evt[1][63:0]:   event[2]'s metadata
3364          * sse_evt[1][127:64]: event[3]'s metadata
3365          */
3366         sse_evt[0] = _mm_setzero_si128();
3367         sse_evt[1] = _mm_setzero_si128();
3368
3369         /* Convert the hardware queue ID to an event queue ID and store it in
3370          * the metadata:
3371          * sse_evt[0][47:40]   = qid_mappings[qes[0].qid]
3372          * sse_evt[0][111:104] = qid_mappings[qes[1].qid]
3373          * sse_evt[1][47:40]   = qid_mappings[qes[2].qid]
3374          * sse_evt[1][111:104] = qid_mappings[qes[3].qid]
3375          */
3376 #define DLB_EVENT_QUEUE_ID_BYTE 5
3377         sse_evt[0] = _mm_insert_epi8(sse_evt[0],
3378                                      qid_mappings[qes[0].qid],
3379                                      DLB_EVENT_QUEUE_ID_BYTE);
3380         sse_evt[0] = _mm_insert_epi8(sse_evt[0],
3381                                      qid_mappings[qes[1].qid],
3382                                      DLB_EVENT_QUEUE_ID_BYTE + 8);
3383         sse_evt[1] = _mm_insert_epi8(sse_evt[1],
3384                                      qid_mappings[qes[2].qid],
3385                                      DLB_EVENT_QUEUE_ID_BYTE);
3386         sse_evt[1] = _mm_insert_epi8(sse_evt[1],
3387                                      qid_mappings[qes[3].qid],
3388                                      DLB_EVENT_QUEUE_ID_BYTE + 8);
3389
3390         /* Convert the hardware priority to an event priority and store it in
3391          * the metadata, while also returning the queue depth status
3392          * value captured by the hardware, storing it in impl_opaque, which can
3393          * be read by the application but not modified
3394          * sse_evt[0][55:48]   = DLB2_TO_EV_PRIO(qes[0].priority)
3395          * sse_evt[0][63:56]   = qes[0].qid_depth
3396          * sse_evt[0][119:112] = DLB2_TO_EV_PRIO(qes[1].priority)
3397          * sse_evt[0][127:120] = qes[1].qid_depth
3398          * sse_evt[1][55:48]   = DLB2_TO_EV_PRIO(qes[2].priority)
3399          * sse_evt[1][63:56]   = qes[2].qid_depth
3400          * sse_evt[1][119:112] = DLB2_TO_EV_PRIO(qes[3].priority)
3401          * sse_evt[1][127:120] = qes[3].qid_depth
3402          */
3403 #define DLB_EVENT_PRIO_IMPL_OPAQUE_WORD 3
3404 #define DLB_BYTE_SHIFT 8
3405         sse_evt[0] =
3406                 _mm_insert_epi16(sse_evt[0],
3407                         DLB2_TO_EV_PRIO((uint8_t)qes[0].priority) |
3408                         (qes[0].qid_depth << DLB_BYTE_SHIFT),
3409                         DLB_EVENT_PRIO_IMPL_OPAQUE_WORD);
3410         sse_evt[0] =
3411                 _mm_insert_epi16(sse_evt[0],
3412                         DLB2_TO_EV_PRIO((uint8_t)qes[1].priority) |
3413                         (qes[1].qid_depth << DLB_BYTE_SHIFT),
3414                         DLB_EVENT_PRIO_IMPL_OPAQUE_WORD + 4);
3415         sse_evt[1] =
3416                 _mm_insert_epi16(sse_evt[1],
3417                         DLB2_TO_EV_PRIO((uint8_t)qes[2].priority) |
3418                         (qes[2].qid_depth << DLB_BYTE_SHIFT),
3419                         DLB_EVENT_PRIO_IMPL_OPAQUE_WORD);
3420         sse_evt[1] =
3421                 _mm_insert_epi16(sse_evt[1],
3422                         DLB2_TO_EV_PRIO((uint8_t)qes[3].priority) |
3423                         (qes[3].qid_depth << DLB_BYTE_SHIFT),
3424                         DLB_EVENT_PRIO_IMPL_OPAQUE_WORD + 4);
3425
3426         /* Write the event type, sub event type, and flow_id to the event
3427          * metadata.
3428          * sse_evt[0][31:0]   = qes[0].flow_id |
3429          *                      qes[0].u.event_type.major << 28 |
3430          *                      qes[0].u.event_type.sub << 20;
3431          * sse_evt[0][95:64]  = qes[1].flow_id |
3432          *                      qes[1].u.event_type.major << 28 |
3433          *                      qes[1].u.event_type.sub << 20;
3434          * sse_evt[1][31:0]   = qes[2].flow_id |
3435          *                      qes[2].u.event_type.major << 28 |
3436          *                      qes[2].u.event_type.sub << 20;
3437          * sse_evt[1][95:64]  = qes[3].flow_id |
3438          *                      qes[3].u.event_type.major << 28 |
3439          *                      qes[3].u.event_type.sub << 20;
3440          */
3441 #define DLB_EVENT_EV_TYPE_DW 0
3442 #define DLB_EVENT_EV_TYPE_SHIFT 28
3443 #define DLB_EVENT_SUB_EV_TYPE_SHIFT 20
3444         sse_evt[0] = _mm_insert_epi32(sse_evt[0],
3445                         qes[0].flow_id |
3446                         qes[0].u.event_type.major << DLB_EVENT_EV_TYPE_SHIFT |
3447                         qes[0].u.event_type.sub <<  DLB_EVENT_SUB_EV_TYPE_SHIFT,
3448                         DLB_EVENT_EV_TYPE_DW);
3449         sse_evt[0] = _mm_insert_epi32(sse_evt[0],
3450                         qes[1].flow_id |
3451                         qes[1].u.event_type.major << DLB_EVENT_EV_TYPE_SHIFT |
3452                         qes[1].u.event_type.sub <<  DLB_EVENT_SUB_EV_TYPE_SHIFT,
3453                         DLB_EVENT_EV_TYPE_DW + 2);
3454         sse_evt[1] = _mm_insert_epi32(sse_evt[1],
3455                         qes[2].flow_id |
3456                         qes[2].u.event_type.major << DLB_EVENT_EV_TYPE_SHIFT |
3457                         qes[2].u.event_type.sub <<  DLB_EVENT_SUB_EV_TYPE_SHIFT,
3458                         DLB_EVENT_EV_TYPE_DW);
3459         sse_evt[1] = _mm_insert_epi32(sse_evt[1],
3460                         qes[3].flow_id |
3461                         qes[3].u.event_type.major << DLB_EVENT_EV_TYPE_SHIFT  |
3462                         qes[3].u.event_type.sub << DLB_EVENT_SUB_EV_TYPE_SHIFT,
3463                         DLB_EVENT_EV_TYPE_DW + 2);
3464
3465         /* Write the sched type to the event metadata. 'op' and 'rsvd' are not
3466          * set:
3467          * sse_evt[0][39:32]  = sched_type_map[qes[0].sched_type] << 6
3468          * sse_evt[0][103:96] = sched_type_map[qes[1].sched_type] << 6
3469          * sse_evt[1][39:32]  = sched_type_map[qes[2].sched_type] << 6
3470          * sse_evt[1][103:96] = sched_type_map[qes[3].sched_type] << 6
3471          */
3472 #define DLB_EVENT_SCHED_TYPE_BYTE 4
3473 #define DLB_EVENT_SCHED_TYPE_SHIFT 6
3474         sse_evt[0] = _mm_insert_epi8(sse_evt[0],
3475                 sched_type_map[qes[0].sched_type] << DLB_EVENT_SCHED_TYPE_SHIFT,
3476                 DLB_EVENT_SCHED_TYPE_BYTE);
3477         sse_evt[0] = _mm_insert_epi8(sse_evt[0],
3478                 sched_type_map[qes[1].sched_type] << DLB_EVENT_SCHED_TYPE_SHIFT,
3479                 DLB_EVENT_SCHED_TYPE_BYTE + 8);
3480         sse_evt[1] = _mm_insert_epi8(sse_evt[1],
3481                 sched_type_map[qes[2].sched_type] << DLB_EVENT_SCHED_TYPE_SHIFT,
3482                 DLB_EVENT_SCHED_TYPE_BYTE);
3483         sse_evt[1] = _mm_insert_epi8(sse_evt[1],
3484                 sched_type_map[qes[3].sched_type] << DLB_EVENT_SCHED_TYPE_SHIFT,
3485                 DLB_EVENT_SCHED_TYPE_BYTE + 8);
3486
3487         /* Store the metadata to the event (use the double-precision
3488          * _mm_storeh_pd because there is no integer function for storing the
3489          * upper 64b):
3490          * events[0].event = sse_evt[0][63:0]
3491          * events[1].event = sse_evt[0][127:64]
3492          * events[2].event = sse_evt[1][63:0]
3493          * events[3].event = sse_evt[1][127:64]
3494          */
3495         _mm_storel_epi64((__m128i *)&events[0].event, sse_evt[0]);
3496         _mm_storeh_pd((double *)&events[1].event, (__m128d) sse_evt[0]);
3497         _mm_storel_epi64((__m128i *)&events[2].event, sse_evt[1]);
3498         _mm_storeh_pd((double *)&events[3].event, (__m128d) sse_evt[1]);
3499
3500         DLB2_INC_STAT(ev_port->stats.rx_sched_cnt[qes[0].sched_type], 1);
3501         DLB2_INC_STAT(ev_port->stats.rx_sched_cnt[qes[1].sched_type], 1);
3502         DLB2_INC_STAT(ev_port->stats.rx_sched_cnt[qes[2].sched_type], 1);
3503         DLB2_INC_STAT(ev_port->stats.rx_sched_cnt[qes[3].sched_type], 1);
3504
3505         DLB2_INC_STAT(
3506                 ev_port->stats.queue[events[0].queue_id].
3507                         qid_depth[qes[0].qid_depth],
3508                 1);
3509         DLB2_INC_STAT(
3510                 ev_port->stats.queue[events[1].queue_id].
3511                         qid_depth[qes[1].qid_depth],
3512                 1);
3513         DLB2_INC_STAT(
3514                 ev_port->stats.queue[events[2].queue_id].
3515                         qid_depth[qes[2].qid_depth],
3516                 1);
3517         DLB2_INC_STAT(
3518                 ev_port->stats.queue[events[3].queue_id].
3519                         qid_depth[qes[3].qid_depth],
3520                 1);
3521
3522         DLB2_INC_STAT(ev_port->stats.traffic.rx_ok, num_events);
3523
3524         return num_events;
3525 }
3526
3527 static __rte_always_inline int
3528 dlb2_recv_qe_sparse(struct dlb2_port *qm_port, struct dlb2_dequeue_qe *qe)
3529 {
3530         volatile struct dlb2_dequeue_qe *cq_addr;
3531         uint8_t xor_mask[2] = {0x0F, 0x00};
3532         const uint8_t and_mask = 0x0F;
3533         __m128i *qes = (__m128i *)qe;
3534         uint8_t gen_bits, gen_bit;
3535         uintptr_t addr[4];
3536         uint16_t idx;
3537
3538         cq_addr = dlb2_port[qm_port->id][PORT_TYPE(qm_port)].cq_base;
3539
3540         idx = qm_port->cq_idx_unmasked & qm_port->cq_depth_mask;
3541         /* Load the next 4 QEs */
3542         addr[0] = (uintptr_t)&cq_addr[idx];
3543         addr[1] = (uintptr_t)&cq_addr[(idx +  4) & qm_port->cq_depth_mask];
3544         addr[2] = (uintptr_t)&cq_addr[(idx +  8) & qm_port->cq_depth_mask];
3545         addr[3] = (uintptr_t)&cq_addr[(idx + 12) & qm_port->cq_depth_mask];
3546
3547         /* Prefetch next batch of QEs (all CQs occupy minimum 8 cache lines) */
3548         rte_prefetch0(&cq_addr[(idx + 16) & qm_port->cq_depth_mask]);
3549         rte_prefetch0(&cq_addr[(idx + 20) & qm_port->cq_depth_mask]);
3550         rte_prefetch0(&cq_addr[(idx + 24) & qm_port->cq_depth_mask]);
3551         rte_prefetch0(&cq_addr[(idx + 28) & qm_port->cq_depth_mask]);
3552
3553         /* Correct the xor_mask for wrap-around QEs */
3554         gen_bit = qm_port->gen_bit;
3555         xor_mask[gen_bit] ^= !!((idx +  4) > qm_port->cq_depth_mask) << 1;
3556         xor_mask[gen_bit] ^= !!((idx +  8) > qm_port->cq_depth_mask) << 2;
3557         xor_mask[gen_bit] ^= !!((idx + 12) > qm_port->cq_depth_mask) << 3;
3558
3559         /* Read the cache lines backwards to ensure that if QE[N] (N > 0) is
3560          * valid, then QEs[0:N-1] are too.
3561          */
3562         qes[3] = _mm_load_si128((__m128i *)(void *)addr[3]);
3563         rte_compiler_barrier();
3564         qes[2] = _mm_load_si128((__m128i *)(void *)addr[2]);
3565         rte_compiler_barrier();
3566         qes[1] = _mm_load_si128((__m128i *)(void *)addr[1]);
3567         rte_compiler_barrier();
3568         qes[0] = _mm_load_si128((__m128i *)(void *)addr[0]);
3569
3570         /* Extract and combine the gen bits */
3571         gen_bits = ((_mm_extract_epi8(qes[0], 15) & 0x1) << 0) |
3572                    ((_mm_extract_epi8(qes[1], 15) & 0x1) << 1) |
3573                    ((_mm_extract_epi8(qes[2], 15) & 0x1) << 2) |
3574                    ((_mm_extract_epi8(qes[3], 15) & 0x1) << 3);
3575
3576         /* XOR the combined bits such that a 1 represents a valid QE */
3577         gen_bits ^= xor_mask[gen_bit];
3578
3579         /* Mask off gen bits we don't care about */
3580         gen_bits &= and_mask;
3581
3582         return __builtin_popcount(gen_bits);
3583 }
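
/*
 * Minimal scalar sketch (assumption) of the gen-bit test performed above. A
 * CQ slot holds a newly written QE only when its cq_gen bit equals the value
 * expected for that slot; counting matches from slot 0 upwards gives the
 * same result as the XOR/popcount sequence. 'expected_gen_for_slot()' is a
 * hypothetical helper that flips the expectation for slots past the wrap
 * point, mirroring the xor_mask fix-up above.
 *
 *	int valid = 0;
 *	int k;
 *
 *	for (k = 0; k < 4; k++) {
 *		uint16_t slot = (idx + 4 * k) & qm_port->cq_depth_mask;
 *
 *		if (cq_addr[slot].cq_gen != expected_gen_for_slot(qm_port, k))
 *			break;
 *		valid++;
 *	}
 */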
3584
3585 static inline void
3586 _process_deq_qes_vec_impl(struct dlb2_port *qm_port,
3587                           struct rte_event *events,
3588                           __m128i v_qe_3,
3589                           __m128i v_qe_2,
3590                           __m128i v_qe_1,
3591                           __m128i v_qe_0,
3592                           __m128i v_qe_meta,
3593                           __m128i v_qe_status,
3594                           uint32_t valid_events)
3595 {
3596         /* Look up the event QIDs, using the hardware QIDs to index the
3597          * port's QID mapping.
3598          *
3599          * Each v_qe_[0-3] is just a 16-byte load of the whole QE. It is
3600          * passed along in registers as the QE data is required later.
3601          *
3602          * v_qe_meta is a u32 unpack of all 4x QEs. That is, it contains one
3603          * 32-bit slice of each QE, so it makes up a full SSE register. This
3604          * allows parallel processing of 4x QEs in a single register.
3605          */
3606
3607         __m128i v_qid_done = {0};
3608         int hw_qid0 = _mm_extract_epi8(v_qe_meta, 2);
3609         int hw_qid1 = _mm_extract_epi8(v_qe_meta, 6);
3610         int hw_qid2 = _mm_extract_epi8(v_qe_meta, 10);
3611         int hw_qid3 = _mm_extract_epi8(v_qe_meta, 14);
3612
3613         int ev_qid0 = qm_port->qid_mappings[hw_qid0];
3614         int ev_qid1 = qm_port->qid_mappings[hw_qid1];
3615         int ev_qid2 = qm_port->qid_mappings[hw_qid2];
3616         int ev_qid3 = qm_port->qid_mappings[hw_qid3];
3617
3618         int hw_sched0 = _mm_extract_epi8(v_qe_meta, 3) & 3ul;
3619         int hw_sched1 = _mm_extract_epi8(v_qe_meta, 7) & 3ul;
3620         int hw_sched2 = _mm_extract_epi8(v_qe_meta, 11) & 3ul;
3621         int hw_sched3 = _mm_extract_epi8(v_qe_meta, 15) & 3ul;
3622
3623         v_qid_done = _mm_insert_epi8(v_qid_done, ev_qid0, 2);
3624         v_qid_done = _mm_insert_epi8(v_qid_done, ev_qid1, 6);
3625         v_qid_done = _mm_insert_epi8(v_qid_done, ev_qid2, 10);
3626         v_qid_done = _mm_insert_epi8(v_qid_done, ev_qid3, 14);
3627
3628         /* Schedule field remapping using byte shuffle
3629          * - Full byte containing sched field handled here (op, rsvd are zero)
3630          * - Note sanitizing the register requires two masking ANDs:
3631          *   1) to strip prio/msg_type from byte for correct shuffle lookup
3632          *   2) to strip any non-sched-field lanes from any results to OR later
3633          * - Final byte result is >> 10 to another byte-lane inside the u32.
3634          *   This makes the final combination OR easier to make the rte_event.
3635          */
3636         __m128i v_sched_done;
3637         __m128i v_sched_bits;
3638         {
3639                 static const uint8_t sched_type_map[16] = {
3640                         [DLB2_SCHED_ATOMIC] = RTE_SCHED_TYPE_ATOMIC,
3641                         [DLB2_SCHED_UNORDERED] = RTE_SCHED_TYPE_PARALLEL,
3642                         [DLB2_SCHED_ORDERED] = RTE_SCHED_TYPE_ORDERED,
3643                         [DLB2_SCHED_DIRECTED] = RTE_SCHED_TYPE_ATOMIC,
3644                 };
3645                 static const uint8_t sched_and_mask[16] = {
3646                         0x00, 0x00, 0x00, 0x03,
3647                         0x00, 0x00, 0x00, 0x03,
3648                         0x00, 0x00, 0x00, 0x03,
3649                         0x00, 0x00, 0x00, 0x03,
3650                 };
3651                 const __m128i v_sched_map = _mm_loadu_si128(
3652                                              (const __m128i *)sched_type_map);
3653                 __m128i v_sched_mask = _mm_loadu_si128(
3654                                              (const __m128i *)&sched_and_mask);
3655                 v_sched_bits = _mm_and_si128(v_qe_meta, v_sched_mask);
3656                 __m128i v_sched_remapped = _mm_shuffle_epi8(v_sched_map,
3657                                                             v_sched_bits);
3658                 __m128i v_preshift = _mm_and_si128(v_sched_remapped,
3659                                                    v_sched_mask);
3660                 v_sched_done = _mm_srli_epi32(v_preshift, 10);
3661         }
3662
3663         /* Priority handling
3664          * - QE provides 3 bits of priority
3665          * - Shift << 3 to move to MSBs for byte-prio in rte_event
3666          * - Mask bits to avoid pollution, leaving only 3 prio MSBs in reg
3667          */
3668         __m128i v_prio_done;
3669         {
3670                 static const uint8_t prio_mask[16] = {
3671                         0x00, 0x00, 0x00, 0x07 << 5,
3672                         0x00, 0x00, 0x00, 0x07 << 5,
3673                         0x00, 0x00, 0x00, 0x07 << 5,
3674                         0x00, 0x00, 0x00, 0x07 << 5,
3675                 };
3676                 __m128i v_prio_mask  = _mm_loadu_si128(
3677                                                 (const __m128i *)prio_mask);
3678                 __m128i v_prio_shifted = _mm_slli_epi32(v_qe_meta, 3);
3679                 v_prio_done = _mm_and_si128(v_prio_shifted, v_prio_mask);
3680         }
3681
3682         /* Event Sub/Type handling:
3683          * we want to keep the lower 12 bits of each QE. Shift up by 20 bits
3684          * to get the sub/ev type data into rte_event location, clearing the
3685          * lower 20 bits in the process.
3686          */
3687         __m128i v_types_done;
3688         {
3689                 static const uint8_t event_mask[16] = {
3690                         0x0f, 0x00, 0x00, 0x00,
3691                         0x0f, 0x00, 0x00, 0x00,
3692                         0x0f, 0x00, 0x00, 0x00,
3693                         0x0f, 0x00, 0x00, 0x00,
3694                 };
3695                 static const uint8_t sub_event_mask[16] = {
3696                         0xff, 0x00, 0x00, 0x00,
3697                         0xff, 0x00, 0x00, 0x00,
3698                         0xff, 0x00, 0x00, 0x00,
3699                         0xff, 0x00, 0x00, 0x00,
3700                 };
3701                 static const uint8_t flow_mask[16] = {
3702                         0xff, 0xff, 0x00, 0x00,
3703                         0xff, 0xff, 0x00, 0x00,
3704                         0xff, 0xff, 0x00, 0x00,
3705                         0xff, 0xff, 0x00, 0x00,
3706                 };
3707                 __m128i v_event_mask  = _mm_loadu_si128(
3708                                         (const __m128i *)event_mask);
3709                 __m128i v_sub_event_mask  = _mm_loadu_si128(
3710                                         (const __m128i *)sub_event_mask);
3711                 __m128i v_flow_mask  = _mm_loadu_si128(
3712                                        (const __m128i *)flow_mask);
3713                 __m128i v_sub = _mm_srli_epi32(v_qe_meta, 8);
3714                 v_sub = _mm_and_si128(v_sub, v_sub_event_mask);
3715                 __m128i v_type = _mm_and_si128(v_qe_meta, v_event_mask);
3716                 v_type = _mm_slli_epi32(v_type, 8);
3717                 v_types_done = _mm_or_si128(v_type, v_sub);
3718                 v_types_done = _mm_slli_epi32(v_types_done, 20);
3719                 __m128i v_flow = _mm_and_si128(v_qe_status, v_flow_mask);
3720                 v_types_done = _mm_or_si128(v_types_done, v_flow);
3721         }
3722
3723         /* Combine QID, Sched and Prio fields, then Shift >> 8 bits to align
3724          * with the rte_event, allowing unpacks to move/blend with payload.
3725          */
3726         __m128i v_q_s_p_done;
3727         {
3728                 __m128i v_qid_sched = _mm_or_si128(v_qid_done, v_sched_done);
3729                 __m128i v_q_s_prio = _mm_or_si128(v_qid_sched, v_prio_done);
3730                 v_q_s_p_done = _mm_srli_epi32(v_q_s_prio, 8);
3731         }
3732
3733         __m128i v_unpk_ev_23, v_unpk_ev_01, v_ev_2, v_ev_3, v_ev_0, v_ev_1;
3734
3735         /* Unpack evs into u64 metadata, then indiv events */
3736         v_unpk_ev_23 = _mm_unpackhi_epi32(v_types_done, v_q_s_p_done);
3737         v_unpk_ev_01 = _mm_unpacklo_epi32(v_types_done, v_q_s_p_done);
3738
3739         switch (valid_events) {
3740         case 4:
3741                 v_ev_3 = _mm_blend_epi16(v_unpk_ev_23, v_qe_3, 0x0F);
3742                 v_ev_3 = _mm_alignr_epi8(v_ev_3, v_ev_3, 8);
3743                 _mm_storeu_si128((__m128i *)&events[3], v_ev_3);
3744                 DLB2_INC_STAT(qm_port->ev_port->stats.rx_sched_cnt[hw_sched3],
3745                               1);
3746                 /* fallthrough */
3747         case 3:
3748                 v_ev_2 = _mm_unpacklo_epi64(v_unpk_ev_23, v_qe_2);
3749                 _mm_storeu_si128((__m128i *)&events[2], v_ev_2);
3750                 DLB2_INC_STAT(qm_port->ev_port->stats.rx_sched_cnt[hw_sched2],
3751                               1);
3752                 /* fallthrough */
3753         case 2:
3754                 v_ev_1 = _mm_blend_epi16(v_unpk_ev_01, v_qe_1, 0x0F);
3755                 v_ev_1 = _mm_alignr_epi8(v_ev_1, v_ev_1, 8);
3756                 _mm_storeu_si128((__m128i *)&events[1], v_ev_1);
3757                 DLB2_INC_STAT(qm_port->ev_port->stats.rx_sched_cnt[hw_sched1],
3758                               1);
3759                 /* fallthrough */
3760         case 1:
3761                 v_ev_0 = _mm_unpacklo_epi64(v_unpk_ev_01, v_qe_0);
3762                 _mm_storeu_si128((__m128i *)&events[0], v_ev_0);
3763                 DLB2_INC_STAT(qm_port->ev_port->stats.rx_sched_cnt[hw_sched0],
3764                               1);
3765         }
3766 }
3767
3768 static __rte_always_inline int
3769 dlb2_recv_qe_sparse_vec(struct dlb2_port *qm_port, void *events,
3770                         uint32_t max_events)
3771 {
3772         /* Using unmasked idx for perf, and masking manually */
3773         uint16_t idx = qm_port->cq_idx_unmasked;
3774         volatile struct dlb2_dequeue_qe *cq_addr;
3775
3776         cq_addr = dlb2_port[qm_port->id][PORT_TYPE(qm_port)].cq_base;
3777
3778         uintptr_t qe_ptr_3 = (uintptr_t)&cq_addr[(idx + 12) &
3779                                                  qm_port->cq_depth_mask];
3780         uintptr_t qe_ptr_2 = (uintptr_t)&cq_addr[(idx +  8) &
3781                                                  qm_port->cq_depth_mask];
3782         uintptr_t qe_ptr_1 = (uintptr_t)&cq_addr[(idx +  4) &
3783                                                  qm_port->cq_depth_mask];
3784         uintptr_t qe_ptr_0 = (uintptr_t)&cq_addr[(idx +  0) &
3785                                                  qm_port->cq_depth_mask];
3786
3787         /* Load QEs from CQ: use compiler barriers to avoid load reordering */
3788         __m128i v_qe_3 = _mm_loadu_si128((const __m128i *)qe_ptr_3);
3789         rte_compiler_barrier();
3790         __m128i v_qe_2 = _mm_loadu_si128((const __m128i *)qe_ptr_2);
3791         rte_compiler_barrier();
3792         __m128i v_qe_1 = _mm_loadu_si128((const __m128i *)qe_ptr_1);
3793         rte_compiler_barrier();
3794         __m128i v_qe_0 = _mm_loadu_si128((const __m128i *)qe_ptr_0);
3795
3796         /* Generate the status shuffle mask:
3797          * - Avoids load in otherwise load-heavy section of code
3798          * - Moves bytes 3,7,11,15 (gen bit bytes) to LSB bytes in XMM
3799          */
3800         const uint32_t stat_shuf_bytes = (15 << 24) | (11 << 16) | (7 << 8) | 3;
3801         __m128i v_zeros = _mm_setzero_si128();
3802         __m128i v_ffff = _mm_cmpeq_epi8(v_zeros, v_zeros);
3803         __m128i v_stat_shuf_mask = _mm_insert_epi32(v_ffff, stat_shuf_bytes, 0);
3804
3805         /* Extract u32 components required from the QE
3806          * - QE[64 to 95 ] for metadata (qid, sched, prio, event type, ...)
3807          * - QE[96 to 127] for status (cq gen bit, error)
3808          *
3809          * Note that stage 1 of the unpacking is re-used for both u32 extracts
3810          */
3811         __m128i v_qe_02 = _mm_unpackhi_epi32(v_qe_0, v_qe_2);
3812         __m128i v_qe_13 = _mm_unpackhi_epi32(v_qe_1, v_qe_3);
3813         __m128i v_qe_status = _mm_unpackhi_epi32(v_qe_02, v_qe_13);
3814         __m128i v_qe_meta   = _mm_unpacklo_epi32(v_qe_02, v_qe_13);
3815
3816         /* Status byte (gen_bit) handling:
3817          * - Shuffle the four status bytes to lanes 0,1,2,3, clear all others
3818          * - Shift left by 7 so each gen bit lands in its byte's MSB
3819          * - Movemask to scalar to collect the four gen bits
3820          */
3821         __m128i v_qe_shuffled = _mm_shuffle_epi8(v_qe_status, v_stat_shuf_mask);
3822         __m128i v_qes_shift_gen_bit = _mm_slli_epi32(v_qe_shuffled, 7);
3823         int32_t qe_gen_bits = _mm_movemask_epi8(v_qes_shift_gen_bit) & 0xf;
3824
3825         /* Expected vs Reality of QE Gen bits
3826          * - cq_rolling_mask provides expected bits
3827          * - QE loads, unpacks/shuffle and movemask provides reality
3828          * - XOR of the two gives bitmask of new packets
3829          * - POPCNT to get the number of new events
3830          */
3831         uint64_t rolling = qm_port->cq_rolling_mask & 0xF;
3832         uint64_t qe_xor_bits = (qe_gen_bits ^ rolling);
3833         uint32_t count_new = __builtin_popcount(qe_xor_bits);
3834         count_new = RTE_MIN(count_new, max_events);
3835         if (!count_new)
3836                 return 0;
3837
3838         /* emulate a 128 bit rotate using 2x 64-bit numbers and bit-shifts */
3839
3840         uint64_t m_rshift = qm_port->cq_rolling_mask >> count_new;
3841         uint64_t m_lshift = qm_port->cq_rolling_mask << (64 - count_new);
3842         uint64_t m2_rshift = qm_port->cq_rolling_mask_2 >> count_new;
3843         uint64_t m2_lshift = qm_port->cq_rolling_mask_2 << (64 - count_new);
3844
3845         /* shifted out of m2 into MSB of m */
3846         qm_port->cq_rolling_mask = (m_rshift | m2_lshift);
3847
3848         /* shifted out of m "looped back" into MSB of m2 */
3849         qm_port->cq_rolling_mask_2 = (m2_rshift | m_lshift);
3850
3851         /* Prefetch the next QEs - issued early so the loads overlap with processing */
3852         rte_prefetch0(&cq_addr[(idx + 16) & qm_port->cq_depth_mask]);
3853         rte_prefetch0(&cq_addr[(idx + 20) & qm_port->cq_depth_mask]);
3854         rte_prefetch0(&cq_addr[(idx + 24) & qm_port->cq_depth_mask]);
3855         rte_prefetch0(&cq_addr[(idx + 28) & qm_port->cq_depth_mask]);
3856
3857         /* Convert QEs from XMM regs to events and store events directly */
3858         _process_deq_qes_vec_impl(qm_port, events, v_qe_3, v_qe_2, v_qe_1,
3859                                   v_qe_0, v_qe_meta, v_qe_status, count_new);
3860
3861         return count_new;
3862 }
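
/*
 * Minimal sketch of the 128-bit rotate emulated above with two 64-bit words
 * (assumption: 'lo'/'hi' form one logical 128-bit rolling mask and
 * 0 < n < 64, which holds here since count_new is between 1 and 4):
 *
 *	static inline void
 *	rot128_right(uint64_t *lo, uint64_t *hi, unsigned int n)
 *	{
 *		uint64_t new_lo = (*lo >> n) | (*hi << (64 - n));
 *		uint64_t new_hi = (*hi >> n) | (*lo << (64 - n));
 *
 *		*lo = new_lo;	// bits shifted out of hi fill lo's MSBs
 *		*hi = new_hi;	// bits shifted out of lo wrap into hi's MSBs
 *	}
 */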
3863
3864 static inline void
3865 dlb2_inc_cq_idx(struct dlb2_port *qm_port, int cnt)
3866 {
3867         uint16_t idx = qm_port->cq_idx_unmasked + cnt;
3868
3869         qm_port->cq_idx_unmasked = idx;
3870         qm_port->cq_idx = idx & qm_port->cq_depth_mask;
3871         qm_port->gen_bit = (~(idx >> qm_port->gen_bit_shift)) & 0x1;
3872 }
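
/*
 * Illustrative example (assumption: a CQ depth of 8, i.e. gen_bit_shift = 3
 * and cq_depth_mask = 7). The gen bit starts at 1 and flips on every wrap of
 * the unmasked index, which is what the expression above computes:
 *
 *	unmasked idx  0..7  : gen_bit = (~(idx >> 3)) & 1 = 1
 *	unmasked idx  8..15 : gen_bit = 0
 *	unmasked idx 16..23 : gen_bit = 1 again
 */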
3873
3874 static inline int16_t
3875 dlb2_hw_dequeue_sparse(struct dlb2_eventdev *dlb2,
3876                        struct dlb2_eventdev_port *ev_port,
3877                        struct rte_event *events,
3878                        uint16_t max_num,
3879                        uint64_t dequeue_timeout_ticks)
3880 {
3881         uint64_t start_ticks = 0ULL;
3882         struct dlb2_port *qm_port;
3883         int num = 0;
3884         bool use_scalar;
3885         uint64_t timeout;
3886
3887         qm_port = &ev_port->qm_port;
3888         use_scalar = qm_port->use_scalar;
3889
3890         if (!dlb2->global_dequeue_wait)
3891                 timeout = dequeue_timeout_ticks;
3892         else
3893                 timeout = dlb2->global_dequeue_wait_ticks;
3894
3895         start_ticks = rte_get_timer_cycles();
3896
3897         use_scalar = use_scalar || (max_num & 0x3);
3898
3899         while (num < max_num) {
3900                 struct dlb2_dequeue_qe qes[DLB2_NUM_QES_PER_CACHE_LINE];
3901                 int num_avail;
3902
3903                 if (use_scalar) {
3904                         int n_iter = 0;
3905                         uint64_t m_rshift, m_lshift, m2_rshift, m2_lshift;
3906
3907                         num_avail = dlb2_recv_qe_sparse(qm_port, qes);
3908                         num_avail = RTE_MIN(num_avail, max_num - num);
3909                         dlb2_inc_cq_idx(qm_port, num_avail << 2);
3910                         if (num_avail == DLB2_NUM_QES_PER_CACHE_LINE)
3911                                 n_iter = dlb2_process_dequeue_four_qes(ev_port,
3912                                                                 qm_port,
3913                                                                 &events[num],
3914                                                                 &qes[0]);
3915                         else if (num_avail)
3916                                 n_iter = dlb2_process_dequeue_qes(ev_port,
3917                                                                 qm_port,
3918                                                                 &events[num],
3919                                                                 &qes[0],
3920                                                                 num_avail);
3921                         if (n_iter != 0) {
3922                                 num += n_iter;
3923                                 /* update rolling_mask for vector code support */
3924                                 m_rshift = qm_port->cq_rolling_mask >> n_iter;
3925                                 m_lshift = qm_port->cq_rolling_mask << (64 - n_iter);
3926                                 m2_rshift = qm_port->cq_rolling_mask_2 >> n_iter;
3927                                 m2_lshift = qm_port->cq_rolling_mask_2 <<
3928                                         (64 - n_iter);
3929                                 qm_port->cq_rolling_mask = (m_rshift | m2_lshift);
3930                                 qm_port->cq_rolling_mask_2 = (m2_rshift | m_lshift);
3931                         }
3932                 } else { /* !use_scalar */
3933                         num_avail = dlb2_recv_qe_sparse_vec(qm_port,
3934                                                             &events[num],
3935                                                             max_num - num);
3936                         dlb2_inc_cq_idx(qm_port, num_avail << 2);
3937                         num += num_avail;
3938                         DLB2_INC_STAT(ev_port->stats.traffic.rx_ok, num_avail);
3939                 }
3940                 if (!num_avail) {
3941                         if ((timeout == 0) || (num > 0))
3942                                 /* Not waiting in any form, or 1+ events received */
3943                                 break;
3944                         else if (dlb2_dequeue_wait(dlb2, ev_port, qm_port,
3945                                                    timeout, start_ticks))
3946                                 break;
3947                 }
3948         }
3949
3950         qm_port->owed_tokens += num;
3951
3952         if (num) {
3953                 if (qm_port->token_pop_mode == AUTO_POP)
3954                         dlb2_consume_qe_immediate(qm_port, num);
3955
3956                 ev_port->outstanding_releases += num;
3957
3958                 dlb2_port_credits_inc(qm_port, num);
3959         }
3960
3961         return num;
3962 }
3963
3964 static __rte_always_inline int
3965 dlb2_recv_qe(struct dlb2_port *qm_port, struct dlb2_dequeue_qe *qe,
3966              uint8_t *offset)
3967 {
3968         uint8_t xor_mask[2][4] = { {0x0F, 0x0E, 0x0C, 0x08},
3969                                    {0x00, 0x01, 0x03, 0x07} };
3970         uint8_t and_mask[4] = {0x0F, 0x0E, 0x0C, 0x08};
3971         volatile struct dlb2_dequeue_qe *cq_addr;
3972         __m128i *qes = (__m128i *)qe;
3973         uint64_t *cache_line_base;
3974         uint8_t gen_bits;
3975
3976         cq_addr = dlb2_port[qm_port->id][PORT_TYPE(qm_port)].cq_base;
3977         cq_addr = &cq_addr[qm_port->cq_idx];
3978
3979         cache_line_base = (void *)(((uintptr_t)cq_addr) & ~0x3F);
3980         *offset = ((uintptr_t)cq_addr & 0x30) >> 4;
3981
3982         /* Load the next CQ cache line from memory. Pack these reads as tight
3983          * as possible to reduce the chance that DLB invalidates the line while
3984          * the CPU is reading it. Read the cache line backwards to ensure that
3985          * if QE[N] (N > 0) is valid, then QEs[0:N-1] are too.
3986          *
3987          * (Valid QEs start at &qe[offset])
3988          */
3989         qes[3] = _mm_load_si128((__m128i *)&cache_line_base[6]);
3990         qes[2] = _mm_load_si128((__m128i *)&cache_line_base[4]);
3991         qes[1] = _mm_load_si128((__m128i *)&cache_line_base[2]);
3992         qes[0] = _mm_load_si128((__m128i *)&cache_line_base[0]);
3993
3994         /* Evict the cache line ASAP */
3995         rte_cldemote(cache_line_base);
3996
3997         /* Extract and combine the gen bits */
3998         gen_bits = ((_mm_extract_epi8(qes[0], 15) & 0x1) << 0) |
3999                    ((_mm_extract_epi8(qes[1], 15) & 0x1) << 1) |
4000                    ((_mm_extract_epi8(qes[2], 15) & 0x1) << 2) |
4001                    ((_mm_extract_epi8(qes[3], 15) & 0x1) << 3);
4002
4003         /* XOR the combined bits such that a 1 represents a valid QE */
4004         gen_bits ^= xor_mask[qm_port->gen_bit][*offset];
4005
4006         /* Mask off gen bits we don't care about */
4007         gen_bits &= and_mask[*offset];
4008
4009         return __builtin_popcount(gen_bits);
4010 }
4011
4012 static inline int16_t
4013 dlb2_hw_dequeue(struct dlb2_eventdev *dlb2,
4014                 struct dlb2_eventdev_port *ev_port,
4015                 struct rte_event *events,
4016                 uint16_t max_num,
4017                 uint64_t dequeue_timeout_ticks)
4018 {
4019         uint64_t timeout;
4020         uint64_t start_ticks = 0ULL;
4021         struct dlb2_port *qm_port;
4022         int num = 0;
4023
4024         qm_port = &ev_port->qm_port;
4025
4026         /* We have a special implementation for waiting. Wait can be:
4027          * 1) no waiting at all
4028          * 2) busy poll only
4029          * 3) wait for interrupt; if woken up and the poll time
4030          * has expired, then return to the caller
4031          * 4) umonitor/umwait repeatedly up to poll time
4032          */
4033
4034         /* If configured for per-dequeue wait, then use the wait value
4035          * provided to this API. Otherwise we must use the global
4036          * value from eventdev config time.
4037          */
4038         if (!dlb2->global_dequeue_wait)
4039                 timeout = dequeue_timeout_ticks;
4040         else
4041                 timeout = dlb2->global_dequeue_wait_ticks;
4042
4043         start_ticks = rte_get_timer_cycles();
4044
4045         while (num < max_num) {
4046                 struct dlb2_dequeue_qe qes[DLB2_NUM_QES_PER_CACHE_LINE];
4047                 uint8_t offset;
4048                 int num_avail;
4049
4050                 /* Copy up to 4 QEs from the current cache line into qes */
4051                 num_avail = dlb2_recv_qe(qm_port, qes, &offset);
4052
4053                 /* But don't process more than the user requested */
4054                 num_avail = RTE_MIN(num_avail, max_num - num);
4055
4056                 dlb2_inc_cq_idx(qm_port, num_avail);
4057
4058                 if (num_avail == DLB2_NUM_QES_PER_CACHE_LINE)
4059                         num += dlb2_process_dequeue_four_qes(ev_port,
4060                                                              qm_port,
4061                                                              &events[num],
4062                                                              &qes[offset]);
4063                 else if (num_avail)
4064                         num += dlb2_process_dequeue_qes(ev_port,
4065                                                         qm_port,
4066                                                         &events[num],
4067                                                         &qes[offset],
4068                                                         num_avail);
4069                 else if ((timeout == 0) || (num > 0))
4070                         /* Not waiting in any form, or 1+ events received? */
4071                         break;
4072                 else if (dlb2_dequeue_wait(dlb2, ev_port, qm_port,
4073                                            timeout, start_ticks))
4074                         break;
4075         }
4076
4077         qm_port->owed_tokens += num;
4078
4079         if (num) {
4080                 if (qm_port->token_pop_mode == AUTO_POP)
4081                         dlb2_consume_qe_immediate(qm_port, num);
4082
4083                 ev_port->outstanding_releases += num;
4084
4085                 dlb2_port_credits_inc(qm_port, num);
4086         }
4087
4088         return num;
4089 }
4090
4091 static uint16_t
4092 dlb2_event_dequeue_burst(void *event_port, struct rte_event *ev, uint16_t num,
4093                          uint64_t wait)
4094 {
4095         struct dlb2_eventdev_port *ev_port = event_port;
4096         struct dlb2_port *qm_port = &ev_port->qm_port;
4097         struct dlb2_eventdev *dlb2 = ev_port->dlb2;
4098         uint16_t cnt;
4099
4100         RTE_ASSERT(ev_port->setup_done);
4101         RTE_ASSERT(ev != NULL);
4102
4103         if (ev_port->implicit_release && ev_port->outstanding_releases > 0) {
4104                 uint16_t out_rels = ev_port->outstanding_releases;
4105
4106                 dlb2_event_release(dlb2, ev_port->id, out_rels);
4107
4108                 DLB2_INC_STAT(ev_port->stats.tx_implicit_rel, out_rels);
4109         }
4110
4111         if (qm_port->token_pop_mode == DEFERRED_POP && qm_port->owed_tokens)
4112                 dlb2_consume_qe_immediate(qm_port, qm_port->owed_tokens);
4113
4114         cnt = dlb2_hw_dequeue(dlb2, ev_port, ev, num, wait);
4115
4116         DLB2_INC_STAT(ev_port->stats.traffic.total_polls, 1);
4117         DLB2_INC_STAT(ev_port->stats.traffic.zero_polls, ((cnt == 0) ? 1 : 0));
4118
4119         return cnt;
4120 }
4121
4122 static uint16_t
4123 dlb2_event_dequeue(void *event_port, struct rte_event *ev, uint64_t wait)
4124 {
4125         return dlb2_event_dequeue_burst(event_port, ev, 1, wait);
4126 }
4127
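     /* Sparse-CQ variant of dlb2_event_dequeue_burst(); identical except that
      * it calls dlb2_hw_dequeue_sparse(), which is selected when the device
      * reports the sparse CQ poll mode.
      */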
4128 static uint16_t
4129 dlb2_event_dequeue_burst_sparse(void *event_port, struct rte_event *ev,
4130                                 uint16_t num, uint64_t wait)
4131 {
4132         struct dlb2_eventdev_port *ev_port = event_port;
4133         struct dlb2_port *qm_port = &ev_port->qm_port;
4134         struct dlb2_eventdev *dlb2 = ev_port->dlb2;
4135         uint16_t cnt;
4136
4137         RTE_ASSERT(ev_port->setup_done);
4138         RTE_ASSERT(ev != NULL);
4139
4140         if (ev_port->implicit_release && ev_port->outstanding_releases > 0) {
4141                 uint16_t out_rels = ev_port->outstanding_releases;
4142
4143                 dlb2_event_release(dlb2, ev_port->id, out_rels);
4144
4145                 DLB2_INC_STAT(ev_port->stats.tx_implicit_rel, out_rels);
4146         }
4147
4148         if (qm_port->token_pop_mode == DEFERRED_POP && qm_port->owed_tokens)
4149                 dlb2_consume_qe_immediate(qm_port, qm_port->owed_tokens);
4150
4151         cnt = dlb2_hw_dequeue_sparse(dlb2, ev_port, ev, num, wait);
4152
4153         DLB2_INC_STAT(ev_port->stats.traffic.total_polls, 1);
4154         DLB2_INC_STAT(ev_port->stats.traffic.zero_polls, ((cnt == 0) ? 1 : 0));
4155         return cnt;
4156 }
4157
4158 static uint16_t
4159 dlb2_event_dequeue_sparse(void *event_port, struct rte_event *ev,
4160                           uint64_t wait)
4161 {
4162         return dlb2_event_dequeue_burst_sparse(event_port, ev, 1, wait);
4163 }
4164
4165 static void
4166 dlb2_flush_port(struct rte_eventdev *dev, int port_id)
4167 {
4168         struct dlb2_eventdev *dlb2 = dlb2_pmd_priv(dev);
4169         eventdev_stop_flush_t flush;
4170         struct rte_event ev;
4171         uint8_t dev_id;
4172         void *arg;
4173         int i;
4174
4175         flush = dev->dev_ops->dev_stop_flush;
4176         dev_id = dev->data->dev_id;
4177         arg = dev->data->dev_stop_flush_arg;
4178
4179         while (rte_event_dequeue_burst(dev_id, port_id, &ev, 1, 0)) {
4180                 if (flush)
4181                         flush(dev_id, ev, arg);
4182
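                     /* Events dequeued from a directed port carry no
                      * load-balanced scheduler state, so no RELEASE is needed.
                      */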
4183                 if (dlb2->ev_ports[port_id].qm_port.is_directed)
4184                         continue;
4185
4186                 ev.op = RTE_EVENT_OP_RELEASE;
4187
4188                 rte_event_enqueue_burst(dev_id, port_id, &ev, 1);
4189         }
4190
4191         /* Enqueue any additional outstanding releases */
4192         ev.op = RTE_EVENT_OP_RELEASE;
4193
4194         for (i = dlb2->ev_ports[port_id].outstanding_releases; i > 0; i--)
4195                 rte_event_enqueue_burst(dev_id, port_id, &ev, 1);
4196 }
4197
4198 static uint32_t
4199 dlb2_get_ldb_queue_depth(struct dlb2_eventdev *dlb2,
4200                          struct dlb2_eventdev_queue *queue)
4201 {
4202         struct dlb2_hw_dev *handle = &dlb2->qm_instance;
4203         struct dlb2_get_ldb_queue_depth_args cfg;
4204         int ret;
4205
4206         cfg.queue_id = queue->qm_queue.id;
4207
4208         ret = dlb2_iface_get_ldb_queue_depth(handle, &cfg);
4209         if (ret < 0) {
4210                 DLB2_LOG_ERR("dlb2: get_ldb_queue_depth ret=%d (driver status: %s)\n",
4211                              ret, dlb2_error_strings[cfg.response.status]);
4212                 return ret;
4213         }
4214
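             /* The queue depth is reported in the response's id field. */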
4215         return cfg.response.id;
4216 }
4217
4218 static uint32_t
4219 dlb2_get_dir_queue_depth(struct dlb2_eventdev *dlb2,
4220                          struct dlb2_eventdev_queue *queue)
4221 {
4222         struct dlb2_hw_dev *handle = &dlb2->qm_instance;
4223         struct dlb2_get_dir_queue_depth_args cfg;
4224         int ret;
4225
4226         cfg.queue_id = queue->qm_queue.id;
4227
4228         ret = dlb2_iface_get_dir_queue_depth(handle, &cfg);
4229         if (ret < 0) {
4230                 DLB2_LOG_ERR("dlb2: get_dir_queue_depth ret=%d (driver status: %s)\n",
4231                              ret, dlb2_error_strings[cfg.response.status]);
4232                 return ret;
4233         }
4234
4235         return cfg.response.id;
4236 }
4237
4238 uint32_t
4239 dlb2_get_queue_depth(struct dlb2_eventdev *dlb2,
4240                      struct dlb2_eventdev_queue *queue)
4241 {
4242         if (queue->qm_queue.is_directed)
4243                 return dlb2_get_dir_queue_depth(dlb2, queue);
4244         else
4245                 return dlb2_get_ldb_queue_depth(dlb2, queue);
4246 }
4247
4248 static bool
4249 dlb2_queue_is_empty(struct dlb2_eventdev *dlb2,
4250                     struct dlb2_eventdev_queue *queue)
4251 {
4252         return dlb2_get_queue_depth(dlb2, queue) == 0;
4253 }
4254
4255 static bool
4256 dlb2_linked_queues_empty(struct dlb2_eventdev *dlb2)
4257 {
4258         int i;
4259
4260         for (i = 0; i < dlb2->num_queues; i++) {
4261                 if (dlb2->ev_queues[i].num_links == 0)
4262                         continue;
4263                 if (!dlb2_queue_is_empty(dlb2, &dlb2->ev_queues[i]))
4264                         return false;
4265         }
4266
4267         return true;
4268 }
4269
4270 static bool
4271 dlb2_queues_empty(struct dlb2_eventdev *dlb2)
4272 {
4273         int i;
4274
4275         for (i = 0; i < dlb2->num_queues; i++) {
4276                 if (!dlb2_queue_is_empty(dlb2, &dlb2->ev_queues[i]))
4277                         return false;
4278         }
4279
4280         return true;
4281 }
4282
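     /* Drain the device: flush every port until all linked queues are empty,
      * then temporarily link a load-balanced port to any non-empty unlinked
      * queues and drain those as well.
      */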
4283 static void
4284 dlb2_drain(struct rte_eventdev *dev)
4285 {
4286         struct dlb2_eventdev *dlb2 = dlb2_pmd_priv(dev);
4287         struct dlb2_eventdev_port *ev_port = NULL;
4288         uint8_t dev_id;
4289         int i;
4290
4291         dev_id = dev->data->dev_id;
4292
4293         while (!dlb2_linked_queues_empty(dlb2)) {
4294                 /* Flush all the ev_ports, which will drain all their connected
4295                  * queues.
4296                  */
4297                 for (i = 0; i < dlb2->num_ports; i++)
4298                         dlb2_flush_port(dev, i);
4299         }
4300
4301         /* The queues are empty, but there may be events left in the ports. */
4302         for (i = 0; i < dlb2->num_ports; i++)
4303                 dlb2_flush_port(dev, i);
4304
4305         /* If the domain's queues are empty, we're done. */
4306         if (dlb2_queues_empty(dlb2))
4307                 return;
4308
4309         /* Else, there must be at least one unlinked load-balanced queue.
4310          * Select a load-balanced port with which to drain the unlinked
4311          * queue(s).
4312          */
4313         for (i = 0; i < dlb2->num_ports; i++) {
4314                 ev_port = &dlb2->ev_ports[i];
4315
4316                 if (!ev_port->qm_port.is_directed)
4317                         break;
4318         }
4319
4320         if (i == dlb2->num_ports) {
4321                 DLB2_LOG_ERR("internal error: no LDB ev_ports\n");
4322                 return;
4323         }
4324
4325         rte_errno = 0;
4326         rte_event_port_unlink(dev_id, ev_port->id, NULL, 0);
4327
4328         if (rte_errno) {
4329                 DLB2_LOG_ERR("internal error: failed to unlink ev_port %d\n",
4330                              ev_port->id);
4331                 return;
4332         }
4333
4334         for (i = 0; i < dlb2->num_queues; i++) {
4335                 uint8_t qid, prio;
4336                 int ret;
4337
4338                 if (dlb2_queue_is_empty(dlb2, &dlb2->ev_queues[i]))
4339                         continue;
4340
4341                 qid = i;
4342                 prio = 0;
4343
4344                 /* Link the ev_port to the queue */
4345                 ret = rte_event_port_link(dev_id, ev_port->id, &qid, &prio, 1);
4346                 if (ret != 1) {
4347                         DLB2_LOG_ERR("internal error: failed to link ev_port %d to queue %d\n",
4348                                      ev_port->id, qid);
4349                         return;
4350                 }
4351
4352                 /* Flush the queue */
4353                 while (!dlb2_queue_is_empty(dlb2, &dlb2->ev_queues[i]))
4354                         dlb2_flush_port(dev, ev_port->id);
4355
4356                 /* Drain any extant events in the ev_port. */
4357                 dlb2_flush_port(dev, ev_port->id);
4358
4359                 /* Unlink the ev_port from the queue */
4360                 ret = rte_event_port_unlink(dev_id, ev_port->id, &qid, 1);
4361                 if (ret != 1) {
4362                         DLB2_LOG_ERR("internal error: failed to unlink ev_port %d from queue %d\n",
4363                                      ev_port->id, qid);
4364                         return;
4365                 }
4366         }
4367 }
4368
4369 static void
4370 dlb2_eventdev_stop(struct rte_eventdev *dev)
4371 {
4372         struct dlb2_eventdev *dlb2 = dlb2_pmd_priv(dev);
4373
4374         rte_spinlock_lock(&dlb2->qm_instance.resource_lock);
4375
4376         if (dlb2->run_state == DLB2_RUN_STATE_STOPPED) {
4377                 DLB2_LOG_DBG("Internal error: already stopped\n");
4378                 rte_spinlock_unlock(&dlb2->qm_instance.resource_lock);
4379                 return;
4380         } else if (dlb2->run_state != DLB2_RUN_STATE_STARTED) {
4381                 DLB2_LOG_ERR("Internal error: bad state %d for dev_stop\n",
4382                              (int)dlb2->run_state);
4383                 rte_spinlock_unlock(&dlb2->qm_instance.resource_lock);
4384                 return;
4385         }
4386
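             /* Mark the device as stopping and drop the lock before draining,
              * since the drain uses the normal enqueue/dequeue paths and can
              * take a while.
              */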
4387         dlb2->run_state = DLB2_RUN_STATE_STOPPING;
4388
4389         rte_spinlock_unlock(&dlb2->qm_instance.resource_lock);
4390
4391         dlb2_drain(dev);
4392
4393         dlb2->run_state = DLB2_RUN_STATE_STOPPED;
4394 }
4395
4396 static int
4397 dlb2_eventdev_close(struct rte_eventdev *dev)
4398 {
4399         dlb2_hw_reset_sched_domain(dev, false);
4400
4401         return 0;
4402 }
4403
4404 static void
4405 dlb2_eventdev_queue_release(struct rte_eventdev *dev, uint8_t id)
4406 {
4407         RTE_SET_USED(dev);
4408         RTE_SET_USED(id);
4409
4410         /* This function intentionally left blank. */
4411 }
4412
4413 static void
4414 dlb2_eventdev_port_release(void *port)
4415 {
4416         struct dlb2_eventdev_port *ev_port = port;
4417         struct dlb2_port *qm_port;
4418
4419         if (ev_port) {
4420                 qm_port = &ev_port->qm_port;
4421                 if (qm_port->config_state == DLB2_CONFIGURED)
4422                         dlb2_free_qe_mem(qm_port);
4423         }
4424 }
4425
4426 static int
4427 dlb2_eventdev_timeout_ticks(struct rte_eventdev *dev, uint64_t ns,
4428                             uint64_t *timeout_ticks)
4429 {
4430         RTE_SET_USED(dev);
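             /* Convert ns to timer cycles. The cycles-per-ns ratio is
              * truncated to an integer, so this assumes a timer frequency of
              * at least 1 GHz.
              */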
4431         uint64_t cycles_per_ns = rte_get_timer_hz() / 1E9;
4432
4433         *timeout_ticks = ns * cycles_per_ns;
4434
4435         return 0;
4436 }
4437
4438 static void
4439 dlb2_entry_points_init(struct rte_eventdev *dev)
4440 {
4441         struct dlb2_eventdev *dlb2;
4442
4443         /* Expose PMD's eventdev interface */
4444         static struct eventdev_ops dlb2_eventdev_entry_ops = {
4445                 .dev_infos_get    = dlb2_eventdev_info_get,
4446                 .dev_configure    = dlb2_eventdev_configure,
4447                 .dev_start        = dlb2_eventdev_start,
4448                 .dev_stop         = dlb2_eventdev_stop,
4449                 .dev_close        = dlb2_eventdev_close,
4450                 .queue_def_conf   = dlb2_eventdev_queue_default_conf_get,
4451                 .queue_setup      = dlb2_eventdev_queue_setup,
4452                 .queue_release    = dlb2_eventdev_queue_release,
4453                 .port_def_conf    = dlb2_eventdev_port_default_conf_get,
4454                 .port_setup       = dlb2_eventdev_port_setup,
4455                 .port_release     = dlb2_eventdev_port_release,
4456                 .port_link        = dlb2_eventdev_port_link,
4457                 .port_unlink      = dlb2_eventdev_port_unlink,
4458                 .port_unlinks_in_progress =
4459                                     dlb2_eventdev_port_unlinks_in_progress,
4460                 .timeout_ticks    = dlb2_eventdev_timeout_ticks,
4461                 .dump             = dlb2_eventdev_dump,
4462                 .xstats_get       = dlb2_eventdev_xstats_get,
4463                 .xstats_get_names = dlb2_eventdev_xstats_get_names,
4464                 .xstats_get_by_name = dlb2_eventdev_xstats_get_by_name,
4465                 .xstats_reset       = dlb2_eventdev_xstats_reset,
4466                 .dev_selftest     = test_dlb2_eventdev,
4467         };
4468
4469         /* Install the PMD's eventdev ops and fast-path entry points */
4470
4471         dev->dev_ops = &dlb2_eventdev_entry_ops;
4472         dev->enqueue = dlb2_event_enqueue;
4473         dev->enqueue_burst = dlb2_event_enqueue_burst;
4474         dev->enqueue_new_burst = dlb2_event_enqueue_new_burst;
4475         dev->enqueue_forward_burst = dlb2_event_enqueue_forward_burst;
4476
4477         dlb2 = dev->data->dev_private;
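             /* Select dense or sparse CQ dequeue entry points based on the
              * poll mode reported by the device at probe time.
              */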
4478         if (dlb2->poll_mode == DLB2_CQ_POLL_MODE_SPARSE) {
4479                 dev->dequeue = dlb2_event_dequeue_sparse;
4480                 dev->dequeue_burst = dlb2_event_dequeue_burst_sparse;
4481         } else {
4482                 dev->dequeue = dlb2_event_dequeue;
4483                 dev->dequeue_burst = dlb2_event_dequeue_burst;
4484         }
4485 }
4486
4487 int
4488 dlb2_primary_eventdev_probe(struct rte_eventdev *dev,
4489                             const char *name,
4490                             struct dlb2_devargs *dlb2_args)
4491 {
4492         struct dlb2_eventdev *dlb2;
4493         int err, i;
4494
4495         dlb2 = dev->data->dev_private;
4496
4497         dlb2->event_dev = dev; /* backlink */
4498
4499         evdev_dlb2_default_info.driver_name = name;
4500
4501         dlb2->max_num_events_override = dlb2_args->max_num_events;
4502         dlb2->num_dir_credits_override = dlb2_args->num_dir_credits_override;
4503         dlb2->poll_interval = dlb2_args->poll_interval;
4504         dlb2->sw_credit_quanta = dlb2_args->sw_credit_quanta;
4505         dlb2->hw_credit_quanta = dlb2_args->hw_credit_quanta;
4506         dlb2->default_depth_thresh = dlb2_args->default_depth_thresh;
4507         dlb2->vector_opts_enabled = dlb2_args->vector_opts_enabled;
4508
4509
4510         if (dlb2_args->max_cq_depth != 0)
4511                 dlb2->max_cq_depth = dlb2_args->max_cq_depth;
4512         else
4513                 dlb2->max_cq_depth = DLB2_DEFAULT_CQ_DEPTH;
4514
4515         evdev_dlb2_default_info.max_event_port_dequeue_depth = dlb2->max_cq_depth;
4516
4517         err = dlb2_iface_open(&dlb2->qm_instance, name);
4518         if (err < 0) {
4519                 DLB2_LOG_ERR("could not open event hardware device, err=%d\n",
4520                              err);
4521                 return err;
4522         }
4523
4524         err = dlb2_iface_get_device_version(&dlb2->qm_instance,
4525                                             &dlb2->revision);
4526         if (err < 0) {
4527                 DLB2_LOG_ERR("dlb2: failed to get the device version, err=%d\n",
4528                              err);
4529                 return err;
4530         }
4531
4532         err = dlb2_hw_query_resources(dlb2);
4533         if (err) {
4534                 DLB2_LOG_ERR("get resources err=%d for %s\n",
4535                              err, name);
4536                 return err;
4537         }
4538
4539         dlb2_iface_hardware_init(&dlb2->qm_instance);
4540
4541         /* configure class of service */
4542         {
4543                 struct dlb2_set_cos_bw_args
4544                         set_cos_bw_args = { {0} };
4545                 int id;
4546                 int ret = 0;
4547
4548                 for (id = 0; id < DLB2_COS_NUM_VALS; id++) {
4549                         set_cos_bw_args.cos_id = id;
4550                         set_cos_bw_args.bandwidth = dlb2->cos_bw[id];
4551                         ret = dlb2_iface_set_cos_bw(&dlb2->qm_instance,
4552                                                     &set_cos_bw_args);
4553                         if (ret != 0)
4554                                 break;
4555                 }
4556                 if (ret) {
4557                         DLB2_LOG_ERR("dlb2: failed to configure class of service, err=%d\n",
4558                                      ret);
4559                         return ret;
4560                 }
4561         }
4562
4563         err = dlb2_iface_get_cq_poll_mode(&dlb2->qm_instance, &dlb2->poll_mode);
4564         if (err < 0) {
4565                 DLB2_LOG_ERR("dlb2: failed to get the poll mode, err=%d\n",
4566                              err);
4567                 return err;
4568         }
4569
4570         /* Complete xstats runtime initialization */
4571         err = dlb2_xstats_init(dlb2);
4572         if (err) {
4573                 DLB2_LOG_ERR("dlb2: failed to init xstats, err=%d\n", err);
4574                 return err;
4575         }
4576
4577         /* Initialize each port's token pop mode */
4578         for (i = 0; i < DLB2_MAX_NUM_PORTS(dlb2->version); i++)
4579                 dlb2->ev_ports[i].qm_port.token_pop_mode = AUTO_POP;
4580
4581         rte_spinlock_init(&dlb2->qm_instance.resource_lock);
4582
4583         dlb2_iface_low_level_io_init();
4584
4585         dlb2_entry_points_init(dev);
4586
4587         dlb2_init_queue_depth_thresholds(dlb2,
4588                                          dlb2_args->qid_depth_thresholds.val);
4589
4590         dlb2_init_cq_weight(dlb2,
4591                             dlb2_args->cq_weight.limit);
4592
4593         dlb2_init_port_cos(dlb2,
4594                            dlb2_args->port_cos.cos_id);
4595
4596         dlb2_init_cos_bw(dlb2,
4597                          &dlb2_args->cos_bw);
4598
4599         return 0;
4600 }
4601
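     /* Probe path for secondary processes: open the device, query resources,
      * and install the entry points; no hardware configuration is done here.
      */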
4602 int
4603 dlb2_secondary_eventdev_probe(struct rte_eventdev *dev,
4604                               const char *name)
4605 {
4606         struct dlb2_eventdev *dlb2;
4607         int err;
4608
4609         dlb2 = dev->data->dev_private;
4610
4611         evdev_dlb2_default_info.driver_name = name;
4612
4613         err = dlb2_iface_open(&dlb2->qm_instance, name);
4614         if (err < 0) {
4615                 DLB2_LOG_ERR("could not open event hardware device, err=%d\n",
4616                              err);
4617                 return err;
4618         }
4619
4620         err = dlb2_hw_query_resources(dlb2);
4621         if (err) {
4622                 DLB2_LOG_ERR("get resources err=%d for %s\n",
4623                              err, name);
4624                 return err;
4625         }
4626
4627         dlb2_iface_low_level_io_init();
4628
4629         dlb2_entry_points_init(dev);
4630
4631         return 0;
4632 }
4633
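     /* Parse the PMD's devargs into dlb2_args. A params string that
      * rte_kvargs_parse() rejects is reported and ignored rather than treated
      * as a fatal error.
      */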
4634 int
4635 dlb2_parse_params(const char *params,
4636                   const char *name,
4637                   struct dlb2_devargs *dlb2_args,
4638                   uint8_t version)
4639 {
4640         int ret = 0;
4641         static const char * const args[] = { NUMA_NODE_ARG,
4642                                              DLB2_MAX_NUM_EVENTS,
4643                                              DLB2_NUM_DIR_CREDITS,
4644                                              DEV_ID_ARG,
4645                                              DLB2_QID_DEPTH_THRESH_ARG,
4646                                              DLB2_COS_ARG,
4647                                              DLB2_POLL_INTERVAL_ARG,
4648                                              DLB2_SW_CREDIT_QUANTA_ARG,
4649                                              DLB2_HW_CREDIT_QUANTA_ARG,
4650                                              DLB2_DEPTH_THRESH_ARG,
4651                                              DLB2_VECTOR_OPTS_ENAB_ARG,
4652                                              DLB2_MAX_CQ_DEPTH,
4653                                              DLB2_CQ_WEIGHT,
4654                                              DLB2_PORT_COS,
4655                                              DLB2_COS_BW,
4656                                              NULL };
4657
4658         if (params != NULL && params[0] != '\0') {
4659                 struct rte_kvargs *kvlist = rte_kvargs_parse(params, args);
4660
4661                 if (kvlist == NULL) {
4662                         RTE_LOG(INFO, PMD,
4663                                 "Ignoring unsupported parameters when creating device '%s'\n",
4664                                 name);
4665                 } else {
4666                         int ret = rte_kvargs_process(kvlist, NUMA_NODE_ARG,
4667                                                      set_numa_node,
4668                                                      &dlb2_args->socket_id);
4669                         if (ret != 0) {
4670                                 DLB2_LOG_ERR("%s: Error parsing numa node parameter",
4671                                              name);
4672                                 rte_kvargs_free(kvlist);
4673                                 return ret;
4674                         }
4675
4676                         ret = rte_kvargs_process(kvlist, DLB2_MAX_NUM_EVENTS,
4677                                                  set_max_num_events,
4678                                                  &dlb2_args->max_num_events);
4679                         if (ret != 0) {
4680                                 DLB2_LOG_ERR("%s: Error parsing max_num_events parameter",
4681                                              name);
4682                                 rte_kvargs_free(kvlist);
4683                                 return ret;
4684                         }
4685
4686                         if (version == DLB2_HW_V2) {
4687                                 ret = rte_kvargs_process(kvlist,
4688                                         DLB2_NUM_DIR_CREDITS,
4689                                         set_num_dir_credits,
4690                                         &dlb2_args->num_dir_credits_override);
4691                                 if (ret != 0) {
4692                                         DLB2_LOG_ERR("%s: Error parsing num_dir_credits parameter",
4693                                                      name);
4694                                         rte_kvargs_free(kvlist);
4695                                         return ret;
4696                                 }
4697                         }
4698                         ret = rte_kvargs_process(kvlist, DEV_ID_ARG,
4699                                                  set_dev_id,
4700                                                  &dlb2_args->dev_id);
4701                         if (ret != 0) {
4702                                 DLB2_LOG_ERR("%s: Error parsing dev_id parameter",
4703                                              name);
4704                                 rte_kvargs_free(kvlist);
4705                                 return ret;
4706                         }
4707
4708                         if (version == DLB2_HW_V2) {
4709                                 ret = rte_kvargs_process(
4710                                         kvlist,
4711                                         DLB2_QID_DEPTH_THRESH_ARG,
4712                                         set_qid_depth_thresh,
4713                                         &dlb2_args->qid_depth_thresholds);
4714                         } else {
4715                                 ret = rte_kvargs_process(
4716                                         kvlist,
4717                                         DLB2_QID_DEPTH_THRESH_ARG,
4718                                         set_qid_depth_thresh_v2_5,
4719                                         &dlb2_args->qid_depth_thresholds);
4720                         }
4721                         if (ret != 0) {
4722                                 DLB2_LOG_ERR("%s: Error parsing qid_depth_thresh parameter",
4723                                              name);
4724                                 rte_kvargs_free(kvlist);
4725                                 return ret;
4726                         }
4727
4728                         ret = rte_kvargs_process(kvlist, DLB2_POLL_INTERVAL_ARG,
4729                                                  set_poll_interval,
4730                                                  &dlb2_args->poll_interval);
4731                         if (ret != 0) {
4732                                 DLB2_LOG_ERR("%s: Error parsing poll interval parameter",
4733                                              name);
4734                                 rte_kvargs_free(kvlist);
4735                                 return ret;
4736                         }
4737
4738                         ret = rte_kvargs_process(kvlist,
4739                                                  DLB2_SW_CREDIT_QUANTA_ARG,
4740                                                  set_sw_credit_quanta,
4741                                                  &dlb2_args->sw_credit_quanta);
4742                         if (ret != 0) {
4743                                 DLB2_LOG_ERR("%s: Error parsing sw credit quanta parameter",
4744                                              name);
4745                                 rte_kvargs_free(kvlist);
4746                                 return ret;
4747                         }
4748
4749                         ret = rte_kvargs_process(kvlist,
4750                                                  DLB2_HW_CREDIT_QUANTA_ARG,
4751                                                  set_hw_credit_quanta,
4752                                                  &dlb2_args->hw_credit_quanta);
4753                         if (ret != 0) {
4754                                 DLB2_LOG_ERR("%s: Error parsing hw credit quanta parameter",
4755                                              name);
4756                                 rte_kvargs_free(kvlist);
4757                                 return ret;
4758                         }
4759
4760                         ret = rte_kvargs_process(kvlist, DLB2_DEPTH_THRESH_ARG,
4761                                         set_default_depth_thresh,
4762                                         &dlb2_args->default_depth_thresh);
4763                         if (ret != 0) {
4764                                 DLB2_LOG_ERR("%s: Error parsing set depth thresh parameter",
4765                                              name);
4766                                 rte_kvargs_free(kvlist);
4767                                 return ret;
4768                         }
4769
4770                         ret = rte_kvargs_process(kvlist,
4771                                         DLB2_VECTOR_OPTS_ENAB_ARG,
4772                                         set_vector_opts_enab,
4773                                         &dlb2_args->vector_opts_enabled);
4774                         if (ret != 0) {
4775                                 DLB2_LOG_ERR("%s: Error parsing vector opts enabled",
4776                                              name);
4777                                 rte_kvargs_free(kvlist);
4778                                 return ret;
4779                         }
4780
4781                         ret = rte_kvargs_process(kvlist,
4782                                         DLB2_MAX_CQ_DEPTH,
4783                                         set_max_cq_depth,
4784                                         &dlb2_args->max_cq_depth);
4785                         if (ret != 0) {
4786                                 DLB2_LOG_ERR("%s: Error parsing max cq depth",
4787                                              name);
4788                                 rte_kvargs_free(kvlist);
4789                                 return ret;
4790                         }
4791
4792                         ret = rte_kvargs_process(kvlist,
4793                                         DLB2_CQ_WEIGHT,
4794                                         set_cq_weight,
4795                                         &dlb2_args->cq_weight);
4796                         if (ret != 0) {
4797                                 DLB2_LOG_ERR("%s: Error parsing cq weight",
4798                                              name);
4799                                 rte_kvargs_free(kvlist);
4800                                 return ret;
4801                         }
4802
4803                         ret = rte_kvargs_process(kvlist,
4804                                         DLB2_PORT_COS,
4805                                         set_port_cos,
4806                                         &dlb2_args->port_cos);
4807                         if (ret != 0) {
4808                                 DLB2_LOG_ERR("%s: Error parsing port cos",
4809                                              name);
4810                                 rte_kvargs_free(kvlist);
4811                                 return ret;
4812                         }
4813
4814                         ret = rte_kvargs_process(kvlist,
4815                                         DLB2_COS_BW,
4816                                         set_cos_bw,
4817                                         &dlb2_args->cos_bw);
4818                         if (ret != 0) {
4819                                 DLB2_LOG_ERR("%s: Error parsing cos_bw",
4820                                              name);
4821                                 rte_kvargs_free(kvlist);
4822                                 return ret;
4823                         }
4824
4825
4826                         rte_kvargs_free(kvlist);
4827                 }
4828         }
4829         return ret;
4830 }
4831 RTE_LOG_REGISTER_DEFAULT(eventdev_dlb2_log_level, NOTICE);