1 /* SPDX-License-Identifier: BSD-3-Clause
2  * Copyright(c) 2016-2020 Intel Corporation
3  */
4
5 #include <assert.h>
6 #include <errno.h>
7 #include <nmmintrin.h>
8 #include <pthread.h>
9 #include <stdint.h>
10 #include <stdbool.h>
11 #include <stdio.h>
12 #include <string.h>
13 #include <sys/mman.h>
14 #include <fcntl.h>
15
16 #include <rte_common.h>
17 #include <rte_config.h>
18 #include <rte_cycles.h>
19 #include <rte_debug.h>
20 #include <rte_dev.h>
21 #include <rte_errno.h>
22 #include <rte_eventdev.h>
23 #include <eventdev_pmd.h>
24 #include <rte_io.h>
25 #include <rte_kvargs.h>
26 #include <rte_log.h>
27 #include <rte_malloc.h>
28 #include <rte_mbuf.h>
29 #include <rte_power_intrinsics.h>
30 #include <rte_prefetch.h>
31 #include <rte_ring.h>
32 #include <rte_string_fns.h>
33
34 #include "dlb2_priv.h"
35 #include "dlb2_iface.h"
36 #include "dlb2_inline_fns.h"
37
38 /*
39  * Resources exposed to eventdev. Some values overridden at runtime using
40  * values returned by the DLB kernel driver.
41  */
42 #if (RTE_EVENT_MAX_QUEUES_PER_DEV > UINT8_MAX)
43 #error "RTE_EVENT_MAX_QUEUES_PER_DEV cannot fit in member max_event_queues"
44 #endif
45 static struct rte_event_dev_info evdev_dlb2_default_info = {
46         .driver_name = "", /* probe will set */
47         .min_dequeue_timeout_ns = DLB2_MIN_DEQUEUE_TIMEOUT_NS,
48         .max_dequeue_timeout_ns = DLB2_MAX_DEQUEUE_TIMEOUT_NS,
49 #if (RTE_EVENT_MAX_QUEUES_PER_DEV < DLB2_MAX_NUM_LDB_QUEUES)
50         .max_event_queues = RTE_EVENT_MAX_QUEUES_PER_DEV,
51 #else
52         .max_event_queues = DLB2_MAX_NUM_LDB_QUEUES,
53 #endif
54         .max_event_queue_flows = DLB2_MAX_NUM_FLOWS,
55         .max_event_queue_priority_levels = DLB2_QID_PRIORITIES,
56         .max_event_priority_levels = DLB2_QID_PRIORITIES,
57         .max_event_ports = DLB2_MAX_NUM_LDB_PORTS,
58         .max_event_port_dequeue_depth = DLB2_MAX_CQ_DEPTH,
59         .max_event_port_enqueue_depth = DLB2_MAX_ENQUEUE_DEPTH,
60         .max_event_port_links = DLB2_MAX_NUM_QIDS_PER_LDB_CQ,
61         .max_num_events = DLB2_MAX_NUM_LDB_CREDITS,
62         .max_single_link_event_port_queue_pairs =
63                 DLB2_MAX_NUM_DIR_PORTS(DLB2_HW_V2),
64         .event_dev_cap = (RTE_EVENT_DEV_CAP_QUEUE_QOS |
65                           RTE_EVENT_DEV_CAP_EVENT_QOS |
66                           RTE_EVENT_DEV_CAP_BURST_MODE |
67                           RTE_EVENT_DEV_CAP_DISTRIBUTED_SCHED |
68                           RTE_EVENT_DEV_CAP_IMPLICIT_RELEASE_DISABLE |
69                           RTE_EVENT_DEV_CAP_QUEUE_ALL_TYPES),
70 };
71
72 struct process_local_port_data
73 dlb2_port[DLB2_MAX_NUM_PORTS_ALL][DLB2_NUM_PORT_TYPES];
74
75 static void
76 dlb2_free_qe_mem(struct dlb2_port *qm_port)
77 {
78         if (qm_port == NULL)
79                 return;
80
81         rte_free(qm_port->qe4);
82         qm_port->qe4 = NULL;
83
84         rte_free(qm_port->int_arm_qe);
85         qm_port->int_arm_qe = NULL;
86
87         rte_free(qm_port->consume_qe);
88         qm_port->consume_qe = NULL;
89
90         rte_memzone_free(dlb2_port[qm_port->id][PORT_TYPE(qm_port)].mz);
91         dlb2_port[qm_port->id][PORT_TYPE(qm_port)].mz = NULL;
92 }
93
94 /* override defaults with value(s) provided on command line */
95 static void
96 dlb2_init_queue_depth_thresholds(struct dlb2_eventdev *dlb2,
97                                  int *qid_depth_thresholds)
98 {
99         int q;
100
101         for (q = 0; q < DLB2_MAX_NUM_QUEUES(dlb2->version); q++) {
102                 if (qid_depth_thresholds[q] != 0)
103                         dlb2->ev_queues[q].depth_threshold =
104                                 qid_depth_thresholds[q];
105         }
106 }
107
108 static int
109 dlb2_hw_query_resources(struct dlb2_eventdev *dlb2)
110 {
111         struct dlb2_hw_dev *handle = &dlb2->qm_instance;
112         struct dlb2_hw_resource_info *dlb2_info = &handle->info;
113         int ret;
114
115         /* Query driver resources provisioned for this device */
116
117         ret = dlb2_iface_get_num_resources(handle,
118                                            &dlb2->hw_rsrc_query_results);
119         if (ret) {
120                 DLB2_LOG_ERR("ioctl get dlb2 num resources, err=%d\n", ret);
121                 return ret;
122         }
123
124         /* Complete filling in device resource info returned to evdev app,
125          * overriding any default values.
126          * The capabilities (CAPs) were set at compile time.
127          */
128
129         evdev_dlb2_default_info.max_event_queues =
130                 dlb2->hw_rsrc_query_results.num_ldb_queues;
131
132         evdev_dlb2_default_info.max_event_ports =
133                 dlb2->hw_rsrc_query_results.num_ldb_ports;
134
135         if (dlb2->version == DLB2_HW_V2_5) {
136                 evdev_dlb2_default_info.max_num_events =
137                         dlb2->hw_rsrc_query_results.num_credits;
138         } else {
139                 evdev_dlb2_default_info.max_num_events =
140                         dlb2->hw_rsrc_query_results.num_ldb_credits;
141         }
142         /* Save off values used when creating the scheduling domain. */
143
144         handle->info.num_sched_domains =
145                 dlb2->hw_rsrc_query_results.num_sched_domains;
146
147         if (dlb2->version == DLB2_HW_V2_5) {
148                 handle->info.hw_rsrc_max.nb_events_limit =
149                         dlb2->hw_rsrc_query_results.num_credits;
150         } else {
151                 handle->info.hw_rsrc_max.nb_events_limit =
152                         dlb2->hw_rsrc_query_results.num_ldb_credits;
153         }
154         handle->info.hw_rsrc_max.num_queues =
155                 dlb2->hw_rsrc_query_results.num_ldb_queues +
156                 dlb2->hw_rsrc_query_results.num_dir_ports;
157
158         handle->info.hw_rsrc_max.num_ldb_queues =
159                 dlb2->hw_rsrc_query_results.num_ldb_queues;
160
161         handle->info.hw_rsrc_max.num_ldb_ports =
162                 dlb2->hw_rsrc_query_results.num_ldb_ports;
163
164         handle->info.hw_rsrc_max.num_dir_ports =
165                 dlb2->hw_rsrc_query_results.num_dir_ports;
166
167         handle->info.hw_rsrc_max.reorder_window_size =
168                 dlb2->hw_rsrc_query_results.num_hist_list_entries;
169
170         rte_memcpy(dlb2_info, &handle->info.hw_rsrc_max, sizeof(*dlb2_info));
171
172         return 0;
173 }
174
175 #define DLB2_BASE_10 10
176
177 static int
178 dlb2_string_to_int(int *result, const char *str)
179 {
180         long ret;
181         char *endptr;
182
183         if (str == NULL || result == NULL)
184                 return -EINVAL;
185
186         errno = 0;
187         ret = strtol(str, &endptr, DLB2_BASE_10);
188         if (errno)
189                 return -errno;
190
191         /* long int and int may have different widths on some architectures */
192         if (ret < INT_MIN || ret > INT_MAX || endptr == str)
193                 return -EINVAL;
194
195         *result = ret;
196         return 0;
197 }
198
199 static int
200 set_numa_node(const char *key __rte_unused, const char *value, void *opaque)
201 {
202         int *socket_id = opaque;
203         int ret;
204
205         ret = dlb2_string_to_int(socket_id, value);
206         if (ret < 0)
207                 return ret;
208
209         if (*socket_id > RTE_MAX_NUMA_NODES)
210                 return -EINVAL;
211         return 0;
212 }
213
214 static int
215 set_max_num_events(const char *key __rte_unused,
216                    const char *value,
217                    void *opaque)
218 {
219         int *max_num_events = opaque;
220         int ret;
221
222         if (value == NULL || opaque == NULL) {
223                 DLB2_LOG_ERR("NULL pointer\n");
224                 return -EINVAL;
225         }
226
227         ret = dlb2_string_to_int(max_num_events, value);
228         if (ret < 0)
229                 return ret;
230
231         if (*max_num_events < 0 || *max_num_events >
232                         DLB2_MAX_NUM_LDB_CREDITS) {
233                 DLB2_LOG_ERR("dlb2: max_num_events must be between 0 and %d\n",
234                              DLB2_MAX_NUM_LDB_CREDITS);
235                 return -EINVAL;
236         }
237
238         return 0;
239 }
240
241 static int
242 set_num_dir_credits(const char *key __rte_unused,
243                     const char *value,
244                     void *opaque)
245 {
246         int *num_dir_credits = opaque;
247         int ret;
248
249         if (value == NULL || opaque == NULL) {
250                 DLB2_LOG_ERR("NULL pointer\n");
251                 return -EINVAL;
252         }
253
254         ret = dlb2_string_to_int(num_dir_credits, value);
255         if (ret < 0)
256                 return ret;
257
258         if (*num_dir_credits < 0 ||
259             *num_dir_credits > DLB2_MAX_NUM_DIR_CREDITS(DLB2_HW_V2)) {
260                 DLB2_LOG_ERR("dlb2: num_dir_credits must be between 0 and %d\n",
261                              DLB2_MAX_NUM_DIR_CREDITS(DLB2_HW_V2));
262                 return -EINVAL;
263         }
264
265         return 0;
266 }
267
268 static int
269 set_dev_id(const char *key __rte_unused,
270            const char *value,
271            void *opaque)
272 {
273         int *dev_id = opaque;
274         int ret;
275
276         if (value == NULL || opaque == NULL) {
277                 DLB2_LOG_ERR("NULL pointer\n");
278                 return -EINVAL;
279         }
280
281         ret = dlb2_string_to_int(dev_id, value);
282         if (ret < 0)
283                 return ret;
284
285         return 0;
286 }
287
288 static int
289 set_cos(const char *key __rte_unused,
290         const char *value,
291         void *opaque)
292 {
293         enum dlb2_cos *cos_id = opaque;
294         int x = 0;
295         int ret;
296
297         if (value == NULL || opaque == NULL) {
298                 DLB2_LOG_ERR("NULL pointer\n");
299                 return -EINVAL;
300         }
301
302         ret = dlb2_string_to_int(&x, value);
303         if (ret < 0)
304                 return ret;
305
306         if (x != DLB2_COS_DEFAULT && (x < DLB2_COS_0 || x > DLB2_COS_3)) {
307                 DLB2_LOG_ERR(
308                         "COS %d out of range, must be DLB2_COS_DEFAULT or 0-3\n",
309                         x);
310                 return -EINVAL;
311         }
312
313         *cos_id = x;
314
315         return 0;
316 }
317
318 static int
319 set_poll_interval(const char *key __rte_unused,
320         const char *value,
321         void *opaque)
322 {
323         int *poll_interval = opaque;
324         int ret;
325
326         if (value == NULL || opaque == NULL) {
327                 DLB2_LOG_ERR("NULL pointer\n");
328                 return -EINVAL;
329         }
330
331         ret = dlb2_string_to_int(poll_interval, value);
332         if (ret < 0)
333                 return ret;
334
335         return 0;
336 }
337
338 static int
339 set_sw_credit_quanta(const char *key __rte_unused,
340         const char *value,
341         void *opaque)
342 {
343         int *sw_credit_quanta = opaque;
344         int ret;
345
346         if (value == NULL || opaque == NULL) {
347                 DLB2_LOG_ERR("NULL pointer\n");
348                 return -EINVAL;
349         }
350
351         ret = dlb2_string_to_int(sw_credit_quanta, value);
352         if (ret < 0)
353                 return ret;
354
355         return 0;
356 }
357
358 static int
359 set_hw_credit_quanta(const char *key __rte_unused,
360         const char *value,
361         void *opaque)
362 {
363         int *hw_credit_quanta = opaque;
364         int ret;
365
366         if (value == NULL || opaque == NULL) {
367                 DLB2_LOG_ERR("NULL pointer\n");
368                 return -EINVAL;
369         }
370
371         ret = dlb2_string_to_int(hw_credit_quanta, value);
372         if (ret < 0)
373                 return ret;
374
375         return 0;
376 }
377
378 static int
379 set_default_depth_thresh(const char *key __rte_unused,
380         const char *value,
381         void *opaque)
382 {
383         int *default_depth_thresh = opaque;
384         int ret;
385
386         if (value == NULL || opaque == NULL) {
387                 DLB2_LOG_ERR("NULL pointer\n");
388                 return -EINVAL;
389         }
390
391         ret = dlb2_string_to_int(default_depth_thresh, value);
392         if (ret < 0)
393                 return ret;
394
395         return 0;
396 }
397
398 static int
399 set_vector_opts_enab(const char *key __rte_unused,
400         const char *value,
401         void *opaque)
402 {
403         bool *dlb2_vector_opts_enabled = opaque;
404
405         if (value == NULL || opaque == NULL) {
406                 DLB2_LOG_ERR("NULL pointer\n");
407                 return -EINVAL;
408         }
409
410         if ((*value == 'y') || (*value == 'Y'))
411                 *dlb2_vector_opts_enabled = true;
412         else
413                 *dlb2_vector_opts_enabled = false;
414
415         return 0;
416 }
417
418 static int
419 set_qid_depth_thresh(const char *key __rte_unused,
420                      const char *value,
421                      void *opaque)
422 {
423         struct dlb2_qid_depth_thresholds *qid_thresh = opaque;
424         int first, last, thresh, i;
425
426         if (value == NULL || opaque == NULL) {
427                 DLB2_LOG_ERR("NULL pointer\n");
428                 return -EINVAL;
429         }
430
431         /* command line override may take one of the following 3 forms:
432          * qid_depth_thresh=all:<threshold_value> ... all queues
433          * qid_depth_thresh=qidA-qidB:<threshold_value> ... a range of queues
434          * qid_depth_thresh=qid:<threshold_value> ... just one queue
435          */
436         if (sscanf(value, "all:%d", &thresh) == 1) {
437                 first = 0;
438                 last = DLB2_MAX_NUM_QUEUES(DLB2_HW_V2) - 1;
439         } else if (sscanf(value, "%d-%d:%d", &first, &last, &thresh) == 3) {
440                 /* we have everything we need */
441         } else if (sscanf(value, "%d:%d", &first, &thresh) == 2) {
442                 last = first;
443         } else {
444                 DLB2_LOG_ERR("Error parsing qid depth devarg. Should be all:val, qid-qid:val, or qid:val\n");
445                 return -EINVAL;
446         }
447
448         if (first > last || first < 0 ||
449                 last >= DLB2_MAX_NUM_QUEUES(DLB2_HW_V2)) {
450                 DLB2_LOG_ERR("Error parsing qid depth devarg, invalid qid value\n");
451                 return -EINVAL;
452         }
453
454         if (thresh < 0 || thresh > DLB2_MAX_QUEUE_DEPTH_THRESHOLD) {
455                 DLB2_LOG_ERR("Error parsing qid depth devarg, threshold > %d\n",
456                              DLB2_MAX_QUEUE_DEPTH_THRESHOLD);
457                 return -EINVAL;
458         }
459
460         for (i = first; i <= last; i++)
461                 qid_thresh->val[i] = thresh; /* indexed by qid */
462
463         return 0;
464 }
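/* Illustrative only: assuming a DLB2 PCI device at the (hypothetical) address
 * 0000:ea:00.0, the three forms parsed above could be supplied as devargs on
 * the EAL command line, for example:
 *
 *   --allow 0000:ea:00.0,qid_depth_thresh=all:256
 *   --allow 0000:ea:00.0,qid_depth_thresh=2-5:128
 *   --allow 0000:ea:00.0,qid_depth_thresh=7:64
 */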
465
466 static int
467 set_qid_depth_thresh_v2_5(const char *key __rte_unused,
468                           const char *value,
469                           void *opaque)
470 {
471         struct dlb2_qid_depth_thresholds *qid_thresh = opaque;
472         int first, last, thresh, i;
473
474         if (value == NULL || opaque == NULL) {
475                 DLB2_LOG_ERR("NULL pointer\n");
476                 return -EINVAL;
477         }
478
479         /* command line override may take one of the following 3 forms:
480          * qid_depth_thresh=all:<threshold_value> ... all queues
481          * qid_depth_thresh=qidA-qidB:<threshold_value> ... a range of queues
482          * qid_depth_thresh=qid:<threshold_value> ... just one queue
483          */
484         if (sscanf(value, "all:%d", &thresh) == 1) {
485                 first = 0;
486                 last = DLB2_MAX_NUM_QUEUES(DLB2_HW_V2_5) - 1;
487         } else if (sscanf(value, "%d-%d:%d", &first, &last, &thresh) == 3) {
488                 /* we have everything we need */
489         } else if (sscanf(value, "%d:%d", &first, &thresh) == 2) {
490                 last = first;
491         } else {
492                 DLB2_LOG_ERR("Error parsing qid depth devarg. Should be all:val, qid-qid:val, or qid:val\n");
493                 return -EINVAL;
494         }
495
496         if (first > last || first < 0 ||
497                 last >= DLB2_MAX_NUM_QUEUES(DLB2_HW_V2_5)) {
498                 DLB2_LOG_ERR("Error parsing qid depth devarg, invalid qid value\n");
499                 return -EINVAL;
500         }
501
502         if (thresh < 0 || thresh > DLB2_MAX_QUEUE_DEPTH_THRESHOLD) {
503                 DLB2_LOG_ERR("Error parsing qid depth devarg, threshold > %d\n",
504                              DLB2_MAX_QUEUE_DEPTH_THRESHOLD);
505                 return -EINVAL;
506         }
507
508         for (i = first; i <= last; i++)
509                 qid_thresh->val[i] = thresh; /* indexed by qid */
510
511         return 0;
512 }
513
514 static void
515 dlb2_eventdev_info_get(struct rte_eventdev *dev,
516                        struct rte_event_dev_info *dev_info)
517 {
518         struct dlb2_eventdev *dlb2 = dlb2_pmd_priv(dev);
519         int ret;
520
521         ret = dlb2_hw_query_resources(dlb2);
522         if (ret) {
523                 const struct rte_eventdev_data *data = dev->data;
524
525                 DLB2_LOG_ERR("get resources err=%d, devid=%d\n",
526                              ret, data->dev_id);
527                 /* fn is void, so fall through and return values set up in
528                  * probe
529                  */
530         }
531
532         /* Add num resources currently owned by this domain.
533          * These would become available if the scheduling domain were reset due
534          * to the application recalling eventdev_configure to *reconfigure* the
535          * domain.
536          */
537         evdev_dlb2_default_info.max_event_ports += dlb2->num_ldb_ports;
538         evdev_dlb2_default_info.max_event_queues += dlb2->num_ldb_queues;
539         if (dlb2->version == DLB2_HW_V2_5) {
540                 evdev_dlb2_default_info.max_num_events +=
541                         dlb2->max_credits;
542         } else {
543                 evdev_dlb2_default_info.max_num_events +=
544                         dlb2->max_ldb_credits;
545         }
546         evdev_dlb2_default_info.max_event_queues =
547                 RTE_MIN(evdev_dlb2_default_info.max_event_queues,
548                         RTE_EVENT_MAX_QUEUES_PER_DEV);
549
550         evdev_dlb2_default_info.max_num_events =
551                 RTE_MIN(evdev_dlb2_default_info.max_num_events,
552                         dlb2->max_num_events_override);
553
554         *dev_info = evdev_dlb2_default_info;
555 }
556
557 static int
558 dlb2_hw_create_sched_domain(struct dlb2_hw_dev *handle,
559                             const struct dlb2_hw_rsrcs *resources_asked,
560                             uint8_t device_version)
561 {
562         int ret = 0;
563         struct dlb2_create_sched_domain_args *cfg;
564
565         if (resources_asked == NULL) {
566                 DLB2_LOG_ERR("dlb2: dlb2_create NULL parameter\n");
567                 ret = -EINVAL;
568                 goto error_exit;
569         }
570
571         /* Map generic qm resources to dlb2 resources */
572         cfg = &handle->cfg.resources;
573
574         /* DIR ports and queues */
575
576         cfg->num_dir_ports = resources_asked->num_dir_ports;
577         if (device_version == DLB2_HW_V2_5)
578                 cfg->num_credits = resources_asked->num_credits;
579         else
580                 cfg->num_dir_credits = resources_asked->num_dir_credits;
581
582         /* LDB queues */
583
584         cfg->num_ldb_queues = resources_asked->num_ldb_queues;
585
586         /* LDB ports */
587
588         cfg->cos_strict = 0; /* Best effort */
589         cfg->num_cos_ldb_ports[0] = 0;
590         cfg->num_cos_ldb_ports[1] = 0;
591         cfg->num_cos_ldb_ports[2] = 0;
592         cfg->num_cos_ldb_ports[3] = 0;
593
594         switch (handle->cos_id) {
595         case DLB2_COS_0:
596                 cfg->num_ldb_ports = 0; /* no don't care ports */
597                 cfg->num_cos_ldb_ports[0] =
598                         resources_asked->num_ldb_ports;
599                 break;
600         case DLB2_COS_1:
601                 cfg->num_ldb_ports = 0; /* no don't care ports */
602                 cfg->num_cos_ldb_ports[1] = resources_asked->num_ldb_ports;
603                 break;
604         case DLB2_COS_2:
605                 cfg->num_ldb_ports = 0; /* no don't care ports */
606                 cfg->num_cos_ldb_ports[2] = resources_asked->num_ldb_ports;
607                 break;
608         case DLB2_COS_3:
609                 cfg->num_ldb_ports = 0; /* no don't care ports */
610                 cfg->num_cos_ldb_ports[3] =
611                         resources_asked->num_ldb_ports;
612                 break;
613         case DLB2_COS_DEFAULT:
614                 /* all ldb ports are don't care ports from a cos perspective */
615                 cfg->num_ldb_ports =
616                         resources_asked->num_ldb_ports;
617                 break;
618         }
619
620         if (device_version == DLB2_HW_V2)
621                 cfg->num_ldb_credits = resources_asked->num_ldb_credits;
622
623         cfg->num_atomic_inflights =
624                 DLB2_NUM_ATOMIC_INFLIGHTS_PER_QUEUE *
625                 cfg->num_ldb_queues;
626
627         cfg->num_hist_list_entries = resources_asked->num_ldb_ports *
628                 DLB2_NUM_HIST_LIST_ENTRIES_PER_LDB_PORT;
629
630         if (device_version == DLB2_HW_V2_5) {
631                 DLB2_LOG_DBG("sched domain create - ldb_qs=%d, ldb_ports=%d, dir_ports=%d, atomic_inflights=%d, hist_list_entries=%d, credits=%d\n",
632                              cfg->num_ldb_queues,
633                              resources_asked->num_ldb_ports,
634                              cfg->num_dir_ports,
635                              cfg->num_atomic_inflights,
636                              cfg->num_hist_list_entries,
637                              cfg->num_credits);
638         } else {
639                 DLB2_LOG_DBG("sched domain create - ldb_qs=%d, ldb_ports=%d, dir_ports=%d, atomic_inflights=%d, hist_list_entries=%d, ldb_credits=%d, dir_credits=%d\n",
640                              cfg->num_ldb_queues,
641                              resources_asked->num_ldb_ports,
642                              cfg->num_dir_ports,
643                              cfg->num_atomic_inflights,
644                              cfg->num_hist_list_entries,
645                              cfg->num_ldb_credits,
646                              cfg->num_dir_credits);
647         }
648
649         /* Configure the QM */
650
651         ret = dlb2_iface_sched_domain_create(handle, cfg);
652         if (ret < 0) {
653                 DLB2_LOG_ERR("dlb2: domain create failed, ret = %d, extra status: %s\n",
654                              ret,
655                              dlb2_error_strings[cfg->response.status]);
656
657                 goto error_exit;
658         }
659
660         handle->domain_id = cfg->response.id;
661         handle->cfg.configured = true;
662
663 error_exit:
664
665         return ret;
666 }
667
668 static void
669 dlb2_hw_reset_sched_domain(const struct rte_eventdev *dev, bool reconfig)
670 {
671         struct dlb2_eventdev *dlb2 = dlb2_pmd_priv(dev);
672         enum dlb2_configuration_state config_state;
673         int i, j;
674
675         dlb2_iface_domain_reset(dlb2);
676
677         /* Free all dynamically allocated port memory */
678         for (i = 0; i < dlb2->num_ports; i++)
679                 dlb2_free_qe_mem(&dlb2->ev_ports[i].qm_port);
680
681         /* If reconfiguring, mark the device's queues and ports as "previously
682          * configured." If the user doesn't reconfigure them, the PMD will
683          * reapply their previous configuration when the device is started.
684          */
685         config_state = (reconfig) ? DLB2_PREV_CONFIGURED :
686                 DLB2_NOT_CONFIGURED;
687
688         for (i = 0; i < dlb2->num_ports; i++) {
689                 dlb2->ev_ports[i].qm_port.config_state = config_state;
690                 /* Reset setup_done so ports can be reconfigured */
691                 dlb2->ev_ports[i].setup_done = false;
692                 for (j = 0; j < DLB2_MAX_NUM_QIDS_PER_LDB_CQ; j++)
693                         dlb2->ev_ports[i].link[j].mapped = false;
694         }
695
696         for (i = 0; i < dlb2->num_queues; i++)
697                 dlb2->ev_queues[i].qm_queue.config_state = config_state;
698
699         for (i = 0; i < DLB2_MAX_NUM_QUEUES(DLB2_HW_V2_5); i++)
700                 dlb2->ev_queues[i].setup_done = false;
701
702         dlb2->num_ports = 0;
703         dlb2->num_ldb_ports = 0;
704         dlb2->num_dir_ports = 0;
705         dlb2->num_queues = 0;
706         dlb2->num_ldb_queues = 0;
707         dlb2->num_dir_queues = 0;
708         dlb2->configured = false;
709 }
710
711 /* Note: 1 QM instance per QM device, QM instance/device == event device */
712 static int
713 dlb2_eventdev_configure(const struct rte_eventdev *dev)
714 {
715         struct dlb2_eventdev *dlb2 = dlb2_pmd_priv(dev);
716         struct dlb2_hw_dev *handle = &dlb2->qm_instance;
717         struct dlb2_hw_rsrcs *rsrcs = &handle->info.hw_rsrc_max;
718         const struct rte_eventdev_data *data = dev->data;
719         const struct rte_event_dev_config *config = &data->dev_conf;
720         int ret;
721
722         /* If this eventdev is already configured, we must release the current
723          * scheduling domain before attempting to configure a new one.
724          */
725         if (dlb2->configured) {
726                 dlb2_hw_reset_sched_domain(dev, true);
727                 ret = dlb2_hw_query_resources(dlb2);
728                 if (ret) {
729                         DLB2_LOG_ERR("get resources err=%d, devid=%d\n",
730                                      ret, data->dev_id);
731                         return ret;
732                 }
733         }
734
735         if (config->nb_event_queues > rsrcs->num_queues) {
736                 DLB2_LOG_ERR("nb_event_queues parameter (%d) exceeds the QM device's capabilities (%d).\n",
737                              config->nb_event_queues,
738                              rsrcs->num_queues);
739                 return -EINVAL;
740         }
741         if (config->nb_event_ports > (rsrcs->num_ldb_ports
742                         + rsrcs->num_dir_ports)) {
743                 DLB2_LOG_ERR("nb_event_ports parameter (%d) exceeds the QM device's capabilities (%d).\n",
744                              config->nb_event_ports,
745                              (rsrcs->num_ldb_ports + rsrcs->num_dir_ports));
746                 return -EINVAL;
747         }
748         if (config->nb_events_limit > rsrcs->nb_events_limit) {
749                 DLB2_LOG_ERR("nb_events_limit parameter (%d) exceeds the QM device's capabilities (%d).\n",
750                              config->nb_events_limit,
751                              rsrcs->nb_events_limit);
752                 return -EINVAL;
753         }
754
755         if (config->event_dev_cfg & RTE_EVENT_DEV_CFG_PER_DEQUEUE_TIMEOUT)
756                 dlb2->global_dequeue_wait = false;
757         else {
758                 uint32_t timeout32;
759
760                 dlb2->global_dequeue_wait = true;
761
762                 /* note size mismatch of timeout vals in eventdev lib. */
763                 timeout32 = config->dequeue_timeout_ns;
764
765                 dlb2->global_dequeue_wait_ticks =
766                         timeout32 * (rte_get_timer_hz() / 1E9);
767         }
768
769         /* Does this platform support umonitor/umwait? */
770         if (rte_cpu_get_flag_enabled(RTE_CPUFLAG_WAITPKG))
771                 dlb2->umwait_allowed = true;
772
773         rsrcs->num_dir_ports = config->nb_single_link_event_port_queues;
774         rsrcs->num_ldb_ports  = config->nb_event_ports - rsrcs->num_dir_ports;
775         /* 1 dir queue per dir port */
776         rsrcs->num_ldb_queues = config->nb_event_queues - rsrcs->num_dir_ports;
777
778         if (dlb2->version == DLB2_HW_V2_5) {
779                 rsrcs->num_credits = 0;
780                 if (rsrcs->num_ldb_queues || rsrcs->num_dir_ports)
781                         rsrcs->num_credits = config->nb_events_limit;
782         } else {
783                 /* Scale down nb_events_limit for directed credits, since
784                  * load-balanced credits greatly outnumber directed credits.
785                  */
786                 rsrcs->num_ldb_credits = 0;
787                 rsrcs->num_dir_credits = 0;
788
789                 if (rsrcs->num_ldb_queues)
790                         rsrcs->num_ldb_credits = config->nb_events_limit;
791                 if (rsrcs->num_dir_ports)
792                         rsrcs->num_dir_credits = config->nb_events_limit / 2;
793                 if (dlb2->num_dir_credits_override != -1)
794                         rsrcs->num_dir_credits = dlb2->num_dir_credits_override;
795         }
796
797         if (dlb2_hw_create_sched_domain(handle, rsrcs, dlb2->version) < 0) {
798                 DLB2_LOG_ERR("dlb2_hw_create_sched_domain failed\n");
799                 return -ENODEV;
800         }
801
802         dlb2->new_event_limit = config->nb_events_limit;
803         __atomic_store_n(&dlb2->inflights, 0, __ATOMIC_SEQ_CST);
804
805         /* Save number of ports/queues for this event dev */
806         dlb2->num_ports = config->nb_event_ports;
807         dlb2->num_queues = config->nb_event_queues;
808         dlb2->num_dir_ports = rsrcs->num_dir_ports;
809         dlb2->num_ldb_ports = dlb2->num_ports - dlb2->num_dir_ports;
810         dlb2->num_ldb_queues = dlb2->num_queues - dlb2->num_dir_ports;
811         dlb2->num_dir_queues = dlb2->num_dir_ports;
812         if (dlb2->version == DLB2_HW_V2_5) {
813                 dlb2->credit_pool = rsrcs->num_credits;
814                 dlb2->max_credits = rsrcs->num_credits;
815         } else {
816                 dlb2->ldb_credit_pool = rsrcs->num_ldb_credits;
817                 dlb2->max_ldb_credits = rsrcs->num_ldb_credits;
818                 dlb2->dir_credit_pool = rsrcs->num_dir_credits;
819                 dlb2->max_dir_credits = rsrcs->num_dir_credits;
820         }
821
822         dlb2->configured = true;
823
824         return 0;
825 }
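/* Illustrative only: a minimal application-side configuration that would
 * reach dlb2_eventdev_configure() through rte_event_dev_configure(), assuming
 * one load-balanced queue/port pair and no single-link ports:
 *
 *   struct rte_event_dev_config cfg = {
 *           .nb_event_queues = 1,
 *           .nb_event_ports = 1,
 *           .nb_events_limit = 4096,
 *           .nb_event_queue_flows = 1024,
 *           .nb_event_port_dequeue_depth = 32,
 *           .nb_event_port_enqueue_depth = 64,
 *           .nb_single_link_event_port_queues = 0,
 *           .dequeue_timeout_ns = 0,
 *   };
 *   int err = rte_event_dev_configure(dev_id, &cfg);
 */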
826
827 static void
828 dlb2_eventdev_port_default_conf_get(struct rte_eventdev *dev,
829                                     uint8_t port_id,
830                                     struct rte_event_port_conf *port_conf)
831 {
832         RTE_SET_USED(port_id);
833         struct dlb2_eventdev *dlb2 = dlb2_pmd_priv(dev);
834
835         port_conf->new_event_threshold = dlb2->new_event_limit;
836         port_conf->dequeue_depth = 32;
837         port_conf->enqueue_depth = DLB2_MAX_ENQUEUE_DEPTH;
838         port_conf->event_port_cfg = 0;
839 }
840
841 static void
842 dlb2_eventdev_queue_default_conf_get(struct rte_eventdev *dev,
843                                      uint8_t queue_id,
844                                      struct rte_event_queue_conf *queue_conf)
845 {
846         RTE_SET_USED(dev);
847         RTE_SET_USED(queue_id);
848
849         queue_conf->nb_atomic_flows = 1024;
850         queue_conf->nb_atomic_order_sequences = 64;
851         queue_conf->event_queue_cfg = 0;
852         queue_conf->priority = 0;
853 }
854
855 static int32_t
856 dlb2_get_sn_allocation(struct dlb2_eventdev *dlb2, int group)
857 {
858         struct dlb2_hw_dev *handle = &dlb2->qm_instance;
859         struct dlb2_get_sn_allocation_args cfg;
860         int ret;
861
862         cfg.group = group;
863
864         ret = dlb2_iface_get_sn_allocation(handle, &cfg);
865         if (ret < 0) {
866                 DLB2_LOG_ERR("dlb2: get_sn_allocation ret=%d (driver status: %s)\n",
867                              ret, dlb2_error_strings[cfg.response.status]);
868                 return ret;
869         }
870
871         return cfg.response.id;
872 }
873
874 static int
875 dlb2_set_sn_allocation(struct dlb2_eventdev *dlb2, int group, int num)
876 {
877         struct dlb2_hw_dev *handle = &dlb2->qm_instance;
878         struct dlb2_set_sn_allocation_args cfg;
879         int ret;
880
881         cfg.num = num;
882         cfg.group = group;
883
884         ret = dlb2_iface_set_sn_allocation(handle, &cfg);
885         if (ret < 0) {
886                 DLB2_LOG_ERR("dlb2: set_sn_allocation ret=%d (driver status: %s)\n",
887                              ret, dlb2_error_strings[cfg.response.status]);
888                 return ret;
889         }
890
891         return ret;
892 }
893
894 static int32_t
895 dlb2_get_sn_occupancy(struct dlb2_eventdev *dlb2, int group)
896 {
897         struct dlb2_hw_dev *handle = &dlb2->qm_instance;
898         struct dlb2_get_sn_occupancy_args cfg;
899         int ret;
900
901         cfg.group = group;
902
903         ret = dlb2_iface_get_sn_occupancy(handle, &cfg);
904         if (ret < 0) {
905                 DLB2_LOG_ERR("dlb2: get_sn_occupancy ret=%d (driver status: %s)\n",
906                              ret, dlb2_error_strings[cfg.response.status]);
907                 return ret;
908         }
909
910         return cfg.response.id;
911 }
912
913 /* Query the current sequence number allocations and, if they conflict with the
914  * requested LDB queue configuration, attempt to re-allocate sequence numbers.
915  * This is best-effort; if it fails, the subsequent load-balanced queue
916  * configuration will itself fail and return an error.
917  */
918 static void
919 dlb2_program_sn_allocation(struct dlb2_eventdev *dlb2,
920                            const struct rte_event_queue_conf *queue_conf)
921 {
922         int grp_occupancy[DLB2_NUM_SN_GROUPS];
923         int grp_alloc[DLB2_NUM_SN_GROUPS];
924         int i, sequence_numbers;
925
926         sequence_numbers = (int)queue_conf->nb_atomic_order_sequences;
927
928         for (i = 0; i < DLB2_NUM_SN_GROUPS; i++) {
929                 int total_slots;
930
931                 grp_alloc[i] = dlb2_get_sn_allocation(dlb2, i);
932                 if (grp_alloc[i] < 0)
933                         return;
934
935                 total_slots = DLB2_MAX_LDB_SN_ALLOC / grp_alloc[i];
936
937                 grp_occupancy[i] = dlb2_get_sn_occupancy(dlb2, i);
938                 if (grp_occupancy[i] < 0)
939                         return;
940
941                 /* DLB has at least one available slot for the requested
942                  * sequence numbers, so no further configuration required.
943                  */
944                 if (grp_alloc[i] == sequence_numbers &&
945                     grp_occupancy[i] < total_slots)
946                         return;
947         }
948
949         /* None of the sequence number groups are configured for the requested
950          * sequence numbers, so we have to reconfigure one of them. This is
951          * only possible if a group is not in use.
952          */
953         for (i = 0; i < DLB2_NUM_SN_GROUPS; i++) {
954                 if (grp_occupancy[i] == 0)
955                         break;
956         }
957
958         if (i == DLB2_NUM_SN_GROUPS) {
959                 DLB2_LOG_ERR("[%s()] No groups with %d sequence_numbers are available or have free slots\n",
960                        __func__, sequence_numbers);
961                 return;
962         }
963
964         /* Attempt to configure slot i with the requested number of sequence
965          * numbers. Ignore the return value -- if this fails, the error will be
966          * caught during subsequent queue configuration.
967          */
968         dlb2_set_sn_allocation(dlb2, i, sequence_numbers);
969 }
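/* Illustrative only: a worked example of the slot accounting above. If
 * DLB2_MAX_LDB_SN_ALLOC were 1024 (see dlb2_priv.h for the actual value) and
 * a group is currently allocated 64 sequence numbers per queue, that group
 * exposes 1024 / 64 = 16 slots. A queue requesting 64 sequence numbers fits
 * while the group's occupancy is below 16; otherwise an idle group
 * (occupancy == 0) is reconfigured to the requested allocation.
 */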
970
971 static int32_t
972 dlb2_hw_create_ldb_queue(struct dlb2_eventdev *dlb2,
973                          struct dlb2_eventdev_queue *ev_queue,
974                          const struct rte_event_queue_conf *evq_conf)
975 {
976         struct dlb2_hw_dev *handle = &dlb2->qm_instance;
977         struct dlb2_queue *queue = &ev_queue->qm_queue;
978         struct dlb2_create_ldb_queue_args cfg;
979         int32_t ret;
980         uint32_t qm_qid;
981         int sched_type = -1;
982
983         if (evq_conf == NULL)
984                 return -EINVAL;
985
986         if (evq_conf->event_queue_cfg & RTE_EVENT_QUEUE_CFG_ALL_TYPES) {
987                 if (evq_conf->nb_atomic_order_sequences != 0)
988                         sched_type = RTE_SCHED_TYPE_ORDERED;
989                 else
990                         sched_type = RTE_SCHED_TYPE_PARALLEL;
991         } else
992                 sched_type = evq_conf->schedule_type;
993
994         cfg.num_atomic_inflights = DLB2_NUM_ATOMIC_INFLIGHTS_PER_QUEUE;
995         cfg.num_sequence_numbers = evq_conf->nb_atomic_order_sequences;
996         cfg.num_qid_inflights = evq_conf->nb_atomic_order_sequences;
997
998         if (sched_type != RTE_SCHED_TYPE_ORDERED) {
999                 cfg.num_sequence_numbers = 0;
1000                 cfg.num_qid_inflights = 2048;
1001         }
1002
1003         /* The application should set this to the number of hardware flows it
1004          * wants, not the overall number of flows it is going to use. E.g. if the
1005          * app uses 64 flows and sets the compression level to 64, it gets at
1006          * best 64 unique hashed flows in hardware.
1007          */
1008         switch (evq_conf->nb_atomic_flows) {
1009         /* Valid DLB2 compression levels */
1010         case 64:
1011         case 128:
1012         case 256:
1013         case 512:
1014         case (1 * 1024): /* 1K */
1015         case (2 * 1024): /* 2K */
1016         case (4 * 1024): /* 4K */
1017         case (64 * 1024): /* 64K */
1018                 cfg.lock_id_comp_level = evq_conf->nb_atomic_flows;
1019                 break;
1020         default:
1021                 /* Invalid compression level */
1022                 cfg.lock_id_comp_level = 0; /* no compression */
1023         }
1024
1025         if (ev_queue->depth_threshold == 0) {
1026                 cfg.depth_threshold = dlb2->default_depth_thresh;
1027                 ev_queue->depth_threshold =
1028                         dlb2->default_depth_thresh;
1029         } else
1030                 cfg.depth_threshold = ev_queue->depth_threshold;
1031
1032         ret = dlb2_iface_ldb_queue_create(handle, &cfg);
1033         if (ret < 0) {
1034                 DLB2_LOG_ERR("dlb2: create LB event queue error, ret=%d (driver status: %s)\n",
1035                              ret, dlb2_error_strings[cfg.response.status]);
1036                 return -EINVAL;
1037         }
1038
1039         qm_qid = cfg.response.id;
1040
1041         /* Save off queue config for debug, resource lookups, and reconfig */
1042         queue->num_qid_inflights = cfg.num_qid_inflights;
1043         queue->num_atm_inflights = cfg.num_atomic_inflights;
1044
1045         queue->sched_type = sched_type;
1046         queue->config_state = DLB2_CONFIGURED;
1047
1048         DLB2_LOG_DBG("Created LB event queue %d, nb_inflights=%d, nb_seq=%d, qid inflights=%d\n",
1049                      qm_qid,
1050                      cfg.num_atomic_inflights,
1051                      cfg.num_sequence_numbers,
1052                      cfg.num_qid_inflights);
1053
1054         return qm_qid;
1055 }
1056
1057 static int
1058 dlb2_eventdev_ldb_queue_setup(struct rte_eventdev *dev,
1059                               struct dlb2_eventdev_queue *ev_queue,
1060                               const struct rte_event_queue_conf *queue_conf)
1061 {
1062         struct dlb2_eventdev *dlb2 = dlb2_pmd_priv(dev);
1063         int32_t qm_qid;
1064
1065         if (queue_conf->nb_atomic_order_sequences)
1066                 dlb2_program_sn_allocation(dlb2, queue_conf);
1067
1068         qm_qid = dlb2_hw_create_ldb_queue(dlb2, ev_queue, queue_conf);
1069         if (qm_qid < 0) {
1070                 DLB2_LOG_ERR("Failed to create the load-balanced queue\n");
1071
1072                 return qm_qid;
1073         }
1074
1075         dlb2->qm_ldb_to_ev_queue_id[qm_qid] = ev_queue->id;
1076
1077         ev_queue->qm_queue.id = qm_qid;
1078
1079         return 0;
1080 }
1081
1082 static int dlb2_num_dir_queues_setup(struct dlb2_eventdev *dlb2)
1083 {
1084         int i, num = 0;
1085
1086         for (i = 0; i < dlb2->num_queues; i++) {
1087                 if (dlb2->ev_queues[i].setup_done &&
1088                     dlb2->ev_queues[i].qm_queue.is_directed)
1089                         num++;
1090         }
1091
1092         return num;
1093 }
1094
1095 static void
1096 dlb2_queue_link_teardown(struct dlb2_eventdev *dlb2,
1097                          struct dlb2_eventdev_queue *ev_queue)
1098 {
1099         struct dlb2_eventdev_port *ev_port;
1100         int i, j;
1101
1102         for (i = 0; i < dlb2->num_ports; i++) {
1103                 ev_port = &dlb2->ev_ports[i];
1104
1105                 for (j = 0; j < DLB2_MAX_NUM_QIDS_PER_LDB_CQ; j++) {
1106                         if (!ev_port->link[j].valid ||
1107                             ev_port->link[j].queue_id != ev_queue->id)
1108                                 continue;
1109
1110                         ev_port->link[j].valid = false;
1111                         ev_port->num_links--;
1112                 }
1113         }
1114
1115         ev_queue->num_links = 0;
1116 }
1117
1118 static int
1119 dlb2_eventdev_queue_setup(struct rte_eventdev *dev,
1120                           uint8_t ev_qid,
1121                           const struct rte_event_queue_conf *queue_conf)
1122 {
1123         struct dlb2_eventdev *dlb2 = dlb2_pmd_priv(dev);
1124         struct dlb2_eventdev_queue *ev_queue;
1125         int ret;
1126
1127         if (queue_conf == NULL)
1128                 return -EINVAL;
1129
1130         if (ev_qid >= dlb2->num_queues)
1131                 return -EINVAL;
1132
1133         ev_queue = &dlb2->ev_queues[ev_qid];
1134
1135         ev_queue->qm_queue.is_directed = queue_conf->event_queue_cfg &
1136                 RTE_EVENT_QUEUE_CFG_SINGLE_LINK;
1137         ev_queue->id = ev_qid;
1138         ev_queue->conf = *queue_conf;
1139
1140         if (!ev_queue->qm_queue.is_directed) {
1141                 ret = dlb2_eventdev_ldb_queue_setup(dev, ev_queue, queue_conf);
1142         } else {
1143                 /* The directed queue isn't setup until link time, at which
1144                  * point we know its directed port ID. Directed queue setup
1145                  * will only fail if this queue is already setup or there are
1146                  * no directed queues left to configure.
1147                  */
1148                 ret = 0;
1149
1150                 ev_queue->qm_queue.config_state = DLB2_NOT_CONFIGURED;
1151
1152                 if (ev_queue->setup_done ||
1153                     dlb2_num_dir_queues_setup(dlb2) == dlb2->num_dir_queues)
1154                         ret = -EINVAL;
1155         }
1156
1157         /* Tear down pre-existing port->queue links */
1158         if (!ret && dlb2->run_state == DLB2_RUN_STATE_STOPPED)
1159                 dlb2_queue_link_teardown(dlb2, ev_queue);
1160
1161         if (!ret)
1162                 ev_queue->setup_done = true;
1163
1164         return ret;
1165 }
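/* Illustrative only: an application-side queue configuration that exercises
 * the load-balanced path above (RTE_EVENT_QUEUE_CFG_SINGLE_LINK not set),
 * assuming an already-configured device dev_id and queue id 0:
 *
 *   struct rte_event_queue_conf qconf = {
 *           .schedule_type = RTE_SCHED_TYPE_ATOMIC,
 *           .nb_atomic_flows = 1024,
 *           .nb_atomic_order_sequences = 64,
 *           .priority = RTE_EVENT_DEV_PRIORITY_NORMAL,
 *   };
 *   int err = rte_event_queue_setup(dev_id, 0, &qconf);
 */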
1166
1167 static int
1168 dlb2_init_consume_qe(struct dlb2_port *qm_port, char *mz_name)
1169 {
1170         struct dlb2_cq_pop_qe *qe;
1171
1172         qe = rte_zmalloc(mz_name,
1173                         DLB2_NUM_QES_PER_CACHE_LINE *
1174                                 sizeof(struct dlb2_cq_pop_qe),
1175                         RTE_CACHE_LINE_SIZE);
1176
1177         if (qe == NULL) {
1178                 DLB2_LOG_ERR("dlb2: no memory for consume_qe\n");
1179                 return -ENOMEM;
1180         }
1181         qm_port->consume_qe = qe;
1182
1183         qe->qe_valid = 0;
1184         qe->qe_frag = 0;
1185         qe->qe_comp = 0;
1186         qe->cq_token = 1;
1187         /* Tokens value is 0-based; i.e. '0' returns 1 token, '1' returns 2,
1188          * and so on.
1189          */
1190         qe->tokens = 0; /* set at run time */
1191         qe->meas_lat = 0;
1192         qe->no_dec = 0;
1193         /* Completion IDs are disabled */
1194         qe->cmp_id = 0;
1195
1196         return 0;
1197 }
1198
1199 static int
1200 dlb2_init_int_arm_qe(struct dlb2_port *qm_port, char *mz_name)
1201 {
1202         struct dlb2_enqueue_qe *qe;
1203
1204         qe = rte_zmalloc(mz_name,
1205                         DLB2_NUM_QES_PER_CACHE_LINE *
1206                                 sizeof(struct dlb2_enqueue_qe),
1207                         RTE_CACHE_LINE_SIZE);
1208
1209         if (qe == NULL) {
1210                 DLB2_LOG_ERR("dlb2: no memory for int_arm_qe\n");
1211                 return -ENOMEM;
1212         }
1213         qm_port->int_arm_qe = qe;
1214
1215         /* V2 - INT ARM is CQ_TOKEN + FRAG */
1216         qe->qe_valid = 0;
1217         qe->qe_frag = 1;
1218         qe->qe_comp = 0;
1219         qe->cq_token = 1;
1220         qe->meas_lat = 0;
1221         qe->no_dec = 0;
1222         /* Completion IDs are disabled */
1223         qe->cmp_id = 0;
1224
1225         return 0;
1226 }
1227
1228 static int
1229 dlb2_init_qe_mem(struct dlb2_port *qm_port, char *mz_name)
1230 {
1231         int ret, sz;
1232
1233         sz = DLB2_NUM_QES_PER_CACHE_LINE * sizeof(struct dlb2_enqueue_qe);
1234
1235         qm_port->qe4 = rte_zmalloc(mz_name, sz, RTE_CACHE_LINE_SIZE);
1236
1237         if (qm_port->qe4 == NULL) {
1238                 DLB2_LOG_ERR("dlb2: no qe4 memory\n");
1239                 ret = -ENOMEM;
1240                 goto error_exit;
1241         }
1242
1243         ret = dlb2_init_int_arm_qe(qm_port, mz_name);
1244         if (ret < 0) {
1245                 DLB2_LOG_ERR("dlb2: dlb2_init_int_arm_qe ret=%d\n", ret);
1246                 goto error_exit;
1247         }
1248
1249         ret = dlb2_init_consume_qe(qm_port, mz_name);
1250         if (ret < 0) {
1251                 DLB2_LOG_ERR("dlb2: dlb2_init_consume_qe ret=%d\n", ret);
1252                 goto error_exit;
1253         }
1254
1255         return 0;
1256
1257 error_exit:
1258
1259         dlb2_free_qe_mem(qm_port);
1260
1261         return ret;
1262 }
1263
1264 static inline uint16_t
1265 dlb2_event_enqueue_delayed(void *event_port,
1266                            const struct rte_event events[]);
1267
1268 static inline uint16_t
1269 dlb2_event_enqueue_burst_delayed(void *event_port,
1270                                  const struct rte_event events[],
1271                                  uint16_t num);
1272
1273 static inline uint16_t
1274 dlb2_event_enqueue_new_burst_delayed(void *event_port,
1275                                      const struct rte_event events[],
1276                                      uint16_t num);
1277
1278 static inline uint16_t
1279 dlb2_event_enqueue_forward_burst_delayed(void *event_port,
1280                                          const struct rte_event events[],
1281                                          uint16_t num);
1282
1283 /* Generate the required bitmask for rotate-style expected QE gen bits.
1284  * This requires a pattern of 1's and 0's, starting with the expected bits
1285  * set to 1, so that QEs the hardware writes with 0's read as "new". The
1286  * ring size must be a power of 2 for the mask to wrap correctly.
1287  */
1288 static void
1289 dlb2_hw_cq_bitmask_init(struct dlb2_port *qm_port, uint32_t cq_depth)
1290 {
1291         uint64_t cq_build_mask = 0;
1292         uint32_t i;
1293
1294         if (cq_depth > 64)
1295                 return; /* need to fall back to scalar code */
1296
1297         /*
1298          * all 1's in first u64, all zeros in second is correct bit pattern to
1299          * start. Special casing == 64 easier than adapting complex loop logic.
1300          */
1301         if (cq_depth == 64) {
1302                 qm_port->cq_rolling_mask = 0;
1303                 qm_port->cq_rolling_mask_2 = -1;
1304                 return;
1305         }
1306
1307         for (i = 0; i < 64; i += (cq_depth * 2))
1308                 cq_build_mask |= ((1ULL << cq_depth) - 1) << (i + cq_depth);
1309
1310         qm_port->cq_rolling_mask = cq_build_mask;
1311         qm_port->cq_rolling_mask_2 = cq_build_mask;
1312 }
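/* Illustrative only: a worked example of the mask built above for
 * cq_depth = 8. The loop runs for i = 0, 16, 32, 48 and ORs in 0xFF shifted
 * left by (i + 8), producing
 *
 *   cq_rolling_mask = cq_rolling_mask_2 = 0xFF00FF00FF00FF00
 *
 * i.e. alternating cq_depth-sized runs of expected-1 and expected-0 gen bits.
 */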
1313
1314 static int
1315 dlb2_hw_create_ldb_port(struct dlb2_eventdev *dlb2,
1316                         struct dlb2_eventdev_port *ev_port,
1317                         uint32_t dequeue_depth,
1318                         uint32_t enqueue_depth)
1319 {
1320         struct dlb2_hw_dev *handle = &dlb2->qm_instance;
1321         struct dlb2_create_ldb_port_args cfg = { {0} };
1322         int ret;
1323         struct dlb2_port *qm_port = NULL;
1324         char mz_name[RTE_MEMZONE_NAMESIZE];
1325         uint32_t qm_port_id;
1326         uint16_t ldb_credit_high_watermark = 0;
1327         uint16_t dir_credit_high_watermark = 0;
1328         uint16_t credit_high_watermark = 0;
1329
1330         if (handle == NULL)
1331                 return -EINVAL;
1332
1333         if (dequeue_depth < DLB2_MIN_CQ_DEPTH) {
1334                 DLB2_LOG_ERR("dlb2: invalid dequeue_depth, must be at least %d\n",
1335                              DLB2_MIN_CQ_DEPTH);
1336                 return -EINVAL;
1337         }
1338
1339         if (enqueue_depth < DLB2_MIN_ENQUEUE_DEPTH) {
1340                 DLB2_LOG_ERR("dlb2: invalid enqueue_depth, must be at least %d\n",
1341                              DLB2_MIN_ENQUEUE_DEPTH);
1342                 return -EINVAL;
1343         }
1344
1345         rte_spinlock_lock(&handle->resource_lock);
1346
1347         /* We round up to the next power of 2 if necessary */
1348         cfg.cq_depth = rte_align32pow2(dequeue_depth);
1349         cfg.cq_depth_threshold = 1;
1350
1351         cfg.cq_history_list_size = DLB2_NUM_HIST_LIST_ENTRIES_PER_LDB_PORT;
1352
1353         if (handle->cos_id == DLB2_COS_DEFAULT)
1354                 cfg.cos_id = 0;
1355         else
1356                 cfg.cos_id = handle->cos_id;
1357
1358         cfg.cos_strict = 0;
1359
1360         /* User controls the LDB high watermark via enqueue depth. The DIR high
1361          * watermark is equal, unless the directed credit pool is too small.
1362          */
1363         if (dlb2->version == DLB2_HW_V2) {
1364                 ldb_credit_high_watermark = enqueue_depth;
1365                 /* If there are no directed ports, the kernel driver will
1366                  * ignore this port's directed credit settings. Don't use
1367                  * enqueue_depth if it would require more directed credits
1368                  * than are available.
1369                  */
1370                 dir_credit_high_watermark =
1371                         RTE_MIN(enqueue_depth,
1372                                 handle->cfg.num_dir_credits / dlb2->num_ports);
1373         } else
1374                 credit_high_watermark = enqueue_depth;
1375
1376         /* Per QM values */
1377
1378         ret = dlb2_iface_ldb_port_create(handle, &cfg,  dlb2->poll_mode);
1379         if (ret < 0) {
1380                 DLB2_LOG_ERR("dlb2: dlb2_ldb_port_create error, ret=%d (driver status: %s)\n",
1381                              ret, dlb2_error_strings[cfg.response.status]);
1382                 goto error_exit;
1383         }
1384
1385         qm_port_id = cfg.response.id;
1386
1387         DLB2_LOG_DBG("dlb2: ev_port %d uses qm LB port %d <<<<<\n",
1388                      ev_port->id, qm_port_id);
1389
1390         qm_port = &ev_port->qm_port;
1391         qm_port->ev_port = ev_port; /* back ptr */
1392         qm_port->dlb2 = dlb2; /* back ptr */
1393         /*
1394          * Allocate and init local qe struct(s).
1395          * Note: MOVDIR64 requires the enqueue QE (qe4) to be aligned.
1396          */
1397
1398         snprintf(mz_name, sizeof(mz_name), "dlb2_ldb_port%d",
1399                  ev_port->id);
1400
1401         ret = dlb2_init_qe_mem(qm_port, mz_name);
1402         if (ret < 0) {
1403                 DLB2_LOG_ERR("dlb2: init_qe_mem failed, ret=%d\n", ret);
1404                 goto error_exit;
1405         }
1406
1407         qm_port->id = qm_port_id;
1408
1409         if (dlb2->version == DLB2_HW_V2) {
1410                 qm_port->cached_ldb_credits = 0;
1411                 qm_port->cached_dir_credits = 0;
1412         } else
1413                 qm_port->cached_credits = 0;
1414
1415         /* CQs with depth < 8 use an 8-entry queue, but withhold credits so
1416          * the effective depth is smaller.
1417          */
1418         qm_port->cq_depth = cfg.cq_depth <= 8 ? 8 : cfg.cq_depth;
1419         qm_port->cq_idx = 0;
1420         qm_port->cq_idx_unmasked = 0;
1421
1422         if (dlb2->poll_mode == DLB2_CQ_POLL_MODE_SPARSE)
1423                 qm_port->cq_depth_mask = (qm_port->cq_depth * 4) - 1;
1424         else
1425                 qm_port->cq_depth_mask = qm_port->cq_depth - 1;
1426
1427         qm_port->gen_bit_shift = __builtin_popcount(qm_port->cq_depth_mask);
1428         /* starting value of gen bit - it toggles at wrap time */
1429         qm_port->gen_bit = 1;
1430
1431         dlb2_hw_cq_bitmask_init(qm_port, qm_port->cq_depth);
1432
1433         qm_port->int_armed = false;
1434
1435         /* Save off for later use in info and lookup APIs. */
1436         qm_port->qid_mappings = &dlb2->qm_ldb_to_ev_queue_id[0];
1437
1438         qm_port->dequeue_depth = dequeue_depth;
1439         qm_port->token_pop_thresh = dequeue_depth;
1440
1441         /* The default enqueue functions do not include delayed-pop support for
1442          * performance reasons.
1443          */
1444         if (qm_port->token_pop_mode == DELAYED_POP) {
1445                 dlb2->event_dev->enqueue = dlb2_event_enqueue_delayed;
1446                 dlb2->event_dev->enqueue_burst =
1447                         dlb2_event_enqueue_burst_delayed;
1448                 dlb2->event_dev->enqueue_new_burst =
1449                         dlb2_event_enqueue_new_burst_delayed;
1450                 dlb2->event_dev->enqueue_forward_burst =
1451                         dlb2_event_enqueue_forward_burst_delayed;
1452         }
1453
1454         qm_port->owed_tokens = 0;
1455         qm_port->issued_releases = 0;
1456
1457         /* Save config message too. */
1458         rte_memcpy(&qm_port->cfg.ldb, &cfg, sizeof(qm_port->cfg.ldb));
1459
1460         /* update state */
1461         qm_port->state = PORT_STARTED; /* enabled at create time */
1462         qm_port->config_state = DLB2_CONFIGURED;
1463
1464         if (dlb2->version == DLB2_HW_V2) {
1465                 qm_port->dir_credits = dir_credit_high_watermark;
1466                 qm_port->ldb_credits = ldb_credit_high_watermark;
1467                 qm_port->credit_pool[DLB2_DIR_QUEUE] = &dlb2->dir_credit_pool;
1468                 qm_port->credit_pool[DLB2_LDB_QUEUE] = &dlb2->ldb_credit_pool;
1469
1470                 DLB2_LOG_DBG("dlb2: created ldb port %d, depth = %d, ldb credits=%d, dir credits=%d\n",
1471                              qm_port_id,
1472                              dequeue_depth,
1473                              qm_port->ldb_credits,
1474                              qm_port->dir_credits);
1475         } else {
1476                 qm_port->credits = credit_high_watermark;
1477                 qm_port->credit_pool[DLB2_COMBINED_POOL] = &dlb2->credit_pool;
1478
1479                 DLB2_LOG_DBG("dlb2: created ldb port %d, depth = %d, credits=%d\n",
1480                              qm_port_id,
1481                              dequeue_depth,
1482                              qm_port->credits);
1483         }
1484
1485         qm_port->use_scalar = false;
1486
1487 #if (!defined RTE_ARCH_X86_64)
1488         qm_port->use_scalar = true;
1489 #else
1490         if ((qm_port->cq_depth > 64) ||
1491             (!rte_is_power_of_2(qm_port->cq_depth)) ||
1492             (dlb2->vector_opts_enabled == false))
1493                 qm_port->use_scalar = true;
1494 #endif
1495
1496         rte_spinlock_unlock(&handle->resource_lock);
1497
1498         return 0;
1499
1500 error_exit:
1501
1502         if (qm_port)
1503                 dlb2_free_qe_mem(qm_port);
1504
1505         rte_spinlock_unlock(&handle->resource_lock);
1506
1507         DLB2_LOG_ERR("dlb2: create ldb port failed!\n");
1508
1509         return ret;
1510 }
1511
1512 static void
1513 dlb2_port_link_teardown(struct dlb2_eventdev *dlb2,
1514                         struct dlb2_eventdev_port *ev_port)
1515 {
1516         struct dlb2_eventdev_queue *ev_queue;
1517         int i;
1518
1519         for (i = 0; i < DLB2_MAX_NUM_QIDS_PER_LDB_CQ; i++) {
1520                 if (!ev_port->link[i].valid)
1521                         continue;
1522
1523                 ev_queue = &dlb2->ev_queues[ev_port->link[i].queue_id];
1524
1525                 ev_port->link[i].valid = false;
1526                 ev_port->num_links--;
1527                 ev_queue->num_links--;
1528         }
1529 }
1530
1531 static int
1532 dlb2_hw_create_dir_port(struct dlb2_eventdev *dlb2,
1533                         struct dlb2_eventdev_port *ev_port,
1534                         uint32_t dequeue_depth,
1535                         uint32_t enqueue_depth)
1536 {
1537         struct dlb2_hw_dev *handle = &dlb2->qm_instance;
1538         struct dlb2_create_dir_port_args cfg = { {0} };
1539         int ret;
1540         struct dlb2_port *qm_port = NULL;
1541         char mz_name[RTE_MEMZONE_NAMESIZE];
1542         uint32_t qm_port_id;
1543         uint16_t ldb_credit_high_watermark = 0;
1544         uint16_t dir_credit_high_watermark = 0;
1545         uint16_t credit_high_watermark = 0;
1546
1547         if (dlb2 == NULL || handle == NULL)
1548                 return -EINVAL;
1549
1550         if (dequeue_depth < DLB2_MIN_CQ_DEPTH) {
1551                 DLB2_LOG_ERR("dlb2: invalid dequeue_depth, must be %d-%d\n",
1552                              DLB2_MIN_CQ_DEPTH, DLB2_MAX_INPUT_QUEUE_DEPTH);
1553                 return -EINVAL;
1554         }
1555
1556         if (enqueue_depth < DLB2_MIN_ENQUEUE_DEPTH) {
1557                 DLB2_LOG_ERR("dlb2: invalid enqueue_depth, must be at least %d\n",
1558                              DLB2_MIN_ENQUEUE_DEPTH);
1559                 return -EINVAL;
1560         }
1561
1562         rte_spinlock_lock(&handle->resource_lock);
1563
1564         /* Directed queues are configured at link time. */
1565         cfg.queue_id = -1;
1566
1567         /* We round up to the next power of 2 if necessary */
1568         cfg.cq_depth = rte_align32pow2(dequeue_depth);
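        /* e.g. a requested dequeue_depth of 24 is rounded up to a CQ depth
         * of 32 by rte_align32pow2().
         */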
1569         cfg.cq_depth_threshold = 1;
1570
1571         /* User controls the LDB high watermark via enqueue depth. The DIR high
1572          * watermark is equal, unless the directed credit pool is too small.
1573          */
1574         if (dlb2->version == DLB2_HW_V2) {
1575                 ldb_credit_high_watermark = enqueue_depth;
1576                 /* Don't use enqueue_depth if it would require more directed
1577                  * credits than are available.
1578                  */
1579                 dir_credit_high_watermark =
1580                         RTE_MIN(enqueue_depth,
1581                                 handle->cfg.num_dir_credits / dlb2->num_ports);
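                /* Illustration with hypothetical numbers: with 4096 directed
                 * credits shared across 64 ports, each port's DIR high
                 * watermark is capped at 64 even if enqueue_depth is larger.
                 */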
1582         } else
1583                 credit_high_watermark = enqueue_depth;
1584
1585         /* Per QM values */
1586
1587         ret = dlb2_iface_dir_port_create(handle, &cfg,  dlb2->poll_mode);
1588         if (ret < 0) {
1589                 DLB2_LOG_ERR("dlb2: dlb2_dir_port_create error, ret=%d (driver status: %s)\n",
1590                              ret, dlb2_error_strings[cfg.response.status]);
1591                 goto error_exit;
1592         }
1593
1594         qm_port_id = cfg.response.id;
1595
1596         DLB2_LOG_DBG("dlb2: ev_port %d uses qm DIR port %d <<<<<\n",
1597                      ev_port->id, qm_port_id);
1598
1599         qm_port = &ev_port->qm_port;
1600         qm_port->ev_port = ev_port; /* back ptr */
1601         qm_port->dlb2 = dlb2;  /* back ptr */
1602
1603         /*
1604          * Init local qe struct(s).
1605          * Note: MOVDIR64B requires the enqueue QE to be aligned
1606          */
1607
1608         snprintf(mz_name, sizeof(mz_name), "dlb2_dir_port%d",
1609                  ev_port->id);
1610
1611         ret = dlb2_init_qe_mem(qm_port, mz_name);
1612
1613         if (ret < 0) {
1614                 DLB2_LOG_ERR("dlb2: init_qe_mem failed, ret=%d\n", ret);
1615                 goto error_exit;
1616         }
1617
1618         qm_port->id = qm_port_id;
1619
1620         if (dlb2->version == DLB2_HW_V2) {
1621                 qm_port->cached_ldb_credits = 0;
1622                 qm_port->cached_dir_credits = 0;
1623         } else
1624                 qm_port->cached_credits = 0;
1625
1626         /* CQs with depth < 8 use an 8-entry queue, but withhold credits so
1627          * the effective depth is smaller.
1628          */
1629         qm_port->cq_depth = cfg.cq_depth <= 8 ? 8 : cfg.cq_depth;
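        /* e.g. a requested depth of 4 still allocates an 8-entry CQ; the
         * withheld credits keep the effective depth at 4.
         */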
1630         qm_port->cq_idx = 0;
1631         qm_port->cq_idx_unmasked = 0;
1632
1633         if (dlb2->poll_mode == DLB2_CQ_POLL_MODE_SPARSE)
1634                 qm_port->cq_depth_mask = (cfg.cq_depth * 4) - 1;
1635         else
1636                 qm_port->cq_depth_mask = cfg.cq_depth - 1;
1637
1638         qm_port->gen_bit_shift = __builtin_popcount(qm_port->cq_depth_mask);
1639         /* starting value of gen bit - it toggles at wrap time */
1640         qm_port->gen_bit = 1;
1641         dlb2_hw_cq_bitmask_init(qm_port, qm_port->cq_depth);
1642
1643         qm_port->int_armed = false;
1644
1645         /* Save off for later use in info and lookup APIs. */
1646         qm_port->qid_mappings = &dlb2->qm_dir_to_ev_queue_id[0];
1647
1648         qm_port->dequeue_depth = dequeue_depth;
1649
1650         /* Directed ports are auto-pop, by default. */
1651         qm_port->token_pop_mode = AUTO_POP;
1652         qm_port->owed_tokens = 0;
1653         qm_port->issued_releases = 0;
1654
1655         /* Save config message too. */
1656         rte_memcpy(&qm_port->cfg.dir, &cfg, sizeof(qm_port->cfg.dir));
1657
1658         /* update state */
1659         qm_port->state = PORT_STARTED; /* enabled at create time */
1660         qm_port->config_state = DLB2_CONFIGURED;
1661
1662         if (dlb2->version == DLB2_HW_V2) {
1663                 qm_port->dir_credits = dir_credit_high_watermark;
1664                 qm_port->ldb_credits = ldb_credit_high_watermark;
1665                 qm_port->credit_pool[DLB2_DIR_QUEUE] = &dlb2->dir_credit_pool;
1666                 qm_port->credit_pool[DLB2_LDB_QUEUE] = &dlb2->ldb_credit_pool;
1667
1668                 DLB2_LOG_DBG("dlb2: created dir port %d, depth = %d cr=%d,%d\n",
1669                              qm_port_id,
1670                              dequeue_depth,
1671                              dir_credit_high_watermark,
1672                              ldb_credit_high_watermark);
1673         } else {
1674                 qm_port->credits = credit_high_watermark;
1675                 qm_port->credit_pool[DLB2_COMBINED_POOL] = &dlb2->credit_pool;
1676
1677                 DLB2_LOG_DBG("dlb2: created dir port %d, depth = %d cr=%d\n",
1678                              qm_port_id,
1679                              dequeue_depth,
1680                              credit_high_watermark);
1681         }
1682
1683 #if (!defined RTE_ARCH_X86_64)
1684         qm_port->use_scalar = true;
1685 #else
1686         if ((qm_port->cq_depth > 64) ||
1687             (!rte_is_power_of_2(qm_port->cq_depth)) ||
1688             (dlb2->vector_opts_enabled == false))
1689                 qm_port->use_scalar = true;
1690 #endif
1691
1692         rte_spinlock_unlock(&handle->resource_lock);
1693
1694         return 0;
1695
1696 error_exit:
1697
1698         if (qm_port)
1699                 dlb2_free_qe_mem(qm_port);
1700
1701         rte_spinlock_unlock(&handle->resource_lock);
1702
1703         DLB2_LOG_ERR("dlb2: create dir port failed!\n");
1704
1705         return ret;
1706 }
1707
1708 static int
1709 dlb2_eventdev_port_setup(struct rte_eventdev *dev,
1710                          uint8_t ev_port_id,
1711                          const struct rte_event_port_conf *port_conf)
1712 {
1713         struct dlb2_eventdev *dlb2;
1714         struct dlb2_eventdev_port *ev_port;
1715         int ret;
1716         uint32_t hw_credit_quanta, sw_credit_quanta;
1717
1718         if (dev == NULL || port_conf == NULL) {
1719                 DLB2_LOG_ERR("Null parameter\n");
1720                 return -EINVAL;
1721         }
1722
1723         dlb2 = dlb2_pmd_priv(dev);
1724
1725         if (ev_port_id >= DLB2_MAX_NUM_PORTS(dlb2->version))
1726                 return -EINVAL;
1727
1728         if (port_conf->dequeue_depth >
1729                 evdev_dlb2_default_info.max_event_port_dequeue_depth ||
1730             port_conf->enqueue_depth >
1731                 evdev_dlb2_default_info.max_event_port_enqueue_depth)
1732                 return -EINVAL;
1733
1734         ev_port = &dlb2->ev_ports[ev_port_id];
1735         /* configured? */
1736         if (ev_port->setup_done) {
1737                 DLB2_LOG_ERR("evport %d is already configured\n", ev_port_id);
1738                 return -EINVAL;
1739         }
1740
1741         ev_port->qm_port.is_directed = port_conf->event_port_cfg &
1742                 RTE_EVENT_PORT_CFG_SINGLE_LINK;
1743
1744         if (!ev_port->qm_port.is_directed) {
1745                 ret = dlb2_hw_create_ldb_port(dlb2,
1746                                               ev_port,
1747                                               port_conf->dequeue_depth,
1748                                               port_conf->enqueue_depth);
1749                 if (ret < 0) {
1750                         DLB2_LOG_ERR("Failed to create the LDB port, ev_port_id=%d\n",
1751                                      ev_port_id);
1752
1753                         return ret;
1754                 }
1755         } else {
1756                 ret = dlb2_hw_create_dir_port(dlb2,
1757                                               ev_port,
1758                                               port_conf->dequeue_depth,
1759                                               port_conf->enqueue_depth);
1760                 if (ret < 0) {
1761                         DLB2_LOG_ERR("Failed to create the DIR port\n");
1762                         return ret;
1763                 }
1764         }
1765
1766         /* Save off port config for reconfig */
1767         ev_port->conf = *port_conf;
1768
1769         ev_port->id = ev_port_id;
1770         ev_port->enq_configured = true;
1771         ev_port->setup_done = true;
1772         ev_port->inflight_max = port_conf->new_event_threshold;
1773         ev_port->implicit_release = !(port_conf->event_port_cfg &
1774                   RTE_EVENT_PORT_CFG_DISABLE_IMPL_REL);
1775         ev_port->outstanding_releases = 0;
1776         ev_port->inflight_credits = 0;
1777         ev_port->dlb2 = dlb2; /* reverse link */
1778
1779         /* Default for worker ports */
1780         sw_credit_quanta = dlb2->sw_credit_quanta;
1781         hw_credit_quanta = dlb2->hw_credit_quanta;
1782
1783         if (port_conf->event_port_cfg & RTE_EVENT_PORT_CFG_HINT_PRODUCER) {
1784                 /* Producer type ports. Mostly enqueue */
1785                 sw_credit_quanta = DLB2_SW_CREDIT_P_QUANTA_DEFAULT;
1786                 hw_credit_quanta = DLB2_SW_CREDIT_P_BATCH_SZ;
1787         }
1788         if (port_conf->event_port_cfg & RTE_EVENT_PORT_CFG_HINT_CONSUMER) {
1789                 /* Consumer type ports. Mostly dequeue */
1790                 sw_credit_quanta = DLB2_SW_CREDIT_C_QUANTA_DEFAULT;
1791                 hw_credit_quanta = DLB2_SW_CREDIT_C_BATCH_SZ;
1792         }
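        /* The producer/consumer hints above only tune the software and
         * hardware credit batch sizes; they do not change what the port can
         * do functionally.
         */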
1793         ev_port->credit_update_quanta = sw_credit_quanta;
1794         ev_port->qm_port.hw_credit_quanta = hw_credit_quanta;
1795
1796         /* Tear down pre-existing port->queue links */
1797         if (dlb2->run_state == DLB2_RUN_STATE_STOPPED)
1798                 dlb2_port_link_teardown(dlb2, &dlb2->ev_ports[ev_port_id]);
1799
1800         dev->data->ports[ev_port_id] = &dlb2->ev_ports[ev_port_id];
1801
1802         return 0;
1803 }
1804
1805 static int16_t
1806 dlb2_hw_map_ldb_qid_to_port(struct dlb2_hw_dev *handle,
1807                             uint32_t qm_port_id,
1808                             uint16_t qm_qid,
1809                             uint8_t priority)
1810 {
1811         struct dlb2_map_qid_args cfg;
1812         int32_t ret;
1813
1814         if (handle == NULL)
1815                 return -EINVAL;
1816
1817         /* Build message */
1818         cfg.port_id = qm_port_id;
1819         cfg.qid = qm_qid;
1820         cfg.priority = EV_TO_DLB2_PRIO(priority);
1821
1822         ret = dlb2_iface_map_qid(handle, &cfg);
1823         if (ret < 0) {
1824                 DLB2_LOG_ERR("dlb2: map qid error, ret=%d (driver status: %s)\n",
1825                              ret, dlb2_error_strings[cfg.response.status]);
1826                 DLB2_LOG_ERR("dlb2: grp=%d, qm_port=%d, qm_qid=%d prio=%d\n",
1827                              handle->domain_id, cfg.port_id,
1828                              cfg.qid,
1829                              cfg.priority);
1830         } else {
1831                 DLB2_LOG_DBG("dlb2: mapped queue %d to qm_port %d\n",
1832                              qm_qid, qm_port_id);
1833         }
1834
1835         return ret;
1836 }
1837
1838 static int
1839 dlb2_event_queue_join_ldb(struct dlb2_eventdev *dlb2,
1840                           struct dlb2_eventdev_port *ev_port,
1841                           struct dlb2_eventdev_queue *ev_queue,
1842                           uint8_t priority)
1843 {
1844         int first_avail = -1;
1845         int ret, i;
1846
1847         for (i = 0; i < DLB2_MAX_NUM_QIDS_PER_LDB_CQ; i++) {
1848                 if (ev_port->link[i].valid) {
1849                         if (ev_port->link[i].queue_id == ev_queue->id &&
1850                             ev_port->link[i].priority == priority) {
1851                                 if (ev_port->link[i].mapped)
1852                                         return 0; /* already mapped */
1853                                 first_avail = i;
1854                         }
1855                 } else if (first_avail == -1)
1856                         first_avail = i;
1857         }
1858         if (first_avail == -1) {
1859                 DLB2_LOG_ERR("dlb2: qm_port %d has no available QID slots.\n",
1860                              ev_port->qm_port.id);
1861                 return -EINVAL;
1862         }
1863
1864         ret = dlb2_hw_map_ldb_qid_to_port(&dlb2->qm_instance,
1865                                           ev_port->qm_port.id,
1866                                           ev_queue->qm_queue.id,
1867                                           priority);
1868
1869         if (!ret)
1870                 ev_port->link[first_avail].mapped = true;
1871
1872         return ret;
1873 }
1874
1875 static int32_t
1876 dlb2_hw_create_dir_queue(struct dlb2_eventdev *dlb2,
1877                          struct dlb2_eventdev_queue *ev_queue,
1878                          int32_t qm_port_id)
1879 {
1880         struct dlb2_hw_dev *handle = &dlb2->qm_instance;
1881         struct dlb2_create_dir_queue_args cfg;
1882         int32_t ret;
1883
1884         /* The directed port is always configured before its queue */
1885         cfg.port_id = qm_port_id;
1886
1887         if (ev_queue->depth_threshold == 0) {
1888                 cfg.depth_threshold = dlb2->default_depth_thresh;
1889                 ev_queue->depth_threshold =
1890                         dlb2->default_depth_thresh;
1891         } else
1892                 cfg.depth_threshold = ev_queue->depth_threshold;
1893
1894         ret = dlb2_iface_dir_queue_create(handle, &cfg);
1895         if (ret < 0) {
1896                 DLB2_LOG_ERR("dlb2: create DIR event queue error, ret=%d (driver status: %s)\n",
1897                              ret, dlb2_error_strings[cfg.response.status]);
1898                 return -EINVAL;
1899         }
1900
1901         return cfg.response.id;
1902 }
1903
1904 static int
1905 dlb2_eventdev_dir_queue_setup(struct dlb2_eventdev *dlb2,
1906                               struct dlb2_eventdev_queue *ev_queue,
1907                               struct dlb2_eventdev_port *ev_port)
1908 {
1909         int32_t qm_qid;
1910
1911         qm_qid = dlb2_hw_create_dir_queue(dlb2, ev_queue, ev_port->qm_port.id);
1912
1913         if (qm_qid < 0) {
1914                 DLB2_LOG_ERR("Failed to create the DIR queue\n");
1915                 return qm_qid;
1916         }
1917
1918         dlb2->qm_dir_to_ev_queue_id[qm_qid] = ev_queue->id;
1919
1920         ev_queue->qm_queue.id = qm_qid;
1921
1922         return 0;
1923 }
1924
1925 static int
1926 dlb2_do_port_link(struct rte_eventdev *dev,
1927                   struct dlb2_eventdev_queue *ev_queue,
1928                   struct dlb2_eventdev_port *ev_port,
1929                   uint8_t prio)
1930 {
1931         struct dlb2_eventdev *dlb2 = dlb2_pmd_priv(dev);
1932         int err;
1933
1934         /* Don't link until start time. */
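        /* Links requested while the device is stopped are only recorded in
         * ev_port->link[] and are applied by dlb2_eventdev_apply_port_links()
         * when the device starts.
         */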
1935         if (dlb2->run_state == DLB2_RUN_STATE_STOPPED)
1936                 return 0;
1937
1938         if (ev_queue->qm_queue.is_directed)
1939                 err = dlb2_eventdev_dir_queue_setup(dlb2, ev_queue, ev_port);
1940         else
1941                 err = dlb2_event_queue_join_ldb(dlb2, ev_port, ev_queue, prio);
1942
1943         if (err) {
1944                 DLB2_LOG_ERR("port link failure for %s ev_q %d, ev_port %d\n",
1945                              ev_queue->qm_queue.is_directed ? "DIR" : "LDB",
1946                              ev_queue->id, ev_port->id);
1947
1948                 rte_errno = err;
1949                 return -1;
1950         }
1951
1952         return 0;
1953 }
1954
1955 static int
1956 dlb2_validate_port_link(struct dlb2_eventdev_port *ev_port,
1957                         uint8_t queue_id,
1958                         bool link_exists,
1959                         int index)
1960 {
1961         struct dlb2_eventdev *dlb2 = ev_port->dlb2;
1962         struct dlb2_eventdev_queue *ev_queue;
1963         bool port_is_dir, queue_is_dir;
1964
1965         if (queue_id >= dlb2->num_queues) {
1966                 rte_errno = -EINVAL;
1967                 return -1;
1968         }
1969
1970         ev_queue = &dlb2->ev_queues[queue_id];
1971
1972         if (!ev_queue->setup_done &&
1973             ev_queue->qm_queue.config_state != DLB2_PREV_CONFIGURED) {
1974                 rte_errno = -EINVAL;
1975                 return -1;
1976         }
1977
1978         port_is_dir = ev_port->qm_port.is_directed;
1979         queue_is_dir = ev_queue->qm_queue.is_directed;
1980
1981         if (port_is_dir != queue_is_dir) {
1982                 DLB2_LOG_ERR("%s queue %u can't link to %s port %u\n",
1983                              queue_is_dir ? "DIR" : "LDB", ev_queue->id,
1984                              port_is_dir ? "DIR" : "LDB", ev_port->id);
1985
1986                 rte_errno = -EINVAL;
1987                 return -1;
1988         }
1989
1990         /* Check if there is space for the requested link */
1991         if (!link_exists && index == -1) {
1992                 DLB2_LOG_ERR("no space for new link\n");
1993                 rte_errno = -ENOSPC;
1994                 return -1;
1995         }
1996
1997         /* Check if the directed port is already linked */
1998         if (ev_port->qm_port.is_directed && ev_port->num_links > 0 &&
1999             !link_exists) {
2000                 DLB2_LOG_ERR("Can't link DIR port %d to >1 queues\n",
2001                              ev_port->id);
2002                 rte_errno = -EINVAL;
2003                 return -1;
2004         }
2005
2006         /* Check if the directed queue is already linked */
2007         if (ev_queue->qm_queue.is_directed && ev_queue->num_links > 0 &&
2008             !link_exists) {
2009                 DLB2_LOG_ERR("Can't link DIR queue %d to >1 ports\n",
2010                              ev_queue->id);
2011                 rte_errno = -EINVAL;
2012                 return -1;
2013         }
2014
2015         return 0;
2016 }
2017
2018 static int
2019 dlb2_eventdev_port_link(struct rte_eventdev *dev, void *event_port,
2020                         const uint8_t queues[], const uint8_t priorities[],
2021                         uint16_t nb_links)
2022
2023 {
2024         struct dlb2_eventdev_port *ev_port = event_port;
2025         struct dlb2_eventdev *dlb2;
2026         int i, j;
2027
2028         RTE_SET_USED(dev);
2029
2030         if (ev_port == NULL) {
2031                 DLB2_LOG_ERR("dlb2: evport not setup\n");
2032                 rte_errno = -EINVAL;
2033                 return 0;
2034         }
2035
2036         if (!ev_port->setup_done &&
2037             ev_port->qm_port.config_state != DLB2_PREV_CONFIGURED) {
2038                 DLB2_LOG_ERR("dlb2: evport not setup\n");
2039                 rte_errno = -EINVAL;
2040                 return 0;
2041         }
2042
2043         /* Note: rte_event_port_link() ensures the PMD won't receive a NULL
2044          * queues pointer.
2045          */
2046         if (nb_links == 0) {
2047                 DLB2_LOG_DBG("dlb2: nb_links is 0\n");
2048                 return 0; /* Ignore and return success */
2049         }
2050
2051         dlb2 = ev_port->dlb2;
2052
2053         DLB2_LOG_DBG("Linking %u queues to %s port %d\n",
2054                      nb_links,
2055                      ev_port->qm_port.is_directed ? "DIR" : "LDB",
2056                      ev_port->id);
2057
2058         for (i = 0; i < nb_links; i++) {
2059                 struct dlb2_eventdev_queue *ev_queue;
2060                 uint8_t queue_id, prio;
2061                 bool found = false;
2062                 int index = -1;
2063
2064                 queue_id = queues[i];
2065                 prio = priorities[i];
2066
2067                 /* Check if the link already exists. */
2068                 for (j = 0; j < DLB2_MAX_NUM_QIDS_PER_LDB_CQ; j++)
2069                         if (ev_port->link[j].valid) {
2070                                 if (ev_port->link[j].queue_id == queue_id) {
2071                                         found = true;
2072                                         index = j;
2073                                         break;
2074                                 }
2075                         } else if (index == -1) {
2076                                 index = j;
2077                         }
2078
2079                 /* could not link */
2080                 if (index == -1)
2081                         break;
2082
2083                 /* Check if already linked at the requested priority */
2084                 if (found && ev_port->link[j].priority == prio)
2085                         continue;
2086
2087                 if (dlb2_validate_port_link(ev_port, queue_id, found, index))
2088                         break; /* return index of offending queue */
2089
2090                 ev_queue = &dlb2->ev_queues[queue_id];
2091
2092                 if (dlb2_do_port_link(dev, ev_queue, ev_port, prio))
2093                         break; /* return index of offending queue */
2094
2095                 ev_queue->num_links++;
2096
2097                 ev_port->link[index].queue_id = queue_id;
2098                 ev_port->link[index].priority = prio;
2099                 ev_port->link[index].valid = true;
2100                 /* A pre-existing entry means this was only a prio change */
2101                 if (!found)
2102                         ev_port->num_links++;
2103         }
2104         return i;
2105 }
2106
2107 static int16_t
2108 dlb2_hw_unmap_ldb_qid_from_port(struct dlb2_hw_dev *handle,
2109                                 uint32_t qm_port_id,
2110                                 uint16_t qm_qid)
2111 {
2112         struct dlb2_unmap_qid_args cfg;
2113         int32_t ret;
2114
2115         if (handle == NULL)
2116                 return -EINVAL;
2117
2118         cfg.port_id = qm_port_id;
2119         cfg.qid = qm_qid;
2120
2121         ret = dlb2_iface_unmap_qid(handle, &cfg);
2122         if (ret < 0)
2123                 DLB2_LOG_ERR("dlb2: unmap qid error, ret=%d (driver status: %s)\n",
2124                              ret, dlb2_error_strings[cfg.response.status]);
2125
2126         return ret;
2127 }
2128
2129 static int
2130 dlb2_event_queue_detach_ldb(struct dlb2_eventdev *dlb2,
2131                             struct dlb2_eventdev_port *ev_port,
2132                             struct dlb2_eventdev_queue *ev_queue)
2133 {
2134         int ret, i;
2135
2136         /* Don't unlink until start time. */
2137         if (dlb2->run_state == DLB2_RUN_STATE_STOPPED)
2138                 return 0;
2139
2140         for (i = 0; i < DLB2_MAX_NUM_QIDS_PER_LDB_CQ; i++) {
2141                 if (ev_port->link[i].valid &&
2142                     ev_port->link[i].queue_id == ev_queue->id)
2143                         break; /* found */
2144         }
2145
2146         /* This is expected with the eventdev API, which blindly
2147          * attempts to unmap all queues.
2148          */
2149         if (i == DLB2_MAX_NUM_QIDS_PER_LDB_CQ) {
2150                 DLB2_LOG_DBG("dlb2: ignoring LB QID %d not mapped for qm_port %d.\n",
2151                              ev_queue->qm_queue.id,
2152                              ev_port->qm_port.id);
2153                 return 0;
2154         }
2155
2156         ret = dlb2_hw_unmap_ldb_qid_from_port(&dlb2->qm_instance,
2157                                               ev_port->qm_port.id,
2158                                               ev_queue->qm_queue.id);
2159         if (!ret)
2160                 ev_port->link[i].mapped = false;
2161
2162         return ret;
2163 }
2164
2165 static int
2166 dlb2_eventdev_port_unlink(struct rte_eventdev *dev, void *event_port,
2167                           uint8_t queues[], uint16_t nb_unlinks)
2168 {
2169         struct dlb2_eventdev_port *ev_port = event_port;
2170         struct dlb2_eventdev *dlb2;
2171         int i;
2172
2173         RTE_SET_USED(dev);
2174
2175         if (!ev_port->setup_done) {
2176                 DLB2_LOG_ERR("dlb2: evport %d is not configured\n",
2177                              ev_port->id);
2178                 rte_errno = -EINVAL;
2179                 return 0;
2180         }
2181
2182         if (queues == NULL || nb_unlinks == 0) {
2183                 DLB2_LOG_DBG("dlb2: queues is NULL or nb_unlinks is 0\n");
2184                 return 0; /* Ignore and return success */
2185         }
2186
2187         if (ev_port->qm_port.is_directed) {
2188                 DLB2_LOG_DBG("dlb2: ignore unlink from dir port %d\n",
2189                              ev_port->id);
2190                 rte_errno = 0;
2191                 return nb_unlinks; /* as if success */
2192         }
2193
2194         dlb2 = ev_port->dlb2;
2195
2196         for (i = 0; i < nb_unlinks; i++) {
2197                 struct dlb2_eventdev_queue *ev_queue;
2198                 int ret, j;
2199
2200                 if (queues[i] >= dlb2->num_queues) {
2201                         DLB2_LOG_ERR("dlb2: invalid queue id %d\n", queues[i]);
2202                         rte_errno = -EINVAL;
2203                         return i; /* return index of offending queue */
2204                 }
2205
2206                 ev_queue = &dlb2->ev_queues[queues[i]];
2207
2208                 /* Does a link exist? */
2209                 for (j = 0; j < DLB2_MAX_NUM_QIDS_PER_LDB_CQ; j++)
2210                         if (ev_port->link[j].queue_id == queues[i] &&
2211                             ev_port->link[j].valid)
2212                                 break;
2213
2214                 if (j == DLB2_MAX_NUM_QIDS_PER_LDB_CQ)
2215                         continue;
2216
2217                 ret = dlb2_event_queue_detach_ldb(dlb2, ev_port, ev_queue);
2218                 if (ret) {
2219                         DLB2_LOG_ERR("unlink err=%d for port %d queue %d\n",
2220                                      ret, ev_port->id, queues[i]);
2221                         rte_errno = -ENOENT;
2222                         return i; /* return index of offending queue */
2223                 }
2224
2225                 ev_port->link[j].valid = false;
2226                 ev_port->num_links--;
2227                 ev_queue->num_links--;
2228         }
2229
2230         return nb_unlinks;
2231 }
2232
2233 static int
2234 dlb2_eventdev_port_unlinks_in_progress(struct rte_eventdev *dev,
2235                                        void *event_port)
2236 {
2237         struct dlb2_eventdev_port *ev_port = event_port;
2238         struct dlb2_eventdev *dlb2;
2239         struct dlb2_hw_dev *handle;
2240         struct dlb2_pending_port_unmaps_args cfg;
2241         int ret;
2242
2243         RTE_SET_USED(dev);
2244
2245         if (!ev_port->setup_done) {
2246                 DLB2_LOG_ERR("dlb2: evport %d is not configured\n",
2247                              ev_port->id);
2248                 rte_errno = -EINVAL;
2249                 return 0;
2250         }
2251
2252         cfg.port_id = ev_port->qm_port.id;
2253         dlb2 = ev_port->dlb2;
2254         handle = &dlb2->qm_instance;
2255         ret = dlb2_iface_pending_port_unmaps(handle, &cfg);
2256
2257         if (ret < 0) {
2258                 DLB2_LOG_ERR("dlb2: num_unlinks_in_progress ret=%d (driver status: %s)\n",
2259                              ret, dlb2_error_strings[cfg.response.status]);
2260                 return ret;
2261         }
2262
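        /* On success, response.id carries the number of unmap operations
         * still in progress for this port.
         */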
2263         return cfg.response.id;
2264 }
2265
2266 static int
2267 dlb2_eventdev_reapply_configuration(struct rte_eventdev *dev)
2268 {
2269         struct dlb2_eventdev *dlb2 = dlb2_pmd_priv(dev);
2270         int ret, i;
2271
2272         /* If an event queue or port was previously configured, but hasn't been
2273          * reconfigured, reapply its original configuration.
2274          */
2275         for (i = 0; i < dlb2->num_queues; i++) {
2276                 struct dlb2_eventdev_queue *ev_queue;
2277
2278                 ev_queue = &dlb2->ev_queues[i];
2279
2280                 if (ev_queue->qm_queue.config_state != DLB2_PREV_CONFIGURED)
2281                         continue;
2282
2283                 ret = dlb2_eventdev_queue_setup(dev, i, &ev_queue->conf);
2284                 if (ret < 0) {
2285                         DLB2_LOG_ERR("dlb2: failed to reconfigure queue %d\n", i);
2286                         return ret;
2287                 }
2288         }
2289
2290         for (i = 0; i < dlb2->num_ports; i++) {
2291                 struct dlb2_eventdev_port *ev_port = &dlb2->ev_ports[i];
2292
2293                 if (ev_port->qm_port.config_state != DLB2_PREV_CONFIGURED)
2294                         continue;
2295
2296                 ret = dlb2_eventdev_port_setup(dev, i, &ev_port->conf);
2297                 if (ret < 0) {
2298                         DLB2_LOG_ERR("dlb2: failed to reconfigure ev_port %d\n",
2299                                      i);
2300                         return ret;
2301                 }
2302         }
2303
2304         return 0;
2305 }
2306
2307 static int
2308 dlb2_eventdev_apply_port_links(struct rte_eventdev *dev)
2309 {
2310         struct dlb2_eventdev *dlb2 = dlb2_pmd_priv(dev);
2311         int i;
2312
2313         /* Perform requested port->queue links */
2314         for (i = 0; i < dlb2->num_ports; i++) {
2315                 struct dlb2_eventdev_port *ev_port = &dlb2->ev_ports[i];
2316                 int j;
2317
2318                 for (j = 0; j < DLB2_MAX_NUM_QIDS_PER_LDB_CQ; j++) {
2319                         struct dlb2_eventdev_queue *ev_queue;
2320                         uint8_t prio, queue_id;
2321
2322                         if (!ev_port->link[j].valid)
2323                                 continue;
2324
2325                         prio = ev_port->link[j].priority;
2326                         queue_id = ev_port->link[j].queue_id;
2327
2328                         if (dlb2_validate_port_link(ev_port, queue_id, true, j))
2329                                 return -EINVAL;
2330
2331                         ev_queue = &dlb2->ev_queues[queue_id];
2332
2333                         if (dlb2_do_port_link(dev, ev_queue, ev_port, prio))
2334                                 return -EINVAL;
2335                 }
2336         }
2337
2338         return 0;
2339 }
2340
2341 static int
2342 dlb2_eventdev_start(struct rte_eventdev *dev)
2343 {
2344         struct dlb2_eventdev *dlb2 = dlb2_pmd_priv(dev);
2345         struct dlb2_hw_dev *handle = &dlb2->qm_instance;
2346         struct dlb2_start_domain_args cfg;
2347         int ret, i;
2348
2349         rte_spinlock_lock(&dlb2->qm_instance.resource_lock);
2350         if (dlb2->run_state != DLB2_RUN_STATE_STOPPED) {
2351                 DLB2_LOG_ERR("bad state %d for dev_start\n",
2352                              (int)dlb2->run_state);
2353                 rte_spinlock_unlock(&dlb2->qm_instance.resource_lock);
2354                 return -EINVAL;
2355         }
2356         dlb2->run_state = DLB2_RUN_STATE_STARTING;
2357         rte_spinlock_unlock(&dlb2->qm_instance.resource_lock);
2358
2359         /* If the device was configured more than once, some event ports and/or
2360          * queues may need to be reconfigured.
2361          */
2362         ret = dlb2_eventdev_reapply_configuration(dev);
2363         if (ret)
2364                 return ret;
2365
2366         /* The DLB PMD delays port links until the device is started. */
2367         ret = dlb2_eventdev_apply_port_links(dev);
2368         if (ret)
2369                 return ret;
2370
2371         for (i = 0; i < dlb2->num_ports; i++) {
2372                 if (!dlb2->ev_ports[i].setup_done) {
2373                         DLB2_LOG_ERR("dlb2: port %d not setup\n", i);
2374                         return -ESTALE;
2375                 }
2376         }
2377
2378         for (i = 0; i < dlb2->num_queues; i++) {
2379                 if (dlb2->ev_queues[i].num_links == 0) {
2380                         DLB2_LOG_ERR("dlb2: queue %d is not linked\n", i);
2381                         return -ENOLINK;
2382                 }
2383         }
2384
2385         ret = dlb2_iface_sched_domain_start(handle, &cfg);
2386         if (ret < 0) {
2387                 DLB2_LOG_ERR("dlb2: sched_domain_start ret=%d (driver status: %s)\n",
2388                              ret, dlb2_error_strings[cfg.response.status]);
2389                 return ret;
2390         }
2391
2392         dlb2->run_state = DLB2_RUN_STATE_STARTED;
2393         DLB2_LOG_DBG("dlb2: sched_domain_start completed OK\n");
2394
2395         return 0;
2396 }
2397
2398 static uint8_t cmd_byte_map[DLB2_NUM_PORT_TYPES][DLB2_NUM_HW_SCHED_TYPES] = {
2399         {
2400                 /* Load-balanced cmd bytes */
2401                 [RTE_EVENT_OP_NEW] = DLB2_NEW_CMD_BYTE,
2402                 [RTE_EVENT_OP_FORWARD] = DLB2_FWD_CMD_BYTE,
2403                 [RTE_EVENT_OP_RELEASE] = DLB2_COMP_CMD_BYTE,
2404         },
2405         {
2406                 /* Directed cmd bytes */
2407                 [RTE_EVENT_OP_NEW] = DLB2_NEW_CMD_BYTE,
2408                 [RTE_EVENT_OP_FORWARD] = DLB2_NEW_CMD_BYTE,
2409                 [RTE_EVENT_OP_RELEASE] = DLB2_NOOP_CMD_BYTE,
2410         },
2411 };
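/* Note (rationale inferred from the table above): for directed traffic,
 * FORWARD degenerates to NEW and RELEASE to a no-op, likely because directed
 * queues carry no load-balanced scheduling state that needs releasing.
 */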
2412
2413 static inline uint32_t
2414 dlb2_port_credits_get(struct dlb2_port *qm_port,
2415                       enum dlb2_hw_queue_types type)
2416 {
2417         uint32_t credits = *qm_port->credit_pool[type];
2418         /* By default hw_credit_quanta is DLB2_SW_CREDIT_BATCH_SZ */
2419         uint32_t batch_size = qm_port->hw_credit_quanta;
2420
2421         if (unlikely(credits < batch_size))
2422                 batch_size = credits;
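        /* e.g. if only 3 credits remain and the quanta is 32, batch_size is
         * clamped to 3 and a successful CAS drains the pool to 0; if the CAS
         * loses a race with another port, no credits are taken and 0 is
         * returned.
         */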
2423
2424         if (likely(credits &&
2425                    __atomic_compare_exchange_n(
2426                         qm_port->credit_pool[type],
2427                         &credits, credits - batch_size, false,
2428                         __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST)))
2429                 return batch_size;
2430         else
2431                 return 0;
2432 }
2433
2434 static inline void
2435 dlb2_replenish_sw_credits(struct dlb2_eventdev *dlb2,
2436                           struct dlb2_eventdev_port *ev_port)
2437 {
2438         uint16_t quanta = ev_port->credit_update_quanta;
2439
2440         if (ev_port->inflight_credits >= quanta * 2) {
2441                 /* Replenish credits, saving one quanta for enqueues */
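                /* e.g. with quanta = 32 and 70 cached credits, 38 are
                 * returned to the device-wide inflight count and 32 stay
                 * cached for future enqueues.
                 */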
2442                 uint16_t val = ev_port->inflight_credits - quanta;
2443
2444                 __atomic_fetch_sub(&dlb2->inflights, val, __ATOMIC_SEQ_CST);
2445                 ev_port->inflight_credits -= val;
2446         }
2447 }
2448
2449 static inline int
2450 dlb2_check_enqueue_sw_credits(struct dlb2_eventdev *dlb2,
2451                               struct dlb2_eventdev_port *ev_port)
2452 {
2453         uint32_t sw_inflights = __atomic_load_n(&dlb2->inflights,
2454                                                 __ATOMIC_SEQ_CST);
2455         const int num = 1;
2456
2457         if (unlikely(ev_port->inflight_max < sw_inflights)) {
2458                 DLB2_INC_STAT(ev_port->stats.traffic.tx_nospc_inflight_max, 1);
2459                 rte_errno = -ENOSPC;
2460                 return 1;
2461         }
2462
2463         if (ev_port->inflight_credits < num) {
2464                 /* check if event enqueue brings ev_port over max threshold */
2465                 uint32_t credit_update_quanta = ev_port->credit_update_quanta;
2466
2467                 if (sw_inflights + credit_update_quanta >
2468                                 dlb2->new_event_limit) {
2469                         DLB2_INC_STAT(
2470                         ev_port->stats.traffic.tx_nospc_new_event_limit,
2471                         1);
2472                         rte_errno = -ENOSPC;
2473                         return 1;
2474                 }
2475
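                /* Reserve a full quanta of new-event slots up front; later
                 * enqueues draw from this local cache instead of touching
                 * the shared inflight counter.
                 */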
2476                 __atomic_fetch_add(&dlb2->inflights, credit_update_quanta,
2477                                    __ATOMIC_SEQ_CST);
2478                 ev_port->inflight_credits += (credit_update_quanta);
2479
2480                 if (ev_port->inflight_credits < num) {
2481                         DLB2_INC_STAT(
2482                         ev_port->stats.traffic.tx_nospc_inflight_credits,
2483                         1);
2484                         rte_errno = -ENOSPC;
2485                         return 1;
2486                 }
2487         }
2488
2489         return 0;
2490 }
2491
2492 static inline int
2493 dlb2_check_enqueue_hw_ldb_credits(struct dlb2_port *qm_port)
2494 {
2495         if (unlikely(qm_port->cached_ldb_credits == 0)) {
2496                 qm_port->cached_ldb_credits =
2497                         dlb2_port_credits_get(qm_port,
2498                                               DLB2_LDB_QUEUE);
2499                 if (unlikely(qm_port->cached_ldb_credits == 0)) {
2500                         DLB2_INC_STAT(
2501                         qm_port->ev_port->stats.traffic.tx_nospc_ldb_hw_credits,
2502                         1);
2503                         DLB2_LOG_DBG("ldb credits exhausted\n");
2504                         return 1; /* credits exhausted */
2505                 }
2506         }
2507
2508         return 0;
2509 }
2510
2511 static inline int
2512 dlb2_check_enqueue_hw_dir_credits(struct dlb2_port *qm_port)
2513 {
2514         if (unlikely(qm_port->cached_dir_credits == 0)) {
2515                 qm_port->cached_dir_credits =
2516                         dlb2_port_credits_get(qm_port,
2517                                               DLB2_DIR_QUEUE);
2518                 if (unlikely(qm_port->cached_dir_credits == 0)) {
2519                         DLB2_INC_STAT(
2520                         qm_port->ev_port->stats.traffic.tx_nospc_dir_hw_credits,
2521                         1);
2522                         DLB2_LOG_DBG("dir credits exhausted\n");
2523                         return 1; /* credits exhausted */
2524                 }
2525         }
2526
2527         return 0;
2528 }
2529
2530 static inline int
2531 dlb2_check_enqueue_hw_credits(struct dlb2_port *qm_port)
2532 {
2533         if (unlikely(qm_port->cached_credits == 0)) {
2534                 qm_port->cached_credits =
2535                         dlb2_port_credits_get(qm_port,
2536                                               DLB2_COMBINED_POOL);
2537                 if (unlikely(qm_port->cached_credits == 0)) {
2538                         DLB2_INC_STAT(
2539                         qm_port->ev_port->stats.traffic.tx_nospc_hw_credits, 1);
2540                         DLB2_LOG_DBG("credits exhausted\n");
2541                         return 1; /* credits exhausted */
2542                 }
2543         }
2544
2545         return 0;
2546 }
2547
2548 static __rte_always_inline void
2549 dlb2_pp_write(struct dlb2_enqueue_qe *qe4,
2550               struct process_local_port_data *port_data)
2551 {
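        /* A single MOVDIR64B posts the whole 64B cache line (four 16B QEs)
         * to the port's producer-port MMIO window in one shot.
         */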
2552         dlb2_movdir64b(port_data->pp_addr, qe4);
2553 }
2554
2555 static inline int
2556 dlb2_consume_qe_immediate(struct dlb2_port *qm_port, int num)
2557 {
2558         struct process_local_port_data *port_data;
2559         struct dlb2_cq_pop_qe *qe;
2560
2561         RTE_ASSERT(qm_port->config_state == DLB2_CONFIGURED);
2562
2563         qe = qm_port->consume_qe;
2564
2565         qe->tokens = num - 1;
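        /* The tokens field appears to be zero-based: writing num - 1
         * returns num CQ tokens to the hardware.
         */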
2566
2567         /* No store fence needed since no pointer is being sent, and CQ token
2568          * pops can be safely reordered with other HCWs.
2569          */
2570         port_data = &dlb2_port[qm_port->id][PORT_TYPE(qm_port)];
2571
2572         dlb2_movntdq_single(port_data->pp_addr, qe);
2573
2574         DLB2_LOG_DBG("dlb2: consume immediate - %d QEs\n", num);
2575
2576         qm_port->owed_tokens = 0;
2577
2578         return 0;
2579 }
2580
2581 static inline void
2582 dlb2_hw_do_enqueue(struct dlb2_port *qm_port,
2583                    bool do_sfence,
2584                    struct process_local_port_data *port_data)
2585 {
2586         /* Since MOVDIR64B is weakly-ordered, use an SFENCE to ensure that
2587          * application writes complete before enqueueing the QE.
2588          */
2589         if (do_sfence)
2590                 rte_wmb();
2591
2592         dlb2_pp_write(qm_port->qe4, port_data);
2593 }
2594
2595 static inline void
2596 dlb2_construct_token_pop_qe(struct dlb2_port *qm_port, int idx)
2597 {
2598         struct dlb2_cq_pop_qe *qe = (void *)qm_port->qe4;
2599         int num = qm_port->owed_tokens;
2600
2601         qe[idx].cmd_byte = DLB2_POP_CMD_BYTE;
2602         qe[idx].tokens = num - 1;
2603
2604         qm_port->owed_tokens = 0;
2605 }
2606
2607 static inline void
2608 dlb2_event_build_hcws(struct dlb2_port *qm_port,
2609                       const struct rte_event ev[],
2610                       int num,
2611                       uint8_t *sched_type,
2612                       uint8_t *queue_id)
2613 {
2614         struct dlb2_enqueue_qe *qe;
2615         uint16_t sched_word[4];
2616         __m128i sse_qe[2];
2617         int i;
2618
2619         qe = qm_port->qe4;
2620
2621         sse_qe[0] = _mm_setzero_si128();
2622         sse_qe[1] = _mm_setzero_si128();
2623
2624         switch (num) {
2625         case 4:
2626                 /* Construct the metadata portion of two HCWs in one 128b SSE
2627                  * register. HCW metadata is constructed in the SSE registers
2628                  * like so:
2629                  * sse_qe[0][63:0]:   qe[0]'s metadata
2630                  * sse_qe[0][127:64]: qe[1]'s metadata
2631                  * sse_qe[1][63:0]:   qe[2]'s metadata
2632                  * sse_qe[1][127:64]: qe[3]'s metadata
2633                  */
2634
2635                 /* Convert the event operation into a command byte and store it
2636                  * in the metadata:
2637                  * sse_qe[0][63:56]   = cmd_byte_map[is_directed][ev[0].op]
2638                  * sse_qe[0][127:120] = cmd_byte_map[is_directed][ev[1].op]
2639                  * sse_qe[1][63:56]   = cmd_byte_map[is_directed][ev[2].op]
2640                  * sse_qe[1][127:120] = cmd_byte_map[is_directed][ev[3].op]
2641                  */
2642 #define DLB2_QE_CMD_BYTE 7
2643                 sse_qe[0] = _mm_insert_epi8(sse_qe[0],
2644                                 cmd_byte_map[qm_port->is_directed][ev[0].op],
2645                                 DLB2_QE_CMD_BYTE);
2646                 sse_qe[0] = _mm_insert_epi8(sse_qe[0],
2647                                 cmd_byte_map[qm_port->is_directed][ev[1].op],
2648                                 DLB2_QE_CMD_BYTE + 8);
2649                 sse_qe[1] = _mm_insert_epi8(sse_qe[1],
2650                                 cmd_byte_map[qm_port->is_directed][ev[2].op],
2651                                 DLB2_QE_CMD_BYTE);
2652                 sse_qe[1] = _mm_insert_epi8(sse_qe[1],
2653                                 cmd_byte_map[qm_port->is_directed][ev[3].op],
2654                                 DLB2_QE_CMD_BYTE + 8);
2655
2656                 /* Store priority, scheduling type, and queue ID in the sched
2657                  * word array because these values are re-used when the
2658                  * destination is a directed queue.
2659                  */
2660                 sched_word[0] = EV_TO_DLB2_PRIO(ev[0].priority) << 10 |
2661                                 sched_type[0] << 8 |
2662                                 queue_id[0];
2663                 sched_word[1] = EV_TO_DLB2_PRIO(ev[1].priority) << 10 |
2664                                 sched_type[1] << 8 |
2665                                 queue_id[1];
2666                 sched_word[2] = EV_TO_DLB2_PRIO(ev[2].priority) << 10 |
2667                                 sched_type[2] << 8 |
2668                                 queue_id[2];
2669                 sched_word[3] = EV_TO_DLB2_PRIO(ev[3].priority) << 10 |
2670                                 sched_type[3] << 8 |
2671                                 queue_id[3];
2672
2673                 /* Store the event priority, scheduling type, and queue ID in
2674                  * the metadata:
2675                  * sse_qe[0][31:16] = sched_word[0]
2676                  * sse_qe[0][95:80] = sched_word[1]
2677                  * sse_qe[1][31:16] = sched_word[2]
2678                  * sse_qe[1][95:80] = sched_word[3]
2679                  */
2680 #define DLB2_QE_QID_SCHED_WORD 1
2681                 sse_qe[0] = _mm_insert_epi16(sse_qe[0],
2682                                              sched_word[0],
2683                                              DLB2_QE_QID_SCHED_WORD);
2684                 sse_qe[0] = _mm_insert_epi16(sse_qe[0],
2685                                              sched_word[1],
2686                                              DLB2_QE_QID_SCHED_WORD + 4);
2687                 sse_qe[1] = _mm_insert_epi16(sse_qe[1],
2688                                              sched_word[2],
2689                                              DLB2_QE_QID_SCHED_WORD);
2690                 sse_qe[1] = _mm_insert_epi16(sse_qe[1],
2691                                              sched_word[3],
2692                                              DLB2_QE_QID_SCHED_WORD + 4);
2693
2694                 /* If the destination is a load-balanced queue, store the lock
2695                  * ID. If it is a directed queue, DLB places this field in
2696                  * bytes 10-11 of the received QE, so we format it accordingly:
2697                  * sse_qe[0][47:32]  = dir queue ? sched_word[0] : flow_id[0]
2698                  * sse_qe[0][111:96] = dir queue ? sched_word[1] : flow_id[1]
2699                  * sse_qe[1][47:32]  = dir queue ? sched_word[2] : flow_id[2]
2700                  * sse_qe[1][111:96] = dir queue ? sched_word[3] : flow_id[3]
2701                  */
2702 #define DLB2_QE_LOCK_ID_WORD 2
2703                 sse_qe[0] = _mm_insert_epi16(sse_qe[0],
2704                                 (sched_type[0] == DLB2_SCHED_DIRECTED) ?
2705                                         sched_word[0] : ev[0].flow_id,
2706                                 DLB2_QE_LOCK_ID_WORD);
2707                 sse_qe[0] = _mm_insert_epi16(sse_qe[0],
2708                                 (sched_type[1] == DLB2_SCHED_DIRECTED) ?
2709                                         sched_word[1] : ev[1].flow_id,
2710                                 DLB2_QE_LOCK_ID_WORD + 4);
2711                 sse_qe[1] = _mm_insert_epi16(sse_qe[1],
2712                                 (sched_type[2] == DLB2_SCHED_DIRECTED) ?
2713                                         sched_word[2] : ev[2].flow_id,
2714                                 DLB2_QE_LOCK_ID_WORD);
2715                 sse_qe[1] = _mm_insert_epi16(sse_qe[1],
2716                                 (sched_type[3] == DLB2_SCHED_DIRECTED) ?
2717                                         sched_word[3] : ev[3].flow_id,
2718                                 DLB2_QE_LOCK_ID_WORD + 4);
2719
2720                 /* Store the event type and sub event type in the metadata:
2721                  * sse_qe[0][15:0]  = flow_id[0]
2722                  * sse_qe[0][79:64] = flow_id[1]
2723                  * sse_qe[1][15:0]  = flow_id[2]
2724                  * sse_qe[1][79:64] = flow_id[3]
2725                  */
2726 #define DLB2_QE_EV_TYPE_WORD 0
2727                 sse_qe[0] = _mm_insert_epi16(sse_qe[0],
2728                                              ev[0].sub_event_type << 8 |
2729                                                 ev[0].event_type,
2730                                              DLB2_QE_EV_TYPE_WORD);
2731                 sse_qe[0] = _mm_insert_epi16(sse_qe[0],
2732                                              ev[1].sub_event_type << 8 |
2733                                                 ev[1].event_type,
2734                                              DLB2_QE_EV_TYPE_WORD + 4);
2735                 sse_qe[1] = _mm_insert_epi16(sse_qe[1],
2736                                              ev[2].sub_event_type << 8 |
2737                                                 ev[2].event_type,
2738                                              DLB2_QE_EV_TYPE_WORD);
2739                 sse_qe[1] = _mm_insert_epi16(sse_qe[1],
2740                                              ev[3].sub_event_type << 8 |
2741                                                 ev[3].event_type,
2742                                              DLB2_QE_EV_TYPE_WORD + 4);
2743
2744                 /* Store the metadata to memory (use the double-precision
2745                  * _mm_storeh_pd because there is no integer function for
2746                  * storing the upper 64b):
2747                  * qe[0] metadata = sse_qe[0][63:0]
2748                  * qe[1] metadata = sse_qe[0][127:64]
2749                  * qe[2] metadata = sse_qe[1][63:0]
2750                  * qe[3] metadata = sse_qe[1][127:64]
2751                  */
2752                 _mm_storel_epi64((__m128i *)&qe[0].u.opaque_data, sse_qe[0]);
2753                 _mm_storeh_pd((double *)&qe[1].u.opaque_data,
2754                               (__m128d)sse_qe[0]);
2755                 _mm_storel_epi64((__m128i *)&qe[2].u.opaque_data, sse_qe[1]);
2756                 _mm_storeh_pd((double *)&qe[3].u.opaque_data,
2757                               (__m128d)sse_qe[1]);
2758
2759                 qe[0].data = ev[0].u64;
2760                 qe[1].data = ev[1].u64;
2761                 qe[2].data = ev[2].u64;
2762                 qe[3].data = ev[3].u64;
2763
2764                 break;
2765         case 3:
2766         case 2:
2767         case 1:
2768                 for (i = 0; i < num; i++) {
2769                         qe[i].cmd_byte =
2770                                 cmd_byte_map[qm_port->is_directed][ev[i].op];
2771                         qe[i].sched_type = sched_type[i];
2772                         qe[i].data = ev[i].u64;
2773                         qe[i].qid = queue_id[i];
2774                         qe[i].priority = EV_TO_DLB2_PRIO(ev[i].priority);
2775                         qe[i].lock_id = ev[i].flow_id;
2776                         if (sched_type[i] == DLB2_SCHED_DIRECTED) {
2777                                 struct dlb2_msg_info *info =
2778                                         (struct dlb2_msg_info *)&qe[i].lock_id;
2779
2780                                 info->qid = queue_id[i];
2781                                 info->sched_type = DLB2_SCHED_DIRECTED;
2782                                 info->priority = qe[i].priority;
2783                         }
2784                         qe[i].u.event_type.major = ev[i].event_type;
2785                         qe[i].u.event_type.sub = ev[i].sub_event_type;
2786                 }
2787                 break;
2788         case 0:
2789                 break;
2790         }
2791 }
2792
2793 static inline int
2794 dlb2_event_enqueue_prep(struct dlb2_eventdev_port *ev_port,
2795                         struct dlb2_port *qm_port,
2796                         const struct rte_event ev[],
2797                         uint8_t *sched_type,
2798                         uint8_t *queue_id)
2799 {
2800         struct dlb2_eventdev *dlb2 = ev_port->dlb2;
2801         struct dlb2_eventdev_queue *ev_queue;
2802         uint16_t *cached_credits = NULL;
2803         struct dlb2_queue *qm_queue;
2804
2805         ev_queue = &dlb2->ev_queues[ev->queue_id];
2806         qm_queue = &ev_queue->qm_queue;
2807         *queue_id = qm_queue->id;
2808
2809         /* Ignore sched_type and hardware credits on release events */
2810         if (ev->op == RTE_EVENT_OP_RELEASE)
2811                 goto op_check;
2812
2813         if (!qm_queue->is_directed) {
2814                 /* Load balanced destination queue */
2815
2816                 if (dlb2->version == DLB2_HW_V2) {
2817                         if (dlb2_check_enqueue_hw_ldb_credits(qm_port)) {
2818                                 rte_errno = -ENOSPC;
2819                                 return 1;
2820                         }
2821                         cached_credits = &qm_port->cached_ldb_credits;
2822                 } else {
2823                         if (dlb2_check_enqueue_hw_credits(qm_port)) {
2824                                 rte_errno = -ENOSPC;
2825                                 return 1;
2826                         }
2827                         cached_credits = &qm_port->cached_credits;
2828                 }
2829                 switch (ev->sched_type) {
2830                 case RTE_SCHED_TYPE_ORDERED:
2831                         DLB2_LOG_DBG("dlb2: put_qe: RTE_SCHED_TYPE_ORDERED\n");
2832                         if (qm_queue->sched_type != RTE_SCHED_TYPE_ORDERED) {
2833                                 DLB2_LOG_ERR("dlb2: tried to send ordered event to unordered queue %d\n",
2834                                              *queue_id);
2835                                 rte_errno = -EINVAL;
2836                                 return 1;
2837                         }
2838                         *sched_type = DLB2_SCHED_ORDERED;
2839                         break;
2840                 case RTE_SCHED_TYPE_ATOMIC:
2841                         DLB2_LOG_DBG("dlb2: put_qe: RTE_SCHED_TYPE_ATOMIC\n");
2842                         *sched_type = DLB2_SCHED_ATOMIC;
2843                         break;
2844                 case RTE_SCHED_TYPE_PARALLEL:
2845                         DLB2_LOG_DBG("dlb2: put_qe: RTE_SCHED_TYPE_PARALLEL\n");
2846                         if (qm_queue->sched_type == RTE_SCHED_TYPE_ORDERED)
2847                                 *sched_type = DLB2_SCHED_ORDERED;
2848                         else
2849                                 *sched_type = DLB2_SCHED_UNORDERED;
2850                         break;
2851                 default:
2852                         DLB2_LOG_ERR("Unsupported LDB sched type in put_qe\n");
2853                         DLB2_INC_STAT(ev_port->stats.tx_invalid, 1);
2854                         rte_errno = -EINVAL;
2855                         return 1;
2856                 }
2857         } else {
2858                 /* Directed destination queue */
2859
2860                 if (dlb2->version == DLB2_HW_V2) {
2861                         if (dlb2_check_enqueue_hw_dir_credits(qm_port)) {
2862                                 rte_errno = -ENOSPC;
2863                                 return 1;
2864                         }
2865                         cached_credits = &qm_port->cached_dir_credits;
2866                 } else {
2867                         if (dlb2_check_enqueue_hw_credits(qm_port)) {
2868                                 rte_errno = -ENOSPC;
2869                                 return 1;
2870                         }
2871                         cached_credits = &qm_port->cached_credits;
2872                 }
2873                 DLB2_LOG_DBG("dlb2: put_qe: RTE_SCHED_TYPE_DIRECTED\n");
2874
2875                 *sched_type = DLB2_SCHED_DIRECTED;
2876         }
2877
2878 op_check:
2879         switch (ev->op) {
2880         case RTE_EVENT_OP_NEW:
2881                 /* Check that a sw credit is available */
2882                 if (dlb2_check_enqueue_sw_credits(dlb2, ev_port)) {
2883                         rte_errno = -ENOSPC;
2884                         return 1;
2885                 }
2886                 ev_port->inflight_credits--;
2887                 (*cached_credits)--;
2888                 break;
2889         case RTE_EVENT_OP_FORWARD:
2890                 /* Check for outstanding_releases underflow. If this occurs,
2891                  * the application is not using the EVENT_OPs correctly; for
2892                  * example, forwarding or releasing events that were not
2893                  * dequeued.
2894                  */
2895                 RTE_ASSERT(ev_port->outstanding_releases > 0);
2896                 ev_port->outstanding_releases--;
2897                 qm_port->issued_releases++;
2898                 (*cached_credits)--;
2899                 break;
2900         case RTE_EVENT_OP_RELEASE:
2901                 ev_port->inflight_credits++;
2902                 /* Check for outstanding_releases underflow. If this occurs,
2903                  * the application is not using the EVENT_OPs correctly; for
2904                  * example, forwarding or releasing events that were not
2905                  * dequeued.
2906                  */
2907                 RTE_ASSERT(ev_port->outstanding_releases > 0);
2908                 ev_port->outstanding_releases--;
2909                 qm_port->issued_releases++;
2910
2911                 /* Replenish s/w credits if enough are cached */
2912                 dlb2_replenish_sw_credits(dlb2, ev_port);
2913                 break;
2914         }
2915
2916         DLB2_INC_STAT(ev_port->stats.tx_op_cnt[ev->op], 1);
2917         DLB2_INC_STAT(ev_port->stats.traffic.tx_ok, 1);
2918
2919 #ifndef RTE_LIBRTE_PMD_DLB_QUELL_STATS
2920         if (ev->op != RTE_EVENT_OP_RELEASE) {
2921                 DLB2_INC_STAT(ev_port->stats.queue[ev->queue_id].enq_ok, 1);
2922                 DLB2_INC_STAT(ev_port->stats.tx_sched_cnt[*sched_type], 1);
2923         }
2924 #endif
2925
2926         return 0;
2927 }
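/*
 * Illustrative sketch only, not part of the PMD: one way an application
 * could react to the credit checks performed above in
 * dlb2_event_enqueue_prep(). When an enqueue returns fewer events than
 * requested and rte_errno is -ENOSPC, hardware or software credits are
 * temporarily exhausted and the remainder can simply be retried.
 * dev_id and port_id are placeholders.
 */
static inline void
dlb2_example_enqueue_all(uint8_t dev_id, uint8_t port_id,
                         const struct rte_event *evs, uint16_t n)
{
        uint16_t sent = 0;

        while (sent < n) {
                uint16_t done = rte_event_enqueue_burst(dev_id, port_id,
                                                        &evs[sent], n - sent);

                /* Give up on errors other than transient credit exhaustion */
                if (done == 0 && rte_errno != -ENOSPC)
                        break;

                sent += done;
        }
}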
2928
2929 static inline uint16_t
2930 __dlb2_event_enqueue_burst(void *event_port,
2931                            const struct rte_event events[],
2932                            uint16_t num,
2933                            bool use_delayed)
2934 {
2935         struct dlb2_eventdev_port *ev_port = event_port;
2936         struct dlb2_port *qm_port = &ev_port->qm_port;
2937         struct process_local_port_data *port_data;
2938         int i;
2939
2940         RTE_ASSERT(ev_port->enq_configured);
2941         RTE_ASSERT(events != NULL);
2942
2943         i = 0;
2944
2945         port_data = &dlb2_port[qm_port->id][PORT_TYPE(qm_port)];
2946
2947         while (i < num) {
2948                 uint8_t sched_types[DLB2_NUM_QES_PER_CACHE_LINE];
2949                 uint8_t queue_ids[DLB2_NUM_QES_PER_CACHE_LINE];
2950                 int pop_offs = 0;
2951                 int j = 0;
2952
2953                 memset(qm_port->qe4,
2954                        0,
2955                        DLB2_NUM_QES_PER_CACHE_LINE *
2956                        sizeof(struct dlb2_enqueue_qe));
2957
2958                 for (; j < DLB2_NUM_QES_PER_CACHE_LINE && (i + j) < num; j++) {
2959                         const struct rte_event *ev = &events[i + j];
2960                         int16_t thresh = qm_port->token_pop_thresh;
2961
2962                         if (use_delayed &&
2963                             qm_port->token_pop_mode == DELAYED_POP &&
2964                             (ev->op == RTE_EVENT_OP_FORWARD ||
2965                              ev->op == RTE_EVENT_OP_RELEASE) &&
2966                             qm_port->issued_releases >= thresh - 1) {
2967                                 /* Insert the token pop QE and break out. This
2968                                  * may result in a partial HCW, but that is
2969                                  * simpler than supporting arbitrary QE
2970                                  * insertion.
2971                                  */
2972                                 dlb2_construct_token_pop_qe(qm_port, j);
2973
2974                                 /* Reset the releases for the next QE batch */
2975                                 qm_port->issued_releases -= thresh;
2976
2977                                 pop_offs = 1;
2978                                 j++;
2979                                 break;
2980                         }
2981
2982                         if (dlb2_event_enqueue_prep(ev_port, qm_port, ev,
2983                                                     &sched_types[j],
2984                                                     &queue_ids[j]))
2985                                 break;
2986                 }
2987
2988                 if (j == 0)
2989                         break;
2990
2991                 dlb2_event_build_hcws(qm_port, &events[i], j - pop_offs,
2992                                       sched_types, queue_ids);
2993
2994                 dlb2_hw_do_enqueue(qm_port, i == 0, port_data);
2995
2996                 /* Don't include the token pop QE in the enqueue count */
2997                 i += j - pop_offs;
2998
2999                 /* Don't interpret j < DLB2_NUM_... as out-of-credits if
3000                  * pop_offs != 0
3001                  */
3002                 if (j < DLB2_NUM_QES_PER_CACHE_LINE && pop_offs == 0)
3003                         break;
3004         }
3005
3006         return i;
3007 }
3008
3009 static uint16_t
3010 dlb2_event_enqueue_burst(void *event_port,
3011                              const struct rte_event events[],
3012                              uint16_t num)
3013 {
3014         return __dlb2_event_enqueue_burst(event_port, events, num, false);
3015 }
3016
3017 static uint16_t
3018 dlb2_event_enqueue_burst_delayed(void *event_port,
3019                                      const struct rte_event events[],
3020                                      uint16_t num)
3021 {
3022         return __dlb2_event_enqueue_burst(event_port, events, num, true);
3023 }
3024
3025 static inline uint16_t
3026 dlb2_event_enqueue(void *event_port,
3027                    const struct rte_event events[])
3028 {
3029         return __dlb2_event_enqueue_burst(event_port, events, 1, false);
3030 }
3031
3032 static inline uint16_t
3033 dlb2_event_enqueue_delayed(void *event_port,
3034                            const struct rte_event events[])
3035 {
3036         return __dlb2_event_enqueue_burst(event_port, events, 1, true);
3037 }
3038
3039 static uint16_t
3040 dlb2_event_enqueue_new_burst(void *event_port,
3041                              const struct rte_event events[],
3042                              uint16_t num)
3043 {
3044         return __dlb2_event_enqueue_burst(event_port, events, num, false);
3045 }
3046
3047 static uint16_t
3048 dlb2_event_enqueue_new_burst_delayed(void *event_port,
3049                                      const struct rte_event events[],
3050                                      uint16_t num)
3051 {
3052         return __dlb2_event_enqueue_burst(event_port, events, num, true);
3053 }
3054
3055 static uint16_t
3056 dlb2_event_enqueue_forward_burst(void *event_port,
3057                                  const struct rte_event events[],
3058                                  uint16_t num)
3059 {
3060         return __dlb2_event_enqueue_burst(event_port, events, num, false);
3061 }
3062
3063 static uint16_t
3064 dlb2_event_enqueue_forward_burst_delayed(void *event_port,
3065                                          const struct rte_event events[],
3066                                          uint16_t num)
3067 {
3068         return __dlb2_event_enqueue_burst(event_port, events, num, true);
3069 }
3070
3071 static void
3072 dlb2_event_release(struct dlb2_eventdev *dlb2,
3073                    uint8_t port_id,
3074                    int n)
3075 {
3076         struct process_local_port_data *port_data;
3077         struct dlb2_eventdev_port *ev_port;
3078         struct dlb2_port *qm_port;
3079         int i;
3080
3081         if (port_id >= dlb2->num_ports) {
3082                 DLB2_LOG_ERR("Invalid port id %d in dlb2_event_release\n",
3083                              port_id);
3084                 rte_errno = -EINVAL;
3085                 return;
3086         }
3087
3088         ev_port = &dlb2->ev_ports[port_id];
3089         qm_port = &ev_port->qm_port;
3090         port_data = &dlb2_port[qm_port->id][PORT_TYPE(qm_port)];
3091
3092         i = 0;
3093
3094         if (qm_port->is_directed) {
3095                 i = n;
3096                 goto sw_credit_update;
3097         }
3098
3099         while (i < n) {
3100                 int pop_offs = 0;
3101                 int j = 0;
3102
3103                 /* Zero-out QEs */
3104                 _mm_storeu_si128((void *)&qm_port->qe4[0], _mm_setzero_si128());
3105                 _mm_storeu_si128((void *)&qm_port->qe4[1], _mm_setzero_si128());
3106                 _mm_storeu_si128((void *)&qm_port->qe4[2], _mm_setzero_si128());
3107                 _mm_storeu_si128((void *)&qm_port->qe4[3], _mm_setzero_si128());
3108
3109
3110                 for (; j < DLB2_NUM_QES_PER_CACHE_LINE && (i + j) < n; j++) {
3111                         int16_t thresh = qm_port->token_pop_thresh;
3112
3113                         if (qm_port->token_pop_mode == DELAYED_POP &&
3114                             qm_port->issued_releases >= thresh - 1) {
3115                                 /* Insert the token pop QE */
3116                                 dlb2_construct_token_pop_qe(qm_port, j);
3117
3118                                 /* Reset the releases for the next QE batch */
3119                                 qm_port->issued_releases -= thresh;
3120
3121                                 pop_offs = 1;
3122                                 j++;
3123                                 break;
3124                         }
3125
3126                         qm_port->qe4[j].cmd_byte = DLB2_COMP_CMD_BYTE;
3127                         qm_port->issued_releases++;
3128                 }
3129
3130                 dlb2_hw_do_enqueue(qm_port, i == 0, port_data);
3131
3132                 /* Don't include the token pop QE in the release count */
3133                 i += j - pop_offs;
3134         }
3135
3136 sw_credit_update:
3137         /* each release returns one credit */
3138         if (unlikely(!ev_port->outstanding_releases)) {
3139                 DLB2_LOG_ERR("%s: Outstanding releases underflowed.\n",
3140                              __func__);
3141                 return;
3142         }
3143         ev_port->outstanding_releases -= i;
3144         ev_port->inflight_credits += i;
3145
3146         /* Replenish s/w credits if enough releases are performed */
3147         dlb2_replenish_sw_credits(dlb2, ev_port);
3148 }
3149
3150 static inline void
3151 dlb2_port_credits_inc(struct dlb2_port *qm_port, int num)
3152 {
3153         uint32_t batch_size = qm_port->hw_credit_quanta;
3154
3155         /* increment port credits, and return to pool if exceeds threshold */
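        /* Worked example (a sketch of the policy below): with a
         * hw_credit_quanta of 32, credits accumulate locally until the
         * cached count reaches 2 * 32 = 64, at which point 32 credits are
         * returned to the shared pool and 32 remain cached for future
         * enqueues.
         */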
3156         if (!qm_port->is_directed) {
3157                 if (qm_port->dlb2->version == DLB2_HW_V2) {
3158                         qm_port->cached_ldb_credits += num;
3159                         if (qm_port->cached_ldb_credits >= 2 * batch_size) {
3160                                 __atomic_fetch_add(
3161                                         qm_port->credit_pool[DLB2_LDB_QUEUE],
3162                                         batch_size, __ATOMIC_SEQ_CST);
3163                                 qm_port->cached_ldb_credits -= batch_size;
3164                         }
3165                 } else {
3166                         qm_port->cached_credits += num;
3167                         if (qm_port->cached_credits >= 2 * batch_size) {
3168                                 __atomic_fetch_add(
3169                                       qm_port->credit_pool[DLB2_COMBINED_POOL],
3170                                       batch_size, __ATOMIC_SEQ_CST);
3171                                 qm_port->cached_credits -= batch_size;
3172                         }
3173                 }
3174         } else {
3175                 if (qm_port->dlb2->version == DLB2_HW_V2) {
3176                         qm_port->cached_dir_credits += num;
3177                         if (qm_port->cached_dir_credits >= 2 * batch_size) {
3178                                 __atomic_fetch_add(
3179                                         qm_port->credit_pool[DLB2_DIR_QUEUE],
3180                                         batch_size, __ATOMIC_SEQ_CST);
3181                                 qm_port->cached_dir_credits -= batch_size;
3182                         }
3183                 } else {
3184                         qm_port->cached_credits += num;
3185                         if (qm_port->cached_credits >= 2 * batch_size) {
3186                                 __atomic_fetch_add(
3187                                       qm_port->credit_pool[DLB2_COMBINED_POOL],
3188                                       batch_size, __ATOMIC_SEQ_CST);
3189                                 qm_port->cached_credits -= batch_size;
3190                         }
3191                 }
3192         }
3193 }
3194
3195 #define CLB_MASK_IDX 0
3196 #define CLB_VAL_IDX 1
3197 static int
3198 dlb2_monitor_callback(const uint64_t val,
3199                 const uint64_t opaque[RTE_POWER_MONITOR_OPAQUE_SZ])
3200 {
3201         /* abort if the value matches */
3202         return (val & opaque[CLB_MASK_IDX]) == opaque[CLB_VAL_IDX] ? -1 : 0;
3203 }
3204
3205 static inline int
3206 dlb2_dequeue_wait(struct dlb2_eventdev *dlb2,
3207                   struct dlb2_eventdev_port *ev_port,
3208                   struct dlb2_port *qm_port,
3209                   uint64_t timeout,
3210                   uint64_t start_ticks)
3211 {
3212         struct process_local_port_data *port_data;
3213         uint64_t elapsed_ticks;
3214
3215         port_data = &dlb2_port[qm_port->id][PORT_TYPE(qm_port)];
3216
3217         elapsed_ticks = rte_get_timer_cycles() - start_ticks;
3218
3219         /* Wait/poll time expired */
3220         if (elapsed_ticks >= timeout) {
3221                 return 1;
3222         } else if (dlb2->umwait_allowed) {
3223                 struct rte_power_monitor_cond pmc;
3224                 volatile struct dlb2_dequeue_qe *cq_base;
3225                 union {
3226                         uint64_t raw_qe[2];
3227                         struct dlb2_dequeue_qe qe;
3228                 } qe_mask;
3229                 uint64_t expected_value;
3230                 volatile uint64_t *monitor_addr;
3231
3232                 qe_mask.qe.cq_gen = 1; /* set mask */
3233
3234                 cq_base = port_data->cq_base;
3235                 monitor_addr = (volatile uint64_t *)(volatile void *)
3236                         &cq_base[qm_port->cq_idx];
3237                 monitor_addr++; /* cq_gen bit is in second 64bit location */
3238
3239                 if (qm_port->gen_bit)
3240                         expected_value = qe_mask.raw_qe[1];
3241                 else
3242                         expected_value = 0;
3243
3244                 pmc.addr = monitor_addr;
3245                 /* store expected value and comparison mask in opaque data */
3246                 pmc.opaque[CLB_VAL_IDX] = expected_value;
3247                 pmc.opaque[CLB_MASK_IDX] = qe_mask.raw_qe[1];
3248                 /* set up callback */
3249                 pmc.fn = dlb2_monitor_callback;
3250                 pmc.size = sizeof(uint64_t);
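                /* With this setup, dlb2_monitor_callback() aborts the wait
                 * (returns -1) as soon as the cq_gen bit of the next QE slot
                 * flips to the currently expected phase, i.e. as soon as the
                 * hardware writes a new QE.
                 */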
3251
3252                 rte_power_monitor(&pmc, timeout + start_ticks);
3253
3254                 DLB2_INC_STAT(ev_port->stats.traffic.rx_umonitor_umwait, 1);
3255         } else {
3256                 uint64_t poll_interval = dlb2->poll_interval;
3257                 uint64_t curr_ticks = rte_get_timer_cycles();
3258                 uint64_t init_ticks = curr_ticks;
3259
3260                 while ((curr_ticks - start_ticks < timeout) &&
3261                        (curr_ticks - init_ticks < poll_interval))
3262                         curr_ticks = rte_get_timer_cycles();
3263         }
3264
3265         return 0;
3266 }
3267
3268 static __rte_noinline int
3269 dlb2_process_dequeue_qes(struct dlb2_eventdev_port *ev_port,
3270                          struct dlb2_port *qm_port,
3271                          struct rte_event *events,
3272                          struct dlb2_dequeue_qe *qes,
3273                          int cnt)
3274 {
3275         uint8_t *qid_mappings = qm_port->qid_mappings;
3276         int i, num, evq_id;
3277
3278         for (i = 0, num = 0; i < cnt; i++) {
3279                 struct dlb2_dequeue_qe *qe = &qes[i];
3280                 int sched_type_map[DLB2_NUM_HW_SCHED_TYPES] = {
3281                         [DLB2_SCHED_ATOMIC] = RTE_SCHED_TYPE_ATOMIC,
3282                         [DLB2_SCHED_UNORDERED] = RTE_SCHED_TYPE_PARALLEL,
3283                         [DLB2_SCHED_ORDERED] = RTE_SCHED_TYPE_ORDERED,
3284                         [DLB2_SCHED_DIRECTED] = RTE_SCHED_TYPE_ATOMIC,
3285                 };
3286
3287                 /* Fill in event information.
3288                  * Note that flow_id must be embedded in the data by
3289                  * the app, such as the mbuf RSS hash field if the data
3290                  * buffer is a mbuf.
3291                  */
3292                 if (unlikely(qe->error)) {
3293                         DLB2_LOG_ERR("QE error bit ON\n");
3294                         DLB2_INC_STAT(ev_port->stats.traffic.rx_drop, 1);
3295                         dlb2_consume_qe_immediate(qm_port, 1);
3296                         continue; /* Ignore */
3297                 }
3298
3299                 events[num].u64 = qe->data;
3300                 events[num].flow_id = qe->flow_id;
3301                 events[num].priority = DLB2_TO_EV_PRIO((uint8_t)qe->priority);
3302                 events[num].event_type = qe->u.event_type.major;
3303                 events[num].sub_event_type = qe->u.event_type.sub;
3304                 events[num].sched_type = sched_type_map[qe->sched_type];
3305                 events[num].impl_opaque = qe->qid_depth;
3306
3307                 /* qid not preserved for directed queues */
3308                 if (qm_port->is_directed)
3309                         evq_id = ev_port->link[0].queue_id;
3310                 else
3311                         evq_id = qid_mappings[qe->qid];
3312
3313                 events[num].queue_id = evq_id;
3314                 DLB2_INC_STAT(
3315                         ev_port->stats.queue[evq_id].qid_depth[qe->qid_depth],
3316                         1);
3317                 DLB2_INC_STAT(ev_port->stats.rx_sched_cnt[qe->sched_type], 1);
3318                 num++;
3319         }
3320
3321         DLB2_INC_STAT(ev_port->stats.traffic.rx_ok, num);
3322
3323         return num;
3324 }
3325
3326 static inline int
3327 dlb2_process_dequeue_four_qes(struct dlb2_eventdev_port *ev_port,
3328                               struct dlb2_port *qm_port,
3329                               struct rte_event *events,
3330                               struct dlb2_dequeue_qe *qes)
3331 {
3332         int sched_type_map[] = {
3333                 [DLB2_SCHED_ATOMIC] = RTE_SCHED_TYPE_ATOMIC,
3334                 [DLB2_SCHED_UNORDERED] = RTE_SCHED_TYPE_PARALLEL,
3335                 [DLB2_SCHED_ORDERED] = RTE_SCHED_TYPE_ORDERED,
3336                 [DLB2_SCHED_DIRECTED] = RTE_SCHED_TYPE_ATOMIC,
3337         };
3338         const int num_events = DLB2_NUM_QES_PER_CACHE_LINE;
3339         uint8_t *qid_mappings = qm_port->qid_mappings;
3340         __m128i sse_evt[2];
3341
3342         /* In the unlikely case that any of the QE error bits are set, process
3343          * them one at a time.
3344          */
3345         if (unlikely(qes[0].error || qes[1].error ||
3346                      qes[2].error || qes[3].error))
3347                 return dlb2_process_dequeue_qes(ev_port, qm_port, events,
3348                                                  qes, num_events);
3349
3350         events[0].u64 = qes[0].data;
3351         events[1].u64 = qes[1].data;
3352         events[2].u64 = qes[2].data;
3353         events[3].u64 = qes[3].data;
3354
3355         /* Construct the metadata portion of two struct rte_events
3356          * in one 128b SSE register. Event metadata is constructed in the SSE
3357          * registers like so:
3358          * sse_evt[0][63:0]:   event[0]'s metadata
3359          * sse_evt[0][127:64]: event[1]'s metadata
3360          * sse_evt[1][63:0]:   event[2]'s metadata
3361          * sse_evt[1][127:64]: event[3]'s metadata
3362          */
3363         sse_evt[0] = _mm_setzero_si128();
3364         sse_evt[1] = _mm_setzero_si128();
3365
3366         /* Convert the hardware queue ID to an event queue ID and store it in
3367          * the metadata:
3368          * sse_evt[0][47:40]   = qid_mappings[qes[0].qid]
3369          * sse_evt[0][111:104] = qid_mappings[qes[1].qid]
3370          * sse_evt[1][47:40]   = qid_mappings[qes[2].qid]
3371          * sse_evt[1][111:104] = qid_mappings[qes[3].qid]
3372          */
3373 #define DLB_EVENT_QUEUE_ID_BYTE 5
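        /* Byte 5 of each 64-bit metadata word (bits 47:40 noted above)
         * corresponds to the queue_id field of struct rte_event.
         */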
3374         sse_evt[0] = _mm_insert_epi8(sse_evt[0],
3375                                      qid_mappings[qes[0].qid],
3376                                      DLB_EVENT_QUEUE_ID_BYTE);
3377         sse_evt[0] = _mm_insert_epi8(sse_evt[0],
3378                                      qid_mappings[qes[1].qid],
3379                                      DLB_EVENT_QUEUE_ID_BYTE + 8);
3380         sse_evt[1] = _mm_insert_epi8(sse_evt[1],
3381                                      qid_mappings[qes[2].qid],
3382                                      DLB_EVENT_QUEUE_ID_BYTE);
3383         sse_evt[1] = _mm_insert_epi8(sse_evt[1],
3384                                      qid_mappings[qes[3].qid],
3385                                      DLB_EVENT_QUEUE_ID_BYTE + 8);
3386
3387         /* Convert the hardware priority to an event priority and store it in
3388          * the metadata, while also returning the queue depth status
3389          * value captured by the hardware, storing it in impl_opaque, which can
3390          * be read by the application but not modified
3391          * sse_evt[0][55:48]   = DLB2_TO_EV_PRIO(qes[0].priority)
3392          * sse_evt[0][63:56]   = qes[0].qid_depth
3393          * sse_evt[0][119:112] = DLB2_TO_EV_PRIO(qes[1].priority)
3394          * sse_evt[0][127:120] = qes[1].qid_depth
3395          * sse_evt[1][55:48]   = DLB2_TO_EV_PRIO(qes[2].priority)
3396          * sse_evt[1][63:56]   = qes[2].qid_depth
3397          * sse_evt[1][119:112] = DLB2_TO_EV_PRIO(qes[3].priority)
3398          * sse_evt[1][127:120] = qes[3].qid_depth
3399          */
3400 #define DLB_EVENT_PRIO_IMPL_OPAQUE_WORD 3
3401 #define DLB_BYTE_SHIFT 8
3402         sse_evt[0] =
3403                 _mm_insert_epi16(sse_evt[0],
3404                         DLB2_TO_EV_PRIO((uint8_t)qes[0].priority) |
3405                         (qes[0].qid_depth << DLB_BYTE_SHIFT),
3406                         DLB_EVENT_PRIO_IMPL_OPAQUE_WORD);
3407         sse_evt[0] =
3408                 _mm_insert_epi16(sse_evt[0],
3409                         DLB2_TO_EV_PRIO((uint8_t)qes[1].priority) |
3410                         (qes[1].qid_depth << DLB_BYTE_SHIFT),
3411                         DLB_EVENT_PRIO_IMPL_OPAQUE_WORD + 4);
3412         sse_evt[1] =
3413                 _mm_insert_epi16(sse_evt[1],
3414                         DLB2_TO_EV_PRIO((uint8_t)qes[2].priority) |
3415                         (qes[2].qid_depth << DLB_BYTE_SHIFT),
3416                         DLB_EVENT_PRIO_IMPL_OPAQUE_WORD);
3417         sse_evt[1] =
3418                 _mm_insert_epi16(sse_evt[1],
3419                         DLB2_TO_EV_PRIO((uint8_t)qes[3].priority) |
3420                         (qes[3].qid_depth << DLB_BYTE_SHIFT),
3421                         DLB_EVENT_PRIO_IMPL_OPAQUE_WORD + 4);
3422
3423         /* Write the event type, sub event type, and flow_id to the event
3424          * metadata.
3425          * sse_evt[0][31:0]   = qes[0].flow_id |
3426          *                      qes[0].u.event_type.major << 28 |
3427          *                      qes[0].u.event_type.sub << 20;
3428          * sse_evt[0][95:64]  = qes[1].flow_id |
3429          *                      qes[1].u.event_type.major << 28 |
3430          *                      qes[1].u.event_type.sub << 20;
3431          * sse_evt[1][31:0]   = qes[2].flow_id |
3432          *                      qes[2].u.event_type.major << 28 |
3433          *                      qes[2].u.event_type.sub << 20;
3434          * sse_evt[1][95:64]  = qes[3].flow_id |
3435          *                      qes[3].u.event_type.major << 28 |
3436          *                      qes[3].u.event_type.sub << 20;
3437          */
3438 #define DLB_EVENT_EV_TYPE_DW 0
3439 #define DLB_EVENT_EV_TYPE_SHIFT 28
3440 #define DLB_EVENT_SUB_EV_TYPE_SHIFT 20
3441         sse_evt[0] = _mm_insert_epi32(sse_evt[0],
3442                         qes[0].flow_id |
3443                         qes[0].u.event_type.major << DLB_EVENT_EV_TYPE_SHIFT |
3444                         qes[0].u.event_type.sub <<  DLB_EVENT_SUB_EV_TYPE_SHIFT,
3445                         DLB_EVENT_EV_TYPE_DW);
3446         sse_evt[0] = _mm_insert_epi32(sse_evt[0],
3447                         qes[1].flow_id |
3448                         qes[1].u.event_type.major << DLB_EVENT_EV_TYPE_SHIFT |
3449                         qes[1].u.event_type.sub <<  DLB_EVENT_SUB_EV_TYPE_SHIFT,
3450                         DLB_EVENT_EV_TYPE_DW + 2);
3451         sse_evt[1] = _mm_insert_epi32(sse_evt[1],
3452                         qes[2].flow_id |
3453                         qes[2].u.event_type.major << DLB_EVENT_EV_TYPE_SHIFT |
3454                         qes[2].u.event_type.sub <<  DLB_EVENT_SUB_EV_TYPE_SHIFT,
3455                         DLB_EVENT_EV_TYPE_DW);
3456         sse_evt[1] = _mm_insert_epi32(sse_evt[1],
3457                         qes[3].flow_id |
3458                         qes[3].u.event_type.major << DLB_EVENT_EV_TYPE_SHIFT  |
3459                         qes[3].u.event_type.sub << DLB_EVENT_SUB_EV_TYPE_SHIFT,
3460                         DLB_EVENT_EV_TYPE_DW + 2);
3461
3462         /* Write the sched type to the event metadata. 'op' and 'rsvd' are not
3463          * set:
3464          * sse_evt[0][39:32]  = sched_type_map[qes[0].sched_type] << 6
3465          * sse_evt[0][103:96] = sched_type_map[qes[1].sched_type] << 6
3466          * sse_evt[1][39:32]  = sched_type_map[qes[2].sched_type] << 6
3467          * sse_evt[1][103:96] = sched_type_map[qes[3].sched_type] << 6
3468          */
3469 #define DLB_EVENT_SCHED_TYPE_BYTE 4
3470 #define DLB_EVENT_SCHED_TYPE_SHIFT 6
3471         sse_evt[0] = _mm_insert_epi8(sse_evt[0],
3472                 sched_type_map[qes[0].sched_type] << DLB_EVENT_SCHED_TYPE_SHIFT,
3473                 DLB_EVENT_SCHED_TYPE_BYTE);
3474         sse_evt[0] = _mm_insert_epi8(sse_evt[0],
3475                 sched_type_map[qes[1].sched_type] << DLB_EVENT_SCHED_TYPE_SHIFT,
3476                 DLB_EVENT_SCHED_TYPE_BYTE + 8);
3477         sse_evt[1] = _mm_insert_epi8(sse_evt[1],
3478                 sched_type_map[qes[2].sched_type] << DLB_EVENT_SCHED_TYPE_SHIFT,
3479                 DLB_EVENT_SCHED_TYPE_BYTE);
3480         sse_evt[1] = _mm_insert_epi8(sse_evt[1],
3481                 sched_type_map[qes[3].sched_type] << DLB_EVENT_SCHED_TYPE_SHIFT,
3482                 DLB_EVENT_SCHED_TYPE_BYTE + 8);
3483
3484         /* Store the metadata to the event (use the double-precision
3485          * _mm_storeh_pd because there is no integer function for storing the
3486          * upper 64b):
3487          * events[0].event = sse_evt[0][63:0]
3488          * events[1].event = sse_evt[0][127:64]
3489          * events[2].event = sse_evt[1][63:0]
3490          * events[3].event = sse_evt[1][127:64]
3491          */
3492         _mm_storel_epi64((__m128i *)&events[0].event, sse_evt[0]);
3493         _mm_storeh_pd((double *)&events[1].event, (__m128d) sse_evt[0]);
3494         _mm_storel_epi64((__m128i *)&events[2].event, sse_evt[1]);
3495         _mm_storeh_pd((double *)&events[3].event, (__m128d) sse_evt[1]);
3496
3497         DLB2_INC_STAT(ev_port->stats.rx_sched_cnt[qes[0].sched_type], 1);
3498         DLB2_INC_STAT(ev_port->stats.rx_sched_cnt[qes[1].sched_type], 1);
3499         DLB2_INC_STAT(ev_port->stats.rx_sched_cnt[qes[2].sched_type], 1);
3500         DLB2_INC_STAT(ev_port->stats.rx_sched_cnt[qes[3].sched_type], 1);
3501
3502         DLB2_INC_STAT(
3503                 ev_port->stats.queue[events[0].queue_id].
3504                         qid_depth[qes[0].qid_depth],
3505                 1);
3506         DLB2_INC_STAT(
3507                 ev_port->stats.queue[events[1].queue_id].
3508                         qid_depth[qes[1].qid_depth],
3509                 1);
3510         DLB2_INC_STAT(
3511                 ev_port->stats.queue[events[2].queue_id].
3512                         qid_depth[qes[2].qid_depth],
3513                 1);
3514         DLB2_INC_STAT(
3515                 ev_port->stats.queue[events[3].queue_id].
3516                         qid_depth[qes[3].qid_depth],
3517                 1);
3518
3519         DLB2_INC_STAT(ev_port->stats.traffic.rx_ok, num_events);
3520
3521         return num_events;
3522 }
3523
3524 static __rte_always_inline int
3525 dlb2_recv_qe_sparse(struct dlb2_port *qm_port, struct dlb2_dequeue_qe *qe)
3526 {
3527         volatile struct dlb2_dequeue_qe *cq_addr;
3528         uint8_t xor_mask[2] = {0x0F, 0x00};
3529         const uint8_t and_mask = 0x0F;
3530         __m128i *qes = (__m128i *)qe;
3531         uint8_t gen_bits, gen_bit;
3532         uintptr_t addr[4];
3533         uint16_t idx;
3534
3535         cq_addr = dlb2_port[qm_port->id][PORT_TYPE(qm_port)].cq_base;
3536
3537         idx = qm_port->cq_idx_unmasked & qm_port->cq_depth_mask;
3538         /* Load the next 4 QEs */
3539         addr[0] = (uintptr_t)&cq_addr[idx];
3540         addr[1] = (uintptr_t)&cq_addr[(idx +  4) & qm_port->cq_depth_mask];
3541         addr[2] = (uintptr_t)&cq_addr[(idx +  8) & qm_port->cq_depth_mask];
3542         addr[3] = (uintptr_t)&cq_addr[(idx + 12) & qm_port->cq_depth_mask];
3543
3544         /* Prefetch next batch of QEs (all CQs occupy minimum 8 cache lines) */
3545         rte_prefetch0(&cq_addr[(idx + 16) & qm_port->cq_depth_mask]);
3546         rte_prefetch0(&cq_addr[(idx + 20) & qm_port->cq_depth_mask]);
3547         rte_prefetch0(&cq_addr[(idx + 24) & qm_port->cq_depth_mask]);
3548         rte_prefetch0(&cq_addr[(idx + 28) & qm_port->cq_depth_mask]);
3549
3550         /* Correct the xor_mask for wrap-around QEs */
3551         gen_bit = qm_port->gen_bit;
3552         xor_mask[gen_bit] ^= !!((idx +  4) > qm_port->cq_depth_mask) << 1;
3553         xor_mask[gen_bit] ^= !!((idx +  8) > qm_port->cq_depth_mask) << 2;
3554         xor_mask[gen_bit] ^= !!((idx + 12) > qm_port->cq_depth_mask) << 3;
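        /* Worked example of the correction above (a sketch): with a CQ depth
         * of 16 (cq_depth_mask = 0xf) and idx = 8, idx + 8 and idx + 12 both
         * exceed the mask, so QE[2] and QE[3] are read from the wrapped start
         * of the ring, where valid QEs carry the opposite gen-bit polarity;
         * their bits in xor_mask are therefore flipped.
         */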
3555
3556         /* Read the cache lines backwards to ensure that if QE[N] (N > 0) is
3557          * valid, then QEs[0:N-1] are too.
3558          */
3559         qes[3] = _mm_load_si128((__m128i *)(void *)addr[3]);
3560         rte_compiler_barrier();
3561         qes[2] = _mm_load_si128((__m128i *)(void *)addr[2]);
3562         rte_compiler_barrier();
3563         qes[1] = _mm_load_si128((__m128i *)(void *)addr[1]);
3564         rte_compiler_barrier();
3565         qes[0] = _mm_load_si128((__m128i *)(void *)addr[0]);
3566
3567         /* Extract and combine the gen bits */
3568         gen_bits = ((_mm_extract_epi8(qes[0], 15) & 0x1) << 0) |
3569                    ((_mm_extract_epi8(qes[1], 15) & 0x1) << 1) |
3570                    ((_mm_extract_epi8(qes[2], 15) & 0x1) << 2) |
3571                    ((_mm_extract_epi8(qes[3], 15) & 0x1) << 3);
3572
3573         /* XOR the combined bits such that a 1 represents a valid QE */
3574         gen_bits ^= xor_mask[gen_bit];
3575
3576         /* Mask off gen bits we don't care about */
3577         gen_bits &= and_mask;
3578
3579         return __builtin_popcount(gen_bits);
3580 }
3581
3582 static inline void
3583 _process_deq_qes_vec_impl(struct dlb2_port *qm_port,
3584                           struct rte_event *events,
3585                           __m128i v_qe_3,
3586                           __m128i v_qe_2,
3587                           __m128i v_qe_1,
3588                           __m128i v_qe_0,
3589                           __m128i v_qe_meta,
3590                           __m128i v_qe_status,
3591                           uint32_t valid_events)
3592 {
3593         /* Look up the event QIDs, using the hardware QIDs to index the
3594          * port's QID mapping.
3595          *
3596          * Each v_qe_[0-3] is just a 16-byte load of the whole QE. It is
3597          * passed along in registers as the QE data is required later.
3598          *
3599          * v_qe_meta is a u32 unpack of all 4x QEs, i.e. it contains one
3600          * 32-bit slice of each QE, so makes up a full SSE register. This
3601          * allows parallel processing of 4x QEs in a single register.
3602          */
3603
3604         __m128i v_qid_done = {0};
3605         int hw_qid0 = _mm_extract_epi8(v_qe_meta, 2);
3606         int hw_qid1 = _mm_extract_epi8(v_qe_meta, 6);
3607         int hw_qid2 = _mm_extract_epi8(v_qe_meta, 10);
3608         int hw_qid3 = _mm_extract_epi8(v_qe_meta, 14);
3609
3610         int ev_qid0 = qm_port->qid_mappings[hw_qid0];
3611         int ev_qid1 = qm_port->qid_mappings[hw_qid1];
3612         int ev_qid2 = qm_port->qid_mappings[hw_qid2];
3613         int ev_qid3 = qm_port->qid_mappings[hw_qid3];
3614
3615         int hw_sched0 = _mm_extract_epi8(v_qe_meta, 3) & 3ul;
3616         int hw_sched1 = _mm_extract_epi8(v_qe_meta, 7) & 3ul;
3617         int hw_sched2 = _mm_extract_epi8(v_qe_meta, 11) & 3ul;
3618         int hw_sched3 = _mm_extract_epi8(v_qe_meta, 15) & 3ul;
3619
3620         v_qid_done = _mm_insert_epi8(v_qid_done, ev_qid0, 2);
3621         v_qid_done = _mm_insert_epi8(v_qid_done, ev_qid1, 6);
3622         v_qid_done = _mm_insert_epi8(v_qid_done, ev_qid2, 10);
3623         v_qid_done = _mm_insert_epi8(v_qid_done, ev_qid3, 14);
3624
3625         /* Schedule field remapping using byte shuffle
3626          * - Full byte containing sched field handled here (op, rsvd are zero)
3627          * - Note sanitizing the register requires two masking ANDs:
3628          *   1) to strip prio/msg_type from byte for correct shuffle lookup
3629          *   2) to strip any non-sched-field lanes from any results to OR later
3630          * - The final byte result is shifted right by 10 into another
3631          *   byte-lane of the u32, simplifying the final OR that builds
              *   the rte_event.
3632          */
3633         __m128i v_sched_done;
3634         __m128i v_sched_bits;
3635         {
3636                 static const uint8_t sched_type_map[16] = {
3637                         [DLB2_SCHED_ATOMIC] = RTE_SCHED_TYPE_ATOMIC,
3638                         [DLB2_SCHED_UNORDERED] = RTE_SCHED_TYPE_PARALLEL,
3639                         [DLB2_SCHED_ORDERED] = RTE_SCHED_TYPE_ORDERED,
3640                         [DLB2_SCHED_DIRECTED] = RTE_SCHED_TYPE_ATOMIC,
3641                 };
3642                 static const uint8_t sched_and_mask[16] = {
3643                         0x00, 0x00, 0x00, 0x03,
3644                         0x00, 0x00, 0x00, 0x03,
3645                         0x00, 0x00, 0x00, 0x03,
3646                         0x00, 0x00, 0x00, 0x03,
3647                 };
3648                 const __m128i v_sched_map = _mm_loadu_si128(
3649                                              (const __m128i *)sched_type_map);
3650                 __m128i v_sched_mask = _mm_loadu_si128(
3651                                              (const __m128i *)&sched_and_mask);
3652                 v_sched_bits = _mm_and_si128(v_qe_meta, v_sched_mask);
3653                 __m128i v_sched_remapped = _mm_shuffle_epi8(v_sched_map,
3654                                                             v_sched_bits);
3655                 __m128i v_preshift = _mm_and_si128(v_sched_remapped,
3656                                                    v_sched_mask);
3657                 v_sched_done = _mm_srli_epi32(v_preshift, 10);
3658         }
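        /* For example, a QE with hardware sched type DLB2_SCHED_DIRECTED
         * indexes that entry of sched_type_map and is reported to the
         * application as RTE_SCHED_TYPE_ATOMIC, matching the scalar dequeue
         * path above.
         */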
3659
3660         /* Priority handling
3661          * - QE provides 3 bits of priority
3662          * - Shift << 3 to move to MSBs for byte-prio in rte_event
3663          * - Mask bits to avoid pollution, leaving only 3 prio MSBs in reg
3664          */
3665         __m128i v_prio_done;
3666         {
3667                 static const uint8_t prio_mask[16] = {
3668                         0x00, 0x00, 0x00, 0x07 << 5,
3669                         0x00, 0x00, 0x00, 0x07 << 5,
3670                         0x00, 0x00, 0x00, 0x07 << 5,
3671                         0x00, 0x00, 0x00, 0x07 << 5,
3672                 };
3673                 __m128i v_prio_mask  = _mm_loadu_si128(
3674                                                 (const __m128i *)prio_mask);
3675                 __m128i v_prio_shifted = _mm_slli_epi32(v_qe_meta, 3);
3676                 v_prio_done = _mm_and_si128(v_prio_shifted, v_prio_mask);
3677         }
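        /* For example, a 3-bit hardware priority of 5 (bits [4:2] of the
         * metadata byte) becomes 5 << 5 = 0xa0 in the rte_event priority
         * byte after the shift and mask above, consistent with
         * DLB2_TO_EV_PRIO() in the scalar path.
         */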
3678
3679         /* Event Sub/Type handling:
3680          * we want to keep the lower 12 bits of each QE. Shift up by 20 bits
3681          * to get the sub/ev type data into rte_event location, clearing the
3682          * lower 20 bits in the process.
3683          */
3684         __m128i v_types_done;
3685         {
3686                 static const uint8_t event_mask[16] = {
3687                         0x0f, 0x00, 0x00, 0x00,
3688                         0x0f, 0x00, 0x00, 0x00,
3689                         0x0f, 0x00, 0x00, 0x00,
3690                         0x0f, 0x00, 0x00, 0x00,
3691                 };
3692                 static const uint8_t sub_event_mask[16] = {
3693                         0xff, 0x00, 0x00, 0x00,
3694                         0xff, 0x00, 0x00, 0x00,
3695                         0xff, 0x00, 0x00, 0x00,
3696                         0xff, 0x00, 0x00, 0x00,
3697                 };
3698                 static const uint8_t flow_mask[16] = {
3699                         0xff, 0xff, 0x00, 0x00,
3700                         0xff, 0xff, 0x00, 0x00,
3701                         0xff, 0xff, 0x00, 0x00,
3702                         0xff, 0xff, 0x00, 0x00,
3703                 };
3704                 __m128i v_event_mask  = _mm_loadu_si128(
3705                                         (const __m128i *)event_mask);
3706                 __m128i v_sub_event_mask  = _mm_loadu_si128(
3707                                         (const __m128i *)sub_event_mask);
3708                 __m128i v_flow_mask  = _mm_loadu_si128(
3709                                        (const __m128i *)flow_mask);
3710                 __m128i v_sub = _mm_srli_epi32(v_qe_meta, 8);
3711                 v_sub = _mm_and_si128(v_sub, v_sub_event_mask);
3712                 __m128i v_type = _mm_and_si128(v_qe_meta, v_event_mask);
3713                 v_type = _mm_slli_epi32(v_type, 8);
3714                 v_types_done = _mm_or_si128(v_type, v_sub);
3715                 v_types_done = _mm_slli_epi32(v_types_done, 20);
3716                 __m128i v_flow = _mm_and_si128(v_qe_status, v_flow_mask);
3717                 v_types_done = _mm_or_si128(v_types_done, v_flow);
3718         }
3719
3720         /* Combine QID, sched and prio fields, then shift right by 8 bits to
3721          * align with the rte_event, allowing unpacks to move/blend with payload.
3722          */
3723         __m128i v_q_s_p_done;
3724         {
3725                 __m128i v_qid_sched = _mm_or_si128(v_qid_done, v_sched_done);
3726                 __m128i v_q_s_prio = _mm_or_si128(v_qid_sched, v_prio_done);
3727                 v_q_s_p_done = _mm_srli_epi32(v_q_s_prio, 8);
3728         }
3729
3730         __m128i v_unpk_ev_23, v_unpk_ev_01, v_ev_2, v_ev_3, v_ev_0, v_ev_1;
3731
3732         /* Unpack evs into u64 metadata, then indiv events */
3733         v_unpk_ev_23 = _mm_unpackhi_epi32(v_types_done, v_q_s_p_done);
3734         v_unpk_ev_01 = _mm_unpacklo_epi32(v_types_done, v_q_s_p_done);
3735
3736         switch (valid_events) {
3737         case 4:
3738                 v_ev_3 = _mm_blend_epi16(v_unpk_ev_23, v_qe_3, 0x0F);
3739                 v_ev_3 = _mm_alignr_epi8(v_ev_3, v_ev_3, 8);
3740                 _mm_storeu_si128((__m128i *)&events[3], v_ev_3);
3741                 DLB2_INC_STAT(qm_port->ev_port->stats.rx_sched_cnt[hw_sched3],
3742                               1);
3743                 /* fallthrough */
3744         case 3:
3745                 v_ev_2 = _mm_unpacklo_epi64(v_unpk_ev_23, v_qe_2);
3746                 _mm_storeu_si128((__m128i *)&events[2], v_ev_2);
3747                 DLB2_INC_STAT(qm_port->ev_port->stats.rx_sched_cnt[hw_sched2],
3748                               1);
3749                 /* fallthrough */
3750         case 2:
3751                 v_ev_1 = _mm_blend_epi16(v_unpk_ev_01, v_qe_1, 0x0F);
3752                 v_ev_1 = _mm_alignr_epi8(v_ev_1, v_ev_1, 8);
3753                 _mm_storeu_si128((__m128i *)&events[1], v_ev_1);
3754                 DLB2_INC_STAT(qm_port->ev_port->stats.rx_sched_cnt[hw_sched1],
3755                               1);
3756                 /* fallthrough */
3757         case 1:
3758                 v_ev_0 = _mm_unpacklo_epi64(v_unpk_ev_01, v_qe_0);
3759                 _mm_storeu_si128((__m128i *)&events[0], v_ev_0);
3760                 DLB2_INC_STAT(qm_port->ev_port->stats.rx_sched_cnt[hw_sched0],
3761                               1);
3762         }
3763 }
3764
3765 static __rte_always_inline int
3766 dlb2_recv_qe_sparse_vec(struct dlb2_port *qm_port, void *events,
3767                         uint32_t max_events)
3768 {
3769         /* Using unmasked idx for perf, and masking manually */
3770         uint16_t idx = qm_port->cq_idx_unmasked;
3771         volatile struct dlb2_dequeue_qe *cq_addr;
3772
3773         cq_addr = dlb2_port[qm_port->id][PORT_TYPE(qm_port)].cq_base;
3774
3775         uintptr_t qe_ptr_3 = (uintptr_t)&cq_addr[(idx + 12) &
3776                                                  qm_port->cq_depth_mask];
3777         uintptr_t qe_ptr_2 = (uintptr_t)&cq_addr[(idx +  8) &
3778                                                  qm_port->cq_depth_mask];
3779         uintptr_t qe_ptr_1 = (uintptr_t)&cq_addr[(idx +  4) &
3780                                                  qm_port->cq_depth_mask];
3781         uintptr_t qe_ptr_0 = (uintptr_t)&cq_addr[(idx +  0) &
3782                                                  qm_port->cq_depth_mask];
3783
3784         /* Load QEs from CQ: use compiler barriers to avoid load reordering */
3785         __m128i v_qe_3 = _mm_loadu_si128((const __m128i *)qe_ptr_3);
3786         rte_compiler_barrier();
3787         __m128i v_qe_2 = _mm_loadu_si128((const __m128i *)qe_ptr_2);
3788         rte_compiler_barrier();
3789         __m128i v_qe_1 = _mm_loadu_si128((const __m128i *)qe_ptr_1);
3790         rte_compiler_barrier();
3791         __m128i v_qe_0 = _mm_loadu_si128((const __m128i *)qe_ptr_0);
3792
3793         /* Generate the status shuffle mask:
3794          * - Avoids a load in an otherwise load-heavy section of code
3795          * - Moves bytes 3,7,11,15 (gen bit bytes) to LSB bytes in XMM
3796          */
3797         const uint32_t stat_shuf_bytes = (15 << 24) | (11 << 16) | (7 << 8) | 3;
3798         __m128i v_zeros = _mm_setzero_si128();
3799         __m128i v_ffff = _mm_cmpeq_epi8(v_zeros, v_zeros);
3800         __m128i v_stat_shuf_mask = _mm_insert_epi32(v_ffff, stat_shuf_bytes, 0);
3801
3802         /* Extract u32 components required from the QE
3803          * - QE[64 to 95 ] for metadata (qid, sched, prio, event type, ...)
3804          * - QE[96 to 127] for status (cq gen bit, error)
3805          *
3806          * Note that stage 1 of the unpacking is re-used for both u32 extracts
3807          */
3808         __m128i v_qe_02 = _mm_unpackhi_epi32(v_qe_0, v_qe_2);
3809         __m128i v_qe_13 = _mm_unpackhi_epi32(v_qe_1, v_qe_3);
3810         __m128i v_qe_status = _mm_unpackhi_epi32(v_qe_02, v_qe_13);
3811         __m128i v_qe_meta   = _mm_unpacklo_epi32(v_qe_02, v_qe_13);
3812
3813         /* Status byte (gen_bit, error) handling:
3814          * - Shuffle to lanes 0,1,2,3, clear all others
3815          * - Shift left by 7 so each gen bit lands in its byte's MSB, then
3816          *   movemask to scalar (the error bit is not checked in this path)
3817          */
3818         __m128i v_qe_shuffled = _mm_shuffle_epi8(v_qe_status, v_stat_shuf_mask);
3819         __m128i v_qes_shift_gen_bit = _mm_slli_epi32(v_qe_shuffled, 7);
3820         int32_t qe_gen_bits = _mm_movemask_epi8(v_qes_shift_gen_bit) & 0xf;
3821
3822         /* Expected vs Reality of QE Gen bits
3823          * - cq_rolling_mask provides expected bits
3824          * - QE loads, unpacks/shuffle and movemask provides reality
3825          * - XOR of the two gives bitmask of new packets
3826          * - POPCNT to get the number of new events
3827          */
3828         uint64_t rolling = qm_port->cq_rolling_mask & 0xF;
3829         uint64_t qe_xor_bits = (qe_gen_bits ^ rolling);
3830         uint32_t count_new = __builtin_popcount(qe_xor_bits);
3831         count_new = RTE_MIN(count_new, max_events);
3832         if (!count_new)
3833                 return 0;
3834
3835         /* emulate a 128 bit rotate using 2x 64-bit numbers and bit-shifts */
3836
3837         uint64_t m_rshift = qm_port->cq_rolling_mask >> count_new;
3838         uint64_t m_lshift = qm_port->cq_rolling_mask << (64 - count_new);
3839         uint64_t m2_rshift = qm_port->cq_rolling_mask_2 >> count_new;
3840         uint64_t m2_lshift = qm_port->cq_rolling_mask_2 << (64 - count_new);
3841
3842         /* shifted out of m2 into MSB of m */
3843         qm_port->cq_rolling_mask = (m_rshift | m2_lshift);
3844
3845         /* shifted out of m "looped back" into MSB of m2 */
3846         qm_port->cq_rolling_mask_2 = (m2_rshift | m_lshift);
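        /* Worked example (a sketch of the rotate above): with count_new = 3,
         * the low 3 bits of cq_rolling_mask_2 become the top 3 bits of
         * cq_rolling_mask, and the 3 bits just consumed from cq_rolling_mask
         * wrap around into the top of cq_rolling_mask_2, i.e. a 128-bit
         * rotate right by count_new across the two 64-bit words.
         */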
3847
3848         /* Prefetch the next QEs - the loads overlap with later processing
             * rather than adding stall cycles
             */
3849         rte_prefetch0(&cq_addr[(idx + 16) & qm_port->cq_depth_mask]);
3850         rte_prefetch0(&cq_addr[(idx + 20) & qm_port->cq_depth_mask]);
3851         rte_prefetch0(&cq_addr[(idx + 24) & qm_port->cq_depth_mask]);
3852         rte_prefetch0(&cq_addr[(idx + 28) & qm_port->cq_depth_mask]);
3853
3854         /* Convert QEs from XMM regs to events and store events directly */
3855         _process_deq_qes_vec_impl(qm_port, events, v_qe_3, v_qe_2, v_qe_1,
3856                                   v_qe_0, v_qe_meta, v_qe_status, count_new);
3857
3858         return count_new;
3859 }
3860
3861 static inline void
3862 dlb2_inc_cq_idx(struct dlb2_port *qm_port, int cnt)
3863 {
3864         uint16_t idx = qm_port->cq_idx_unmasked + cnt;
3865
3866         qm_port->cq_idx_unmasked = idx;
3867         qm_port->cq_idx = idx & qm_port->cq_depth_mask;
3868         qm_port->gen_bit = (~(idx >> qm_port->gen_bit_shift)) & 0x1;
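        /* Example of the expression above (a sketch): if gen_bit_shift is 4
         * (an effective CQ depth of 16), gen_bit toggles each time the
         * unmasked index crosses a multiple of 16, i.e. once per wrap of the
         * CQ.
         */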
3869 }
3870
3871 static inline int16_t
3872 dlb2_hw_dequeue_sparse(struct dlb2_eventdev *dlb2,
3873                        struct dlb2_eventdev_port *ev_port,
3874                        struct rte_event *events,
3875                        uint16_t max_num,
3876                        uint64_t dequeue_timeout_ticks)
3877 {
3878         uint64_t start_ticks = 0ULL;
3879         struct dlb2_port *qm_port;
3880         int num = 0;
3881         bool use_scalar;
3882         uint64_t timeout;
3883
3884         qm_port = &ev_port->qm_port;
3885         use_scalar = qm_port->use_scalar;
3886
3887         if (!dlb2->global_dequeue_wait)
3888                 timeout = dequeue_timeout_ticks;
3889         else
3890                 timeout = dlb2->global_dequeue_wait_ticks;
3891
3892         start_ticks = rte_get_timer_cycles();
3893
3894         use_scalar = use_scalar || (max_num & 0x3);
3895
3896         while (num < max_num) {
3897                 struct dlb2_dequeue_qe qes[DLB2_NUM_QES_PER_CACHE_LINE];
3898                 int num_avail;
3899                 if (use_scalar) {
3900                         num_avail = dlb2_recv_qe_sparse(qm_port, qes);
3901                         num_avail = RTE_MIN(num_avail, max_num - num);
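                        /* In sparse CQ mode the hardware writes one QE per
                         * 4-slot cache line, so the CQ index advances by
                         * four slots for every QE consumed.
                         */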
3902                         dlb2_inc_cq_idx(qm_port, num_avail << 2);
3903                         if (num_avail == DLB2_NUM_QES_PER_CACHE_LINE)
3904                                 num += dlb2_process_dequeue_four_qes(ev_port,
3905                                                                   qm_port,
3906                                                                   &events[num],
3907                                                                   &qes[0]);
3908                         else if (num_avail)
3909                                 num += dlb2_process_dequeue_qes(ev_port,
3910                                                                 qm_port,
3911                                                                 &events[num],
3912                                                                 &qes[0],
3913                                                                 num_avail);
3914                 } else { /* !use_scalar */
3915                         num_avail = dlb2_recv_qe_sparse_vec(qm_port,
3916                                                             &events[num],
3917                                                             max_num - num);
3918                         num += num_avail;
3919                         dlb2_inc_cq_idx(qm_port, num_avail << 2);
3920                         DLB2_INC_STAT(ev_port->stats.traffic.rx_ok, num_avail);
3921                 }
3922                 if (!num_avail) {
3923                         if (num > 0)
3924                                 break;
3925                         else if (dlb2_dequeue_wait(dlb2, ev_port, qm_port,
3926                                                    timeout, start_ticks))
3927                                 break;
3928                 }
3929         }
3930
3931         qm_port->owed_tokens += num;
3932
3933         if (num) {
3934                 if (qm_port->token_pop_mode == AUTO_POP)
3935                         dlb2_consume_qe_immediate(qm_port, num);
3936
3937                 ev_port->outstanding_releases += num;
3938
3939                 dlb2_port_credits_inc(qm_port, num);
3940         }
3941
3942         return num;
3943 }
3944
3945 static __rte_always_inline int
3946 dlb2_recv_qe(struct dlb2_port *qm_port, struct dlb2_dequeue_qe *qe,
3947              uint8_t *offset)
3948 {
3949         uint8_t xor_mask[2][4] = { {0x0F, 0x0E, 0x0C, 0x08},
3950                                    {0x00, 0x01, 0x03, 0x07} };
3951         uint8_t and_mask[4] = {0x0F, 0x0E, 0x0C, 0x08};
3952         volatile struct dlb2_dequeue_qe *cq_addr;
3953         __m128i *qes = (__m128i *)qe;
3954         uint64_t *cache_line_base;
3955         uint8_t gen_bits;
3956
3957         cq_addr = dlb2_port[qm_port->id][PORT_TYPE(qm_port)].cq_base;
3958         cq_addr = &cq_addr[qm_port->cq_idx];
3959
3960         cache_line_base = (void *)(((uintptr_t)cq_addr) & ~0x3F);
3961         *offset = ((uintptr_t)cq_addr & 0x30) >> 4;
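        /* Worked example (a sketch): if cq_idx points at the third QE slot of
         * the cache line, offset is 2 and and_mask[2] = 0x0C keeps only the
         * gen bits of QE[2] and QE[3]; the earlier slots in the line have
         * already been consumed.
         */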
3962
3963         /* Load the next CQ cache line from memory. Pack these reads as tight
3964          * as possible to reduce the chance that DLB invalidates the line while
3965          * the CPU is reading it. Read the cache line backwards to ensure that
3966          * if QE[N] (N > 0) is valid, then QEs[0:N-1] are too.
3967          *
3968          * (Valid QEs start at &qe[offset])
3969          */
3970         qes[3] = _mm_load_si128((__m128i *)&cache_line_base[6]);
3971         qes[2] = _mm_load_si128((__m128i *)&cache_line_base[4]);
3972         qes[1] = _mm_load_si128((__m128i *)&cache_line_base[2]);
3973         qes[0] = _mm_load_si128((__m128i *)&cache_line_base[0]);
3974
3975         /* Evict the cache line ASAP */
3976         rte_cldemote(cache_line_base);
3977
3978         /* Extract and combine the gen bits */
3979         gen_bits = ((_mm_extract_epi8(qes[0], 15) & 0x1) << 0) |
3980                    ((_mm_extract_epi8(qes[1], 15) & 0x1) << 1) |
3981                    ((_mm_extract_epi8(qes[2], 15) & 0x1) << 2) |
3982                    ((_mm_extract_epi8(qes[3], 15) & 0x1) << 3);
3983
3984         /* XOR the combined bits such that a 1 represents a valid QE */
3985         gen_bits ^= xor_mask[qm_port->gen_bit][*offset];
3986
3987         /* Mask off gen bits we don't care about */
3988         gen_bits &= and_mask[*offset];
3989
3990         return __builtin_popcount(gen_bits);
3991 }
3992
3993 static inline int16_t
3994 dlb2_hw_dequeue(struct dlb2_eventdev *dlb2,
3995                 struct dlb2_eventdev_port *ev_port,
3996                 struct rte_event *events,
3997                 uint16_t max_num,
3998                 uint64_t dequeue_timeout_ticks)
3999 {
4000         uint64_t timeout;
4001         uint64_t start_ticks = 0ULL;
4002         struct dlb2_port *qm_port;
4003         int num = 0;
4004
4005         qm_port = &ev_port->qm_port;
4006
4007         /* We have a special implementation for waiting. Wait can be:
4008          * 1) no waiting at all
4009          * 2) busy poll only
4010          * 3) wait for an interrupt; if woken up and the poll
4011          *    time has expired, return to the caller
4012          * 4) umonitor/umwait repeatedly up to poll time
4013          */
4014
4015         /* If configured for per dequeue wait, then use wait value provided
4016          * to this API. Otherwise we must use the global
4017          * value from eventdev config time.
4018          */
4019         if (!dlb2->global_dequeue_wait)
4020                 timeout = dequeue_timeout_ticks;
4021         else
4022                 timeout = dlb2->global_dequeue_wait_ticks;
4023
4024         start_ticks = rte_get_timer_cycles();
4025
4026         while (num < max_num) {
4027                 struct dlb2_dequeue_qe qes[DLB2_NUM_QES_PER_CACHE_LINE];
4028                 uint8_t offset;
4029                 int num_avail;
4030
4031                 /* Copy up to 4 QEs from the current cache line into qes */
4032                 num_avail = dlb2_recv_qe(qm_port, qes, &offset);
4033
4034                 /* But don't process more than the user requested */
4035                 num_avail = RTE_MIN(num_avail, max_num - num);
4036
4037                 dlb2_inc_cq_idx(qm_port, num_avail);
4038
4039                 if (num_avail == DLB2_NUM_QES_PER_CACHE_LINE)
4040                         num += dlb2_process_dequeue_four_qes(ev_port,
4041                                                              qm_port,
4042                                                              &events[num],
4043                                                              &qes[offset]);
4044                 else if (num_avail)
4045                         num += dlb2_process_dequeue_qes(ev_port,
4046                                                         qm_port,
4047                                                         &events[num],
4048                                                         &qes[offset],
4049                                                         num_avail);
4050                 else if ((timeout == 0) || (num > 0))
4051                         /* Not waiting in any form, or 1+ events received? */
4052                         break;
4053                 else if (dlb2_dequeue_wait(dlb2, ev_port, qm_port,
4054                                            timeout, start_ticks))
4055                         break;
4056         }
4057
4058         qm_port->owed_tokens += num;
4059
4060         if (num) {
4061                 if (qm_port->token_pop_mode == AUTO_POP)
4062                         dlb2_consume_qe_immediate(qm_port, num);
4063
4064                 ev_port->outstanding_releases += num;
4065
4066                 dlb2_port_credits_inc(qm_port, num);
4067         }
4068
4069         return num;
4070 }
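
/*
 * Illustrative usage sketch (added commentary, not part of the original
 * driver): the wait behaviour selected above is driven by the application,
 * either globally via rte_event_dev_config.dequeue_timeout_ns or per call
 * when the device is configured with RTE_EVENT_DEV_CFG_PER_DEQUEUE_TIMEOUT.
 * A minimal caller, with dev_id/port_id as placeholders:
 *
 *     struct rte_event ev[32];
 *     uint64_t ticks;
 *
 *     rte_event_dequeue_timeout_ticks(dev_id, 1000, &ticks);
 *     uint16_t n = rte_event_dequeue_burst(dev_id, port_id, ev, 32, ticks);
 */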
4071
4072 static uint16_t
4073 dlb2_event_dequeue_burst(void *event_port, struct rte_event *ev, uint16_t num,
4074                          uint64_t wait)
4075 {
4076         struct dlb2_eventdev_port *ev_port = event_port;
4077         struct dlb2_port *qm_port = &ev_port->qm_port;
4078         struct dlb2_eventdev *dlb2 = ev_port->dlb2;
4079         uint16_t cnt;
4080
4081         RTE_ASSERT(ev_port->setup_done);
4082         RTE_ASSERT(ev != NULL);
4083
4084         if (ev_port->implicit_release && ev_port->outstanding_releases > 0) {
4085                 uint16_t out_rels = ev_port->outstanding_releases;
4086
4087                 dlb2_event_release(dlb2, ev_port->id, out_rels);
4088
4089                 DLB2_INC_STAT(ev_port->stats.tx_implicit_rel, out_rels);
4090         }
4091
4092         if (qm_port->token_pop_mode == DEFERRED_POP && qm_port->owed_tokens)
4093                 dlb2_consume_qe_immediate(qm_port, qm_port->owed_tokens);
4094
4095         cnt = dlb2_hw_dequeue(dlb2, ev_port, ev, num, wait);
4096
4097         DLB2_INC_STAT(ev_port->stats.traffic.total_polls, 1);
4098         DLB2_INC_STAT(ev_port->stats.traffic.zero_polls, ((cnt == 0) ? 1 : 0));
4099
4100         return cnt;
4101 }
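
/*
 * Illustrative note (added commentary, not part of the original driver): the
 * implicit-release handling above serves ports that do not return events
 * explicitly. A port created with RTE_EVENT_PORT_CFG_DISABLE_IMPL_REL must
 * instead release or forward every dequeued event itself, e.g.:
 *
 *     ev.op = RTE_EVENT_OP_RELEASE;   // or RTE_EVENT_OP_FORWARD
 *     rte_event_enqueue_burst(dev_id, port_id, &ev, 1);
 */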
4102
4103 static uint16_t
4104 dlb2_event_dequeue(void *event_port, struct rte_event *ev, uint64_t wait)
4105 {
4106         return dlb2_event_dequeue_burst(event_port, ev, 1, wait);
4107 }
4108
4109 static uint16_t
4110 dlb2_event_dequeue_burst_sparse(void *event_port, struct rte_event *ev,
4111                                 uint16_t num, uint64_t wait)
4112 {
4113         struct dlb2_eventdev_port *ev_port = event_port;
4114         struct dlb2_port *qm_port = &ev_port->qm_port;
4115         struct dlb2_eventdev *dlb2 = ev_port->dlb2;
4116         uint16_t cnt;
4117
4118         RTE_ASSERT(ev_port->setup_done);
4119         RTE_ASSERT(ev != NULL);
4120
4121         if (ev_port->implicit_release && ev_port->outstanding_releases > 0) {
4122                 uint16_t out_rels = ev_port->outstanding_releases;
4123
4124                 dlb2_event_release(dlb2, ev_port->id, out_rels);
4125
4126                 DLB2_INC_STAT(ev_port->stats.tx_implicit_rel, out_rels);
4127         }
4128
4129         if (qm_port->token_pop_mode == DEFERRED_POP && qm_port->owed_tokens)
4130                 dlb2_consume_qe_immediate(qm_port, qm_port->owed_tokens);
4131
4132         cnt = dlb2_hw_dequeue_sparse(dlb2, ev_port, ev, num, wait);
4133
4134         DLB2_INC_STAT(ev_port->stats.traffic.total_polls, 1);
4135         DLB2_INC_STAT(ev_port->stats.traffic.zero_polls, ((cnt == 0) ? 1 : 0));
4136         return cnt;
4137 }
4138
4139 static uint16_t
4140 dlb2_event_dequeue_sparse(void *event_port, struct rte_event *ev,
4141                           uint64_t wait)
4142 {
4143         return dlb2_event_dequeue_burst_sparse(event_port, ev, 1, wait);
4144 }
4145
4146 static void
4147 dlb2_flush_port(struct rte_eventdev *dev, int port_id)
4148 {
4149         struct dlb2_eventdev *dlb2 = dlb2_pmd_priv(dev);
4150         eventdev_stop_flush_t flush;
4151         struct rte_event ev;
4152         uint8_t dev_id;
4153         void *arg;
4154         int i;
4155
4156         flush = dev->dev_ops->dev_stop_flush;
4157         dev_id = dev->data->dev_id;
4158         arg = dev->data->dev_stop_flush_arg;
4159
4160         while (rte_event_dequeue_burst(dev_id, port_id, &ev, 1, 0)) {
4161                 if (flush)
4162                         flush(dev_id, ev, arg);
4163
4164                 if (dlb2->ev_ports[port_id].qm_port.is_directed)
4165                         continue;
4166
4167                 ev.op = RTE_EVENT_OP_RELEASE;
4168
4169                 rte_event_enqueue_burst(dev_id, port_id, &ev, 1);
4170         }
4171
4172         /* Enqueue any additional outstanding releases */
4173         ev.op = RTE_EVENT_OP_RELEASE;
4174
4175         for (i = dlb2->ev_ports[port_id].outstanding_releases; i > 0; i--)
4176                 rte_event_enqueue_burst(dev_id, port_id, &ev, 1);
4177 }
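
/*
 * Illustrative usage sketch (added commentary, not part of the original
 * driver): the flush callback invoked above is whatever the application
 * registered before stopping the device, e.g. (my_flush is hypothetical):
 *
 *     static void my_flush(uint8_t dev_id, struct rte_event ev, void *arg)
 *     {
 *             rte_pktmbuf_free(ev.mbuf);   // hypothetical cleanup
 *     }
 *
 *     rte_event_dev_stop_flush_callback_register(dev_id, my_flush, NULL);
 *     rte_event_dev_stop(dev_id);
 */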
4178
4179 static uint32_t
4180 dlb2_get_ldb_queue_depth(struct dlb2_eventdev *dlb2,
4181                          struct dlb2_eventdev_queue *queue)
4182 {
4183         struct dlb2_hw_dev *handle = &dlb2->qm_instance;
4184         struct dlb2_get_ldb_queue_depth_args cfg;
4185         int ret;
4186
4187         cfg.queue_id = queue->qm_queue.id;
4188
4189         ret = dlb2_iface_get_ldb_queue_depth(handle, &cfg);
4190         if (ret < 0) {
4191                 DLB2_LOG_ERR("dlb2: get_ldb_queue_depth ret=%d (driver status: %s)\n",
4192                              ret, dlb2_error_strings[cfg.response.status]);
4193                 return ret;
4194         }
4195
4196         return cfg.response.id;
4197 }
4198
4199 static uint32_t
4200 dlb2_get_dir_queue_depth(struct dlb2_eventdev *dlb2,
4201                          struct dlb2_eventdev_queue *queue)
4202 {
4203         struct dlb2_hw_dev *handle = &dlb2->qm_instance;
4204         struct dlb2_get_dir_queue_depth_args cfg;
4205         int ret;
4206
4207         cfg.queue_id = queue->qm_queue.id;
4208
4209         ret = dlb2_iface_get_dir_queue_depth(handle, &cfg);
4210         if (ret < 0) {
4211                 DLB2_LOG_ERR("dlb2: get_dir_queue_depth ret=%d (driver status: %s)\n",
4212                              ret, dlb2_error_strings[cfg.response.status]);
4213                 return ret;
4214         }
4215
4216         return cfg.response.id;
4217 }
4218
4219 uint32_t
4220 dlb2_get_queue_depth(struct dlb2_eventdev *dlb2,
4221                      struct dlb2_eventdev_queue *queue)
4222 {
4223         if (queue->qm_queue.is_directed)
4224                 return dlb2_get_dir_queue_depth(dlb2, queue);
4225         else
4226                 return dlb2_get_ldb_queue_depth(dlb2, queue);
4227 }
4228
4229 static bool
4230 dlb2_queue_is_empty(struct dlb2_eventdev *dlb2,
4231                     struct dlb2_eventdev_queue *queue)
4232 {
4233         return dlb2_get_queue_depth(dlb2, queue) == 0;
4234 }
4235
4236 static bool
4237 dlb2_linked_queues_empty(struct dlb2_eventdev *dlb2)
4238 {
4239         int i;
4240
4241         for (i = 0; i < dlb2->num_queues; i++) {
4242                 if (dlb2->ev_queues[i].num_links == 0)
4243                         continue;
4244                 if (!dlb2_queue_is_empty(dlb2, &dlb2->ev_queues[i]))
4245                         return false;
4246         }
4247
4248         return true;
4249 }
4250
4251 static bool
4252 dlb2_queues_empty(struct dlb2_eventdev *dlb2)
4253 {
4254         int i;
4255
4256         for (i = 0; i < dlb2->num_queues; i++) {
4257                 if (!dlb2_queue_is_empty(dlb2, &dlb2->ev_queues[i]))
4258                         return false;
4259         }
4260
4261         return true;
4262 }
4263
4264 static void
4265 dlb2_drain(struct rte_eventdev *dev)
4266 {
4267         struct dlb2_eventdev *dlb2 = dlb2_pmd_priv(dev);
4268         struct dlb2_eventdev_port *ev_port = NULL;
4269         uint8_t dev_id;
4270         int i;
4271
4272         dev_id = dev->data->dev_id;
4273
4274         while (!dlb2_linked_queues_empty(dlb2)) {
4275                 /* Flush all the ev_ports, which will drain all their connected
4276                  * queues.
4277                  */
4278                 for (i = 0; i < dlb2->num_ports; i++)
4279                         dlb2_flush_port(dev, i);
4280         }
4281
4282         /* The queues are empty, but there may be events left in the ports. */
4283         for (i = 0; i < dlb2->num_ports; i++)
4284                 dlb2_flush_port(dev, i);
4285
4286         /* If the domain's queues are empty, we're done. */
4287         if (dlb2_queues_empty(dlb2))
4288                 return;
4289
4290         /* Else, there must be at least one unlinked load-balanced queue.
4291          * Select a load-balanced port with which to drain the unlinked
4292          * queue(s).
4293          */
4294         for (i = 0; i < dlb2->num_ports; i++) {
4295                 ev_port = &dlb2->ev_ports[i];
4296
4297                 if (!ev_port->qm_port.is_directed)
4298                         break;
4299         }
4300
4301         if (i == dlb2->num_ports) {
4302                 DLB2_LOG_ERR("internal error: no LDB ev_ports\n");
4303                 return;
4304         }
4305
4306         rte_errno = 0;
4307         rte_event_port_unlink(dev_id, ev_port->id, NULL, 0);
4308
4309         if (rte_errno) {
4310                 DLB2_LOG_ERR("internal error: failed to unlink ev_port %d\n",
4311                              ev_port->id);
4312                 return;
4313         }
4314
4315         for (i = 0; i < dlb2->num_queues; i++) {
4316                 uint8_t qid, prio;
4317                 int ret;
4318
4319                 if (dlb2_queue_is_empty(dlb2, &dlb2->ev_queues[i]))
4320                         continue;
4321
4322                 qid = i;
4323                 prio = 0;
4324
4325                 /* Link the ev_port to the queue */
4326                 ret = rte_event_port_link(dev_id, ev_port->id, &qid, &prio, 1);
4327                 if (ret != 1) {
4328                         DLB2_LOG_ERR("internal error: failed to link ev_port %d to queue %d\n",
4329                                      ev_port->id, qid);
4330                         return;
4331                 }
4332
4333                 /* Flush the queue */
4334                 while (!dlb2_queue_is_empty(dlb2, &dlb2->ev_queues[i]))
4335                         dlb2_flush_port(dev, ev_port->id);
4336
4337                 /* Drain any extant events in the ev_port. */
4338                 dlb2_flush_port(dev, ev_port->id);
4339
4340                 /* Unlink the ev_port from the queue */
4341                 ret = rte_event_port_unlink(dev_id, ev_port->id, &qid, 1);
4342                 if (ret != 1) {
4343                         DLB2_LOG_ERR("internal error: failed to unlink ev_port %d from queue %d\n",
4344                                      ev_port->id, qid);
4345                         return;
4346                 }
4347         }
4348 }
4349
4350 static void
4351 dlb2_eventdev_stop(struct rte_eventdev *dev)
4352 {
4353         struct dlb2_eventdev *dlb2 = dlb2_pmd_priv(dev);
4354
4355         rte_spinlock_lock(&dlb2->qm_instance.resource_lock);
4356
4357         if (dlb2->run_state == DLB2_RUN_STATE_STOPPED) {
4358                 DLB2_LOG_DBG("Internal error: already stopped\n");
4359                 rte_spinlock_unlock(&dlb2->qm_instance.resource_lock);
4360                 return;
4361         } else if (dlb2->run_state != DLB2_RUN_STATE_STARTED) {
4362                 DLB2_LOG_ERR("Internal error: bad state %d for dev_stop\n",
4363                              (int)dlb2->run_state);
4364                 rte_spinlock_unlock(&dlb2->qm_instance.resource_lock);
4365                 return;
4366         }
4367
4368         dlb2->run_state = DLB2_RUN_STATE_STOPPING;
4369
4370         rte_spinlock_unlock(&dlb2->qm_instance.resource_lock);
4371
4372         dlb2_drain(dev);
4373
4374         dlb2->run_state = DLB2_RUN_STATE_STOPPED;
4375 }
4376
4377 static int
4378 dlb2_eventdev_close(struct rte_eventdev *dev)
4379 {
4380         dlb2_hw_reset_sched_domain(dev, false);
4381
4382         return 0;
4383 }
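
/*
 * Illustrative teardown sketch (added commentary, not part of the original
 * driver): applications reach the stop/close handlers above through the
 * generic API, in this order:
 *
 *     rte_event_dev_stop(dev_id);    // drains ports and queues (dlb2_drain)
 *     rte_event_dev_close(dev_id);   // resets the scheduling domain
 */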
4384
4385 static void
4386 dlb2_eventdev_queue_release(struct rte_eventdev *dev, uint8_t id)
4387 {
4388         RTE_SET_USED(dev);
4389         RTE_SET_USED(id);
4390
4391         /* This function intentionally left blank. */
4392 }
4393
4394 static void
4395 dlb2_eventdev_port_release(void *port)
4396 {
4397         struct dlb2_eventdev_port *ev_port = port;
4398         struct dlb2_port *qm_port;
4399
4400         if (ev_port) {
4401                 qm_port = &ev_port->qm_port;
4402                 if (qm_port->config_state == DLB2_CONFIGURED)
4403                         dlb2_free_qe_mem(qm_port);
4404         }
4405 }
4406
4407 static int
4408 dlb2_eventdev_timeout_ticks(struct rte_eventdev *dev, uint64_t ns,
4409                             uint64_t *timeout_ticks)
4410 {
4411         RTE_SET_USED(dev);
4412         uint64_t cycles_per_ns = rte_get_timer_hz() / 1E9;
4413
4414         *timeout_ticks = ns * cycles_per_ns;
4415
4416         return 0;
4417 }
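
/*
 * Worked example (added commentary, not part of the original driver): with a
 * 2 GHz timer, rte_get_timer_hz() / 1E9 gives cycles_per_ns == 2, so a
 * 1000 ns timeout converts to 2000 ticks. Note that the integer assignment
 * truncates fractional rates: a 2.5 GHz timer also yields 2 cycles/ns.
 */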
4418
4419 static void
4420 dlb2_entry_points_init(struct rte_eventdev *dev)
4421 {
4422         struct dlb2_eventdev *dlb2;
4423
4424         /* Expose PMD's eventdev interface */
4425         static struct eventdev_ops dlb2_eventdev_entry_ops = {
4426                 .dev_infos_get    = dlb2_eventdev_info_get,
4427                 .dev_configure    = dlb2_eventdev_configure,
4428                 .dev_start        = dlb2_eventdev_start,
4429                 .dev_stop         = dlb2_eventdev_stop,
4430                 .dev_close        = dlb2_eventdev_close,
4431                 .queue_def_conf   = dlb2_eventdev_queue_default_conf_get,
4432                 .queue_setup      = dlb2_eventdev_queue_setup,
4433                 .queue_release    = dlb2_eventdev_queue_release,
4434                 .port_def_conf    = dlb2_eventdev_port_default_conf_get,
4435                 .port_setup       = dlb2_eventdev_port_setup,
4436                 .port_release     = dlb2_eventdev_port_release,
4437                 .port_link        = dlb2_eventdev_port_link,
4438                 .port_unlink      = dlb2_eventdev_port_unlink,
4439                 .port_unlinks_in_progress =
4440                                     dlb2_eventdev_port_unlinks_in_progress,
4441                 .timeout_ticks    = dlb2_eventdev_timeout_ticks,
4442                 .dump             = dlb2_eventdev_dump,
4443                 .xstats_get       = dlb2_eventdev_xstats_get,
4444                 .xstats_get_names = dlb2_eventdev_xstats_get_names,
4445                 .xstats_get_by_name = dlb2_eventdev_xstats_get_by_name,
4446                 .xstats_reset       = dlb2_eventdev_xstats_reset,
4447                 .dev_selftest     = test_dlb2_eventdev,
4448         };
4449
4452         dev->dev_ops = &dlb2_eventdev_entry_ops;
4453         dev->enqueue = dlb2_event_enqueue;
4454         dev->enqueue_burst = dlb2_event_enqueue_burst;
4455         dev->enqueue_new_burst = dlb2_event_enqueue_new_burst;
4456         dev->enqueue_forward_burst = dlb2_event_enqueue_forward_burst;
4457
4458         dlb2 = dev->data->dev_private;
4459         if (dlb2->poll_mode == DLB2_CQ_POLL_MODE_SPARSE) {
4460                 dev->dequeue = dlb2_event_dequeue_sparse;
4461                 dev->dequeue_burst = dlb2_event_dequeue_burst_sparse;
4462         } else {
4463                 dev->dequeue = dlb2_event_dequeue;
4464                 dev->dequeue_burst = dlb2_event_dequeue_burst;
4465         }
4466 }
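
/*
 * Note (added commentary, not part of the original driver): the dequeue entry
 * points are chosen here based on the CQ poll mode reported by the underlying
 * driver (queried via dlb2_iface_get_cq_poll_mode() in the probe path below).
 */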
4467
4468 int
4469 dlb2_primary_eventdev_probe(struct rte_eventdev *dev,
4470                             const char *name,
4471                             struct dlb2_devargs *dlb2_args)
4472 {
4473         struct dlb2_eventdev *dlb2;
4474         int err, i;
4475
4476         dlb2 = dev->data->dev_private;
4477
4478         dlb2->event_dev = dev; /* backlink */
4479
4480         evdev_dlb2_default_info.driver_name = name;
4481
4482         dlb2->max_num_events_override = dlb2_args->max_num_events;
4483         dlb2->num_dir_credits_override = dlb2_args->num_dir_credits_override;
4484         dlb2->qm_instance.cos_id = dlb2_args->cos_id;
4485         dlb2->poll_interval = dlb2_args->poll_interval;
4486         dlb2->sw_credit_quanta = dlb2_args->sw_credit_quanta;
4487         dlb2->hw_credit_quanta = dlb2_args->hw_credit_quanta;
4488         dlb2->default_depth_thresh = dlb2_args->default_depth_thresh;
4489         dlb2->vector_opts_enabled = dlb2_args->vector_opts_enabled;
4490
4491         err = dlb2_iface_open(&dlb2->qm_instance, name);
4492         if (err < 0) {
4493                 DLB2_LOG_ERR("could not open event hardware device, err=%d\n",
4494                              err);
4495                 return err;
4496         }
4497
4498         err = dlb2_iface_get_device_version(&dlb2->qm_instance,
4499                                             &dlb2->revision);
4500         if (err < 0) {
4501                 DLB2_LOG_ERR("dlb2: failed to get the device version, err=%d\n",
4502                              err);
4503                 return err;
4504         }
4505
4506         err = dlb2_hw_query_resources(dlb2);
4507         if (err) {
4508                 DLB2_LOG_ERR("get resources err=%d for %s\n",
4509                              err, name);
4510                 return err;
4511         }
4512
4513         dlb2_iface_hardware_init(&dlb2->qm_instance);
4514
4515         err = dlb2_iface_get_cq_poll_mode(&dlb2->qm_instance, &dlb2->poll_mode);
4516         if (err < 0) {
4517                 DLB2_LOG_ERR("dlb2: failed to get the poll mode, err=%d\n",
4518                              err);
4519                 return err;
4520         }
4521
4522         /* Complete xstats runtime initialization */
4523         err = dlb2_xstats_init(dlb2);
4524         if (err) {
4525                 DLB2_LOG_ERR("dlb2: failed to init xstats, err=%d\n", err);
4526                 return err;
4527         }
4528
4529         /* Initialize each port's token pop mode */
4530         for (i = 0; i < DLB2_MAX_NUM_PORTS(dlb2->version); i++)
4531                 dlb2->ev_ports[i].qm_port.token_pop_mode = AUTO_POP;
4532
4533         rte_spinlock_init(&dlb2->qm_instance.resource_lock);
4534
4535         dlb2_iface_low_level_io_init();
4536
4537         dlb2_entry_points_init(dev);
4538
4539         dlb2_init_queue_depth_thresholds(dlb2,
4540                                          dlb2_args->qid_depth_thresholds.val);
4541
4542         return 0;
4543 }
4544
4545 int
4546 dlb2_secondary_eventdev_probe(struct rte_eventdev *dev,
4547                               const char *name)
4548 {
4549         struct dlb2_eventdev *dlb2;
4550         int err;
4551
4552         dlb2 = dev->data->dev_private;
4553
4554         evdev_dlb2_default_info.driver_name = name;
4555
4556         err = dlb2_iface_open(&dlb2->qm_instance, name);
4557         if (err < 0) {
4558                 DLB2_LOG_ERR("could not open event hardware device, err=%d\n",
4559                              err);
4560                 return err;
4561         }
4562
4563         err = dlb2_hw_query_resources(dlb2);
4564         if (err) {
4565                 DLB2_LOG_ERR("get resources err=%d for %s\n",
4566                              err, name);
4567                 return err;
4568         }
4569
4570         dlb2_iface_low_level_io_init();
4571
4572         dlb2_entry_points_init(dev);
4573
4574         return 0;
4575 }
4576
4577 int
4578 dlb2_parse_params(const char *params,
4579                   const char *name,
4580                   struct dlb2_devargs *dlb2_args,
4581                   uint8_t version)
4582 {
4583         int ret = 0;
4584         static const char * const args[] = { NUMA_NODE_ARG,
4585                                              DLB2_MAX_NUM_EVENTS,
4586                                              DLB2_NUM_DIR_CREDITS,
4587                                              DEV_ID_ARG,
4588                                              DLB2_QID_DEPTH_THRESH_ARG,
4589                                              DLB2_COS_ARG,
4590                                              DLB2_POLL_INTERVAL_ARG,
4591                                              DLB2_SW_CREDIT_QUANTA_ARG,
4592                                              DLB2_HW_CREDIT_QUANTA_ARG,
4593                                              DLB2_DEPTH_THRESH_ARG,
4594                                              DLB2_VECTOR_OPTS_ENAB_ARG,
4595                                              NULL };
4596
4597         if (params != NULL && params[0] != '\0') {
4598                 struct rte_kvargs *kvlist = rte_kvargs_parse(params, args);
4599
4600                 if (kvlist == NULL) {
4601                         RTE_LOG(INFO, PMD,
4602                                 "Ignoring unsupported parameters when creating device '%s'\n",
4603                                 name);
4604                 } else {
4605                         ret = rte_kvargs_process(kvlist, NUMA_NODE_ARG,
4606                                                  set_numa_node,
4607                                                  &dlb2_args->socket_id);
4608                         if (ret != 0) {
4609                                 DLB2_LOG_ERR("%s: Error parsing numa node parameter",
4610                                              name);
4611                                 rte_kvargs_free(kvlist);
4612                                 return ret;
4613                         }
4614
4615                         ret = rte_kvargs_process(kvlist, DLB2_MAX_NUM_EVENTS,
4616                                                  set_max_num_events,
4617                                                  &dlb2_args->max_num_events);
4618                         if (ret != 0) {
4619                                 DLB2_LOG_ERR("%s: Error parsing max_num_events parameter",
4620                                              name);
4621                                 rte_kvargs_free(kvlist);
4622                                 return ret;
4623                         }
4624
4625                         if (version == DLB2_HW_V2) {
4626                                 ret = rte_kvargs_process(kvlist,
4627                                         DLB2_NUM_DIR_CREDITS,
4628                                         set_num_dir_credits,
4629                                         &dlb2_args->num_dir_credits_override);
4630                                 if (ret != 0) {
4631                                         DLB2_LOG_ERR("%s: Error parsing num_dir_credits parameter",
4632                                                      name);
4633                                         rte_kvargs_free(kvlist);
4634                                         return ret;
4635                                 }
4636                         }
4637                         ret = rte_kvargs_process(kvlist, DEV_ID_ARG,
4638                                                  set_dev_id,
4639                                                  &dlb2_args->dev_id);
4640                         if (ret != 0) {
4641                                 DLB2_LOG_ERR("%s: Error parsing dev_id parameter",
4642                                              name);
4643                                 rte_kvargs_free(kvlist);
4644                                 return ret;
4645                         }
4646
4647                         if (version == DLB2_HW_V2) {
4648                                 ret = rte_kvargs_process(
4649                                         kvlist,
4650                                         DLB2_QID_DEPTH_THRESH_ARG,
4651                                         set_qid_depth_thresh,
4652                                         &dlb2_args->qid_depth_thresholds);
4653                         } else {
4654                                 ret = rte_kvargs_process(
4655                                         kvlist,
4656                                         DLB2_QID_DEPTH_THRESH_ARG,
4657                                         set_qid_depth_thresh_v2_5,
4658                                         &dlb2_args->qid_depth_thresholds);
4659                         }
4660                         if (ret != 0) {
4661                                 DLB2_LOG_ERR("%s: Error parsing qid_depth_thresh parameter",
4662                                              name);
4663                                 rte_kvargs_free(kvlist);
4664                                 return ret;
4665                         }
4666
4667                         ret = rte_kvargs_process(kvlist, DLB2_COS_ARG,
4668                                                  set_cos,
4669                                                  &dlb2_args->cos_id);
4670                         if (ret != 0) {
4671                                 DLB2_LOG_ERR("%s: Error parsing cos parameter",
4672                                              name);
4673                                 rte_kvargs_free(kvlist);
4674                                 return ret;
4675                         }
4676
4677                         ret = rte_kvargs_process(kvlist, DLB2_POLL_INTERVAL_ARG,
4678                                                  set_poll_interval,
4679                                                  &dlb2_args->poll_interval);
4680                         if (ret != 0) {
4681                                 DLB2_LOG_ERR("%s: Error parsing poll interval parameter",
4682                                              name);
4683                                 rte_kvargs_free(kvlist);
4684                                 return ret;
4685                         }
4686
4687                         ret = rte_kvargs_process(kvlist,
4688                                                  DLB2_SW_CREDIT_QUANTA_ARG,
4689                                                  set_sw_credit_quanta,
4690                                                  &dlb2_args->sw_credit_quanta);
4691                         if (ret != 0) {
4692                                 DLB2_LOG_ERR("%s: Error parsing sw credit quanta parameter",
4693                                              name);
4694                                 rte_kvargs_free(kvlist);
4695                                 return ret;
4696                         }
4697
4698                         ret = rte_kvargs_process(kvlist,
4699                                                  DLB2_HW_CREDIT_QUANTA_ARG,
4700                                                  set_hw_credit_quanta,
4701                                                  &dlb2_args->hw_credit_quanta);
4702                         if (ret != 0) {
4703                                 DLB2_LOG_ERR("%s: Error parsing hw credit quanta parameter",
4704                                              name);
4705                                 rte_kvargs_free(kvlist);
4706                                 return ret;
4707                         }
4708
4709                         ret = rte_kvargs_process(kvlist, DLB2_DEPTH_THRESH_ARG,
4710                                         set_default_depth_thresh,
4711                                         &dlb2_args->default_depth_thresh);
4712                         if (ret != 0) {
4713                                 DLB2_LOG_ERR("%s: Error parsing default depth thresh parameter",
4714                                              name);
4715                                 rte_kvargs_free(kvlist);
4716                                 return ret;
4717                         }
4718
4719                         ret = rte_kvargs_process(kvlist,
4720                                         DLB2_VECTOR_OPTS_ENAB_ARG,
4721                                         set_vector_opts_enab,
4722                                         &dlb2_args->vector_opts_enabled);
4723                         if (ret != 0) {
4724                                 DLB2_LOG_ERR("%s: Error parsing vector opts enabled parameter",
4725                                              name);
4726                                 rte_kvargs_free(kvlist);
4727                                 return ret;
4728                         }
4729
4730                         rte_kvargs_free(kvlist);
4731                 }
4732         }
4733         return ret;
4734 }
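
/*
 * Illustrative devargs sketch (added commentary, not part of the original
 * driver): the keys parsed above arrive as EAL device arguments. Assuming the
 * kvargs strings match their macro names, an invocation could look like this
 * (PCI address and values are placeholders):
 *
 *     dpdk-app -a ea:00.0,max_num_events=8192,num_dir_credits=1024
 *
 * Unrecognized keys simply trigger the informational log emitted above.
 */
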
4735 RTE_LOG_REGISTER_DEFAULT(eventdev_dlb2_log_level, NOTICE);