eventdev: negate maintenance capability flag
[dpdk.git] / drivers/event/dlb2/dlb2.c
1 /* SPDX-License-Identifier: BSD-3-Clause
2  * Copyright(c) 2016-2020 Intel Corporation
3  */
4
5 #include <assert.h>
6 #include <errno.h>
7 #include <nmmintrin.h>
8 #include <pthread.h>
9 #include <stdint.h>
10 #include <stdbool.h>
11 #include <stdio.h>
12 #include <string.h>
13 #include <sys/mman.h>
14 #include <fcntl.h>
15
16 #include <rte_common.h>
17 #include <rte_config.h>
18 #include <rte_cycles.h>
19 #include <rte_debug.h>
20 #include <rte_dev.h>
21 #include <rte_errno.h>
22 #include <rte_eventdev.h>
23 #include <eventdev_pmd.h>
24 #include <rte_io.h>
25 #include <rte_kvargs.h>
26 #include <rte_log.h>
27 #include <rte_malloc.h>
28 #include <rte_mbuf.h>
29 #include <rte_power_intrinsics.h>
30 #include <rte_prefetch.h>
31 #include <rte_ring.h>
32 #include <rte_string_fns.h>
33
34 #include "dlb2_priv.h"
35 #include "dlb2_iface.h"
36 #include "dlb2_inline_fns.h"
37
38 /*
39  * Resources exposed to eventdev. Some values overridden at runtime using
40  * values returned by the DLB kernel driver.
41  */
42 #if (RTE_EVENT_MAX_QUEUES_PER_DEV > UINT8_MAX)
43 #error "RTE_EVENT_MAX_QUEUES_PER_DEV cannot fit in member max_event_queues"
44 #endif
45 static struct rte_event_dev_info evdev_dlb2_default_info = {
46         .driver_name = "", /* probe will set */
47         .min_dequeue_timeout_ns = DLB2_MIN_DEQUEUE_TIMEOUT_NS,
48         .max_dequeue_timeout_ns = DLB2_MAX_DEQUEUE_TIMEOUT_NS,
49 #if (RTE_EVENT_MAX_QUEUES_PER_DEV < DLB2_MAX_NUM_LDB_QUEUES)
50         .max_event_queues = RTE_EVENT_MAX_QUEUES_PER_DEV,
51 #else
52         .max_event_queues = DLB2_MAX_NUM_LDB_QUEUES,
53 #endif
54         .max_event_queue_flows = DLB2_MAX_NUM_FLOWS,
55         .max_event_queue_priority_levels = DLB2_QID_PRIORITIES,
56         .max_event_priority_levels = DLB2_QID_PRIORITIES,
57         .max_event_ports = DLB2_MAX_NUM_LDB_PORTS,
58         .max_event_port_dequeue_depth = DLB2_MAX_CQ_DEPTH,
59         .max_event_port_enqueue_depth = DLB2_MAX_ENQUEUE_DEPTH,
60         .max_event_port_links = DLB2_MAX_NUM_QIDS_PER_LDB_CQ,
61         .max_num_events = DLB2_MAX_NUM_LDB_CREDITS,
62         .max_single_link_event_port_queue_pairs =
63                 DLB2_MAX_NUM_DIR_PORTS(DLB2_HW_V2),
64         .event_dev_cap = (RTE_EVENT_DEV_CAP_QUEUE_QOS |
65                           RTE_EVENT_DEV_CAP_EVENT_QOS |
66                           RTE_EVENT_DEV_CAP_BURST_MODE |
67                           RTE_EVENT_DEV_CAP_DISTRIBUTED_SCHED |
68                           RTE_EVENT_DEV_CAP_IMPLICIT_RELEASE_DISABLE |
69                           RTE_EVENT_DEV_CAP_QUEUE_ALL_TYPES |
70                           RTE_EVENT_DEV_CAP_MAINTENANCE_FREE),
71 };
72
73 struct process_local_port_data
74 dlb2_port[DLB2_MAX_NUM_PORTS_ALL][DLB2_NUM_PORT_TYPES];
75
76 static void
77 dlb2_free_qe_mem(struct dlb2_port *qm_port)
78 {
79         if (qm_port == NULL)
80                 return;
81
82         rte_free(qm_port->qe4);
83         qm_port->qe4 = NULL;
84
85         rte_free(qm_port->int_arm_qe);
86         qm_port->int_arm_qe = NULL;
87
88         rte_free(qm_port->consume_qe);
89         qm_port->consume_qe = NULL;
90
91         rte_memzone_free(dlb2_port[qm_port->id][PORT_TYPE(qm_port)].mz);
92         dlb2_port[qm_port->id][PORT_TYPE(qm_port)].mz = NULL;
93 }
94
95 /* override defaults with value(s) provided on command line */
96 static void
97 dlb2_init_queue_depth_thresholds(struct dlb2_eventdev *dlb2,
98                                  int *qid_depth_thresholds)
99 {
100         int q;
101
102         for (q = 0; q < DLB2_MAX_NUM_QUEUES(dlb2->version); q++) {
103                 if (qid_depth_thresholds[q] != 0)
104                         dlb2->ev_queues[q].depth_threshold =
105                                 qid_depth_thresholds[q];
106         }
107 }
108
109 static int
110 dlb2_hw_query_resources(struct dlb2_eventdev *dlb2)
111 {
112         struct dlb2_hw_dev *handle = &dlb2->qm_instance;
113         struct dlb2_hw_resource_info *dlb2_info = &handle->info;
114         int ret;
115
116         /* Query driver resources provisioned for this device */
117
118         ret = dlb2_iface_get_num_resources(handle,
119                                            &dlb2->hw_rsrc_query_results);
120         if (ret) {
121                 DLB2_LOG_ERR("ioctl get dlb2 num resources failed, err=%d\n", ret);
122                 return ret;
123         }
124
125         /* Complete filling in device resource info returned to evdev app,
126          * overriding any default values.
127          * The capabilities (CAPs) were set at compile time.
128          */
129
130         evdev_dlb2_default_info.max_event_queues =
131                 dlb2->hw_rsrc_query_results.num_ldb_queues;
132
133         evdev_dlb2_default_info.max_event_ports =
134                 dlb2->hw_rsrc_query_results.num_ldb_ports;
135
136         if (dlb2->version == DLB2_HW_V2_5) {
137                 evdev_dlb2_default_info.max_num_events =
138                         dlb2->hw_rsrc_query_results.num_credits;
139         } else {
140                 evdev_dlb2_default_info.max_num_events =
141                         dlb2->hw_rsrc_query_results.num_ldb_credits;
142         }
143         /* Save off values used when creating the scheduling domain. */
144
145         handle->info.num_sched_domains =
146                 dlb2->hw_rsrc_query_results.num_sched_domains;
147
148         if (dlb2->version == DLB2_HW_V2_5) {
149                 handle->info.hw_rsrc_max.nb_events_limit =
150                         dlb2->hw_rsrc_query_results.num_credits;
151         } else {
152                 handle->info.hw_rsrc_max.nb_events_limit =
153                         dlb2->hw_rsrc_query_results.num_ldb_credits;
154         }
155         handle->info.hw_rsrc_max.num_queues =
156                 dlb2->hw_rsrc_query_results.num_ldb_queues +
157                 dlb2->hw_rsrc_query_results.num_dir_ports;
158
159         handle->info.hw_rsrc_max.num_ldb_queues =
160                 dlb2->hw_rsrc_query_results.num_ldb_queues;
161
162         handle->info.hw_rsrc_max.num_ldb_ports =
163                 dlb2->hw_rsrc_query_results.num_ldb_ports;
164
165         handle->info.hw_rsrc_max.num_dir_ports =
166                 dlb2->hw_rsrc_query_results.num_dir_ports;
167
168         handle->info.hw_rsrc_max.reorder_window_size =
169                 dlb2->hw_rsrc_query_results.num_hist_list_entries;
170
171         rte_memcpy(dlb2_info, &handle->info.hw_rsrc_max, sizeof(*dlb2_info));
172
173         return 0;
174 }
175
176 #define DLB2_BASE_10 10
177
178 static int
179 dlb2_string_to_int(int *result, const char *str)
180 {
181         long ret;
182         char *endptr;
183
184         if (str == NULL || result == NULL)
185                 return -EINVAL;
186
187         errno = 0;
188         ret = strtol(str, &endptr, DLB2_BASE_10);
189         if (errno)
190                 return -errno;
191
192         /* long int and int may be different width for some architectures */
193         if (ret < INT_MIN || ret > INT_MAX || endptr == str)
194                 return -EINVAL;
195
196         *result = ret;
197         return 0;
198 }
199
200 static int
201 set_numa_node(const char *key __rte_unused, const char *value, void *opaque)
202 {
203         int *socket_id = opaque;
204         int ret;
205
206         ret = dlb2_string_to_int(socket_id, value);
207         if (ret < 0)
208                 return ret;
209
210         if (*socket_id > RTE_MAX_NUMA_NODES)
211                 return -EINVAL;
212         return 0;
213 }
214
215 static int
216 set_max_num_events(const char *key __rte_unused,
217                    const char *value,
218                    void *opaque)
219 {
220         int *max_num_events = opaque;
221         int ret;
222
223         if (value == NULL || opaque == NULL) {
224                 DLB2_LOG_ERR("NULL pointer\n");
225                 return -EINVAL;
226         }
227
228         ret = dlb2_string_to_int(max_num_events, value);
229         if (ret < 0)
230                 return ret;
231
232         if (*max_num_events < 0 || *max_num_events >
233                         DLB2_MAX_NUM_LDB_CREDITS) {
234                 DLB2_LOG_ERR("dlb2: max_num_events must be between 0 and %d\n",
235                              DLB2_MAX_NUM_LDB_CREDITS);
236                 return -EINVAL;
237         }
238
239         return 0;
240 }
241
242 static int
243 set_num_dir_credits(const char *key __rte_unused,
244                     const char *value,
245                     void *opaque)
246 {
247         int *num_dir_credits = opaque;
248         int ret;
249
250         if (value == NULL || opaque == NULL) {
251                 DLB2_LOG_ERR("NULL pointer\n");
252                 return -EINVAL;
253         }
254
255         ret = dlb2_string_to_int(num_dir_credits, value);
256         if (ret < 0)
257                 return ret;
258
259         if (*num_dir_credits < 0 ||
260             *num_dir_credits > DLB2_MAX_NUM_DIR_CREDITS(DLB2_HW_V2)) {
261                 DLB2_LOG_ERR("dlb2: num_dir_credits must be between 0 and %d\n",
262                              DLB2_MAX_NUM_DIR_CREDITS(DLB2_HW_V2));
263                 return -EINVAL;
264         }
265
266         return 0;
267 }
268
269 static int
270 set_dev_id(const char *key __rte_unused,
271            const char *value,
272            void *opaque)
273 {
274         int *dev_id = opaque;
275         int ret;
276
277         if (value == NULL || opaque == NULL) {
278                 DLB2_LOG_ERR("NULL pointer\n");
279                 return -EINVAL;
280         }
281
282         ret = dlb2_string_to_int(dev_id, value);
283         if (ret < 0)
284                 return ret;
285
286         return 0;
287 }
288
289 static int
290 set_cos(const char *key __rte_unused,
291         const char *value,
292         void *opaque)
293 {
294         enum dlb2_cos *cos_id = opaque;
295         int x = 0;
296         int ret;
297
298         if (value == NULL || opaque == NULL) {
299                 DLB2_LOG_ERR("NULL pointer\n");
300                 return -EINVAL;
301         }
302
303         ret = dlb2_string_to_int(&x, value);
304         if (ret < 0)
305                 return ret;
306
307         if (x != DLB2_COS_DEFAULT && (x < DLB2_COS_0 || x > DLB2_COS_3)) {
308                 DLB2_LOG_ERR(
309                         "COS %d out of range, must be DLB2_COS_DEFAULT or 0-3\n",
310                         x);
311                 return -EINVAL;
312         }
313
314         *cos_id = x;
315
316         return 0;
317 }
318
319 static int
320 set_poll_interval(const char *key __rte_unused,
321         const char *value,
322         void *opaque)
323 {
324         int *poll_interval = opaque;
325         int ret;
326
327         if (value == NULL || opaque == NULL) {
328                 DLB2_LOG_ERR("NULL pointer\n");
329                 return -EINVAL;
330         }
331
332         ret = dlb2_string_to_int(poll_interval, value);
333         if (ret < 0)
334                 return ret;
335
336         return 0;
337 }
338
339 static int
340 set_sw_credit_quanta(const char *key __rte_unused,
341         const char *value,
342         void *opaque)
343 {
344         int *sw_credit_quanta = opaque;
345         int ret;
346
347         if (value == NULL || opaque == NULL) {
348                 DLB2_LOG_ERR("NULL pointer\n");
349                 return -EINVAL;
350         }
351
352         ret = dlb2_string_to_int(sw_credit_quanta, value);
353         if (ret < 0)
354                 return ret;
355
356         return 0;
357 }
358
359 static int
360 set_hw_credit_quanta(const char *key __rte_unused,
361         const char *value,
362         void *opaque)
363 {
364         int *hw_credit_quanta = opaque;
365         int ret;
366
367         if (value == NULL || opaque == NULL) {
368                 DLB2_LOG_ERR("NULL pointer\n");
369                 return -EINVAL;
370         }
371
372         ret = dlb2_string_to_int(hw_credit_quanta, value);
373         if (ret < 0)
374                 return ret;
375
376         return 0;
377 }
378
379 static int
380 set_default_depth_thresh(const char *key __rte_unused,
381         const char *value,
382         void *opaque)
383 {
384         int *default_depth_thresh = opaque;
385         int ret;
386
387         if (value == NULL || opaque == NULL) {
388                 DLB2_LOG_ERR("NULL pointer\n");
389                 return -EINVAL;
390         }
391
392         ret = dlb2_string_to_int(default_depth_thresh, value);
393         if (ret < 0)
394                 return ret;
395
396         return 0;
397 }
398
399 static int
400 set_vector_opts_enab(const char *key __rte_unused,
401         const char *value,
402         void *opaque)
403 {
404         bool *dlb2_vector_opts_enabled = opaque;
405
406         if (value == NULL || opaque == NULL) {
407                 DLB2_LOG_ERR("NULL pointer\n");
408                 return -EINVAL;
409         }
410
411         if ((*value == 'y') || (*value == 'Y'))
412                 *dlb2_vector_opts_enabled = true;
413         else
414                 *dlb2_vector_opts_enabled = false;
415
416         return 0;
417 }
418
419 static int
420 set_qid_depth_thresh(const char *key __rte_unused,
421                      const char *value,
422                      void *opaque)
423 {
424         struct dlb2_qid_depth_thresholds *qid_thresh = opaque;
425         int first, last, thresh, i;
426
427         if (value == NULL || opaque == NULL) {
428                 DLB2_LOG_ERR("NULL pointer\n");
429                 return -EINVAL;
430         }
431
432         /* command line override may take one of the following 3 forms:
433          * qid_depth_thresh=all:<threshold_value> ... all queues
434          * qid_depth_thresh=qidA-qidB:<threshold_value> ... a range of queues
435          * qid_depth_thresh=qid:<threshold_value> ... just one queue
436          */
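        /* Illustrative examples (derived from the forms above, values are
         * arbitrary): "qid_depth_thresh=all:256" sets every queue to 256,
         * "qid_depth_thresh=2-5:128" covers queues 2 through 5, and
         * "qid_depth_thresh=7:64" covers queue 7 only.
         */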
437         if (sscanf(value, "all:%d", &thresh) == 1) {
438                 first = 0;
439                 last = DLB2_MAX_NUM_QUEUES(DLB2_HW_V2) - 1;
440         } else if (sscanf(value, "%d-%d:%d", &first, &last, &thresh) == 3) {
441                 /* we have everything we need */
442         } else if (sscanf(value, "%d:%d", &first, &thresh) == 2) {
443                 last = first;
444         } else {
445                 DLB2_LOG_ERR("Error parsing qid depth devarg. Should be all:val, qid-qid:val, or qid:val\n");
446                 return -EINVAL;
447         }
448
449         if (first > last || first < 0 ||
450                 last >= DLB2_MAX_NUM_QUEUES(DLB2_HW_V2)) {
451                 DLB2_LOG_ERR("Error parsing qid depth devarg, invalid qid value\n");
452                 return -EINVAL;
453         }
454
455         if (thresh < 0 || thresh > DLB2_MAX_QUEUE_DEPTH_THRESHOLD) {
456                 DLB2_LOG_ERR("Error parsing qid depth devarg, threshold > %d\n",
457                              DLB2_MAX_QUEUE_DEPTH_THRESHOLD);
458                 return -EINVAL;
459         }
460
461         for (i = first; i <= last; i++)
462                 qid_thresh->val[i] = thresh; /* indexed by qid */
463
464         return 0;
465 }
466
467 static int
468 set_qid_depth_thresh_v2_5(const char *key __rte_unused,
469                           const char *value,
470                           void *opaque)
471 {
472         struct dlb2_qid_depth_thresholds *qid_thresh = opaque;
473         int first, last, thresh, i;
474
475         if (value == NULL || opaque == NULL) {
476                 DLB2_LOG_ERR("NULL pointer\n");
477                 return -EINVAL;
478         }
479
480         /* command line override may take one of the following 3 forms:
481          * qid_depth_thresh=all:<threshold_value> ... all queues
482          * qid_depth_thresh=qidA-qidB:<threshold_value> ... a range of queues
483          * qid_depth_thresh=qid:<threshold_value> ... just one queue
484          */
485         if (sscanf(value, "all:%d", &thresh) == 1) {
486                 first = 0;
487                 last = DLB2_MAX_NUM_QUEUES(DLB2_HW_V2_5) - 1;
488         } else if (sscanf(value, "%d-%d:%d", &first, &last, &thresh) == 3) {
489                 /* we have everything we need */
490         } else if (sscanf(value, "%d:%d", &first, &thresh) == 2) {
491                 last = first;
492         } else {
493                 DLB2_LOG_ERR("Error parsing qid depth devarg. Should be all:val, qid-qid:val, or qid:val\n");
494                 return -EINVAL;
495         }
496
497         if (first > last || first < 0 ||
498                 last >= DLB2_MAX_NUM_QUEUES(DLB2_HW_V2_5)) {
499                 DLB2_LOG_ERR("Error parsing qid depth devarg, invalid qid value\n");
500                 return -EINVAL;
501         }
502
503         if (thresh < 0 || thresh > DLB2_MAX_QUEUE_DEPTH_THRESHOLD) {
504                 DLB2_LOG_ERR("Error parsing qid depth devarg, threshold > %d\n",
505                              DLB2_MAX_QUEUE_DEPTH_THRESHOLD);
506                 return -EINVAL;
507         }
508
509         for (i = first; i <= last; i++)
510                 qid_thresh->val[i] = thresh; /* indexed by qid */
511
512         return 0;
513 }
514
515 static void
516 dlb2_eventdev_info_get(struct rte_eventdev *dev,
517                        struct rte_event_dev_info *dev_info)
518 {
519         struct dlb2_eventdev *dlb2 = dlb2_pmd_priv(dev);
520         int ret;
521
522         ret = dlb2_hw_query_resources(dlb2);
523         if (ret) {
524                 const struct rte_eventdev_data *data = dev->data;
525
526                 DLB2_LOG_ERR("get resources err=%d, devid=%d\n",
527                              ret, data->dev_id);
528                 /* fn is void, so fall through and return values set up in
529                  * probe
530                  */
531         }
532
533         /* Add num resources currently owned by this domain.
534          * These would become available if the scheduling domain were reset due
535          * to the application recalling eventdev_configure to *reconfigure* the
536          * domain.
537          */
538         evdev_dlb2_default_info.max_event_ports += dlb2->num_ldb_ports;
539         evdev_dlb2_default_info.max_event_queues += dlb2->num_ldb_queues;
540         if (dlb2->version == DLB2_HW_V2_5) {
541                 evdev_dlb2_default_info.max_num_events +=
542                         dlb2->max_credits;
543         } else {
544                 evdev_dlb2_default_info.max_num_events +=
545                         dlb2->max_ldb_credits;
546         }
547         evdev_dlb2_default_info.max_event_queues =
548                 RTE_MIN(evdev_dlb2_default_info.max_event_queues,
549                         RTE_EVENT_MAX_QUEUES_PER_DEV);
550
551         evdev_dlb2_default_info.max_num_events =
552                 RTE_MIN(evdev_dlb2_default_info.max_num_events,
553                         dlb2->max_num_events_override);
554
555         *dev_info = evdev_dlb2_default_info;
556 }
557
558 static int
559 dlb2_hw_create_sched_domain(struct dlb2_hw_dev *handle,
560                             const struct dlb2_hw_rsrcs *resources_asked,
561                             uint8_t device_version)
562 {
563         int ret = 0;
564         struct dlb2_create_sched_domain_args *cfg;
565
566         if (resources_asked == NULL) {
567                 DLB2_LOG_ERR("dlb2: dlb2_create NULL parameter\n");
568                 ret = EINVAL;
569                 goto error_exit;
570         }
571
572         /* Map generic qm resources to dlb2 resources */
573         cfg = &handle->cfg.resources;
574
575         /* DIR ports and queues */
576
577         cfg->num_dir_ports = resources_asked->num_dir_ports;
578         if (device_version == DLB2_HW_V2_5)
579                 cfg->num_credits = resources_asked->num_credits;
580         else
581                 cfg->num_dir_credits = resources_asked->num_dir_credits;
582
583         /* LDB queues */
584
585         cfg->num_ldb_queues = resources_asked->num_ldb_queues;
586
587         /* LDB ports */
588
589         cfg->cos_strict = 0; /* Best effort */
590         cfg->num_cos_ldb_ports[0] = 0;
591         cfg->num_cos_ldb_ports[1] = 0;
592         cfg->num_cos_ldb_ports[2] = 0;
593         cfg->num_cos_ldb_ports[3] = 0;
594
595         switch (handle->cos_id) {
596         case DLB2_COS_0:
597                 cfg->num_ldb_ports = 0; /* no don't care ports */
598                 cfg->num_cos_ldb_ports[0] =
599                         resources_asked->num_ldb_ports;
600                 break;
601         case DLB2_COS_1:
602                 cfg->num_ldb_ports = 0; /* no don't care ports */
603                 cfg->num_cos_ldb_ports[1] = resources_asked->num_ldb_ports;
604                 break;
605         case DLB2_COS_2:
606                 cfg->num_ldb_ports = 0; /* no don't care ports */
607                 cfg->num_cos_ldb_ports[2] = resources_asked->num_ldb_ports;
608                 break;
609         case DLB2_COS_3:
610                 cfg->num_ldb_ports = 0; /* no don't care ports */
611                 cfg->num_cos_ldb_ports[3] =
612                         resources_asked->num_ldb_ports;
613                 break;
614         case DLB2_COS_DEFAULT:
615                 /* all ldb ports are don't care ports from a cos perspective */
616                 cfg->num_ldb_ports =
617                         resources_asked->num_ldb_ports;
618                 break;
619         }
620
621         if (device_version == DLB2_HW_V2)
622                 cfg->num_ldb_credits = resources_asked->num_ldb_credits;
623
624         cfg->num_atomic_inflights =
625                 DLB2_NUM_ATOMIC_INFLIGHTS_PER_QUEUE *
626                 cfg->num_ldb_queues;
627
628         cfg->num_hist_list_entries = resources_asked->num_ldb_ports *
629                 DLB2_NUM_HIST_LIST_ENTRIES_PER_LDB_PORT;
630
631         if (device_version == DLB2_HW_V2_5) {
632                 DLB2_LOG_DBG("sched domain create - ldb_qs=%d, ldb_ports=%d, dir_ports=%d, atomic_inflights=%d, hist_list_entries=%d, credits=%d\n",
633                              cfg->num_ldb_queues,
634                              resources_asked->num_ldb_ports,
635                              cfg->num_dir_ports,
636                              cfg->num_atomic_inflights,
637                              cfg->num_hist_list_entries,
638                              cfg->num_credits);
639         } else {
640                 DLB2_LOG_DBG("sched domain create - ldb_qs=%d, ldb_ports=%d, dir_ports=%d, atomic_inflights=%d, hist_list_entries=%d, ldb_credits=%d, dir_credits=%d\n",
641                              cfg->num_ldb_queues,
642                              resources_asked->num_ldb_ports,
643                              cfg->num_dir_ports,
644                              cfg->num_atomic_inflights,
645                              cfg->num_hist_list_entries,
646                              cfg->num_ldb_credits,
647                              cfg->num_dir_credits);
648         }
649
650         /* Configure the QM */
651
652         ret = dlb2_iface_sched_domain_create(handle, cfg);
653         if (ret < 0) {
654                 DLB2_LOG_ERR("dlb2: domain create failed, ret = %d, extra status: %s\n",
655                              ret,
656                              dlb2_error_strings[cfg->response.status]);
657
658                 goto error_exit;
659         }
660
661         handle->domain_id = cfg->response.id;
662         handle->cfg.configured = true;
663
664 error_exit:
665
666         return ret;
667 }
668
669 static void
670 dlb2_hw_reset_sched_domain(const struct rte_eventdev *dev, bool reconfig)
671 {
672         struct dlb2_eventdev *dlb2 = dlb2_pmd_priv(dev);
673         enum dlb2_configuration_state config_state;
674         int i, j;
675
676         dlb2_iface_domain_reset(dlb2);
677
678         /* Free all dynamically allocated port memory */
679         for (i = 0; i < dlb2->num_ports; i++)
680                 dlb2_free_qe_mem(&dlb2->ev_ports[i].qm_port);
681
682         /* If reconfiguring, mark the device's queues and ports as "previously
683          * configured." If the user doesn't reconfigure them, the PMD will
684          * reapply their previous configuration when the device is started.
685          */
686         config_state = (reconfig) ? DLB2_PREV_CONFIGURED :
687                 DLB2_NOT_CONFIGURED;
688
689         for (i = 0; i < dlb2->num_ports; i++) {
690                 dlb2->ev_ports[i].qm_port.config_state = config_state;
691                 /* Reset setup_done so ports can be reconfigured */
692                 dlb2->ev_ports[i].setup_done = false;
693                 for (j = 0; j < DLB2_MAX_NUM_QIDS_PER_LDB_CQ; j++)
694                         dlb2->ev_ports[i].link[j].mapped = false;
695         }
696
697         for (i = 0; i < dlb2->num_queues; i++)
698                 dlb2->ev_queues[i].qm_queue.config_state = config_state;
699
700         for (i = 0; i < DLB2_MAX_NUM_QUEUES(DLB2_HW_V2_5); i++)
701                 dlb2->ev_queues[i].setup_done = false;
702
703         dlb2->num_ports = 0;
704         dlb2->num_ldb_ports = 0;
705         dlb2->num_dir_ports = 0;
706         dlb2->num_queues = 0;
707         dlb2->num_ldb_queues = 0;
708         dlb2->num_dir_queues = 0;
709         dlb2->configured = false;
710 }
711
712 /* Note: 1 QM instance per QM device, QM instance/device == event device */
713 static int
714 dlb2_eventdev_configure(const struct rte_eventdev *dev)
715 {
716         struct dlb2_eventdev *dlb2 = dlb2_pmd_priv(dev);
717         struct dlb2_hw_dev *handle = &dlb2->qm_instance;
718         struct dlb2_hw_rsrcs *rsrcs = &handle->info.hw_rsrc_max;
719         const struct rte_eventdev_data *data = dev->data;
720         const struct rte_event_dev_config *config = &data->dev_conf;
721         int ret;
722
723         /* If this eventdev is already configured, we must release the current
724          * scheduling domain before attempting to configure a new one.
725          */
726         if (dlb2->configured) {
727                 dlb2_hw_reset_sched_domain(dev, true);
728                 ret = dlb2_hw_query_resources(dlb2);
729                 if (ret) {
730                         DLB2_LOG_ERR("get resources err=%d, devid=%d\n",
731                                      ret, data->dev_id);
732                         return ret;
733                 }
734         }
735
736         if (config->nb_event_queues > rsrcs->num_queues) {
737                 DLB2_LOG_ERR("nb_event_queues parameter (%d) exceeds the QM device's capabilities (%d).\n",
738                              config->nb_event_queues,
739                              rsrcs->num_queues);
740                 return -EINVAL;
741         }
742         if (config->nb_event_ports > (rsrcs->num_ldb_ports
743                         + rsrcs->num_dir_ports)) {
744                 DLB2_LOG_ERR("nb_event_ports parameter (%d) exceeds the QM device's capabilities (%d).\n",
745                              config->nb_event_ports,
746                              (rsrcs->num_ldb_ports + rsrcs->num_dir_ports));
747                 return -EINVAL;
748         }
749         if (config->nb_events_limit > rsrcs->nb_events_limit) {
750                 DLB2_LOG_ERR("nb_events_limit parameter (%d) exceeds the QM device's capabilities (%d).\n",
751                              config->nb_events_limit,
752                              rsrcs->nb_events_limit);
753                 return -EINVAL;
754         }
755
756         if (config->event_dev_cfg & RTE_EVENT_DEV_CFG_PER_DEQUEUE_TIMEOUT)
757                 dlb2->global_dequeue_wait = false;
758         else {
759                 uint32_t timeout32;
760
761                 dlb2->global_dequeue_wait = true;
762
763                 /* note size mismatch of timeout vals in eventdev lib. */
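                /* Illustrative arithmetic (timer rate assumed, not from the
                 * source): with a 2 GHz timer, rte_get_timer_hz() / 1E9 == 2,
                 * so a 1000 ns dequeue timeout becomes 2000 wait ticks below.
                 */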
764                 timeout32 = config->dequeue_timeout_ns;
765
766                 dlb2->global_dequeue_wait_ticks =
767                         timeout32 * (rte_get_timer_hz() / 1E9);
768         }
769
770         /* Does this platform support umonitor/umwait? */
771         if (rte_cpu_get_flag_enabled(RTE_CPUFLAG_WAITPKG))
772                 dlb2->umwait_allowed = true;
773
774         rsrcs->num_dir_ports = config->nb_single_link_event_port_queues;
775         rsrcs->num_ldb_ports  = config->nb_event_ports - rsrcs->num_dir_ports;
776         /* 1 dir queue per dir port */
777         rsrcs->num_ldb_queues = config->nb_event_queues - rsrcs->num_dir_ports;
778
779         if (dlb2->version == DLB2_HW_V2_5) {
780                 rsrcs->num_credits = 0;
781                 if (rsrcs->num_ldb_queues || rsrcs->num_dir_ports)
782                         rsrcs->num_credits = config->nb_events_limit;
783         } else {
784                 /* Scale down nb_events_limit for directed credits,
785                  * since there are 4x as many load-balanced credits.
786                  */
787                 rsrcs->num_ldb_credits = 0;
788                 rsrcs->num_dir_credits = 0;
789
790                 if (rsrcs->num_ldb_queues)
791                         rsrcs->num_ldb_credits = config->nb_events_limit;
792                 if (rsrcs->num_dir_ports)
793                         rsrcs->num_dir_credits = config->nb_events_limit / 2;
794                 if (dlb2->num_dir_credits_override != -1)
795                         rsrcs->num_dir_credits = dlb2->num_dir_credits_override;
796         }
797
798         if (dlb2_hw_create_sched_domain(handle, rsrcs, dlb2->version) < 0) {
799                 DLB2_LOG_ERR("dlb2_hw_create_sched_domain failed\n");
800                 return -ENODEV;
801         }
802
803         dlb2->new_event_limit = config->nb_events_limit;
804         __atomic_store_n(&dlb2->inflights, 0, __ATOMIC_SEQ_CST);
805
806         /* Save number of ports/queues for this event dev */
807         dlb2->num_ports = config->nb_event_ports;
808         dlb2->num_queues = config->nb_event_queues;
809         dlb2->num_dir_ports = rsrcs->num_dir_ports;
810         dlb2->num_ldb_ports = dlb2->num_ports - dlb2->num_dir_ports;
811         dlb2->num_ldb_queues = dlb2->num_queues - dlb2->num_dir_ports;
812         dlb2->num_dir_queues = dlb2->num_dir_ports;
813         if (dlb2->version == DLB2_HW_V2_5) {
814                 dlb2->credit_pool = rsrcs->num_credits;
815                 dlb2->max_credits = rsrcs->num_credits;
816         } else {
817                 dlb2->ldb_credit_pool = rsrcs->num_ldb_credits;
818                 dlb2->max_ldb_credits = rsrcs->num_ldb_credits;
819                 dlb2->dir_credit_pool = rsrcs->num_dir_credits;
820                 dlb2->max_dir_credits = rsrcs->num_dir_credits;
821         }
822
823         dlb2->configured = true;
824
825         return 0;
826 }
827
828 static void
829 dlb2_eventdev_port_default_conf_get(struct rte_eventdev *dev,
830                                     uint8_t port_id,
831                                     struct rte_event_port_conf *port_conf)
832 {
833         RTE_SET_USED(port_id);
834         struct dlb2_eventdev *dlb2 = dlb2_pmd_priv(dev);
835
836         port_conf->new_event_threshold = dlb2->new_event_limit;
837         port_conf->dequeue_depth = 32;
838         port_conf->enqueue_depth = DLB2_MAX_ENQUEUE_DEPTH;
839         port_conf->event_port_cfg = 0;
840 }
841
842 static void
843 dlb2_eventdev_queue_default_conf_get(struct rte_eventdev *dev,
844                                      uint8_t queue_id,
845                                      struct rte_event_queue_conf *queue_conf)
846 {
847         RTE_SET_USED(dev);
848         RTE_SET_USED(queue_id);
849
850         queue_conf->nb_atomic_flows = 1024;
851         queue_conf->nb_atomic_order_sequences = 64;
852         queue_conf->event_queue_cfg = 0;
853         queue_conf->priority = 0;
854 }
855
856 static int32_t
857 dlb2_get_sn_allocation(struct dlb2_eventdev *dlb2, int group)
858 {
859         struct dlb2_hw_dev *handle = &dlb2->qm_instance;
860         struct dlb2_get_sn_allocation_args cfg;
861         int ret;
862
863         cfg.group = group;
864
865         ret = dlb2_iface_get_sn_allocation(handle, &cfg);
866         if (ret < 0) {
867                 DLB2_LOG_ERR("dlb2: get_sn_allocation ret=%d (driver status: %s)\n",
868                              ret, dlb2_error_strings[cfg.response.status]);
869                 return ret;
870         }
871
872         return cfg.response.id;
873 }
874
875 static int
876 dlb2_set_sn_allocation(struct dlb2_eventdev *dlb2, int group, int num)
877 {
878         struct dlb2_hw_dev *handle = &dlb2->qm_instance;
879         struct dlb2_set_sn_allocation_args cfg;
880         int ret;
881
882         cfg.num = num;
883         cfg.group = group;
884
885         ret = dlb2_iface_set_sn_allocation(handle, &cfg);
886         if (ret < 0) {
887                 DLB2_LOG_ERR("dlb2: set_sn_allocation ret=%d (driver status: %s)\n",
888                              ret, dlb2_error_strings[cfg.response.status]);
889                 return ret;
890         }
891
892         return ret;
893 }
894
895 static int32_t
896 dlb2_get_sn_occupancy(struct dlb2_eventdev *dlb2, int group)
897 {
898         struct dlb2_hw_dev *handle = &dlb2->qm_instance;
899         struct dlb2_get_sn_occupancy_args cfg;
900         int ret;
901
902         cfg.group = group;
903
904         ret = dlb2_iface_get_sn_occupancy(handle, &cfg);
905         if (ret < 0) {
906                 DLB2_LOG_ERR("dlb2: get_sn_occupancy ret=%d (driver status: %s)\n",
907                              ret, dlb2_error_strings[cfg.response.status]);
908                 return ret;
909         }
910
911         return cfg.response.id;
912 }
913
914 /* Query the current sequence number allocations and, if they conflict with the
915  * requested LDB queue configuration, attempt to re-allocate sequence numbers.
916  * This is best-effort; if it fails, the PMD will attempt to configure the
917  * load-balanced queue and return an error.
918  */
919 static void
920 dlb2_program_sn_allocation(struct dlb2_eventdev *dlb2,
921                            const struct rte_event_queue_conf *queue_conf)
922 {
923         int grp_occupancy[DLB2_NUM_SN_GROUPS];
924         int grp_alloc[DLB2_NUM_SN_GROUPS];
925         int i, sequence_numbers;
926
927         sequence_numbers = (int)queue_conf->nb_atomic_order_sequences;
928
929         for (i = 0; i < DLB2_NUM_SN_GROUPS; i++) {
930                 int total_slots;
931
932                 grp_alloc[i] = dlb2_get_sn_allocation(dlb2, i);
933                 if (grp_alloc[i] < 0)
934                         return;
935
936                 total_slots = DLB2_MAX_LDB_SN_ALLOC / grp_alloc[i];
937
938                 grp_occupancy[i] = dlb2_get_sn_occupancy(dlb2, i);
939                 if (grp_occupancy[i] < 0)
940                         return;
941
942                 /* DLB has at least one available slot for the requested
943                  * sequence numbers, so no further configuration required.
944                  */
945                 if (grp_alloc[i] == sequence_numbers &&
946                     grp_occupancy[i] < total_slots)
947                         return;
948         }
949
950         /* None of the sequence number groups are configured for the requested
951          * sequence numbers, so we have to reconfigure one of them. This is
952          * only possible if a group is not in use.
953          */
954         for (i = 0; i < DLB2_NUM_SN_GROUPS; i++) {
955                 if (grp_occupancy[i] == 0)
956                         break;
957         }
958
959         if (i == DLB2_NUM_SN_GROUPS) {
960                 DLB2_LOG_ERR("[%s()] No groups with %d sequence_numbers are available or have free slots\n",
961                        __func__, sequence_numbers);
962                 return;
963         }
964
965         /* Attempt to configure slot i with the requested number of sequence
966          * numbers. Ignore the return value -- if this fails, the error will be
967          * caught during subsequent queue configuration.
968          */
969         dlb2_set_sn_allocation(dlb2, i, sequence_numbers);
970 }
971
972 static int32_t
973 dlb2_hw_create_ldb_queue(struct dlb2_eventdev *dlb2,
974                          struct dlb2_eventdev_queue *ev_queue,
975                          const struct rte_event_queue_conf *evq_conf)
976 {
977         struct dlb2_hw_dev *handle = &dlb2->qm_instance;
978         struct dlb2_queue *queue = &ev_queue->qm_queue;
979         struct dlb2_create_ldb_queue_args cfg;
980         int32_t ret;
981         uint32_t qm_qid;
982         int sched_type = -1;
983
984         if (evq_conf == NULL)
985                 return -EINVAL;
986
987         if (evq_conf->event_queue_cfg & RTE_EVENT_QUEUE_CFG_ALL_TYPES) {
988                 if (evq_conf->nb_atomic_order_sequences != 0)
989                         sched_type = RTE_SCHED_TYPE_ORDERED;
990                 else
991                         sched_type = RTE_SCHED_TYPE_PARALLEL;
992         } else
993                 sched_type = evq_conf->schedule_type;
994
995         cfg.num_atomic_inflights = DLB2_NUM_ATOMIC_INFLIGHTS_PER_QUEUE;
996         cfg.num_sequence_numbers = evq_conf->nb_atomic_order_sequences;
997         cfg.num_qid_inflights = evq_conf->nb_atomic_order_sequences;
998
999         if (sched_type != RTE_SCHED_TYPE_ORDERED) {
1000                 cfg.num_sequence_numbers = 0;
1001                 cfg.num_qid_inflights = 2048;
1002         }
1003
1004         /* App should set this to the number of hardware flows they want, not
1005          * the overall number of flows they're going to use. E.g. if app is
1006          * using 64 flows and sets compression to 64, best-case they'll get
1007          * 64 unique hashed flows in hardware.
1008          */
1009         switch (evq_conf->nb_atomic_flows) {
1010         /* Valid DLB2 compression levels */
1011         case 64:
1012         case 128:
1013         case 256:
1014         case 512:
1015         case (1 * 1024): /* 1K */
1016         case (2 * 1024): /* 2K */
1017         case (4 * 1024): /* 4K */
1018         case (64 * 1024): /* 64K */
1019                 cfg.lock_id_comp_level = evq_conf->nb_atomic_flows;
1020                 break;
1021         default:
1022                 /* Invalid compression level */
1023                 cfg.lock_id_comp_level = 0; /* no compression */
1024         }
1025
1026         if (ev_queue->depth_threshold == 0) {
1027                 cfg.depth_threshold = dlb2->default_depth_thresh;
1028                 ev_queue->depth_threshold =
1029                         dlb2->default_depth_thresh;
1030         } else
1031                 cfg.depth_threshold = ev_queue->depth_threshold;
1032
1033         ret = dlb2_iface_ldb_queue_create(handle, &cfg);
1034         if (ret < 0) {
1035                 DLB2_LOG_ERR("dlb2: create LB event queue error, ret=%d (driver status: %s)\n",
1036                              ret, dlb2_error_strings[cfg.response.status]);
1037                 return -EINVAL;
1038         }
1039
1040         qm_qid = cfg.response.id;
1041
1042         /* Save off queue config for debug, resource lookups, and reconfig */
1043         queue->num_qid_inflights = cfg.num_qid_inflights;
1044         queue->num_atm_inflights = cfg.num_atomic_inflights;
1045
1046         queue->sched_type = sched_type;
1047         queue->config_state = DLB2_CONFIGURED;
1048
1049         DLB2_LOG_DBG("Created LB event queue %d, nb_inflights=%d, nb_seq=%d, qid inflights=%d\n",
1050                      qm_qid,
1051                      cfg.num_atomic_inflights,
1052                      cfg.num_sequence_numbers,
1053                      cfg.num_qid_inflights);
1054
1055         return qm_qid;
1056 }
1057
1058 static int
1059 dlb2_eventdev_ldb_queue_setup(struct rte_eventdev *dev,
1060                               struct dlb2_eventdev_queue *ev_queue,
1061                               const struct rte_event_queue_conf *queue_conf)
1062 {
1063         struct dlb2_eventdev *dlb2 = dlb2_pmd_priv(dev);
1064         int32_t qm_qid;
1065
1066         if (queue_conf->nb_atomic_order_sequences)
1067                 dlb2_program_sn_allocation(dlb2, queue_conf);
1068
1069         qm_qid = dlb2_hw_create_ldb_queue(dlb2, ev_queue, queue_conf);
1070         if (qm_qid < 0) {
1071                 DLB2_LOG_ERR("Failed to create the load-balanced queue\n");
1072
1073                 return qm_qid;
1074         }
1075
1076         dlb2->qm_ldb_to_ev_queue_id[qm_qid] = ev_queue->id;
1077
1078         ev_queue->qm_queue.id = qm_qid;
1079
1080         return 0;
1081 }
1082
1083 static int dlb2_num_dir_queues_setup(struct dlb2_eventdev *dlb2)
1084 {
1085         int i, num = 0;
1086
1087         for (i = 0; i < dlb2->num_queues; i++) {
1088                 if (dlb2->ev_queues[i].setup_done &&
1089                     dlb2->ev_queues[i].qm_queue.is_directed)
1090                         num++;
1091         }
1092
1093         return num;
1094 }
1095
1096 static void
1097 dlb2_queue_link_teardown(struct dlb2_eventdev *dlb2,
1098                          struct dlb2_eventdev_queue *ev_queue)
1099 {
1100         struct dlb2_eventdev_port *ev_port;
1101         int i, j;
1102
1103         for (i = 0; i < dlb2->num_ports; i++) {
1104                 ev_port = &dlb2->ev_ports[i];
1105
1106                 for (j = 0; j < DLB2_MAX_NUM_QIDS_PER_LDB_CQ; j++) {
1107                         if (!ev_port->link[j].valid ||
1108                             ev_port->link[j].queue_id != ev_queue->id)
1109                                 continue;
1110
1111                         ev_port->link[j].valid = false;
1112                         ev_port->num_links--;
1113                 }
1114         }
1115
1116         ev_queue->num_links = 0;
1117 }
1118
1119 static int
1120 dlb2_eventdev_queue_setup(struct rte_eventdev *dev,
1121                           uint8_t ev_qid,
1122                           const struct rte_event_queue_conf *queue_conf)
1123 {
1124         struct dlb2_eventdev *dlb2 = dlb2_pmd_priv(dev);
1125         struct dlb2_eventdev_queue *ev_queue;
1126         int ret;
1127
1128         if (queue_conf == NULL)
1129                 return -EINVAL;
1130
1131         if (ev_qid >= dlb2->num_queues)
1132                 return -EINVAL;
1133
1134         ev_queue = &dlb2->ev_queues[ev_qid];
1135
1136         ev_queue->qm_queue.is_directed = queue_conf->event_queue_cfg &
1137                 RTE_EVENT_QUEUE_CFG_SINGLE_LINK;
1138         ev_queue->id = ev_qid;
1139         ev_queue->conf = *queue_conf;
1140
1141         if (!ev_queue->qm_queue.is_directed) {
1142                 ret = dlb2_eventdev_ldb_queue_setup(dev, ev_queue, queue_conf);
1143         } else {
1144                 /* The directed queue isn't setup until link time, at which
1145                  * point we know its directed port ID. Directed queue setup
1146                  * will only fail if this queue is already setup or there are
1147                  * no directed queues left to configure.
1148                  */
1149                 ret = 0;
1150
1151                 ev_queue->qm_queue.config_state = DLB2_NOT_CONFIGURED;
1152
1153                 if (ev_queue->setup_done ||
1154                     dlb2_num_dir_queues_setup(dlb2) == dlb2->num_dir_queues)
1155                         ret = -EINVAL;
1156         }
1157
1158         /* Tear down pre-existing port->queue links */
1159         if (!ret && dlb2->run_state == DLB2_RUN_STATE_STOPPED)
1160                 dlb2_queue_link_teardown(dlb2, ev_queue);
1161
1162         if (!ret)
1163                 ev_queue->setup_done = true;
1164
1165         return ret;
1166 }
1167
1168 static int
1169 dlb2_init_consume_qe(struct dlb2_port *qm_port, char *mz_name)
1170 {
1171         struct dlb2_cq_pop_qe *qe;
1172
1173         qe = rte_zmalloc(mz_name,
1174                         DLB2_NUM_QES_PER_CACHE_LINE *
1175                                 sizeof(struct dlb2_cq_pop_qe),
1176                         RTE_CACHE_LINE_SIZE);
1177
1178         if (qe == NULL) {
1179                 DLB2_LOG_ERR("dlb2: no memory for consume_qe\n");
1180                 return -ENOMEM;
1181         }
1182         qm_port->consume_qe = qe;
1183
1184         qe->qe_valid = 0;
1185         qe->qe_frag = 0;
1186         qe->qe_comp = 0;
1187         qe->cq_token = 1;
1188         /* Tokens value is 0-based; i.e. '0' returns 1 token, '1' returns 2,
1189          * and so on.
1190          */
1191         qe->tokens = 0; /* set at run time */
1192         qe->meas_lat = 0;
1193         qe->no_dec = 0;
1194         /* Completion IDs are disabled */
1195         qe->cmp_id = 0;
1196
1197         return 0;
1198 }
1199
1200 static int
1201 dlb2_init_int_arm_qe(struct dlb2_port *qm_port, char *mz_name)
1202 {
1203         struct dlb2_enqueue_qe *qe;
1204
1205         qe = rte_zmalloc(mz_name,
1206                         DLB2_NUM_QES_PER_CACHE_LINE *
1207                                 sizeof(struct dlb2_enqueue_qe),
1208                         RTE_CACHE_LINE_SIZE);
1209
1210         if (qe == NULL) {
1211                 DLB2_LOG_ERR("dlb2: no memory for int_arm_qe\n");
1212                 return -ENOMEM;
1213         }
1214         qm_port->int_arm_qe = qe;
1215
1216         /* V2 - INT ARM is CQ_TOKEN + FRAG */
1217         qe->qe_valid = 0;
1218         qe->qe_frag = 1;
1219         qe->qe_comp = 0;
1220         qe->cq_token = 1;
1221         qe->meas_lat = 0;
1222         qe->no_dec = 0;
1223         /* Completion IDs are disabled */
1224         qe->cmp_id = 0;
1225
1226         return 0;
1227 }
1228
1229 static int
1230 dlb2_init_qe_mem(struct dlb2_port *qm_port, char *mz_name)
1231 {
1232         int ret, sz;
1233
1234         sz = DLB2_NUM_QES_PER_CACHE_LINE * sizeof(struct dlb2_enqueue_qe);
1235
1236         qm_port->qe4 = rte_zmalloc(mz_name, sz, RTE_CACHE_LINE_SIZE);
1237
1238         if (qm_port->qe4 == NULL) {
1239                 DLB2_LOG_ERR("dlb2: no qe4 memory\n");
1240                 ret = -ENOMEM;
1241                 goto error_exit;
1242         }
1243
1244         ret = dlb2_init_int_arm_qe(qm_port, mz_name);
1245         if (ret < 0) {
1246                 DLB2_LOG_ERR("dlb2: dlb2_init_int_arm_qe ret=%d\n", ret);
1247                 goto error_exit;
1248         }
1249
1250         ret = dlb2_init_consume_qe(qm_port, mz_name);
1251         if (ret < 0) {
1252                 DLB2_LOG_ERR("dlb2: dlb2_init_consume_qe ret=%d\n", ret);
1253                 goto error_exit;
1254         }
1255
1256         return 0;
1257
1258 error_exit:
1259
1260         dlb2_free_qe_mem(qm_port);
1261
1262         return ret;
1263 }
1264
1265 static inline uint16_t
1266 dlb2_event_enqueue_delayed(void *event_port,
1267                            const struct rte_event events[]);
1268
1269 static inline uint16_t
1270 dlb2_event_enqueue_burst_delayed(void *event_port,
1271                                  const struct rte_event events[],
1272                                  uint16_t num);
1273
1274 static inline uint16_t
1275 dlb2_event_enqueue_new_burst_delayed(void *event_port,
1276                                      const struct rte_event events[],
1277                                      uint16_t num);
1278
1279 static inline uint16_t
1280 dlb2_event_enqueue_forward_burst_delayed(void *event_port,
1281                                          const struct rte_event events[],
1282                                          uint16_t num);
1283
1284 /* Generate the required bitmask for rotate-style expected QE gen bits.
1285  * This requires a pattern of 1s and 0s, with the expected bits starting
1286  * as 1s, so that when hardware writes 0s they read as "new". This
1287  * requires the ring size to be a power of 2 to wrap correctly.
1288  */
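/*
 * Worked example (illustrative, derived from the loop below): for
 * cq_depth == 8 the loop sets bits 8-15, 24-31, 40-47 and 56-63, so
 * cq_rolling_mask == cq_rolling_mask_2 == 0xFF00FF00FF00FF00; the
 * cq_depth == 64 case is handled separately below.
 */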
1289 static void
1290 dlb2_hw_cq_bitmask_init(struct dlb2_port *qm_port, uint32_t cq_depth)
1291 {
1292         uint64_t cq_build_mask = 0;
1293         uint32_t i;
1294
1295         if (cq_depth > 64)
1296                 return; /* need to fall back to scalar code */
1297
1298         /*
1299          * all 1's in first u64, all zeros in second is correct bit pattern to
1300          * start. Special casing == 64 easier than adapting complex loop logic.
1301          */
1302         if (cq_depth == 64) {
1303                 qm_port->cq_rolling_mask = 0;
1304                 qm_port->cq_rolling_mask_2 = -1;
1305                 return;
1306         }
1307
1308         for (i = 0; i < 64; i += (cq_depth * 2))
1309                 cq_build_mask |= ((1ULL << cq_depth) - 1) << (i + cq_depth);
1310
1311         qm_port->cq_rolling_mask = cq_build_mask;
1312         qm_port->cq_rolling_mask_2 = cq_build_mask;
1313 }
1314
1315 static int
1316 dlb2_hw_create_ldb_port(struct dlb2_eventdev *dlb2,
1317                         struct dlb2_eventdev_port *ev_port,
1318                         uint32_t dequeue_depth,
1319                         uint32_t enqueue_depth)
1320 {
1321         struct dlb2_hw_dev *handle = &dlb2->qm_instance;
1322         struct dlb2_create_ldb_port_args cfg = { {0} };
1323         int ret;
1324         struct dlb2_port *qm_port = NULL;
1325         char mz_name[RTE_MEMZONE_NAMESIZE];
1326         uint32_t qm_port_id;
1327         uint16_t ldb_credit_high_watermark = 0;
1328         uint16_t dir_credit_high_watermark = 0;
1329         uint16_t credit_high_watermark = 0;
1330
1331         if (handle == NULL)
1332                 return -EINVAL;
1333
1334         if (dequeue_depth < DLB2_MIN_CQ_DEPTH) {
1335                 DLB2_LOG_ERR("dlb2: invalid dequeue_depth, must be at least %d\n",
1336                              DLB2_MIN_CQ_DEPTH);
1337                 return -EINVAL;
1338         }
1339
1340         if (enqueue_depth < DLB2_MIN_ENQUEUE_DEPTH) {
1341                 DLB2_LOG_ERR("dlb2: invalid enqueue_depth, must be at least %d\n",
1342                              DLB2_MIN_ENQUEUE_DEPTH);
1343                 return -EINVAL;
1344         }
1345
1346         rte_spinlock_lock(&handle->resource_lock);
1347
1348         /* We round up to the next power of 2 if necessary */
1349         cfg.cq_depth = rte_align32pow2(dequeue_depth);
1350         cfg.cq_depth_threshold = 1;
1351
1352         cfg.cq_history_list_size = DLB2_NUM_HIST_LIST_ENTRIES_PER_LDB_PORT;
1353
1354         if (handle->cos_id == DLB2_COS_DEFAULT)
1355                 cfg.cos_id = 0;
1356         else
1357                 cfg.cos_id = handle->cos_id;
1358
1359         cfg.cos_strict = 0;
1360
1361         /* User controls the LDB high watermark via enqueue depth. The DIR high
1362          * watermark is equal, unless the directed credit pool is too small.
1363          */
1364         if (dlb2->version == DLB2_HW_V2) {
1365                 ldb_credit_high_watermark = enqueue_depth;
1366                 /* If there are no directed ports, the kernel driver will
1367                  * ignore this port's directed credit settings. Don't use
1368                  * enqueue_depth if it would require more directed credits
1369                  * than are available.
1370                  */
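                /* Illustrative numbers (assumed, not from the source): with
                 * enqueue_depth == 64, 512 credits in the directed pool and
                 * 16 ports, the watermark below is RTE_MIN(64, 512 / 16),
                 * i.e. 32.
                 */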
1371                 dir_credit_high_watermark =
1372                         RTE_MIN(enqueue_depth,
1373                                 handle->cfg.num_dir_credits / dlb2->num_ports);
1374         } else
1375                 credit_high_watermark = enqueue_depth;
1376
1377         /* Per QM values */
1378
1379         ret = dlb2_iface_ldb_port_create(handle, &cfg,  dlb2->poll_mode);
1380         if (ret < 0) {
1381                 DLB2_LOG_ERR("dlb2: dlb2_ldb_port_create error, ret=%d (driver status: %s)\n",
1382                              ret, dlb2_error_strings[cfg.response.status]);
1383                 goto error_exit;
1384         }
1385
1386         qm_port_id = cfg.response.id;
1387
1388         DLB2_LOG_DBG("dlb2: ev_port %d uses qm LB port %d <<<<<\n",
1389                      ev_port->id, qm_port_id);
1390
1391         qm_port = &ev_port->qm_port;
1392         qm_port->ev_port = ev_port; /* back ptr */
1393         qm_port->dlb2 = dlb2; /* back ptr */
1394         /*
1395          * Allocate and init local qe struct(s).
1396          * Note: MOVDIR64 requires the enqueue QE (qe4) to be aligned.
1397          */
1398
1399         snprintf(mz_name, sizeof(mz_name), "dlb2_ldb_port%d",
1400                  ev_port->id);
1401
1402         ret = dlb2_init_qe_mem(qm_port, mz_name);
1403         if (ret < 0) {
1404                 DLB2_LOG_ERR("dlb2: init_qe_mem failed, ret=%d\n", ret);
1405                 goto error_exit;
1406         }
1407
1408         qm_port->id = qm_port_id;
1409
1410         if (dlb2->version == DLB2_HW_V2) {
1411                 qm_port->cached_ldb_credits = 0;
1412                 qm_port->cached_dir_credits = 0;
1413         } else
1414                 qm_port->cached_credits = 0;
1415
1416         /* CQs with depth < 8 use an 8-entry queue, but withhold credits so
1417          * the effective depth is smaller.
1418          */
1419         qm_port->cq_depth = cfg.cq_depth <= 8 ? 8 : cfg.cq_depth;
1420         qm_port->cq_idx = 0;
1421         qm_port->cq_idx_unmasked = 0;
1422
1423         if (dlb2->poll_mode == DLB2_CQ_POLL_MODE_SPARSE)
1424                 qm_port->cq_depth_mask = (qm_port->cq_depth * 4) - 1;
1425         else
1426                 qm_port->cq_depth_mask = qm_port->cq_depth - 1;
1427
1428         qm_port->gen_bit_shift = __builtin_popcount(qm_port->cq_depth_mask);
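        /* Illustrative values (derived from the code above): with
         * cq_depth == 8, sparse mode yields cq_depth_mask == 0x1f and
         * gen_bit_shift == 5; non-sparse mode yields 0x7 and 3.
         */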
1429         /* starting value of gen bit - it toggles at wrap time */
1430         qm_port->gen_bit = 1;
1431
1432         dlb2_hw_cq_bitmask_init(qm_port, qm_port->cq_depth);
1433
1434         qm_port->int_armed = false;
1435
1436         /* Save off for later use in info and lookup APIs. */
1437         qm_port->qid_mappings = &dlb2->qm_ldb_to_ev_queue_id[0];
1438
1439         qm_port->dequeue_depth = dequeue_depth;
1440         qm_port->token_pop_thresh = dequeue_depth;
1441
1442         /* The default enqueue functions do not include delayed-pop support for
1443          * performance reasons.
1444          */
1445         if (qm_port->token_pop_mode == DELAYED_POP) {
1446                 dlb2->event_dev->enqueue = dlb2_event_enqueue_delayed;
1447                 dlb2->event_dev->enqueue_burst =
1448                         dlb2_event_enqueue_burst_delayed;
1449                 dlb2->event_dev->enqueue_new_burst =
1450                         dlb2_event_enqueue_new_burst_delayed;
1451                 dlb2->event_dev->enqueue_forward_burst =
1452                         dlb2_event_enqueue_forward_burst_delayed;
1453         }
1454
1455         qm_port->owed_tokens = 0;
1456         qm_port->issued_releases = 0;
1457
1458         /* Save config message too. */
1459         rte_memcpy(&qm_port->cfg.ldb, &cfg, sizeof(qm_port->cfg.ldb));
1460
1461         /* update state */
1462         qm_port->state = PORT_STARTED; /* enabled at create time */
1463         qm_port->config_state = DLB2_CONFIGURED;
1464
1465         if (dlb2->version == DLB2_HW_V2) {
1466                 qm_port->dir_credits = dir_credit_high_watermark;
1467                 qm_port->ldb_credits = ldb_credit_high_watermark;
1468                 qm_port->credit_pool[DLB2_DIR_QUEUE] = &dlb2->dir_credit_pool;
1469                 qm_port->credit_pool[DLB2_LDB_QUEUE] = &dlb2->ldb_credit_pool;
1470
1471                 DLB2_LOG_DBG("dlb2: created ldb port %d, depth = %d, ldb credits=%d, dir credits=%d\n",
1472                              qm_port_id,
1473                              dequeue_depth,
1474                              qm_port->ldb_credits,
1475                              qm_port->dir_credits);
1476         } else {
1477                 qm_port->credits = credit_high_watermark;
1478                 qm_port->credit_pool[DLB2_COMBINED_POOL] = &dlb2->credit_pool;
1479
1480                 DLB2_LOG_DBG("dlb2: created ldb port %d, depth = %d, credits=%d\n",
1481                              qm_port_id,
1482                              dequeue_depth,
1483                              qm_port->credits);
1484         }
1485
1486         qm_port->use_scalar = false;
1487
1488 #if (!defined RTE_ARCH_X86_64)
1489         qm_port->use_scalar = true;
1490 #else
1491         if ((qm_port->cq_depth > 64) ||
1492             (!rte_is_power_of_2(qm_port->cq_depth)) ||
1493             (dlb2->vector_opts_enabled == false))
1494                 qm_port->use_scalar = true;
1495 #endif
1496
1497         rte_spinlock_unlock(&handle->resource_lock);
1498
1499         return 0;
1500
1501 error_exit:
1502
1503         if (qm_port)
1504                 dlb2_free_qe_mem(qm_port);
1505
1506         rte_spinlock_unlock(&handle->resource_lock);
1507
1508         DLB2_LOG_ERR("dlb2: create ldb port failed!\n");
1509
1510         return ret;
1511 }
1512
1513 static void
1514 dlb2_port_link_teardown(struct dlb2_eventdev *dlb2,
1515                         struct dlb2_eventdev_port *ev_port)
1516 {
1517         struct dlb2_eventdev_queue *ev_queue;
1518         int i;
1519
1520         for (i = 0; i < DLB2_MAX_NUM_QIDS_PER_LDB_CQ; i++) {
1521                 if (!ev_port->link[i].valid)
1522                         continue;
1523
1524                 ev_queue = &dlb2->ev_queues[ev_port->link[i].queue_id];
1525
1526                 ev_port->link[i].valid = false;
1527                 ev_port->num_links--;
1528                 ev_queue->num_links--;
1529         }
1530 }
1531
1532 static int
1533 dlb2_hw_create_dir_port(struct dlb2_eventdev *dlb2,
1534                         struct dlb2_eventdev_port *ev_port,
1535                         uint32_t dequeue_depth,
1536                         uint32_t enqueue_depth)
1537 {
1538         struct dlb2_hw_dev *handle = &dlb2->qm_instance;
1539         struct dlb2_create_dir_port_args cfg = { {0} };
1540         int ret;
1541         struct dlb2_port *qm_port = NULL;
1542         char mz_name[RTE_MEMZONE_NAMESIZE];
1543         uint32_t qm_port_id;
1544         uint16_t ldb_credit_high_watermark = 0;
1545         uint16_t dir_credit_high_watermark = 0;
1546         uint16_t credit_high_watermark = 0;
1547
1548         if (dlb2 == NULL || handle == NULL)
1549                 return -EINVAL;
1550
1551         if (dequeue_depth < DLB2_MIN_CQ_DEPTH) {
1552                 DLB2_LOG_ERR("dlb2: invalid dequeue_depth, must be %d-%d\n",
1553                              DLB2_MIN_CQ_DEPTH, DLB2_MAX_INPUT_QUEUE_DEPTH);
1554                 return -EINVAL;
1555         }
1556
1557         if (enqueue_depth < DLB2_MIN_ENQUEUE_DEPTH) {
1558                 DLB2_LOG_ERR("dlb2: invalid enqueue_depth, must be at least %d\n",
1559                              DLB2_MIN_ENQUEUE_DEPTH);
1560                 return -EINVAL;
1561         }
1562
1563         rte_spinlock_lock(&handle->resource_lock);
1564
1565         /* Directed queues are configured at link time. */
1566         cfg.queue_id = -1;
1567
1568         /* We round up to the next power of 2 if necessary */
1569         cfg.cq_depth = rte_align32pow2(dequeue_depth);
1570         cfg.cq_depth_threshold = 1;
1571
1572         /* User controls the LDB high watermark via enqueue depth. The DIR high
1573          * watermark is equal, unless the directed credit pool is too small.
1574          */
1575         if (dlb2->version == DLB2_HW_V2) {
1576                 ldb_credit_high_watermark = enqueue_depth;
1577                 /* Don't use enqueue_depth if it would require more directed
1578                  * credits than are available.
1579                  */
1580                 dir_credit_high_watermark =
1581                         RTE_MIN(enqueue_depth,
1582                                 handle->cfg.num_dir_credits / dlb2->num_ports);
1583         } else
1584                 credit_high_watermark = enqueue_depth;
1585
1586         /* Per QM values */
1587
1588         ret = dlb2_iface_dir_port_create(handle, &cfg,  dlb2->poll_mode);
1589         if (ret < 0) {
1590                 DLB2_LOG_ERR("dlb2: dlb2_dir_port_create error, ret=%d (driver status: %s)\n",
1591                              ret, dlb2_error_strings[cfg.response.status]);
1592                 goto error_exit;
1593         }
1594
1595         qm_port_id = cfg.response.id;
1596
1597         DLB2_LOG_DBG("dlb2: ev_port %d uses qm DIR port %d <<<<<\n",
1598                      ev_port->id, qm_port_id);
1599
1600         qm_port = &ev_port->qm_port;
1601         qm_port->ev_port = ev_port; /* back ptr */
1602         qm_port->dlb2 = dlb2;  /* back ptr */
1603
1604         /*
1605          * Init local qe struct(s).
1606          * Note: MOVDIR64B requires the enqueue QE to be aligned
1607          */
1608
1609         snprintf(mz_name, sizeof(mz_name), "dlb2_dir_port%d",
1610                  ev_port->id);
1611
1612         ret = dlb2_init_qe_mem(qm_port, mz_name);
1613
1614         if (ret < 0) {
1615                 DLB2_LOG_ERR("dlb2: init_qe_mem failed, ret=%d\n", ret);
1616                 goto error_exit;
1617         }
1618
1619         qm_port->id = qm_port_id;
1620
1621         if (dlb2->version == DLB2_HW_V2) {
1622                 qm_port->cached_ldb_credits = 0;
1623                 qm_port->cached_dir_credits = 0;
1624         } else
1625                 qm_port->cached_credits = 0;
1626
1627         /* CQs with depth < 8 use an 8-entry queue, but withhold credits so
1628          * the effective depth is smaller.
1629          */
1630         qm_port->cq_depth = cfg.cq_depth <= 8 ? 8 : cfg.cq_depth;
1631         qm_port->cq_idx = 0;
1632         qm_port->cq_idx_unmasked = 0;
1633
1634         if (dlb2->poll_mode == DLB2_CQ_POLL_MODE_SPARSE)
1635                 qm_port->cq_depth_mask = (cfg.cq_depth * 4) - 1;
1636         else
1637                 qm_port->cq_depth_mask = cfg.cq_depth - 1;
1638
1639         qm_port->gen_bit_shift = __builtin_popcount(qm_port->cq_depth_mask);
1640         /* starting value of gen bit - it toggles at wrap time */
1641         qm_port->gen_bit = 1;
1642         dlb2_hw_cq_bitmask_init(qm_port, qm_port->cq_depth);
1643
1644         qm_port->int_armed = false;
1645
1646         /* Save off for later use in info and lookup APIs. */
1647         qm_port->qid_mappings = &dlb2->qm_dir_to_ev_queue_id[0];
1648
1649         qm_port->dequeue_depth = dequeue_depth;
1650
1651         /* Directed ports use the auto token-pop mode by default. */
1652         qm_port->token_pop_mode = AUTO_POP;
1653         qm_port->owed_tokens = 0;
1654         qm_port->issued_releases = 0;
1655
1656         /* Save config message too. */
1657         rte_memcpy(&qm_port->cfg.dir, &cfg, sizeof(qm_port->cfg.dir));
1658
1659         /* update state */
1660         qm_port->state = PORT_STARTED; /* enabled at create time */
1661         qm_port->config_state = DLB2_CONFIGURED;
1662
1663         if (dlb2->version == DLB2_HW_V2) {
1664                 qm_port->dir_credits = dir_credit_high_watermark;
1665                 qm_port->ldb_credits = ldb_credit_high_watermark;
1666                 qm_port->credit_pool[DLB2_DIR_QUEUE] = &dlb2->dir_credit_pool;
1667                 qm_port->credit_pool[DLB2_LDB_QUEUE] = &dlb2->ldb_credit_pool;
1668
1669                 DLB2_LOG_DBG("dlb2: created dir port %d, depth = %d cr=%d,%d\n",
1670                              qm_port_id,
1671                              dequeue_depth,
1672                              dir_credit_high_watermark,
1673                              ldb_credit_high_watermark);
1674         } else {
1675                 qm_port->credits = credit_high_watermark;
1676                 qm_port->credit_pool[DLB2_COMBINED_POOL] = &dlb2->credit_pool;
1677
1678                 DLB2_LOG_DBG("dlb2: created dir port %d, depth = %d cr=%d\n",
1679                              qm_port_id,
1680                              dequeue_depth,
1681                              credit_high_watermark);
1682         }
1683
1684 #if (!defined RTE_ARCH_X86_64)
1685         qm_port->use_scalar = true;
1686 #else
1687         if ((qm_port->cq_depth > 64) ||
1688             (!rte_is_power_of_2(qm_port->cq_depth)) ||
1689             (dlb2->vector_opts_enabled == false))
1690                 qm_port->use_scalar = true;
1691 #endif
1692
1693         rte_spinlock_unlock(&handle->resource_lock);
1694
1695         return 0;
1696
1697 error_exit:
1698
1699         if (qm_port)
1700                 dlb2_free_qe_mem(qm_port);
1701
1702         rte_spinlock_unlock(&handle->resource_lock);
1703
1704         DLB2_LOG_ERR("dlb2: create dir port failed!\n");
1705
1706         return ret;
1707 }
1708
1709 static int
1710 dlb2_eventdev_port_setup(struct rte_eventdev *dev,
1711                          uint8_t ev_port_id,
1712                          const struct rte_event_port_conf *port_conf)
1713 {
1714         struct dlb2_eventdev *dlb2;
1715         struct dlb2_eventdev_port *ev_port;
1716         int ret;
1717         uint32_t hw_credit_quanta, sw_credit_quanta;
1718
1719         if (dev == NULL || port_conf == NULL) {
1720                 DLB2_LOG_ERR("Null parameter\n");
1721                 return -EINVAL;
1722         }
1723
1724         dlb2 = dlb2_pmd_priv(dev);
1725
1726         if (ev_port_id >= DLB2_MAX_NUM_PORTS(dlb2->version))
1727                 return -EINVAL;
1728
1729         if (port_conf->dequeue_depth >
1730                 evdev_dlb2_default_info.max_event_port_dequeue_depth ||
1731             port_conf->enqueue_depth >
1732                 evdev_dlb2_default_info.max_event_port_enqueue_depth)
1733                 return -EINVAL;
1734
1735         ev_port = &dlb2->ev_ports[ev_port_id];
1736         /* configured? */
1737         if (ev_port->setup_done) {
1738                 DLB2_LOG_ERR("evport %d is already configured\n", ev_port_id);
1739                 return -EINVAL;
1740         }
1741
1742         ev_port->qm_port.is_directed = port_conf->event_port_cfg &
1743                 RTE_EVENT_PORT_CFG_SINGLE_LINK;
1744
1745         if (!ev_port->qm_port.is_directed) {
1746                 ret = dlb2_hw_create_ldb_port(dlb2,
1747                                               ev_port,
1748                                               port_conf->dequeue_depth,
1749                                               port_conf->enqueue_depth);
1750                 if (ret < 0) {
1751                         DLB2_LOG_ERR("Failed to create the LDB port, ev_port_id=%d\n",
1752                                      ev_port_id);
1753
1754                         return ret;
1755                 }
1756         } else {
1757                 ret = dlb2_hw_create_dir_port(dlb2,
1758                                               ev_port,
1759                                               port_conf->dequeue_depth,
1760                                               port_conf->enqueue_depth);
1761                 if (ret < 0) {
1762                         DLB2_LOG_ERR("Failed to create the DIR port\n");
1763                         return ret;
1764                 }
1765         }
1766
1767         /* Save off port config for reconfig */
1768         ev_port->conf = *port_conf;
1769
1770         ev_port->id = ev_port_id;
1771         ev_port->enq_configured = true;
1772         ev_port->setup_done = true;
1773         ev_port->inflight_max = port_conf->new_event_threshold;
1774         ev_port->implicit_release = !(port_conf->event_port_cfg &
1775                   RTE_EVENT_PORT_CFG_DISABLE_IMPL_REL);
1776         ev_port->outstanding_releases = 0;
1777         ev_port->inflight_credits = 0;
1778         ev_port->dlb2 = dlb2; /* reverse link */
1779
1780         /* Default for worker ports */
1781         sw_credit_quanta = dlb2->sw_credit_quanta;
1782         hw_credit_quanta = dlb2->hw_credit_quanta;
1783
1784         if (port_conf->event_port_cfg & RTE_EVENT_PORT_CFG_HINT_PRODUCER) {
1785                 /* Producer type ports. Mostly enqueue */
1786                 sw_credit_quanta = DLB2_SW_CREDIT_P_QUANTA_DEFAULT;
1787                 hw_credit_quanta = DLB2_SW_CREDIT_P_BATCH_SZ;
1788         }
1789         if (port_conf->event_port_cfg & RTE_EVENT_PORT_CFG_HINT_CONSUMER) {
1790                 /* Consumer type ports. Mostly dequeue */
1791                 sw_credit_quanta = DLB2_SW_CREDIT_C_QUANTA_DEFAULT;
1792                 hw_credit_quanta = DLB2_SW_CREDIT_C_BATCH_SZ;
1793         }
1794         ev_port->credit_update_quanta = sw_credit_quanta;
1795         ev_port->qm_port.hw_credit_quanta = hw_credit_quanta;
1796
1797         /* Tear down pre-existing port->queue links */
1798         if (dlb2->run_state == DLB2_RUN_STATE_STOPPED)
1799                 dlb2_port_link_teardown(dlb2, &dlb2->ev_ports[ev_port_id]);
1800
1801         dev->data->ports[ev_port_id] = &dlb2->ev_ports[ev_port_id];
1802
1803         return 0;
1804 }
1805
1806 static int16_t
1807 dlb2_hw_map_ldb_qid_to_port(struct dlb2_hw_dev *handle,
1808                             uint32_t qm_port_id,
1809                             uint16_t qm_qid,
1810                             uint8_t priority)
1811 {
1812         struct dlb2_map_qid_args cfg;
1813         int32_t ret;
1814
1815         if (handle == NULL)
1816                 return -EINVAL;
1817
1818         /* Build message */
1819         cfg.port_id = qm_port_id;
1820         cfg.qid = qm_qid;
1821         cfg.priority = EV_TO_DLB2_PRIO(priority);
1822
1823         ret = dlb2_iface_map_qid(handle, &cfg);
1824         if (ret < 0) {
1825                 DLB2_LOG_ERR("dlb2: map qid error, ret=%d (driver status: %s)\n",
1826                              ret, dlb2_error_strings[cfg.response.status]);
1827                 DLB2_LOG_ERR("dlb2: grp=%d, qm_port=%d, qm_qid=%d prio=%d\n",
1828                              handle->domain_id, cfg.port_id,
1829                              cfg.qid,
1830                              cfg.priority);
1831         } else {
1832                 DLB2_LOG_DBG("dlb2: mapped queue %d to qm_port %d\n",
1833                              qm_qid, qm_port_id);
1834         }
1835
1836         return ret;
1837 }
1838
1839 static int
1840 dlb2_event_queue_join_ldb(struct dlb2_eventdev *dlb2,
1841                           struct dlb2_eventdev_port *ev_port,
1842                           struct dlb2_eventdev_queue *ev_queue,
1843                           uint8_t priority)
1844 {
1845         int first_avail = -1;
1846         int ret, i;
1847
1848         for (i = 0; i < DLB2_MAX_NUM_QIDS_PER_LDB_CQ; i++) {
1849                 if (ev_port->link[i].valid) {
1850                         if (ev_port->link[i].queue_id == ev_queue->id &&
1851                             ev_port->link[i].priority == priority) {
1852                                 if (ev_port->link[i].mapped)
1853                                         return 0; /* already mapped */
1854                                 first_avail = i;
1855                         }
1856                 } else if (first_avail == -1)
1857                         first_avail = i;
1858         }
1859         if (first_avail == -1) {
1860                 DLB2_LOG_ERR("dlb2: qm_port %d has no available QID slots.\n",
1861                              ev_port->qm_port.id);
1862                 return -EINVAL;
1863         }
1864
1865         ret = dlb2_hw_map_ldb_qid_to_port(&dlb2->qm_instance,
1866                                           ev_port->qm_port.id,
1867                                           ev_queue->qm_queue.id,
1868                                           priority);
1869
1870         if (!ret)
1871                 ev_port->link[first_avail].mapped = true;
1872
1873         return ret;
1874 }
1875
1876 static int32_t
1877 dlb2_hw_create_dir_queue(struct dlb2_eventdev *dlb2,
1878                          struct dlb2_eventdev_queue *ev_queue,
1879                          int32_t qm_port_id)
1880 {
1881         struct dlb2_hw_dev *handle = &dlb2->qm_instance;
1882         struct dlb2_create_dir_queue_args cfg;
1883         int32_t ret;
1884
1885         /* The directed port is always configured before its queue */
1886         cfg.port_id = qm_port_id;
1887
1888         if (ev_queue->depth_threshold == 0) {
1889                 cfg.depth_threshold = dlb2->default_depth_thresh;
1890                 ev_queue->depth_threshold =
1891                         dlb2->default_depth_thresh;
1892         } else
1893                 cfg.depth_threshold = ev_queue->depth_threshold;
1894
1895         ret = dlb2_iface_dir_queue_create(handle, &cfg);
1896         if (ret < 0) {
1897                 DLB2_LOG_ERR("dlb2: create DIR event queue error, ret=%d (driver status: %s)\n",
1898                              ret, dlb2_error_strings[cfg.response.status]);
1899                 return -EINVAL;
1900         }
1901
1902         return cfg.response.id;
1903 }
1904
1905 static int
1906 dlb2_eventdev_dir_queue_setup(struct dlb2_eventdev *dlb2,
1907                               struct dlb2_eventdev_queue *ev_queue,
1908                               struct dlb2_eventdev_port *ev_port)
1909 {
1910         int32_t qm_qid;
1911
1912         qm_qid = dlb2_hw_create_dir_queue(dlb2, ev_queue, ev_port->qm_port.id);
1913
1914         if (qm_qid < 0) {
1915                 DLB2_LOG_ERR("Failed to create the DIR queue\n");
1916                 return qm_qid;
1917         }
1918
1919         dlb2->qm_dir_to_ev_queue_id[qm_qid] = ev_queue->id;
1920
1921         ev_queue->qm_queue.id = qm_qid;
1922
1923         return 0;
1924 }
1925
1926 static int
1927 dlb2_do_port_link(struct rte_eventdev *dev,
1928                   struct dlb2_eventdev_queue *ev_queue,
1929                   struct dlb2_eventdev_port *ev_port,
1930                   uint8_t prio)
1931 {
1932         struct dlb2_eventdev *dlb2 = dlb2_pmd_priv(dev);
1933         int err;
1934
1935         /* Don't link until start time. */
1936         if (dlb2->run_state == DLB2_RUN_STATE_STOPPED)
1937                 return 0;
1938
1939         if (ev_queue->qm_queue.is_directed)
1940                 err = dlb2_eventdev_dir_queue_setup(dlb2, ev_queue, ev_port);
1941         else
1942                 err = dlb2_event_queue_join_ldb(dlb2, ev_port, ev_queue, prio);
1943
1944         if (err) {
1945                 DLB2_LOG_ERR("port link failure for %s ev_q %d, ev_port %d\n",
1946                              ev_queue->qm_queue.is_directed ? "DIR" : "LDB",
1947                              ev_queue->id, ev_port->id);
1948
1949                 rte_errno = err;
1950                 return -1;
1951         }
1952
1953         return 0;
1954 }
1955
1956 static int
1957 dlb2_validate_port_link(struct dlb2_eventdev_port *ev_port,
1958                         uint8_t queue_id,
1959                         bool link_exists,
1960                         int index)
1961 {
1962         struct dlb2_eventdev *dlb2 = ev_port->dlb2;
1963         struct dlb2_eventdev_queue *ev_queue;
1964         bool port_is_dir, queue_is_dir;
1965
1966         if (queue_id > dlb2->num_queues) {
1967                 rte_errno = -EINVAL;
1968                 return -1;
1969         }
1970
1971         ev_queue = &dlb2->ev_queues[queue_id];
1972
1973         if (!ev_queue->setup_done &&
1974             ev_queue->qm_queue.config_state != DLB2_PREV_CONFIGURED) {
1975                 rte_errno = -EINVAL;
1976                 return -1;
1977         }
1978
1979         port_is_dir = ev_port->qm_port.is_directed;
1980         queue_is_dir = ev_queue->qm_queue.is_directed;
1981
1982         if (port_is_dir != queue_is_dir) {
1983                 DLB2_LOG_ERR("%s queue %u can't link to %s port %u\n",
1984                              queue_is_dir ? "DIR" : "LDB", ev_queue->id,
1985                              port_is_dir ? "DIR" : "LDB", ev_port->id);
1986
1987                 rte_errno = -EINVAL;
1988                 return -1;
1989         }
1990
1991         /* Check if there is space for the requested link */
1992         if (!link_exists && index == -1) {
1993                 DLB2_LOG_ERR("no space for new link\n");
1994                 rte_errno = -ENOSPC;
1995                 return -1;
1996         }
1997
1998         /* Check if the directed port is already linked */
1999         if (ev_port->qm_port.is_directed && ev_port->num_links > 0 &&
2000             !link_exists) {
2001                 DLB2_LOG_ERR("Can't link DIR port %d to >1 queues\n",
2002                              ev_port->id);
2003                 rte_errno = -EINVAL;
2004                 return -1;
2005         }
2006
2007         /* Check if the directed queue is already linked */
2008         if (ev_queue->qm_queue.is_directed && ev_queue->num_links > 0 &&
2009             !link_exists) {
2010                 DLB2_LOG_ERR("Can't link DIR queue %d to >1 ports\n",
2011                              ev_queue->id);
2012                 rte_errno = -EINVAL;
2013                 return -1;
2014         }
2015
2016         return 0;
2017 }
2018
2019 static int
2020 dlb2_eventdev_port_link(struct rte_eventdev *dev, void *event_port,
2021                         const uint8_t queues[], const uint8_t priorities[],
2022                         uint16_t nb_links)
2023
2024 {
2025         struct dlb2_eventdev_port *ev_port = event_port;
2026         struct dlb2_eventdev *dlb2;
2027         int i, j;
2028
2029         RTE_SET_USED(dev);
2030
2031         if (ev_port == NULL) {
2032                 DLB2_LOG_ERR("dlb2: evport not setup\n");
2033                 rte_errno = -EINVAL;
2034                 return 0;
2035         }
2036
2037         if (!ev_port->setup_done &&
2038             ev_port->qm_port.config_state != DLB2_PREV_CONFIGURED) {
2039                 DLB2_LOG_ERR("dlb2: evport not setup\n");
2040                 rte_errno = -EINVAL;
2041                 return 0;
2042         }
2043
2044         /* Note: rte_event_port_link() ensures the PMD won't receive a NULL
2045          * queues pointer.
2046          */
2047         if (nb_links == 0) {
2048                 DLB2_LOG_DBG("dlb2: nb_links is 0\n");
2049                 return 0; /* Ignore and return success */
2050         }
2051
2052         dlb2 = ev_port->dlb2;
2053
2054         DLB2_LOG_DBG("Linking %u queues to %s port %d\n",
2055                      nb_links,
2056                      ev_port->qm_port.is_directed ? "DIR" : "LDB",
2057                      ev_port->id);
2058
2059         for (i = 0; i < nb_links; i++) {
2060                 struct dlb2_eventdev_queue *ev_queue;
2061                 uint8_t queue_id, prio;
2062                 bool found = false;
2063                 int index = -1;
2064
2065                 queue_id = queues[i];
2066                 prio = priorities[i];
2067
2068                 /* Check if the link already exists. */
2069                 for (j = 0; j < DLB2_MAX_NUM_QIDS_PER_LDB_CQ; j++)
2070                         if (ev_port->link[j].valid) {
2071                                 if (ev_port->link[j].queue_id == queue_id) {
2072                                         found = true;
2073                                         index = j;
2074                                         break;
2075                                 }
2076                         } else if (index == -1) {
2077                                 index = j;
2078                         }
2079
2080                 /* could not link */
2081                 if (index == -1)
2082                         break;
2083
2084                 /* Check if already linked at the requested priority */
2085                 if (found && ev_port->link[j].priority == prio)
2086                         continue;
2087
2088                 if (dlb2_validate_port_link(ev_port, queue_id, found, index))
2089                         break; /* return index of offending queue */
2090
2091                 ev_queue = &dlb2->ev_queues[queue_id];
2092
2093                 if (dlb2_do_port_link(dev, ev_queue, ev_port, prio))
2094                         break; /* return index of offending queue */
2095
2096                 ev_queue->num_links++;
2097
2098                 ev_port->link[index].queue_id = queue_id;
2099                 ev_port->link[index].priority = prio;
2100                 ev_port->link[index].valid = true;
2101                 /* Only a new link (not a priority change) bumps the link count */
2102                 if (!found)
2103                         ev_port->num_links++;
2104         }
2105         return i;
2106 }
2107
2108 static int16_t
2109 dlb2_hw_unmap_ldb_qid_from_port(struct dlb2_hw_dev *handle,
2110                                 uint32_t qm_port_id,
2111                                 uint16_t qm_qid)
2112 {
2113         struct dlb2_unmap_qid_args cfg;
2114         int32_t ret;
2115
2116         if (handle == NULL)
2117                 return -EINVAL;
2118
2119         cfg.port_id = qm_port_id;
2120         cfg.qid = qm_qid;
2121
2122         ret = dlb2_iface_unmap_qid(handle, &cfg);
2123         if (ret < 0)
2124                 DLB2_LOG_ERR("dlb2: unmap qid error, ret=%d (driver status: %s)\n",
2125                              ret, dlb2_error_strings[cfg.response.status]);
2126
2127         return ret;
2128 }
2129
2130 static int
2131 dlb2_event_queue_detach_ldb(struct dlb2_eventdev *dlb2,
2132                             struct dlb2_eventdev_port *ev_port,
2133                             struct dlb2_eventdev_queue *ev_queue)
2134 {
2135         int ret, i;
2136
2137         /* Don't unlink until start time. */
2138         if (dlb2->run_state == DLB2_RUN_STATE_STOPPED)
2139                 return 0;
2140
2141         for (i = 0; i < DLB2_MAX_NUM_QIDS_PER_LDB_CQ; i++) {
2142                 if (ev_port->link[i].valid &&
2143                     ev_port->link[i].queue_id == ev_queue->id)
2144                         break; /* found */
2145         }
2146
2147         /* This is expected with the eventdev API, which blindly
2148          * attempts to unmap all queues.
2149          */
2150         if (i == DLB2_MAX_NUM_QIDS_PER_LDB_CQ) {
2151                 DLB2_LOG_DBG("dlb2: ignoring LDB QID %d; not mapped to qm_port %d.\n",
2152                              ev_queue->qm_queue.id,
2153                              ev_port->qm_port.id);
2154                 return 0;
2155         }
2156
2157         ret = dlb2_hw_unmap_ldb_qid_from_port(&dlb2->qm_instance,
2158                                               ev_port->qm_port.id,
2159                                               ev_queue->qm_queue.id);
2160         if (!ret)
2161                 ev_port->link[i].mapped = false;
2162
2163         return ret;
2164 }
2165
2166 static int
2167 dlb2_eventdev_port_unlink(struct rte_eventdev *dev, void *event_port,
2168                           uint8_t queues[], uint16_t nb_unlinks)
2169 {
2170         struct dlb2_eventdev_port *ev_port = event_port;
2171         struct dlb2_eventdev *dlb2;
2172         int i;
2173
2174         RTE_SET_USED(dev);
2175
2176         if (!ev_port->setup_done) {
2177                 DLB2_LOG_ERR("dlb2: evport %d is not configured\n",
2178                              ev_port->id);
2179                 rte_errno = -EINVAL;
2180                 return 0;
2181         }
2182
2183         if (queues == NULL || nb_unlinks == 0) {
2184                 DLB2_LOG_DBG("dlb2: queues is NULL or nb_unlinks is 0\n");
2185                 return 0; /* Ignore and return success */
2186         }
2187
2188         if (ev_port->qm_port.is_directed) {
2189                 DLB2_LOG_DBG("dlb2: ignore unlink from dir port %d\n",
2190                              ev_port->id);
2191                 rte_errno = 0;
2192                 return nb_unlinks; /* as if success */
2193         }
2194
2195         dlb2 = ev_port->dlb2;
2196
2197         for (i = 0; i < nb_unlinks; i++) {
2198                 struct dlb2_eventdev_queue *ev_queue;
2199                 int ret, j;
2200
2201                 if (queues[i] >= dlb2->num_queues) {
2202                         DLB2_LOG_ERR("dlb2: invalid queue id %d\n", queues[i]);
2203                         rte_errno = -EINVAL;
2204                         return i; /* return index of offending queue */
2205                 }
2206
2207                 ev_queue = &dlb2->ev_queues[queues[i]];
2208
2209                 /* Does a link exist? */
2210                 for (j = 0; j < DLB2_MAX_NUM_QIDS_PER_LDB_CQ; j++)
2211                         if (ev_port->link[j].queue_id == queues[i] &&
2212                             ev_port->link[j].valid)
2213                                 break;
2214
2215                 if (j == DLB2_MAX_NUM_QIDS_PER_LDB_CQ)
2216                         continue;
2217
2218                 ret = dlb2_event_queue_detach_ldb(dlb2, ev_port, ev_queue);
2219                 if (ret) {
2220                         DLB2_LOG_ERR("unlink err=%d for port %d queue %d\n",
2221                                      ret, ev_port->id, queues[i]);
2222                         rte_errno = -ENOENT;
2223                         return i; /* return index of offending queue */
2224                 }
2225
2226                 ev_port->link[j].valid = false;
2227                 ev_port->num_links--;
2228                 ev_queue->num_links--;
2229         }
2230
2231         return nb_unlinks;
2232 }
2233
2234 static int
2235 dlb2_eventdev_port_unlinks_in_progress(struct rte_eventdev *dev,
2236                                        void *event_port)
2237 {
2238         struct dlb2_eventdev_port *ev_port = event_port;
2239         struct dlb2_eventdev *dlb2;
2240         struct dlb2_hw_dev *handle;
2241         struct dlb2_pending_port_unmaps_args cfg;
2242         int ret;
2243
2244         RTE_SET_USED(dev);
2245
2246         if (!ev_port->setup_done) {
2247                 DLB2_LOG_ERR("dlb2: evport %d is not configured\n",
2248                              ev_port->id);
2249                 rte_errno = -EINVAL;
2250                 return 0;
2251         }
2252
2253         cfg.port_id = ev_port->qm_port.id;
2254         dlb2 = ev_port->dlb2;
2255         handle = &dlb2->qm_instance;
2256         ret = dlb2_iface_pending_port_unmaps(handle, &cfg);
2257
2258         if (ret < 0) {
2259                 DLB2_LOG_ERR("dlb2: num_unlinks_in_progress ret=%d (driver status: %s)\n",
2260                              ret, dlb2_error_strings[cfg.response.status]);
2261                 return ret;
2262         }
2263
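             /* The response id carries the number of unmap operations still
              * in progress for this port.
              */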
2264         return cfg.response.id;
2265 }
2266
2267 static int
2268 dlb2_eventdev_reapply_configuration(struct rte_eventdev *dev)
2269 {
2270         struct dlb2_eventdev *dlb2 = dlb2_pmd_priv(dev);
2271         int ret, i;
2272
2273         /* If an event queue or port was previously configured, but hasn't been
2274          * reconfigured, reapply its original configuration.
2275          */
2276         for (i = 0; i < dlb2->num_queues; i++) {
2277                 struct dlb2_eventdev_queue *ev_queue;
2278
2279                 ev_queue = &dlb2->ev_queues[i];
2280
2281                 if (ev_queue->qm_queue.config_state != DLB2_PREV_CONFIGURED)
2282                         continue;
2283
2284                 ret = dlb2_eventdev_queue_setup(dev, i, &ev_queue->conf);
2285                 if (ret < 0) {
2286                         DLB2_LOG_ERR("dlb2: failed to reconfigure queue %d\n", i);
2287                         return ret;
2288                 }
2289         }
2290
2291         for (i = 0; i < dlb2->num_ports; i++) {
2292                 struct dlb2_eventdev_port *ev_port = &dlb2->ev_ports[i];
2293
2294                 if (ev_port->qm_port.config_state != DLB2_PREV_CONFIGURED)
2295                         continue;
2296
2297                 ret = dlb2_eventdev_port_setup(dev, i, &ev_port->conf);
2298                 if (ret < 0) {
2299                         DLB2_LOG_ERR("dlb2: failed to reconfigure ev_port %d\n",
2300                                      i);
2301                         return ret;
2302                 }
2303         }
2304
2305         return 0;
2306 }
2307
2308 static int
2309 dlb2_eventdev_apply_port_links(struct rte_eventdev *dev)
2310 {
2311         struct dlb2_eventdev *dlb2 = dlb2_pmd_priv(dev);
2312         int i;
2313
2314         /* Perform requested port->queue links */
2315         for (i = 0; i < dlb2->num_ports; i++) {
2316                 struct dlb2_eventdev_port *ev_port = &dlb2->ev_ports[i];
2317                 int j;
2318
2319                 for (j = 0; j < DLB2_MAX_NUM_QIDS_PER_LDB_CQ; j++) {
2320                         struct dlb2_eventdev_queue *ev_queue;
2321                         uint8_t prio, queue_id;
2322
2323                         if (!ev_port->link[j].valid)
2324                                 continue;
2325
2326                         prio = ev_port->link[j].priority;
2327                         queue_id = ev_port->link[j].queue_id;
2328
2329                         if (dlb2_validate_port_link(ev_port, queue_id, true, j))
2330                                 return -EINVAL;
2331
2332                         ev_queue = &dlb2->ev_queues[queue_id];
2333
2334                         if (dlb2_do_port_link(dev, ev_queue, ev_port, prio))
2335                                 return -EINVAL;
2336                 }
2337         }
2338
2339         return 0;
2340 }
2341
2342 static int
2343 dlb2_eventdev_start(struct rte_eventdev *dev)
2344 {
2345         struct dlb2_eventdev *dlb2 = dlb2_pmd_priv(dev);
2346         struct dlb2_hw_dev *handle = &dlb2->qm_instance;
2347         struct dlb2_start_domain_args cfg;
2348         int ret, i;
2349
2350         rte_spinlock_lock(&dlb2->qm_instance.resource_lock);
2351         if (dlb2->run_state != DLB2_RUN_STATE_STOPPED) {
2352                 DLB2_LOG_ERR("bad state %d for dev_start\n",
2353                              (int)dlb2->run_state);
2354                 rte_spinlock_unlock(&dlb2->qm_instance.resource_lock);
2355                 return -EINVAL;
2356         }
2357         dlb2->run_state = DLB2_RUN_STATE_STARTING;
2358         rte_spinlock_unlock(&dlb2->qm_instance.resource_lock);
2359
2360         /* If the device was configured more than once, some event ports and/or
2361          * queues may need to be reconfigured.
2362          */
2363         ret = dlb2_eventdev_reapply_configuration(dev);
2364         if (ret)
2365                 return ret;
2366
2367         /* The DLB PMD delays port links until the device is started. */
2368         ret = dlb2_eventdev_apply_port_links(dev);
2369         if (ret)
2370                 return ret;
2371
2372         for (i = 0; i < dlb2->num_ports; i++) {
2373                 if (!dlb2->ev_ports[i].setup_done) {
2374                         DLB2_LOG_ERR("dlb2: port %d not setup\n", i);
2375                         return -ESTALE;
2376                 }
2377         }
2378
2379         for (i = 0; i < dlb2->num_queues; i++) {
2380                 if (dlb2->ev_queues[i].num_links == 0) {
2381                         DLB2_LOG_ERR("dlb2: queue %d is not linked\n", i);
2382                         return -ENOLINK;
2383                 }
2384         }
2385
2386         ret = dlb2_iface_sched_domain_start(handle, &cfg);
2387         if (ret < 0) {
2388                 DLB2_LOG_ERR("dlb2: sched_domain_start ret=%d (driver status: %s)\n",
2389                              ret, dlb2_error_strings[cfg.response.status]);
2390                 return ret;
2391         }
2392
2393         dlb2->run_state = DLB2_RUN_STATE_STARTED;
2394         DLB2_LOG_DBG("dlb2: sched_domain_start completed OK\n");
2395
2396         return 0;
2397 }
2398
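     /* Map (port type, eventdev op) to the command byte written into each
      * HCW. Directed ports carry no atomic/ordered scheduling state, so a
      * FORWARD is issued as a plain NEW enqueue and a RELEASE becomes a
      * no-op.
      */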
2399 static uint8_t cmd_byte_map[DLB2_NUM_PORT_TYPES][DLB2_NUM_HW_SCHED_TYPES] = {
2400         {
2401                 /* Load-balanced cmd bytes */
2402                 [RTE_EVENT_OP_NEW] = DLB2_NEW_CMD_BYTE,
2403                 [RTE_EVENT_OP_FORWARD] = DLB2_FWD_CMD_BYTE,
2404                 [RTE_EVENT_OP_RELEASE] = DLB2_COMP_CMD_BYTE,
2405         },
2406         {
2407                 /* Directed cmd bytes */
2408                 [RTE_EVENT_OP_NEW] = DLB2_NEW_CMD_BYTE,
2409                 [RTE_EVENT_OP_FORWARD] = DLB2_NEW_CMD_BYTE,
2410                 [RTE_EVENT_OP_RELEASE] = DLB2_NOOP_CMD_BYTE,
2411         },
2412 };
2413
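     /* Claim up to one batch (hw_credit_quanta) of credits from the shared
      * pool with a single compare-and-swap; on contention or an empty pool,
      * return 0 and let a later enqueue attempt retry.
      */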
2414 static inline uint32_t
2415 dlb2_port_credits_get(struct dlb2_port *qm_port,
2416                       enum dlb2_hw_queue_types type)
2417 {
2418         uint32_t credits = *qm_port->credit_pool[type];
2419         /* By default hw_credit_quanta is DLB2_SW_CREDIT_BATCH_SZ */
2420         uint32_t batch_size = qm_port->hw_credit_quanta;
2421
2422         if (unlikely(credits < batch_size))
2423                 batch_size = credits;
2424
2425         if (likely(credits &&
2426                    __atomic_compare_exchange_n(
2427                         qm_port->credit_pool[type],
2428                         &credits, credits - batch_size, false,
2429                         __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST)))
2430                 return batch_size;
2431         else
2432                 return 0;
2433 }
2434
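     /* Once at least two quanta of software credits have accumulated
      * locally, return the excess to the device-wide inflight count,
      * keeping one quanta cached for upcoming enqueues.
      */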
2435 static inline void
2436 dlb2_replenish_sw_credits(struct dlb2_eventdev *dlb2,
2437                           struct dlb2_eventdev_port *ev_port)
2438 {
2439         uint16_t quanta = ev_port->credit_update_quanta;
2440
2441         if (ev_port->inflight_credits >= quanta * 2) {
2442                 /* Replenish credits, saving one quanta for enqueues */
2443                 uint16_t val = ev_port->inflight_credits - quanta;
2444
2445                 __atomic_fetch_sub(&dlb2->inflights, val, __ATOMIC_SEQ_CST);
2446                 ev_port->inflight_credits -= val;
2447         }
2448 }
2449
2450 static inline int
2451 dlb2_check_enqueue_sw_credits(struct dlb2_eventdev *dlb2,
2452                               struct dlb2_eventdev_port *ev_port)
2453 {
2454         uint32_t sw_inflights = __atomic_load_n(&dlb2->inflights,
2455                                                 __ATOMIC_SEQ_CST);
2456         const int num = 1;
2457
2458         if (unlikely(ev_port->inflight_max < sw_inflights)) {
2459                 DLB2_INC_STAT(ev_port->stats.traffic.tx_nospc_inflight_max, 1);
2460                 rte_errno = -ENOSPC;
2461                 return 1;
2462         }
2463
2464         if (ev_port->inflight_credits < num) {
2465                 /* check if event enqueue brings ev_port over max threshold */
2466                 uint32_t credit_update_quanta = ev_port->credit_update_quanta;
2467
2468                 if (sw_inflights + credit_update_quanta >
2469                                 dlb2->new_event_limit) {
2470                         DLB2_INC_STAT(
2471                         ev_port->stats.traffic.tx_nospc_new_event_limit,
2472                         1);
2473                         rte_errno = -ENOSPC;
2474                         return 1;
2475                 }
2476
2477                 __atomic_fetch_add(&dlb2->inflights, credit_update_quanta,
2478                                    __ATOMIC_SEQ_CST);
2479                 ev_port->inflight_credits += (credit_update_quanta);
2480
2481                 if (ev_port->inflight_credits < num) {
2482                         DLB2_INC_STAT(
2483                         ev_port->stats.traffic.tx_nospc_inflight_credits,
2484                         1);
2485                         rte_errno = -ENOSPC;
2486                         return 1;
2487                 }
2488         }
2489
2490         return 0;
2491 }
2492
2493 static inline int
2494 dlb2_check_enqueue_hw_ldb_credits(struct dlb2_port *qm_port)
2495 {
2496         if (unlikely(qm_port->cached_ldb_credits == 0)) {
2497                 qm_port->cached_ldb_credits =
2498                         dlb2_port_credits_get(qm_port,
2499                                               DLB2_LDB_QUEUE);
2500                 if (unlikely(qm_port->cached_ldb_credits == 0)) {
2501                         DLB2_INC_STAT(
2502                         qm_port->ev_port->stats.traffic.tx_nospc_ldb_hw_credits,
2503                         1);
2504                         DLB2_LOG_DBG("ldb credits exhausted\n");
2505                         return 1; /* credits exhausted */
2506                 }
2507         }
2508
2509         return 0;
2510 }
2511
2512 static inline int
2513 dlb2_check_enqueue_hw_dir_credits(struct dlb2_port *qm_port)
2514 {
2515         if (unlikely(qm_port->cached_dir_credits == 0)) {
2516                 qm_port->cached_dir_credits =
2517                         dlb2_port_credits_get(qm_port,
2518                                               DLB2_DIR_QUEUE);
2519                 if (unlikely(qm_port->cached_dir_credits == 0)) {
2520                         DLB2_INC_STAT(
2521                         qm_port->ev_port->stats.traffic.tx_nospc_dir_hw_credits,
2522                         1);
2523                         DLB2_LOG_DBG("dir credits exhausted\n");
2524                         return 1; /* credits exhausted */
2525                 }
2526         }
2527
2528         return 0;
2529 }
2530
2531 static inline int
2532 dlb2_check_enqueue_hw_credits(struct dlb2_port *qm_port)
2533 {
2534         if (unlikely(qm_port->cached_credits == 0)) {
2535                 qm_port->cached_credits =
2536                         dlb2_port_credits_get(qm_port,
2537                                               DLB2_COMBINED_POOL);
2538                 if (unlikely(qm_port->cached_credits == 0)) {
2539                         DLB2_INC_STAT(
2540                         qm_port->ev_port->stats.traffic.tx_nospc_hw_credits, 1);
2541                         DLB2_LOG_DBG("credits exhausted\n");
2542                         return 1; /* credits exhausted */
2543                 }
2544         }
2545
2546         return 0;
2547 }
2548
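     /* Copy the four 16B enqueue QEs (one cache line) to the port's
      * producer port address with a single MOVDIR64B direct store.
      */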
2549 static __rte_always_inline void
2550 dlb2_pp_write(struct dlb2_enqueue_qe *qe4,
2551               struct process_local_port_data *port_data)
2552 {
2553         dlb2_movdir64b(port_data->pp_addr, qe4);
2554 }
2555
2556 static inline int
2557 dlb2_consume_qe_immediate(struct dlb2_port *qm_port, int num)
2558 {
2559         struct process_local_port_data *port_data;
2560         struct dlb2_cq_pop_qe *qe;
2561
2562         RTE_ASSERT(qm_port->config_state == DLB2_CONFIGURED);
2563
2564         qe = qm_port->consume_qe;
2565
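             /* The pop QE encodes the token count as N - 1 (see also
              * dlb2_construct_token_pop_qe()).
              */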
2566         qe->tokens = num - 1;
2567
2568         /* No store fence needed since no pointer is being sent, and CQ token
2569          * pops can be safely reordered with other HCWs.
2570          */
2571         port_data = &dlb2_port[qm_port->id][PORT_TYPE(qm_port)];
2572
2573         dlb2_movntdq_single(port_data->pp_addr, qe);
2574
2575         DLB2_LOG_DBG("dlb2: consume immediate - %d QEs\n", num);
2576
2577         qm_port->owed_tokens = 0;
2578
2579         return 0;
2580 }
2581
2582 static inline void
2583 dlb2_hw_do_enqueue(struct dlb2_port *qm_port,
2584                    bool do_sfence,
2585                    struct process_local_port_data *port_data)
2586 {
2587         /* Since MOVDIR64B is weakly-ordered, use an SFENCE to ensure that
2588          * application writes complete before enqueueing the QE.
2589          */
2590         if (do_sfence)
2591                 rte_wmb();
2592
2593         dlb2_pp_write(qm_port->qe4, port_data);
2594 }
2595
2596 static inline void
2597 dlb2_construct_token_pop_qe(struct dlb2_port *qm_port, int idx)
2598 {
2599         struct dlb2_cq_pop_qe *qe = (void *)qm_port->qe4;
2600         int num = qm_port->owed_tokens;
2601
2602         qe[idx].cmd_byte = DLB2_POP_CMD_BYTE;
2603         qe[idx].tokens = num - 1;
2604
2605         qm_port->owed_tokens = 0;
2606 }
2607
2608 static inline void
2609 dlb2_event_build_hcws(struct dlb2_port *qm_port,
2610                       const struct rte_event ev[],
2611                       int num,
2612                       uint8_t *sched_type,
2613                       uint8_t *queue_id)
2614 {
2615         struct dlb2_enqueue_qe *qe;
2616         uint16_t sched_word[4];
2617         __m128i sse_qe[2];
2618         int i;
2619
2620         qe = qm_port->qe4;
2621
2622         sse_qe[0] = _mm_setzero_si128();
2623         sse_qe[1] = _mm_setzero_si128();
2624
2625         switch (num) {
2626         case 4:
2627                 /* Construct the metadata portion of two HCWs in one 128b SSE
2628                  * register. HCW metadata is constructed in the SSE registers
2629                  * like so:
2630                  * sse_qe[0][63:0]:   qe[0]'s metadata
2631                  * sse_qe[0][127:64]: qe[1]'s metadata
2632                  * sse_qe[1][63:0]:   qe[2]'s metadata
2633                  * sse_qe[1][127:64]: qe[3]'s metadata
2634                  */
2635
2636                 /* Convert the event operation into a command byte and store it
2637                  * in the metadata:
2638                  * sse_qe[0][63:56]   = cmd_byte_map[is_directed][ev[0].op]
2639                  * sse_qe[0][127:120] = cmd_byte_map[is_directed][ev[1].op]
2640                  * sse_qe[1][63:56]   = cmd_byte_map[is_directed][ev[2].op]
2641                  * sse_qe[1][127:120] = cmd_byte_map[is_directed][ev[3].op]
2642                  */
2643 #define DLB2_QE_CMD_BYTE 7
2644                 sse_qe[0] = _mm_insert_epi8(sse_qe[0],
2645                                 cmd_byte_map[qm_port->is_directed][ev[0].op],
2646                                 DLB2_QE_CMD_BYTE);
2647                 sse_qe[0] = _mm_insert_epi8(sse_qe[0],
2648                                 cmd_byte_map[qm_port->is_directed][ev[1].op],
2649                                 DLB2_QE_CMD_BYTE + 8);
2650                 sse_qe[1] = _mm_insert_epi8(sse_qe[1],
2651                                 cmd_byte_map[qm_port->is_directed][ev[2].op],
2652                                 DLB2_QE_CMD_BYTE);
2653                 sse_qe[1] = _mm_insert_epi8(sse_qe[1],
2654                                 cmd_byte_map[qm_port->is_directed][ev[3].op],
2655                                 DLB2_QE_CMD_BYTE + 8);
2656
2657                 /* Store priority, scheduling type, and queue ID in the sched
2658                  * word array because these values are re-used when the
2659                  * destination is a directed queue.
2660                  */
2661                 sched_word[0] = EV_TO_DLB2_PRIO(ev[0].priority) << 10 |
2662                                 sched_type[0] << 8 |
2663                                 queue_id[0];
2664                 sched_word[1] = EV_TO_DLB2_PRIO(ev[1].priority) << 10 |
2665                                 sched_type[1] << 8 |
2666                                 queue_id[1];
2667                 sched_word[2] = EV_TO_DLB2_PRIO(ev[2].priority) << 10 |
2668                                 sched_type[2] << 8 |
2669                                 queue_id[2];
2670                 sched_word[3] = EV_TO_DLB2_PRIO(ev[3].priority) << 10 |
2671                                 sched_type[3] << 8 |
2672                                 queue_id[3];
2673
2674                 /* Store the event priority, scheduling type, and queue ID in
2675                  * the metadata:
2676                  * sse_qe[0][31:16] = sched_word[0]
2677                  * sse_qe[0][95:80] = sched_word[1]
2678                  * sse_qe[1][31:16] = sched_word[2]
2679                  * sse_qe[1][95:80] = sched_word[3]
2680                  */
2681 #define DLB2_QE_QID_SCHED_WORD 1
2682                 sse_qe[0] = _mm_insert_epi16(sse_qe[0],
2683                                              sched_word[0],
2684                                              DLB2_QE_QID_SCHED_WORD);
2685                 sse_qe[0] = _mm_insert_epi16(sse_qe[0],
2686                                              sched_word[1],
2687                                              DLB2_QE_QID_SCHED_WORD + 4);
2688                 sse_qe[1] = _mm_insert_epi16(sse_qe[1],
2689                                              sched_word[2],
2690                                              DLB2_QE_QID_SCHED_WORD);
2691                 sse_qe[1] = _mm_insert_epi16(sse_qe[1],
2692                                              sched_word[3],
2693                                              DLB2_QE_QID_SCHED_WORD + 4);
2694
2695                 /* If the destination is a load-balanced queue, store the lock
2696                  * ID. If it is a directed queue, DLB places this field in
2697                  * bytes 10-11 of the received QE, so we format it accordingly:
2698                  * sse_qe[0][47:32]  = dir queue ? sched_word[0] : flow_id[0]
2699                  * sse_qe[0][111:96] = dir queue ? sched_word[1] : flow_id[1]
2700                  * sse_qe[1][47:32]  = dir queue ? sched_word[2] : flow_id[2]
2701                  * sse_qe[1][111:96] = dir queue ? sched_word[3] : flow_id[3]
2702                  */
2703 #define DLB2_QE_LOCK_ID_WORD 2
2704                 sse_qe[0] = _mm_insert_epi16(sse_qe[0],
2705                                 (sched_type[0] == DLB2_SCHED_DIRECTED) ?
2706                                         sched_word[0] : ev[0].flow_id,
2707                                 DLB2_QE_LOCK_ID_WORD);
2708                 sse_qe[0] = _mm_insert_epi16(sse_qe[0],
2709                                 (sched_type[1] == DLB2_SCHED_DIRECTED) ?
2710                                         sched_word[1] : ev[1].flow_id,
2711                                 DLB2_QE_LOCK_ID_WORD + 4);
2712                 sse_qe[1] = _mm_insert_epi16(sse_qe[1],
2713                                 (sched_type[2] == DLB2_SCHED_DIRECTED) ?
2714                                         sched_word[2] : ev[2].flow_id,
2715                                 DLB2_QE_LOCK_ID_WORD);
2716                 sse_qe[1] = _mm_insert_epi16(sse_qe[1],
2717                                 (sched_type[3] == DLB2_SCHED_DIRECTED) ?
2718                                         sched_word[3] : ev[3].flow_id,
2719                                 DLB2_QE_LOCK_ID_WORD + 4);
2720
2721                 /* Store the event type and sub event type in the metadata:
2722                  * sse_qe[0][15:0]  = sub_event_type[0] << 8 | event_type[0]
2723                  * sse_qe[0][79:64] = sub_event_type[1] << 8 | event_type[1]
2724                  * sse_qe[1][15:0]  = sub_event_type[2] << 8 | event_type[2]
2725                  * sse_qe[1][79:64] = sub_event_type[3] << 8 | event_type[3]
2726                  */
2727 #define DLB2_QE_EV_TYPE_WORD 0
2728                 sse_qe[0] = _mm_insert_epi16(sse_qe[0],
2729                                              ev[0].sub_event_type << 8 |
2730                                                 ev[0].event_type,
2731                                              DLB2_QE_EV_TYPE_WORD);
2732                 sse_qe[0] = _mm_insert_epi16(sse_qe[0],
2733                                              ev[1].sub_event_type << 8 |
2734                                                 ev[1].event_type,
2735                                              DLB2_QE_EV_TYPE_WORD + 4);
2736                 sse_qe[1] = _mm_insert_epi16(sse_qe[1],
2737                                              ev[2].sub_event_type << 8 |
2738                                                 ev[2].event_type,
2739                                              DLB2_QE_EV_TYPE_WORD);
2740                 sse_qe[1] = _mm_insert_epi16(sse_qe[1],
2741                                              ev[3].sub_event_type << 8 |
2742                                                 ev[3].event_type,
2743                                              DLB2_QE_EV_TYPE_WORD + 4);
2744
2745                 /* Store the metadata to memory (use the double-precision
2746                  * _mm_storeh_pd because there is no integer function for
2747                  * storing the upper 64b):
2748                  * qe[0] metadata = sse_qe[0][63:0]
2749                  * qe[1] metadata = sse_qe[0][127:64]
2750                  * qe[2] metadata = sse_qe[1][63:0]
2751                  * qe[3] metadata = sse_qe[1][127:64]
2752                  */
2753                 _mm_storel_epi64((__m128i *)&qe[0].u.opaque_data, sse_qe[0]);
2754                 _mm_storeh_pd((double *)&qe[1].u.opaque_data,
2755                               (__m128d)sse_qe[0]);
2756                 _mm_storel_epi64((__m128i *)&qe[2].u.opaque_data, sse_qe[1]);
2757                 _mm_storeh_pd((double *)&qe[3].u.opaque_data,
2758                               (__m128d)sse_qe[1]);
2759
2760                 qe[0].data = ev[0].u64;
2761                 qe[1].data = ev[1].u64;
2762                 qe[2].data = ev[2].u64;
2763                 qe[3].data = ev[3].u64;
2764
2765                 break;
2766         case 3:
2767         case 2:
2768         case 1:
2769                 for (i = 0; i < num; i++) {
2770                         qe[i].cmd_byte =
2771                                 cmd_byte_map[qm_port->is_directed][ev[i].op];
2772                         qe[i].sched_type = sched_type[i];
2773                         qe[i].data = ev[i].u64;
2774                         qe[i].qid = queue_id[i];
2775                         qe[i].priority = EV_TO_DLB2_PRIO(ev[i].priority);
2776                         qe[i].lock_id = ev[i].flow_id;
2777                         if (sched_type[i] == DLB2_SCHED_DIRECTED) {
2778                                 struct dlb2_msg_info *info =
2779                                         (struct dlb2_msg_info *)&qe[i].lock_id;
2780
2781                                 info->qid = queue_id[i];
2782                                 info->sched_type = DLB2_SCHED_DIRECTED;
2783                                 info->priority = qe[i].priority;
2784                         }
2785                         qe[i].u.event_type.major = ev[i].event_type;
2786                         qe[i].u.event_type.sub = ev[i].sub_event_type;
2787                 }
2788                 break;
2789         case 0:
2790                 break;
2791         }
2792 }
2793
2794 static inline int
2795 dlb2_event_enqueue_prep(struct dlb2_eventdev_port *ev_port,
2796                         struct dlb2_port *qm_port,
2797                         const struct rte_event ev[],
2798                         uint8_t *sched_type,
2799                         uint8_t *queue_id)
2800 {
2801         struct dlb2_eventdev *dlb2 = ev_port->dlb2;
2802         struct dlb2_eventdev_queue *ev_queue;
2803         uint16_t *cached_credits = NULL;
2804         struct dlb2_queue *qm_queue;
2805
2806         ev_queue = &dlb2->ev_queues[ev->queue_id];
2807         qm_queue = &ev_queue->qm_queue;
2808         *queue_id = qm_queue->id;
2809
2810         /* Ignore sched_type and hardware credits on release events */
2811         if (ev->op == RTE_EVENT_OP_RELEASE)
2812                 goto op_check;
2813
2814         if (!qm_queue->is_directed) {
2815                 /* Load balanced destination queue */
2816
2817                 if (dlb2->version == DLB2_HW_V2) {
2818                         if (dlb2_check_enqueue_hw_ldb_credits(qm_port)) {
2819                                 rte_errno = -ENOSPC;
2820                                 return 1;
2821                         }
2822                         cached_credits = &qm_port->cached_ldb_credits;
2823                 } else {
2824                         if (dlb2_check_enqueue_hw_credits(qm_port)) {
2825                                 rte_errno = -ENOSPC;
2826                                 return 1;
2827                         }
2828                         cached_credits = &qm_port->cached_credits;
2829                 }
2830                 switch (ev->sched_type) {
2831                 case RTE_SCHED_TYPE_ORDERED:
2832                         DLB2_LOG_DBG("dlb2: put_qe: RTE_SCHED_TYPE_ORDERED\n");
2833                         if (qm_queue->sched_type != RTE_SCHED_TYPE_ORDERED) {
2834                                 DLB2_LOG_ERR("dlb2: tried to send ordered event to unordered queue %d\n",
2835                                              *queue_id);
2836                                 rte_errno = -EINVAL;
2837                                 return 1;
2838                         }
2839                         *sched_type = DLB2_SCHED_ORDERED;
2840                         break;
2841                 case RTE_SCHED_TYPE_ATOMIC:
2842                         DLB2_LOG_DBG("dlb2: put_qe: RTE_SCHED_TYPE_ATOMIC\n");
2843                         *sched_type = DLB2_SCHED_ATOMIC;
2844                         break;
2845                 case RTE_SCHED_TYPE_PARALLEL:
2846                         DLB2_LOG_DBG("dlb2: put_qe: RTE_SCHED_TYPE_PARALLEL\n");
2847                         if (qm_queue->sched_type == RTE_SCHED_TYPE_ORDERED)
2848                                 *sched_type = DLB2_SCHED_ORDERED;
2849                         else
2850                                 *sched_type = DLB2_SCHED_UNORDERED;
2851                         break;
2852                 default:
2853                         DLB2_LOG_ERR("Unsupported LDB sched type in put_qe\n");
2854                         DLB2_INC_STAT(ev_port->stats.tx_invalid, 1);
2855                         rte_errno = -EINVAL;
2856                         return 1;
2857                 }
2858         } else {
2859                 /* Directed destination queue */
2860
2861                 if (dlb2->version == DLB2_HW_V2) {
2862                         if (dlb2_check_enqueue_hw_dir_credits(qm_port)) {
2863                                 rte_errno = -ENOSPC;
2864                                 return 1;
2865                         }
2866                         cached_credits = &qm_port->cached_dir_credits;
2867                 } else {
2868                         if (dlb2_check_enqueue_hw_credits(qm_port)) {
2869                                 rte_errno = -ENOSPC;
2870                                 return 1;
2871                         }
2872                         cached_credits = &qm_port->cached_credits;
2873                 }
2874                 DLB2_LOG_DBG("dlb2: put_qe: RTE_SCHED_TYPE_DIRECTED\n");
2875
2876                 *sched_type = DLB2_SCHED_DIRECTED;
2877         }
2878
2879 op_check:
2880         switch (ev->op) {
2881         case RTE_EVENT_OP_NEW:
2882                 /* Check that a sw credit is available */
2883                 if (dlb2_check_enqueue_sw_credits(dlb2, ev_port)) {
2884                         rte_errno = -ENOSPC;
2885                         return 1;
2886                 }
2887                 ev_port->inflight_credits--;
2888                 (*cached_credits)--;
2889                 break;
2890         case RTE_EVENT_OP_FORWARD:
2891                 /* Check for outstanding_releases underflow. If this occurs,
2892                  * the application is not using the EVENT_OPs correctly; for
2893                  * example, forwarding or releasing events that were not
2894                  * dequeued.
2895                  */
2896                 RTE_ASSERT(ev_port->outstanding_releases > 0);
2897                 ev_port->outstanding_releases--;
2898                 qm_port->issued_releases++;
2899                 (*cached_credits)--;
2900                 break;
2901         case RTE_EVENT_OP_RELEASE:
2902                 ev_port->inflight_credits++;
2903                 /* Check for outstanding_releases underflow. If this occurs,
2904                  * the application is not using the EVENT_OPs correctly; for
2905                  * example, forwarding or releasing events that were not
2906                  * dequeued.
2907                  */
2908                 RTE_ASSERT(ev_port->outstanding_releases > 0);
2909                 ev_port->outstanding_releases--;
2910                 qm_port->issued_releases++;
2911
2912                 /* Replenish s/w credits if enough are cached */
2913                 dlb2_replenish_sw_credits(dlb2, ev_port);
2914                 break;
2915         }
2916
2917         DLB2_INC_STAT(ev_port->stats.tx_op_cnt[ev->op], 1);
2918         DLB2_INC_STAT(ev_port->stats.traffic.tx_ok, 1);
2919
2920 #ifndef RTE_LIBRTE_PMD_DLB2_QUELL_STATS
2921         if (ev->op != RTE_EVENT_OP_RELEASE) {
2922                 DLB2_INC_STAT(ev_port->stats.queue[ev->queue_id].enq_ok, 1);
2923                 DLB2_INC_STAT(ev_port->stats.tx_sched_cnt[*sched_type], 1);
2924         }
2925 #endif
2926
2927         return 0;
2928 }
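
/*
 * A minimal scalar sketch of the credit pairing handled above, assuming
 * hypothetical counters rather than the driver's structures: OP_NEW consumes
 * both a software (inflight) credit and a cached hardware credit, OP_FORWARD
 * re-uses the software credit the event already holds, and OP_RELEASE hands
 * the software credit back. credit_sketch_account_op() is illustrative only.
 */
static inline int
credit_sketch_account_op(uint8_t op, uint32_t *inflight_credits,
                         uint32_t *cached_hw_credits,
                         uint32_t *outstanding_releases)
{
        switch (op) {
        case RTE_EVENT_OP_NEW:
                /* A brand-new event needs one credit from each domain */
                if (*inflight_credits == 0 || *cached_hw_credits == 0)
                        return -ENOSPC;
                (*inflight_credits)--;
                (*cached_hw_credits)--;
                break;
        case RTE_EVENT_OP_FORWARD:
                /* Forwarding keeps the s/w credit but needs a h/w credit */
                if (*cached_hw_credits == 0)
                        return -ENOSPC;
                (*outstanding_releases)--;
                (*cached_hw_credits)--;
                break;
        case RTE_EVENT_OP_RELEASE:
                /* A release returns the s/w credit; no h/w credit is used */
                (*outstanding_releases)--;
                (*inflight_credits)++;
                break;
        }

        return 0;
}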
2929
2930 static inline uint16_t
2931 __dlb2_event_enqueue_burst(void *event_port,
2932                            const struct rte_event events[],
2933                            uint16_t num,
2934                            bool use_delayed)
2935 {
2936         struct dlb2_eventdev_port *ev_port = event_port;
2937         struct dlb2_port *qm_port = &ev_port->qm_port;
2938         struct process_local_port_data *port_data;
2939         int i;
2940
2941         RTE_ASSERT(ev_port->enq_configured);
2942         RTE_ASSERT(events != NULL);
2943
2944         i = 0;
2945
2946         port_data = &dlb2_port[qm_port->id][PORT_TYPE(qm_port)];
2947
2948         while (i < num) {
2949                 uint8_t sched_types[DLB2_NUM_QES_PER_CACHE_LINE];
2950                 uint8_t queue_ids[DLB2_NUM_QES_PER_CACHE_LINE];
2951                 int pop_offs = 0;
2952                 int j = 0;
2953
2954                 memset(qm_port->qe4,
2955                        0,
2956                        DLB2_NUM_QES_PER_CACHE_LINE *
2957                        sizeof(struct dlb2_enqueue_qe));
2958
2959                 for (; j < DLB2_NUM_QES_PER_CACHE_LINE && (i + j) < num; j++) {
2960                         const struct rte_event *ev = &events[i + j];
2961                         int16_t thresh = qm_port->token_pop_thresh;
2962
2963                         if (use_delayed &&
2964                             qm_port->token_pop_mode == DELAYED_POP &&
2965                             (ev->op == RTE_EVENT_OP_FORWARD ||
2966                              ev->op == RTE_EVENT_OP_RELEASE) &&
2967                             qm_port->issued_releases >= thresh - 1) {
2968                                 /* Insert the token pop QE and break out. This
2969                                  * may result in a partial HCW, but that is
2970                                  * simpler than supporting arbitrary QE
2971                                  * insertion.
2972                                  */
2973                                 dlb2_construct_token_pop_qe(qm_port, j);
2974
2975                                 /* Reset the releases for the next QE batch */
2976                                 qm_port->issued_releases -= thresh;
2977
2978                                 pop_offs = 1;
2979                                 j++;
2980                                 break;
2981                         }
2982
2983                         if (dlb2_event_enqueue_prep(ev_port, qm_port, ev,
2984                                                     &sched_types[j],
2985                                                     &queue_ids[j]))
2986                                 break;
2987                 }
2988
2989                 if (j == 0)
2990                         break;
2991
2992                 dlb2_event_build_hcws(qm_port, &events[i], j - pop_offs,
2993                                       sched_types, queue_ids);
2994
2995                 dlb2_hw_do_enqueue(qm_port, i == 0, port_data);
2996
2997                 /* Don't include the token pop QE in the enqueue count */
2998                 i += j - pop_offs;
2999
3000                 /* Don't interpret j < DLB2_NUM_... as out-of-credits if
3001                  * pop_offs != 0
3002                  */
3003                 if (j < DLB2_NUM_QES_PER_CACHE_LINE && pop_offs == 0)
3004                         break;
3005         }
3006
3007         return i;
3008 }
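
/*
 * A stripped-down sketch of the batching shape used above, with the credit
 * checks and delayed token-pop handling omitted. prep_one() and flush_line()
 * are hypothetical callbacks standing in for QE construction and the portal
 * write; the point is only the "at most four QEs (one cache line) per store,
 * stop on a partial line" structure.
 */
static inline int
enqueue_batch_sketch(const struct rte_event evs[], int num,
                     int (*prep_one)(const struct rte_event *ev),
                     void (*flush_line)(int cnt))
{
        int i = 0;

        while (i < num) {
                int j;

                /* Build up to one cache line (4 QEs) worth of events */
                for (j = 0; j < 4 && (i + j) < num; j++)
                        if (prep_one(&evs[i + j]))
                                break; /* e.g. out of credits */

                if (j == 0)
                        break;

                flush_line(j); /* one hardware write per cache line */
                i += j;

                /* A partial line means we ran out of events or credits */
                if (j < 4)
                        break;
        }

        return i; /* number of events actually enqueued */
}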
3009
3010 static uint16_t
3011 dlb2_event_enqueue_burst(void *event_port,
3012                              const struct rte_event events[],
3013                              uint16_t num)
3014 {
3015         return __dlb2_event_enqueue_burst(event_port, events, num, false);
3016 }
3017
3018 static uint16_t
3019 dlb2_event_enqueue_burst_delayed(void *event_port,
3020                                      const struct rte_event events[],
3021                                      uint16_t num)
3022 {
3023         return __dlb2_event_enqueue_burst(event_port, events, num, true);
3024 }
3025
3026 static inline uint16_t
3027 dlb2_event_enqueue(void *event_port,
3028                    const struct rte_event events[])
3029 {
3030         return __dlb2_event_enqueue_burst(event_port, events, 1, false);
3031 }
3032
3033 static inline uint16_t
3034 dlb2_event_enqueue_delayed(void *event_port,
3035                            const struct rte_event events[])
3036 {
3037         return __dlb2_event_enqueue_burst(event_port, events, 1, true);
3038 }
3039
3040 static uint16_t
3041 dlb2_event_enqueue_new_burst(void *event_port,
3042                              const struct rte_event events[],
3043                              uint16_t num)
3044 {
3045         return __dlb2_event_enqueue_burst(event_port, events, num, false);
3046 }
3047
3048 static uint16_t
3049 dlb2_event_enqueue_new_burst_delayed(void *event_port,
3050                                      const struct rte_event events[],
3051                                      uint16_t num)
3052 {
3053         return __dlb2_event_enqueue_burst(event_port, events, num, true);
3054 }
3055
3056 static uint16_t
3057 dlb2_event_enqueue_forward_burst(void *event_port,
3058                                  const struct rte_event events[],
3059                                  uint16_t num)
3060 {
3061         return __dlb2_event_enqueue_burst(event_port, events, num, false);
3062 }
3063
3064 static uint16_t
3065 dlb2_event_enqueue_forward_burst_delayed(void *event_port,
3066                                          const struct rte_event events[],
3067                                          uint16_t num)
3068 {
3069         return __dlb2_event_enqueue_burst(event_port, events, num, true);
3070 }
3071
3072 static void
3073 dlb2_event_release(struct dlb2_eventdev *dlb2,
3074                    uint8_t port_id,
3075                    int n)
3076 {
3077         struct process_local_port_data *port_data;
3078         struct dlb2_eventdev_port *ev_port;
3079         struct dlb2_port *qm_port;
3080         int i;
3081
3082         if (port_id > dlb2->num_ports) {
3083                 DLB2_LOG_ERR("Invalid port id %d in dlb2_event_release\n",
3084                              port_id);
3085                 rte_errno = -EINVAL;
3086                 return;
3087         }
3088
3089         ev_port = &dlb2->ev_ports[port_id];
3090         qm_port = &ev_port->qm_port;
3091         port_data = &dlb2_port[qm_port->id][PORT_TYPE(qm_port)];
3092
3093         i = 0;
3094
3095         if (qm_port->is_directed) {
3096                 i = n;
3097                 goto sw_credit_update;
3098         }
3099
3100         while (i < n) {
3101                 int pop_offs = 0;
3102                 int j = 0;
3103
3104                 /* Zero-out QEs */
3105                 _mm_storeu_si128((void *)&qm_port->qe4[0], _mm_setzero_si128());
3106                 _mm_storeu_si128((void *)&qm_port->qe4[1], _mm_setzero_si128());
3107                 _mm_storeu_si128((void *)&qm_port->qe4[2], _mm_setzero_si128());
3108                 _mm_storeu_si128((void *)&qm_port->qe4[3], _mm_setzero_si128());
3109
3110
3111                 for (; j < DLB2_NUM_QES_PER_CACHE_LINE && (i + j) < n; j++) {
3112                         int16_t thresh = qm_port->token_pop_thresh;
3113
3114                         if (qm_port->token_pop_mode == DELAYED_POP &&
3115                             qm_port->issued_releases >= thresh - 1) {
3116                                 /* Insert the token pop QE */
3117                                 dlb2_construct_token_pop_qe(qm_port, j);
3118
3119                                 /* Reset the releases for the next QE batch */
3120                                 qm_port->issued_releases -= thresh;
3121
3122                                 pop_offs = 1;
3123                                 j++;
3124                                 break;
3125                         }
3126
3127                         qm_port->qe4[j].cmd_byte = DLB2_COMP_CMD_BYTE;
3128                         qm_port->issued_releases++;
3129                 }
3130
3131                 dlb2_hw_do_enqueue(qm_port, i == 0, port_data);
3132
3133                 /* Don't include the token pop QE in the release count */
3134                 i += j - pop_offs;
3135         }
3136
3137 sw_credit_update:
3138         /* each release returns one credit */
3139         if (unlikely(!ev_port->outstanding_releases)) {
3140                 DLB2_LOG_ERR("%s: Outstanding releases underflowed.\n",
3141                              __func__);
3142                 return;
3143         }
3144         ev_port->outstanding_releases -= i;
3145         ev_port->inflight_credits += i;
3146
3147         /* Replenish s/w credits if enough releases are performed */
3148         dlb2_replenish_sw_credits(dlb2, ev_port);
3149 }
3150
3151 static inline void
3152 dlb2_port_credits_inc(struct dlb2_port *qm_port, int num)
3153 {
3154         uint32_t batch_size = qm_port->hw_credit_quanta;
3155
3156         /* increment port credits; return a batch to the pool if over threshold */
3157         if (!qm_port->is_directed) {
3158                 if (qm_port->dlb2->version == DLB2_HW_V2) {
3159                         qm_port->cached_ldb_credits += num;
3160                         if (qm_port->cached_ldb_credits >= 2 * batch_size) {
3161                                 __atomic_fetch_add(
3162                                         qm_port->credit_pool[DLB2_LDB_QUEUE],
3163                                         batch_size, __ATOMIC_SEQ_CST);
3164                                 qm_port->cached_ldb_credits -= batch_size;
3165                         }
3166                 } else {
3167                         qm_port->cached_credits += num;
3168                         if (qm_port->cached_credits >= 2 * batch_size) {
3169                                 __atomic_fetch_add(
3170                                       qm_port->credit_pool[DLB2_COMBINED_POOL],
3171                                       batch_size, __ATOMIC_SEQ_CST);
3172                                 qm_port->cached_credits -= batch_size;
3173                         }
3174                 }
3175         } else {
3176                 if (qm_port->dlb2->version == DLB2_HW_V2) {
3177                         qm_port->cached_dir_credits += num;
3178                         if (qm_port->cached_dir_credits >= 2 * batch_size) {
3179                                 __atomic_fetch_add(
3180                                         qm_port->credit_pool[DLB2_DIR_QUEUE],
3181                                         batch_size, __ATOMIC_SEQ_CST);
3182                                 qm_port->cached_dir_credits -= batch_size;
3183                         }
3184                 } else {
3185                         qm_port->cached_credits += num;
3186                         if (qm_port->cached_credits >= 2 * batch_size) {
3187                                 __atomic_fetch_add(
3188                                       qm_port->credit_pool[DLB2_COMBINED_POOL],
3189                                       batch_size, __ATOMIC_SEQ_CST);
3190                                 qm_port->cached_credits -= batch_size;
3191                         }
3192                 }
3193         }
3194 }
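
/*
 * The pattern above caches freed hardware credits per port and only returns a
 * full quanta to the shared pool once the local cache holds at least two
 * quanta, keeping the atomic add off the common path. A self-contained sketch
 * of that policy; the names are hypothetical, the __atomic usage mirrors the
 * code above.
 */
static inline void
credit_cache_return_sketch(uint32_t *shared_pool, uint32_t *local_cache,
                           uint32_t quanta, uint32_t freed)
{
        *local_cache += freed;

        /* Keep roughly one quanta cached; hand a full quanta back beyond 2x */
        if (*local_cache >= 2 * quanta) {
                __atomic_fetch_add(shared_pool, quanta, __ATOMIC_SEQ_CST);
                *local_cache -= quanta;
        }
}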
3195
3196 #define CLB_MASK_IDX 0
3197 #define CLB_VAL_IDX 1
3198 static int
3199 dlb2_monitor_callback(const uint64_t val,
3200                 const uint64_t opaque[RTE_POWER_MONITOR_OPAQUE_SZ])
3201 {
3202         /* abort if the value matches */
3203         return (val & opaque[CLB_MASK_IDX]) == opaque[CLB_VAL_IDX] ? -1 : 0;
3204 }
3205
3206 static inline int
3207 dlb2_dequeue_wait(struct dlb2_eventdev *dlb2,
3208                   struct dlb2_eventdev_port *ev_port,
3209                   struct dlb2_port *qm_port,
3210                   uint64_t timeout,
3211                   uint64_t start_ticks)
3212 {
3213         struct process_local_port_data *port_data;
3214         uint64_t elapsed_ticks;
3215
3216         port_data = &dlb2_port[qm_port->id][PORT_TYPE(qm_port)];
3217
3218         elapsed_ticks = rte_get_timer_cycles() - start_ticks;
3219
3220         /* Wait/poll time expired */
3221         if (elapsed_ticks >= timeout) {
3222                 return 1;
3223         } else if (dlb2->umwait_allowed) {
3224                 struct rte_power_monitor_cond pmc;
3225                 volatile struct dlb2_dequeue_qe *cq_base;
3226                 union {
3227                         uint64_t raw_qe[2];
3228                         struct dlb2_dequeue_qe qe;
3229                 } qe_mask;
3230                 uint64_t expected_value;
3231                 volatile uint64_t *monitor_addr;
3232
3233                 qe_mask.qe.cq_gen = 1; /* set mask */
3234
3235                 cq_base = port_data->cq_base;
3236                 monitor_addr = (volatile uint64_t *)(volatile void *)
3237                         &cq_base[qm_port->cq_idx];
3238                 monitor_addr++; /* cq_gen bit is in second 64bit location */
3239
3240                 if (qm_port->gen_bit)
3241                         expected_value = qe_mask.raw_qe[1];
3242                 else
3243                         expected_value = 0;
3244
3245                 pmc.addr = monitor_addr;
3246                 /* store expected value and comparison mask in opaque data */
3247                 pmc.opaque[CLB_VAL_IDX] = expected_value;
3248                 pmc.opaque[CLB_MASK_IDX] = qe_mask.raw_qe[1];
3249                 /* set up callback */
3250                 pmc.fn = dlb2_monitor_callback;
3251                 pmc.size = sizeof(uint64_t);
3252
3253                 rte_power_monitor(&pmc, timeout + start_ticks);
3254
3255                 DLB2_INC_STAT(ev_port->stats.traffic.rx_umonitor_umwait, 1);
3256         } else {
3257                 uint64_t poll_interval = dlb2->poll_interval;
3258                 uint64_t curr_ticks = rte_get_timer_cycles();
3259                 uint64_t init_ticks = curr_ticks;
3260
3261                 while ((curr_ticks - start_ticks < timeout) &&
3262                        (curr_ticks - init_ticks < poll_interval))
3263                         curr_ticks = rte_get_timer_cycles();
3264         }
3265
3266         return 0;
3267 }
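
/*
 * The wait path above arms the monitor on the second 64-bit word of the next
 * CQ entry and packs a mask/expected pair into pmc.opaque so that
 * dlb2_monitor_callback() can tell when the cq_gen bit already carries the
 * value marking a newly written QE. A scalar sketch of that test; the bit
 * position used here is illustrative, not the hardware layout.
 */
static inline int
cq_gen_monitor_sketch(uint64_t qe_status_word, int expected_gen_bit)
{
        /* Hypothetical mask with only the cq_gen bit set in the status word */
        const uint64_t gen_mask = 1ULL << 63;
        uint64_t expected = expected_gen_bit ? gen_mask : 0;

        /* Same decision as dlb2_monitor_callback(): abort the wait on match */
        return ((qe_status_word & gen_mask) == expected) ? -1 : 0;
}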
3268
3269 static __rte_noinline int
3270 dlb2_process_dequeue_qes(struct dlb2_eventdev_port *ev_port,
3271                          struct dlb2_port *qm_port,
3272                          struct rte_event *events,
3273                          struct dlb2_dequeue_qe *qes,
3274                          int cnt)
3275 {
3276         uint8_t *qid_mappings = qm_port->qid_mappings;
3277         int i, num, evq_id;
3278
3279         for (i = 0, num = 0; i < cnt; i++) {
3280                 struct dlb2_dequeue_qe *qe = &qes[i];
3281                 int sched_type_map[DLB2_NUM_HW_SCHED_TYPES] = {
3282                         [DLB2_SCHED_ATOMIC] = RTE_SCHED_TYPE_ATOMIC,
3283                         [DLB2_SCHED_UNORDERED] = RTE_SCHED_TYPE_PARALLEL,
3284                         [DLB2_SCHED_ORDERED] = RTE_SCHED_TYPE_ORDERED,
3285                         [DLB2_SCHED_DIRECTED] = RTE_SCHED_TYPE_ATOMIC,
3286                 };
3287
3288                 /* Fill in event information.
3289                  * Note that flow_id must be embedded in the data by
3290                  * the app, such as the mbuf RSS hash field if the data
3291                  * buffer is a mbuf.
3292                  */
3293                 if (unlikely(qe->error)) {
3294                         DLB2_LOG_ERR("QE error bit ON\n");
3295                         DLB2_INC_STAT(ev_port->stats.traffic.rx_drop, 1);
3296                         dlb2_consume_qe_immediate(qm_port, 1);
3297                         continue; /* Ignore */
3298                 }
3299
3300                 events[num].u64 = qe->data;
3301                 events[num].flow_id = qe->flow_id;
3302                 events[num].priority = DLB2_TO_EV_PRIO((uint8_t)qe->priority);
3303                 events[num].event_type = qe->u.event_type.major;
3304                 events[num].sub_event_type = qe->u.event_type.sub;
3305                 events[num].sched_type = sched_type_map[qe->sched_type];
3306                 events[num].impl_opaque = qe->qid_depth;
3307
3308                 /* qid not preserved for directed queues */
3309                 if (qm_port->is_directed)
3310                         evq_id = ev_port->link[0].queue_id;
3311                 else
3312                         evq_id = qid_mappings[qe->qid];
3313
3314                 events[num].queue_id = evq_id;
3315                 DLB2_INC_STAT(
3316                         ev_port->stats.queue[evq_id].qid_depth[qe->qid_depth],
3317                         1);
3318                 DLB2_INC_STAT(ev_port->stats.rx_sched_cnt[qe->sched_type], 1);
3319                 num++;
3320         }
3321
3322         DLB2_INC_STAT(ev_port->stats.traffic.rx_ok, num);
3323
3324         return num;
3325 }
3326
3327 static inline int
3328 dlb2_process_dequeue_four_qes(struct dlb2_eventdev_port *ev_port,
3329                               struct dlb2_port *qm_port,
3330                               struct rte_event *events,
3331                               struct dlb2_dequeue_qe *qes)
3332 {
3333         int sched_type_map[] = {
3334                 [DLB2_SCHED_ATOMIC] = RTE_SCHED_TYPE_ATOMIC,
3335                 [DLB2_SCHED_UNORDERED] = RTE_SCHED_TYPE_PARALLEL,
3336                 [DLB2_SCHED_ORDERED] = RTE_SCHED_TYPE_ORDERED,
3337                 [DLB2_SCHED_DIRECTED] = RTE_SCHED_TYPE_ATOMIC,
3338         };
3339         const int num_events = DLB2_NUM_QES_PER_CACHE_LINE;
3340         uint8_t *qid_mappings = qm_port->qid_mappings;
3341         __m128i sse_evt[2];
3342
3343         /* In the unlikely case that any of the QE error bits are set, process
3344          * them one at a time.
3345          */
3346         if (unlikely(qes[0].error || qes[1].error ||
3347                      qes[2].error || qes[3].error))
3348                 return dlb2_process_dequeue_qes(ev_port, qm_port, events,
3349                                                  qes, num_events);
3350
3351         events[0].u64 = qes[0].data;
3352         events[1].u64 = qes[1].data;
3353         events[2].u64 = qes[2].data;
3354         events[3].u64 = qes[3].data;
3355
3356         /* Construct the metadata portion of two struct rte_events
3357          * in one 128b SSE register. Event metadata is constructed in the SSE
3358          * registers like so:
3359          * sse_evt[0][63:0]:   event[0]'s metadata
3360          * sse_evt[0][127:64]: event[1]'s metadata
3361          * sse_evt[1][63:0]:   event[2]'s metadata
3362          * sse_evt[1][127:64]: event[3]'s metadata
3363          */
3364         sse_evt[0] = _mm_setzero_si128();
3365         sse_evt[1] = _mm_setzero_si128();
3366
3367         /* Convert the hardware queue ID to an event queue ID and store it in
3368          * the metadata:
3369          * sse_evt[0][47:40]   = qid_mappings[qes[0].qid]
3370          * sse_evt[0][111:104] = qid_mappings[qes[1].qid]
3371          * sse_evt[1][47:40]   = qid_mappings[qes[2].qid]
3372          * sse_evt[1][111:104] = qid_mappings[qes[3].qid]
3373          */
3374 #define DLB_EVENT_QUEUE_ID_BYTE 5
3375         sse_evt[0] = _mm_insert_epi8(sse_evt[0],
3376                                      qid_mappings[qes[0].qid],
3377                                      DLB_EVENT_QUEUE_ID_BYTE);
3378         sse_evt[0] = _mm_insert_epi8(sse_evt[0],
3379                                      qid_mappings[qes[1].qid],
3380                                      DLB_EVENT_QUEUE_ID_BYTE + 8);
3381         sse_evt[1] = _mm_insert_epi8(sse_evt[1],
3382                                      qid_mappings[qes[2].qid],
3383                                      DLB_EVENT_QUEUE_ID_BYTE);
3384         sse_evt[1] = _mm_insert_epi8(sse_evt[1],
3385                                      qid_mappings[qes[3].qid],
3386                                      DLB_EVENT_QUEUE_ID_BYTE + 8);
3387
3388         /* Convert the hardware priority to an event priority and store it in
3389          * the metadata, while also returning the queue depth status
3390          * value captured by the hardware, storing it in impl_opaque, which can
3391          * be read by the application but not modified
3392          * sse_evt[0][55:48]   = DLB2_TO_EV_PRIO(qes[0].priority)
3393          * sse_evt[0][63:56]   = qes[0].qid_depth
3394          * sse_evt[0][119:112] = DLB2_TO_EV_PRIO(qes[1].priority)
3395          * sse_evt[0][127:120] = qes[1].qid_depth
3396          * sse_evt[1][55:48]   = DLB2_TO_EV_PRIO(qes[2].priority)
3397          * sse_evt[1][63:56]   = qes[2].qid_depth
3398          * sse_evt[1][119:112] = DLB2_TO_EV_PRIO(qes[3].priority)
3399          * sse_evt[1][127:120] = qes[3].qid_depth
3400          */
3401 #define DLB_EVENT_PRIO_IMPL_OPAQUE_WORD 3
3402 #define DLB_BYTE_SHIFT 8
3403         sse_evt[0] =
3404                 _mm_insert_epi16(sse_evt[0],
3405                         DLB2_TO_EV_PRIO((uint8_t)qes[0].priority) |
3406                         (qes[0].qid_depth << DLB_BYTE_SHIFT),
3407                         DLB_EVENT_PRIO_IMPL_OPAQUE_WORD);
3408         sse_evt[0] =
3409                 _mm_insert_epi16(sse_evt[0],
3410                         DLB2_TO_EV_PRIO((uint8_t)qes[1].priority) |
3411                         (qes[1].qid_depth << DLB_BYTE_SHIFT),
3412                         DLB_EVENT_PRIO_IMPL_OPAQUE_WORD + 4);
3413         sse_evt[1] =
3414                 _mm_insert_epi16(sse_evt[1],
3415                         DLB2_TO_EV_PRIO((uint8_t)qes[2].priority) |
3416                         (qes[2].qid_depth << DLB_BYTE_SHIFT),
3417                         DLB_EVENT_PRIO_IMPL_OPAQUE_WORD);
3418         sse_evt[1] =
3419                 _mm_insert_epi16(sse_evt[1],
3420                         DLB2_TO_EV_PRIO((uint8_t)qes[3].priority) |
3421                         (qes[3].qid_depth << DLB_BYTE_SHIFT),
3422                         DLB_EVENT_PRIO_IMPL_OPAQUE_WORD + 4);
3423
3424         /* Write the event type, sub event type, and flow_id to the event
3425          * metadata.
3426          * sse_evt[0][31:0]   = qes[0].flow_id |
3427          *                      qes[0].u.event_type.major << 28 |
3428          *                      qes[0].u.event_type.sub << 20;
3429          * sse_evt[0][95:64]  = qes[1].flow_id |
3430          *                      qes[1].u.event_type.major << 28 |
3431          *                      qes[1].u.event_type.sub << 20;
3432          * sse_evt[1][31:0]   = qes[2].flow_id |
3433          *                      qes[2].u.event_type.major << 28 |
3434          *                      qes[2].u.event_type.sub << 20;
3435          * sse_evt[1][95:64]  = qes[3].flow_id |
3436          *                      qes[3].u.event_type.major << 28 |
3437          *                      qes[3].u.event_type.sub << 20;
3438          */
3439 #define DLB_EVENT_EV_TYPE_DW 0
3440 #define DLB_EVENT_EV_TYPE_SHIFT 28
3441 #define DLB_EVENT_SUB_EV_TYPE_SHIFT 20
3442         sse_evt[0] = _mm_insert_epi32(sse_evt[0],
3443                         qes[0].flow_id |
3444                         qes[0].u.event_type.major << DLB_EVENT_EV_TYPE_SHIFT |
3445                         qes[0].u.event_type.sub <<  DLB_EVENT_SUB_EV_TYPE_SHIFT,
3446                         DLB_EVENT_EV_TYPE_DW);
3447         sse_evt[0] = _mm_insert_epi32(sse_evt[0],
3448                         qes[1].flow_id |
3449                         qes[1].u.event_type.major << DLB_EVENT_EV_TYPE_SHIFT |
3450                         qes[1].u.event_type.sub <<  DLB_EVENT_SUB_EV_TYPE_SHIFT,
3451                         DLB_EVENT_EV_TYPE_DW + 2);
3452         sse_evt[1] = _mm_insert_epi32(sse_evt[1],
3453                         qes[2].flow_id |
3454                         qes[2].u.event_type.major << DLB_EVENT_EV_TYPE_SHIFT |
3455                         qes[2].u.event_type.sub <<  DLB_EVENT_SUB_EV_TYPE_SHIFT,
3456                         DLB_EVENT_EV_TYPE_DW);
3457         sse_evt[1] = _mm_insert_epi32(sse_evt[1],
3458                         qes[3].flow_id |
3459                         qes[3].u.event_type.major << DLB_EVENT_EV_TYPE_SHIFT  |
3460                         qes[3].u.event_type.sub << DLB_EVENT_SUB_EV_TYPE_SHIFT,
3461                         DLB_EVENT_EV_TYPE_DW + 2);
3462
3463         /* Write the sched type to the event metadata. 'op' and 'rsvd' are not
3464          * set:
3465          * sse_evt[0][39:32]  = sched_type_map[qes[0].sched_type] << 6
3466          * sse_evt[0][103:96] = sched_type_map[qes[1].sched_type] << 6
3467          * sse_evt[1][39:32]  = sched_type_map[qes[2].sched_type] << 6
3468          * sse_evt[1][103:96] = sched_type_map[qes[3].sched_type] << 6
3469          */
3470 #define DLB_EVENT_SCHED_TYPE_BYTE 4
3471 #define DLB_EVENT_SCHED_TYPE_SHIFT 6
3472         sse_evt[0] = _mm_insert_epi8(sse_evt[0],
3473                 sched_type_map[qes[0].sched_type] << DLB_EVENT_SCHED_TYPE_SHIFT,
3474                 DLB_EVENT_SCHED_TYPE_BYTE);
3475         sse_evt[0] = _mm_insert_epi8(sse_evt[0],
3476                 sched_type_map[qes[1].sched_type] << DLB_EVENT_SCHED_TYPE_SHIFT,
3477                 DLB_EVENT_SCHED_TYPE_BYTE + 8);
3478         sse_evt[1] = _mm_insert_epi8(sse_evt[1],
3479                 sched_type_map[qes[2].sched_type] << DLB_EVENT_SCHED_TYPE_SHIFT,
3480                 DLB_EVENT_SCHED_TYPE_BYTE);
3481         sse_evt[1] = _mm_insert_epi8(sse_evt[1],
3482                 sched_type_map[qes[3].sched_type] << DLB_EVENT_SCHED_TYPE_SHIFT,
3483                 DLB_EVENT_SCHED_TYPE_BYTE + 8);
3484
3485         /* Store the metadata to the event (use the double-precision
3486          * _mm_storeh_pd because there is no integer function for storing the
3487          * upper 64b):
3488          * events[0].event = sse_evt[0][63:0]
3489          * events[1].event = sse_evt[0][127:64]
3490          * events[2].event = sse_evt[1][63:0]
3491          * events[3].event = sse_evt[1][127:64]
3492          */
3493         _mm_storel_epi64((__m128i *)&events[0].event, sse_evt[0]);
3494         _mm_storeh_pd((double *)&events[1].event, (__m128d) sse_evt[0]);
3495         _mm_storel_epi64((__m128i *)&events[2].event, sse_evt[1]);
3496         _mm_storeh_pd((double *)&events[3].event, (__m128d) sse_evt[1]);
3497
3498         DLB2_INC_STAT(ev_port->stats.rx_sched_cnt[qes[0].sched_type], 1);
3499         DLB2_INC_STAT(ev_port->stats.rx_sched_cnt[qes[1].sched_type], 1);
3500         DLB2_INC_STAT(ev_port->stats.rx_sched_cnt[qes[2].sched_type], 1);
3501         DLB2_INC_STAT(ev_port->stats.rx_sched_cnt[qes[3].sched_type], 1);
3502
3503         DLB2_INC_STAT(
3504                 ev_port->stats.queue[events[0].queue_id].
3505                         qid_depth[qes[0].qid_depth],
3506                 1);
3507         DLB2_INC_STAT(
3508                 ev_port->stats.queue[events[1].queue_id].
3509                         qid_depth[qes[1].qid_depth],
3510                 1);
3511         DLB2_INC_STAT(
3512                 ev_port->stats.queue[events[2].queue_id].
3513                         qid_depth[qes[2].qid_depth],
3514                 1);
3515         DLB2_INC_STAT(
3516                 ev_port->stats.queue[events[3].queue_id].
3517                         qid_depth[qes[3].qid_depth],
3518                 1);
3519
3520         DLB2_INC_STAT(ev_port->stats.traffic.rx_ok, num_events);
3521
3522         return num_events;
3523 }
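
/*
 * For reference, a scalar equivalent of the 64-bit metadata word the SSE
 * inserts above assemble, following the bit positions documented in the
 * comments (flow_id in bits 19:0, sub/event type at 20/28, sched_type at 38,
 * queue_id in byte 5, priority in byte 6, qid_depth in impl_opaque).
 * build_ev_metadata_sketch() is illustrative; the field widths are those of
 * the standard rte_event layout.
 */
static inline uint64_t
build_ev_metadata_sketch(uint32_t flow_id, uint8_t sub_type, uint8_t ev_type,
                         uint8_t sched_type, uint8_t queue_id,
                         uint8_t priority, uint8_t qid_depth)
{
        uint64_t md = 0;

        md |= (uint64_t)(flow_id & 0xFFFFF);        /* bits 19:0   */
        md |= (uint64_t)sub_type << 20;             /* bits 27:20  */
        md |= (uint64_t)(ev_type & 0xF) << 28;      /* bits 31:28  */
        md |= (uint64_t)(sched_type & 0x3) << 38;   /* bits 39:38  */
        md |= (uint64_t)queue_id << 40;             /* bits 47:40  */
        md |= (uint64_t)priority << 48;             /* bits 55:48  */
        md |= (uint64_t)qid_depth << 56;            /* impl_opaque */

        return md;
}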
3524
3525 static __rte_always_inline int
3526 dlb2_recv_qe_sparse(struct dlb2_port *qm_port, struct dlb2_dequeue_qe *qe)
3527 {
3528         volatile struct dlb2_dequeue_qe *cq_addr;
3529         uint8_t xor_mask[2] = {0x0F, 0x00};
3530         const uint8_t and_mask = 0x0F;
3531         __m128i *qes = (__m128i *)qe;
3532         uint8_t gen_bits, gen_bit;
3533         uintptr_t addr[4];
3534         uint16_t idx;
3535
3536         cq_addr = dlb2_port[qm_port->id][PORT_TYPE(qm_port)].cq_base;
3537
3538         idx = qm_port->cq_idx_unmasked & qm_port->cq_depth_mask;
3539         /* Load the next 4 QEs */
3540         addr[0] = (uintptr_t)&cq_addr[idx];
3541         addr[1] = (uintptr_t)&cq_addr[(idx +  4) & qm_port->cq_depth_mask];
3542         addr[2] = (uintptr_t)&cq_addr[(idx +  8) & qm_port->cq_depth_mask];
3543         addr[3] = (uintptr_t)&cq_addr[(idx + 12) & qm_port->cq_depth_mask];
3544
3545         /* Prefetch next batch of QEs (all CQs occupy minimum 8 cache lines) */
3546         rte_prefetch0(&cq_addr[(idx + 16) & qm_port->cq_depth_mask]);
3547         rte_prefetch0(&cq_addr[(idx + 20) & qm_port->cq_depth_mask]);
3548         rte_prefetch0(&cq_addr[(idx + 24) & qm_port->cq_depth_mask]);
3549         rte_prefetch0(&cq_addr[(idx + 28) & qm_port->cq_depth_mask]);
3550
3551         /* Correct the xor_mask for wrap-around QEs */
3552         gen_bit = qm_port->gen_bit;
3553         xor_mask[gen_bit] ^= !!((idx +  4) > qm_port->cq_depth_mask) << 1;
3554         xor_mask[gen_bit] ^= !!((idx +  8) > qm_port->cq_depth_mask) << 2;
3555         xor_mask[gen_bit] ^= !!((idx + 12) > qm_port->cq_depth_mask) << 3;
3556
3557         /* Read the cache lines backwards to ensure that if QE[N] (N > 0) is
3558          * valid, then QEs[0:N-1] are too.
3559          */
3560         qes[3] = _mm_load_si128((__m128i *)(void *)addr[3]);
3561         rte_compiler_barrier();
3562         qes[2] = _mm_load_si128((__m128i *)(void *)addr[2]);
3563         rte_compiler_barrier();
3564         qes[1] = _mm_load_si128((__m128i *)(void *)addr[1]);
3565         rte_compiler_barrier();
3566         qes[0] = _mm_load_si128((__m128i *)(void *)addr[0]);
3567
3568         /* Extract and combine the gen bits */
3569         gen_bits = ((_mm_extract_epi8(qes[0], 15) & 0x1) << 0) |
3570                    ((_mm_extract_epi8(qes[1], 15) & 0x1) << 1) |
3571                    ((_mm_extract_epi8(qes[2], 15) & 0x1) << 2) |
3572                    ((_mm_extract_epi8(qes[3], 15) & 0x1) << 3);
3573
3574         /* XOR the combined bits such that a 1 represents a valid QE */
3575         gen_bits ^= xor_mask[gen_bit];
3576
3577         /* Mask off gen bits we don't care about */
3578         gen_bits &= and_mask;
3579
3580         return __builtin_popcount(gen_bits);
3581 }
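
/*
 * A scalar sketch of the gen-bit bookkeeping above: gather the four gen bits,
 * XOR them against the pattern expected for the current lap of the ring, and
 * popcount the result to learn how many of the QEs are newly written. The
 * wrap-around correction and load ordering are omitted; the names are
 * illustrative.
 */
static inline int
count_valid_qes_sketch(const uint8_t gen_bit_of_qe[4], uint8_t expected_gen_bit)
{
        uint8_t gen_bits = 0, xor_mask;
        int i;

        for (i = 0; i < 4; i++)
                gen_bits |= (gen_bit_of_qe[i] & 0x1) << i;

        /* Expected pattern flips every time the CQ wraps */
        xor_mask = expected_gen_bit ? 0x0 : 0xF;

        /* After the XOR, a 1 marks a QE the hardware has written this lap */
        return __builtin_popcount((gen_bits ^ xor_mask) & 0xF);
}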
3582
3583 static inline void
3584 _process_deq_qes_vec_impl(struct dlb2_port *qm_port,
3585                           struct rte_event *events,
3586                           __m128i v_qe_3,
3587                           __m128i v_qe_2,
3588                           __m128i v_qe_1,
3589                           __m128i v_qe_0,
3590                           __m128i v_qe_meta,
3591                           __m128i v_qe_status,
3592                           uint32_t valid_events)
3593 {
3594         /* Look up the event QIDs, using the hardware QIDs to index the
3595          * port's QID mapping.
3596          *
3597          * Each v_qe_[0-3] is just a 16-byte load of the whole QE. It is
3598          * passed along in registers as the QE data is required later.
3599          *
3600          * v_qe_meta is a u32 unpack of all 4x QEs, i.e. it contains one
3601          * 32-bit slice of each QE, so makes up a full SSE register. This
3602          * allows parallel processing of 4x QEs in a single register.
3603          */
3604
3605         __m128i v_qid_done = {0};
3606         int hw_qid0 = _mm_extract_epi8(v_qe_meta, 2);
3607         int hw_qid1 = _mm_extract_epi8(v_qe_meta, 6);
3608         int hw_qid2 = _mm_extract_epi8(v_qe_meta, 10);
3609         int hw_qid3 = _mm_extract_epi8(v_qe_meta, 14);
3610
3611         int ev_qid0 = qm_port->qid_mappings[hw_qid0];
3612         int ev_qid1 = qm_port->qid_mappings[hw_qid1];
3613         int ev_qid2 = qm_port->qid_mappings[hw_qid2];
3614         int ev_qid3 = qm_port->qid_mappings[hw_qid3];
3615
3616         int hw_sched0 = _mm_extract_epi8(v_qe_meta, 3) & 3ul;
3617         int hw_sched1 = _mm_extract_epi8(v_qe_meta, 7) & 3ul;
3618         int hw_sched2 = _mm_extract_epi8(v_qe_meta, 11) & 3ul;
3619         int hw_sched3 = _mm_extract_epi8(v_qe_meta, 15) & 3ul;
3620
3621         v_qid_done = _mm_insert_epi8(v_qid_done, ev_qid0, 2);
3622         v_qid_done = _mm_insert_epi8(v_qid_done, ev_qid1, 6);
3623         v_qid_done = _mm_insert_epi8(v_qid_done, ev_qid2, 10);
3624         v_qid_done = _mm_insert_epi8(v_qid_done, ev_qid3, 14);
3625
3626         /* Schedule field remapping using byte shuffle
3627          * - Full byte containing sched field handled here (op, rsvd are zero)
3628          * - Note sanitizing the register requires two masking ANDs:
3629          *   1) to strip prio/msg_type from byte for correct shuffle lookup
3630          *   2) to strip any non-sched-field lanes from any results to OR later
3631          * - Final byte result is >> 10 to another byte-lane inside the u32.
3632          *   This makes the final combination OR easier to make the rte_event.
3633          */
3634         __m128i v_sched_done;
3635         __m128i v_sched_bits;
3636         {
3637                 static const uint8_t sched_type_map[16] = {
3638                         [DLB2_SCHED_ATOMIC] = RTE_SCHED_TYPE_ATOMIC,
3639                         [DLB2_SCHED_UNORDERED] = RTE_SCHED_TYPE_PARALLEL,
3640                         [DLB2_SCHED_ORDERED] = RTE_SCHED_TYPE_ORDERED,
3641                         [DLB2_SCHED_DIRECTED] = RTE_SCHED_TYPE_ATOMIC,
3642                 };
3643                 static const uint8_t sched_and_mask[16] = {
3644                         0x00, 0x00, 0x00, 0x03,
3645                         0x00, 0x00, 0x00, 0x03,
3646                         0x00, 0x00, 0x00, 0x03,
3647                         0x00, 0x00, 0x00, 0x03,
3648                 };
3649                 const __m128i v_sched_map = _mm_loadu_si128(
3650                                              (const __m128i *)sched_type_map);
3651                 __m128i v_sched_mask = _mm_loadu_si128(
3652                                              (const __m128i *)&sched_and_mask);
3653                 v_sched_bits = _mm_and_si128(v_qe_meta, v_sched_mask);
3654                 __m128i v_sched_remapped = _mm_shuffle_epi8(v_sched_map,
3655                                                             v_sched_bits);
3656                 __m128i v_preshift = _mm_and_si128(v_sched_remapped,
3657                                                    v_sched_mask);
3658                 v_sched_done = _mm_srli_epi32(v_preshift, 10);
3659         }
3660
3661         /* Priority handling
3662          * - QE provides 3 bits of priority
3663          * - Shift << 3 to move to MSBs for byte-prio in rte_event
3664          * - Mask bits to avoid pollution, leaving only 3 prio MSBs in reg
3665          */
3666         __m128i v_prio_done;
3667         {
3668                 static const uint8_t prio_mask[16] = {
3669                         0x00, 0x00, 0x00, 0x07 << 5,
3670                         0x00, 0x00, 0x00, 0x07 << 5,
3671                         0x00, 0x00, 0x00, 0x07 << 5,
3672                         0x00, 0x00, 0x00, 0x07 << 5,
3673                 };
3674                 __m128i v_prio_mask  = _mm_loadu_si128(
3675                                                 (const __m128i *)prio_mask);
3676                 __m128i v_prio_shifted = _mm_slli_epi32(v_qe_meta, 3);
3677                 v_prio_done = _mm_and_si128(v_prio_shifted, v_prio_mask);
3678         }
3679
3680         /* Event Sub/Type handling:
3681          * we want to keep the lower 12 bits of each QE. Shift up by 20 bits
3682          * to get the sub/ev type data into rte_event location, clearing the
3683          * lower 20 bits in the process.
3684          */
3685         __m128i v_types_done;
3686         {
3687                 static const uint8_t event_mask[16] = {
3688                         0x0f, 0x00, 0x00, 0x00,
3689                         0x0f, 0x00, 0x00, 0x00,
3690                         0x0f, 0x00, 0x00, 0x00,
3691                         0x0f, 0x00, 0x00, 0x00,
3692                 };
3693                 static const uint8_t sub_event_mask[16] = {
3694                         0xff, 0x00, 0x00, 0x00,
3695                         0xff, 0x00, 0x00, 0x00,
3696                         0xff, 0x00, 0x00, 0x00,
3697                         0xff, 0x00, 0x00, 0x00,
3698                 };
3699                 static const uint8_t flow_mask[16] = {
3700                         0xff, 0xff, 0x00, 0x00,
3701                         0xff, 0xff, 0x00, 0x00,
3702                         0xff, 0xff, 0x00, 0x00,
3703                         0xff, 0xff, 0x00, 0x00,
3704                 };
3705                 __m128i v_event_mask  = _mm_loadu_si128(
3706                                         (const __m128i *)event_mask);
3707                 __m128i v_sub_event_mask  = _mm_loadu_si128(
3708                                         (const __m128i *)sub_event_mask);
3709                 __m128i v_flow_mask  = _mm_loadu_si128(
3710                                        (const __m128i *)flow_mask);
3711                 __m128i v_sub = _mm_srli_epi32(v_qe_meta, 8);
3712                 v_sub = _mm_and_si128(v_sub, v_sub_event_mask);
3713                 __m128i v_type = _mm_and_si128(v_qe_meta, v_event_mask);
3714                 v_type = _mm_slli_epi32(v_type, 8);
3715                 v_types_done = _mm_or_si128(v_type, v_sub);
3716                 v_types_done = _mm_slli_epi32(v_types_done, 20);
3717                 __m128i v_flow = _mm_and_si128(v_qe_status, v_flow_mask);
3718                 v_types_done = _mm_or_si128(v_types_done, v_flow);
3719         }
3720
3721         /* Combine QID, Sched and Prio fields, then Shift >> 8 bits to align
3722          * with the rte_event, allowing unpacks to move/blend with payload.
3723          */
3724         __m128i v_q_s_p_done;
3725         {
3726                 __m128i v_qid_sched = _mm_or_si128(v_qid_done, v_sched_done);
3727                 __m128i v_q_s_prio = _mm_or_si128(v_qid_sched, v_prio_done);
3728                 v_q_s_p_done = _mm_srli_epi32(v_q_s_prio, 8);
3729         }
3730
3731         __m128i v_unpk_ev_23, v_unpk_ev_01, v_ev_2, v_ev_3, v_ev_0, v_ev_1;
3732
3733         /* Unpack evs into u64 metadata, then indiv events */
3734         v_unpk_ev_23 = _mm_unpackhi_epi32(v_types_done, v_q_s_p_done);
3735         v_unpk_ev_01 = _mm_unpacklo_epi32(v_types_done, v_q_s_p_done);
3736
3737         switch (valid_events) {
3738         case 4:
3739                 v_ev_3 = _mm_blend_epi16(v_unpk_ev_23, v_qe_3, 0x0F);
3740                 v_ev_3 = _mm_alignr_epi8(v_ev_3, v_ev_3, 8);
3741                 _mm_storeu_si128((__m128i *)&events[3], v_ev_3);
3742                 DLB2_INC_STAT(qm_port->ev_port->stats.rx_sched_cnt[hw_sched3],
3743                               1);
3744                 /* fallthrough */
3745         case 3:
3746                 v_ev_2 = _mm_unpacklo_epi64(v_unpk_ev_23, v_qe_2);
3747                 _mm_storeu_si128((__m128i *)&events[2], v_ev_2);
3748                 DLB2_INC_STAT(qm_port->ev_port->stats.rx_sched_cnt[hw_sched2],
3749                               1);
3750                 /* fallthrough */
3751         case 2:
3752                 v_ev_1 = _mm_blend_epi16(v_unpk_ev_01, v_qe_1, 0x0F);
3753                 v_ev_1 = _mm_alignr_epi8(v_ev_1, v_ev_1, 8);
3754                 _mm_storeu_si128((__m128i *)&events[1], v_ev_1);
3755                 DLB2_INC_STAT(qm_port->ev_port->stats.rx_sched_cnt[hw_sched1],
3756                               1);
3757                 /* fallthrough */
3758         case 1:
3759                 v_ev_0 = _mm_unpacklo_epi64(v_unpk_ev_01, v_qe_0);
3760                 _mm_storeu_si128((__m128i *)&events[0], v_ev_0);
3761                 DLB2_INC_STAT(qm_port->ev_port->stats.rx_sched_cnt[hw_sched0],
3762                               1);
3763         }
3764 }
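
/*
 * The sched-type remap above uses _mm_shuffle_epi8 as a 16-entry byte lookup
 * table, with the 2-bit hardware code (isolated by the AND mask) selecting
 * the table entry in each lane. A self-contained example of the technique,
 * assuming SSSE3 intrinsics are available (the surrounding SSE4 path already
 * requires them); byte_lut_remap_sketch() and its table values are
 * hypothetical.
 */
static inline __m128i
byte_lut_remap_sketch(__m128i v_codes)
{
        /* 4-entry remap table, padded to the 16 bytes PSHUFB reads */
        static const uint8_t lut[16] = { 10, 20, 30, 40 };
        const __m128i v_lut = _mm_loadu_si128((const __m128i *)lut);
        /* Keep only the low two bits of each byte so every lane indexes 0..3 */
        const __m128i v_idx = _mm_and_si128(v_codes, _mm_set1_epi8(0x03));

        /* Each output byte becomes lut[input_byte & 3] */
        return _mm_shuffle_epi8(v_lut, v_idx);
}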
3765
3766 static __rte_always_inline int
3767 dlb2_recv_qe_sparse_vec(struct dlb2_port *qm_port, void *events,
3768                         uint32_t max_events)
3769 {
3770         /* Using unmasked idx for perf, and masking manually */
3771         uint16_t idx = qm_port->cq_idx_unmasked;
3772         volatile struct dlb2_dequeue_qe *cq_addr;
3773
3774         cq_addr = dlb2_port[qm_port->id][PORT_TYPE(qm_port)].cq_base;
3775
3776         uintptr_t qe_ptr_3 = (uintptr_t)&cq_addr[(idx + 12) &
3777                                                  qm_port->cq_depth_mask];
3778         uintptr_t qe_ptr_2 = (uintptr_t)&cq_addr[(idx +  8) &
3779                                                  qm_port->cq_depth_mask];
3780         uintptr_t qe_ptr_1 = (uintptr_t)&cq_addr[(idx +  4) &
3781                                                  qm_port->cq_depth_mask];
3782         uintptr_t qe_ptr_0 = (uintptr_t)&cq_addr[(idx +  0) &
3783                                                  qm_port->cq_depth_mask];
3784
3785         /* Load QEs from CQ: use compiler barriers to avoid load reordering */
3786         __m128i v_qe_3 = _mm_loadu_si128((const __m128i *)qe_ptr_3);
3787         rte_compiler_barrier();
3788         __m128i v_qe_2 = _mm_loadu_si128((const __m128i *)qe_ptr_2);
3789         rte_compiler_barrier();
3790         __m128i v_qe_1 = _mm_loadu_si128((const __m128i *)qe_ptr_1);
3791         rte_compiler_barrier();
3792         __m128i v_qe_0 = _mm_loadu_si128((const __m128i *)qe_ptr_0);
3793
3794         /* Generate the status shuffle mask:
3795          * - Avoids load in otherwise load-heavy section of code
3796          * - Moves bytes 3,7,11,15 (gen bit bytes) to LSB bytes in XMM
3797          */
3798         const uint32_t stat_shuf_bytes = (15 << 24) | (11 << 16) | (7 << 8) | 3;
3799         __m128i v_zeros = _mm_setzero_si128();
3800         __m128i v_ffff = _mm_cmpeq_epi8(v_zeros, v_zeros);
3801         __m128i v_stat_shuf_mask = _mm_insert_epi32(v_ffff, stat_shuf_bytes, 0);
3802
3803         /* Extract u32 components required from the QE
3804          * - QE[64 to 95 ] for metadata (qid, sched, prio, event type, ...)
3805          * - QE[96 to 127] for status (cq gen bit, error)
3806          *
3807          * Note that stage 1 of the unpacking is re-used for both u32 extracts
3808          */
3809         __m128i v_qe_02 = _mm_unpackhi_epi32(v_qe_0, v_qe_2);
3810         __m128i v_qe_13 = _mm_unpackhi_epi32(v_qe_1, v_qe_3);
3811         __m128i v_qe_status = _mm_unpackhi_epi32(v_qe_02, v_qe_13);
3812         __m128i v_qe_meta   = _mm_unpacklo_epi32(v_qe_02, v_qe_13);
3813
3814         /* Status byte (gen_bit, error) handling:
3815          * - Shuffle to lanes 0,1,2,3, clear all others
3816          * - Shift right by 7 for gen bit to MSB, movemask to scalar
3817          * - Shift right by 2 for error bit to MSB, movemask to scalar
3818          */
3819         __m128i v_qe_shuffled = _mm_shuffle_epi8(v_qe_status, v_stat_shuf_mask);
3820         __m128i v_qes_shift_gen_bit = _mm_slli_epi32(v_qe_shuffled, 7);
3821         int32_t qe_gen_bits = _mm_movemask_epi8(v_qes_shift_gen_bit) & 0xf;
3822
3823         /* Expected vs Reality of QE Gen bits
3824          * - cq_rolling_mask provides expected bits
3825          * - QE loads, unpacks/shuffle and movemask provides reality
3826          * - XOR of the two gives bitmask of new packets
3827          * - POPCNT to get the number of new events
3828          */
3829         uint64_t rolling = qm_port->cq_rolling_mask & 0xF;
3830         uint64_t qe_xor_bits = (qe_gen_bits ^ rolling);
3831         uint32_t count_new = __builtin_popcount(qe_xor_bits);
3832         count_new = RTE_MIN(count_new, max_events);
3833         if (!count_new)
3834                 return 0;
3835
3836         /* emulate a 128 bit rotate using 2x 64-bit numbers and bit-shifts */
3837
3838         uint64_t m_rshift = qm_port->cq_rolling_mask >> count_new;
3839         uint64_t m_lshift = qm_port->cq_rolling_mask << (64 - count_new);
3840         uint64_t m2_rshift = qm_port->cq_rolling_mask_2 >> count_new;
3841         uint64_t m2_lshift = qm_port->cq_rolling_mask_2 << (64 - count_new);
3842
3843         /* shifted out of m2 into MSB of m */
3844         qm_port->cq_rolling_mask = (m_rshift | m2_lshift);
3845
3846         /* shifted out of m "looped back" into MSB of m2 */
3847         qm_port->cq_rolling_mask_2 = (m2_rshift | m_lshift);
3848
3849         /* Prefetch the next QEs - should run as IPC instead of cycles */
3850         rte_prefetch0(&cq_addr[(idx + 16) & qm_port->cq_depth_mask]);
3851         rte_prefetch0(&cq_addr[(idx + 20) & qm_port->cq_depth_mask]);
3852         rte_prefetch0(&cq_addr[(idx + 24) & qm_port->cq_depth_mask]);
3853         rte_prefetch0(&cq_addr[(idx + 28) & qm_port->cq_depth_mask]);
3854
3855         /* Convert QEs from XMM regs to events and store events directly */
3856         _process_deq_qes_vec_impl(qm_port, events, v_qe_3, v_qe_2, v_qe_1,
3857                                   v_qe_0, v_qe_meta, v_qe_status, count_new);
3858
3859         return count_new;
3860 }
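
/*
 * The rolling-mask update above is a 128-bit right rotate built from two
 * 64-bit halves. A standalone sketch of the same emulation; valid for rotate
 * counts of 1..63 (as here, where count_new is at most 4), since a 64-bit
 * shift by 64 would be undefined behaviour.
 */
static inline void
rotate128_right_sketch(uint64_t *lo, uint64_t *hi, unsigned int n)
{
        uint64_t lo_r = *lo >> n;
        uint64_t lo_l = *lo << (64 - n);
        uint64_t hi_r = *hi >> n;
        uint64_t hi_l = *hi << (64 - n);

        /* Bits shifted out of 'hi' enter the top of 'lo', and vice versa */
        *lo = lo_r | hi_l;
        *hi = hi_r | lo_l;
}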
3861
3862 static inline void
3863 dlb2_inc_cq_idx(struct dlb2_port *qm_port, int cnt)
3864 {
3865         uint16_t idx = qm_port->cq_idx_unmasked + cnt;
3866
3867         qm_port->cq_idx_unmasked = idx;
3868         qm_port->cq_idx = idx & qm_port->cq_depth_mask;
3869         qm_port->gen_bit = (~(idx >> qm_port->gen_bit_shift)) & 0x1;
3870 }
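
/*
 * dlb2_inc_cq_idx() keeps an unmasked running index: masking recovers the
 * ring position, and the gen bit is the inverted bit just above the index
 * width, so it flips on every wrap. A tiny sketch of that arithmetic with a
 * hypothetical depth-8 CQ (the depth and shift values are illustrative).
 */
static inline void
cq_index_sketch(void)
{
        const uint16_t depth = 8;               /* must be a power of two */
        const uint16_t depth_mask = depth - 1;
        const uint16_t gen_bit_shift = 3;       /* log2(depth) */
        uint16_t unmasked;

        for (unmasked = 0; unmasked < 2 * depth; unmasked++) {
                uint16_t idx = unmasked & depth_mask;
                uint8_t gen_bit = (~(unmasked >> gen_bit_shift)) & 0x1;

                /* First lap: idx 0..7 with gen_bit 1; second lap: gen_bit 0 */
                (void)idx;
                (void)gen_bit;
        }
}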
3871
3872 static inline int16_t
3873 dlb2_hw_dequeue_sparse(struct dlb2_eventdev *dlb2,
3874                        struct dlb2_eventdev_port *ev_port,
3875                        struct rte_event *events,
3876                        uint16_t max_num,
3877                        uint64_t dequeue_timeout_ticks)
3878 {
3879         uint64_t start_ticks = 0ULL;
3880         struct dlb2_port *qm_port;
3881         int num = 0;
3882         bool use_scalar;
3883         uint64_t timeout;
3884
3885         qm_port = &ev_port->qm_port;
3886         use_scalar = qm_port->use_scalar;
3887
3888         if (!dlb2->global_dequeue_wait)
3889                 timeout = dequeue_timeout_ticks;
3890         else
3891                 timeout = dlb2->global_dequeue_wait_ticks;
3892
3893         start_ticks = rte_get_timer_cycles();
3894
3895         use_scalar = use_scalar || (max_num & 0x3);
3896
3897         while (num < max_num) {
3898                 struct dlb2_dequeue_qe qes[DLB2_NUM_QES_PER_CACHE_LINE];
3899                 int num_avail;
3900                 if (use_scalar) {
3901                         num_avail = dlb2_recv_qe_sparse(qm_port, qes);
3902                         num_avail = RTE_MIN(num_avail, max_num - num);
3903                         dlb2_inc_cq_idx(qm_port, num_avail << 2);
3904                         if (num_avail == DLB2_NUM_QES_PER_CACHE_LINE)
3905                                 num += dlb2_process_dequeue_four_qes(ev_port,
3906                                                                   qm_port,
3907                                                                   &events[num],
3908                                                                   &qes[0]);
3909                         else if (num_avail)
3910                                 num += dlb2_process_dequeue_qes(ev_port,
3911                                                                 qm_port,
3912                                                                 &events[num],
3913                                                                 &qes[0],
3914                                                                 num_avail);
3915                 } else { /* !use_scalar */
3916                         num_avail = dlb2_recv_qe_sparse_vec(qm_port,
3917                                                             &events[num],
3918                                                             max_num - num);
3919                         num += num_avail;
3920                         dlb2_inc_cq_idx(qm_port, num_avail << 2);
3921                         DLB2_INC_STAT(ev_port->stats.traffic.rx_ok, num_avail);
3922                 }
3923                 if (!num_avail) {
3924                         if (num > 0)
3925                                 break;
3926                         else if (dlb2_dequeue_wait(dlb2, ev_port, qm_port,
3927                                                    timeout, start_ticks))
3928                                 break;
3929                 }
3930         }
3931
3932         qm_port->owed_tokens += num;
3933
3934         if (num) {
3935                 if (qm_port->token_pop_mode == AUTO_POP)
3936                         dlb2_consume_qe_immediate(qm_port, num);
3937
3938                 ev_port->outstanding_releases += num;
3939
3940                 dlb2_port_credits_inc(qm_port, num);
3941         }
3942
3943         return num;
3944 }
3945
3946 static __rte_always_inline int
3947 dlb2_recv_qe(struct dlb2_port *qm_port, struct dlb2_dequeue_qe *qe,
3948              uint8_t *offset)
3949 {
3950         uint8_t xor_mask[2][4] = { {0x0F, 0x0E, 0x0C, 0x08},
3951                                    {0x00, 0x01, 0x03, 0x07} };
3952         uint8_t and_mask[4] = {0x0F, 0x0E, 0x0C, 0x08};
3953         volatile struct dlb2_dequeue_qe *cq_addr;
3954         __m128i *qes = (__m128i *)qe;
3955         uint64_t *cache_line_base;
3956         uint8_t gen_bits;
3957
3958         cq_addr = dlb2_port[qm_port->id][PORT_TYPE(qm_port)].cq_base;
3959         cq_addr = &cq_addr[qm_port->cq_idx];
3960
3961         cache_line_base = (void *)(((uintptr_t)cq_addr) & ~0x3F);
3962         *offset = ((uintptr_t)cq_addr & 0x30) >> 4;
3963
3964         /* Load the next CQ cache line from memory. Pack these reads as tight
3965          * as possible to reduce the chance that DLB invalidates the line while
3966          * the CPU is reading it. Read the cache line backwards to ensure that
3967          * if QE[N] (N > 0) is valid, then QEs[0:N-1] are too.
3968          *
3969          * (Valid QEs start at &qe[offset])
3970          */
3971         qes[3] = _mm_load_si128((__m128i *)&cache_line_base[6]);
3972         qes[2] = _mm_load_si128((__m128i *)&cache_line_base[4]);
3973         qes[1] = _mm_load_si128((__m128i *)&cache_line_base[2]);
3974         qes[0] = _mm_load_si128((__m128i *)&cache_line_base[0]);
3975
3976         /* Evict the cache line ASAP */
3977         rte_cldemote(cache_line_base);
3978
3979         /* Extract and combine the gen bits */
3980         gen_bits = ((_mm_extract_epi8(qes[0], 15) & 0x1) << 0) |
3981                    ((_mm_extract_epi8(qes[1], 15) & 0x1) << 1) |
3982                    ((_mm_extract_epi8(qes[2], 15) & 0x1) << 2) |
3983                    ((_mm_extract_epi8(qes[3], 15) & 0x1) << 3);
3984
3985         /* XOR the combined bits such that a 1 represents a valid QE */
3986         gen_bits ^= xor_mask[qm_port->gen_bit][*offset];
3987
3988         /* Mask off gen bits we don't care about */
3989         gen_bits &= and_mask[*offset];
3990
3991         return __builtin_popcount(gen_bits);
3992 }
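
/*
 * dlb2_recv_qe() reads the whole 64-byte cache line holding the current CQ
 * entry: bits 5:4 of the QE address select which of the four 16-byte QEs in
 * that line is current, and the per-offset and_mask keeps only the gen bits
 * at or after that slot. A scalar sketch of the offset/mask selection;
 * cq_line_offset_sketch() is illustrative.
 */
static inline void
cq_line_offset_sketch(uintptr_t qe_addr)
{
        static const uint8_t and_mask[4] = { 0x0F, 0x0E, 0x0C, 0x08 };
        uintptr_t line_base = qe_addr & ~(uintptr_t)0x3F; /* 64B line start */
        unsigned int offset = (qe_addr & 0x30) >> 4;      /* QE slot 0..3   */

        /* Only QEs at or after 'offset' can be new on this pass */
        uint8_t relevant_gen_bits = and_mask[offset];

        (void)line_base;
        (void)relevant_gen_bits;
}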
3993
3994 static inline int16_t
3995 dlb2_hw_dequeue(struct dlb2_eventdev *dlb2,
3996                 struct dlb2_eventdev_port *ev_port,
3997                 struct rte_event *events,
3998                 uint16_t max_num,
3999                 uint64_t dequeue_timeout_ticks)
4000 {
4001         uint64_t timeout;
4002         uint64_t start_ticks = 0ULL;
4003         struct dlb2_port *qm_port;
4004         int num = 0;
4005
4006         qm_port = &ev_port->qm_port;
4007
4008         /* We have a special implementation for waiting. Wait can be:
4009          * 1) no waiting at all
4010          * 2) busy poll only
4011          * 3) wait for an interrupt; if woken and the poll time
4012          * has expired, return to the caller
4013          * 4) umonitor/umwait repeatedly up to poll time
4014          */
4015
4016         /* If configured for per dequeue wait, then use wait value provided
4017          * to this API. Otherwise we must use the global
4018          * value from eventdev config time.
4019          */
4020         if (!dlb2->global_dequeue_wait)
4021                 timeout = dequeue_timeout_ticks;
4022         else
4023                 timeout = dlb2->global_dequeue_wait_ticks;
4024
4025         start_ticks = rte_get_timer_cycles();
4026
4027         while (num < max_num) {
4028                 struct dlb2_dequeue_qe qes[DLB2_NUM_QES_PER_CACHE_LINE];
4029                 uint8_t offset;
4030                 int num_avail;
4031
4032                 /* Copy up to 4 QEs from the current cache line into qes */
4033                 num_avail = dlb2_recv_qe(qm_port, qes, &offset);
4034
4035                 /* But don't process more than the user requested */
4036                 num_avail = RTE_MIN(num_avail, max_num - num);
4037
4038                 dlb2_inc_cq_idx(qm_port, num_avail);
4039
4040                 if (num_avail == DLB2_NUM_QES_PER_CACHE_LINE)
4041                         num += dlb2_process_dequeue_four_qes(ev_port,
4042                                                              qm_port,
4043                                                              &events[num],
4044                                                              &qes[offset]);
4045                 else if (num_avail)
4046                         num += dlb2_process_dequeue_qes(ev_port,
4047                                                         qm_port,
4048                                                         &events[num],
4049                                                         &qes[offset],
4050                                                         num_avail);
4051                 else if ((timeout == 0) || (num > 0))
4052                         /* Not waiting in any form, or 1+ events received? */
4053                         break;
4054                 else if (dlb2_dequeue_wait(dlb2, ev_port, qm_port,
4055                                            timeout, start_ticks))
4056                         break;
4057         }
4058
4059         qm_port->owed_tokens += num;
4060
4061         if (num) {
4062                 if (qm_port->token_pop_mode == AUTO_POP)
4063                         dlb2_consume_qe_immediate(qm_port, num);
4064
4065                 ev_port->outstanding_releases += num;
4066
4067                 dlb2_port_credits_inc(qm_port, num);
4068         }
4069
4070         return num;
4071 }
4072
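/* Dequeue burst entry point installed as dev->dequeue_burst for the standard
 * (non-sparse) CQ poll mode. Before polling the CQ it performs any pending
 * implicit releases and, in deferred token-pop mode, pops the tokens owed
 * from earlier dequeues.
 *
 * Illustrative application-side usage (not part of this driver; assumes
 * dev_id and port_id were set up through the usual eventdev calls):
 *
 *	struct rte_event evs[32];
 *	uint64_t ticks;
 *
 *	rte_event_dequeue_timeout_ticks(dev_id, 100000, &ticks);
 *	uint16_t n = rte_event_dequeue_burst(dev_id, port_id, evs, 32, ticks);
 */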
4073 static uint16_t
4074 dlb2_event_dequeue_burst(void *event_port, struct rte_event *ev, uint16_t num,
4075                          uint64_t wait)
4076 {
4077         struct dlb2_eventdev_port *ev_port = event_port;
4078         struct dlb2_port *qm_port = &ev_port->qm_port;
4079         struct dlb2_eventdev *dlb2 = ev_port->dlb2;
4080         uint16_t cnt;
4081
4082         RTE_ASSERT(ev_port->setup_done);
4083         RTE_ASSERT(ev != NULL);
4084
4085         if (ev_port->implicit_release && ev_port->outstanding_releases > 0) {
4086                 uint16_t out_rels = ev_port->outstanding_releases;
4087
4088                 dlb2_event_release(dlb2, ev_port->id, out_rels);
4089
4090                 DLB2_INC_STAT(ev_port->stats.tx_implicit_rel, out_rels);
4091         }
4092
4093         if (qm_port->token_pop_mode == DEFERRED_POP && qm_port->owed_tokens)
4094                 dlb2_consume_qe_immediate(qm_port, qm_port->owed_tokens);
4095
4096         cnt = dlb2_hw_dequeue(dlb2, ev_port, ev, num, wait);
4097
4098         DLB2_INC_STAT(ev_port->stats.traffic.total_polls, 1);
4099         DLB2_INC_STAT(ev_port->stats.traffic.zero_polls, ((cnt == 0) ? 1 : 0));
4100
4101         return cnt;
4102 }
4103
4104 static uint16_t
4105 dlb2_event_dequeue(void *event_port, struct rte_event *ev, uint64_t wait)
4106 {
4107         return dlb2_event_dequeue_burst(event_port, ev, 1, wait);
4108 }
4109
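/* Sparse-mode counterpart of dlb2_event_dequeue_burst(): identical
 * implicit-release and deferred token-pop bookkeeping, but the CQ is polled
 * via dlb2_hw_dequeue_sparse(). This pair is installed when the device
 * reports DLB2_CQ_POLL_MODE_SPARSE.
 */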
4110 static uint16_t
4111 dlb2_event_dequeue_burst_sparse(void *event_port, struct rte_event *ev,
4112                                 uint16_t num, uint64_t wait)
4113 {
4114         struct dlb2_eventdev_port *ev_port = event_port;
4115         struct dlb2_port *qm_port = &ev_port->qm_port;
4116         struct dlb2_eventdev *dlb2 = ev_port->dlb2;
4117         uint16_t cnt;
4118
4119         RTE_ASSERT(ev_port->setup_done);
4120         RTE_ASSERT(ev != NULL);
4121
4122         if (ev_port->implicit_release && ev_port->outstanding_releases > 0) {
4123                 uint16_t out_rels = ev_port->outstanding_releases;
4124
4125                 dlb2_event_release(dlb2, ev_port->id, out_rels);
4126
4127                 DLB2_INC_STAT(ev_port->stats.tx_implicit_rel, out_rels);
4128         }
4129
4130         if (qm_port->token_pop_mode == DEFERRED_POP && qm_port->owed_tokens)
4131                 dlb2_consume_qe_immediate(qm_port, qm_port->owed_tokens);
4132
4133         cnt = dlb2_hw_dequeue_sparse(dlb2, ev_port, ev, num, wait);
4134
4135         DLB2_INC_STAT(ev_port->stats.traffic.total_polls, 1);
4136         DLB2_INC_STAT(ev_port->stats.traffic.zero_polls, ((cnt == 0) ? 1 : 0));
4137         return cnt;
4138 }
4139
4140 static uint16_t
4141 dlb2_event_dequeue_sparse(void *event_port, struct rte_event *ev,
4142                           uint64_t wait)
4143 {
4144         return dlb2_event_dequeue_burst_sparse(event_port, ev, 1, wait);
4145 }
4146
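/* Drain one port: repeatedly dequeue single events, hand each to the
 * application's dev_stop_flush callback (if registered), and release it back
 * to the device (directed ports have nothing to release). Finally, enqueue a
 * RELEASE for every outstanding release still held by the port.
 */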
4147 static void
4148 dlb2_flush_port(struct rte_eventdev *dev, int port_id)
4149 {
4150         struct dlb2_eventdev *dlb2 = dlb2_pmd_priv(dev);
4151         eventdev_stop_flush_t flush;
4152         struct rte_event ev;
4153         uint8_t dev_id;
4154         void *arg;
4155         int i;
4156
4157         flush = dev->dev_ops->dev_stop_flush;
4158         dev_id = dev->data->dev_id;
4159         arg = dev->data->dev_stop_flush_arg;
4160
4161         while (rte_event_dequeue_burst(dev_id, port_id, &ev, 1, 0)) {
4162                 if (flush)
4163                         flush(dev_id, ev, arg);
4164
4165                 if (dlb2->ev_ports[port_id].qm_port.is_directed)
4166                         continue;
4167
4168                 ev.op = RTE_EVENT_OP_RELEASE;
4169
4170                 rte_event_enqueue_burst(dev_id, port_id, &ev, 1);
4171         }
4172
4173         /* Enqueue any additional outstanding releases */
4174         ev.op = RTE_EVENT_OP_RELEASE;
4175
4176         for (i = dlb2->ev_ports[port_id].outstanding_releases; i > 0; i--)
4177                 rte_event_enqueue_burst(dev_id, port_id, &ev, 1);
4178 }
4179
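/* Query the current depth of a load-balanced queue through the iface layer.
 * Note that on failure the negative return value is propagated through the
 * uint32_t return type, so callers see a very large depth rather than an
 * error code.
 */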
4180 static uint32_t
4181 dlb2_get_ldb_queue_depth(struct dlb2_eventdev *dlb2,
4182                          struct dlb2_eventdev_queue *queue)
4183 {
4184         struct dlb2_hw_dev *handle = &dlb2->qm_instance;
4185         struct dlb2_get_ldb_queue_depth_args cfg;
4186         int ret;
4187
4188         cfg.queue_id = queue->qm_queue.id;
4189
4190         ret = dlb2_iface_get_ldb_queue_depth(handle, &cfg);
4191         if (ret < 0) {
4192                 DLB2_LOG_ERR("dlb2: get_ldb_queue_depth ret=%d (driver status: %s)\n",
4193                              ret, dlb2_error_strings[cfg.response.status]);
4194                 return ret;
4195         }
4196
4197         return cfg.response.id;
4198 }
4199
4200 static uint32_t
4201 dlb2_get_dir_queue_depth(struct dlb2_eventdev *dlb2,
4202                          struct dlb2_eventdev_queue *queue)
4203 {
4204         struct dlb2_hw_dev *handle = &dlb2->qm_instance;
4205         struct dlb2_get_dir_queue_depth_args cfg;
4206         int ret;
4207
4208         cfg.queue_id = queue->qm_queue.id;
4209
4210         ret = dlb2_iface_get_dir_queue_depth(handle, &cfg);
4211         if (ret < 0) {
4212                 DLB2_LOG_ERR("dlb2: get_dir_queue_depth ret=%d (driver status: %s)\n",
4213                              ret, dlb2_error_strings[cfg.response.status]);
4214                 return ret;
4215         }
4216
4217         return cfg.response.id;
4218 }
4219
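/* Return the depth of a queue, dispatching to the directed or load-balanced
 * query as appropriate.
 */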
4220 uint32_t
4221 dlb2_get_queue_depth(struct dlb2_eventdev *dlb2,
4222                      struct dlb2_eventdev_queue *queue)
4223 {
4224         if (queue->qm_queue.is_directed)
4225                 return dlb2_get_dir_queue_depth(dlb2, queue);
4226         else
4227                 return dlb2_get_ldb_queue_depth(dlb2, queue);
4228 }
4229
4230 static bool
4231 dlb2_queue_is_empty(struct dlb2_eventdev *dlb2,
4232                     struct dlb2_eventdev_queue *queue)
4233 {
4234         return dlb2_get_queue_depth(dlb2, queue) == 0;
4235 }
4236
4237 static bool
4238 dlb2_linked_queues_empty(struct dlb2_eventdev *dlb2)
4239 {
4240         int i;
4241
4242         for (i = 0; i < dlb2->num_queues; i++) {
4243                 if (dlb2->ev_queues[i].num_links == 0)
4244                         continue;
4245                 if (!dlb2_queue_is_empty(dlb2, &dlb2->ev_queues[i]))
4246                         return false;
4247         }
4248
4249         return true;
4250 }
4251
4252 static bool
4253 dlb2_queues_empty(struct dlb2_eventdev *dlb2)
4254 {
4255         int i;
4256
4257         for (i = 0; i < dlb2->num_queues; i++) {
4258                 if (!dlb2_queue_is_empty(dlb2, &dlb2->ev_queues[i]))
4259                         return false;
4260         }
4261
4262         return true;
4263 }
4264
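/* Drain all queues and ports before stopping the device. Linked queues are
 * emptied by flushing every port; any remaining (unlinked) load-balanced
 * queues are drained by temporarily linking them, one at a time, to a
 * load-balanced port and flushing that port.
 */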
4265 static void
4266 dlb2_drain(struct rte_eventdev *dev)
4267 {
4268         struct dlb2_eventdev *dlb2 = dlb2_pmd_priv(dev);
4269         struct dlb2_eventdev_port *ev_port = NULL;
4270         uint8_t dev_id;
4271         int i;
4272
4273         dev_id = dev->data->dev_id;
4274
4275         while (!dlb2_linked_queues_empty(dlb2)) {
4276                 /* Flush all the ev_ports, which will drain all their connected
4277                  * queues.
4278                  */
4279                 for (i = 0; i < dlb2->num_ports; i++)
4280                         dlb2_flush_port(dev, i);
4281         }
4282
4283         /* The queues are empty, but there may be events left in the ports. */
4284         for (i = 0; i < dlb2->num_ports; i++)
4285                 dlb2_flush_port(dev, i);
4286
4287         /* If the domain's queues are empty, we're done. */
4288         if (dlb2_queues_empty(dlb2))
4289                 return;
4290
4291         /* Else, there must be at least one unlinked load-balanced queue.
4292          * Select a load-balanced port with which to drain the unlinked
4293          * queue(s).
4294          */
4295         for (i = 0; i < dlb2->num_ports; i++) {
4296                 ev_port = &dlb2->ev_ports[i];
4297
4298                 if (!ev_port->qm_port.is_directed)
4299                         break;
4300         }
4301
4302         if (i == dlb2->num_ports) {
4303                 DLB2_LOG_ERR("internal error: no LDB ev_ports\n");
4304                 return;
4305         }
4306
4307         rte_errno = 0;
4308         rte_event_port_unlink(dev_id, ev_port->id, NULL, 0);
4309
4310         if (rte_errno) {
4311                 DLB2_LOG_ERR("internal error: failed to unlink ev_port %d\n",
4312                              ev_port->id);
4313                 return;
4314         }
4315
4316         for (i = 0; i < dlb2->num_queues; i++) {
4317                 uint8_t qid, prio;
4318                 int ret;
4319
4320                 if (dlb2_queue_is_empty(dlb2, &dlb2->ev_queues[i]))
4321                         continue;
4322
4323                 qid = i;
4324                 prio = 0;
4325
4326                 /* Link the ev_port to the queue */
4327                 ret = rte_event_port_link(dev_id, ev_port->id, &qid, &prio, 1);
4328                 if (ret != 1) {
4329                         DLB2_LOG_ERR("internal error: failed to link ev_port %d to queue %d\n",
4330                                      ev_port->id, qid);
4331                         return;
4332                 }
4333
4334                 /* Flush the queue */
4335                 while (!dlb2_queue_is_empty(dlb2, &dlb2->ev_queues[i]))
4336                         dlb2_flush_port(dev, ev_port->id);
4337
4338                 /* Drain any extant events in the ev_port. */
4339                 dlb2_flush_port(dev, ev_port->id);
4340
4341                 /* Unlink the ev_port from the queue */
4342                 ret = rte_event_port_unlink(dev_id, ev_port->id, &qid, 1);
4343                 if (ret != 1) {
4344                         DLB2_LOG_ERR("internal error: failed to unlink ev_port %d from queue %d\n",
4345                                      ev_port->id, qid);
4346                         return;
4347                 }
4348         }
4349 }
4350
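/* dev_stop callback: move the device from STARTED to STOPPING under the
 * resource lock, drain all queues and ports, then mark the device STOPPED.
 */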
4351 static void
4352 dlb2_eventdev_stop(struct rte_eventdev *dev)
4353 {
4354         struct dlb2_eventdev *dlb2 = dlb2_pmd_priv(dev);
4355
4356         rte_spinlock_lock(&dlb2->qm_instance.resource_lock);
4357
4358         if (dlb2->run_state == DLB2_RUN_STATE_STOPPED) {
4359                 DLB2_LOG_DBG("Internal error: already stopped\n");
4360                 rte_spinlock_unlock(&dlb2->qm_instance.resource_lock);
4361                 return;
4362         } else if (dlb2->run_state != DLB2_RUN_STATE_STARTED) {
4363                 DLB2_LOG_ERR("Internal error: bad state %d for dev_stop\n",
4364                              (int)dlb2->run_state);
4365                 rte_spinlock_unlock(&dlb2->qm_instance.resource_lock);
4366                 return;
4367         }
4368
4369         dlb2->run_state = DLB2_RUN_STATE_STOPPING;
4370
4371         rte_spinlock_unlock(&dlb2->qm_instance.resource_lock);
4372
4373         dlb2_drain(dev);
4374
4375         dlb2->run_state = DLB2_RUN_STATE_STOPPED;
4376 }
4377
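/* dev_close callback: reset the device's hardware scheduling domain. */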
4378 static int
4379 dlb2_eventdev_close(struct rte_eventdev *dev)
4380 {
4381         dlb2_hw_reset_sched_domain(dev, false);
4382
4383         return 0;
4384 }
4385
4386 static void
4387 dlb2_eventdev_queue_release(struct rte_eventdev *dev, uint8_t id)
4388 {
4389         RTE_SET_USED(dev);
4390         RTE_SET_USED(id);
4391
4392         /* This function intentionally left blank. */
4393 }
4394
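/* port_release callback: free the per-port QE memory if the port was
 * configured; otherwise there is nothing to clean up.
 */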
4395 static void
4396 dlb2_eventdev_port_release(void *port)
4397 {
4398         struct dlb2_eventdev_port *ev_port = port;
4399         struct dlb2_port *qm_port;
4400
4401         if (ev_port) {
4402                 qm_port = &ev_port->qm_port;
4403                 if (qm_port->config_state == DLB2_CONFIGURED)
4404                         dlb2_free_qe_mem(qm_port);
4405         }
4406 }
4407
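/* Convert a dequeue timeout from nanoseconds to timer cycles. Note the
 * conversion truncates: if rte_get_timer_hz() is below 1 GHz, cycles_per_ns
 * becomes 0 and every timeout collapses to zero ticks.
 */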
4408 static int
4409 dlb2_eventdev_timeout_ticks(struct rte_eventdev *dev, uint64_t ns,
4410                             uint64_t *timeout_ticks)
4411 {
4412         RTE_SET_USED(dev);
4413         uint64_t cycles_per_ns = rte_get_timer_hz() / 1E9;
4414
4415         *timeout_ticks = ns * cycles_per_ns;
4416
4417         return 0;
4418 }
4419
4420 static void
4421 dlb2_entry_points_init(struct rte_eventdev *dev)
4422 {
4423         struct dlb2_eventdev *dlb2;
4424
4425         /* Expose PMD's eventdev interface */
4426         static struct eventdev_ops dlb2_eventdev_entry_ops = {
4427                 .dev_infos_get    = dlb2_eventdev_info_get,
4428                 .dev_configure    = dlb2_eventdev_configure,
4429                 .dev_start        = dlb2_eventdev_start,
4430                 .dev_stop         = dlb2_eventdev_stop,
4431                 .dev_close        = dlb2_eventdev_close,
4432                 .queue_def_conf   = dlb2_eventdev_queue_default_conf_get,
4433                 .queue_setup      = dlb2_eventdev_queue_setup,
4434                 .queue_release    = dlb2_eventdev_queue_release,
4435                 .port_def_conf    = dlb2_eventdev_port_default_conf_get,
4436                 .port_setup       = dlb2_eventdev_port_setup,
4437                 .port_release     = dlb2_eventdev_port_release,
4438                 .port_link        = dlb2_eventdev_port_link,
4439                 .port_unlink      = dlb2_eventdev_port_unlink,
4440                 .port_unlinks_in_progress =
4441                                     dlb2_eventdev_port_unlinks_in_progress,
4442                 .timeout_ticks    = dlb2_eventdev_timeout_ticks,
4443                 .dump             = dlb2_eventdev_dump,
4444                 .xstats_get       = dlb2_eventdev_xstats_get,
4445                 .xstats_get_names = dlb2_eventdev_xstats_get_names,
4446                 .xstats_get_by_name = dlb2_eventdev_xstats_get_by_name,
4447                 .xstats_reset       = dlb2_eventdev_xstats_reset,
4448                 .dev_selftest     = test_dlb2_eventdev,
4449         };
4450
4451         /* Install the ops table and fast-path enqueue/dequeue entry points. */
4452
4453         dev->dev_ops = &dlb2_eventdev_entry_ops;
4454         dev->enqueue = dlb2_event_enqueue;
4455         dev->enqueue_burst = dlb2_event_enqueue_burst;
4456         dev->enqueue_new_burst = dlb2_event_enqueue_new_burst;
4457         dev->enqueue_forward_burst = dlb2_event_enqueue_forward_burst;
4458
4459         dlb2 = dev->data->dev_private;
4460         if (dlb2->poll_mode == DLB2_CQ_POLL_MODE_SPARSE) {
4461                 dev->dequeue = dlb2_event_dequeue_sparse;
4462                 dev->dequeue_burst = dlb2_event_dequeue_burst_sparse;
4463         } else {
4464                 dev->dequeue = dlb2_event_dequeue;
4465                 dev->dequeue_burst = dlb2_event_dequeue_burst;
4466         }
4467 }
4468
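/* Primary-process probe: record the devargs overrides, open the device
 * through the iface layer, query its version and resources, initialize the
 * hardware and xstats, default every port to AUTO_POP token handling, and
 * install the eventdev entry points.
 */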
4469 int
4470 dlb2_primary_eventdev_probe(struct rte_eventdev *dev,
4471                             const char *name,
4472                             struct dlb2_devargs *dlb2_args)
4473 {
4474         struct dlb2_eventdev *dlb2;
4475         int err, i;
4476
4477         dlb2 = dev->data->dev_private;
4478
4479         dlb2->event_dev = dev; /* backlink */
4480
4481         evdev_dlb2_default_info.driver_name = name;
4482
4483         dlb2->max_num_events_override = dlb2_args->max_num_events;
4484         dlb2->num_dir_credits_override = dlb2_args->num_dir_credits_override;
4485         dlb2->qm_instance.cos_id = dlb2_args->cos_id;
4486         dlb2->poll_interval = dlb2_args->poll_interval;
4487         dlb2->sw_credit_quanta = dlb2_args->sw_credit_quanta;
4488         dlb2->hw_credit_quanta = dlb2_args->hw_credit_quanta;
4489         dlb2->default_depth_thresh = dlb2_args->default_depth_thresh;
4490         dlb2->vector_opts_enabled = dlb2_args->vector_opts_enabled;
4491
4492         err = dlb2_iface_open(&dlb2->qm_instance, name);
4493         if (err < 0) {
4494                 DLB2_LOG_ERR("could not open event hardware device, err=%d\n",
4495                              err);
4496                 return err;
4497         }
4498
4499         err = dlb2_iface_get_device_version(&dlb2->qm_instance,
4500                                             &dlb2->revision);
4501         if (err < 0) {
4502                 DLB2_LOG_ERR("dlb2: failed to get the device version, err=%d\n",
4503                              err);
4504                 return err;
4505         }
4506
4507         err = dlb2_hw_query_resources(dlb2);
4508         if (err) {
4509                 DLB2_LOG_ERR("get resources err=%d for %s\n",
4510                              err, name);
4511                 return err;
4512         }
4513
4514         dlb2_iface_hardware_init(&dlb2->qm_instance);
4515
4516         err = dlb2_iface_get_cq_poll_mode(&dlb2->qm_instance, &dlb2->poll_mode);
4517         if (err < 0) {
4518                 DLB2_LOG_ERR("dlb2: failed to get the poll mode, err=%d\n",
4519                              err);
4520                 return err;
4521         }
4522
4523         /* Complete xstats runtime initialization */
4524         err = dlb2_xstats_init(dlb2);
4525         if (err) {
4526                 DLB2_LOG_ERR("dlb2: failed to init xstats, err=%d\n", err);
4527                 return err;
4528         }
4529
4530         /* Initialize each port's token pop mode */
4531         for (i = 0; i < DLB2_MAX_NUM_PORTS(dlb2->version); i++)
4532                 dlb2->ev_ports[i].qm_port.token_pop_mode = AUTO_POP;
4533
4534         rte_spinlock_init(&dlb2->qm_instance.resource_lock);
4535
4536         dlb2_iface_low_level_io_init();
4537
4538         dlb2_entry_points_init(dev);
4539
4540         dlb2_init_queue_depth_thresholds(dlb2,
4541                                          dlb2_args->qid_depth_thresholds.val);
4542
4543         return 0;
4544 }
4545
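/* Secondary-process probe: only open the device, query resources, and
 * install the entry points; configuration is left to the primary process.
 */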
4546 int
4547 dlb2_secondary_eventdev_probe(struct rte_eventdev *dev,
4548                               const char *name)
4549 {
4550         struct dlb2_eventdev *dlb2;
4551         int err;
4552
4553         dlb2 = dev->data->dev_private;
4554
4555         evdev_dlb2_default_info.driver_name = name;
4556
4557         err = dlb2_iface_open(&dlb2->qm_instance, name);
4558         if (err < 0) {
4559                 DLB2_LOG_ERR("could not open event hardware device, err=%d\n",
4560                              err);
4561                 return err;
4562         }
4563
4564         err = dlb2_hw_query_resources(dlb2);
4565         if (err) {
4566                 DLB2_LOG_ERR("get resources err=%d for %s\n",
4567                              err, name);
4568                 return err;
4569         }
4570
4571         dlb2_iface_low_level_io_init();
4572
4573         dlb2_entry_points_init(dev);
4574
4575         return 0;
4576 }
4577
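/* Parse the comma-separated devargs string into dlb2_args. Recognized keys
 * are the *_ARG macros listed in args[] below; unknown keys cause
 * rte_kvargs_parse() to fail, in which case the parameters are reported and
 * ignored. Illustrative call shape (keys and values elided, not literal):
 *
 *	dlb2_parse_params("<key>=<value>,<key>=<value>", name, &dlb2_args,
 *			  DLB2_HW_V2);
 */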
4578 int
4579 dlb2_parse_params(const char *params,
4580                   const char *name,
4581                   struct dlb2_devargs *dlb2_args,
4582                   uint8_t version)
4583 {
4584         int ret = 0;
4585         static const char * const args[] = { NUMA_NODE_ARG,
4586                                              DLB2_MAX_NUM_EVENTS,
4587                                              DLB2_NUM_DIR_CREDITS,
4588                                              DEV_ID_ARG,
4589                                              DLB2_QID_DEPTH_THRESH_ARG,
4590                                              DLB2_COS_ARG,
4591                                              DLB2_POLL_INTERVAL_ARG,
4592                                              DLB2_SW_CREDIT_QUANTA_ARG,
4593                                              DLB2_HW_CREDIT_QUANTA_ARG,
4594                                              DLB2_DEPTH_THRESH_ARG,
4595                                              DLB2_VECTOR_OPTS_ENAB_ARG,
4596                                              NULL };
4597
4598         if (params != NULL && params[0] != '\0') {
4599                 struct rte_kvargs *kvlist = rte_kvargs_parse(params, args);
4600
4601                 if (kvlist == NULL) {
4602                         RTE_LOG(INFO, PMD,
4603                                 "Ignoring unsupported parameters when creating device '%s'\n",
4604                                 name);
4605                 } else {
4606                         ret = rte_kvargs_process(kvlist, NUMA_NODE_ARG,
4607                                                  set_numa_node,
4608                                                  &dlb2_args->socket_id);
4609                         if (ret != 0) {
4610                                 DLB2_LOG_ERR("%s: Error parsing numa node parameter",
4611                                              name);
4612                                 rte_kvargs_free(kvlist);
4613                                 return ret;
4614                         }
4615
4616                         ret = rte_kvargs_process(kvlist, DLB2_MAX_NUM_EVENTS,
4617                                                  set_max_num_events,
4618                                                  &dlb2_args->max_num_events);
4619                         if (ret != 0) {
4620                                 DLB2_LOG_ERR("%s: Error parsing max_num_events parameter",
4621                                              name);
4622                                 rte_kvargs_free(kvlist);
4623                                 return ret;
4624                         }
4625
4626                         if (version == DLB2_HW_V2) {
4627                                 ret = rte_kvargs_process(kvlist,
4628                                         DLB2_NUM_DIR_CREDITS,
4629                                         set_num_dir_credits,
4630                                         &dlb2_args->num_dir_credits_override);
4631                                 if (ret != 0) {
4632                                         DLB2_LOG_ERR("%s: Error parsing num_dir_credits parameter",
4633                                                      name);
4634                                         rte_kvargs_free(kvlist);
4635                                         return ret;
4636                                 }
4637                         }
4638                         ret = rte_kvargs_process(kvlist, DEV_ID_ARG,
4639                                                  set_dev_id,
4640                                                  &dlb2_args->dev_id);
4641                         if (ret != 0) {
4642                                 DLB2_LOG_ERR("%s: Error parsing dev_id parameter",
4643                                              name);
4644                                 rte_kvargs_free(kvlist);
4645                                 return ret;
4646                         }
4647
4648                         if (version == DLB2_HW_V2) {
4649                                 ret = rte_kvargs_process(
4650                                         kvlist,
4651                                         DLB2_QID_DEPTH_THRESH_ARG,
4652                                         set_qid_depth_thresh,
4653                                         &dlb2_args->qid_depth_thresholds);
4654                         } else {
4655                                 ret = rte_kvargs_process(
4656                                         kvlist,
4657                                         DLB2_QID_DEPTH_THRESH_ARG,
4658                                         set_qid_depth_thresh_v2_5,
4659                                         &dlb2_args->qid_depth_thresholds);
4660                         }
4661                         if (ret != 0) {
4662                                 DLB2_LOG_ERR("%s: Error parsing qid_depth_thresh parameter",
4663                                              name);
4664                                 rte_kvargs_free(kvlist);
4665                                 return ret;
4666                         }
4667
4668                         ret = rte_kvargs_process(kvlist, DLB2_COS_ARG,
4669                                                  set_cos,
4670                                                  &dlb2_args->cos_id);
4671                         if (ret != 0) {
4672                                 DLB2_LOG_ERR("%s: Error parsing cos parameter",
4673                                              name);
4674                                 rte_kvargs_free(kvlist);
4675                                 return ret;
4676                         }
4677
4678                         ret = rte_kvargs_process(kvlist, DLB2_POLL_INTERVAL_ARG,
4679                                                  set_poll_interval,
4680                                                  &dlb2_args->poll_interval);
4681                         if (ret != 0) {
4682                                 DLB2_LOG_ERR("%s: Error parsing poll interval parameter",
4683                                              name);
4684                                 rte_kvargs_free(kvlist);
4685                                 return ret;
4686                         }
4687
4688                         ret = rte_kvargs_process(kvlist,
4689                                                  DLB2_SW_CREDIT_QUANTA_ARG,
4690                                                  set_sw_credit_quanta,
4691                                                  &dlb2_args->sw_credit_quanta);
4692                         if (ret != 0) {
4693                                 DLB2_LOG_ERR("%s: Error parsing sw credit quanta parameter",
4694                                              name);
4695                                 rte_kvargs_free(kvlist);
4696                                 return ret;
4697                         }
4698
4699                         ret = rte_kvargs_process(kvlist,
4700                                                  DLB2_HW_CREDIT_QUANTA_ARG,
4701                                                  set_hw_credit_quanta,
4702                                                  &dlb2_args->hw_credit_quanta);
4703                         if (ret != 0) {
4704                                 DLB2_LOG_ERR("%s: Error parsing hw credit quanta parameter",
4705                                              name);
4706                                 rte_kvargs_free(kvlist);
4707                                 return ret;
4708                         }
4709
4710                         ret = rte_kvargs_process(kvlist, DLB2_DEPTH_THRESH_ARG,
4711                                         set_default_depth_thresh,
4712                                         &dlb2_args->default_depth_thresh);
4713                         if (ret != 0) {
4714                                 DLB2_LOG_ERR("%s: Error parsing default depth thresh parameter",
4715                                              name);
4716                                 rte_kvargs_free(kvlist);
4717                                 return ret;
4718                         }
4719
4720                         ret = rte_kvargs_process(kvlist,
4721                                         DLB2_VECTOR_OPTS_ENAB_ARG,
4722                                         set_vector_opts_enab,
4723                                         &dlb2_args->vector_opts_enabled);
4724                         if (ret != 0) {
4725                                 DLB2_LOG_ERR("%s: Error parsing vector opts enabled parameter",
4726                                              name);
4727                                 rte_kvargs_free(kvlist);
4728                                 return ret;
4729                         }
4730
4731                         rte_kvargs_free(kvlist);
4732                 }
4733         }
4734         return ret;
4735 }
4736 RTE_LOG_REGISTER_DEFAULT(eventdev_dlb2_log_level, NOTICE);