[dpdk.git] / drivers / event / dlb2 / dlb2.c
1 /* SPDX-License-Identifier: BSD-3-Clause
2  * Copyright(c) 2016-2020 Intel Corporation
3  */
4
5 #include <assert.h>
6 #include <errno.h>
7 #include <nmmintrin.h>
8 #include <pthread.h>
9 #include <stdint.h>
10 #include <stdbool.h>
11 #include <stdio.h>
12 #include <string.h>
13 #include <sys/mman.h>
14 #include <fcntl.h>
15
16 #include <rte_common.h>
17 #include <rte_config.h>
18 #include <rte_cycles.h>
19 #include <rte_debug.h>
20 #include <rte_dev.h>
21 #include <rte_errno.h>
22 #include <rte_eventdev.h>
23 #include <eventdev_pmd.h>
24 #include <rte_io.h>
25 #include <rte_kvargs.h>
26 #include <rte_log.h>
27 #include <rte_malloc.h>
28 #include <rte_mbuf.h>
29 #include <rte_power_intrinsics.h>
30 #include <rte_prefetch.h>
31 #include <rte_ring.h>
32 #include <rte_string_fns.h>
33
34 #include "dlb2_priv.h"
35 #include "dlb2_iface.h"
36 #include "dlb2_inline_fns.h"
37
38 /*
39  * Resources exposed to eventdev. Some values overridden at runtime using
40  * values returned by the DLB kernel driver.
41  */
42 #if (RTE_EVENT_MAX_QUEUES_PER_DEV > UINT8_MAX)
43 #error "RTE_EVENT_MAX_QUEUES_PER_DEV cannot fit in member max_event_queues"
44 #endif
45 static struct rte_event_dev_info evdev_dlb2_default_info = {
46         .driver_name = "", /* probe will set */
47         .min_dequeue_timeout_ns = DLB2_MIN_DEQUEUE_TIMEOUT_NS,
48         .max_dequeue_timeout_ns = DLB2_MAX_DEQUEUE_TIMEOUT_NS,
49 #if (RTE_EVENT_MAX_QUEUES_PER_DEV < DLB2_MAX_NUM_LDB_QUEUES)
50         .max_event_queues = RTE_EVENT_MAX_QUEUES_PER_DEV,
51 #else
52         .max_event_queues = DLB2_MAX_NUM_LDB_QUEUES,
53 #endif
54         .max_event_queue_flows = DLB2_MAX_NUM_FLOWS,
55         .max_event_queue_priority_levels = DLB2_QID_PRIORITIES,
56         .max_event_priority_levels = DLB2_QID_PRIORITIES,
57         .max_event_ports = DLB2_MAX_NUM_LDB_PORTS,
58         .max_event_port_dequeue_depth = DLB2_DEFAULT_CQ_DEPTH,
59         .max_event_port_enqueue_depth = DLB2_MAX_ENQUEUE_DEPTH,
60         .max_event_port_links = DLB2_MAX_NUM_QIDS_PER_LDB_CQ,
61         .max_num_events = DLB2_MAX_NUM_LDB_CREDITS,
62         .max_single_link_event_port_queue_pairs =
63                 DLB2_MAX_NUM_DIR_PORTS(DLB2_HW_V2),
64         .event_dev_cap = (RTE_EVENT_DEV_CAP_QUEUE_QOS |
65                           RTE_EVENT_DEV_CAP_EVENT_QOS |
66                           RTE_EVENT_DEV_CAP_BURST_MODE |
67                           RTE_EVENT_DEV_CAP_DISTRIBUTED_SCHED |
68                           RTE_EVENT_DEV_CAP_IMPLICIT_RELEASE_DISABLE |
69                           RTE_EVENT_DEV_CAP_QUEUE_ALL_TYPES |
70                           RTE_EVENT_DEV_CAP_MAINTENANCE_FREE),
71 };
72
73 struct process_local_port_data
74 dlb2_port[DLB2_MAX_NUM_PORTS_ALL][DLB2_NUM_PORT_TYPES];
75
76 static void
77 dlb2_free_qe_mem(struct dlb2_port *qm_port)
78 {
79         if (qm_port == NULL)
80                 return;
81
82         rte_free(qm_port->qe4);
83         qm_port->qe4 = NULL;
84
85         rte_free(qm_port->int_arm_qe);
86         qm_port->int_arm_qe = NULL;
87
88         rte_free(qm_port->consume_qe);
89         qm_port->consume_qe = NULL;
90
91         rte_memzone_free(dlb2_port[qm_port->id][PORT_TYPE(qm_port)].mz);
92         dlb2_port[qm_port->id][PORT_TYPE(qm_port)].mz = NULL;
93 }
94
95 /* override defaults with value(s) provided on command line */
96 static void
97 dlb2_init_queue_depth_thresholds(struct dlb2_eventdev *dlb2,
98                                  int *qid_depth_thresholds)
99 {
100         int q;
101
102         for (q = 0; q < DLB2_MAX_NUM_QUEUES(dlb2->version); q++) {
103                 if (qid_depth_thresholds[q] != 0)
104                         dlb2->ev_queues[q].depth_threshold =
105                                 qid_depth_thresholds[q];
106         }
107 }
108
109 static int
110 dlb2_hw_query_resources(struct dlb2_eventdev *dlb2)
111 {
112         struct dlb2_hw_dev *handle = &dlb2->qm_instance;
113         struct dlb2_hw_resource_info *dlb2_info = &handle->info;
114         int num_ldb_ports;
115         int ret;
116
117         /* Query driver resources provisioned for this device */
118
119         ret = dlb2_iface_get_num_resources(handle,
120                                            &dlb2->hw_rsrc_query_results);
121         if (ret) {
122                 DLB2_LOG_ERR("ioctl get dlb2 num resources, err=%d\n", ret);
123                 return ret;
124         }
125
126         /* Complete filling in device resource info returned to evdev app,
127          * overriding any default values.
128          * The capabilities (CAPs) were set at compile time.
129          */
130
131         if (dlb2->max_cq_depth != DLB2_DEFAULT_CQ_DEPTH)
132                 num_ldb_ports = DLB2_MAX_HL_ENTRIES / dlb2->max_cq_depth;
133         else
134                 num_ldb_ports = dlb2->hw_rsrc_query_results.num_ldb_ports;
135
136         evdev_dlb2_default_info.max_event_queues =
137                 dlb2->hw_rsrc_query_results.num_ldb_queues;
138
139         evdev_dlb2_default_info.max_event_ports = num_ldb_ports;
140
141         if (dlb2->version == DLB2_HW_V2_5) {
142                 evdev_dlb2_default_info.max_num_events =
143                         dlb2->hw_rsrc_query_results.num_credits;
144         } else {
145                 evdev_dlb2_default_info.max_num_events =
146                         dlb2->hw_rsrc_query_results.num_ldb_credits;
147         }
148         /* Save off values used when creating the scheduling domain. */
149
150         handle->info.num_sched_domains =
151                 dlb2->hw_rsrc_query_results.num_sched_domains;
152
153         if (dlb2->version == DLB2_HW_V2_5) {
154                 handle->info.hw_rsrc_max.nb_events_limit =
155                         dlb2->hw_rsrc_query_results.num_credits;
156         } else {
157                 handle->info.hw_rsrc_max.nb_events_limit =
158                         dlb2->hw_rsrc_query_results.num_ldb_credits;
159         }
160         handle->info.hw_rsrc_max.num_queues =
161                 dlb2->hw_rsrc_query_results.num_ldb_queues +
162                 dlb2->hw_rsrc_query_results.num_dir_ports;
163
164         handle->info.hw_rsrc_max.num_ldb_queues =
165                 dlb2->hw_rsrc_query_results.num_ldb_queues;
166
167         handle->info.hw_rsrc_max.num_ldb_ports = num_ldb_ports;
168
169         handle->info.hw_rsrc_max.num_dir_ports =
170                 dlb2->hw_rsrc_query_results.num_dir_ports;
171
172         handle->info.hw_rsrc_max.reorder_window_size =
173                 dlb2->hw_rsrc_query_results.num_hist_list_entries;
174
175         rte_memcpy(dlb2_info, &handle->info.hw_rsrc_max, sizeof(*dlb2_info));
176
177         return 0;
178 }
179
180 #define DLB2_BASE_10 10
181
182 static int
183 dlb2_string_to_int(int *result, const char *str)
184 {
185         long ret;
186         char *endptr;
187
188         if (str == NULL || result == NULL)
189                 return -EINVAL;
190
191         errno = 0;
192         ret = strtol(str, &endptr, DLB2_BASE_10);
193         if (errno)
194                 return -errno;
195
196         /* long int and int may be different width for some architectures */
197         if (ret < INT_MIN || ret > INT_MAX || endptr == str)
198                 return -EINVAL;
199
200         *result = ret;
201         return 0;
202 }
203
204 static int
205 set_numa_node(const char *key __rte_unused, const char *value, void *opaque)
206 {
207         int *socket_id = opaque;
208         int ret;
209
210         ret = dlb2_string_to_int(socket_id, value);
211         if (ret < 0)
212                 return ret;
213
214         if (*socket_id > RTE_MAX_NUMA_NODES)
215                 return -EINVAL;
216         return 0;
217 }
218
219
220 static int
221 set_max_cq_depth(const char *key __rte_unused,
222                  const char *value,
223                  void *opaque)
224 {
225         int *max_cq_depth = opaque;
226         int ret;
227
228         if (value == NULL || opaque == NULL) {
229                 DLB2_LOG_ERR("NULL pointer\n");
230                 return -EINVAL;
231         }
232
233         ret = dlb2_string_to_int(max_cq_depth, value);
234         if (ret < 0)
235                 return ret;
236
237         if (*max_cq_depth < DLB2_MIN_CQ_DEPTH_OVERRIDE ||
238             *max_cq_depth > DLB2_MAX_CQ_DEPTH_OVERRIDE ||
239             !rte_is_power_of_2(*max_cq_depth)) {
240                 DLB2_LOG_ERR("dlb2: max_cq_depth must be between %d and %d and a power of 2\n",
241                              DLB2_MIN_CQ_DEPTH_OVERRIDE,
242                              DLB2_MAX_CQ_DEPTH_OVERRIDE);
243                 return -EINVAL;
244         }
245
246         return 0;
247 }
248
249 static int
250 set_max_num_events(const char *key __rte_unused,
251                    const char *value,
252                    void *opaque)
253 {
254         int *max_num_events = opaque;
255         int ret;
256
257         if (value == NULL || opaque == NULL) {
258                 DLB2_LOG_ERR("NULL pointer\n");
259                 return -EINVAL;
260         }
261
262         ret = dlb2_string_to_int(max_num_events, value);
263         if (ret < 0)
264                 return ret;
265
266         if (*max_num_events < 0 || *max_num_events >
267                         DLB2_MAX_NUM_LDB_CREDITS) {
268                 DLB2_LOG_ERR("dlb2: max_num_events must be between 0 and %d\n",
269                              DLB2_MAX_NUM_LDB_CREDITS);
270                 return -EINVAL;
271         }
272
273         return 0;
274 }
275
276 static int
277 set_num_dir_credits(const char *key __rte_unused,
278                     const char *value,
279                     void *opaque)
280 {
281         int *num_dir_credits = opaque;
282         int ret;
283
284         if (value == NULL || opaque == NULL) {
285                 DLB2_LOG_ERR("NULL pointer\n");
286                 return -EINVAL;
287         }
288
289         ret = dlb2_string_to_int(num_dir_credits, value);
290         if (ret < 0)
291                 return ret;
292
293         if (*num_dir_credits < 0 ||
294             *num_dir_credits > DLB2_MAX_NUM_DIR_CREDITS(DLB2_HW_V2)) {
295                 DLB2_LOG_ERR("dlb2: num_dir_credits must be between 0 and %d\n",
296                              DLB2_MAX_NUM_DIR_CREDITS(DLB2_HW_V2));
297                 return -EINVAL;
298         }
299
300         return 0;
301 }
302
303 static int
304 set_dev_id(const char *key __rte_unused,
305            const char *value,
306            void *opaque)
307 {
308         int *dev_id = opaque;
309         int ret;
310
311         if (value == NULL || opaque == NULL) {
312                 DLB2_LOG_ERR("NULL pointer\n");
313                 return -EINVAL;
314         }
315
316         ret = dlb2_string_to_int(dev_id, value);
317         if (ret < 0)
318                 return ret;
319
320         return 0;
321 }
322
323 static int
324 set_cos(const char *key __rte_unused,
325         const char *value,
326         void *opaque)
327 {
328         enum dlb2_cos *cos_id = opaque;
329         int x = 0;
330         int ret;
331
332         if (value == NULL || opaque == NULL) {
333                 DLB2_LOG_ERR("NULL pointer\n");
334                 return -EINVAL;
335         }
336
337         ret = dlb2_string_to_int(&x, value);
338         if (ret < 0)
339                 return ret;
340
341         if (x != DLB2_COS_DEFAULT && (x < DLB2_COS_0 || x > DLB2_COS_3)) {
342                 DLB2_LOG_ERR(
343                         "COS %d out of range, must be DLB2_COS_DEFAULT or 0-3\n",
344                         x);
345                 return -EINVAL;
346         }
347
348         *cos_id = x;
349
350         return 0;
351 }
352
353 static int
354 set_poll_interval(const char *key __rte_unused,
355         const char *value,
356         void *opaque)
357 {
358         int *poll_interval = opaque;
359         int ret;
360
361         if (value == NULL || opaque == NULL) {
362                 DLB2_LOG_ERR("NULL pointer\n");
363                 return -EINVAL;
364         }
365
366         ret = dlb2_string_to_int(poll_interval, value);
367         if (ret < 0)
368                 return ret;
369
370         return 0;
371 }
372
373 static int
374 set_sw_credit_quanta(const char *key __rte_unused,
375         const char *value,
376         void *opaque)
377 {
378         int *sw_credit_quanta = opaque;
379         int ret;
380
381         if (value == NULL || opaque == NULL) {
382                 DLB2_LOG_ERR("NULL pointer\n");
383                 return -EINVAL;
384         }
385
386         ret = dlb2_string_to_int(sw_credit_quanta, value);
387         if (ret < 0)
388                 return ret;
389
390         return 0;
391 }
392
393 static int
394 set_hw_credit_quanta(const char *key __rte_unused,
395         const char *value,
396         void *opaque)
397 {
398         int *hw_credit_quanta = opaque;
399         int ret;
400
401         if (value == NULL || opaque == NULL) {
402                 DLB2_LOG_ERR("NULL pointer\n");
403                 return -EINVAL;
404         }
405
406         ret = dlb2_string_to_int(hw_credit_quanta, value);
407         if (ret < 0)
408                 return ret;
409
410         return 0;
411 }
412
413 static int
414 set_default_depth_thresh(const char *key __rte_unused,
415         const char *value,
416         void *opaque)
417 {
418         int *default_depth_thresh = opaque;
419         int ret;
420
421         if (value == NULL || opaque == NULL) {
422                 DLB2_LOG_ERR("NULL pointer\n");
423                 return -EINVAL;
424         }
425
426         ret = dlb2_string_to_int(default_depth_thresh, value);
427         if (ret < 0)
428                 return ret;
429
430         return 0;
431 }
432
433 static int
434 set_vector_opts_enab(const char *key __rte_unused,
435         const char *value,
436         void *opaque)
437 {
438         bool *dlb2_vector_opts_enabled = opaque;
439
440         if (value == NULL || opaque == NULL) {
441                 DLB2_LOG_ERR("NULL pointer\n");
442                 return -EINVAL;
443         }
444
445         if ((*value == 'y') || (*value == 'Y'))
446                 *dlb2_vector_opts_enabled = true;
447         else
448                 *dlb2_vector_opts_enabled = false;
449
450         return 0;
451 }
452
453 static int
454 set_qid_depth_thresh(const char *key __rte_unused,
455                      const char *value,
456                      void *opaque)
457 {
458         struct dlb2_qid_depth_thresholds *qid_thresh = opaque;
459         int first, last, thresh, i;
460
461         if (value == NULL || opaque == NULL) {
462                 DLB2_LOG_ERR("NULL pointer\n");
463                 return -EINVAL;
464         }
465
466         /* command line override may take one of the following 3 forms:
467          * qid_depth_thresh=all:<threshold_value> ... all queues
468          * qid_depth_thresh=qidA-qidB:<threshold_value> ... a range of queues
469          * qid_depth_thresh=qid:<threshold_value> ... just one queue
470          */
471         if (sscanf(value, "all:%d", &thresh) == 1) {
472                 first = 0;
473                 last = DLB2_MAX_NUM_QUEUES(DLB2_HW_V2) - 1;
474         } else if (sscanf(value, "%d-%d:%d", &first, &last, &thresh) == 3) {
475                 /* we have everything we need */
476         } else if (sscanf(value, "%d:%d", &first, &thresh) == 2) {
477                 last = first;
478         } else {
479                 DLB2_LOG_ERR("Error parsing qid depth devarg. Should be all:val, qid-qid:val, or qid:val\n");
480                 return -EINVAL;
481         }
482
483         if (first > last || first < 0 ||
484                 last >= DLB2_MAX_NUM_QUEUES(DLB2_HW_V2)) {
485                 DLB2_LOG_ERR("Error parsing qid depth devarg, invalid qid value\n");
486                 return -EINVAL;
487         }
488
489         if (thresh < 0 || thresh > DLB2_MAX_QUEUE_DEPTH_THRESHOLD) {
490                 DLB2_LOG_ERR("Error parsing qid depth devarg, threshold > %d\n",
491                              DLB2_MAX_QUEUE_DEPTH_THRESHOLD);
492                 return -EINVAL;
493         }
494
495         for (i = first; i <= last; i++)
496                 qid_thresh->val[i] = thresh; /* indexed by qid */
497
498         return 0;
499 }
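/*
 * Illustrative devargs usage (assumed EAL syntax, not taken from this file):
 * the three qid_depth_thresh forms parsed above could be passed as, e.g.,
 *
 *   --allow <pci-bdf>,qid_depth_thresh=all:32
 *   --allow <pci-bdf>,qid_depth_thresh=2-5:128
 *   --allow <pci-bdf>,qid_depth_thresh=7:256
 *
 * where <pci-bdf> is the DLB2 device's PCI address.
 */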
500
501 static int
502 set_qid_depth_thresh_v2_5(const char *key __rte_unused,
503                           const char *value,
504                           void *opaque)
505 {
506         struct dlb2_qid_depth_thresholds *qid_thresh = opaque;
507         int first, last, thresh, i;
508
509         if (value == NULL || opaque == NULL) {
510                 DLB2_LOG_ERR("NULL pointer\n");
511                 return -EINVAL;
512         }
513
514         /* command line override may take one of the following 3 forms:
515          * qid_depth_thresh=all:<threshold_value> ... all queues
516          * qid_depth_thresh=qidA-qidB:<threshold_value> ... a range of queues
517          * qid_depth_thresh=qid:<threshold_value> ... just one queue
518          */
519         if (sscanf(value, "all:%d", &thresh) == 1) {
520                 first = 0;
521                 last = DLB2_MAX_NUM_QUEUES(DLB2_HW_V2_5) - 1;
522         } else if (sscanf(value, "%d-%d:%d", &first, &last, &thresh) == 3) {
523                 /* we have everything we need */
524         } else if (sscanf(value, "%d:%d", &first, &thresh) == 2) {
525                 last = first;
526         } else {
527                 DLB2_LOG_ERR("Error parsing qid depth devarg. Should be all:val, qid-qid:val, or qid:val\n");
528                 return -EINVAL;
529         }
530
531         if (first > last || first < 0 ||
532                 last >= DLB2_MAX_NUM_QUEUES(DLB2_HW_V2_5)) {
533                 DLB2_LOG_ERR("Error parsing qid depth devarg, invalid qid value\n");
534                 return -EINVAL;
535         }
536
537         if (thresh < 0 || thresh > DLB2_MAX_QUEUE_DEPTH_THRESHOLD) {
538                 DLB2_LOG_ERR("Error parsing qid depth devarg, threshold > %d\n",
539                              DLB2_MAX_QUEUE_DEPTH_THRESHOLD);
540                 return -EINVAL;
541         }
542
543         for (i = first; i <= last; i++)
544                 qid_thresh->val[i] = thresh; /* indexed by qid */
545
546         return 0;
547 }
548
549 static void
550 dlb2_eventdev_info_get(struct rte_eventdev *dev,
551                        struct rte_event_dev_info *dev_info)
552 {
553         struct dlb2_eventdev *dlb2 = dlb2_pmd_priv(dev);
554         int ret;
555
556         ret = dlb2_hw_query_resources(dlb2);
557         if (ret) {
558                 const struct rte_eventdev_data *data = dev->data;
559
560                 DLB2_LOG_ERR("get resources err=%d, devid=%d\n",
561                              ret, data->dev_id);
562                 /* fn is void, so fall through and return values set up in
563                  * probe
564                  */
565         }
566
567         /* Add num resources currently owned by this domain.
568          * These would become available if the scheduling domain were reset due
569          * to the application recalling eventdev_configure to *reconfigure* the
570          * domain.
571          */
572         evdev_dlb2_default_info.max_event_ports += dlb2->num_ldb_ports;
573         evdev_dlb2_default_info.max_event_queues += dlb2->num_ldb_queues;
574         if (dlb2->version == DLB2_HW_V2_5) {
575                 evdev_dlb2_default_info.max_num_events +=
576                         dlb2->max_credits;
577         } else {
578                 evdev_dlb2_default_info.max_num_events +=
579                         dlb2->max_ldb_credits;
580         }
581         evdev_dlb2_default_info.max_event_queues =
582                 RTE_MIN(evdev_dlb2_default_info.max_event_queues,
583                         RTE_EVENT_MAX_QUEUES_PER_DEV);
584
585         evdev_dlb2_default_info.max_num_events =
586                 RTE_MIN(evdev_dlb2_default_info.max_num_events,
587                         dlb2->max_num_events_override);
588
589         *dev_info = evdev_dlb2_default_info;
590 }
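/*
 * Application-side sketch (illustrative, not part of this driver): the values
 * assembled above are what an application sees through the standard eventdev
 * API, for example:
 *
 *   struct rte_event_dev_info info;
 *
 *   if (rte_event_dev_info_get(dev_id, &info) == 0)
 *           printf("max ports=%d, max queues=%d, max events=%d\n",
 *                  info.max_event_ports, info.max_event_queues,
 *                  info.max_num_events);
 */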
591
592 static int
593 dlb2_hw_create_sched_domain(struct dlb2_hw_dev *handle,
594                             const struct dlb2_hw_rsrcs *resources_asked,
595                             uint8_t device_version)
596 {
597         int ret = 0;
598         struct dlb2_create_sched_domain_args *cfg;
599
600         if (resources_asked == NULL) {
601                 DLB2_LOG_ERR("dlb2: dlb2_create NULL parameter\n");
602                 ret = -EINVAL;
603                 goto error_exit;
604         }
605
606         /* Map generic qm resources to dlb2 resources */
607         cfg = &handle->cfg.resources;
608
609         /* DIR ports and queues */
610
611         cfg->num_dir_ports = resources_asked->num_dir_ports;
612         if (device_version == DLB2_HW_V2_5)
613                 cfg->num_credits = resources_asked->num_credits;
614         else
615                 cfg->num_dir_credits = resources_asked->num_dir_credits;
616
617         /* LDB queues */
618
619         cfg->num_ldb_queues = resources_asked->num_ldb_queues;
620
621         /* LDB ports */
622
623         cfg->cos_strict = 0; /* Best effort */
624         cfg->num_cos_ldb_ports[0] = 0;
625         cfg->num_cos_ldb_ports[1] = 0;
626         cfg->num_cos_ldb_ports[2] = 0;
627         cfg->num_cos_ldb_ports[3] = 0;
628
629         switch (handle->cos_id) {
630         case DLB2_COS_0:
631                 cfg->num_ldb_ports = 0; /* no don't care ports */
632                 cfg->num_cos_ldb_ports[0] =
633                         resources_asked->num_ldb_ports;
634                 break;
635         case DLB2_COS_1:
636                 cfg->num_ldb_ports = 0; /* no don't care ports */
637                 cfg->num_cos_ldb_ports[1] = resources_asked->num_ldb_ports;
638                 break;
639         case DLB2_COS_2:
640                 cfg->num_ldb_ports = 0; /* no don't care ports */
641                 cfg->num_cos_ldb_ports[2] = resources_asked->num_ldb_ports;
642                 break;
643         case DLB2_COS_3:
644                 cfg->num_ldb_ports = 0; /* no don't care ports */
645                 cfg->num_cos_ldb_ports[3] =
646                         resources_asked->num_ldb_ports;
647                 break;
648         case DLB2_COS_DEFAULT:
649                 /* all ldb ports are don't care ports from a cos perspective */
650                 cfg->num_ldb_ports =
651                         resources_asked->num_ldb_ports;
652                 break;
653         }
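        /*
         * Worked example of the switch above: with cos_id == DLB2_COS_1 and
         * resources_asked->num_ldb_ports == 4, the result is
         * cfg->num_ldb_ports == 0 and cfg->num_cos_ldb_ports == {0, 4, 0, 0}.
         */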
654
655         if (device_version == DLB2_HW_V2)
656                 cfg->num_ldb_credits = resources_asked->num_ldb_credits;
657
658         cfg->num_atomic_inflights =
659                 DLB2_NUM_ATOMIC_INFLIGHTS_PER_QUEUE *
660                 cfg->num_ldb_queues;
661
662         cfg->num_hist_list_entries = resources_asked->num_ldb_ports *
663                 DLB2_NUM_HIST_LIST_ENTRIES_PER_LDB_PORT;
664
665         if (device_version == DLB2_HW_V2_5) {
666                 DLB2_LOG_DBG("sched domain create - ldb_qs=%d, ldb_ports=%d, dir_ports=%d, atomic_inflights=%d, hist_list_entries=%d, credits=%d\n",
667                              cfg->num_ldb_queues,
668                              resources_asked->num_ldb_ports,
669                              cfg->num_dir_ports,
670                              cfg->num_atomic_inflights,
671                              cfg->num_hist_list_entries,
672                              cfg->num_credits);
673         } else {
674                 DLB2_LOG_DBG("sched domain create - ldb_qs=%d, ldb_ports=%d, dir_ports=%d, atomic_inflights=%d, hist_list_entries=%d, ldb_credits=%d, dir_credits=%d\n",
675                              cfg->num_ldb_queues,
676                              resources_asked->num_ldb_ports,
677                              cfg->num_dir_ports,
678                              cfg->num_atomic_inflights,
679                              cfg->num_hist_list_entries,
680                              cfg->num_ldb_credits,
681                              cfg->num_dir_credits);
682         }
683
684         /* Configure the QM */
685
686         ret = dlb2_iface_sched_domain_create(handle, cfg);
687         if (ret < 0) {
688                 DLB2_LOG_ERR("dlb2: domain create failed, ret = %d, extra status: %s\n",
689                              ret,
690                              dlb2_error_strings[cfg->response.status]);
691
692                 goto error_exit;
693         }
694
695         handle->domain_id = cfg->response.id;
696         handle->cfg.configured = true;
697
698 error_exit:
699
700         return ret;
701 }
702
703 static void
704 dlb2_hw_reset_sched_domain(const struct rte_eventdev *dev, bool reconfig)
705 {
706         struct dlb2_eventdev *dlb2 = dlb2_pmd_priv(dev);
707         enum dlb2_configuration_state config_state;
708         int i, j;
709
710         dlb2_iface_domain_reset(dlb2);
711
712         /* Free all dynamically allocated port memory */
713         for (i = 0; i < dlb2->num_ports; i++)
714                 dlb2_free_qe_mem(&dlb2->ev_ports[i].qm_port);
715
716         /* If reconfiguring, mark the device's queues and ports as "previously
717          * configured." If the user doesn't reconfigure them, the PMD will
718          * reapply their previous configuration when the device is started.
719          */
720         config_state = (reconfig) ? DLB2_PREV_CONFIGURED :
721                 DLB2_NOT_CONFIGURED;
722
723         for (i = 0; i < dlb2->num_ports; i++) {
724                 dlb2->ev_ports[i].qm_port.config_state = config_state;
725                 /* Reset setup_done so ports can be reconfigured */
726                 dlb2->ev_ports[i].setup_done = false;
727                 for (j = 0; j < DLB2_MAX_NUM_QIDS_PER_LDB_CQ; j++)
728                         dlb2->ev_ports[i].link[j].mapped = false;
729         }
730
731         for (i = 0; i < dlb2->num_queues; i++)
732                 dlb2->ev_queues[i].qm_queue.config_state = config_state;
733
734         for (i = 0; i < DLB2_MAX_NUM_QUEUES(DLB2_HW_V2_5); i++)
735                 dlb2->ev_queues[i].setup_done = false;
736
737         dlb2->num_ports = 0;
738         dlb2->num_ldb_ports = 0;
739         dlb2->num_dir_ports = 0;
740         dlb2->num_queues = 0;
741         dlb2->num_ldb_queues = 0;
742         dlb2->num_dir_queues = 0;
743         dlb2->configured = false;
744 }
745
746 /* Note: 1 QM instance per QM device, QM instance/device == event device */
747 static int
748 dlb2_eventdev_configure(const struct rte_eventdev *dev)
749 {
750         struct dlb2_eventdev *dlb2 = dlb2_pmd_priv(dev);
751         struct dlb2_hw_dev *handle = &dlb2->qm_instance;
752         struct dlb2_hw_rsrcs *rsrcs = &handle->info.hw_rsrc_max;
753         const struct rte_eventdev_data *data = dev->data;
754         const struct rte_event_dev_config *config = &data->dev_conf;
755         int ret;
756
757         /* If this eventdev is already configured, we must release the current
758          * scheduling domain before attempting to configure a new one.
759          */
760         if (dlb2->configured) {
761                 dlb2_hw_reset_sched_domain(dev, true);
762                 ret = dlb2_hw_query_resources(dlb2);
763                 if (ret) {
764                         DLB2_LOG_ERR("get resources err=%d, devid=%d\n",
765                                      ret, data->dev_id);
766                         return ret;
767                 }
768         }
769
770         if (config->nb_event_queues > rsrcs->num_queues) {
771                 DLB2_LOG_ERR("nb_event_queues parameter (%d) exceeds the QM device's capabilities (%d).\n",
772                              config->nb_event_queues,
773                              rsrcs->num_queues);
774                 return -EINVAL;
775         }
776         if (config->nb_event_ports > (rsrcs->num_ldb_ports
777                         + rsrcs->num_dir_ports)) {
778                 DLB2_LOG_ERR("nb_event_ports parameter (%d) exceeds the QM device's capabilities (%d).\n",
779                              config->nb_event_ports,
780                              (rsrcs->num_ldb_ports + rsrcs->num_dir_ports));
781                 return -EINVAL;
782         }
783         if (config->nb_events_limit > rsrcs->nb_events_limit) {
784                 DLB2_LOG_ERR("nb_events_limit parameter (%d) exceeds the QM device's capabilities (%d).\n",
785                              config->nb_events_limit,
786                              rsrcs->nb_events_limit);
787                 return -EINVAL;
788         }
789
790         if (config->event_dev_cfg & RTE_EVENT_DEV_CFG_PER_DEQUEUE_TIMEOUT)
791                 dlb2->global_dequeue_wait = false;
792         else {
793                 uint32_t timeout32;
794
795                 dlb2->global_dequeue_wait = true;
796
797                 /* note size mismatch of timeout vals in eventdev lib. */
798                 timeout32 = config->dequeue_timeout_ns;
799
800                 dlb2->global_dequeue_wait_ticks =
801                         timeout32 * (rte_get_timer_hz() / 1E9);
802         }
803
804         /* Does this platform support umonitor/umwait? */
805         if (rte_cpu_get_flag_enabled(RTE_CPUFLAG_WAITPKG))
806                 dlb2->umwait_allowed = true;
807
808         rsrcs->num_dir_ports = config->nb_single_link_event_port_queues;
809         rsrcs->num_ldb_ports  = config->nb_event_ports - rsrcs->num_dir_ports;
810         /* 1 dir queue per dir port */
811         rsrcs->num_ldb_queues = config->nb_event_queues - rsrcs->num_dir_ports;
812
813         if (dlb2->version == DLB2_HW_V2_5) {
814                 rsrcs->num_credits = 0;
815                 if (rsrcs->num_ldb_queues || rsrcs->num_dir_ports)
816                         rsrcs->num_credits = config->nb_events_limit;
817         } else {
818                 /* Give directed ports half of nb_events_limit by default,
819                  * since the hardware provides fewer directed credits than
820                  * load-balanced credits. */
821                 rsrcs->num_ldb_credits = 0;
822                 rsrcs->num_dir_credits = 0;
823
824                 if (rsrcs->num_ldb_queues)
825                         rsrcs->num_ldb_credits = config->nb_events_limit;
826                 if (rsrcs->num_dir_ports)
827                         rsrcs->num_dir_credits = config->nb_events_limit / 2;
828                 if (dlb2->num_dir_credits_override != -1)
829                         rsrcs->num_dir_credits = dlb2->num_dir_credits_override;
830         }
831
832         if (dlb2_hw_create_sched_domain(handle, rsrcs, dlb2->version) < 0) {
833                 DLB2_LOG_ERR("dlb2_hw_create_sched_domain failed\n");
834                 return -ENODEV;
835         }
836
837         dlb2->new_event_limit = config->nb_events_limit;
838         __atomic_store_n(&dlb2->inflights, 0, __ATOMIC_SEQ_CST);
839
840         /* Save number of ports/queues for this event dev */
841         dlb2->num_ports = config->nb_event_ports;
842         dlb2->num_queues = config->nb_event_queues;
843         dlb2->num_dir_ports = rsrcs->num_dir_ports;
844         dlb2->num_ldb_ports = dlb2->num_ports - dlb2->num_dir_ports;
845         dlb2->num_ldb_queues = dlb2->num_queues - dlb2->num_dir_ports;
846         dlb2->num_dir_queues = dlb2->num_dir_ports;
847         if (dlb2->version == DLB2_HW_V2_5) {
848                 dlb2->credit_pool = rsrcs->num_credits;
849                 dlb2->max_credits = rsrcs->num_credits;
850         } else {
851                 dlb2->ldb_credit_pool = rsrcs->num_ldb_credits;
852                 dlb2->max_ldb_credits = rsrcs->num_ldb_credits;
853                 dlb2->dir_credit_pool = rsrcs->num_dir_credits;
854                 dlb2->max_dir_credits = rsrcs->num_dir_credits;
855         }
856
857         dlb2->configured = true;
858
859         return 0;
860 }
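/*
 * Application-side sketch (illustrative only; the field values are example
 * assumptions, not requirements of this PMD): a minimal configuration that
 * exercises the logic above could look like:
 *
 *   struct rte_event_dev_config cfg = {
 *           .nb_event_queues = 2,
 *           .nb_event_ports = 2,
 *           .nb_single_link_event_port_queues = 1,
 *           .nb_events_limit = 2048,
 *           .nb_event_queue_flows = 1024,
 *           .nb_event_port_dequeue_depth = 32,
 *           .nb_event_port_enqueue_depth = 32,
 *           .dequeue_timeout_ns = 0,
 *   };
 *
 *   ret = rte_event_dev_configure(dev_id, &cfg);
 *
 * With one single-link port, the code above derives one directed port/queue
 * pair plus one load-balanced port and queue from these totals.
 */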
861
862 static void
863 dlb2_eventdev_port_default_conf_get(struct rte_eventdev *dev,
864                                     uint8_t port_id,
865                                     struct rte_event_port_conf *port_conf)
866 {
867         RTE_SET_USED(port_id);
868         struct dlb2_eventdev *dlb2 = dlb2_pmd_priv(dev);
869
870         port_conf->new_event_threshold = dlb2->new_event_limit;
871         port_conf->dequeue_depth = 32;
872         port_conf->enqueue_depth = DLB2_MAX_ENQUEUE_DEPTH;
873         port_conf->event_port_cfg = 0;
874 }
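/*
 * Application-side sketch (illustrative): these defaults can be fetched and
 * selectively overridden before setting up a port:
 *
 *   struct rte_event_port_conf pconf;
 *
 *   rte_event_port_default_conf_get(dev_id, port_id, &pconf);
 *   pconf.dequeue_depth = 16;
 *   ret = rte_event_port_setup(dev_id, port_id, &pconf);
 *
 * Any override must stay within the limits reported by
 * rte_event_dev_info_get().
 */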
875
876 static void
877 dlb2_eventdev_queue_default_conf_get(struct rte_eventdev *dev,
878                                      uint8_t queue_id,
879                                      struct rte_event_queue_conf *queue_conf)
880 {
881         RTE_SET_USED(dev);
882         RTE_SET_USED(queue_id);
883
884         queue_conf->nb_atomic_flows = 1024;
885         queue_conf->nb_atomic_order_sequences = 64;
886         queue_conf->event_queue_cfg = 0;
887         queue_conf->priority = 0;
888 }
889
890 static int32_t
891 dlb2_get_sn_allocation(struct dlb2_eventdev *dlb2, int group)
892 {
893         struct dlb2_hw_dev *handle = &dlb2->qm_instance;
894         struct dlb2_get_sn_allocation_args cfg;
895         int ret;
896
897         cfg.group = group;
898
899         ret = dlb2_iface_get_sn_allocation(handle, &cfg);
900         if (ret < 0) {
901                 DLB2_LOG_ERR("dlb2: get_sn_allocation ret=%d (driver status: %s)\n",
902                              ret, dlb2_error_strings[cfg.response.status]);
903                 return ret;
904         }
905
906         return cfg.response.id;
907 }
908
909 static int
910 dlb2_set_sn_allocation(struct dlb2_eventdev *dlb2, int group, int num)
911 {
912         struct dlb2_hw_dev *handle = &dlb2->qm_instance;
913         struct dlb2_set_sn_allocation_args cfg;
914         int ret;
915
916         cfg.num = num;
917         cfg.group = group;
918
919         ret = dlb2_iface_set_sn_allocation(handle, &cfg);
920         if (ret < 0) {
921                 DLB2_LOG_ERR("dlb2: set_sn_allocation ret=%d (driver status: %s)\n",
922                              ret, dlb2_error_strings[cfg.response.status]);
923                 return ret;
924         }
925
926         return ret;
927 }
928
929 static int32_t
930 dlb2_get_sn_occupancy(struct dlb2_eventdev *dlb2, int group)
931 {
932         struct dlb2_hw_dev *handle = &dlb2->qm_instance;
933         struct dlb2_get_sn_occupancy_args cfg;
934         int ret;
935
936         cfg.group = group;
937
938         ret = dlb2_iface_get_sn_occupancy(handle, &cfg);
939         if (ret < 0) {
940                 DLB2_LOG_ERR("dlb2: get_sn_occupancy ret=%d (driver status: %s)\n",
941                              ret, dlb2_error_strings[cfg.response.status]);
942                 return ret;
943         }
944
945         return cfg.response.id;
946 }
947
948 /* Query the current sequence number allocations and, if they conflict with the
949  * requested LDB queue configuration, attempt to re-allocate sequence numbers.
950          * This is best-effort; if it fails, the subsequent load-balanced queue
951          * configuration will fail and report the error.
952  */
953 static void
954 dlb2_program_sn_allocation(struct dlb2_eventdev *dlb2,
955                            const struct rte_event_queue_conf *queue_conf)
956 {
957         int grp_occupancy[DLB2_NUM_SN_GROUPS];
958         int grp_alloc[DLB2_NUM_SN_GROUPS];
959         int i, sequence_numbers;
960
961         sequence_numbers = (int)queue_conf->nb_atomic_order_sequences;
962
963         for (i = 0; i < DLB2_NUM_SN_GROUPS; i++) {
964                 int total_slots;
965
966                 grp_alloc[i] = dlb2_get_sn_allocation(dlb2, i);
967                 if (grp_alloc[i] < 0)
968                         return;
969
970                 total_slots = DLB2_MAX_LDB_SN_ALLOC / grp_alloc[i];
971
972                 grp_occupancy[i] = dlb2_get_sn_occupancy(dlb2, i);
973                 if (grp_occupancy[i] < 0)
974                         return;
975
976                 /* DLB has at least one available slot for the requested
977                  * sequence numbers, so no further configuration required.
978                  */
979                 if (grp_alloc[i] == sequence_numbers &&
980                     grp_occupancy[i] < total_slots)
981                         return;
982         }
983
984         /* None of the sequence number groups are configured for the requested
985          * sequence numbers, so we have to reconfigure one of them. This is
986          * only possible if a group is not in use.
987          */
988         for (i = 0; i < DLB2_NUM_SN_GROUPS; i++) {
989                 if (grp_occupancy[i] == 0)
990                         break;
991         }
992
993         if (i == DLB2_NUM_SN_GROUPS) {
994                 DLB2_LOG_ERR("[%s()] No groups with %d sequence_numbers are available or have free slots\n",
995                        __func__, sequence_numbers);
996                 return;
997         }
998
999         /* Attempt to configure slot i with the requested number of sequence
1000          * numbers. Ignore the return value -- if this fails, the error will be
1001          * caught during subsequent queue configuration.
1002          */
1003         dlb2_set_sn_allocation(dlb2, i, sequence_numbers);
1004 }
1005
1006 static int32_t
1007 dlb2_hw_create_ldb_queue(struct dlb2_eventdev *dlb2,
1008                          struct dlb2_eventdev_queue *ev_queue,
1009                          const struct rte_event_queue_conf *evq_conf)
1010 {
1011         struct dlb2_hw_dev *handle = &dlb2->qm_instance;
1012         struct dlb2_queue *queue = &ev_queue->qm_queue;
1013         struct dlb2_create_ldb_queue_args cfg;
1014         int32_t ret;
1015         uint32_t qm_qid;
1016         int sched_type = -1;
1017
1018         if (evq_conf == NULL)
1019                 return -EINVAL;
1020
1021         if (evq_conf->event_queue_cfg & RTE_EVENT_QUEUE_CFG_ALL_TYPES) {
1022                 if (evq_conf->nb_atomic_order_sequences != 0)
1023                         sched_type = RTE_SCHED_TYPE_ORDERED;
1024                 else
1025                         sched_type = RTE_SCHED_TYPE_PARALLEL;
1026         } else
1027                 sched_type = evq_conf->schedule_type;
1028
1029         cfg.num_atomic_inflights = DLB2_NUM_ATOMIC_INFLIGHTS_PER_QUEUE;
1030         cfg.num_sequence_numbers = evq_conf->nb_atomic_order_sequences;
1031         cfg.num_qid_inflights = evq_conf->nb_atomic_order_sequences;
1032
1033         if (sched_type != RTE_SCHED_TYPE_ORDERED) {
1034                 cfg.num_sequence_numbers = 0;
1035                 cfg.num_qid_inflights = 2048;
1036         }
1037
1038         /* App should set this to the number of hardware flows they want, not
1039          * the overall number of flows they're going to use. E.g. if app is
1040          * using 64 flows and sets compression to 64, best-case they'll get
1041          * 64 unique hashed flows in hardware.
1042          */
1043         switch (evq_conf->nb_atomic_flows) {
1044         /* Valid DLB2 compression levels */
1045         case 64:
1046         case 128:
1047         case 256:
1048         case 512:
1049         case (1 * 1024): /* 1K */
1050         case (2 * 1024): /* 2K */
1051         case (4 * 1024): /* 4K */
1052         case (64 * 1024): /* 64K */
1053                 cfg.lock_id_comp_level = evq_conf->nb_atomic_flows;
1054                 break;
1055         default:
1056                 /* Invalid compression level */
1057                 cfg.lock_id_comp_level = 0; /* no compression */
1058         }
1059
1060         if (ev_queue->depth_threshold == 0) {
1061                 cfg.depth_threshold = dlb2->default_depth_thresh;
1062                 ev_queue->depth_threshold =
1063                         dlb2->default_depth_thresh;
1064         } else
1065                 cfg.depth_threshold = ev_queue->depth_threshold;
1066
1067         ret = dlb2_iface_ldb_queue_create(handle, &cfg);
1068         if (ret < 0) {
1069                 DLB2_LOG_ERR("dlb2: create LB event queue error, ret=%d (driver status: %s)\n",
1070                              ret, dlb2_error_strings[cfg.response.status]);
1071                 return -EINVAL;
1072         }
1073
1074         qm_qid = cfg.response.id;
1075
1076         /* Save off queue config for debug, resource lookups, and reconfig */
1077         queue->num_qid_inflights = cfg.num_qid_inflights;
1078         queue->num_atm_inflights = cfg.num_atomic_inflights;
1079
1080         queue->sched_type = sched_type;
1081         queue->config_state = DLB2_CONFIGURED;
1082
1083         DLB2_LOG_DBG("Created LB event queue %d, nb_inflights=%d, nb_seq=%d, qid inflights=%d\n",
1084                      qm_qid,
1085                      cfg.num_atomic_inflights,
1086                      cfg.num_sequence_numbers,
1087                      cfg.num_qid_inflights);
1088
1089         return qm_qid;
1090 }
1091
1092 static int
1093 dlb2_eventdev_ldb_queue_setup(struct rte_eventdev *dev,
1094                               struct dlb2_eventdev_queue *ev_queue,
1095                               const struct rte_event_queue_conf *queue_conf)
1096 {
1097         struct dlb2_eventdev *dlb2 = dlb2_pmd_priv(dev);
1098         int32_t qm_qid;
1099
1100         if (queue_conf->nb_atomic_order_sequences)
1101                 dlb2_program_sn_allocation(dlb2, queue_conf);
1102
1103         qm_qid = dlb2_hw_create_ldb_queue(dlb2, ev_queue, queue_conf);
1104         if (qm_qid < 0) {
1105                 DLB2_LOG_ERR("Failed to create the load-balanced queue\n");
1106
1107                 return qm_qid;
1108         }
1109
1110         dlb2->qm_ldb_to_ev_queue_id[qm_qid] = ev_queue->id;
1111
1112         ev_queue->qm_queue.id = qm_qid;
1113
1114         return 0;
1115 }
1116
1117 static int dlb2_num_dir_queues_setup(struct dlb2_eventdev *dlb2)
1118 {
1119         int i, num = 0;
1120
1121         for (i = 0; i < dlb2->num_queues; i++) {
1122                 if (dlb2->ev_queues[i].setup_done &&
1123                     dlb2->ev_queues[i].qm_queue.is_directed)
1124                         num++;
1125         }
1126
1127         return num;
1128 }
1129
1130 static void
1131 dlb2_queue_link_teardown(struct dlb2_eventdev *dlb2,
1132                          struct dlb2_eventdev_queue *ev_queue)
1133 {
1134         struct dlb2_eventdev_port *ev_port;
1135         int i, j;
1136
1137         for (i = 0; i < dlb2->num_ports; i++) {
1138                 ev_port = &dlb2->ev_ports[i];
1139
1140                 for (j = 0; j < DLB2_MAX_NUM_QIDS_PER_LDB_CQ; j++) {
1141                         if (!ev_port->link[j].valid ||
1142                             ev_port->link[j].queue_id != ev_queue->id)
1143                                 continue;
1144
1145                         ev_port->link[j].valid = false;
1146                         ev_port->num_links--;
1147                 }
1148         }
1149
1150         ev_queue->num_links = 0;
1151 }
1152
1153 static int
1154 dlb2_eventdev_queue_setup(struct rte_eventdev *dev,
1155                           uint8_t ev_qid,
1156                           const struct rte_event_queue_conf *queue_conf)
1157 {
1158         struct dlb2_eventdev *dlb2 = dlb2_pmd_priv(dev);
1159         struct dlb2_eventdev_queue *ev_queue;
1160         int ret;
1161
1162         if (queue_conf == NULL)
1163                 return -EINVAL;
1164
1165         if (ev_qid >= dlb2->num_queues)
1166                 return -EINVAL;
1167
1168         ev_queue = &dlb2->ev_queues[ev_qid];
1169
1170         ev_queue->qm_queue.is_directed = queue_conf->event_queue_cfg &
1171                 RTE_EVENT_QUEUE_CFG_SINGLE_LINK;
1172         ev_queue->id = ev_qid;
1173         ev_queue->conf = *queue_conf;
1174
1175         if (!ev_queue->qm_queue.is_directed) {
1176                 ret = dlb2_eventdev_ldb_queue_setup(dev, ev_queue, queue_conf);
1177         } else {
1178                 /* The directed queue isn't setup until link time, at which
1179                  * point we know its directed port ID. Directed queue setup
1180                  * will only fail if this queue is already setup or there are
1181                  * no directed queues left to configure.
1182                  */
1183                 ret = 0;
1184
1185                 ev_queue->qm_queue.config_state = DLB2_NOT_CONFIGURED;
1186
1187                 if (ev_queue->setup_done ||
1188                     dlb2_num_dir_queues_setup(dlb2) == dlb2->num_dir_queues)
1189                         ret = -EINVAL;
1190         }
1191
1192         /* Tear down pre-existing port->queue links */
1193         if (!ret && dlb2->run_state == DLB2_RUN_STATE_STOPPED)
1194                 dlb2_queue_link_teardown(dlb2, ev_queue);
1195
1196         if (!ret)
1197                 ev_queue->setup_done = true;
1198
1199         return ret;
1200 }
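/*
 * Application-side sketch (illustrative): an ordered, load-balanced queue that
 * reaches dlb2_hw_create_ldb_queue() above might be requested as:
 *
 *   struct rte_event_queue_conf qconf = {
 *           .schedule_type = RTE_SCHED_TYPE_ORDERED,
 *           .nb_atomic_flows = 1024,
 *           .nb_atomic_order_sequences = 64,
 *           .priority = RTE_EVENT_DEV_PRIORITY_NORMAL,
 *   };
 *
 *   ret = rte_event_queue_setup(dev_id, queue_id, &qconf);
 *
 * nb_atomic_order_sequences feeds dlb2_program_sn_allocation(), and
 * nb_atomic_flows maps to the lock ID compression levels listed in
 * dlb2_hw_create_ldb_queue().
 */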
1201
1202 static int
1203 dlb2_init_consume_qe(struct dlb2_port *qm_port, char *mz_name)
1204 {
1205         struct dlb2_cq_pop_qe *qe;
1206
1207         qe = rte_zmalloc(mz_name,
1208                         DLB2_NUM_QES_PER_CACHE_LINE *
1209                                 sizeof(struct dlb2_cq_pop_qe),
1210                         RTE_CACHE_LINE_SIZE);
1211
1212         if (qe == NULL) {
1213                 DLB2_LOG_ERR("dlb2: no memory for consume_qe\n");
1214                 return -ENOMEM;
1215         }
1216         qm_port->consume_qe = qe;
1217
1218         qe->qe_valid = 0;
1219         qe->qe_frag = 0;
1220         qe->qe_comp = 0;
1221         qe->cq_token = 1;
1222         /* Tokens value is 0-based; i.e. '0' returns 1 token, '1' returns 2,
1223          * and so on.
1224          */
1225         qe->tokens = 0; /* set at run time */
1226         qe->meas_lat = 0;
1227         qe->no_dec = 0;
1228         /* Completion IDs are disabled */
1229         qe->cmp_id = 0;
1230
1231         return 0;
1232 }
1233
1234 static int
1235 dlb2_init_int_arm_qe(struct dlb2_port *qm_port, char *mz_name)
1236 {
1237         struct dlb2_enqueue_qe *qe;
1238
1239         qe = rte_zmalloc(mz_name,
1240                         DLB2_NUM_QES_PER_CACHE_LINE *
1241                                 sizeof(struct dlb2_enqueue_qe),
1242                         RTE_CACHE_LINE_SIZE);
1243
1244         if (qe == NULL) {
1245                 DLB2_LOG_ERR("dlb2: no memory for int_arm_qe\n");
1246                 return -ENOMEM;
1247         }
1248         qm_port->int_arm_qe = qe;
1249
1250         /* V2 - INT ARM is CQ_TOKEN + FRAG */
1251         qe->qe_valid = 0;
1252         qe->qe_frag = 1;
1253         qe->qe_comp = 0;
1254         qe->cq_token = 1;
1255         qe->meas_lat = 0;
1256         qe->no_dec = 0;
1257         /* Completion IDs are disabled */
1258         qe->cmp_id = 0;
1259
1260         return 0;
1261 }
1262
1263 static int
1264 dlb2_init_qe_mem(struct dlb2_port *qm_port, char *mz_name)
1265 {
1266         int ret, sz;
1267
1268         sz = DLB2_NUM_QES_PER_CACHE_LINE * sizeof(struct dlb2_enqueue_qe);
1269
1270         qm_port->qe4 = rte_zmalloc(mz_name, sz, RTE_CACHE_LINE_SIZE);
1271
1272         if (qm_port->qe4 == NULL) {
1273                 DLB2_LOG_ERR("dlb2: no qe4 memory\n");
1274                 ret = -ENOMEM;
1275                 goto error_exit;
1276         }
1277
1278         ret = dlb2_init_int_arm_qe(qm_port, mz_name);
1279         if (ret < 0) {
1280                 DLB2_LOG_ERR("dlb2: dlb2_init_int_arm_qe ret=%d\n", ret);
1281                 goto error_exit;
1282         }
1283
1284         ret = dlb2_init_consume_qe(qm_port, mz_name);
1285         if (ret < 0) {
1286                 DLB2_LOG_ERR("dlb2: dlb2_init_consume_qe ret=%d\n", ret);
1287                 goto error_exit;
1288         }
1289
1290         return 0;
1291
1292 error_exit:
1293
1294         dlb2_free_qe_mem(qm_port);
1295
1296         return ret;
1297 }
1298
1299 static inline uint16_t
1300 dlb2_event_enqueue_delayed(void *event_port,
1301                            const struct rte_event events[]);
1302
1303 static inline uint16_t
1304 dlb2_event_enqueue_burst_delayed(void *event_port,
1305                                  const struct rte_event events[],
1306                                  uint16_t num);
1307
1308 static inline uint16_t
1309 dlb2_event_enqueue_new_burst_delayed(void *event_port,
1310                                      const struct rte_event events[],
1311                                      uint16_t num);
1312
1313 static inline uint16_t
1314 dlb2_event_enqueue_forward_burst_delayed(void *event_port,
1315                                          const struct rte_event events[],
1316                                          uint16_t num);
1317
1318 /* Generate the required bitmask for rotate-style expected QE gen bits.
1319  * This requires a pattern of 1's and zeros, starting with expected as
1320  * 1 bits, so when hardware writes 0's they're "new". This requires the
1321  * ring size to be a power of 2 to wrap correctly.
1322  */
1323 static void
1324 dlb2_hw_cq_bitmask_init(struct dlb2_port *qm_port, uint32_t cq_depth)
1325 {
1326         uint64_t cq_build_mask = 0;
1327         uint32_t i;
1328
1329         if (cq_depth > 64)
1330                 return; /* need to fall back to scalar code */
1331
1332         /*
1333          * All zeros in the first u64, all ones in the second is the correct bit
1334          * pattern to start. Special casing == 64 is easier than adapting the loop logic.
1335          */
1336         if (cq_depth == 64) {
1337                 qm_port->cq_rolling_mask = 0;
1338                 qm_port->cq_rolling_mask_2 = -1;
1339                 return;
1340         }
1341
1342         for (i = 0; i < 64; i += (cq_depth * 2))
1343                 cq_build_mask |= ((1ULL << cq_depth) - 1) << (i + cq_depth);
1344
1345         qm_port->cq_rolling_mask = cq_build_mask;
1346         qm_port->cq_rolling_mask_2 = cq_build_mask;
1347 }
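/*
 * Worked example of the mask built above: with cq_depth == 16 the loop
 * produces 0xFFFF0000FFFF0000, i.e. alternating runs of cq_depth zero bits and
 * cq_depth one bits across the 64-bit word, and both rolling masks start with
 * that value.
 */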
1348
1349 static int
1350 dlb2_hw_create_ldb_port(struct dlb2_eventdev *dlb2,
1351                         struct dlb2_eventdev_port *ev_port,
1352                         uint32_t dequeue_depth,
1353                         uint32_t enqueue_depth)
1354 {
1355         struct dlb2_hw_dev *handle = &dlb2->qm_instance;
1356         struct dlb2_create_ldb_port_args cfg = { {0} };
1357         int ret;
1358         struct dlb2_port *qm_port = NULL;
1359         char mz_name[RTE_MEMZONE_NAMESIZE];
1360         uint32_t qm_port_id;
1361         uint16_t ldb_credit_high_watermark = 0;
1362         uint16_t dir_credit_high_watermark = 0;
1363         uint16_t credit_high_watermark = 0;
1364
1365         if (handle == NULL)
1366                 return -EINVAL;
1367
1368         if (dequeue_depth < DLB2_MIN_CQ_DEPTH) {
1369                 DLB2_LOG_ERR("dlb2: invalid dequeue_depth, must be at least %d\n",
1370                              DLB2_MIN_CQ_DEPTH);
1371                 return -EINVAL;
1372         }
1373
1374         if (enqueue_depth < DLB2_MIN_ENQUEUE_DEPTH) {
1375                 DLB2_LOG_ERR("dlb2: invalid enqueue_depth, must be at least %d\n",
1376                              DLB2_MIN_ENQUEUE_DEPTH);
1377                 return -EINVAL;
1378         }
1379
1380         rte_spinlock_lock(&handle->resource_lock);
1381
1382         /* We round up to the next power of 2 if necessary */
1383         cfg.cq_depth = rte_align32pow2(dequeue_depth);
1384         cfg.cq_depth_threshold = 1;
1385
1386         cfg.cq_history_list_size = DLB2_NUM_HIST_LIST_ENTRIES_PER_LDB_PORT;
1387
1388         if (handle->cos_id == DLB2_COS_DEFAULT)
1389                 cfg.cos_id = 0;
1390         else
1391                 cfg.cos_id = handle->cos_id;
1392
1393         cfg.cos_strict = 0;
1394
1395         /* User controls the LDB high watermark via enqueue depth. The DIR high
1396          * watermark is equal, unless the directed credit pool is too small.
1397          */
1398         if (dlb2->version == DLB2_HW_V2) {
1399                 ldb_credit_high_watermark = enqueue_depth;
1400                 /* If there are no directed ports, the kernel driver will
1401                  * ignore this port's directed credit settings. Don't use
1402                  * enqueue_depth if it would require more directed credits
1403                  * than are available.
1404                  */
1405                 dir_credit_high_watermark =
1406                         RTE_MIN(enqueue_depth,
1407                                 handle->cfg.num_dir_credits / dlb2->num_ports);
1408         } else
1409                 credit_high_watermark = enqueue_depth;
1410
1411         /* Per QM values */
1412
1413         ret = dlb2_iface_ldb_port_create(handle, &cfg,  dlb2->poll_mode);
1414         if (ret < 0) {
1415                 DLB2_LOG_ERR("dlb2: dlb2_ldb_port_create error, ret=%d (driver status: %s)\n",
1416                              ret, dlb2_error_strings[cfg.response.status]);
1417                 goto error_exit;
1418         }
1419
1420         qm_port_id = cfg.response.id;
1421
1422         DLB2_LOG_DBG("dlb2: ev_port %d uses qm LB port %d <<<<<\n",
1423                      ev_port->id, qm_port_id);
1424
1425         qm_port = &ev_port->qm_port;
1426         qm_port->ev_port = ev_port; /* back ptr */
1427         qm_port->dlb2 = dlb2; /* back ptr */
1428         /*
1429          * Allocate and init local qe struct(s).
1430          * Note: MOVDIR64 requires the enqueue QE (qe4) to be aligned.
1431          */
1432
1433         snprintf(mz_name, sizeof(mz_name), "dlb2_ldb_port%d",
1434                  ev_port->id);
1435
1436         ret = dlb2_init_qe_mem(qm_port, mz_name);
1437         if (ret < 0) {
1438                 DLB2_LOG_ERR("dlb2: init_qe_mem failed, ret=%d\n", ret);
1439                 goto error_exit;
1440         }
1441
1442         qm_port->id = qm_port_id;
1443
1444         if (dlb2->version == DLB2_HW_V2) {
1445                 qm_port->cached_ldb_credits = 0;
1446                 qm_port->cached_dir_credits = 0;
1447         } else
1448                 qm_port->cached_credits = 0;
1449
1450         /* CQs with depth < 8 use an 8-entry queue, but withhold credits so
1451          * the effective depth is smaller.
1452          */
1453         qm_port->cq_depth = cfg.cq_depth <= 8 ? 8 : cfg.cq_depth;
1454         qm_port->cq_idx = 0;
1455         qm_port->cq_idx_unmasked = 0;
1456
1457         if (dlb2->poll_mode == DLB2_CQ_POLL_MODE_SPARSE)
1458                 qm_port->cq_depth_mask = (qm_port->cq_depth * 4) - 1;
1459         else
1460                 qm_port->cq_depth_mask = qm_port->cq_depth - 1;
1461
1462         qm_port->gen_bit_shift = __builtin_popcount(qm_port->cq_depth_mask);
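        /*
         * Example: cq_depth == 8 in sparse poll mode yields
         * cq_depth_mask == 31 and gen_bit_shift == 5.
         */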
1463         /* starting value of gen bit - it toggles at wrap time */
1464         qm_port->gen_bit = 1;
1465
1466         dlb2_hw_cq_bitmask_init(qm_port, qm_port->cq_depth);
1467
1468         qm_port->int_armed = false;
1469
1470         /* Save off for later use in info and lookup APIs. */
1471         qm_port->qid_mappings = &dlb2->qm_ldb_to_ev_queue_id[0];
1472
1473         qm_port->dequeue_depth = dequeue_depth;
1474         qm_port->token_pop_thresh = dequeue_depth;
1475
1476         /* The default enqueue functions do not include delayed-pop support for
1477          * performance reasons.
1478          */
1479         if (qm_port->token_pop_mode == DELAYED_POP) {
1480                 dlb2->event_dev->enqueue = dlb2_event_enqueue_delayed;
1481                 dlb2->event_dev->enqueue_burst =
1482                         dlb2_event_enqueue_burst_delayed;
1483                 dlb2->event_dev->enqueue_new_burst =
1484                         dlb2_event_enqueue_new_burst_delayed;
1485                 dlb2->event_dev->enqueue_forward_burst =
1486                         dlb2_event_enqueue_forward_burst_delayed;
1487         }
1488
1489         qm_port->owed_tokens = 0;
1490         qm_port->issued_releases = 0;
1491
1492         /* Save config message too. */
1493         rte_memcpy(&qm_port->cfg.ldb, &cfg, sizeof(qm_port->cfg.ldb));
1494
1495         /* update state */
1496         qm_port->state = PORT_STARTED; /* enabled at create time */
1497         qm_port->config_state = DLB2_CONFIGURED;
1498
1499         if (dlb2->version == DLB2_HW_V2) {
1500                 qm_port->dir_credits = dir_credit_high_watermark;
1501                 qm_port->ldb_credits = ldb_credit_high_watermark;
1502                 qm_port->credit_pool[DLB2_DIR_QUEUE] = &dlb2->dir_credit_pool;
1503                 qm_port->credit_pool[DLB2_LDB_QUEUE] = &dlb2->ldb_credit_pool;
1504
1505                 DLB2_LOG_DBG("dlb2: created ldb port %d, depth = %d, ldb credits=%d, dir credits=%d\n",
1506                              qm_port_id,
1507                              dequeue_depth,
1508                              qm_port->ldb_credits,
1509                              qm_port->dir_credits);
1510         } else {
1511                 qm_port->credits = credit_high_watermark;
1512                 qm_port->credit_pool[DLB2_COMBINED_POOL] = &dlb2->credit_pool;
1513
1514                 DLB2_LOG_DBG("dlb2: created ldb port %d, depth = %d, credits=%d\n",
1515                              qm_port_id,
1516                              dequeue_depth,
1517                              qm_port->credits);
1518         }
1519
1520         qm_port->use_scalar = false;
1521
1522 #if (!defined RTE_ARCH_X86_64)
1523         qm_port->use_scalar = true;
1524 #else
1525         if ((qm_port->cq_depth > 64) ||
1526             (!rte_is_power_of_2(qm_port->cq_depth)) ||
1527             (dlb2->vector_opts_enabled == false))
1528                 qm_port->use_scalar = true;
1529 #endif
1530
1531         rte_spinlock_unlock(&handle->resource_lock);
1532
1533         return 0;
1534
1535 error_exit:
1536
1537         if (qm_port)
1538                 dlb2_free_qe_mem(qm_port);
1539
1540         rte_spinlock_unlock(&handle->resource_lock);
1541
1542         DLB2_LOG_ERR("dlb2: create ldb port failed!\n");
1543
1544         return ret;
1545 }
1546
1547 static void
1548 dlb2_port_link_teardown(struct dlb2_eventdev *dlb2,
1549                         struct dlb2_eventdev_port *ev_port)
1550 {
1551         struct dlb2_eventdev_queue *ev_queue;
1552         int i;
1553
1554         for (i = 0; i < DLB2_MAX_NUM_QIDS_PER_LDB_CQ; i++) {
1555                 if (!ev_port->link[i].valid)
1556                         continue;
1557
1558                 ev_queue = &dlb2->ev_queues[ev_port->link[i].queue_id];
1559
1560                 ev_port->link[i].valid = false;
1561                 ev_port->num_links--;
1562                 ev_queue->num_links--;
1563         }
1564 }
1565
1566 static int
1567 dlb2_hw_create_dir_port(struct dlb2_eventdev *dlb2,
1568                         struct dlb2_eventdev_port *ev_port,
1569                         uint32_t dequeue_depth,
1570                         uint32_t enqueue_depth)
1571 {
1572         struct dlb2_hw_dev *handle = &dlb2->qm_instance;
1573         struct dlb2_create_dir_port_args cfg = { {0} };
1574         int ret;
1575         struct dlb2_port *qm_port = NULL;
1576         char mz_name[RTE_MEMZONE_NAMESIZE];
1577         uint32_t qm_port_id;
1578         uint16_t ldb_credit_high_watermark = 0;
1579         uint16_t dir_credit_high_watermark = 0;
1580         uint16_t credit_high_watermark = 0;
1581
1582         if (dlb2 == NULL || handle == NULL)
1583                 return -EINVAL;
1584
1585         if (dequeue_depth < DLB2_MIN_CQ_DEPTH) {
1586                 DLB2_LOG_ERR("dlb2: invalid dequeue_depth, must be %d-%d\n",
1587                              DLB2_MIN_CQ_DEPTH, DLB2_MAX_INPUT_QUEUE_DEPTH);
1588                 return -EINVAL;
1589         }
1590
1591         if (enqueue_depth < DLB2_MIN_ENQUEUE_DEPTH) {
1592                 DLB2_LOG_ERR("dlb2: invalid enqueue_depth, must be at least %d\n",
1593                              DLB2_MIN_ENQUEUE_DEPTH);
1594                 return -EINVAL;
1595         }
1596
1597         rte_spinlock_lock(&handle->resource_lock);
1598
1599         /* Directed queues are configured at link time. */
1600         cfg.queue_id = -1;
1601
1602         /* We round up to the next power of 2 if necessary */
1603         cfg.cq_depth = rte_align32pow2(dequeue_depth);
1604         cfg.cq_depth_threshold = 1;
1605
1606         /* User controls the LDB high watermark via enqueue depth. The DIR high
1607          * watermark is equal, unless the directed credit pool is too small.
1608          */
1609         if (dlb2->version == DLB2_HW_V2) {
1610                 ldb_credit_high_watermark = enqueue_depth;
1611                 /* Don't use enqueue_depth if it would require more directed
1612                  * credits than are available.
1613                  */
1614                 dir_credit_high_watermark =
1615                         RTE_MIN(enqueue_depth,
1616                                 handle->cfg.num_dir_credits / dlb2->num_ports);
1617         } else
1618                 credit_high_watermark = enqueue_depth;
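        /*
         * Worked example (illustrative numbers): with enqueue_depth = 64,
         * 1024 configured directed credits and 16 ports, the directed
         * watermark is RTE_MIN(64, 1024 / 16) = 64; with only 256 directed
         * credits it would be capped at 256 / 16 = 16.
         */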
1619
1620         /* Create the QM port; per-QM port fields are initialized below. */
1621
1622         ret = dlb2_iface_dir_port_create(handle, &cfg, dlb2->poll_mode);
1623         if (ret < 0) {
1624                 DLB2_LOG_ERR("dlb2: dlb2_dir_port_create error, ret=%d (driver status: %s)\n",
1625                              ret, dlb2_error_strings[cfg.response.status]);
1626                 goto error_exit;
1627         }
1628
1629         qm_port_id = cfg.response.id;
1630
1631         DLB2_LOG_DBG("dlb2: ev_port %d uses qm DIR port %d <<<<<\n",
1632                      ev_port->id, qm_port_id);
1633
1634         qm_port = &ev_port->qm_port;
1635         qm_port->ev_port = ev_port; /* back ptr */
1636         qm_port->dlb2 = dlb2;  /* back ptr */
1637
1638         /*
1639          * Init local qe struct(s).
1640          * Note: MOVDIR64 requires the enqueue QE to be aligned
1641          */
1642
1643         snprintf(mz_name, sizeof(mz_name), "dlb2_dir_port%d",
1644                  ev_port->id);
1645
1646         ret = dlb2_init_qe_mem(qm_port, mz_name);
1647
1648         if (ret < 0) {
1649                 DLB2_LOG_ERR("dlb2: init_qe_mem failed, ret=%d\n", ret);
1650                 goto error_exit;
1651         }
1652
1653         qm_port->id = qm_port_id;
1654
1655         if (dlb2->version == DLB2_HW_V2) {
1656                 qm_port->cached_ldb_credits = 0;
1657                 qm_port->cached_dir_credits = 0;
1658         } else
1659                 qm_port->cached_credits = 0;
1660
1661         /* CQs with depth < 8 use an 8-entry queue, but withhold credits so
1662          * the effective depth is smaller.
1663          */
1664         qm_port->cq_depth = cfg.cq_depth <= 8 ? 8 : cfg.cq_depth;
1665         qm_port->cq_idx = 0;
1666         qm_port->cq_idx_unmasked = 0;
1667
1668         if (dlb2->poll_mode == DLB2_CQ_POLL_MODE_SPARSE)
1669                 qm_port->cq_depth_mask = (cfg.cq_depth * 4) - 1;
1670         else
1671                 qm_port->cq_depth_mask = cfg.cq_depth - 1;
1672
1673         qm_port->gen_bit_shift = __builtin_popcount(qm_port->cq_depth_mask);
1674         /* starting value of gen bit - it toggles at wrap time */
1675         qm_port->gen_bit = 1;
1676         dlb2_hw_cq_bitmask_init(qm_port, qm_port->cq_depth);
1677
1678         qm_port->int_armed = false;
1679
1680         /* Save off for later use in info and lookup APIs. */
1681         qm_port->qid_mappings = &dlb2->qm_dir_to_ev_queue_id[0];
1682
1683         qm_port->dequeue_depth = dequeue_depth;
1684
1685         /* Directed ports are auto-pop, by default. */
1686         qm_port->token_pop_mode = AUTO_POP;
1687         qm_port->owed_tokens = 0;
1688         qm_port->issued_releases = 0;
1689
1690         /* Save config message too. */
1691         rte_memcpy(&qm_port->cfg.dir, &cfg, sizeof(qm_port->cfg.dir));
1692
1693         /* update state */
1694         qm_port->state = PORT_STARTED; /* enabled at create time */
1695         qm_port->config_state = DLB2_CONFIGURED;
1696
1697         if (dlb2->version == DLB2_HW_V2) {
1698                 qm_port->dir_credits = dir_credit_high_watermark;
1699                 qm_port->ldb_credits = ldb_credit_high_watermark;
1700                 qm_port->credit_pool[DLB2_DIR_QUEUE] = &dlb2->dir_credit_pool;
1701                 qm_port->credit_pool[DLB2_LDB_QUEUE] = &dlb2->ldb_credit_pool;
1702
1703                 DLB2_LOG_DBG("dlb2: created dir port %d, depth = %d cr=%d,%d\n",
1704                              qm_port_id,
1705                              dequeue_depth,
1706                              dir_credit_high_watermark,
1707                              ldb_credit_high_watermark);
1708         } else {
1709                 qm_port->credits = credit_high_watermark;
1710                 qm_port->credit_pool[DLB2_COMBINED_POOL] = &dlb2->credit_pool;
1711
1712                 DLB2_LOG_DBG("dlb2: created dir port %d, depth = %d cr=%d\n",
1713                              qm_port_id,
1714                              dequeue_depth,
1715                              credit_high_watermark);
1716         }
1717
1718 #if (!defined RTE_ARCH_X86_64)
1719         qm_port->use_scalar = true;
1720 #else
1721         if ((qm_port->cq_depth > 64) ||
1722             (!rte_is_power_of_2(qm_port->cq_depth)) ||
1723             (dlb2->vector_opts_enabled == false))
1724                 qm_port->use_scalar = true;
1725 #endif
1726
1727         rte_spinlock_unlock(&handle->resource_lock);
1728
1729         return 0;
1730
1731 error_exit:
1732
1733         if (qm_port)
1734                 dlb2_free_qe_mem(qm_port);
1735
1736         rte_spinlock_unlock(&handle->resource_lock);
1737
1738         DLB2_LOG_ERR("dlb2: create dir port failed!\n");
1739
1740         return ret;
1741 }
1742
1743 static int
1744 dlb2_eventdev_port_setup(struct rte_eventdev *dev,
1745                          uint8_t ev_port_id,
1746                          const struct rte_event_port_conf *port_conf)
1747 {
1748         struct dlb2_eventdev *dlb2;
1749         struct dlb2_eventdev_port *ev_port;
1750         int ret;
1751         uint32_t hw_credit_quanta, sw_credit_quanta;
1752
1753         if (dev == NULL || port_conf == NULL) {
1754                 DLB2_LOG_ERR("Null parameter\n");
1755                 return -EINVAL;
1756         }
1757
1758         dlb2 = dlb2_pmd_priv(dev);
1759
1760         if (ev_port_id >= DLB2_MAX_NUM_PORTS(dlb2->version))
1761                 return -EINVAL;
1762
1763         if (port_conf->dequeue_depth >
1764                 evdev_dlb2_default_info.max_event_port_dequeue_depth ||
1765             port_conf->enqueue_depth >
1766                 evdev_dlb2_default_info.max_event_port_enqueue_depth)
1767                 return -EINVAL;
1768
1769         ev_port = &dlb2->ev_ports[ev_port_id];
1770         /* configured? */
1771         if (ev_port->setup_done) {
1772                 DLB2_LOG_ERR("evport %d is already configured\n", ev_port_id);
1773                 return -EINVAL;
1774         }
1775
1776         ev_port->qm_port.is_directed = port_conf->event_port_cfg &
1777                 RTE_EVENT_PORT_CFG_SINGLE_LINK;
1778
1779         if (!ev_port->qm_port.is_directed) {
1780                 ret = dlb2_hw_create_ldb_port(dlb2,
1781                                               ev_port,
1782                                               port_conf->dequeue_depth,
1783                                               port_conf->enqueue_depth);
1784                 if (ret < 0) {
1785                         DLB2_LOG_ERR("Failed to create the lB port ve portId=%d\n",
1786                                      ev_port_id);
1787                         DLB2_LOG_ERR("Failed to create the LDB port, ev_port_id=%d\n",
1788                                      ev_port_id);
1789                 }
1790         } else {
1791                 ret = dlb2_hw_create_dir_port(dlb2,
1792                                               ev_port,
1793                                               port_conf->dequeue_depth,
1794                                               port_conf->enqueue_depth);
1795                 if (ret < 0) {
1796                         DLB2_LOG_ERR("Failed to create the DIR port\n");
1797                         return ret;
1798                 }
1799         }
1800
1801         /* Save off port config for reconfig */
1802         ev_port->conf = *port_conf;
1803
1804         ev_port->id = ev_port_id;
1805         ev_port->enq_configured = true;
1806         ev_port->setup_done = true;
1807         ev_port->inflight_max = port_conf->new_event_threshold;
1808         ev_port->implicit_release = !(port_conf->event_port_cfg &
1809                   RTE_EVENT_PORT_CFG_DISABLE_IMPL_REL);
1810         ev_port->outstanding_releases = 0;
1811         ev_port->inflight_credits = 0;
1812         ev_port->dlb2 = dlb2; /* reverse link */
1813
1814         /* Default for worker ports */
1815         sw_credit_quanta = dlb2->sw_credit_quanta;
1816         hw_credit_quanta = dlb2->hw_credit_quanta;
1817
1818         if (port_conf->event_port_cfg & RTE_EVENT_PORT_CFG_HINT_PRODUCER) {
1819                 /* Producer type ports. Mostly enqueue */
1820                 sw_credit_quanta = DLB2_SW_CREDIT_P_QUANTA_DEFAULT;
1821                 hw_credit_quanta = DLB2_SW_CREDIT_P_BATCH_SZ;
1822         }
1823         if (port_conf->event_port_cfg & RTE_EVENT_PORT_CFG_HINT_CONSUMER) {
1824                 /* Consumer type ports. Mostly dequeue */
1825                 sw_credit_quanta = DLB2_SW_CREDIT_C_QUANTA_DEFAULT;
1826                 hw_credit_quanta = DLB2_SW_CREDIT_C_BATCH_SZ;
1827         }
1828         ev_port->credit_update_quanta = sw_credit_quanta;
1829         ev_port->qm_port.hw_credit_quanta = hw_credit_quanta;
1830
1831         /* Tear down pre-existing port->queue links */
1832         if (dlb2->run_state == DLB2_RUN_STATE_STOPPED)
1833                 dlb2_port_link_teardown(dlb2, &dlb2->ev_ports[ev_port_id]);
1834
1835         dev->data->ports[ev_port_id] = &dlb2->ev_ports[ev_port_id];
1836
1837         return 0;
1838 }
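
/*
 * Illustrative application-side sketch (not part of this driver): the
 * producer/consumer hints handled above are passed in through the standard
 * eventdev port configuration. The device/port IDs and the guard macro are
 * hypothetical, and the guard is never defined.
 */
#ifdef DLB2_USAGE_EXAMPLE
static int
example_setup_producer_port(uint8_t dev_id, uint8_t port_id)
{
        struct rte_event_port_conf conf;
        int ret;

        ret = rte_event_port_default_conf_get(dev_id, port_id, &conf);
        if (ret)
                return ret;

        /* Mostly-enqueue port: selects the producer credit quanta above */
        conf.event_port_cfg |= RTE_EVENT_PORT_CFG_HINT_PRODUCER;

        return rte_event_port_setup(dev_id, port_id, &conf);
}
#endif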
1839
1840 static int16_t
1841 dlb2_hw_map_ldb_qid_to_port(struct dlb2_hw_dev *handle,
1842                             uint32_t qm_port_id,
1843                             uint16_t qm_qid,
1844                             uint8_t priority)
1845 {
1846         struct dlb2_map_qid_args cfg;
1847         int32_t ret;
1848
1849         if (handle == NULL)
1850                 return -EINVAL;
1851
1852         /* Build message */
1853         cfg.port_id = qm_port_id;
1854         cfg.qid = qm_qid;
1855         cfg.priority = EV_TO_DLB2_PRIO(priority);
1856
1857         ret = dlb2_iface_map_qid(handle, &cfg);
1858         if (ret < 0) {
1859                 DLB2_LOG_ERR("dlb2: map qid error, ret=%d (driver status: %s)\n",
1860                              ret, dlb2_error_strings[cfg.response.status]);
1861                 DLB2_LOG_ERR("dlb2: grp=%d, qm_port=%d, qm_qid=%d prio=%d\n",
1862                              handle->domain_id, cfg.port_id,
1863                              cfg.qid,
1864                              cfg.priority);
1865         } else {
1866                 DLB2_LOG_DBG("dlb2: mapped queue %d to qm_port %d\n",
1867                              qm_qid, qm_port_id);
1868         }
1869
1870         return ret;
1871 }
1872
1873 static int
1874 dlb2_event_queue_join_ldb(struct dlb2_eventdev *dlb2,
1875                           struct dlb2_eventdev_port *ev_port,
1876                           struct dlb2_eventdev_queue *ev_queue,
1877                           uint8_t priority)
1878 {
1879         int first_avail = -1;
1880         int ret, i;
1881
1882         for (i = 0; i < DLB2_MAX_NUM_QIDS_PER_LDB_CQ; i++) {
1883                 if (ev_port->link[i].valid) {
1884                         if (ev_port->link[i].queue_id == ev_queue->id &&
1885                             ev_port->link[i].priority == priority) {
1886                                 if (ev_port->link[i].mapped)
1887                                         return 0; /* already mapped */
1888                                 first_avail = i;
1889                         }
1890                 } else if (first_avail == -1)
1891                         first_avail = i;
1892         }
1893         if (first_avail == -1) {
1894                 DLB2_LOG_ERR("dlb2: qm_port %d has no available QID slots.\n",
1895                              ev_port->qm_port.id);
1896                 return -EINVAL;
1897         }
1898
1899         ret = dlb2_hw_map_ldb_qid_to_port(&dlb2->qm_instance,
1900                                           ev_port->qm_port.id,
1901                                           ev_queue->qm_queue.id,
1902                                           priority);
1903
1904         if (!ret)
1905                 ev_port->link[first_avail].mapped = true;
1906
1907         return ret;
1908 }
1909
1910 static int32_t
1911 dlb2_hw_create_dir_queue(struct dlb2_eventdev *dlb2,
1912                          struct dlb2_eventdev_queue *ev_queue,
1913                          int32_t qm_port_id)
1914 {
1915         struct dlb2_hw_dev *handle = &dlb2->qm_instance;
1916         struct dlb2_create_dir_queue_args cfg;
1917         int32_t ret;
1918
1919         /* The directed port is always configured before its queue */
1920         cfg.port_id = qm_port_id;
1921
1922         if (ev_queue->depth_threshold == 0) {
1923                 cfg.depth_threshold = dlb2->default_depth_thresh;
1924                 ev_queue->depth_threshold =
1925                         dlb2->default_depth_thresh;
1926         } else
1927                 cfg.depth_threshold = ev_queue->depth_threshold;
1928
1929         ret = dlb2_iface_dir_queue_create(handle, &cfg);
1930         if (ret < 0) {
1931                 DLB2_LOG_ERR("dlb2: create DIR event queue error, ret=%d (driver status: %s)\n",
1932                              ret, dlb2_error_strings[cfg.response.status]);
1933                 return -EINVAL;
1934         }
1935
1936         return cfg.response.id;
1937 }
1938
1939 static int
1940 dlb2_eventdev_dir_queue_setup(struct dlb2_eventdev *dlb2,
1941                               struct dlb2_eventdev_queue *ev_queue,
1942                               struct dlb2_eventdev_port *ev_port)
1943 {
1944         int32_t qm_qid;
1945
1946         qm_qid = dlb2_hw_create_dir_queue(dlb2, ev_queue, ev_port->qm_port.id);
1947
1948         if (qm_qid < 0) {
1949                 DLB2_LOG_ERR("Failed to create the DIR queue\n");
1950                 return qm_qid;
1951         }
1952
1953         dlb2->qm_dir_to_ev_queue_id[qm_qid] = ev_queue->id;
1954
1955         ev_queue->qm_queue.id = qm_qid;
1956
1957         return 0;
1958 }
1959
1960 static int
1961 dlb2_do_port_link(struct rte_eventdev *dev,
1962                   struct dlb2_eventdev_queue *ev_queue,
1963                   struct dlb2_eventdev_port *ev_port,
1964                   uint8_t prio)
1965 {
1966         struct dlb2_eventdev *dlb2 = dlb2_pmd_priv(dev);
1967         int err;
1968
1969         /* Don't link until start time. */
1970         if (dlb2->run_state == DLB2_RUN_STATE_STOPPED)
1971                 return 0;
1972
1973         if (ev_queue->qm_queue.is_directed)
1974                 err = dlb2_eventdev_dir_queue_setup(dlb2, ev_queue, ev_port);
1975         else
1976                 err = dlb2_event_queue_join_ldb(dlb2, ev_port, ev_queue, prio);
1977
1978         if (err) {
1979                 DLB2_LOG_ERR("port link failure for %s ev_q %d, ev_port %d\n",
1980                              ev_queue->qm_queue.is_directed ? "DIR" : "LDB",
1981                              ev_queue->id, ev_port->id);
1982
1983                 rte_errno = err;
1984                 return -1;
1985         }
1986
1987         return 0;
1988 }
1989
1990 static int
1991 dlb2_validate_port_link(struct dlb2_eventdev_port *ev_port,
1992                         uint8_t queue_id,
1993                         bool link_exists,
1994                         int index)
1995 {
1996         struct dlb2_eventdev *dlb2 = ev_port->dlb2;
1997         struct dlb2_eventdev_queue *ev_queue;
1998         bool port_is_dir, queue_is_dir;
1999
2000         if (queue_id > dlb2->num_queues) {
2001                 rte_errno = -EINVAL;
2002                 return -1;
2003         }
2004
2005         ev_queue = &dlb2->ev_queues[queue_id];
2006
2007         if (!ev_queue->setup_done &&
2008             ev_queue->qm_queue.config_state != DLB2_PREV_CONFIGURED) {
2009                 rte_errno = -EINVAL;
2010                 return -1;
2011         }
2012
2013         port_is_dir = ev_port->qm_port.is_directed;
2014         queue_is_dir = ev_queue->qm_queue.is_directed;
2015
2016         if (port_is_dir != queue_is_dir) {
2017                 DLB2_LOG_ERR("%s queue %u can't link to %s port %u\n",
2018                              queue_is_dir ? "DIR" : "LDB", ev_queue->id,
2019                              port_is_dir ? "DIR" : "LDB", ev_port->id);
2020
2021                 rte_errno = -EINVAL;
2022                 return -1;
2023         }
2024
2025         /* Check if there is space for the requested link */
2026         if (!link_exists && index == -1) {
2027                 DLB2_LOG_ERR("no space for new link\n");
2028                 rte_errno = -ENOSPC;
2029                 return -1;
2030         }
2031
2032         /* Check if the directed port is already linked */
2033         if (ev_port->qm_port.is_directed && ev_port->num_links > 0 &&
2034             !link_exists) {
2035                 DLB2_LOG_ERR("Can't link DIR port %d to >1 queues\n",
2036                              ev_port->id);
2037                 rte_errno = -EINVAL;
2038                 return -1;
2039         }
2040
2041         /* Check if the directed queue is already linked */
2042         if (ev_queue->qm_queue.is_directed && ev_queue->num_links > 0 &&
2043             !link_exists) {
2044                 DLB2_LOG_ERR("Can't link DIR queue %d to >1 ports\n",
2045                              ev_queue->id);
2046                 rte_errno = -EINVAL;
2047                 return -1;
2048         }
2049
2050         return 0;
2051 }
2052
2053 static int
2054 dlb2_eventdev_port_link(struct rte_eventdev *dev, void *event_port,
2055                         const uint8_t queues[], const uint8_t priorities[],
2056                         uint16_t nb_links)
2057
2058 {
2059         struct dlb2_eventdev_port *ev_port = event_port;
2060         struct dlb2_eventdev *dlb2;
2061         int i, j;
2062
2063         RTE_SET_USED(dev);
2064
2065         if (ev_port == NULL) {
2066                 DLB2_LOG_ERR("dlb2: evport not setup\n");
2067                 rte_errno = -EINVAL;
2068                 return 0;
2069         }
2070
2071         if (!ev_port->setup_done &&
2072             ev_port->qm_port.config_state != DLB2_PREV_CONFIGURED) {
2073                 DLB2_LOG_ERR("dlb2: evport not setup\n");
2074                 rte_errno = -EINVAL;
2075                 return 0;
2076         }
2077
2078         /* Note: rte_event_port_link() ensures the PMD won't receive a NULL
2079          * queues pointer.
2080          */
2081         if (nb_links == 0) {
2082                 DLB2_LOG_DBG("dlb2: nb_links is 0\n");
2083                 return 0; /* Ignore and return success */
2084         }
2085
2086         dlb2 = ev_port->dlb2;
2087
2088         DLB2_LOG_DBG("Linking %u queues to %s port %d\n",
2089                      nb_links,
2090                      ev_port->qm_port.is_directed ? "DIR" : "LDB",
2091                      ev_port->id);
2092
2093         for (i = 0; i < nb_links; i++) {
2094                 struct dlb2_eventdev_queue *ev_queue;
2095                 uint8_t queue_id, prio;
2096                 bool found = false;
2097                 int index = -1;
2098
2099                 queue_id = queues[i];
2100                 prio = priorities[i];
2101
2102                 /* Check if the link already exists. */
2103                 for (j = 0; j < DLB2_MAX_NUM_QIDS_PER_LDB_CQ; j++)
2104                         if (ev_port->link[j].valid) {
2105                                 if (ev_port->link[j].queue_id == queue_id) {
2106                                         found = true;
2107                                         index = j;
2108                                         break;
2109                                 }
2110                         } else if (index == -1) {
2111                                 index = j;
2112                         }
2113
2114                 /* could not link */
2115                 if (index == -1)
2116                         break;
2117
2118                 /* Check if already linked at the requested priority */
2119                 if (found && ev_port->link[j].priority == prio)
2120                         continue;
2121
2122                 if (dlb2_validate_port_link(ev_port, queue_id, found, index))
2123                         break; /* return index of offending queue */
2124
2125                 ev_queue = &dlb2->ev_queues[queue_id];
2126
2127                 if (dlb2_do_port_link(dev, ev_queue, ev_port, prio))
2128                         break; /* return index of offending queue */
2129
2130                 ev_queue->num_links++;
2131
2132                 ev_port->link[index].queue_id = queue_id;
2133                 ev_port->link[index].priority = prio;
2134                 ev_port->link[index].valid = true;
2135                 /* Only count new links; an existing entry was just a prio change */
2136                 if (!found)
2137                         ev_port->num_links++;
2138         }
2139         return i;
2140 }
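
/*
 * Illustrative application-side sketch (not part of this driver): linking
 * two queues to a load-balanced port at different priorities. The IDs and
 * the guard macro are hypothetical, and the guard is never defined.
 */
#ifdef DLB2_USAGE_EXAMPLE
static int
example_link_port(uint8_t dev_id, uint8_t port_id)
{
        const uint8_t queues[] = {0, 1};
        const uint8_t priorities[] = {
                RTE_EVENT_DEV_PRIORITY_HIGHEST,
                RTE_EVENT_DEV_PRIORITY_NORMAL,
        };

        /* Returns the number of links established (2 on success) */
        return rte_event_port_link(dev_id, port_id, queues, priorities,
                                   RTE_DIM(queues));
}
#endif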
2141
2142 static int16_t
2143 dlb2_hw_unmap_ldb_qid_from_port(struct dlb2_hw_dev *handle,
2144                                 uint32_t qm_port_id,
2145                                 uint16_t qm_qid)
2146 {
2147         struct dlb2_unmap_qid_args cfg;
2148         int32_t ret;
2149
2150         if (handle == NULL)
2151                 return -EINVAL;
2152
2153         cfg.port_id = qm_port_id;
2154         cfg.qid = qm_qid;
2155
2156         ret = dlb2_iface_unmap_qid(handle, &cfg);
2157         if (ret < 0)
2158                 DLB2_LOG_ERR("dlb2: unmap qid error, ret=%d (driver status: %s)\n",
2159                              ret, dlb2_error_strings[cfg.response.status]);
2160
2161         return ret;
2162 }
2163
2164 static int
2165 dlb2_event_queue_detach_ldb(struct dlb2_eventdev *dlb2,
2166                             struct dlb2_eventdev_port *ev_port,
2167                             struct dlb2_eventdev_queue *ev_queue)
2168 {
2169         int ret, i;
2170
2171         /* Don't unlink until start time. */
2172         if (dlb2->run_state == DLB2_RUN_STATE_STOPPED)
2173                 return 0;
2174
2175         for (i = 0; i < DLB2_MAX_NUM_QIDS_PER_LDB_CQ; i++) {
2176                 if (ev_port->link[i].valid &&
2177                     ev_port->link[i].queue_id == ev_queue->id)
2178                         break; /* found */
2179         }
2180
2181         /* This is expected with the eventdev API, which blindly
2182          * attempts to unmap all of a port's queues.
2183          */
2184         if (i == DLB2_MAX_NUM_QIDS_PER_LDB_CQ) {
2185                 DLB2_LOG_DBG("dlb2: ignoring LB QID %d not mapped for qm_port %d.\n",
2186                              ev_queue->qm_queue.id,
2187                              ev_port->qm_port.id);
2188                 return 0;
2189         }
2190
2191         ret = dlb2_hw_unmap_ldb_qid_from_port(&dlb2->qm_instance,
2192                                               ev_port->qm_port.id,
2193                                               ev_queue->qm_queue.id);
2194         if (!ret)
2195                 ev_port->link[i].mapped = false;
2196
2197         return ret;
2198 }
2199
2200 static int
2201 dlb2_eventdev_port_unlink(struct rte_eventdev *dev, void *event_port,
2202                           uint8_t queues[], uint16_t nb_unlinks)
2203 {
2204         struct dlb2_eventdev_port *ev_port = event_port;
2205         struct dlb2_eventdev *dlb2;
2206         int i;
2207
2208         RTE_SET_USED(dev);
2209
2210         if (!ev_port->setup_done) {
2211                 DLB2_LOG_ERR("dlb2: evport %d is not configured\n",
2212                              ev_port->id);
2213                 rte_errno = -EINVAL;
2214                 return 0;
2215         }
2216
2217         if (queues == NULL || nb_unlinks == 0) {
2218                 DLB2_LOG_DBG("dlb2: queues is NULL or nb_unlinks is 0\n");
2219                 return 0; /* Ignore and return success */
2220         }
2221
2222         if (ev_port->qm_port.is_directed) {
2223                 DLB2_LOG_DBG("dlb2: ignore unlink from dir port %d\n",
2224                              ev_port->id);
2225                 rte_errno = 0;
2226                 return nb_unlinks; /* as if success */
2227         }
2228
2229         dlb2 = ev_port->dlb2;
2230
2231         for (i = 0; i < nb_unlinks; i++) {
2232                 struct dlb2_eventdev_queue *ev_queue;
2233                 int ret, j;
2234
2235                 if (queues[i] >= dlb2->num_queues) {
2236                         DLB2_LOG_ERR("dlb2: invalid queue id %d\n", queues[i]);
2237                         rte_errno = -EINVAL;
2238                         return i; /* return index of offending queue */
2239                 }
2240
2241                 ev_queue = &dlb2->ev_queues[queues[i]];
2242
2243                 /* Does a link exist? */
2244                 for (j = 0; j < DLB2_MAX_NUM_QIDS_PER_LDB_CQ; j++)
2245                         if (ev_port->link[j].queue_id == queues[i] &&
2246                             ev_port->link[j].valid)
2247                                 break;
2248
2249                 if (j == DLB2_MAX_NUM_QIDS_PER_LDB_CQ)
2250                         continue;
2251
2252                 ret = dlb2_event_queue_detach_ldb(dlb2, ev_port, ev_queue);
2253                 if (ret) {
2254                         DLB2_LOG_ERR("unlink err=%d for port %d queue %d\n",
2255                                      ret, ev_port->id, queues[i]);
2256                         rte_errno = -ENOENT;
2257                         return i; /* return index of offending queue */
2258                 }
2259
2260                 ev_port->link[j].valid = false;
2261                 ev_port->num_links--;
2262                 ev_queue->num_links--;
2263         }
2264
2265         return nb_unlinks;
2266 }
2267
2268 static int
2269 dlb2_eventdev_port_unlinks_in_progress(struct rte_eventdev *dev,
2270                                        void *event_port)
2271 {
2272         struct dlb2_eventdev_port *ev_port = event_port;
2273         struct dlb2_eventdev *dlb2;
2274         struct dlb2_hw_dev *handle;
2275         struct dlb2_pending_port_unmaps_args cfg;
2276         int ret;
2277
2278         RTE_SET_USED(dev);
2279
2280         if (!ev_port->setup_done) {
2281                 DLB2_LOG_ERR("dlb2: evport %d is not configured\n",
2282                              ev_port->id);
2283                 rte_errno = -EINVAL;
2284                 return 0;
2285         }
2286
2287         cfg.port_id = ev_port->qm_port.id;
2288         dlb2 = ev_port->dlb2;
2289         handle = &dlb2->qm_instance;
2290         ret = dlb2_iface_pending_port_unmaps(handle, &cfg);
2291
2292         if (ret < 0) {
2293                 DLB2_LOG_ERR("dlb2: num_unlinks_in_progress ret=%d (driver status: %s)\n",
2294                              ret, dlb2_error_strings[cfg.response.status]);
2295                 return ret;
2296         }
2297
2298         return cfg.response.id;
2299 }
2300
2301 static int
2302 dlb2_eventdev_reapply_configuration(struct rte_eventdev *dev)
2303 {
2304         struct dlb2_eventdev *dlb2 = dlb2_pmd_priv(dev);
2305         int ret, i;
2306
2307         /* If an event queue or port was previously configured, but hasn't been
2308          * reconfigured, reapply its original configuration.
2309          */
2310         for (i = 0; i < dlb2->num_queues; i++) {
2311                 struct dlb2_eventdev_queue *ev_queue;
2312
2313                 ev_queue = &dlb2->ev_queues[i];
2314
2315                 if (ev_queue->qm_queue.config_state != DLB2_PREV_CONFIGURED)
2316                         continue;
2317
2318                 ret = dlb2_eventdev_queue_setup(dev, i, &ev_queue->conf);
2319                 if (ret < 0) {
2320                         DLB2_LOG_ERR("dlb2: failed to reconfigure queue %d\n", i);
2321                         return ret;
2322                 }
2323         }
2324
2325         for (i = 0; i < dlb2->num_ports; i++) {
2326                 struct dlb2_eventdev_port *ev_port = &dlb2->ev_ports[i];
2327
2328                 if (ev_port->qm_port.config_state != DLB2_PREV_CONFIGURED)
2329                         continue;
2330
2331                 ret = dlb2_eventdev_port_setup(dev, i, &ev_port->conf);
2332                 if (ret < 0) {
2333                         DLB2_LOG_ERR("dlb2: failed to reconfigure ev_port %d\n",
2334                                      i);
2335                         return ret;
2336                 }
2337         }
2338
2339         return 0;
2340 }
2341
2342 static int
2343 dlb2_eventdev_apply_port_links(struct rte_eventdev *dev)
2344 {
2345         struct dlb2_eventdev *dlb2 = dlb2_pmd_priv(dev);
2346         int i;
2347
2348         /* Perform requested port->queue links */
2349         for (i = 0; i < dlb2->num_ports; i++) {
2350                 struct dlb2_eventdev_port *ev_port = &dlb2->ev_ports[i];
2351                 int j;
2352
2353                 for (j = 0; j < DLB2_MAX_NUM_QIDS_PER_LDB_CQ; j++) {
2354                         struct dlb2_eventdev_queue *ev_queue;
2355                         uint8_t prio, queue_id;
2356
2357                         if (!ev_port->link[j].valid)
2358                                 continue;
2359
2360                         prio = ev_port->link[j].priority;
2361                         queue_id = ev_port->link[j].queue_id;
2362
2363                         if (dlb2_validate_port_link(ev_port, queue_id, true, j))
2364                                 return -EINVAL;
2365
2366                         ev_queue = &dlb2->ev_queues[queue_id];
2367
2368                         if (dlb2_do_port_link(dev, ev_queue, ev_port, prio))
2369                                 return -EINVAL;
2370                 }
2371         }
2372
2373         return 0;
2374 }
2375
2376 static int
2377 dlb2_eventdev_start(struct rte_eventdev *dev)
2378 {
2379         struct dlb2_eventdev *dlb2 = dlb2_pmd_priv(dev);
2380         struct dlb2_hw_dev *handle = &dlb2->qm_instance;
2381         struct dlb2_start_domain_args cfg;
2382         int ret, i;
2383
2384         rte_spinlock_lock(&dlb2->qm_instance.resource_lock);
2385         if (dlb2->run_state != DLB2_RUN_STATE_STOPPED) {
2386                 DLB2_LOG_ERR("bad state %d for dev_start\n",
2387                              (int)dlb2->run_state);
2388                 rte_spinlock_unlock(&dlb2->qm_instance.resource_lock);
2389                 return -EINVAL;
2390         }
2391         dlb2->run_state = DLB2_RUN_STATE_STARTING;
2392         rte_spinlock_unlock(&dlb2->qm_instance.resource_lock);
2393
2394         /* If the device was configured more than once, some event ports and/or
2395          * queues may need to be reconfigured.
2396          */
2397         ret = dlb2_eventdev_reapply_configuration(dev);
2398         if (ret)
2399                 return ret;
2400
2401         /* The DLB PMD delays port links until the device is started. */
2402         ret = dlb2_eventdev_apply_port_links(dev);
2403         if (ret)
2404                 return ret;
2405
2406         for (i = 0; i < dlb2->num_ports; i++) {
2407                 if (!dlb2->ev_ports[i].setup_done) {
2408                         DLB2_LOG_ERR("dlb2: port %d not setup\n", i);
2409                         return -ESTALE;
2410                 }
2411         }
2412
2413         for (i = 0; i < dlb2->num_queues; i++) {
2414                 if (dlb2->ev_queues[i].num_links == 0) {
2415                         DLB2_LOG_ERR("dlb2: queue %d is not linked\n", i);
2416                         return -ENOLINK;
2417                 }
2418         }
2419
2420         ret = dlb2_iface_sched_domain_start(handle, &cfg);
2421         if (ret < 0) {
2422                 DLB2_LOG_ERR("dlb2: sched_domain_start ret=%d (driver status: %s)\n",
2423                              ret, dlb2_error_strings[cfg.response.status]);
2424                 return ret;
2425         }
2426
2427         dlb2->run_state = DLB2_RUN_STATE_STARTED;
2428         DLB2_LOG_DBG("dlb2: sched_domain_start completed OK\n");
2429
2430         return 0;
2431 }
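
/*
 * Illustrative call order (application side, not part of this driver): the
 * checks above mean rte_event_dev_start() only succeeds once every port has
 * been set up and every queue has at least one link, i.e.
 * rte_event_dev_configure() -> rte_event_queue_setup()/rte_event_port_setup()
 * -> rte_event_port_link() -> rte_event_dev_start().
 */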
2432
2433 static uint8_t cmd_byte_map[DLB2_NUM_PORT_TYPES][DLB2_NUM_HW_SCHED_TYPES] = {
2434         {
2435                 /* Load-balanced cmd bytes */
2436                 [RTE_EVENT_OP_NEW] = DLB2_NEW_CMD_BYTE,
2437                 [RTE_EVENT_OP_FORWARD] = DLB2_FWD_CMD_BYTE,
2438                 [RTE_EVENT_OP_RELEASE] = DLB2_COMP_CMD_BYTE,
2439         },
2440         {
2441                 /* Directed cmd bytes */
2442                 [RTE_EVENT_OP_NEW] = DLB2_NEW_CMD_BYTE,
2443                 [RTE_EVENT_OP_FORWARD] = DLB2_NEW_CMD_BYTE,
2444                 [RTE_EVENT_OP_RELEASE] = DLB2_NOOP_CMD_BYTE,
2445         },
2446 };
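
/*
 * Note (added for clarity): directed ports have no load-balanced scheduling
 * context to complete, so OP_FORWARD is issued as a plain NEW command and
 * OP_RELEASE becomes a no-op; only load-balanced traffic uses the
 * completion command byte.
 */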
2447
2448 static inline uint32_t
2449 dlb2_port_credits_get(struct dlb2_port *qm_port,
2450                       enum dlb2_hw_queue_types type)
2451 {
2452         uint32_t credits = *qm_port->credit_pool[type];
2453         /* By default hw_credit_quanta is DLB2_SW_CREDIT_BATCH_SZ */
2454         uint32_t batch_size = qm_port->hw_credit_quanta;
2455
2456         if (unlikely(credits < batch_size))
2457                 batch_size = credits;
2458
2459         if (likely(credits &&
2460                    __atomic_compare_exchange_n(
2461                         qm_port->credit_pool[type],
2462                         &credits, credits - batch_size, false,
2463                         __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST)))
2464                 return batch_size;
2465         else
2466                 return 0;
2467 }
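
/*
 * Worked example (illustrative): with hw_credit_quanta = 32, the port tries
 * to take 32 credits from the shared pool in a single CAS; if only 20
 * remain it takes all 20, and if the pool is empty or the CAS loses a race
 * it returns 0 without retrying.
 */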
2468
2469 static inline void
2470 dlb2_replenish_sw_credits(struct dlb2_eventdev *dlb2,
2471                           struct dlb2_eventdev_port *ev_port)
2472 {
2473         uint16_t quanta = ev_port->credit_update_quanta;
2474
2475         if (ev_port->inflight_credits >= quanta * 2) {
2476                 /* Replenish credits, saving one quanta for enqueues */
2477                 uint16_t val = ev_port->inflight_credits - quanta;
2478
2479                 __atomic_fetch_sub(&dlb2->inflights, val, __ATOMIC_SEQ_CST);
2480                 ev_port->inflight_credits -= val;
2481         }
2482 }
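
/*
 * Worked example (illustrative): with credit_update_quanta = 32, a port
 * holding 70 inflight credits is above the 2 * 32 threshold, so it returns
 * 70 - 32 = 38 credits to the device-wide inflight count and keeps one
 * quanta (32) for future enqueues.
 */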
2483
2484 static inline int
2485 dlb2_check_enqueue_sw_credits(struct dlb2_eventdev *dlb2,
2486                               struct dlb2_eventdev_port *ev_port)
2487 {
2488         uint32_t sw_inflights = __atomic_load_n(&dlb2->inflights,
2489                                                 __ATOMIC_SEQ_CST);
2490         const int num = 1;
2491
2492         if (unlikely(ev_port->inflight_max < sw_inflights)) {
2493                 DLB2_INC_STAT(ev_port->stats.traffic.tx_nospc_inflight_max, 1);
2494                 rte_errno = -ENOSPC;
2495                 return 1;
2496         }
2497
2498         if (ev_port->inflight_credits < num) {
2499                 /* check if event enqueue brings ev_port over max threshold */
2500                 uint32_t credit_update_quanta = ev_port->credit_update_quanta;
2501
2502                 if (sw_inflights + credit_update_quanta >
2503                                 dlb2->new_event_limit) {
2504                         DLB2_INC_STAT(
2505                         ev_port->stats.traffic.tx_nospc_new_event_limit,
2506                         1);
2507                         rte_errno = -ENOSPC;
2508                         return 1;
2509                 }
2510
2511                 __atomic_fetch_add(&dlb2->inflights, credit_update_quanta,
2512                                    __ATOMIC_SEQ_CST);
2513                 ev_port->inflight_credits += (credit_update_quanta);
2514
2515                 if (ev_port->inflight_credits < num) {
2516                         DLB2_INC_STAT(
2517                         ev_port->stats.traffic.tx_nospc_inflight_credits,
2518                         1);
2519                         rte_errno = -ENOSPC;
2520                         return 1;
2521                 }
2522         }
2523
2524         return 0;
2525 }
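
/*
 * Worked example (illustrative): with credit_update_quanta = 32 and
 * new_event_limit = 1024, a port with no cached inflight credits reserves
 * 32 more in one atomic add; the reservation fails with -ENOSPC if
 * sw_inflights + 32 would exceed 1024, or if sw_inflights already exceeds
 * the port's new_event_threshold.
 */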
2526
2527 static inline int
2528 dlb2_check_enqueue_hw_ldb_credits(struct dlb2_port *qm_port)
2529 {
2530         if (unlikely(qm_port->cached_ldb_credits == 0)) {
2531                 qm_port->cached_ldb_credits =
2532                         dlb2_port_credits_get(qm_port,
2533                                               DLB2_LDB_QUEUE);
2534                 if (unlikely(qm_port->cached_ldb_credits == 0)) {
2535                         DLB2_INC_STAT(
2536                         qm_port->ev_port->stats.traffic.tx_nospc_ldb_hw_credits,
2537                         1);
2538                         DLB2_LOG_DBG("ldb credits exhausted\n");
2539                         return 1; /* credits exhausted */
2540                 }
2541         }
2542
2543         return 0;
2544 }
2545
2546 static inline int
2547 dlb2_check_enqueue_hw_dir_credits(struct dlb2_port *qm_port)
2548 {
2549         if (unlikely(qm_port->cached_dir_credits == 0)) {
2550                 qm_port->cached_dir_credits =
2551                         dlb2_port_credits_get(qm_port,
2552                                               DLB2_DIR_QUEUE);
2553                 if (unlikely(qm_port->cached_dir_credits == 0)) {
2554                         DLB2_INC_STAT(
2555                         qm_port->ev_port->stats.traffic.tx_nospc_dir_hw_credits,
2556                         1);
2557                         DLB2_LOG_DBG("dir credits exhausted\n");
2558                         return 1; /* credits exhausted */
2559                 }
2560         }
2561
2562         return 0;
2563 }
2564
2565 static inline int
2566 dlb2_check_enqueue_hw_credits(struct dlb2_port *qm_port)
2567 {
2568         if (unlikely(qm_port->cached_credits == 0)) {
2569                 qm_port->cached_credits =
2570                         dlb2_port_credits_get(qm_port,
2571                                               DLB2_COMBINED_POOL);
2572                 if (unlikely(qm_port->cached_credits == 0)) {
2573                         DLB2_INC_STAT(
2574                         qm_port->ev_port->stats.traffic.tx_nospc_hw_credits, 1);
2575                         DLB2_LOG_DBG("credits exhausted\n");
2576                         return 1; /* credits exhausted */
2577                 }
2578         }
2579
2580         return 0;
2581 }
2582
2583 static __rte_always_inline void
2584 dlb2_pp_write(struct dlb2_enqueue_qe *qe4,
2585               struct process_local_port_data *port_data)
2586 {
2587         dlb2_movdir64b(port_data->pp_addr, qe4);
2588 }
2589
2590 static inline int
2591 dlb2_consume_qe_immediate(struct dlb2_port *qm_port, int num)
2592 {
2593         struct process_local_port_data *port_data;
2594         struct dlb2_cq_pop_qe *qe;
2595
2596         RTE_ASSERT(qm_port->config_state == DLB2_CONFIGURED);
2597
2598         qe = qm_port->consume_qe;
2599
2600         qe->tokens = num - 1;
2601
2602         /* No store fence needed since no pointer is being sent, and CQ token
2603          * pops can be safely reordered with other HCWs.
2604          */
2605         port_data = &dlb2_port[qm_port->id][PORT_TYPE(qm_port)];
2606
2607         dlb2_movntdq_single(port_data->pp_addr, qe);
2608
2609         DLB2_LOG_DBG("dlb2: consume immediate - %d QEs\n", num);
2610
2611         qm_port->owed_tokens = 0;
2612
2613         return 0;
2614 }
2615
2616 static inline void
2617 dlb2_hw_do_enqueue(struct dlb2_port *qm_port,
2618                    bool do_sfence,
2619                    struct process_local_port_data *port_data)
2620 {
2621         /* Since MOVDIR64B is weakly-ordered, use an SFENCE to ensure that
2622          * application writes complete before enqueueing the QE.
2623          */
2624         if (do_sfence)
2625                 rte_wmb();
2626
2627         dlb2_pp_write(qm_port->qe4, port_data);
2628 }
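
/*
 * Note (added for clarity): qe4 holds four 16-byte enqueue QEs built by
 * dlb2_event_build_hcws(), so one 64-byte MOVDIR64B write pushes up to four
 * events to the producer port at once.
 */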
2629
2630 static inline void
2631 dlb2_construct_token_pop_qe(struct dlb2_port *qm_port, int idx)
2632 {
2633         struct dlb2_cq_pop_qe *qe = (void *)qm_port->qe4;
2634         int num = qm_port->owed_tokens;
2635
2636         qe[idx].cmd_byte = DLB2_POP_CMD_BYTE;
2637         qe[idx].tokens = num - 1;
2638
2639         qm_port->owed_tokens = 0;
2640 }
2641
2642 static inline void
2643 dlb2_event_build_hcws(struct dlb2_port *qm_port,
2644                       const struct rte_event ev[],
2645                       int num,
2646                       uint8_t *sched_type,
2647                       uint8_t *queue_id)
2648 {
2649         struct dlb2_enqueue_qe *qe;
2650         uint16_t sched_word[4];
2651         __m128i sse_qe[2];
2652         int i;
2653
2654         qe = qm_port->qe4;
2655
2656         sse_qe[0] = _mm_setzero_si128();
2657         sse_qe[1] = _mm_setzero_si128();
2658
2659         switch (num) {
2660         case 4:
2661                 /* Construct the metadata portion of two HCWs in one 128b SSE
2662                  * register. HCW metadata is constructed in the SSE registers
2663                  * like so:
2664                  * sse_qe[0][63:0]:   qe[0]'s metadata
2665                  * sse_qe[0][127:64]: qe[1]'s metadata
2666                  * sse_qe[1][63:0]:   qe[2]'s metadata
2667                  * sse_qe[1][127:64]: qe[3]'s metadata
2668                  */
2669
2670                 /* Convert the event operation into a command byte and store it
2671                  * in the metadata:
2672                  * sse_qe[0][63:56]   = cmd_byte_map[is_directed][ev[0].op]
2673                  * sse_qe[0][127:120] = cmd_byte_map[is_directed][ev[1].op]
2674                  * sse_qe[1][63:56]   = cmd_byte_map[is_directed][ev[2].op]
2675                  * sse_qe[1][127:120] = cmd_byte_map[is_directed][ev[3].op]
2676                  */
2677 #define DLB2_QE_CMD_BYTE 7
2678                 sse_qe[0] = _mm_insert_epi8(sse_qe[0],
2679                                 cmd_byte_map[qm_port->is_directed][ev[0].op],
2680                                 DLB2_QE_CMD_BYTE);
2681                 sse_qe[0] = _mm_insert_epi8(sse_qe[0],
2682                                 cmd_byte_map[qm_port->is_directed][ev[1].op],
2683                                 DLB2_QE_CMD_BYTE + 8);
2684                 sse_qe[1] = _mm_insert_epi8(sse_qe[1],
2685                                 cmd_byte_map[qm_port->is_directed][ev[2].op],
2686                                 DLB2_QE_CMD_BYTE);
2687                 sse_qe[1] = _mm_insert_epi8(sse_qe[1],
2688                                 cmd_byte_map[qm_port->is_directed][ev[3].op],
2689                                 DLB2_QE_CMD_BYTE + 8);
2690
2691                 /* Store priority, scheduling type, and queue ID in the sched
2692                  * word array because these values are re-used when the
2693                  * destination is a directed queue.
2694                  */
2695                 sched_word[0] = EV_TO_DLB2_PRIO(ev[0].priority) << 10 |
2696                                 sched_type[0] << 8 |
2697                                 queue_id[0];
2698                 sched_word[1] = EV_TO_DLB2_PRIO(ev[1].priority) << 10 |
2699                                 sched_type[1] << 8 |
2700                                 queue_id[1];
2701                 sched_word[2] = EV_TO_DLB2_PRIO(ev[2].priority) << 10 |
2702                                 sched_type[2] << 8 |
2703                                 queue_id[2];
2704                 sched_word[3] = EV_TO_DLB2_PRIO(ev[3].priority) << 10 |
2705                                 sched_type[3] << 8 |
2706                                 queue_id[3];
2707
2708                 /* Store the event priority, scheduling type, and queue ID in
2709                  * the metadata:
2710                  * sse_qe[0][31:16] = sched_word[0]
2711                  * sse_qe[0][95:80] = sched_word[1]
2712                  * sse_qe[1][31:16] = sched_word[2]
2713                  * sse_qe[1][95:80] = sched_word[3]
2714                  */
2715 #define DLB2_QE_QID_SCHED_WORD 1
2716                 sse_qe[0] = _mm_insert_epi16(sse_qe[0],
2717                                              sched_word[0],
2718                                              DLB2_QE_QID_SCHED_WORD);
2719                 sse_qe[0] = _mm_insert_epi16(sse_qe[0],
2720                                              sched_word[1],
2721                                              DLB2_QE_QID_SCHED_WORD + 4);
2722                 sse_qe[1] = _mm_insert_epi16(sse_qe[1],
2723                                              sched_word[2],
2724                                              DLB2_QE_QID_SCHED_WORD);
2725                 sse_qe[1] = _mm_insert_epi16(sse_qe[1],
2726                                              sched_word[3],
2727                                              DLB2_QE_QID_SCHED_WORD + 4);
2728
2729                 /* If the destination is a load-balanced queue, store the lock
2730                  * ID. If it is a directed queue, DLB places this field in
2731                  * bytes 10-11 of the received QE, so we format it accordingly:
2732                  * sse_qe[0][47:32]  = dir queue ? sched_word[0] : flow_id[0]
2733                  * sse_qe[0][111:96] = dir queue ? sched_word[1] : flow_id[1]
2734                  * sse_qe[1][47:32]  = dir queue ? sched_word[2] : flow_id[2]
2735                  * sse_qe[1][111:96] = dir queue ? sched_word[3] : flow_id[3]
2736                  */
2737 #define DLB2_QE_LOCK_ID_WORD 2
2738                 sse_qe[0] = _mm_insert_epi16(sse_qe[0],
2739                                 (sched_type[0] == DLB2_SCHED_DIRECTED) ?
2740                                         sched_word[0] : ev[0].flow_id,
2741                                 DLB2_QE_LOCK_ID_WORD);
2742                 sse_qe[0] = _mm_insert_epi16(sse_qe[0],
2743                                 (sched_type[1] == DLB2_SCHED_DIRECTED) ?
2744                                         sched_word[1] : ev[1].flow_id,
2745                                 DLB2_QE_LOCK_ID_WORD + 4);
2746                 sse_qe[1] = _mm_insert_epi16(sse_qe[1],
2747                                 (sched_type[2] == DLB2_SCHED_DIRECTED) ?
2748                                         sched_word[2] : ev[2].flow_id,
2749                                 DLB2_QE_LOCK_ID_WORD);
2750                 sse_qe[1] = _mm_insert_epi16(sse_qe[1],
2751                                 (sched_type[3] == DLB2_SCHED_DIRECTED) ?
2752                                         sched_word[3] : ev[3].flow_id,
2753                                 DLB2_QE_LOCK_ID_WORD + 4);
2754
2755                 /* Store the event type and sub event type in the metadata:
2756                  * sse_qe[0][15:0]  = flow_id[0]
2757                  * sse_qe[0][79:64] = flow_id[1]
2758                  * sse_qe[1][15:0]  = flow_id[2]
2759                  * sse_qe[1][79:64] = flow_id[3]
2760                  */
2761 #define DLB2_QE_EV_TYPE_WORD 0
2762                 sse_qe[0] = _mm_insert_epi16(sse_qe[0],
2763                                              ev[0].sub_event_type << 8 |
2764                                                 ev[0].event_type,
2765                                              DLB2_QE_EV_TYPE_WORD);
2766                 sse_qe[0] = _mm_insert_epi16(sse_qe[0],
2767                                              ev[1].sub_event_type << 8 |
2768                                                 ev[1].event_type,
2769                                              DLB2_QE_EV_TYPE_WORD + 4);
2770                 sse_qe[1] = _mm_insert_epi16(sse_qe[1],
2771                                              ev[2].sub_event_type << 8 |
2772                                                 ev[2].event_type,
2773                                              DLB2_QE_EV_TYPE_WORD);
2774                 sse_qe[1] = _mm_insert_epi16(sse_qe[1],
2775                                              ev[3].sub_event_type << 8 |
2776                                                 ev[3].event_type,
2777                                              DLB2_QE_EV_TYPE_WORD + 4);
2778
2779                 /* Store the metadata to memory (use the double-precision
2780                  * _mm_storeh_pd because there is no integer function for
2781                  * storing the upper 64b):
2782                  * qe[0] metadata = sse_qe[0][63:0]
2783                  * qe[1] metadata = sse_qe[0][127:64]
2784                  * qe[2] metadata = sse_qe[1][63:0]
2785                  * qe[3] metadata = sse_qe[1][127:64]
2786                  */
2787                 _mm_storel_epi64((__m128i *)&qe[0].u.opaque_data, sse_qe[0]);
2788                 _mm_storeh_pd((double *)&qe[1].u.opaque_data,
2789                               (__m128d)sse_qe[0]);
2790                 _mm_storel_epi64((__m128i *)&qe[2].u.opaque_data, sse_qe[1]);
2791                 _mm_storeh_pd((double *)&qe[3].u.opaque_data,
2792                               (__m128d)sse_qe[1]);
2793
2794                 qe[0].data = ev[0].u64;
2795                 qe[1].data = ev[1].u64;
2796                 qe[2].data = ev[2].u64;
2797                 qe[3].data = ev[3].u64;
2798
2799                 break;
2800         case 3:
2801         case 2:
2802         case 1:
2803                 for (i = 0; i < num; i++) {
2804                         qe[i].cmd_byte =
2805                                 cmd_byte_map[qm_port->is_directed][ev[i].op];
2806                         qe[i].sched_type = sched_type[i];
2807                         qe[i].data = ev[i].u64;
2808                         qe[i].qid = queue_id[i];
2809                         qe[i].priority = EV_TO_DLB2_PRIO(ev[i].priority);
2810                         qe[i].lock_id = ev[i].flow_id;
2811                         if (sched_type[i] == DLB2_SCHED_DIRECTED) {
2812                                 struct dlb2_msg_info *info =
2813                                         (struct dlb2_msg_info *)&qe[i].lock_id;
2814
2815                                 info->qid = queue_id[i];
2816                                 info->sched_type = DLB2_SCHED_DIRECTED;
2817                                 info->priority = qe[i].priority;
2818                         }
2819                         qe[i].u.event_type.major = ev[i].event_type;
2820                         qe[i].u.event_type.sub = ev[i].sub_event_type;
2821                 }
2822                 break;
2823         case 0:
2824                 break;
2825         }
2826 }
2827
2828 static inline int
2829 dlb2_event_enqueue_prep(struct dlb2_eventdev_port *ev_port,
2830                         struct dlb2_port *qm_port,
2831                         const struct rte_event ev[],
2832                         uint8_t *sched_type,
2833                         uint8_t *queue_id)
2834 {
2835         struct dlb2_eventdev *dlb2 = ev_port->dlb2;
2836         struct dlb2_eventdev_queue *ev_queue;
2837         uint16_t *cached_credits = NULL;
2838         struct dlb2_queue *qm_queue;
2839
2840         ev_queue = &dlb2->ev_queues[ev->queue_id];
2841         qm_queue = &ev_queue->qm_queue;
2842         *queue_id = qm_queue->id;
2843
2844         /* Ignore sched_type and hardware credits on release events */
2845         if (ev->op == RTE_EVENT_OP_RELEASE)
2846                 goto op_check;
2847
2848         if (!qm_queue->is_directed) {
2849                 /* Load balanced destination queue */
2850
2851                 if (dlb2->version == DLB2_HW_V2) {
2852                         if (dlb2_check_enqueue_hw_ldb_credits(qm_port)) {
2853                                 rte_errno = -ENOSPC;
2854                                 return 1;
2855                         }
2856                         cached_credits = &qm_port->cached_ldb_credits;
2857                 } else {
2858                         if (dlb2_check_enqueue_hw_credits(qm_port)) {
2859                                 rte_errno = -ENOSPC;
2860                                 return 1;
2861                         }
2862                         cached_credits = &qm_port->cached_credits;
2863                 }
2864                 switch (ev->sched_type) {
2865                 case RTE_SCHED_TYPE_ORDERED:
2866                         DLB2_LOG_DBG("dlb2: put_qe: RTE_SCHED_TYPE_ORDERED\n");
2867                         if (qm_queue->sched_type != RTE_SCHED_TYPE_ORDERED) {
2868                                 DLB2_LOG_ERR("dlb2: tried to send ordered event to unordered queue %d\n",
2869                                              *queue_id);
2870                                 rte_errno = -EINVAL;
2871                                 return 1;
2872                         }
2873                         *sched_type = DLB2_SCHED_ORDERED;
2874                         break;
2875                 case RTE_SCHED_TYPE_ATOMIC:
2876                         DLB2_LOG_DBG("dlb2: put_qe: RTE_SCHED_TYPE_ATOMIC\n");
2877                         *sched_type = DLB2_SCHED_ATOMIC;
2878                         break;
2879                 case RTE_SCHED_TYPE_PARALLEL:
2880                         DLB2_LOG_DBG("dlb2: put_qe: RTE_SCHED_TYPE_PARALLEL\n");
2881                         if (qm_queue->sched_type == RTE_SCHED_TYPE_ORDERED)
2882                                 *sched_type = DLB2_SCHED_ORDERED;
2883                         else
2884                                 *sched_type = DLB2_SCHED_UNORDERED;
2885                         break;
2886                 default:
2887                         DLB2_LOG_ERR("Unsupported LDB sched type in put_qe\n");
2888                         DLB2_INC_STAT(ev_port->stats.tx_invalid, 1);
2889                         rte_errno = -EINVAL;
2890                         return 1;
2891                 }
2892         } else {
2893                 /* Directed destination queue */
2894
2895                 if (dlb2->version == DLB2_HW_V2) {
2896                         if (dlb2_check_enqueue_hw_dir_credits(qm_port)) {
2897                                 rte_errno = -ENOSPC;
2898                                 return 1;
2899                         }
2900                         cached_credits = &qm_port->cached_dir_credits;
2901                 } else {
2902                         if (dlb2_check_enqueue_hw_credits(qm_port)) {
2903                                 rte_errno = -ENOSPC;
2904                                 return 1;
2905                         }
2906                         cached_credits = &qm_port->cached_credits;
2907                 }
2908                 DLB2_LOG_DBG("dlb2: put_qe: RTE_SCHED_TYPE_DIRECTED\n");
2909
2910                 *sched_type = DLB2_SCHED_DIRECTED;
2911         }
2912
2913 op_check:
2914         switch (ev->op) {
2915         case RTE_EVENT_OP_NEW:
2916                 /* Check that a sw credit is available */
2917                 if (dlb2_check_enqueue_sw_credits(dlb2, ev_port)) {
2918                         rte_errno = -ENOSPC;
2919                         return 1;
2920                 }
2921                 ev_port->inflight_credits--;
2922                 (*cached_credits)--;
2923                 break;
2924         case RTE_EVENT_OP_FORWARD:
2925                 /* Check for outstanding_releases underflow. If this occurs,
2926                  * the application is not using the EVENT_OPs correctly; for
2927                  * example, forwarding or releasing events that were not
2928                  * dequeued.
2929                  */
2930                 RTE_ASSERT(ev_port->outstanding_releases > 0);
2931                 ev_port->outstanding_releases--;
2932                 qm_port->issued_releases++;
2933                 (*cached_credits)--;
2934                 break;
2935         case RTE_EVENT_OP_RELEASE:
2936                 ev_port->inflight_credits++;
2937                 /* Check for outstanding_releases underflow. If this occurs,
2938                  * the application is not using the EVENT_OPs correctly; for
2939                  * example, forwarding or releasing events that were not
2940                  * dequeued.
2941                  */
2942                 RTE_ASSERT(ev_port->outstanding_releases > 0);
2943                 ev_port->outstanding_releases--;
2944                 qm_port->issued_releases++;
2945
2946                 /* Replenish s/w credits if enough are cached */
2947                 dlb2_replenish_sw_credits(dlb2, ev_port);
2948                 break;
2949         }
2950
2951         DLB2_INC_STAT(ev_port->stats.tx_op_cnt[ev->op], 1);
2952         DLB2_INC_STAT(ev_port->stats.traffic.tx_ok, 1);
2953
2954 #ifndef RTE_LIBRTE_PMD_DLB2_QUELL_STATS
2955         if (ev->op != RTE_EVENT_OP_RELEASE) {
2956                 DLB2_INC_STAT(ev_port->stats.queue[ev->queue_id].enq_ok, 1);
2957                 DLB2_INC_STAT(ev_port->stats.tx_sched_cnt[*sched_type], 1);
2958         }
2959 #endif
2960
2961         return 0;
2962 }
2963
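/*
 * Common enqueue path. Events are staged into the port's four-QE buffer
 * (one cache line of HCWs) and written to the device a cache line at a
 * time. The loop stops early if an event fails its credit or validation
 * checks, so the return value may be less than 'num'. When 'use_delayed'
 * is set and the port uses DELAYED_POP, a token-pop QE is injected once
 * enough releases have been issued; that QE is not counted in the return
 * value.
 */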
2964 static inline uint16_t
2965 __dlb2_event_enqueue_burst(void *event_port,
2966                            const struct rte_event events[],
2967                            uint16_t num,
2968                            bool use_delayed)
2969 {
2970         struct dlb2_eventdev_port *ev_port = event_port;
2971         struct dlb2_port *qm_port = &ev_port->qm_port;
2972         struct process_local_port_data *port_data;
2973         int i;
2974
2975         RTE_ASSERT(ev_port->enq_configured);
2976         RTE_ASSERT(events != NULL);
2977
2978         i = 0;
2979
2980         port_data = &dlb2_port[qm_port->id][PORT_TYPE(qm_port)];
2981
2982         while (i < num) {
2983                 uint8_t sched_types[DLB2_NUM_QES_PER_CACHE_LINE];
2984                 uint8_t queue_ids[DLB2_NUM_QES_PER_CACHE_LINE];
2985                 int pop_offs = 0;
2986                 int j = 0;
2987
2988                 memset(qm_port->qe4,
2989                        0,
2990                        DLB2_NUM_QES_PER_CACHE_LINE *
2991                        sizeof(struct dlb2_enqueue_qe));
2992
2993                 for (; j < DLB2_NUM_QES_PER_CACHE_LINE && (i + j) < num; j++) {
2994                         const struct rte_event *ev = &events[i + j];
2995                         int16_t thresh = qm_port->token_pop_thresh;
2996
2997                         if (use_delayed &&
2998                             qm_port->token_pop_mode == DELAYED_POP &&
2999                             (ev->op == RTE_EVENT_OP_FORWARD ||
3000                              ev->op == RTE_EVENT_OP_RELEASE) &&
3001                             qm_port->issued_releases >= thresh - 1) {
3002                                 /* Insert the token pop QE and break out. This
3003                                  * may result in a partial HCW, but that is
3004                                  * simpler than supporting arbitrary QE
3005                                  * insertion.
3006                                  */
3007                                 dlb2_construct_token_pop_qe(qm_port, j);
3008
3009                                 /* Reset the releases for the next QE batch */
3010                                 qm_port->issued_releases -= thresh;
3011
3012                                 pop_offs = 1;
3013                                 j++;
3014                                 break;
3015                         }
3016
3017                         if (dlb2_event_enqueue_prep(ev_port, qm_port, ev,
3018                                                     &sched_types[j],
3019                                                     &queue_ids[j]))
3020                                 break;
3021                 }
3022
3023                 if (j == 0)
3024                         break;
3025
3026                 dlb2_event_build_hcws(qm_port, &events[i], j - pop_offs,
3027                                       sched_types, queue_ids);
3028
3029                 dlb2_hw_do_enqueue(qm_port, i == 0, port_data);
3030
3031                 /* Don't include the token pop QE in the enqueue count */
3032                 i += j - pop_offs;
3033
3034                 /* Don't interpret j < DLB2_NUM_... as out-of-credits if
3035                  * pop_offs != 0
3036                  */
3037                 if (j < DLB2_NUM_QES_PER_CACHE_LINE && pop_offs == 0)
3038                         break;
3039         }
3040
3041         return i;
3042 }
3043
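/*
 * Thin wrappers around __dlb2_event_enqueue_burst(). The *_delayed variants
 * are intended for ports configured with DELAYED_POP token handling, and
 * the new/forward variants exist to populate the eventdev API's specialized
 * enqueue hooks; all of them share the same underlying implementation.
 */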
3044 static uint16_t
3045 dlb2_event_enqueue_burst(void *event_port,
3046                              const struct rte_event events[],
3047                              uint16_t num)
3048 {
3049         return __dlb2_event_enqueue_burst(event_port, events, num, false);
3050 }
3051
3052 static uint16_t
3053 dlb2_event_enqueue_burst_delayed(void *event_port,
3054                                      const struct rte_event events[],
3055                                      uint16_t num)
3056 {
3057         return __dlb2_event_enqueue_burst(event_port, events, num, true);
3058 }
3059
3060 static inline uint16_t
3061 dlb2_event_enqueue(void *event_port,
3062                    const struct rte_event events[])
3063 {
3064         return __dlb2_event_enqueue_burst(event_port, events, 1, false);
3065 }
3066
3067 static inline uint16_t
3068 dlb2_event_enqueue_delayed(void *event_port,
3069                            const struct rte_event events[])
3070 {
3071         return __dlb2_event_enqueue_burst(event_port, events, 1, true);
3072 }
3073
3074 static uint16_t
3075 dlb2_event_enqueue_new_burst(void *event_port,
3076                              const struct rte_event events[],
3077                              uint16_t num)
3078 {
3079         return __dlb2_event_enqueue_burst(event_port, events, num, false);
3080 }
3081
3082 static uint16_t
3083 dlb2_event_enqueue_new_burst_delayed(void *event_port,
3084                                      const struct rte_event events[],
3085                                      uint16_t num)
3086 {
3087         return __dlb2_event_enqueue_burst(event_port, events, num, true);
3088 }
3089
3090 static uint16_t
3091 dlb2_event_enqueue_forward_burst(void *event_port,
3092                                  const struct rte_event events[],
3093                                  uint16_t num)
3094 {
3095         return __dlb2_event_enqueue_burst(event_port, events, num, false);
3096 }
3097
3098 static uint16_t
3099 dlb2_event_enqueue_forward_burst_delayed(void *event_port,
3100                                          const struct rte_event events[],
3101                                          uint16_t num)
3102 {
3103         return __dlb2_event_enqueue_burst(event_port, events, num, true);
3104 }
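/*
 * Illustrative only, not driver code: the per-op credit handling in
 * dlb2_event_enqueue_prep() above mirrors the usual eventdev producer
 * pattern. app_obj and handle_backpressure() are application placeholders.
 *
 *	struct rte_event ev = {
 *		.op = RTE_EVENT_OP_NEW,
 *		.queue_id = 0,
 *		.sched_type = RTE_SCHED_TYPE_ATOMIC,
 *		.event_type = RTE_EVENT_TYPE_CPU,
 *		.u64 = (uintptr_t)app_obj,
 *	};
 *
 *	if (rte_event_enqueue_burst(dev_id, port_id, &ev, 1) != 1)
 *		handle_backpressure(); // rte_errno is set by the PMD
 */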
3105
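/*
 * Issue 'n' release HCWs on the given port to return previously dequeued
 * (but not forwarded) events to the device. Directed ports skip the HCW
 * writes, since directed traffic does not require explicit releases; only
 * the software credit and outstanding-release accounting is updated.
 */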
3106 static void
3107 dlb2_event_release(struct dlb2_eventdev *dlb2,
3108                    uint8_t port_id,
3109                    int n)
3110 {
3111         struct process_local_port_data *port_data;
3112         struct dlb2_eventdev_port *ev_port;
3113         struct dlb2_port *qm_port;
3114         int i;
3115
3116         if (port_id >= dlb2->num_ports) {
3117                 DLB2_LOG_ERR("Invalid port id %d in dlb2_event_release\n",
3118                              port_id);
3119                 rte_errno = -EINVAL;
3120                 return;
3121         }
3122
3123         ev_port = &dlb2->ev_ports[port_id];
3124         qm_port = &ev_port->qm_port;
3125         port_data = &dlb2_port[qm_port->id][PORT_TYPE(qm_port)];
3126
3127         i = 0;
3128
3129         if (qm_port->is_directed) {
3130                 i = n;
3131                 goto sw_credit_update;
3132         }
3133
3134         while (i < n) {
3135                 int pop_offs = 0;
3136                 int j = 0;
3137
3138                 /* Zero-out QEs */
3139                 _mm_storeu_si128((void *)&qm_port->qe4[0], _mm_setzero_si128());
3140                 _mm_storeu_si128((void *)&qm_port->qe4[1], _mm_setzero_si128());
3141                 _mm_storeu_si128((void *)&qm_port->qe4[2], _mm_setzero_si128());
3142                 _mm_storeu_si128((void *)&qm_port->qe4[3], _mm_setzero_si128());
3143
3144
3145                 for (; j < DLB2_NUM_QES_PER_CACHE_LINE && (i + j) < n; j++) {
3146                         int16_t thresh = qm_port->token_pop_thresh;
3147
3148                         if (qm_port->token_pop_mode == DELAYED_POP &&
3149                             qm_port->issued_releases >= thresh - 1) {
3150                                 /* Insert the token pop QE */
3151                                 dlb2_construct_token_pop_qe(qm_port, j);
3152
3153                                 /* Reset the releases for the next QE batch */
3154                                 qm_port->issued_releases -= thresh;
3155
3156                                 pop_offs = 1;
3157                                 j++;
3158                                 break;
3159                         }
3160
3161                         qm_port->qe4[j].cmd_byte = DLB2_COMP_CMD_BYTE;
3162                         qm_port->issued_releases++;
3163                 }
3164
3165                 dlb2_hw_do_enqueue(qm_port, i == 0, port_data);
3166
3167                 /* Don't include the token pop QE in the release count */
3168                 i += j - pop_offs;
3169         }
3170
3171 sw_credit_update:
3172         /* each release returns one credit */
3173         if (unlikely(!ev_port->outstanding_releases)) {
3174                 DLB2_LOG_ERR("%s: Outstanding releases underflowed.\n",
3175                              __func__);
3176                 return;
3177         }
3178         ev_port->outstanding_releases -= i;
3179         ev_port->inflight_credits += i;
3180
3181         /* Replenish s/w credits if enough releases are performed */
3182         dlb2_replenish_sw_credits(dlb2, ev_port);
3183 }
3184
3185 static inline void
3186 dlb2_port_credits_inc(struct dlb2_port *qm_port, int num)
3187 {
3188         uint32_t batch_size = qm_port->hw_credit_quanta;
3189
3190         /* Increment cached port credits; return a batch to the pool if the cache exceeds twice the quanta */
3191         if (!qm_port->is_directed) {
3192                 if (qm_port->dlb2->version == DLB2_HW_V2) {
3193                         qm_port->cached_ldb_credits += num;
3194                         if (qm_port->cached_ldb_credits >= 2 * batch_size) {
3195                                 __atomic_fetch_add(
3196                                         qm_port->credit_pool[DLB2_LDB_QUEUE],
3197                                         batch_size, __ATOMIC_SEQ_CST);
3198                                 qm_port->cached_ldb_credits -= batch_size;
3199                         }
3200                 } else {
3201                         qm_port->cached_credits += num;
3202                         if (qm_port->cached_credits >= 2 * batch_size) {
3203                                 __atomic_fetch_add(
3204                                       qm_port->credit_pool[DLB2_COMBINED_POOL],
3205                                       batch_size, __ATOMIC_SEQ_CST);
3206                                 qm_port->cached_credits -= batch_size;
3207                         }
3208                 }
3209         } else {
3210                 if (qm_port->dlb2->version == DLB2_HW_V2) {
3211                         qm_port->cached_dir_credits += num;
3212                         if (qm_port->cached_dir_credits >= 2 * batch_size) {
3213                                 __atomic_fetch_add(
3214                                         qm_port->credit_pool[DLB2_DIR_QUEUE],
3215                                         batch_size, __ATOMIC_SEQ_CST);
3216                                 qm_port->cached_dir_credits -= batch_size;
3217                         }
3218                 } else {
3219                         qm_port->cached_credits += num;
3220                         if (qm_port->cached_credits >= 2 * batch_size) {
3221                                 __atomic_fetch_add(
3222                                       qm_port->credit_pool[DLB2_COMBINED_POOL],
3223                                       batch_size, __ATOMIC_SEQ_CST);
3224                                 qm_port->cached_credits -= batch_size;
3225                         }
3226                 }
3227         }
3228 }
3229
3230 #define CLB_MASK_IDX 0
3231 #define CLB_VAL_IDX 1
3232 static int
3233 dlb2_monitor_callback(const uint64_t val,
3234                 const uint64_t opaque[RTE_POWER_MONITOR_OPAQUE_SZ])
3235 {
3236         /* abort if the value matches */
3237         return (val & opaque[CLB_MASK_IDX]) == opaque[CLB_VAL_IDX] ? -1 : 0;
3238 }
3239
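/*
 * Wait for a QE to arrive on the port's CQ. Returns 1 if the caller's
 * timeout has already expired; otherwise waits using either umonitor/umwait
 * on the gen-bit word of the next CQ entry (when umwait is allowed) or a
 * busy-poll bounded by 'poll_interval' ticks, then returns 0 so the caller
 * can re-check the CQ.
 */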
3240 static inline int
3241 dlb2_dequeue_wait(struct dlb2_eventdev *dlb2,
3242                   struct dlb2_eventdev_port *ev_port,
3243                   struct dlb2_port *qm_port,
3244                   uint64_t timeout,
3245                   uint64_t start_ticks)
3246 {
3247         struct process_local_port_data *port_data;
3248         uint64_t elapsed_ticks;
3249
3250         port_data = &dlb2_port[qm_port->id][PORT_TYPE(qm_port)];
3251
3252         elapsed_ticks = rte_get_timer_cycles() - start_ticks;
3253
3254         /* Wait/poll time expired */
3255         if (elapsed_ticks >= timeout) {
3256                 return 1;
3257         } else if (dlb2->umwait_allowed) {
3258                 struct rte_power_monitor_cond pmc;
3259                 volatile struct dlb2_dequeue_qe *cq_base;
3260                 union {
3261                         uint64_t raw_qe[2];
3262                         struct dlb2_dequeue_qe qe;
3263                 } qe_mask;
3264                 uint64_t expected_value;
3265                 volatile uint64_t *monitor_addr;
3266
3267                 qe_mask.qe.cq_gen = 1; /* set mask */
3268
3269                 cq_base = port_data->cq_base;
3270                 monitor_addr = (volatile uint64_t *)(volatile void *)
3271                         &cq_base[qm_port->cq_idx];
3272                 monitor_addr++; /* cq_gen bit is in second 64bit location */
3273
3274                 if (qm_port->gen_bit)
3275                         expected_value = qe_mask.raw_qe[1];
3276                 else
3277                         expected_value = 0;
3278
3279                 pmc.addr = monitor_addr;
3280                 /* store expected value and comparison mask in opaque data */
3281                 pmc.opaque[CLB_VAL_IDX] = expected_value;
3282                 pmc.opaque[CLB_MASK_IDX] = qe_mask.raw_qe[1];
3283                 /* set up callback */
3284                 pmc.fn = dlb2_monitor_callback;
3285                 pmc.size = sizeof(uint64_t);
3286
3287                 rte_power_monitor(&pmc, timeout + start_ticks);
3288
3289                 DLB2_INC_STAT(ev_port->stats.traffic.rx_umonitor_umwait, 1);
3290         } else {
3291                 uint64_t poll_interval = dlb2->poll_interval;
3292                 uint64_t curr_ticks = rte_get_timer_cycles();
3293                 uint64_t init_ticks = curr_ticks;
3294
3295                 while ((curr_ticks - start_ticks < timeout) &&
3296                        (curr_ticks - init_ticks < poll_interval))
3297                         curr_ticks = rte_get_timer_cycles();
3298         }
3299
3300         return 0;
3301 }
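/*
 * Illustrative only, not driver code: the 'timeout' checked above comes
 * either from the per-call wait argument or from the dequeue_timeout_ns
 * value given at configure time. A caller typically converts nanoseconds
 * to ticks first, e.g.:
 *
 *	struct rte_event ev[4];
 *	uint64_t ticks = 0;
 *
 *	rte_event_dequeue_timeout_ticks(dev_id, 1000, &ticks);
 *	uint16_t n = rte_event_dequeue_burst(dev_id, port_id, ev,
 *					     RTE_DIM(ev), ticks);
 */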
3302
3303 static __rte_noinline int
3304 dlb2_process_dequeue_qes(struct dlb2_eventdev_port *ev_port,
3305                          struct dlb2_port *qm_port,
3306                          struct rte_event *events,
3307                          struct dlb2_dequeue_qe *qes,
3308                          int cnt)
3309 {
3310         uint8_t *qid_mappings = qm_port->qid_mappings;
3311         int i, num, evq_id;
3312
3313         for (i = 0, num = 0; i < cnt; i++) {
3314                 struct dlb2_dequeue_qe *qe = &qes[i];
3315                 int sched_type_map[DLB2_NUM_HW_SCHED_TYPES] = {
3316                         [DLB2_SCHED_ATOMIC] = RTE_SCHED_TYPE_ATOMIC,
3317                         [DLB2_SCHED_UNORDERED] = RTE_SCHED_TYPE_PARALLEL,
3318                         [DLB2_SCHED_ORDERED] = RTE_SCHED_TYPE_ORDERED,
3319                         [DLB2_SCHED_DIRECTED] = RTE_SCHED_TYPE_ATOMIC,
3320                 };
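                /* Directed (single-link) traffic has no dedicated eventdev
                 * sched type, so it is reported to the application as atomic.
                 */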
3321
3322                 /* Fill in event information.
3323                  * Note that flow_id must be embedded in the data by
3324                  * the app, such as the mbuf RSS hash field if the data
3325                  * buffer is an mbuf.
3326                  */
3327                 if (unlikely(qe->error)) {
3328                         DLB2_LOG_ERR("QE error bit ON\n");
3329                         DLB2_INC_STAT(ev_port->stats.traffic.rx_drop, 1);
3330                         dlb2_consume_qe_immediate(qm_port, 1);
3331                         continue; /* Ignore */
3332                 }
3333
3334                 events[num].u64 = qe->data;
3335                 events[num].flow_id = qe->flow_id;
3336                 events[num].priority = DLB2_TO_EV_PRIO((uint8_t)qe->priority);
3337                 events[num].event_type = qe->u.event_type.major;
3338                 events[num].sub_event_type = qe->u.event_type.sub;
3339                 events[num].sched_type = sched_type_map[qe->sched_type];
3340                 events[num].impl_opaque = qe->qid_depth;
3341
3342                 /* qid not preserved for directed queues */
3343                 if (qm_port->is_directed)
3344                         evq_id = ev_port->link[0].queue_id;
3345                 else
3346                         evq_id = qid_mappings[qe->qid];
3347
3348                 events[num].queue_id = evq_id;
3349                 DLB2_INC_STAT(
3350                         ev_port->stats.queue[evq_id].qid_depth[qe->qid_depth],
3351                         1);
3352                 DLB2_INC_STAT(ev_port->stats.rx_sched_cnt[qe->sched_type], 1);
3353                 num++;
3354         }
3355
3356         DLB2_INC_STAT(ev_port->stats.traffic.rx_ok, num);
3357
3358         return num;
3359 }
3360
3361 static inline int
3362 dlb2_process_dequeue_four_qes(struct dlb2_eventdev_port *ev_port,
3363                               struct dlb2_port *qm_port,
3364                               struct rte_event *events,
3365                               struct dlb2_dequeue_qe *qes)
3366 {
3367         int sched_type_map[] = {
3368                 [DLB2_SCHED_ATOMIC] = RTE_SCHED_TYPE_ATOMIC,
3369                 [DLB2_SCHED_UNORDERED] = RTE_SCHED_TYPE_PARALLEL,
3370                 [DLB2_SCHED_ORDERED] = RTE_SCHED_TYPE_ORDERED,
3371                 [DLB2_SCHED_DIRECTED] = RTE_SCHED_TYPE_ATOMIC,
3372         };
3373         const int num_events = DLB2_NUM_QES_PER_CACHE_LINE;
3374         uint8_t *qid_mappings = qm_port->qid_mappings;
3375         __m128i sse_evt[2];
3376
3377         /* In the unlikely case that any of the QE error bits are set, process
3378          * them one at a time.
3379          */
3380         if (unlikely(qes[0].error || qes[1].error ||
3381                      qes[2].error || qes[3].error))
3382                 return dlb2_process_dequeue_qes(ev_port, qm_port, events,
3383                                                  qes, num_events);
3384
3385         events[0].u64 = qes[0].data;
3386         events[1].u64 = qes[1].data;
3387         events[2].u64 = qes[2].data;
3388         events[3].u64 = qes[3].data;
3389
3390         /* Construct the metadata portion of two struct rte_events
3391          * in one 128b SSE register. Event metadata is constructed in the SSE
3392          * registers like so:
3393          * sse_evt[0][63:0]:   event[0]'s metadata
3394          * sse_evt[0][127:64]: event[1]'s metadata
3395          * sse_evt[1][63:0]:   event[2]'s metadata
3396          * sse_evt[1][127:64]: event[3]'s metadata
3397          */
3398         sse_evt[0] = _mm_setzero_si128();
3399         sse_evt[1] = _mm_setzero_si128();
3400
3401         /* Convert the hardware queue ID to an event queue ID and store it in
3402          * the metadata:
3403          * sse_evt[0][47:40]   = qid_mappings[qes[0].qid]
3404          * sse_evt[0][111:104] = qid_mappings[qes[1].qid]
3405          * sse_evt[1][47:40]   = qid_mappings[qes[2].qid]
3406          * sse_evt[1][111:104] = qid_mappings[qes[3].qid]
3407          */
3408 #define DLB_EVENT_QUEUE_ID_BYTE 5
3409         sse_evt[0] = _mm_insert_epi8(sse_evt[0],
3410                                      qid_mappings[qes[0].qid],
3411                                      DLB_EVENT_QUEUE_ID_BYTE);
3412         sse_evt[0] = _mm_insert_epi8(sse_evt[0],
3413                                      qid_mappings[qes[1].qid],
3414                                      DLB_EVENT_QUEUE_ID_BYTE + 8);
3415         sse_evt[1] = _mm_insert_epi8(sse_evt[1],
3416                                      qid_mappings[qes[2].qid],
3417                                      DLB_EVENT_QUEUE_ID_BYTE);
3418         sse_evt[1] = _mm_insert_epi8(sse_evt[1],
3419                                      qid_mappings[qes[3].qid],
3420                                      DLB_EVENT_QUEUE_ID_BYTE + 8);
3421
3422         /* Convert the hardware priority to an event priority and store it in
3423          * the metadata, while also returning the queue depth status
3424          * value captured by the hardware, storing it in impl_opaque, which can
3425          * be read by the application but not modified
3426          * sse_evt[0][55:48]   = DLB2_TO_EV_PRIO(qes[0].priority)
3427          * sse_evt[0][63:56]   = qes[0].qid_depth
3428          * sse_evt[0][119:112] = DLB2_TO_EV_PRIO(qes[1].priority)
3429          * sse_evt[0][127:120] = qes[1].qid_depth
3430          * sse_evt[1][55:48]   = DLB2_TO_EV_PRIO(qes[2].priority)
3431          * sse_evt[1][63:56]   = qes[2].qid_depth
3432          * sse_evt[1][119:112] = DLB2_TO_EV_PRIO(qes[3].priority)
3433          * sse_evt[1][127:120] = qes[3].qid_depth
3434          */
3435 #define DLB_EVENT_PRIO_IMPL_OPAQUE_WORD 3
3436 #define DLB_BYTE_SHIFT 8
3437         sse_evt[0] =
3438                 _mm_insert_epi16(sse_evt[0],
3439                         DLB2_TO_EV_PRIO((uint8_t)qes[0].priority) |
3440                         (qes[0].qid_depth << DLB_BYTE_SHIFT),
3441                         DLB_EVENT_PRIO_IMPL_OPAQUE_WORD);
3442         sse_evt[0] =
3443                 _mm_insert_epi16(sse_evt[0],
3444                         DLB2_TO_EV_PRIO((uint8_t)qes[1].priority) |
3445                         (qes[1].qid_depth << DLB_BYTE_SHIFT),
3446                         DLB_EVENT_PRIO_IMPL_OPAQUE_WORD + 4);
3447         sse_evt[1] =
3448                 _mm_insert_epi16(sse_evt[1],
3449                         DLB2_TO_EV_PRIO((uint8_t)qes[2].priority) |
3450                         (qes[2].qid_depth << DLB_BYTE_SHIFT),
3451                         DLB_EVENT_PRIO_IMPL_OPAQUE_WORD);
3452         sse_evt[1] =
3453                 _mm_insert_epi16(sse_evt[1],
3454                         DLB2_TO_EV_PRIO((uint8_t)qes[3].priority) |
3455                         (qes[3].qid_depth << DLB_BYTE_SHIFT),
3456                         DLB_EVENT_PRIO_IMPL_OPAQUE_WORD + 4);
3457
3458         /* Write the event type, sub event type, and flow_id to the event
3459          * metadata.
3460          * sse_evt[0][31:0]   = qes[0].flow_id |
3461          *                      qes[0].u.event_type.major << 28 |
3462          *                      qes[0].u.event_type.sub << 20;
3463          * sse_evt[0][95:64]  = qes[1].flow_id |
3464          *                      qes[1].u.event_type.major << 28 |
3465          *                      qes[1].u.event_type.sub << 20;
3466          * sse_evt[1][31:0]   = qes[2].flow_id |
3467          *                      qes[2].u.event_type.major << 28 |
3468          *                      qes[2].u.event_type.sub << 20;
3469          * sse_evt[1][95:64]  = qes[3].flow_id |
3470          *                      qes[3].u.event_type.major << 28 |
3471          *                      qes[3].u.event_type.sub << 20;
3472          */
3473 #define DLB_EVENT_EV_TYPE_DW 0
3474 #define DLB_EVENT_EV_TYPE_SHIFT 28
3475 #define DLB_EVENT_SUB_EV_TYPE_SHIFT 20
3476         sse_evt[0] = _mm_insert_epi32(sse_evt[0],
3477                         qes[0].flow_id |
3478                         qes[0].u.event_type.major << DLB_EVENT_EV_TYPE_SHIFT |
3479                         qes[0].u.event_type.sub <<  DLB_EVENT_SUB_EV_TYPE_SHIFT,
3480                         DLB_EVENT_EV_TYPE_DW);
3481         sse_evt[0] = _mm_insert_epi32(sse_evt[0],
3482                         qes[1].flow_id |
3483                         qes[1].u.event_type.major << DLB_EVENT_EV_TYPE_SHIFT |
3484                         qes[1].u.event_type.sub <<  DLB_EVENT_SUB_EV_TYPE_SHIFT,
3485                         DLB_EVENT_EV_TYPE_DW + 2);
3486         sse_evt[1] = _mm_insert_epi32(sse_evt[1],
3487                         qes[2].flow_id |
3488                         qes[2].u.event_type.major << DLB_EVENT_EV_TYPE_SHIFT |
3489                         qes[2].u.event_type.sub <<  DLB_EVENT_SUB_EV_TYPE_SHIFT,
3490                         DLB_EVENT_EV_TYPE_DW);
3491         sse_evt[1] = _mm_insert_epi32(sse_evt[1],
3492                         qes[3].flow_id |
3493                         qes[3].u.event_type.major << DLB_EVENT_EV_TYPE_SHIFT  |
3494                         qes[3].u.event_type.sub << DLB_EVENT_SUB_EV_TYPE_SHIFT,
3495                         DLB_EVENT_EV_TYPE_DW + 2);
3496
3497         /* Write the sched type to the event metadata. 'op' and 'rsvd' are not
3498          * set:
3499          * sse_evt[0][39:32]  = sched_type_map[qes[0].sched_type] << 6
3500          * sse_evt[0][103:96] = sched_type_map[qes[1].sched_type] << 6
3501          * sse_evt[1][39:32]  = sched_type_map[qes[2].sched_type] << 6
3502          * sse_evt[1][103:96] = sched_type_map[qes[3].sched_type] << 6
3503          */
3504 #define DLB_EVENT_SCHED_TYPE_BYTE 4
3505 #define DLB_EVENT_SCHED_TYPE_SHIFT 6
3506         sse_evt[0] = _mm_insert_epi8(sse_evt[0],
3507                 sched_type_map[qes[0].sched_type] << DLB_EVENT_SCHED_TYPE_SHIFT,
3508                 DLB_EVENT_SCHED_TYPE_BYTE);
3509         sse_evt[0] = _mm_insert_epi8(sse_evt[0],
3510                 sched_type_map[qes[1].sched_type] << DLB_EVENT_SCHED_TYPE_SHIFT,
3511                 DLB_EVENT_SCHED_TYPE_BYTE + 8);
3512         sse_evt[1] = _mm_insert_epi8(sse_evt[1],
3513                 sched_type_map[qes[2].sched_type] << DLB_EVENT_SCHED_TYPE_SHIFT,
3514                 DLB_EVENT_SCHED_TYPE_BYTE);
3515         sse_evt[1] = _mm_insert_epi8(sse_evt[1],
3516                 sched_type_map[qes[3].sched_type] << DLB_EVENT_SCHED_TYPE_SHIFT,
3517                 DLB_EVENT_SCHED_TYPE_BYTE + 8);
3518
3519         /* Store the metadata to the event (use the double-precision
3520          * _mm_storeh_pd because there is no integer function for storing the
3521          * upper 64b):
3522          * events[0].event = sse_evt[0][63:0]
3523          * events[1].event = sse_evt[0][127:64]
3524          * events[2].event = sse_evt[1][63:0]
3525          * events[3].event = sse_evt[1][127:64]
3526          */
3527         _mm_storel_epi64((__m128i *)&events[0].event, sse_evt[0]);
3528         _mm_storeh_pd((double *)&events[1].event, (__m128d) sse_evt[0]);
3529         _mm_storel_epi64((__m128i *)&events[2].event, sse_evt[1]);
3530         _mm_storeh_pd((double *)&events[3].event, (__m128d) sse_evt[1]);
3531
3532         DLB2_INC_STAT(ev_port->stats.rx_sched_cnt[qes[0].sched_type], 1);
3533         DLB2_INC_STAT(ev_port->stats.rx_sched_cnt[qes[1].sched_type], 1);
3534         DLB2_INC_STAT(ev_port->stats.rx_sched_cnt[qes[2].sched_type], 1);
3535         DLB2_INC_STAT(ev_port->stats.rx_sched_cnt[qes[3].sched_type], 1);
3536
3537         DLB2_INC_STAT(
3538                 ev_port->stats.queue[events[0].queue_id].
3539                         qid_depth[qes[0].qid_depth],
3540                 1);
3541         DLB2_INC_STAT(
3542                 ev_port->stats.queue[events[1].queue_id].
3543                         qid_depth[qes[1].qid_depth],
3544                 1);
3545         DLB2_INC_STAT(
3546                 ev_port->stats.queue[events[2].queue_id].
3547                         qid_depth[qes[2].qid_depth],
3548                 1);
3549         DLB2_INC_STAT(
3550                 ev_port->stats.queue[events[3].queue_id].
3551                         qid_depth[qes[3].qid_depth],
3552                 1);
3553
3554         DLB2_INC_STAT(ev_port->stats.traffic.rx_ok, num_events);
3555
3556         return num_events;
3557 }
3558
3559 static __rte_always_inline int
3560 dlb2_recv_qe_sparse(struct dlb2_port *qm_port, struct dlb2_dequeue_qe *qe)
3561 {
3562         volatile struct dlb2_dequeue_qe *cq_addr;
3563         uint8_t xor_mask[2] = {0x0F, 0x00};
3564         const uint8_t and_mask = 0x0F;
3565         __m128i *qes = (__m128i *)qe;
3566         uint8_t gen_bits, gen_bit;
3567         uintptr_t addr[4];
3568         uint16_t idx;
3569
3570         cq_addr = dlb2_port[qm_port->id][PORT_TYPE(qm_port)].cq_base;
3571
3572         idx = qm_port->cq_idx_unmasked & qm_port->cq_depth_mask;
3573         /* Load the next 4 QEs */
3574         addr[0] = (uintptr_t)&cq_addr[idx];
3575         addr[1] = (uintptr_t)&cq_addr[(idx +  4) & qm_port->cq_depth_mask];
3576         addr[2] = (uintptr_t)&cq_addr[(idx +  8) & qm_port->cq_depth_mask];
3577         addr[3] = (uintptr_t)&cq_addr[(idx + 12) & qm_port->cq_depth_mask];
3578
3579         /* Prefetch next batch of QEs (all CQs occupy minimum 8 cache lines) */
3580         rte_prefetch0(&cq_addr[(idx + 16) & qm_port->cq_depth_mask]);
3581         rte_prefetch0(&cq_addr[(idx + 20) & qm_port->cq_depth_mask]);
3582         rte_prefetch0(&cq_addr[(idx + 24) & qm_port->cq_depth_mask]);
3583         rte_prefetch0(&cq_addr[(idx + 28) & qm_port->cq_depth_mask]);
3584
3585         /* Correct the xor_mask for wrap-around QEs */
3586         gen_bit = qm_port->gen_bit;
3587         xor_mask[gen_bit] ^= !!((idx +  4) > qm_port->cq_depth_mask) << 1;
3588         xor_mask[gen_bit] ^= !!((idx +  8) > qm_port->cq_depth_mask) << 2;
3589         xor_mask[gen_bit] ^= !!((idx + 12) > qm_port->cq_depth_mask) << 3;
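        /* e.g. with cq_depth 16 (mask 0xF) and idx 12, slots 1-3 land at CQ
         * indexes 0, 4 and 8 after the wrap, so their expected gen bit is
         * inverted by flipping bits 1-3 of the xor mask.
         */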
3590
3591         /* Read the cache lines backwards to ensure that if QE[N] (N > 0) is
3592          * valid, then QEs[0:N-1] are too.
3593          */
3594         qes[3] = _mm_load_si128((__m128i *)(void *)addr[3]);
3595         rte_compiler_barrier();
3596         qes[2] = _mm_load_si128((__m128i *)(void *)addr[2]);
3597         rte_compiler_barrier();
3598         qes[1] = _mm_load_si128((__m128i *)(void *)addr[1]);
3599         rte_compiler_barrier();
3600         qes[0] = _mm_load_si128((__m128i *)(void *)addr[0]);
3601
3602         /* Extract and combine the gen bits */
3603         gen_bits = ((_mm_extract_epi8(qes[0], 15) & 0x1) << 0) |
3604                    ((_mm_extract_epi8(qes[1], 15) & 0x1) << 1) |
3605                    ((_mm_extract_epi8(qes[2], 15) & 0x1) << 2) |
3606                    ((_mm_extract_epi8(qes[3], 15) & 0x1) << 3);
3607
3608         /* XOR the combined bits such that a 1 represents a valid QE */
3609         gen_bits ^= xor_mask[gen_bit];
3610
3611         /* Mask off gen bits we don't care about */
3612         gen_bits &= and_mask;
3613
3614         return __builtin_popcount(gen_bits);
3615 }
3616
3617 static inline void
3618 _process_deq_qes_vec_impl(struct dlb2_port *qm_port,
3619                           struct rte_event *events,
3620                           __m128i v_qe_3,
3621                           __m128i v_qe_2,
3622                           __m128i v_qe_1,
3623                           __m128i v_qe_0,
3624                           __m128i v_qe_meta,
3625                           __m128i v_qe_status,
3626                           uint32_t valid_events)
3627 {
3628         /* Look up the event QIDs, using the hardware QIDs to index the
3629          * port's QID mapping.
3630          *
3631          * Each v_qe_[0-3] is just a 16-byte load of the whole QE. It is
3632          * passed along in registers as the QE data is required later.
3633          *
3634          * v_qe_meta is a u32 unpack of all 4x QEs, i.e. it contains one
3635          * 32-bit slice of each QE, so makes up a full SSE register. This
3636          * allows parallel processing of 4x QEs in a single register.
3637          */
3638
3639         __m128i v_qid_done = {0};
3640         int hw_qid0 = _mm_extract_epi8(v_qe_meta, 2);
3641         int hw_qid1 = _mm_extract_epi8(v_qe_meta, 6);
3642         int hw_qid2 = _mm_extract_epi8(v_qe_meta, 10);
3643         int hw_qid3 = _mm_extract_epi8(v_qe_meta, 14);
3644
3645         int ev_qid0 = qm_port->qid_mappings[hw_qid0];
3646         int ev_qid1 = qm_port->qid_mappings[hw_qid1];
3647         int ev_qid2 = qm_port->qid_mappings[hw_qid2];
3648         int ev_qid3 = qm_port->qid_mappings[hw_qid3];
3649
3650         int hw_sched0 = _mm_extract_epi8(v_qe_meta, 3) & 3ul;
3651         int hw_sched1 = _mm_extract_epi8(v_qe_meta, 7) & 3ul;
3652         int hw_sched2 = _mm_extract_epi8(v_qe_meta, 11) & 3ul;
3653         int hw_sched3 = _mm_extract_epi8(v_qe_meta, 15) & 3ul;
3654
3655         v_qid_done = _mm_insert_epi8(v_qid_done, ev_qid0, 2);
3656         v_qid_done = _mm_insert_epi8(v_qid_done, ev_qid1, 6);
3657         v_qid_done = _mm_insert_epi8(v_qid_done, ev_qid2, 10);
3658         v_qid_done = _mm_insert_epi8(v_qid_done, ev_qid3, 14);
3659
3660         /* Schedule field remapping using byte shuffle
3661          * - Full byte containing sched field handled here (op, rsvd are zero)
3662          * - Note sanitizing the register requires two masking ANDs:
3663          *   1) to strip prio/msg_type from byte for correct shuffle lookup
3664          *   2) to strip any non-sched-field lanes from any results to OR later
3665          * - Final byte result is >> 10 to another byte-lane inside the u32.
3666          *   This makes the final combination OR easier to make the rte_event.
3667          */
3668         __m128i v_sched_done;
3669         __m128i v_sched_bits;
3670         {
3671                 static const uint8_t sched_type_map[16] = {
3672                         [DLB2_SCHED_ATOMIC] = RTE_SCHED_TYPE_ATOMIC,
3673                         [DLB2_SCHED_UNORDERED] = RTE_SCHED_TYPE_PARALLEL,
3674                         [DLB2_SCHED_ORDERED] = RTE_SCHED_TYPE_ORDERED,
3675                         [DLB2_SCHED_DIRECTED] = RTE_SCHED_TYPE_ATOMIC,
3676                 };
3677                 static const uint8_t sched_and_mask[16] = {
3678                         0x00, 0x00, 0x00, 0x03,
3679                         0x00, 0x00, 0x00, 0x03,
3680                         0x00, 0x00, 0x00, 0x03,
3681                         0x00, 0x00, 0x00, 0x03,
3682                 };
3683                 const __m128i v_sched_map = _mm_loadu_si128(
3684                                              (const __m128i *)sched_type_map);
3685                 __m128i v_sched_mask = _mm_loadu_si128(
3686                                              (const __m128i *)&sched_and_mask);
3687                 v_sched_bits = _mm_and_si128(v_qe_meta, v_sched_mask);
3688                 __m128i v_sched_remapped = _mm_shuffle_epi8(v_sched_map,
3689                                                             v_sched_bits);
3690                 __m128i v_preshift = _mm_and_si128(v_sched_remapped,
3691                                                    v_sched_mask);
3692                 v_sched_done = _mm_srli_epi32(v_preshift, 10);
3693         }
3694
3695         /* Priority handling
3696          * - QE provides 3 bits of priority
3697          * - Shift << 3 to move to MSBs for byte-prio in rte_event
3698          * - Mask bits to avoid pollution, leaving only 3 prio MSBs in reg
3699          */
3700         __m128i v_prio_done;
3701         {
3702                 static const uint8_t prio_mask[16] = {
3703                         0x00, 0x00, 0x00, 0x07 << 5,
3704                         0x00, 0x00, 0x00, 0x07 << 5,
3705                         0x00, 0x00, 0x00, 0x07 << 5,
3706                         0x00, 0x00, 0x00, 0x07 << 5,
3707                 };
3708                 __m128i v_prio_mask  = _mm_loadu_si128(
3709                                                 (const __m128i *)prio_mask);
3710                 __m128i v_prio_shifted = _mm_slli_epi32(v_qe_meta, 3);
3711                 v_prio_done = _mm_and_si128(v_prio_shifted, v_prio_mask);
3712         }
3713
3714         /* Event Sub/Type handling:
3715          * we want to keep the lower 12 bits of each QE. Shift up by 20 bits
3716          * to get the sub/ev type data into rte_event location, clearing the
3717          * lower 20 bits in the process.
3718          */
3719         __m128i v_types_done;
3720         {
3721                 static const uint8_t event_mask[16] = {
3722                         0x0f, 0x00, 0x00, 0x00,
3723                         0x0f, 0x00, 0x00, 0x00,
3724                         0x0f, 0x00, 0x00, 0x00,
3725                         0x0f, 0x00, 0x00, 0x00,
3726                 };
3727                 static const uint8_t sub_event_mask[16] = {
3728                         0xff, 0x00, 0x00, 0x00,
3729                         0xff, 0x00, 0x00, 0x00,
3730                         0xff, 0x00, 0x00, 0x00,
3731                         0xff, 0x00, 0x00, 0x00,
3732                 };
3733                 static const uint8_t flow_mask[16] = {
3734                         0xff, 0xff, 0x00, 0x00,
3735                         0xff, 0xff, 0x00, 0x00,
3736                         0xff, 0xff, 0x00, 0x00,
3737                         0xff, 0xff, 0x00, 0x00,
3738                 };
3739                 __m128i v_event_mask  = _mm_loadu_si128(
3740                                         (const __m128i *)event_mask);
3741                 __m128i v_sub_event_mask  = _mm_loadu_si128(
3742                                         (const __m128i *)sub_event_mask);
3743                 __m128i v_flow_mask  = _mm_loadu_si128(
3744                                        (const __m128i *)flow_mask);
3745                 __m128i v_sub = _mm_srli_epi32(v_qe_meta, 8);
3746                 v_sub = _mm_and_si128(v_sub, v_sub_event_mask);
3747                 __m128i v_type = _mm_and_si128(v_qe_meta, v_event_mask);
3748                 v_type = _mm_slli_epi32(v_type, 8);
3749                 v_types_done = _mm_or_si128(v_type, v_sub);
3750                 v_types_done = _mm_slli_epi32(v_types_done, 20);
3751                 __m128i v_flow = _mm_and_si128(v_qe_status, v_flow_mask);
3752                 v_types_done = _mm_or_si128(v_types_done, v_flow);
3753         }
3754
3755         /* Combine QID, Sched and Prio fields, then Shift >> 8 bits to align
3756          * with the rte_event, allowing unpacks to move/blend with payload.
3757          */
3758         __m128i v_q_s_p_done;
3759         {
3760                 __m128i v_qid_sched = _mm_or_si128(v_qid_done, v_sched_done);
3761                 __m128i v_q_s_prio = _mm_or_si128(v_qid_sched, v_prio_done);
3762                 v_q_s_p_done = _mm_srli_epi32(v_q_s_prio, 8);
3763         }
3764
3765         __m128i v_unpk_ev_23, v_unpk_ev_01, v_ev_2, v_ev_3, v_ev_0, v_ev_1;
3766
3767         /* Unpack events into u64 metadata, then individual events */
3768         v_unpk_ev_23 = _mm_unpackhi_epi32(v_types_done, v_q_s_p_done);
3769         v_unpk_ev_01 = _mm_unpacklo_epi32(v_types_done, v_q_s_p_done);
3770
3771         switch (valid_events) {
3772         case 4:
3773                 v_ev_3 = _mm_blend_epi16(v_unpk_ev_23, v_qe_3, 0x0F);
3774                 v_ev_3 = _mm_alignr_epi8(v_ev_3, v_ev_3, 8);
3775                 _mm_storeu_si128((__m128i *)&events[3], v_ev_3);
3776                 DLB2_INC_STAT(qm_port->ev_port->stats.rx_sched_cnt[hw_sched3],
3777                               1);
3778                 /* fallthrough */
3779         case 3:
3780                 v_ev_2 = _mm_unpacklo_epi64(v_unpk_ev_23, v_qe_2);
3781                 _mm_storeu_si128((__m128i *)&events[2], v_ev_2);
3782                 DLB2_INC_STAT(qm_port->ev_port->stats.rx_sched_cnt[hw_sched2],
3783                               1);
3784                 /* fallthrough */
3785         case 2:
3786                 v_ev_1 = _mm_blend_epi16(v_unpk_ev_01, v_qe_1, 0x0F);
3787                 v_ev_1 = _mm_alignr_epi8(v_ev_1, v_ev_1, 8);
3788                 _mm_storeu_si128((__m128i *)&events[1], v_ev_1);
3789                 DLB2_INC_STAT(qm_port->ev_port->stats.rx_sched_cnt[hw_sched1],
3790                               1);
3791                 /* fallthrough */
3792         case 1:
3793                 v_ev_0 = _mm_unpacklo_epi64(v_unpk_ev_01, v_qe_0);
3794                 _mm_storeu_si128((__m128i *)&events[0], v_ev_0);
3795                 DLB2_INC_STAT(qm_port->ev_port->stats.rx_sched_cnt[hw_sched0],
3796                               1);
3797         }
3798 }
3799
3800 static __rte_always_inline int
3801 dlb2_recv_qe_sparse_vec(struct dlb2_port *qm_port, void *events,
3802                         uint32_t max_events)
3803 {
3804         /* Using unmasked idx for perf, and masking manually */
3805         uint16_t idx = qm_port->cq_idx_unmasked;
3806         volatile struct dlb2_dequeue_qe *cq_addr;
3807
3808         cq_addr = dlb2_port[qm_port->id][PORT_TYPE(qm_port)].cq_base;
3809
3810         uintptr_t qe_ptr_3 = (uintptr_t)&cq_addr[(idx + 12) &
3811                                                  qm_port->cq_depth_mask];
3812         uintptr_t qe_ptr_2 = (uintptr_t)&cq_addr[(idx +  8) &
3813                                                  qm_port->cq_depth_mask];
3814         uintptr_t qe_ptr_1 = (uintptr_t)&cq_addr[(idx +  4) &
3815                                                  qm_port->cq_depth_mask];
3816         uintptr_t qe_ptr_0 = (uintptr_t)&cq_addr[(idx +  0) &
3817                                                  qm_port->cq_depth_mask];
3818
3819         /* Load QEs from CQ: use compiler barriers to avoid load reordering */
3820         __m128i v_qe_3 = _mm_loadu_si128((const __m128i *)qe_ptr_3);
3821         rte_compiler_barrier();
3822         __m128i v_qe_2 = _mm_loadu_si128((const __m128i *)qe_ptr_2);
3823         rte_compiler_barrier();
3824         __m128i v_qe_1 = _mm_loadu_si128((const __m128i *)qe_ptr_1);
3825         rte_compiler_barrier();
3826         __m128i v_qe_0 = _mm_loadu_si128((const __m128i *)qe_ptr_0);
3827
3828         /* Generate the pkt_shuffle mask:
3829          * - Avoids load in otherwise load-heavy section of code
3830          * - Moves bytes 3,7,11,15 (gen bit bytes) to LSB bytes in XMM
3831          */
3832         const uint32_t stat_shuf_bytes = (15 << 24) | (11 << 16) | (7 << 8) | 3;
3833         __m128i v_zeros = _mm_setzero_si128();
3834         __m128i v_ffff = _mm_cmpeq_epi8(v_zeros, v_zeros);
3835         __m128i v_stat_shuf_mask = _mm_insert_epi32(v_ffff, stat_shuf_bytes, 0);
3836
3837         /* Extract u32 components required from the QE
3838          * - QE[64 to 95 ] for metadata (qid, sched, prio, event type, ...)
3839          * - QE[96 to 127] for status (cq gen bit, error)
3840          *
3841          * Note that stage 1 of the unpacking is re-used for both u32 extracts
3842          */
3843         __m128i v_qe_02 = _mm_unpackhi_epi32(v_qe_0, v_qe_2);
3844         __m128i v_qe_13 = _mm_unpackhi_epi32(v_qe_1, v_qe_3);
3845         __m128i v_qe_status = _mm_unpackhi_epi32(v_qe_02, v_qe_13);
3846         __m128i v_qe_meta   = _mm_unpacklo_epi32(v_qe_02, v_qe_13);
3847
3848         /* Status byte (gen_bit, error) handling:
3849          * - Shuffle to lanes 0,1,2,3, clear all others
3850          * - Shift right by 7 for gen bit to MSB, movemask to scalar
3851          * - Shift right by 2 for error bit to MSB, movemask to scalar
3852          */
3853         __m128i v_qe_shuffled = _mm_shuffle_epi8(v_qe_status, v_stat_shuf_mask);
3854         __m128i v_qes_shift_gen_bit = _mm_slli_epi32(v_qe_shuffled, 7);
3855         int32_t qe_gen_bits = _mm_movemask_epi8(v_qes_shift_gen_bit) & 0xf;
3856
3857         /* Expected vs Reality of QE Gen bits
3858          * - cq_rolling_mask provides expected bits
3859          * - QE loads, unpacks/shuffle and movemask provides reality
3860          * - XOR of the two gives bitmask of new packets
3861          * - POPCNT to get the number of new events
3862          */
3863         uint64_t rolling = qm_port->cq_rolling_mask & 0xF;
3864         uint64_t qe_xor_bits = (qe_gen_bits ^ rolling);
3865         uint32_t count_new = __builtin_popcount(qe_xor_bits);
3866         count_new = RTE_MIN(count_new, max_events);
3867         if (!count_new)
3868                 return 0;
3869
3870         /* emulate a 128 bit rotate using 2x 64-bit numbers and bit-shifts */
3871
3872         uint64_t m_rshift = qm_port->cq_rolling_mask >> count_new;
3873         uint64_t m_lshift = qm_port->cq_rolling_mask << (64 - count_new);
3874         uint64_t m2_rshift = qm_port->cq_rolling_mask_2 >> count_new;
3875         uint64_t m2_lshift = qm_port->cq_rolling_mask_2 << (64 - count_new);
3876
3877         /* shifted out of m2 into MSB of m */
3878         qm_port->cq_rolling_mask = (m_rshift | m2_lshift);
3879
3880         /* shifted out of m "looped back" into MSB of m2 */
3881         qm_port->cq_rolling_mask_2 = (m2_rshift | m_lshift);
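        /* The low bits of cq_rolling_mask hold the expected gen bits for the
         * upcoming QE loads; rotating the 128-bit (mask_2:mask) pair right by
         * the number of newly seen QEs keeps bit 0 aligned with the next QE
         * to be read.
         */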
3882
3883         /* Prefetch the next QEs - their cost shows up as instructions (IPC) rather than stall cycles */
3884         rte_prefetch0(&cq_addr[(idx + 16) & qm_port->cq_depth_mask]);
3885         rte_prefetch0(&cq_addr[(idx + 20) & qm_port->cq_depth_mask]);
3886         rte_prefetch0(&cq_addr[(idx + 24) & qm_port->cq_depth_mask]);
3887         rte_prefetch0(&cq_addr[(idx + 28) & qm_port->cq_depth_mask]);
3888
3889         /* Convert QEs from XMM regs to events and store events directly */
3890         _process_deq_qes_vec_impl(qm_port, events, v_qe_3, v_qe_2, v_qe_1,
3891                                   v_qe_0, v_qe_meta, v_qe_status, count_new);
3892
3893         return count_new;
3894 }
3895
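/*
 * Advance the CQ index by 'cnt' slots. The unmasked index is kept for the
 * vector dequeue path, the masked index wraps at the CQ depth, and the
 * expected gen bit is the inverse of bit 'gen_bit_shift' of the unmasked
 * index, so it toggles each time the CQ wraps.
 */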
3896 static inline void
3897 dlb2_inc_cq_idx(struct dlb2_port *qm_port, int cnt)
3898 {
3899         uint16_t idx = qm_port->cq_idx_unmasked + cnt;
3900
3901         qm_port->cq_idx_unmasked = idx;
3902         qm_port->cq_idx = idx & qm_port->cq_depth_mask;
3903         qm_port->gen_bit = (~(idx >> qm_port->gen_bit_shift)) & 0x1;
3904 }
3905
3906 static inline int16_t
3907 dlb2_hw_dequeue_sparse(struct dlb2_eventdev *dlb2,
3908                        struct dlb2_eventdev_port *ev_port,
3909                        struct rte_event *events,
3910                        uint16_t max_num,
3911                        uint64_t dequeue_timeout_ticks)
3912 {
3913         uint64_t start_ticks = 0ULL;
3914         struct dlb2_port *qm_port;
3915         int num = 0;
3916         bool use_scalar;
3917         uint64_t timeout;
3918
3919         qm_port = &ev_port->qm_port;
3920         use_scalar = qm_port->use_scalar;
3921
3922         if (!dlb2->global_dequeue_wait)
3923                 timeout = dequeue_timeout_ticks;
3924         else
3925                 timeout = dlb2->global_dequeue_wait_ticks;
3926
3927         start_ticks = rte_get_timer_cycles();
3928
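        /* The vector path produces events four at a time, so fall back to
         * the scalar path when the requested count is not a multiple of 4.
         */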
3929         use_scalar = use_scalar || (max_num & 0x3);
3930
3931         while (num < max_num) {
3932                 struct dlb2_dequeue_qe qes[DLB2_NUM_QES_PER_CACHE_LINE];
3933                 int num_avail;
3934
3935                 if (use_scalar) {
3936                         int n_iter = 0;
3937                         uint64_t m_rshift, m_lshift, m2_rshift, m2_lshift;
3938
3939                         num_avail = dlb2_recv_qe_sparse(qm_port, qes);
3940                         num_avail = RTE_MIN(num_avail, max_num - num);
3941                         dlb2_inc_cq_idx(qm_port, num_avail << 2);
3942                         if (num_avail == DLB2_NUM_QES_PER_CACHE_LINE)
3943                                 n_iter = dlb2_process_dequeue_four_qes(ev_port,
3944                                                                 qm_port,
3945                                                                 &events[num],
3946                                                                 &qes[0]);
3947                         else if (num_avail)
3948                                 n_iter = dlb2_process_dequeue_qes(ev_port,
3949                                                                 qm_port,
3950                                                                 &events[num],
3951                                                                 &qes[0],
3952                                                                 num_avail);
3953                         if (n_iter != 0) {
3954                                 num += n_iter;
3955                                 /* update rolling_mask for vector code support */
3956                                 m_rshift = qm_port->cq_rolling_mask >> n_iter;
3957                                 m_lshift = qm_port->cq_rolling_mask << (64 - n_iter);
3958                                 m2_rshift = qm_port->cq_rolling_mask_2 >> n_iter;
3959                                 m2_lshift = qm_port->cq_rolling_mask_2 <<
3960                                         (64 - n_iter);
3961                                 qm_port->cq_rolling_mask = (m_rshift | m2_lshift);
3962                                 qm_port->cq_rolling_mask_2 = (m2_rshift | m_lshift);
3963                         }
3964                 } else { /* !use_scalar */
3965                         num_avail = dlb2_recv_qe_sparse_vec(qm_port,
3966                                                             &events[num],
3967                                                             max_num - num);
3968                         dlb2_inc_cq_idx(qm_port, num_avail << 2);
3969                         num += num_avail;
3970                         DLB2_INC_STAT(ev_port->stats.traffic.rx_ok, num_avail);
3971                 }
3972                 if (!num_avail) {
3973                         if ((timeout == 0) || (num > 0))
3974                                 /* Not waiting in any form, or 1+ events received */
3975                                 break;
3976                         else if (dlb2_dequeue_wait(dlb2, ev_port, qm_port,
3977                                                    timeout, start_ticks))
3978                                 break;
3979                 }
3980         }
3981
3982         qm_port->owed_tokens += num;
3983
3984         if (num) {
3985                 if (qm_port->token_pop_mode == AUTO_POP)
3986                         dlb2_consume_qe_immediate(qm_port, num);
3987
3988                 ev_port->outstanding_releases += num;
3989
3990                 dlb2_port_credits_inc(qm_port, num);
3991         }
3992
3993         return num;
3994 }
3995
3996 static __rte_always_inline int
3997 dlb2_recv_qe(struct dlb2_port *qm_port, struct dlb2_dequeue_qe *qe,
3998              uint8_t *offset)
3999 {
4000         uint8_t xor_mask[2][4] = { {0x0F, 0x0E, 0x0C, 0x08},
4001                                    {0x00, 0x01, 0x03, 0x07} };
4002         uint8_t and_mask[4] = {0x0F, 0x0E, 0x0C, 0x08};
4003         volatile struct dlb2_dequeue_qe *cq_addr;
4004         __m128i *qes = (__m128i *)qe;
4005         uint64_t *cache_line_base;
4006         uint8_t gen_bits;
4007
4008         cq_addr = dlb2_port[qm_port->id][PORT_TYPE(qm_port)].cq_base;
4009         cq_addr = &cq_addr[qm_port->cq_idx];
4010
4011         cache_line_base = (void *)(((uintptr_t)cq_addr) & ~0x3F);
4012         *offset = ((uintptr_t)cq_addr & 0x30) >> 4;
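        /* 'offset' is the QE slot (0-3) of the current CQ index within its
         * 64B cache line; the xor/and masks below consider only the gen bits
         * of that slot and the slots after it.
         */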
4013
4014         /* Load the next CQ cache line from memory. Pack these reads as tight
4015          * as possible to reduce the chance that DLB invalidates the line while
4016          * the CPU is reading it. Read the cache line backwards to ensure that
4017          * if QE[N] (N > 0) is valid, then QEs[0:N-1] are too.
4018          *
4019          * (Valid QEs start at &qe[offset])
4020          */
4021         qes[3] = _mm_load_si128((__m128i *)&cache_line_base[6]);
4022         qes[2] = _mm_load_si128((__m128i *)&cache_line_base[4]);
4023         qes[1] = _mm_load_si128((__m128i *)&cache_line_base[2]);
4024         qes[0] = _mm_load_si128((__m128i *)&cache_line_base[0]);
4025
4026         /* Evict the cache line ASAP */
4027         rte_cldemote(cache_line_base);
4028
4029         /* Extract and combine the gen bits */
4030         gen_bits = ((_mm_extract_epi8(qes[0], 15) & 0x1) << 0) |
4031                    ((_mm_extract_epi8(qes[1], 15) & 0x1) << 1) |
4032                    ((_mm_extract_epi8(qes[2], 15) & 0x1) << 2) |
4033                    ((_mm_extract_epi8(qes[3], 15) & 0x1) << 3);
4034
4035         /* XOR the combined bits such that a 1 represents a valid QE */
4036         gen_bits ^= xor_mask[qm_port->gen_bit][*offset];
4037
4038         /* Mask off gen bits we don't care about */
4039         gen_bits &= and_mask[*offset];
4040
4041         return __builtin_popcount(gen_bits);
4042 }
4043
4044 static inline int16_t
4045 dlb2_hw_dequeue(struct dlb2_eventdev *dlb2,
4046                 struct dlb2_eventdev_port *ev_port,
4047                 struct rte_event *events,
4048                 uint16_t max_num,
4049                 uint64_t dequeue_timeout_ticks)
4050 {
4051         uint64_t timeout;
4052         uint64_t start_ticks = 0ULL;
4053         struct dlb2_port *qm_port;
4054         int num = 0;
4055
4056         qm_port = &ev_port->qm_port;
4057
4058         /* We have a special implementation for waiting. Wait can be:
4059          * 1) no waiting at all
4060          * 2) busy poll only
4061          * 3) wait for interrupt; if it wakes up and the poll time
4062          * has expired, then return to the caller
4063          * 4) umonitor/umwait repeatedly up to poll time
4064          */
4065
4066         /* If configured for per dequeue wait, then use wait value provided
4067          * to this API. Otherwise we must use the global
4068          * value from eventdev config time.
4069          */
4070         if (!dlb2->global_dequeue_wait)
4071                 timeout = dequeue_timeout_ticks;
4072         else
4073                 timeout = dlb2->global_dequeue_wait_ticks;
4074
4075         start_ticks = rte_get_timer_cycles();
4076
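        /* Each iteration examines at most one CQ cache line (up to four
         * QEs) and converts the valid QEs into rte_events. If nothing is
         * available, either stop (no timeout, or events already gathered)
         * or wait according to the configured wait mode.
         */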
4077         while (num < max_num) {
4078                 struct dlb2_dequeue_qe qes[DLB2_NUM_QES_PER_CACHE_LINE];
4079                 uint8_t offset;
4080                 int num_avail;
4081
4082                 /* Copy up to 4 QEs from the current cache line into qes */
4083                 num_avail = dlb2_recv_qe(qm_port, qes, &offset);
4084
4085                 /* But don't process more than the user requested */
4086                 num_avail = RTE_MIN(num_avail, max_num - num);
4087
4088                 dlb2_inc_cq_idx(qm_port, num_avail);
4089
4090                 if (num_avail == DLB2_NUM_QES_PER_CACHE_LINE)
4091                         num += dlb2_process_dequeue_four_qes(ev_port,
4092                                                              qm_port,
4093                                                              &events[num],
4094                                                              &qes[offset]);
4095                 else if (num_avail)
4096                         num += dlb2_process_dequeue_qes(ev_port,
4097                                                         qm_port,
4098                                                         &events[num],
4099                                                         &qes[offset],
4100                                                         num_avail);
4101                 else if ((timeout == 0) || (num > 0))
4102                         /* Not waiting in any form, or 1+ events received? */
4103                         break;
4104                 else if (dlb2_dequeue_wait(dlb2, ev_port, qm_port,
4105                                            timeout, start_ticks))
4106                         break;
4107         }
4108
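        /* Track the CQ tokens owed for the dequeued QEs (returned to the
         * device immediately in auto pop mode), count the events as
         * outstanding releases, and replenish the port's credits.
         */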
4109         qm_port->owed_tokens += num;
4110
4111         if (num) {
4112                 if (qm_port->token_pop_mode == AUTO_POP)
4113                         dlb2_consume_qe_immediate(qm_port, num);
4114
4115                 ev_port->outstanding_releases += num;
4116
4117                 dlb2_port_credits_inc(qm_port, num);
4118         }
4119
4120         return num;
4121 }
4122
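/* Burst dequeue entry point, reached through rte_event_dequeue_burst(),
 * e.g. (illustrative application code only):
 *
 *     struct rte_event ev[32];
 *     uint16_t n = rte_event_dequeue_burst(dev_id, port_id, ev, 32, wait);
 *
 * Outstanding releases and owed CQ tokens from the previous call are
 * handled before new events are polled.
 */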
4123 static uint16_t
4124 dlb2_event_dequeue_burst(void *event_port, struct rte_event *ev, uint16_t num,
4125                          uint64_t wait)
4126 {
4127         struct dlb2_eventdev_port *ev_port = event_port;
4128         struct dlb2_port *qm_port = &ev_port->qm_port;
4129         struct dlb2_eventdev *dlb2 = ev_port->dlb2;
4130         uint16_t cnt;
4131
4132         RTE_ASSERT(ev_port->setup_done);
4133         RTE_ASSERT(ev != NULL);
4134
4135         if (ev_port->implicit_release && ev_port->outstanding_releases > 0) {
4136                 uint16_t out_rels = ev_port->outstanding_releases;
4137
4138                 dlb2_event_release(dlb2, ev_port->id, out_rels);
4139
4140                 DLB2_INC_STAT(ev_port->stats.tx_implicit_rel, out_rels);
4141         }
4142
4143         if (qm_port->token_pop_mode == DEFERRED_POP && qm_port->owed_tokens)
4144                 dlb2_consume_qe_immediate(qm_port, qm_port->owed_tokens);
4145
4146         cnt = dlb2_hw_dequeue(dlb2, ev_port, ev, num, wait);
4147
4148         DLB2_INC_STAT(ev_port->stats.traffic.total_polls, 1);
4149         DLB2_INC_STAT(ev_port->stats.traffic.zero_polls, ((cnt == 0) ? 1 : 0));
4150
4151         return cnt;
4152 }
4153
4154 static uint16_t
4155 dlb2_event_dequeue(void *event_port, struct rte_event *ev, uint64_t wait)
4156 {
4157         return dlb2_event_dequeue_burst(event_port, ev, 1, wait);
4158 }
4159
4160 static uint16_t
4161 dlb2_event_dequeue_burst_sparse(void *event_port, struct rte_event *ev,
4162                                 uint16_t num, uint64_t wait)
4163 {
4164         struct dlb2_eventdev_port *ev_port = event_port;
4165         struct dlb2_port *qm_port = &ev_port->qm_port;
4166         struct dlb2_eventdev *dlb2 = ev_port->dlb2;
4167         uint16_t cnt;
4168
4169         RTE_ASSERT(ev_port->setup_done);
4170         RTE_ASSERT(ev != NULL);
4171
4172         if (ev_port->implicit_release && ev_port->outstanding_releases > 0) {
4173                 uint16_t out_rels = ev_port->outstanding_releases;
4174
4175                 dlb2_event_release(dlb2, ev_port->id, out_rels);
4176
4177                 DLB2_INC_STAT(ev_port->stats.tx_implicit_rel, out_rels);
4178         }
4179
4180         if (qm_port->token_pop_mode == DEFERRED_POP && qm_port->owed_tokens)
4181                 dlb2_consume_qe_immediate(qm_port, qm_port->owed_tokens);
4182
4183         cnt = dlb2_hw_dequeue_sparse(dlb2, ev_port, ev, num, wait);
4184
4185         DLB2_INC_STAT(ev_port->stats.traffic.total_polls, 1);
4186         DLB2_INC_STAT(ev_port->stats.traffic.zero_polls, ((cnt == 0) ? 1 : 0));
4187         return cnt;
4188 }
4189
4190 static uint16_t
4191 dlb2_event_dequeue_sparse(void *event_port, struct rte_event *ev,
4192                           uint64_t wait)
4193 {
4194         return dlb2_event_dequeue_burst_sparse(event_port, ev, 1, wait);
4195 }
4196
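/* Dequeue and discard every event held by a port, invoking the
 * application's dev_stop_flush callback (if one is registered) on each
 * event and, for load-balanced ports, releasing each event back to the
 * device.
 */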
4197 static void
4198 dlb2_flush_port(struct rte_eventdev *dev, int port_id)
4199 {
4200         struct dlb2_eventdev *dlb2 = dlb2_pmd_priv(dev);
4201         eventdev_stop_flush_t flush;
4202         struct rte_event ev;
4203         uint8_t dev_id;
4204         void *arg;
4205         int i;
4206
4207         flush = dev->dev_ops->dev_stop_flush;
4208         dev_id = dev->data->dev_id;
4209         arg = dev->data->dev_stop_flush_arg;
4210
4211         while (rte_event_dequeue_burst(dev_id, port_id, &ev, 1, 0)) {
4212                 if (flush)
4213                         flush(dev_id, ev, arg);
4214
4215                 if (dlb2->ev_ports[port_id].qm_port.is_directed)
4216                         continue;
4217
4218                 ev.op = RTE_EVENT_OP_RELEASE;
4219
4220                 rte_event_enqueue_burst(dev_id, port_id, &ev, 1);
4221         }
4222
4223         /* Enqueue any additional outstanding releases */
4224         ev.op = RTE_EVENT_OP_RELEASE;
4225
4226         for (i = dlb2->ev_ports[port_id].outstanding_releases; i > 0; i--)
4227                 rte_event_enqueue_burst(dev_id, port_id, &ev, 1);
4228 }
4229
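/* Query the current depth of a load-balanced queue. The driver interface
 * reports the depth in the response's id field.
 */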
4230 static uint32_t
4231 dlb2_get_ldb_queue_depth(struct dlb2_eventdev *dlb2,
4232                          struct dlb2_eventdev_queue *queue)
4233 {
4234         struct dlb2_hw_dev *handle = &dlb2->qm_instance;
4235         struct dlb2_get_ldb_queue_depth_args cfg;
4236         int ret;
4237
4238         cfg.queue_id = queue->qm_queue.id;
4239
4240         ret = dlb2_iface_get_ldb_queue_depth(handle, &cfg);
4241         if (ret < 0) {
4242                 DLB2_LOG_ERR("dlb2: get_ldb_queue_depth ret=%d (driver status: %s)\n",
4243                              ret, dlb2_error_strings[cfg.response.status]);
4244                 return ret;
4245         }
4246
4247         return cfg.response.id;
4248 }
4249
4250 static uint32_t
4251 dlb2_get_dir_queue_depth(struct dlb2_eventdev *dlb2,
4252                          struct dlb2_eventdev_queue *queue)
4253 {
4254         struct dlb2_hw_dev *handle = &dlb2->qm_instance;
4255         struct dlb2_get_dir_queue_depth_args cfg;
4256         int ret;
4257
4258         cfg.queue_id = queue->qm_queue.id;
4259
4260         ret = dlb2_iface_get_dir_queue_depth(handle, &cfg);
4261         if (ret < 0) {
4262                 DLB2_LOG_ERR("dlb2: get_dir_queue_depth ret=%d (driver status: %s)\n",
4263                              ret, dlb2_error_strings[cfg.response.status]);
4264                 return ret;
4265         }
4266
4267         return cfg.response.id;
4268 }
4269
4270 uint32_t
4271 dlb2_get_queue_depth(struct dlb2_eventdev *dlb2,
4272                      struct dlb2_eventdev_queue *queue)
4273 {
4274         if (queue->qm_queue.is_directed)
4275                 return dlb2_get_dir_queue_depth(dlb2, queue);
4276         else
4277                 return dlb2_get_ldb_queue_depth(dlb2, queue);
4278 }
4279
4280 static bool
4281 dlb2_queue_is_empty(struct dlb2_eventdev *dlb2,
4282                     struct dlb2_eventdev_queue *queue)
4283 {
4284         return dlb2_get_queue_depth(dlb2, queue) == 0;
4285 }
4286
4287 static bool
4288 dlb2_linked_queues_empty(struct dlb2_eventdev *dlb2)
4289 {
4290         int i;
4291
4292         for (i = 0; i < dlb2->num_queues; i++) {
4293                 if (dlb2->ev_queues[i].num_links == 0)
4294                         continue;
4295                 if (!dlb2_queue_is_empty(dlb2, &dlb2->ev_queues[i]))
4296                         return false;
4297         }
4298
4299         return true;
4300 }
4301
4302 static bool
4303 dlb2_queues_empty(struct dlb2_eventdev *dlb2)
4304 {
4305         int i;
4306
4307         for (i = 0; i < dlb2->num_queues; i++) {
4308                 if (!dlb2_queue_is_empty(dlb2, &dlb2->ev_queues[i]))
4309                         return false;
4310         }
4311
4312         return true;
4313 }
4314
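/* Drain the device as part of stopping it: flush every port until all
 * linked queues are empty, then temporarily link a load-balanced port to
 * any remaining non-empty (unlinked) queues and flush those as well.
 */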
4315 static void
4316 dlb2_drain(struct rte_eventdev *dev)
4317 {
4318         struct dlb2_eventdev *dlb2 = dlb2_pmd_priv(dev);
4319         struct dlb2_eventdev_port *ev_port = NULL;
4320         uint8_t dev_id;
4321         int i;
4322
4323         dev_id = dev->data->dev_id;
4324
4325         while (!dlb2_linked_queues_empty(dlb2)) {
4326                 /* Flush all the ev_ports, which will drain all their connected
4327                  * queues.
4328                  */
4329                 for (i = 0; i < dlb2->num_ports; i++)
4330                         dlb2_flush_port(dev, i);
4331         }
4332
4333         /* The queues are empty, but there may be events left in the ports. */
4334         for (i = 0; i < dlb2->num_ports; i++)
4335                 dlb2_flush_port(dev, i);
4336
4337         /* If the domain's queues are empty, we're done. */
4338         if (dlb2_queues_empty(dlb2))
4339                 return;
4340
4341         /* Else, there must be at least one unlinked load-balanced queue.
4342          * Select a load-balanced port with which to drain the unlinked
4343          * queue(s).
4344          */
4345         for (i = 0; i < dlb2->num_ports; i++) {
4346                 ev_port = &dlb2->ev_ports[i];
4347
4348                 if (!ev_port->qm_port.is_directed)
4349                         break;
4350         }
4351
4352         if (i == dlb2->num_ports) {
4353                 DLB2_LOG_ERR("internal error: no LDB ev_ports\n");
4354                 return;
4355         }
4356
4357         rte_errno = 0;
4358         rte_event_port_unlink(dev_id, ev_port->id, NULL, 0);
4359
4360         if (rte_errno) {
4361                 DLB2_LOG_ERR("internal error: failed to unlink ev_port %d\n",
4362                              ev_port->id);
4363                 return;
4364         }
4365
4366         for (i = 0; i < dlb2->num_queues; i++) {
4367                 uint8_t qid, prio;
4368                 int ret;
4369
4370                 if (dlb2_queue_is_empty(dlb2, &dlb2->ev_queues[i]))
4371                         continue;
4372
4373                 qid = i;
4374                 prio = 0;
4375
4376                 /* Link the ev_port to the queue */
4377                 ret = rte_event_port_link(dev_id, ev_port->id, &qid, &prio, 1);
4378                 if (ret != 1) {
4379                         DLB2_LOG_ERR("internal error: failed to link ev_port %d to queue %d\n",
4380                                      ev_port->id, qid);
4381                         return;
4382                 }
4383
4384                 /* Flush the queue */
4385                 while (!dlb2_queue_is_empty(dlb2, &dlb2->ev_queues[i]))
4386                         dlb2_flush_port(dev, ev_port->id);
4387
4388                 /* Drain any extant events in the ev_port. */
4389                 dlb2_flush_port(dev, ev_port->id);
4390
4391                 /* Unlink the ev_port from the queue */
4392                 ret = rte_event_port_unlink(dev_id, ev_port->id, &qid, 1);
4393                 if (ret != 1) {
4394                         DLB2_LOG_ERR("internal error: failed to unlink ev_port %d from queue %d\n",
4395                                      ev_port->id, qid);
4396                         return;
4397                 }
4398         }
4399 }
4400
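/* eventdev stop: move the device to the STOPPING state under the resource
 * lock, drain all queues and ports, then mark the device STOPPED.
 */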
4401 static void
4402 dlb2_eventdev_stop(struct rte_eventdev *dev)
4403 {
4404         struct dlb2_eventdev *dlb2 = dlb2_pmd_priv(dev);
4405
4406         rte_spinlock_lock(&dlb2->qm_instance.resource_lock);
4407
4408         if (dlb2->run_state == DLB2_RUN_STATE_STOPPED) {
4409                 DLB2_LOG_DBG("Internal error: already stopped\n");
4410                 rte_spinlock_unlock(&dlb2->qm_instance.resource_lock);
4411                 return;
4412         } else if (dlb2->run_state != DLB2_RUN_STATE_STARTED) {
4413                 DLB2_LOG_ERR("Internal error: bad state %d for dev_stop\n",
4414                              (int)dlb2->run_state);
4415                 rte_spinlock_unlock(&dlb2->qm_instance.resource_lock);
4416                 return;
4417         }
4418
4419         dlb2->run_state = DLB2_RUN_STATE_STOPPING;
4420
4421         rte_spinlock_unlock(&dlb2->qm_instance.resource_lock);
4422
4423         dlb2_drain(dev);
4424
4425         dlb2->run_state = DLB2_RUN_STATE_STOPPED;
4426 }
4427
4428 static int
4429 dlb2_eventdev_close(struct rte_eventdev *dev)
4430 {
4431         dlb2_hw_reset_sched_domain(dev, false);
4432
4433         return 0;
4434 }
4435
4436 static void
4437 dlb2_eventdev_queue_release(struct rte_eventdev *dev, uint8_t id)
4438 {
4439         RTE_SET_USED(dev);
4440         RTE_SET_USED(id);
4441
4442         /* This function intentionally left blank. */
4443 }
4444
4445 static void
4446 dlb2_eventdev_port_release(void *port)
4447 {
4448         struct dlb2_eventdev_port *ev_port = port;
4449         struct dlb2_port *qm_port;
4450
4451         if (ev_port) {
4452                 qm_port = &ev_port->qm_port;
4453                 if (qm_port->config_state == DLB2_CONFIGURED)
4454                         dlb2_free_qe_mem(qm_port);
4455         }
4456 }
4457
4458 static int
4459 dlb2_eventdev_timeout_ticks(struct rte_eventdev *dev, uint64_t ns,
4460                             uint64_t *timeout_ticks)
4461 {
4462         RTE_SET_USED(dev);
4463         uint64_t cycles_per_ns = rte_get_timer_hz() / 1E9;
4464
4465         *timeout_ticks = ns * cycles_per_ns;
4466
4467         return 0;
4468 }
4469
4470 static void
4471 dlb2_entry_points_init(struct rte_eventdev *dev)
4472 {
4473         struct dlb2_eventdev *dlb2;
4474
4475         /* Expose PMD's eventdev interface */
4476         static struct eventdev_ops dlb2_eventdev_entry_ops = {
4477                 .dev_infos_get    = dlb2_eventdev_info_get,
4478                 .dev_configure    = dlb2_eventdev_configure,
4479                 .dev_start        = dlb2_eventdev_start,
4480                 .dev_stop         = dlb2_eventdev_stop,
4481                 .dev_close        = dlb2_eventdev_close,
4482                 .queue_def_conf   = dlb2_eventdev_queue_default_conf_get,
4483                 .queue_setup      = dlb2_eventdev_queue_setup,
4484                 .queue_release    = dlb2_eventdev_queue_release,
4485                 .port_def_conf    = dlb2_eventdev_port_default_conf_get,
4486                 .port_setup       = dlb2_eventdev_port_setup,
4487                 .port_release     = dlb2_eventdev_port_release,
4488                 .port_link        = dlb2_eventdev_port_link,
4489                 .port_unlink      = dlb2_eventdev_port_unlink,
4490                 .port_unlinks_in_progress =
4491                                     dlb2_eventdev_port_unlinks_in_progress,
4492                 .timeout_ticks    = dlb2_eventdev_timeout_ticks,
4493                 .dump             = dlb2_eventdev_dump,
4494                 .xstats_get       = dlb2_eventdev_xstats_get,
4495                 .xstats_get_names = dlb2_eventdev_xstats_get_names,
4496                 .xstats_get_by_name = dlb2_eventdev_xstats_get_by_name,
4497                 .xstats_reset       = dlb2_eventdev_xstats_reset,
4498                 .dev_selftest     = test_dlb2_eventdev,
4499         };
4500
4501         /* Install the ops table and fast-path enqueue/dequeue entry points */
4502
4503         dev->dev_ops = &dlb2_eventdev_entry_ops;
4504         dev->enqueue = dlb2_event_enqueue;
4505         dev->enqueue_burst = dlb2_event_enqueue_burst;
4506         dev->enqueue_new_burst = dlb2_event_enqueue_new_burst;
4507         dev->enqueue_forward_burst = dlb2_event_enqueue_forward_burst;
4508
4509         dlb2 = dev->data->dev_private;
4510         if (dlb2->poll_mode == DLB2_CQ_POLL_MODE_SPARSE) {
4511                 dev->dequeue = dlb2_event_dequeue_sparse;
4512                 dev->dequeue_burst = dlb2_event_dequeue_burst_sparse;
4513         } else {
4514                 dev->dequeue = dlb2_event_dequeue;
4515                 dev->dequeue_burst = dlb2_event_dequeue_burst;
4516         }
4517 }
4518
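/* Primary-process probe: apply devargs overrides, open the hardware
 * device, query its resources, version and CQ poll mode, initialize
 * xstats and per-port token pop defaults, and install the eventdev
 * entry points.
 */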
4519 int
4520 dlb2_primary_eventdev_probe(struct rte_eventdev *dev,
4521                             const char *name,
4522                             struct dlb2_devargs *dlb2_args)
4523 {
4524         struct dlb2_eventdev *dlb2;
4525         int err, i;
4526
4527         dlb2 = dev->data->dev_private;
4528
4529         dlb2->event_dev = dev; /* backlink */
4530
4531         evdev_dlb2_default_info.driver_name = name;
4532
4533         dlb2->max_num_events_override = dlb2_args->max_num_events;
4534         dlb2->num_dir_credits_override = dlb2_args->num_dir_credits_override;
4535         dlb2->qm_instance.cos_id = dlb2_args->cos_id;
4536         dlb2->poll_interval = dlb2_args->poll_interval;
4537         dlb2->sw_credit_quanta = dlb2_args->sw_credit_quanta;
4538         dlb2->hw_credit_quanta = dlb2_args->hw_credit_quanta;
4539         dlb2->default_depth_thresh = dlb2_args->default_depth_thresh;
4540         dlb2->vector_opts_enabled = dlb2_args->vector_opts_enabled;
4541         dlb2->max_cq_depth = dlb2_args->max_cq_depth;
4542
4543         err = dlb2_iface_open(&dlb2->qm_instance, name);
4544         if (err < 0) {
4545                 DLB2_LOG_ERR("could not open event hardware device, err=%d\n",
4546                              err);
4547                 return err;
4548         }
4549
4550         err = dlb2_iface_get_device_version(&dlb2->qm_instance,
4551                                             &dlb2->revision);
4552         if (err < 0) {
4553                 DLB2_LOG_ERR("dlb2: failed to get the device version, err=%d\n",
4554                              err);
4555                 return err;
4556         }
4557
4558         err = dlb2_hw_query_resources(dlb2);
4559         if (err) {
4560                 DLB2_LOG_ERR("get resources err=%d for %s\n",
4561                              err, name);
4562                 return err;
4563         }
4564
4565         dlb2_iface_hardware_init(&dlb2->qm_instance);
4566
4567         err = dlb2_iface_get_cq_poll_mode(&dlb2->qm_instance, &dlb2->poll_mode);
4568         if (err < 0) {
4569                 DLB2_LOG_ERR("dlb2: failed to get the poll mode, err=%d\n",
4570                              err);
4571                 return err;
4572         }
4573
4574         /* Complete xstats runtime initialization */
4575         err = dlb2_xstats_init(dlb2);
4576         if (err) {
4577                 DLB2_LOG_ERR("dlb2: failed to init xstats, err=%d\n", err);
4578                 return err;
4579         }
4580
4581         /* Initialize each port's token pop mode */
4582         for (i = 0; i < DLB2_MAX_NUM_PORTS(dlb2->version); i++)
4583                 dlb2->ev_ports[i].qm_port.token_pop_mode = AUTO_POP;
4584
4585         rte_spinlock_init(&dlb2->qm_instance.resource_lock);
4586
4587         dlb2_iface_low_level_io_init();
4588
4589         dlb2_entry_points_init(dev);
4590
4591         dlb2_init_queue_depth_thresholds(dlb2,
4592                                          dlb2_args->qid_depth_thresholds.val);
4593
4594         return 0;
4595 }
4596
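/* Secondary-process probe: reopen the device, re-query its resources, and
 * re-initialize the low-level I/O helpers and eventdev entry points for
 * this process.
 */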
4597 int
4598 dlb2_secondary_eventdev_probe(struct rte_eventdev *dev,
4599                               const char *name)
4600 {
4601         struct dlb2_eventdev *dlb2;
4602         int err;
4603
4604         dlb2 = dev->data->dev_private;
4605
4606         evdev_dlb2_default_info.driver_name = name;
4607
4608         err = dlb2_iface_open(&dlb2->qm_instance, name);
4609         if (err < 0) {
4610                 DLB2_LOG_ERR("could not open event hardware device, err=%d\n",
4611                              err);
4612                 return err;
4613         }
4614
4615         err = dlb2_hw_query_resources(dlb2);
4616         if (err) {
4617                 DLB2_LOG_ERR("get resources err=%d for %s\n",
4618                              err, name);
4619                 return err;
4620         }
4621
4622         dlb2_iface_low_level_io_init();
4623
4624         dlb2_entry_points_init(dev);
4625
4626         return 0;
4627 }
4628
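/* Parse the comma-separated key=value devargs supplied when the device is
 * created, e.g. something like:
 *
 *     max_num_events=2048,poll_interval=1000
 *
 * (Key names and values above are indicative only; the accepted key
 * strings are the ones behind the macros listed in args[] below.) Unknown
 * keys cause rte_kvargs_parse() to fail, in which case the parameters are
 * ignored with an informational log.
 */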
4629 int
4630 dlb2_parse_params(const char *params,
4631                   const char *name,
4632                   struct dlb2_devargs *dlb2_args,
4633                   uint8_t version)
4634 {
4635         int ret = 0;
4636         static const char * const args[] = { NUMA_NODE_ARG,
4637                                              DLB2_MAX_NUM_EVENTS,
4638                                              DLB2_NUM_DIR_CREDITS,
4639                                              DEV_ID_ARG,
4640                                              DLB2_QID_DEPTH_THRESH_ARG,
4641                                              DLB2_COS_ARG,
4642                                              DLB2_POLL_INTERVAL_ARG,
4643                                              DLB2_SW_CREDIT_QUANTA_ARG,
4644                                              DLB2_HW_CREDIT_QUANTA_ARG,
4645                                              DLB2_DEPTH_THRESH_ARG,
4646                                              DLB2_VECTOR_OPTS_ENAB_ARG,
4647                                              DLB2_MAX_CQ_DEPTH,
4648                                              NULL };
4649
4650         if (params != NULL && params[0] != '\0') {
4651                 struct rte_kvargs *kvlist = rte_kvargs_parse(params, args);
4652
4653                 if (kvlist == NULL) {
4654                         RTE_LOG(INFO, PMD,
4655                                 "Ignoring unsupported parameters when creating device '%s'\n",
4656                                 name);
4657                 } else {
4658                         ret = rte_kvargs_process(kvlist, NUMA_NODE_ARG,
4659                                                  set_numa_node,
4660                                                  &dlb2_args->socket_id);
4661                         if (ret != 0) {
4662                                 DLB2_LOG_ERR("%s: Error parsing numa node parameter",
4663                                              name);
4664                                 rte_kvargs_free(kvlist);
4665                                 return ret;
4666                         }
4667
4668                         ret = rte_kvargs_process(kvlist, DLB2_MAX_NUM_EVENTS,
4669                                                  set_max_num_events,
4670                                                  &dlb2_args->max_num_events);
4671                         if (ret != 0) {
4672                                 DLB2_LOG_ERR("%s: Error parsing max_num_events parameter",
4673                                              name);
4674                                 rte_kvargs_free(kvlist);
4675                                 return ret;
4676                         }
4677
4678                         if (version == DLB2_HW_V2) {
4679                                 ret = rte_kvargs_process(kvlist,
4680                                         DLB2_NUM_DIR_CREDITS,
4681                                         set_num_dir_credits,
4682                                         &dlb2_args->num_dir_credits_override);
4683                                 if (ret != 0) {
4684                                         DLB2_LOG_ERR("%s: Error parsing num_dir_credits parameter",
4685                                                      name);
4686                                         rte_kvargs_free(kvlist);
4687                                         return ret;
4688                                 }
4689                         }
4690                         ret = rte_kvargs_process(kvlist, DEV_ID_ARG,
4691                                                  set_dev_id,
4692                                                  &dlb2_args->dev_id);
4693                         if (ret != 0) {
4694                                 DLB2_LOG_ERR("%s: Error parsing dev_id parameter",
4695                                              name);
4696                                 rte_kvargs_free(kvlist);
4697                                 return ret;
4698                         }
4699
4700                         if (version == DLB2_HW_V2) {
4701                                 ret = rte_kvargs_process(
4702                                         kvlist,
4703                                         DLB2_QID_DEPTH_THRESH_ARG,
4704                                         set_qid_depth_thresh,
4705                                         &dlb2_args->qid_depth_thresholds);
4706                         } else {
4707                                 ret = rte_kvargs_process(
4708                                         kvlist,
4709                                         DLB2_QID_DEPTH_THRESH_ARG,
4710                                         set_qid_depth_thresh_v2_5,
4711                                         &dlb2_args->qid_depth_thresholds);
4712                         }
4713                         if (ret != 0) {
4714                                 DLB2_LOG_ERR("%s: Error parsing qid_depth_thresh parameter",
4715                                              name);
4716                                 rte_kvargs_free(kvlist);
4717                                 return ret;
4718                         }
4719
4720                         ret = rte_kvargs_process(kvlist, DLB2_COS_ARG,
4721                                                  set_cos,
4722                                                  &dlb2_args->cos_id);
4723                         if (ret != 0) {
4724                                 DLB2_LOG_ERR("%s: Error parsing cos parameter",
4725                                              name);
4726                                 rte_kvargs_free(kvlist);
4727                                 return ret;
4728                         }
4729
4730                         ret = rte_kvargs_process(kvlist, DLB2_POLL_INTERVAL_ARG,
4731                                                  set_poll_interval,
4732                                                  &dlb2_args->poll_interval);
4733                         if (ret != 0) {
4734                                 DLB2_LOG_ERR("%s: Error parsing poll interval parameter",
4735                                              name);
4736                                 rte_kvargs_free(kvlist);
4737                                 return ret;
4738                         }
4739
4740                         ret = rte_kvargs_process(kvlist,
4741                                                  DLB2_SW_CREDIT_QUANTA_ARG,
4742                                                  set_sw_credit_quanta,
4743                                                  &dlb2_args->sw_credit_quanta);
4744                         if (ret != 0) {
4745                                 DLB2_LOG_ERR("%s: Error parsing sw credit quanta parameter",
4746                                              name);
4747                                 rte_kvargs_free(kvlist);
4748                                 return ret;
4749                         }
4750
4751                         ret = rte_kvargs_process(kvlist,
4752                                                  DLB2_HW_CREDIT_QUANTA_ARG,
4753                                                  set_hw_credit_quanta,
4754                                                  &dlb2_args->hw_credit_quanta);
4755                         if (ret != 0) {
4756                                 DLB2_LOG_ERR("%s: Error parsing hw credit quanta parameter",
4757                                              name);
4758                                 rte_kvargs_free(kvlist);
4759                                 return ret;
4760                         }
4761
4762                         ret = rte_kvargs_process(kvlist, DLB2_DEPTH_THRESH_ARG,
4763                                         set_default_depth_thresh,
4764                                         &dlb2_args->default_depth_thresh);
4765                         if (ret != 0) {
4766                                 DLB2_LOG_ERR("%s: Error parsing default depth thresh parameter",
4767                                              name);
4768                                 rte_kvargs_free(kvlist);
4769                                 return ret;
4770                         }
4771
4772                         ret = rte_kvargs_process(kvlist,
4773                                         DLB2_VECTOR_OPTS_ENAB_ARG,
4774                                         set_vector_opts_enab,
4775                                         &dlb2_args->vector_opts_enabled);
4776                         if (ret != 0) {
4777                                 DLB2_LOG_ERR("%s: Error parsing vector opts enabled",
4778                                              name);
4779                                 rte_kvargs_free(kvlist);
4780                                 return ret;
4781                         }
4782
4783                         ret = rte_kvargs_process(kvlist,
4784                                         DLB2_MAX_CQ_DEPTH,
4785                                         set_max_cq_depth,
4786                                         &dlb2_args->max_cq_depth);
4787                         if (ret != 0) {
4788                                 DLB2_LOG_ERR("%s: Error parsing max cq depth parameter",
4789                                              name);
4790                                 rte_kvargs_free(kvlist);
4791                                 return ret;
4792                         }
4793
4794                         rte_kvargs_free(kvlist);
4795                 }
4796         }
4797         return ret;
4798 }
4799 RTE_LOG_REGISTER_DEFAULT(eventdev_dlb2_log_level, NOTICE);