event/dlb2: add timeout ticks entry point
[dpdk.git] / drivers / event / dlb2 / dlb2.c
1 /* SPDX-License-Identifier: BSD-3-Clause
2  * Copyright(c) 2016-2020 Intel Corporation
3  */
4
5 #include <assert.h>
6 #include <errno.h>
7 #include <nmmintrin.h>
8 #include <pthread.h>
9 #include <stdint.h>
10 #include <stdbool.h>
11 #include <stdio.h>
12 #include <string.h>
13 #include <sys/mman.h>
14 #include <sys/fcntl.h>
15
16 #include <rte_common.h>
17 #include <rte_config.h>
18 #include <rte_cycles.h>
19 #include <rte_debug.h>
20 #include <rte_dev.h>
21 #include <rte_errno.h>
22 #include <rte_eventdev.h>
23 #include <rte_eventdev_pmd.h>
24 #include <rte_io.h>
25 #include <rte_kvargs.h>
26 #include <rte_log.h>
27 #include <rte_malloc.h>
28 #include <rte_mbuf.h>
29 #include <rte_power_intrinsics.h>
30 #include <rte_prefetch.h>
31 #include <rte_ring.h>
32 #include <rte_string_fns.h>
33
34 #include "dlb2_priv.h"
35 #include "dlb2_iface.h"
36 #include "dlb2_inline_fns.h"
37
38 /*
39  * Resources exposed to eventdev. Some values overridden at runtime using
40  * values returned by the DLB kernel driver.
41  */
42 #if (RTE_EVENT_MAX_QUEUES_PER_DEV > UINT8_MAX)
43 #error "RTE_EVENT_MAX_QUEUES_PER_DEV cannot fit in member max_event_queues"
44 #endif
45 static struct rte_event_dev_info evdev_dlb2_default_info = {
46         .driver_name = "", /* probe will set */
47         .min_dequeue_timeout_ns = DLB2_MIN_DEQUEUE_TIMEOUT_NS,
48         .max_dequeue_timeout_ns = DLB2_MAX_DEQUEUE_TIMEOUT_NS,
49 #if (RTE_EVENT_MAX_QUEUES_PER_DEV < DLB2_MAX_NUM_LDB_QUEUES)
50         .max_event_queues = RTE_EVENT_MAX_QUEUES_PER_DEV,
51 #else
52         .max_event_queues = DLB2_MAX_NUM_LDB_QUEUES,
53 #endif
54         .max_event_queue_flows = DLB2_MAX_NUM_FLOWS,
55         .max_event_queue_priority_levels = DLB2_QID_PRIORITIES,
56         .max_event_priority_levels = DLB2_QID_PRIORITIES,
57         .max_event_ports = DLB2_MAX_NUM_LDB_PORTS,
58         .max_event_port_dequeue_depth = DLB2_MAX_CQ_DEPTH,
59         .max_event_port_enqueue_depth = DLB2_MAX_ENQUEUE_DEPTH,
60         .max_event_port_links = DLB2_MAX_NUM_QIDS_PER_LDB_CQ,
61         .max_num_events = DLB2_MAX_NUM_LDB_CREDITS,
62         .max_single_link_event_port_queue_pairs = DLB2_MAX_NUM_DIR_PORTS,
63         .event_dev_cap = (RTE_EVENT_DEV_CAP_QUEUE_QOS |
64                           RTE_EVENT_DEV_CAP_EVENT_QOS |
65                           RTE_EVENT_DEV_CAP_BURST_MODE |
66                           RTE_EVENT_DEV_CAP_DISTRIBUTED_SCHED |
67                           RTE_EVENT_DEV_CAP_IMPLICIT_RELEASE_DISABLE |
68                           RTE_EVENT_DEV_CAP_QUEUE_ALL_TYPES),
69 };
70
71 struct process_local_port_data
72 dlb2_port[DLB2_MAX_NUM_PORTS][DLB2_NUM_PORT_TYPES];
73
74 static void
75 dlb2_free_qe_mem(struct dlb2_port *qm_port)
76 {
77         if (qm_port == NULL)
78                 return;
79
80         rte_free(qm_port->qe4);
81         qm_port->qe4 = NULL;
82
83         rte_free(qm_port->int_arm_qe);
84         qm_port->int_arm_qe = NULL;
85
86         rte_free(qm_port->consume_qe);
87         qm_port->consume_qe = NULL;
88
89         rte_memzone_free(dlb2_port[qm_port->id][PORT_TYPE(qm_port)].mz);
90         dlb2_port[qm_port->id][PORT_TYPE(qm_port)].mz = NULL;
91 }
92
93 /* override defaults with value(s) provided on command line */
94 static void
95 dlb2_init_queue_depth_thresholds(struct dlb2_eventdev *dlb2,
96                                  int *qid_depth_thresholds)
97 {
98         int q;
99
100         for (q = 0; q < DLB2_MAX_NUM_QUEUES; q++) {
101                 if (qid_depth_thresholds[q] != 0)
102                         dlb2->ev_queues[q].depth_threshold =
103                                 qid_depth_thresholds[q];
104         }
105 }
106
107 static int
108 dlb2_hw_query_resources(struct dlb2_eventdev *dlb2)
109 {
110         struct dlb2_hw_dev *handle = &dlb2->qm_instance;
111         struct dlb2_hw_resource_info *dlb2_info = &handle->info;
112         int ret;
113
114         /* Query driver resources provisioned for this device */
115
116         ret = dlb2_iface_get_num_resources(handle,
117                                            &dlb2->hw_rsrc_query_results);
118         if (ret) {
119                 DLB2_LOG_ERR("ioctl get dlb2 num resources, err=%d\n", ret);
120                 return ret;
121         }
122
123         /* Complete filling in device resource info returned to evdev app,
124          * overriding any default values.
125          * The capabilities (CAPs) were set at compile time.
126          */
127
128         evdev_dlb2_default_info.max_event_queues =
129                 dlb2->hw_rsrc_query_results.num_ldb_queues;
130
131         evdev_dlb2_default_info.max_event_ports =
132                 dlb2->hw_rsrc_query_results.num_ldb_ports;
133
134         evdev_dlb2_default_info.max_num_events =
135                 dlb2->hw_rsrc_query_results.num_ldb_credits;
136
137         /* Save off values used when creating the scheduling domain. */
138
139         handle->info.num_sched_domains =
140                 dlb2->hw_rsrc_query_results.num_sched_domains;
141
142         handle->info.hw_rsrc_max.nb_events_limit =
143                 dlb2->hw_rsrc_query_results.num_ldb_credits;
144
145         handle->info.hw_rsrc_max.num_queues =
146                 dlb2->hw_rsrc_query_results.num_ldb_queues +
147                 dlb2->hw_rsrc_query_results.num_dir_ports;
148
149         handle->info.hw_rsrc_max.num_ldb_queues =
150                 dlb2->hw_rsrc_query_results.num_ldb_queues;
151
152         handle->info.hw_rsrc_max.num_ldb_ports =
153                 dlb2->hw_rsrc_query_results.num_ldb_ports;
154
155         handle->info.hw_rsrc_max.num_dir_ports =
156                 dlb2->hw_rsrc_query_results.num_dir_ports;
157
158         handle->info.hw_rsrc_max.reorder_window_size =
159                 dlb2->hw_rsrc_query_results.num_hist_list_entries;
160
161         rte_memcpy(dlb2_info, &handle->info.hw_rsrc_max, sizeof(*dlb2_info));
162
163         return 0;
164 }
165
166 #define DLB2_BASE_10 10
167
168 static int
169 dlb2_string_to_int(int *result, const char *str)
170 {
171         long ret;
172         char *endptr;
173
174         if (str == NULL || result == NULL)
175                 return -EINVAL;
176
177         errno = 0;
178         ret = strtol(str, &endptr, DLB2_BASE_10);
179         if (errno)
180                 return -errno;
181
182         /* long int and int may be different width for some architectures */
183         if (ret < INT_MIN || ret > INT_MAX || endptr == str)
184                 return -EINVAL;
185
186         *result = ret;
187         return 0;
188 }
189
190 static int
191 set_numa_node(const char *key __rte_unused, const char *value, void *opaque)
192 {
193         int *socket_id = opaque;
194         int ret;
195
196         ret = dlb2_string_to_int(socket_id, value);
197         if (ret < 0)
198                 return ret;
199
200         if (*socket_id > RTE_MAX_NUMA_NODES)
201                 return -EINVAL;
202         return 0;
203 }
204
205 static int
206 set_max_num_events(const char *key __rte_unused,
207                    const char *value,
208                    void *opaque)
209 {
210         int *max_num_events = opaque;
211         int ret;
212
213         if (value == NULL || opaque == NULL) {
214                 DLB2_LOG_ERR("NULL pointer\n");
215                 return -EINVAL;
216         }
217
218         ret = dlb2_string_to_int(max_num_events, value);
219         if (ret < 0)
220                 return ret;
221
222         if (*max_num_events < 0 || *max_num_events >
223                         DLB2_MAX_NUM_LDB_CREDITS) {
224                 DLB2_LOG_ERR("dlb2: max_num_events must be between 0 and %d\n",
225                              DLB2_MAX_NUM_LDB_CREDITS);
226                 return -EINVAL;
227         }
228
229         return 0;
230 }
231
232 static int
233 set_num_dir_credits(const char *key __rte_unused,
234                     const char *value,
235                     void *opaque)
236 {
237         int *num_dir_credits = opaque;
238         int ret;
239
240         if (value == NULL || opaque == NULL) {
241                 DLB2_LOG_ERR("NULL pointer\n");
242                 return -EINVAL;
243         }
244
245         ret = dlb2_string_to_int(num_dir_credits, value);
246         if (ret < 0)
247                 return ret;
248
249         if (*num_dir_credits < 0 ||
250             *num_dir_credits > DLB2_MAX_NUM_DIR_CREDITS) {
251                 DLB2_LOG_ERR("dlb2: num_dir_credits must be between 0 and %d\n",
252                              DLB2_MAX_NUM_DIR_CREDITS);
253                 return -EINVAL;
254         }
255
256         return 0;
257 }
258
259 static int
260 set_dev_id(const char *key __rte_unused,
261            const char *value,
262            void *opaque)
263 {
264         int *dev_id = opaque;
265         int ret;
266
267         if (value == NULL || opaque == NULL) {
268                 DLB2_LOG_ERR("NULL pointer\n");
269                 return -EINVAL;
270         }
271
272         ret = dlb2_string_to_int(dev_id, value);
273         if (ret < 0)
274                 return ret;
275
276         return 0;
277 }
278
279 static int
280 set_cos(const char *key __rte_unused,
281         const char *value,
282         void *opaque)
283 {
284         enum dlb2_cos *cos_id = opaque;
285         int x = 0;
286         int ret;
287
288         if (value == NULL || opaque == NULL) {
289                 DLB2_LOG_ERR("NULL pointer\n");
290                 return -EINVAL;
291         }
292
293         ret = dlb2_string_to_int(&x, value);
294         if (ret < 0)
295                 return ret;
296
297         if (x != DLB2_COS_DEFAULT && (x < DLB2_COS_0 || x > DLB2_COS_3)) {
298                 DLB2_LOG_ERR(
299                         "COS %d out of range, must be DLB2_COS_DEFAULT or 0-3\n",
300                         x);
301                 return -EINVAL;
302         }
303
304         *cos_id = x;
305
306         return 0;
307 }
308
309
310 static int
311 set_qid_depth_thresh(const char *key __rte_unused,
312                      const char *value,
313                      void *opaque)
314 {
315         struct dlb2_qid_depth_thresholds *qid_thresh = opaque;
316         int first, last, thresh, i;
317
318         if (value == NULL || opaque == NULL) {
319                 DLB2_LOG_ERR("NULL pointer\n");
320                 return -EINVAL;
321         }
322
323         /* command line override may take one of the following 3 forms:
324          * qid_depth_thresh=all:<threshold_value> ... all queues
325          * qid_depth_thresh=qidA-qidB:<threshold_value> ... a range of queues
326          * qid_depth_thresh=qid:<threshold_value> ... just one queue
327          */
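        /* Illustrative examples (threshold values here are arbitrary):
         *   qid_depth_thresh=all:256   ... threshold 256 for every queue
         *   qid_depth_thresh=2-5:1024  ... threshold 1024 for queues 2 to 5
         *   qid_depth_thresh=7:512     ... threshold 512 for queue 7 only
         */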
328         if (sscanf(value, "all:%d", &thresh) == 1) {
329                 first = 0;
330                 last = DLB2_MAX_NUM_QUEUES - 1;
331         } else if (sscanf(value, "%d-%d:%d", &first, &last, &thresh) == 3) {
332                 /* we have everything we need */
333         } else if (sscanf(value, "%d:%d", &first, &thresh) == 2) {
334                 last = first;
335         } else {
336                 DLB2_LOG_ERR("Error parsing qid depth devarg. Should be all:val, qid-qid:val, or qid:val\n");
337                 return -EINVAL;
338         }
339
340         if (first > last || first < 0 || last >= DLB2_MAX_NUM_QUEUES) {
341                 DLB2_LOG_ERR("Error parsing qid depth devarg, invalid qid value\n");
342                 return -EINVAL;
343         }
344
345         if (thresh < 0 || thresh > DLB2_MAX_QUEUE_DEPTH_THRESHOLD) {
346                 DLB2_LOG_ERR("Error parsing qid depth devarg, threshold must be 0-%d\n",
347                              DLB2_MAX_QUEUE_DEPTH_THRESHOLD);
348                 return -EINVAL;
349         }
350
351         for (i = first; i <= last; i++)
352                 qid_thresh->val[i] = thresh; /* indexed by qid */
353
354         return 0;
355 }
356
357 static void
358 dlb2_eventdev_info_get(struct rte_eventdev *dev,
359                        struct rte_event_dev_info *dev_info)
360 {
361         struct dlb2_eventdev *dlb2 = dlb2_pmd_priv(dev);
362         int ret;
363
364         ret = dlb2_hw_query_resources(dlb2);
365         if (ret) {
366                 const struct rte_eventdev_data *data = dev->data;
367
368                 DLB2_LOG_ERR("get resources err=%d, devid=%d\n",
369                              ret, data->dev_id);
370                 /* fn is void, so fall through and return values set up in
371                  * probe
372                  */
373         }
374
375         /* Add num resources currently owned by this domain.
376          * These would become available if the scheduling domain were reset
377          * due to the application calling eventdev_configure again to
378          * *reconfigure* the domain.
379          */
380         evdev_dlb2_default_info.max_event_ports += dlb2->num_ldb_ports;
381         evdev_dlb2_default_info.max_event_queues += dlb2->num_ldb_queues;
382         evdev_dlb2_default_info.max_num_events += dlb2->max_ldb_credits;
383
384         evdev_dlb2_default_info.max_event_queues =
385                 RTE_MIN(evdev_dlb2_default_info.max_event_queues,
386                         RTE_EVENT_MAX_QUEUES_PER_DEV);
387
388         evdev_dlb2_default_info.max_num_events =
389                 RTE_MIN(evdev_dlb2_default_info.max_num_events,
390                         dlb2->max_num_events_override);
391
392         *dev_info = evdev_dlb2_default_info;
393 }
394
395 static int
396 dlb2_hw_create_sched_domain(struct dlb2_hw_dev *handle,
397                             const struct dlb2_hw_rsrcs *resources_asked)
398 {
399         int ret = 0;
400         struct dlb2_create_sched_domain_args *cfg;
401
402         if (resources_asked == NULL) {
403                 DLB2_LOG_ERR("dlb2: dlb2_create NULL parameter\n");
404                 ret = -EINVAL;
405                 goto error_exit;
406         }
407
408         /* Map generic qm resources to dlb2 resources */
409         cfg = &handle->cfg.resources;
410
411         /* DIR ports and queues */
412
413         cfg->num_dir_ports = resources_asked->num_dir_ports;
414
415         cfg->num_dir_credits = resources_asked->num_dir_credits;
416
417         /* LDB queues */
418
419         cfg->num_ldb_queues = resources_asked->num_ldb_queues;
420
421         /* LDB ports */
422
423         cfg->cos_strict = 0; /* Best effort */
424         cfg->num_cos_ldb_ports[0] = 0;
425         cfg->num_cos_ldb_ports[1] = 0;
426         cfg->num_cos_ldb_ports[2] = 0;
427         cfg->num_cos_ldb_ports[3] = 0;
428
429         switch (handle->cos_id) {
430         case DLB2_COS_0:
431                 cfg->num_ldb_ports = 0; /* no don't care ports */
432                 cfg->num_cos_ldb_ports[0] =
433                         resources_asked->num_ldb_ports;
434                 break;
435         case DLB2_COS_1:
436                 cfg->num_ldb_ports = 0; /* no don't care ports */
437                 cfg->num_cos_ldb_ports[1] = resources_asked->num_ldb_ports;
438                 break;
439         case DLB2_COS_2:
440                 cfg->num_ldb_ports = 0; /* no don't care ports */
441                 cfg->num_cos_ldb_ports[2] = resources_asked->num_ldb_ports;
442                 break;
443         case DLB2_COS_3:
444                 cfg->num_ldb_ports = 0; /* no don't care ports */
445                 cfg->num_cos_ldb_ports[3] =
446                         resources_asked->num_ldb_ports;
447                 break;
448         case DLB2_COS_DEFAULT:
449                 /* all ldb ports are don't care ports from a cos perspective */
450                 cfg->num_ldb_ports =
451                         resources_asked->num_ldb_ports;
452                 break;
453         }
454
455         cfg->num_ldb_credits =
456                 resources_asked->num_ldb_credits;
457
458         cfg->num_atomic_inflights =
459                 DLB2_NUM_ATOMIC_INFLIGHTS_PER_QUEUE *
460                 cfg->num_ldb_queues;
461
462         cfg->num_hist_list_entries = resources_asked->num_ldb_ports *
463                 DLB2_NUM_HIST_LIST_ENTRIES_PER_LDB_PORT;
464
465         DLB2_LOG_DBG("sched domain create - ldb_qs=%d, ldb_ports=%d, dir_ports=%d, atomic_inflights=%d, hist_list_entries=%d, ldb_credits=%d, dir_credits=%d\n",
466                      cfg->num_ldb_queues,
467                      resources_asked->num_ldb_ports,
468                      cfg->num_dir_ports,
469                      cfg->num_atomic_inflights,
470                      cfg->num_hist_list_entries,
471                      cfg->num_ldb_credits,
472                      cfg->num_dir_credits);
473
474         /* Configure the QM */
475
476         ret = dlb2_iface_sched_domain_create(handle, cfg);
477         if (ret < 0) {
478                 DLB2_LOG_ERR("dlb2: domain create failed, ret = %d, extra status: %s\n",
479                              ret,
480                              dlb2_error_strings[cfg->response.status]);
481
482                 goto error_exit;
483         }
484
485         handle->domain_id = cfg->response.id;
486         handle->cfg.configured = true;
487
488 error_exit:
489
490         return ret;
491 }
492
493 static void
494 dlb2_hw_reset_sched_domain(const struct rte_eventdev *dev, bool reconfig)
495 {
496         struct dlb2_eventdev *dlb2 = dlb2_pmd_priv(dev);
497         enum dlb2_configuration_state config_state;
498         int i, j;
499
500         dlb2_iface_domain_reset(dlb2);
501
502         /* Free all dynamically allocated port memory */
503         for (i = 0; i < dlb2->num_ports; i++)
504                 dlb2_free_qe_mem(&dlb2->ev_ports[i].qm_port);
505
506         /* If reconfiguring, mark the device's queues and ports as "previously
507          * configured." If the user doesn't reconfigure them, the PMD will
508          * reapply their previous configuration when the device is started.
509          */
510         config_state = (reconfig) ? DLB2_PREV_CONFIGURED :
511                 DLB2_NOT_CONFIGURED;
512
513         for (i = 0; i < dlb2->num_ports; i++) {
514                 dlb2->ev_ports[i].qm_port.config_state = config_state;
515                 /* Reset setup_done so ports can be reconfigured */
516                 dlb2->ev_ports[i].setup_done = false;
517                 for (j = 0; j < DLB2_MAX_NUM_QIDS_PER_LDB_CQ; j++)
518                         dlb2->ev_ports[i].link[j].mapped = false;
519         }
520
521         for (i = 0; i < dlb2->num_queues; i++)
522                 dlb2->ev_queues[i].qm_queue.config_state = config_state;
523
524         for (i = 0; i < DLB2_MAX_NUM_QUEUES; i++)
525                 dlb2->ev_queues[i].setup_done = false;
526
527         dlb2->num_ports = 0;
528         dlb2->num_ldb_ports = 0;
529         dlb2->num_dir_ports = 0;
530         dlb2->num_queues = 0;
531         dlb2->num_ldb_queues = 0;
532         dlb2->num_dir_queues = 0;
533         dlb2->configured = false;
534 }
535
536 /* Note: 1 QM instance per QM device, QM instance/device == event device */
537 static int
538 dlb2_eventdev_configure(const struct rte_eventdev *dev)
539 {
540         struct dlb2_eventdev *dlb2 = dlb2_pmd_priv(dev);
541         struct dlb2_hw_dev *handle = &dlb2->qm_instance;
542         struct dlb2_hw_rsrcs *rsrcs = &handle->info.hw_rsrc_max;
543         const struct rte_eventdev_data *data = dev->data;
544         const struct rte_event_dev_config *config = &data->dev_conf;
545         int ret;
546
547         /* If this eventdev is already configured, we must release the current
548          * scheduling domain before attempting to configure a new one.
549          */
550         if (dlb2->configured) {
551                 dlb2_hw_reset_sched_domain(dev, true);
552
553                 ret = dlb2_hw_query_resources(dlb2);
554                 if (ret) {
555                         DLB2_LOG_ERR("get resources err=%d, devid=%d\n",
556                                      ret, data->dev_id);
557                         return ret;
558                 }
559         }
560
561         if (config->nb_event_queues > rsrcs->num_queues) {
562                 DLB2_LOG_ERR("nb_event_queues parameter (%d) exceeds the QM device's capabilities (%d).\n",
563                              config->nb_event_queues,
564                              rsrcs->num_queues);
565                 return -EINVAL;
566         }
567         if (config->nb_event_ports > (rsrcs->num_ldb_ports
568                         + rsrcs->num_dir_ports)) {
569                 DLB2_LOG_ERR("nb_event_ports parameter (%d) exceeds the QM device's capabilities (%d).\n",
570                              config->nb_event_ports,
571                              (rsrcs->num_ldb_ports + rsrcs->num_dir_ports));
572                 return -EINVAL;
573         }
574         if (config->nb_events_limit > rsrcs->nb_events_limit) {
575                 DLB2_LOG_ERR("nb_events_limit parameter (%d) exceeds the QM device's capabilities (%d).\n",
576                              config->nb_events_limit,
577                              rsrcs->nb_events_limit);
578                 return -EINVAL;
579         }
580
581         if (config->event_dev_cfg & RTE_EVENT_DEV_CFG_PER_DEQUEUE_TIMEOUT)
582                 dlb2->global_dequeue_wait = false;
583         else {
584                 uint32_t timeout32;
585
586                 dlb2->global_dequeue_wait = true;
587
588                 /* note size mismatch of timeout vals in eventdev lib. */
589                 timeout32 = config->dequeue_timeout_ns;
590
591                 dlb2->global_dequeue_wait_ticks =
592                         timeout32 * (rte_get_timer_hz() / 1E9);
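                /* Illustrative example (timer rate is platform dependent):
                 * with a 2 GHz timer and dequeue_timeout_ns = 1000, this
                 * works out to 1000 * (2e9 / 1e9) = 2000 wait ticks.
                 */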
593         }
594
595         /* Does this platform support umonitor/umwait? */
596         if (rte_cpu_get_flag_enabled(RTE_CPUFLAG_WAITPKG)) {
597                 if (RTE_LIBRTE_PMD_DLB2_UMWAIT_CTL_STATE != 0 &&
598                     RTE_LIBRTE_PMD_DLB2_UMWAIT_CTL_STATE != 1) {
599                         DLB2_LOG_ERR("invalid value (%d) for RTE_LIBRTE_PMD_DLB2_UMWAIT_CTL_STATE, must be 0 or 1.\n",
600                                      RTE_LIBRTE_PMD_DLB2_UMWAIT_CTL_STATE);
601                         return -EINVAL;
602                 }
603                 dlb2->umwait_allowed = true;
604         }
605
606         rsrcs->num_dir_ports = config->nb_single_link_event_port_queues;
607         rsrcs->num_ldb_ports  = config->nb_event_ports - rsrcs->num_dir_ports;
608         /* 1 dir queue per dir port */
609         rsrcs->num_ldb_queues = config->nb_event_queues - rsrcs->num_dir_ports;
610
611         /* Scale down nb_events_limit by 4 for directed credits, since there
612          * are 4x as many load-balanced credits.
613          */
614         rsrcs->num_ldb_credits = 0;
615         rsrcs->num_dir_credits = 0;
616
617         if (rsrcs->num_ldb_queues)
618                 rsrcs->num_ldb_credits = config->nb_events_limit;
619         if (rsrcs->num_dir_ports)
620                 rsrcs->num_dir_credits = config->nb_events_limit / 4;
621         if (dlb2->num_dir_credits_override != -1)
622                 rsrcs->num_dir_credits = dlb2->num_dir_credits_override;
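        /* Illustrative example (numbers are arbitrary): with
         * nb_events_limit = 2048 and both LDB queues and DIR ports in use,
         * the domain requests 2048 load-balanced credits and 2048 / 4 = 512
         * directed credits, unless the directed-credit override above is set.
         */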
623
624         if (dlb2_hw_create_sched_domain(handle, rsrcs) < 0) {
625                 DLB2_LOG_ERR("dlb2_hw_create_sched_domain failed\n");
626                 return -ENODEV;
627         }
628
629         dlb2->new_event_limit = config->nb_events_limit;
630         __atomic_store_n(&dlb2->inflights, 0, __ATOMIC_SEQ_CST);
631
632         /* Save number of ports/queues for this event dev */
633         dlb2->num_ports = config->nb_event_ports;
634         dlb2->num_queues = config->nb_event_queues;
635         dlb2->num_dir_ports = rsrcs->num_dir_ports;
636         dlb2->num_ldb_ports = dlb2->num_ports - dlb2->num_dir_ports;
637         dlb2->num_ldb_queues = dlb2->num_queues - dlb2->num_dir_ports;
638         dlb2->num_dir_queues = dlb2->num_dir_ports;
639         dlb2->ldb_credit_pool = rsrcs->num_ldb_credits;
640         dlb2->max_ldb_credits = rsrcs->num_ldb_credits;
641         dlb2->dir_credit_pool = rsrcs->num_dir_credits;
642         dlb2->max_dir_credits = rsrcs->num_dir_credits;
643
644         dlb2->configured = true;
645
646         return 0;
647 }
648
649 static void
650 dlb2_eventdev_port_default_conf_get(struct rte_eventdev *dev,
651                                     uint8_t port_id,
652                                     struct rte_event_port_conf *port_conf)
653 {
654         RTE_SET_USED(port_id);
655         struct dlb2_eventdev *dlb2 = dlb2_pmd_priv(dev);
656
657         port_conf->new_event_threshold = dlb2->new_event_limit;
658         port_conf->dequeue_depth = 32;
659         port_conf->enqueue_depth = DLB2_MAX_ENQUEUE_DEPTH;
660         port_conf->event_port_cfg = 0;
661 }
662
663 static void
664 dlb2_eventdev_queue_default_conf_get(struct rte_eventdev *dev,
665                                      uint8_t queue_id,
666                                      struct rte_event_queue_conf *queue_conf)
667 {
668         RTE_SET_USED(dev);
669         RTE_SET_USED(queue_id);
670
671         queue_conf->nb_atomic_flows = 1024;
672         queue_conf->nb_atomic_order_sequences = 64;
673         queue_conf->event_queue_cfg = 0;
674         queue_conf->priority = 0;
675 }
676
677 static int32_t
678 dlb2_get_sn_allocation(struct dlb2_eventdev *dlb2, int group)
679 {
680         struct dlb2_hw_dev *handle = &dlb2->qm_instance;
681         struct dlb2_get_sn_allocation_args cfg;
682         int ret;
683
684         cfg.group = group;
685
686         ret = dlb2_iface_get_sn_allocation(handle, &cfg);
687         if (ret < 0) {
688                 DLB2_LOG_ERR("dlb2: get_sn_allocation ret=%d (driver status: %s)\n",
689                              ret, dlb2_error_strings[cfg.response.status]);
690                 return ret;
691         }
692
693         return cfg.response.id;
694 }
695
696 static int
697 dlb2_set_sn_allocation(struct dlb2_eventdev *dlb2, int group, int num)
698 {
699         struct dlb2_hw_dev *handle = &dlb2->qm_instance;
700         struct dlb2_set_sn_allocation_args cfg;
701         int ret;
702
703         cfg.num = num;
704         cfg.group = group;
705
706         ret = dlb2_iface_set_sn_allocation(handle, &cfg);
707         if (ret < 0) {
708                 DLB2_LOG_ERR("dlb2: set_sn_allocation ret=%d (driver status: %s)\n",
709                              ret, dlb2_error_strings[cfg.response.status]);
710                 return ret;
711         }
712
713         return ret;
714 }
715
716 static int32_t
717 dlb2_get_sn_occupancy(struct dlb2_eventdev *dlb2, int group)
718 {
719         struct dlb2_hw_dev *handle = &dlb2->qm_instance;
720         struct dlb2_get_sn_occupancy_args cfg;
721         int ret;
722
723         cfg.group = group;
724
725         ret = dlb2_iface_get_sn_occupancy(handle, &cfg);
726         if (ret < 0) {
727                 DLB2_LOG_ERR("dlb2: get_sn_occupancy ret=%d (driver status: %s)\n",
728                              ret, dlb2_error_strings[cfg.response.status]);
729                 return ret;
730         }
731
732         return cfg.response.id;
733 }
734
735 /* Query the current sequence number allocations and, if they conflict with the
736  * requested LDB queue configuration, attempt to re-allocate sequence numbers.
737  * This is best-effort; if it fails, the PMD will still attempt to configure
738  * the load-balanced queue and return the resulting error.
739  */
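/* Illustrative example (per-group totals come from the hardware): if a group
 * is currently allocated 64 sequence numbers per slot, it provides
 * DLB2_MAX_LDB_SN_ALLOC / 64 slots; a queue requesting 64
 * nb_atomic_order_sequences can use that group while a slot remains free,
 * otherwise an idle group is reprogrammed below.
 */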
740 static void
741 dlb2_program_sn_allocation(struct dlb2_eventdev *dlb2,
742                            const struct rte_event_queue_conf *queue_conf)
743 {
744         int grp_occupancy[DLB2_NUM_SN_GROUPS];
745         int grp_alloc[DLB2_NUM_SN_GROUPS];
746         int i, sequence_numbers;
747
748         sequence_numbers = (int)queue_conf->nb_atomic_order_sequences;
749
750         for (i = 0; i < DLB2_NUM_SN_GROUPS; i++) {
751                 int total_slots;
752
753                 grp_alloc[i] = dlb2_get_sn_allocation(dlb2, i);
754                 if (grp_alloc[i] < 0)
755                         return;
756
757                 total_slots = DLB2_MAX_LDB_SN_ALLOC / grp_alloc[i];
758
759                 grp_occupancy[i] = dlb2_get_sn_occupancy(dlb2, i);
760                 if (grp_occupancy[i] < 0)
761                         return;
762
763                 /* DLB has at least one available slot for the requested
764                  * sequence numbers, so no further configuration required.
765                  */
766                 if (grp_alloc[i] == sequence_numbers &&
767                     grp_occupancy[i] < total_slots)
768                         return;
769         }
770
771         /* None of the sequence number groups are configured for the requested
772          * sequence numbers, so we have to reconfigure one of them. This is
773          * only possible if a group is not in use.
774          */
775         for (i = 0; i < DLB2_NUM_SN_GROUPS; i++) {
776                 if (grp_occupancy[i] == 0)
777                         break;
778         }
779
780         if (i == DLB2_NUM_SN_GROUPS) {
781                 DLB2_LOG_ERR("[%s()] No groups with %d sequence_numbers are available or have free slots\n",
782                        __func__, sequence_numbers);
783                 return;
784         }
785
786         /* Attempt to configure slot i with the requested number of sequence
787          * numbers. Ignore the return value -- if this fails, the error will be
788          * caught during subsequent queue configuration.
789          */
790         dlb2_set_sn_allocation(dlb2, i, sequence_numbers);
791 }
792
793 static int32_t
794 dlb2_hw_create_ldb_queue(struct dlb2_eventdev *dlb2,
795                          struct dlb2_eventdev_queue *ev_queue,
796                          const struct rte_event_queue_conf *evq_conf)
797 {
798         struct dlb2_hw_dev *handle = &dlb2->qm_instance;
799         struct dlb2_queue *queue = &ev_queue->qm_queue;
800         struct dlb2_create_ldb_queue_args cfg;
801         int32_t ret;
802         uint32_t qm_qid;
803         int sched_type = -1;
804
805         if (evq_conf == NULL)
806                 return -EINVAL;
807
808         if (evq_conf->event_queue_cfg & RTE_EVENT_QUEUE_CFG_ALL_TYPES) {
809                 if (evq_conf->nb_atomic_order_sequences != 0)
810                         sched_type = RTE_SCHED_TYPE_ORDERED;
811                 else
812                         sched_type = RTE_SCHED_TYPE_PARALLEL;
813         } else
814                 sched_type = evq_conf->schedule_type;
815
816         cfg.num_atomic_inflights = DLB2_NUM_ATOMIC_INFLIGHTS_PER_QUEUE;
817         cfg.num_sequence_numbers = evq_conf->nb_atomic_order_sequences;
818         cfg.num_qid_inflights = evq_conf->nb_atomic_order_sequences;
819
820         if (sched_type != RTE_SCHED_TYPE_ORDERED) {
821                 cfg.num_sequence_numbers = 0;
822                 cfg.num_qid_inflights = 2048;
823         }
824
825         /* App should set this to the number of hardware flows they want, not
826          * the overall number of flows they're going to use. E.g. if app is
827          * using 64 flows and sets compression to 64, best-case they'll get
828          * 64 unique hashed flows in hardware.
829          */
830         switch (evq_conf->nb_atomic_flows) {
831         /* Valid DLB2 compression levels */
832         case 64:
833         case 128:
834         case 256:
835         case 512:
836         case (1 * 1024): /* 1K */
837         case (2 * 1024): /* 2K */
838         case (4 * 1024): /* 4K */
839         case (64 * 1024): /* 64K */
840                 cfg.lock_id_comp_level = evq_conf->nb_atomic_flows;
841                 break;
842         default:
843                 /* Invalid compression level */
844                 cfg.lock_id_comp_level = 0; /* no compression */
845         }
846
847         if (ev_queue->depth_threshold == 0) {
848                 cfg.depth_threshold = RTE_PMD_DLB2_DEFAULT_DEPTH_THRESH;
849                 ev_queue->depth_threshold = RTE_PMD_DLB2_DEFAULT_DEPTH_THRESH;
850         } else
851                 cfg.depth_threshold = ev_queue->depth_threshold;
852
853         ret = dlb2_iface_ldb_queue_create(handle, &cfg);
854         if (ret < 0) {
855                 DLB2_LOG_ERR("dlb2: create LB event queue error, ret=%d (driver status: %s)\n",
856                              ret, dlb2_error_strings[cfg.response.status]);
857                 return -EINVAL;
858         }
859
860         qm_qid = cfg.response.id;
861
862         /* Save off queue config for debug, resource lookups, and reconfig */
863         queue->num_qid_inflights = cfg.num_qid_inflights;
864         queue->num_atm_inflights = cfg.num_atomic_inflights;
865
866         queue->sched_type = sched_type;
867         queue->config_state = DLB2_CONFIGURED;
868
869         DLB2_LOG_DBG("Created LB event queue %d, nb_inflights=%d, nb_seq=%d, qid inflights=%d\n",
870                      qm_qid,
871                      cfg.num_atomic_inflights,
872                      cfg.num_sequence_numbers,
873                      cfg.num_qid_inflights);
874
875         return qm_qid;
876 }
877
878 static int
879 dlb2_eventdev_ldb_queue_setup(struct rte_eventdev *dev,
880                               struct dlb2_eventdev_queue *ev_queue,
881                               const struct rte_event_queue_conf *queue_conf)
882 {
883         struct dlb2_eventdev *dlb2 = dlb2_pmd_priv(dev);
884         int32_t qm_qid;
885
886         if (queue_conf->nb_atomic_order_sequences)
887                 dlb2_program_sn_allocation(dlb2, queue_conf);
888
889         qm_qid = dlb2_hw_create_ldb_queue(dlb2, ev_queue, queue_conf);
890         if (qm_qid < 0) {
891                 DLB2_LOG_ERR("Failed to create the load-balanced queue\n");
892
893                 return qm_qid;
894         }
895
896         dlb2->qm_ldb_to_ev_queue_id[qm_qid] = ev_queue->id;
897
898         ev_queue->qm_queue.id = qm_qid;
899
900         return 0;
901 }
902
903 static int dlb2_num_dir_queues_setup(struct dlb2_eventdev *dlb2)
904 {
905         int i, num = 0;
906
907         for (i = 0; i < dlb2->num_queues; i++) {
908                 if (dlb2->ev_queues[i].setup_done &&
909                     dlb2->ev_queues[i].qm_queue.is_directed)
910                         num++;
911         }
912
913         return num;
914 }
915
916 static void
917 dlb2_queue_link_teardown(struct dlb2_eventdev *dlb2,
918                          struct dlb2_eventdev_queue *ev_queue)
919 {
920         struct dlb2_eventdev_port *ev_port;
921         int i, j;
922
923         for (i = 0; i < dlb2->num_ports; i++) {
924                 ev_port = &dlb2->ev_ports[i];
925
926                 for (j = 0; j < DLB2_MAX_NUM_QIDS_PER_LDB_CQ; j++) {
927                         if (!ev_port->link[j].valid ||
928                             ev_port->link[j].queue_id != ev_queue->id)
929                                 continue;
930
931                         ev_port->link[j].valid = false;
932                         ev_port->num_links--;
933                 }
934         }
935
936         ev_queue->num_links = 0;
937 }
938
939 static int
940 dlb2_eventdev_queue_setup(struct rte_eventdev *dev,
941                           uint8_t ev_qid,
942                           const struct rte_event_queue_conf *queue_conf)
943 {
944         struct dlb2_eventdev *dlb2 = dlb2_pmd_priv(dev);
945         struct dlb2_eventdev_queue *ev_queue;
946         int ret;
947
948         if (queue_conf == NULL)
949                 return -EINVAL;
950
951         if (ev_qid >= dlb2->num_queues)
952                 return -EINVAL;
953
954         ev_queue = &dlb2->ev_queues[ev_qid];
955
956         ev_queue->qm_queue.is_directed = queue_conf->event_queue_cfg &
957                 RTE_EVENT_QUEUE_CFG_SINGLE_LINK;
958         ev_queue->id = ev_qid;
959         ev_queue->conf = *queue_conf;
960
961         if (!ev_queue->qm_queue.is_directed) {
962                 ret = dlb2_eventdev_ldb_queue_setup(dev, ev_queue, queue_conf);
963         } else {
964                 /* The directed queue isn't setup until link time, at which
965                  * point we know its directed port ID. Directed queue setup
966                  * will only fail if this queue is already setup or there are
967                  * no directed queues left to configure.
968                  */
969                 ret = 0;
970
971                 ev_queue->qm_queue.config_state = DLB2_NOT_CONFIGURED;
972
973                 if (ev_queue->setup_done ||
974                     dlb2_num_dir_queues_setup(dlb2) == dlb2->num_dir_queues)
975                         ret = -EINVAL;
976         }
977
978         /* Tear down pre-existing port->queue links */
979         if (!ret && dlb2->run_state == DLB2_RUN_STATE_STOPPED)
980                 dlb2_queue_link_teardown(dlb2, ev_queue);
981
982         if (!ret)
983                 ev_queue->setup_done = true;
984
985         return ret;
986 }
987
988 static int
989 dlb2_init_consume_qe(struct dlb2_port *qm_port, char *mz_name)
990 {
991         struct dlb2_cq_pop_qe *qe;
992
993         qe = rte_zmalloc(mz_name,
994                         DLB2_NUM_QES_PER_CACHE_LINE *
995                                 sizeof(struct dlb2_cq_pop_qe),
996                         RTE_CACHE_LINE_SIZE);
997
998         if (qe == NULL) {
999                 DLB2_LOG_ERR("dlb2: no memory for consume_qe\n");
1000                 return -ENOMEM;
1001         }
1002         qm_port->consume_qe = qe;
1003
1004         qe->qe_valid = 0;
1005         qe->qe_frag = 0;
1006         qe->qe_comp = 0;
1007         qe->cq_token = 1;
1008         /* Tokens value is 0-based; i.e. '0' returns 1 token, '1' returns 2,
1009          * and so on.
1010          */
1011         qe->tokens = 0; /* set at run time */
1012         qe->meas_lat = 0;
1013         qe->no_dec = 0;
1014         /* Completion IDs are disabled */
1015         qe->cmp_id = 0;
1016
1017         return 0;
1018 }
1019
1020 static int
1021 dlb2_init_int_arm_qe(struct dlb2_port *qm_port, char *mz_name)
1022 {
1023         struct dlb2_enqueue_qe *qe;
1024
1025         qe = rte_zmalloc(mz_name,
1026                         DLB2_NUM_QES_PER_CACHE_LINE *
1027                                 sizeof(struct dlb2_enqueue_qe),
1028                         RTE_CACHE_LINE_SIZE);
1029
1030         if (qe == NULL) {
1031                 DLB2_LOG_ERR("dlb2: no memory for int_arm_qe\n");
1032                 return -ENOMEM;
1033         }
1034         qm_port->int_arm_qe = qe;
1035
1036         /* V2 - INT ARM is CQ_TOKEN + FRAG */
1037         qe->qe_valid = 0;
1038         qe->qe_frag = 1;
1039         qe->qe_comp = 0;
1040         qe->cq_token = 1;
1041         qe->meas_lat = 0;
1042         qe->no_dec = 0;
1043         /* Completion IDs are disabled */
1044         qe->cmp_id = 0;
1045
1046         return 0;
1047 }
1048
1049 static int
1050 dlb2_init_qe_mem(struct dlb2_port *qm_port, char *mz_name)
1051 {
1052         int ret, sz;
1053
1054         sz = DLB2_NUM_QES_PER_CACHE_LINE * sizeof(struct dlb2_enqueue_qe);
1055
1056         qm_port->qe4 = rte_zmalloc(mz_name, sz, RTE_CACHE_LINE_SIZE);
1057
1058         if (qm_port->qe4 == NULL) {
1059                 DLB2_LOG_ERR("dlb2: no qe4 memory\n");
1060                 ret = -ENOMEM;
1061                 goto error_exit;
1062         }
1063
1064         ret = dlb2_init_int_arm_qe(qm_port, mz_name);
1065         if (ret < 0) {
1066                 DLB2_LOG_ERR("dlb2: dlb2_init_int_arm_qe ret=%d\n", ret);
1067                 goto error_exit;
1068         }
1069
1070         ret = dlb2_init_consume_qe(qm_port, mz_name);
1071         if (ret < 0) {
1072                 DLB2_LOG_ERR("dlb2: dlb2_init_consume_qe ret=%d\n", ret);
1073                 goto error_exit;
1074         }
1075
1076         return 0;
1077
1078 error_exit:
1079
1080         dlb2_free_qe_mem(qm_port);
1081
1082         return ret;
1083 }
1084
1085 static int
1086 dlb2_hw_create_ldb_port(struct dlb2_eventdev *dlb2,
1087                         struct dlb2_eventdev_port *ev_port,
1088                         uint32_t dequeue_depth,
1089                         uint32_t enqueue_depth)
1090 {
1091         struct dlb2_hw_dev *handle = &dlb2->qm_instance;
1092         struct dlb2_create_ldb_port_args cfg = { {0} };
1093         int ret;
1094         struct dlb2_port *qm_port = NULL;
1095         char mz_name[RTE_MEMZONE_NAMESIZE];
1096         uint32_t qm_port_id;
1097         uint16_t ldb_credit_high_watermark;
1098         uint16_t dir_credit_high_watermark;
1099
1100         if (handle == NULL)
1101                 return -EINVAL;
1102
1103         if (dequeue_depth < DLB2_MIN_CQ_DEPTH) {
1104                 DLB2_LOG_ERR("dlb2: invalid dequeue_depth, must be at least %d\n",
1105                              DLB2_MIN_CQ_DEPTH);
1106                 return -EINVAL;
1107         }
1108
1109         if (enqueue_depth < DLB2_MIN_ENQUEUE_DEPTH) {
1110                 DLB2_LOG_ERR("dlb2: invalid enqueue_depth, must be at least %d\n",
1111                              DLB2_MIN_ENQUEUE_DEPTH);
1112                 return -EINVAL;
1113         }
1114
1115         rte_spinlock_lock(&handle->resource_lock);
1116
1117         /* We round up to the next power of 2 if necessary */
1118         cfg.cq_depth = rte_align32pow2(dequeue_depth);
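        /* Illustrative example: a requested dequeue_depth of 48 becomes a
         * hardware CQ depth of 64; a power-of-two depth such as 32 is left
         * unchanged.
         */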
1119         cfg.cq_depth_threshold = 1;
1120
1121         cfg.cq_history_list_size = DLB2_NUM_HIST_LIST_ENTRIES_PER_LDB_PORT;
1122
1123         if (handle->cos_id == DLB2_COS_DEFAULT)
1124                 cfg.cos_id = 0;
1125         else
1126                 cfg.cos_id = handle->cos_id;
1127
1128         cfg.cos_strict = 0;
1129
1130         /* User controls the LDB high watermark via enqueue depth. The DIR high
1131          * watermark is equal, unless the directed credit pool is too small.
1132          */
1133         ldb_credit_high_watermark = enqueue_depth;
1134
1135         /* If there are no directed ports, the kernel driver will ignore this
1136          * port's directed credit settings. Don't use enqueue_depth if it would
1137          * require more directed credits than are available.
1138          */
1139         dir_credit_high_watermark =
1140                 RTE_MIN(enqueue_depth,
1141                         handle->cfg.num_dir_credits / dlb2->num_ports);
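        /* Illustrative example (credit counts are arbitrary): with
         * enqueue_depth = 64, 128 directed credits in the domain and 4
         * configured ports, the watermark is RTE_MIN(64, 128 / 4) = 32.
         */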
1142
1143         /* Per QM values */
1144
1145         ret = dlb2_iface_ldb_port_create(handle, &cfg,  dlb2->poll_mode);
1146         if (ret < 0) {
1147                 DLB2_LOG_ERR("dlb2: dlb2_ldb_port_create error, ret=%d (driver status: %s)\n",
1148                              ret, dlb2_error_strings[cfg.response.status]);
1149                 goto error_exit;
1150         }
1151
1152         qm_port_id = cfg.response.id;
1153
1154         DLB2_LOG_DBG("dlb2: ev_port %d uses qm LB port %d <<<<<\n",
1155                      ev_port->id, qm_port_id);
1156
1157         qm_port = &ev_port->qm_port;
1158         qm_port->ev_port = ev_port; /* back ptr */
1159         qm_port->dlb2 = dlb2; /* back ptr */
1160         /*
1161          * Allocate and init local qe struct(s).
1162          * Note: MOVDIR64 requires the enqueue QE (qe4) to be aligned.
1163          */
1164
1165         snprintf(mz_name, sizeof(mz_name), "dlb2_ldb_port%d",
1166                  ev_port->id);
1167
1168         ret = dlb2_init_qe_mem(qm_port, mz_name);
1169         if (ret < 0) {
1170                 DLB2_LOG_ERR("dlb2: init_qe_mem failed, ret=%d\n", ret);
1171                 goto error_exit;
1172         }
1173
1174         qm_port->id = qm_port_id;
1175
1176         qm_port->cached_ldb_credits = 0;
1177         qm_port->cached_dir_credits = 0;
1178         /* CQs with depth < 8 use an 8-entry queue, but withhold credits so
1179          * the effective depth is smaller.
1180          */
1181         qm_port->cq_depth = cfg.cq_depth <= 8 ? 8 : cfg.cq_depth;
1182         qm_port->cq_idx = 0;
1183         qm_port->cq_idx_unmasked = 0;
1184
1185         if (dlb2->poll_mode == DLB2_CQ_POLL_MODE_SPARSE)
1186                 qm_port->cq_depth_mask = (qm_port->cq_depth * 4) - 1;
1187         else
1188                 qm_port->cq_depth_mask = qm_port->cq_depth - 1;
1189
1190         qm_port->gen_bit_shift = __builtin_popcount(qm_port->cq_depth_mask);
1191         /* starting value of gen bit - it toggles at wrap time */
1192         qm_port->gen_bit = 1;
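        /* Illustrative example: a CQ depth of 32 in sparse poll mode gives
         * cq_depth_mask = (32 * 4) - 1 = 127 and gen_bit_shift = 7; in
         * non-sparse mode the mask is 31 and the shift is 5.
         */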
1193
1194         qm_port->int_armed = false;
1195
1196         /* Save off for later use in info and lookup APIs. */
1197         qm_port->qid_mappings = &dlb2->qm_ldb_to_ev_queue_id[0];
1198
1199         qm_port->dequeue_depth = dequeue_depth;
1200         qm_port->token_pop_thresh = dequeue_depth;
1201         qm_port->owed_tokens = 0;
1202         qm_port->issued_releases = 0;
1203
1204         /* Save config message too. */
1205         rte_memcpy(&qm_port->cfg.ldb, &cfg, sizeof(qm_port->cfg.ldb));
1206
1207         /* update state */
1208         qm_port->state = PORT_STARTED; /* enabled at create time */
1209         qm_port->config_state = DLB2_CONFIGURED;
1210
1211         qm_port->dir_credits = dir_credit_high_watermark;
1212         qm_port->ldb_credits = ldb_credit_high_watermark;
1213         qm_port->credit_pool[DLB2_DIR_QUEUE] = &dlb2->dir_credit_pool;
1214         qm_port->credit_pool[DLB2_LDB_QUEUE] = &dlb2->ldb_credit_pool;
1215
1216         DLB2_LOG_DBG("dlb2: created ldb port %d, depth = %d, ldb credits=%d, dir credits=%d\n",
1217                      qm_port_id,
1218                      dequeue_depth,
1219                      qm_port->ldb_credits,
1220                      qm_port->dir_credits);
1221
1222         rte_spinlock_unlock(&handle->resource_lock);
1223
1224         return 0;
1225
1226 error_exit:
1227
1228         if (qm_port)
1229                 dlb2_free_qe_mem(qm_port);
1230
1231         rte_spinlock_unlock(&handle->resource_lock);
1232
1233         DLB2_LOG_ERR("dlb2: create ldb port failed!\n");
1234
1235         return ret;
1236 }
1237
1238 static void
1239 dlb2_port_link_teardown(struct dlb2_eventdev *dlb2,
1240                         struct dlb2_eventdev_port *ev_port)
1241 {
1242         struct dlb2_eventdev_queue *ev_queue;
1243         int i;
1244
1245         for (i = 0; i < DLB2_MAX_NUM_QIDS_PER_LDB_CQ; i++) {
1246                 if (!ev_port->link[i].valid)
1247                         continue;
1248
1249                 ev_queue = &dlb2->ev_queues[ev_port->link[i].queue_id];
1250
1251                 ev_port->link[i].valid = false;
1252                 ev_port->num_links--;
1253                 ev_queue->num_links--;
1254         }
1255 }
1256
1257 static int
1258 dlb2_hw_create_dir_port(struct dlb2_eventdev *dlb2,
1259                         struct dlb2_eventdev_port *ev_port,
1260                         uint32_t dequeue_depth,
1261                         uint32_t enqueue_depth)
1262 {
1263         struct dlb2_hw_dev *handle = &dlb2->qm_instance;
1264         struct dlb2_create_dir_port_args cfg = { {0} };
1265         int ret;
1266         struct dlb2_port *qm_port = NULL;
1267         char mz_name[RTE_MEMZONE_NAMESIZE];
1268         uint32_t qm_port_id;
1269         uint16_t ldb_credit_high_watermark;
1270         uint16_t dir_credit_high_watermark;
1271
1272         if (dlb2 == NULL || handle == NULL)
1273                 return -EINVAL;
1274
1275         if (dequeue_depth < DLB2_MIN_CQ_DEPTH) {
1276                 DLB2_LOG_ERR("dlb2: invalid dequeue_depth, must be %d-%d\n",
1277                              DLB2_MIN_CQ_DEPTH, DLB2_MAX_INPUT_QUEUE_DEPTH);
1278                 return -EINVAL;
1279         }
1280
1281         if (enqueue_depth < DLB2_MIN_ENQUEUE_DEPTH) {
1282                 DLB2_LOG_ERR("dlb2: invalid enqueue_depth, must be at least %d\n",
1283                              DLB2_MIN_ENQUEUE_DEPTH);
1284                 return -EINVAL;
1285         }
1286
1287         rte_spinlock_lock(&handle->resource_lock);
1288
1289         /* Directed queues are configured at link time. */
1290         cfg.queue_id = -1;
1291
1292         /* We round up to the next power of 2 if necessary */
1293         cfg.cq_depth = rte_align32pow2(dequeue_depth);
1294         cfg.cq_depth_threshold = 1;
1295
1296         /* User controls the LDB high watermark via enqueue depth. The DIR high
1297          * watermark is equal, unless the directed credit pool is too small.
1298          */
1299         ldb_credit_high_watermark = enqueue_depth;
1300
1301         /* Don't use enqueue_depth if it would require more directed credits
1302          * than are available.
1303          */
1304         dir_credit_high_watermark =
1305                 RTE_MIN(enqueue_depth,
1306                         handle->cfg.num_dir_credits / dlb2->num_ports);
1307
1308         /* Per QM values */
1309
1310         ret = dlb2_iface_dir_port_create(handle, &cfg,  dlb2->poll_mode);
1311         if (ret < 0) {
1312                 DLB2_LOG_ERR("dlb2: dlb2_dir_port_create error, ret=%d (driver status: %s)\n",
1313                              ret, dlb2_error_strings[cfg.response.status]);
1314                 goto error_exit;
1315         }
1316
1317         qm_port_id = cfg.response.id;
1318
1319         DLB2_LOG_DBG("dlb2: ev_port %d uses qm DIR port %d <<<<<\n",
1320                      ev_port->id, qm_port_id);
1321
1322         qm_port = &ev_port->qm_port;
1323         qm_port->ev_port = ev_port; /* back ptr */
1324         qm_port->dlb2 = dlb2;  /* back ptr */
1325
1326         /*
1327          * Init local qe struct(s).
1328          * Note: MOVDIR64 requires the enqueue QE to be aligned
1329          */
1330
1331         snprintf(mz_name, sizeof(mz_name), "dlb2_dir_port%d",
1332                  ev_port->id);
1333
1334         ret = dlb2_init_qe_mem(qm_port, mz_name);
1335
1336         if (ret < 0) {
1337                 DLB2_LOG_ERR("dlb2: init_qe_mem failed, ret=%d\n", ret);
1338                 goto error_exit;
1339         }
1340
1341         qm_port->id = qm_port_id;
1342
1343         qm_port->cached_ldb_credits = 0;
1344         qm_port->cached_dir_credits = 0;
1345         /* CQs with depth < 8 use an 8-entry queue, but withhold credits so
1346          * the effective depth is smaller.
1347          */
1348         qm_port->cq_depth = cfg.cq_depth <= 8 ? 8 : cfg.cq_depth;
1349         qm_port->cq_idx = 0;
1350         qm_port->cq_idx_unmasked = 0;
1351
1352         if (dlb2->poll_mode == DLB2_CQ_POLL_MODE_SPARSE)
1353                 qm_port->cq_depth_mask = (cfg.cq_depth * 4) - 1;
1354         else
1355                 qm_port->cq_depth_mask = cfg.cq_depth - 1;
1356
1357         qm_port->gen_bit_shift = __builtin_popcount(qm_port->cq_depth_mask);
1358         /* starting value of gen bit - it toggles at wrap time */
1359         qm_port->gen_bit = 1;
1360
1361         qm_port->int_armed = false;
1362
1363         /* Save off for later use in info and lookup APIs. */
1364         qm_port->qid_mappings = &dlb2->qm_dir_to_ev_queue_id[0];
1365
1366         qm_port->dequeue_depth = dequeue_depth;
1367
1368         /* Directed ports are auto-pop, by default. */
1369         qm_port->token_pop_mode = AUTO_POP;
1370         qm_port->owed_tokens = 0;
1371         qm_port->issued_releases = 0;
1372
1373         /* Save config message too. */
1374         rte_memcpy(&qm_port->cfg.dir, &cfg, sizeof(qm_port->cfg.dir));
1375
1376         /* update state */
1377         qm_port->state = PORT_STARTED; /* enabled at create time */
1378         qm_port->config_state = DLB2_CONFIGURED;
1379
1380         qm_port->dir_credits = dir_credit_high_watermark;
1381         qm_port->ldb_credits = ldb_credit_high_watermark;
1382         qm_port->credit_pool[DLB2_DIR_QUEUE] = &dlb2->dir_credit_pool;
1383         qm_port->credit_pool[DLB2_LDB_QUEUE] = &dlb2->ldb_credit_pool;
1384
1385         DLB2_LOG_DBG("dlb2: created dir port %d, depth = %d cr=%d,%d\n",
1386                      qm_port_id,
1387                      dequeue_depth,
1388                      dir_credit_high_watermark,
1389                      ldb_credit_high_watermark);
1390
1391         rte_spinlock_unlock(&handle->resource_lock);
1392
1393         return 0;
1394
1395 error_exit:
1396
1397         if (qm_port)
1398                 dlb2_free_qe_mem(qm_port);
1399
1400         rte_spinlock_unlock(&handle->resource_lock);
1401
1402         DLB2_LOG_ERR("dlb2: create dir port failed!\n");
1403
1404         return ret;
1405 }
1406
1407 static int
1408 dlb2_eventdev_port_setup(struct rte_eventdev *dev,
1409                          uint8_t ev_port_id,
1410                          const struct rte_event_port_conf *port_conf)
1411 {
1412         struct dlb2_eventdev *dlb2;
1413         struct dlb2_eventdev_port *ev_port;
1414         int ret;
1415
1416         if (dev == NULL || port_conf == NULL) {
1417                 DLB2_LOG_ERR("Null parameter\n");
1418                 return -EINVAL;
1419         }
1420
1421         dlb2 = dlb2_pmd_priv(dev);
1422
1423         if (ev_port_id >= DLB2_MAX_NUM_PORTS)
1424                 return -EINVAL;
1425
1426         if (port_conf->dequeue_depth >
1427                 evdev_dlb2_default_info.max_event_port_dequeue_depth ||
1428             port_conf->enqueue_depth >
1429                 evdev_dlb2_default_info.max_event_port_enqueue_depth)
1430                 return -EINVAL;
1431
1432         ev_port = &dlb2->ev_ports[ev_port_id];
1433         /* configured? */
1434         if (ev_port->setup_done) {
1435                 DLB2_LOG_ERR("evport %d is already configured\n", ev_port_id);
1436                 return -EINVAL;
1437         }
1438
1439         ev_port->qm_port.is_directed = port_conf->event_port_cfg &
1440                 RTE_EVENT_PORT_CFG_SINGLE_LINK;
1441
1442         if (!ev_port->qm_port.is_directed) {
1443                 ret = dlb2_hw_create_ldb_port(dlb2,
1444                                               ev_port,
1445                                               port_conf->dequeue_depth,
1446                                               port_conf->enqueue_depth);
1447                 if (ret < 0) {
1448                         DLB2_LOG_ERR("Failed to create the LB port, ev_port_id=%d\n",
1449                                      ev_port_id);
1450
1451                         return ret;
1452                 }
1453         } else {
1454                 ret = dlb2_hw_create_dir_port(dlb2,
1455                                               ev_port,
1456                                               port_conf->dequeue_depth,
1457                                               port_conf->enqueue_depth);
1458                 if (ret < 0) {
1459                         DLB2_LOG_ERR("Failed to create the DIR port\n");
1460                         return ret;
1461                 }
1462         }
1463
1464         /* Save off port config for reconfig */
1465         ev_port->conf = *port_conf;
1466
1467         ev_port->id = ev_port_id;
1468         ev_port->enq_configured = true;
1469         ev_port->setup_done = true;
1470         ev_port->inflight_max = port_conf->new_event_threshold;
1471         ev_port->implicit_release = !(port_conf->event_port_cfg &
1472                   RTE_EVENT_PORT_CFG_DISABLE_IMPL_REL);
1473         ev_port->outstanding_releases = 0;
1474         ev_port->inflight_credits = 0;
1475         ev_port->credit_update_quanta = RTE_LIBRTE_PMD_DLB2_SW_CREDIT_QUANTA;
1476         ev_port->dlb2 = dlb2; /* reverse link */
1477
1478         /* Tear down pre-existing port->queue links */
1479         if (dlb2->run_state == DLB2_RUN_STATE_STOPPED)
1480                 dlb2_port_link_teardown(dlb2, &dlb2->ev_ports[ev_port_id]);
1481
1482         dev->data->ports[ev_port_id] = &dlb2->ev_ports[ev_port_id];
1483
1484         return 0;
1485 }
1486
1487 static int16_t
1488 dlb2_hw_map_ldb_qid_to_port(struct dlb2_hw_dev *handle,
1489                             uint32_t qm_port_id,
1490                             uint16_t qm_qid,
1491                             uint8_t priority)
1492 {
1493         struct dlb2_map_qid_args cfg;
1494         int32_t ret;
1495
1496         if (handle == NULL)
1497                 return -EINVAL;
1498
1499         /* Build message */
1500         cfg.port_id = qm_port_id;
1501         cfg.qid = qm_qid;
1502         cfg.priority = EV_TO_DLB2_PRIO(priority);
1503
1504         ret = dlb2_iface_map_qid(handle, &cfg);
1505         if (ret < 0) {
1506                 DLB2_LOG_ERR("dlb2: map qid error, ret=%d (driver status: %s)\n",
1507                              ret, dlb2_error_strings[cfg.response.status]);
1508                 DLB2_LOG_ERR("dlb2: grp=%d, qm_port=%d, qm_qid=%d prio=%d\n",
1509                              handle->domain_id, cfg.port_id,
1510                              cfg.qid,
1511                              cfg.priority);
1512         } else {
1513                 DLB2_LOG_DBG("dlb2: mapped queue %d to qm_port %d\n",
1514                              qm_qid, qm_port_id);
1515         }
1516
1517         return ret;
1518 }
1519
1520 static int
1521 dlb2_event_queue_join_ldb(struct dlb2_eventdev *dlb2,
1522                           struct dlb2_eventdev_port *ev_port,
1523                           struct dlb2_eventdev_queue *ev_queue,
1524                           uint8_t priority)
1525 {
1526         int first_avail = -1;
1527         int ret, i;
1528
1529         for (i = 0; i < DLB2_MAX_NUM_QIDS_PER_LDB_CQ; i++) {
1530                 if (ev_port->link[i].valid) {
1531                         if (ev_port->link[i].queue_id == ev_queue->id &&
1532                             ev_port->link[i].priority == priority) {
1533                                 if (ev_port->link[i].mapped)
1534                                         return 0; /* already mapped */
1535                                 first_avail = i;
1536                         }
1537                 } else if (first_avail == -1)
1538                         first_avail = i;
1539         }
1540         if (first_avail == -1) {
1541                 DLB2_LOG_ERR("dlb2: qm_port %d has no available QID slots.\n",
1542                              ev_port->qm_port.id);
1543                 return -EINVAL;
1544         }
1545
1546         ret = dlb2_hw_map_ldb_qid_to_port(&dlb2->qm_instance,
1547                                           ev_port->qm_port.id,
1548                                           ev_queue->qm_queue.id,
1549                                           priority);
1550
1551         if (!ret)
1552                 ev_port->link[first_avail].mapped = true;
1553
1554         return ret;
1555 }
1556
1557 static int32_t
1558 dlb2_hw_create_dir_queue(struct dlb2_eventdev *dlb2,
1559                          struct dlb2_eventdev_queue *ev_queue,
1560                          int32_t qm_port_id)
1561 {
1562         struct dlb2_hw_dev *handle = &dlb2->qm_instance;
1563         struct dlb2_create_dir_queue_args cfg;
1564         int32_t ret;
1565
1566         /* The directed port is always configured before its queue */
1567         cfg.port_id = qm_port_id;
1568
1569         if (ev_queue->depth_threshold == 0) {
1570                 cfg.depth_threshold = RTE_PMD_DLB2_DEFAULT_DEPTH_THRESH;
1571                 ev_queue->depth_threshold = RTE_PMD_DLB2_DEFAULT_DEPTH_THRESH;
1572         } else
1573                 cfg.depth_threshold = ev_queue->depth_threshold;
1574
1575         ret = dlb2_iface_dir_queue_create(handle, &cfg);
1576         if (ret < 0) {
1577                 DLB2_LOG_ERR("dlb2: create DIR event queue error, ret=%d (driver status: %s)\n",
1578                              ret, dlb2_error_strings[cfg.response.status]);
1579                 return -EINVAL;
1580         }
1581
1582         return cfg.response.id;
1583 }
1584
1585 static int
1586 dlb2_eventdev_dir_queue_setup(struct dlb2_eventdev *dlb2,
1587                               struct dlb2_eventdev_queue *ev_queue,
1588                               struct dlb2_eventdev_port *ev_port)
1589 {
1590         int32_t qm_qid;
1591
1592         qm_qid = dlb2_hw_create_dir_queue(dlb2, ev_queue, ev_port->qm_port.id);
1593
1594         if (qm_qid < 0) {
1595                 DLB2_LOG_ERR("Failed to create the DIR queue\n");
1596                 return qm_qid;
1597         }
1598
1599         dlb2->qm_dir_to_ev_queue_id[qm_qid] = ev_queue->id;
1600
1601         ev_queue->qm_queue.id = qm_qid;
1602
1603         return 0;
1604 }
1605
1606 static int
1607 dlb2_do_port_link(struct rte_eventdev *dev,
1608                   struct dlb2_eventdev_queue *ev_queue,
1609                   struct dlb2_eventdev_port *ev_port,
1610                   uint8_t prio)
1611 {
1612         struct dlb2_eventdev *dlb2 = dlb2_pmd_priv(dev);
1613         int err;
1614
1615         /* Don't link until start time. */
1616         if (dlb2->run_state == DLB2_RUN_STATE_STOPPED)
1617                 return 0;
1618
1619         if (ev_queue->qm_queue.is_directed)
1620                 err = dlb2_eventdev_dir_queue_setup(dlb2, ev_queue, ev_port);
1621         else
1622                 err = dlb2_event_queue_join_ldb(dlb2, ev_port, ev_queue, prio);
1623
1624         if (err) {
1625                 DLB2_LOG_ERR("port link failure for %s ev_q %d, ev_port %d\n",
1626                              ev_queue->qm_queue.is_directed ? "DIR" : "LDB",
1627                              ev_queue->id, ev_port->id);
1628
1629                 rte_errno = err;
1630                 return -1;
1631         }
1632
1633         return 0;
1634 }
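
/*
 * Note: because dlb2_do_port_link() is a no-op while the device is stopped,
 * links requested before rte_event_dev_start() are only recorded in
 * ev_port->link[] and are replayed by dlb2_eventdev_apply_port_links() when
 * the device starts.
 */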
1635
1636 static int
1637 dlb2_validate_port_link(struct dlb2_eventdev_port *ev_port,
1638                         uint8_t queue_id,
1639                         bool link_exists,
1640                         int index)
1641 {
1642         struct dlb2_eventdev *dlb2 = ev_port->dlb2;
1643         struct dlb2_eventdev_queue *ev_queue;
1644         bool port_is_dir, queue_is_dir;
1645
1646         if (queue_id >= dlb2->num_queues) {
1647                 rte_errno = -EINVAL;
1648                 return -1;
1649         }
1650
1651         ev_queue = &dlb2->ev_queues[queue_id];
1652
1653         if (!ev_queue->setup_done &&
1654             ev_queue->qm_queue.config_state != DLB2_PREV_CONFIGURED) {
1655                 rte_errno = -EINVAL;
1656                 return -1;
1657         }
1658
1659         port_is_dir = ev_port->qm_port.is_directed;
1660         queue_is_dir = ev_queue->qm_queue.is_directed;
1661
1662         if (port_is_dir != queue_is_dir) {
1663                 DLB2_LOG_ERR("%s queue %u can't link to %s port %u\n",
1664                              queue_is_dir ? "DIR" : "LDB", ev_queue->id,
1665                              port_is_dir ? "DIR" : "LDB", ev_port->id);
1666
1667                 rte_errno = -EINVAL;
1668                 return -1;
1669         }
1670
1671         /* Check if there is space for the requested link */
1672         if (!link_exists && index == -1) {
1673                 DLB2_LOG_ERR("no space for new link\n");
1674                 rte_errno = -ENOSPC;
1675                 return -1;
1676         }
1677
1678         /* Check if the directed port is already linked */
1679         if (ev_port->qm_port.is_directed && ev_port->num_links > 0 &&
1680             !link_exists) {
1681                 DLB2_LOG_ERR("Can't link DIR port %d to >1 queues\n",
1682                              ev_port->id);
1683                 rte_errno = -EINVAL;
1684                 return -1;
1685         }
1686
1687         /* Check if the directed queue is already linked */
1688         if (ev_queue->qm_queue.is_directed && ev_queue->num_links > 0 &&
1689             !link_exists) {
1690                 DLB2_LOG_ERR("Can't link DIR queue %d to >1 ports\n",
1691                              ev_queue->id);
1692                 rte_errno = -EINVAL;
1693                 return -1;
1694         }
1695
1696         return 0;
1697 }
1698
1699 static int
1700 dlb2_eventdev_port_link(struct rte_eventdev *dev, void *event_port,
1701                         const uint8_t queues[], const uint8_t priorities[],
1702                         uint16_t nb_links)
1703
1704 {
1705         struct dlb2_eventdev_port *ev_port = event_port;
1706         struct dlb2_eventdev *dlb2;
1707         int i, j;
1708
1709         RTE_SET_USED(dev);
1710
1711         if (ev_port == NULL) {
1712                 DLB2_LOG_ERR("dlb2: evport not setup\n");
1713                 rte_errno = -EINVAL;
1714                 return 0;
1715         }
1716
1717         if (!ev_port->setup_done &&
1718             ev_port->qm_port.config_state != DLB2_PREV_CONFIGURED) {
1719                 DLB2_LOG_ERR("dlb2: evport not setup\n");
1720                 rte_errno = -EINVAL;
1721                 return 0;
1722         }
1723
1724         /* Note: rte_event_port_link() ensures the PMD won't receive a NULL
1725          * queues pointer.
1726          */
1727         if (nb_links == 0) {
1728                 DLB2_LOG_DBG("dlb2: nb_links is 0\n");
1729                 return 0; /* Ignore and return success */
1730         }
1731
1732         dlb2 = ev_port->dlb2;
1733
1734         DLB2_LOG_DBG("Linking %u queues to %s port %d\n",
1735                      nb_links,
1736                      ev_port->qm_port.is_directed ? "DIR" : "LDB",
1737                      ev_port->id);
1738
1739         for (i = 0; i < nb_links; i++) {
1740                 struct dlb2_eventdev_queue *ev_queue;
1741                 uint8_t queue_id, prio;
1742                 bool found = false;
1743                 int index = -1;
1744
1745                 queue_id = queues[i];
1746                 prio = priorities[i];
1747
1748                 /* Check if the link already exists. */
1749                 for (j = 0; j < DLB2_MAX_NUM_QIDS_PER_LDB_CQ; j++)
1750                         if (ev_port->link[j].valid) {
1751                                 if (ev_port->link[j].queue_id == queue_id) {
1752                                         found = true;
1753                                         index = j;
1754                                         break;
1755                                 }
1756                         } else if (index == -1) {
1757                                 index = j;
1758                         }
1759
1760                 /* could not link */
1761                 if (index == -1)
1762                         break;
1763
1764                 /* Check if already linked at the requested priority */
1765                 if (found && ev_port->link[j].priority == prio)
1766                         continue;
1767
1768                 if (dlb2_validate_port_link(ev_port, queue_id, found, index))
1769                         break; /* return index of offending queue */
1770
1771                 ev_queue = &dlb2->ev_queues[queue_id];
1772
1773                 if (dlb2_do_port_link(dev, ev_queue, ev_port, prio))
1774                         break; /* return index of offending queue */
1775
1776                 ev_queue->num_links++;
1777
1778                 ev_port->link[index].queue_id = queue_id;
1779                 ev_port->link[index].priority = prio;
1780                 ev_port->link[index].valid = true;
1781                 /* If the link already existed, this was just a prio change */
1782                 if (!found)
1783                         ev_port->num_links++;
1784         }
1785         return i;
1786 }
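
/*
 * Usage sketch (application side; ids and priorities are illustrative): the
 * eventdev layer calls dlb2_eventdev_port_link() above on behalf of
 * rte_event_port_link(), which returns the number of links actually made and
 * sets rte_errno on a partial failure.
 *
 *   const uint8_t queues[2] = {0, 1};
 *   const uint8_t prios[2] = {RTE_EVENT_DEV_PRIORITY_NORMAL,
 *                             RTE_EVENT_DEV_PRIORITY_HIGHEST};
 *   int n = rte_event_port_link(dev_id, port_id, queues, prios, 2);
 *
 *   if (n != 2)
 *           printf("linked %d of 2 queues, rte_errno %d\n", n, rte_errno);
 */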
1787
1788 static int16_t
1789 dlb2_hw_unmap_ldb_qid_from_port(struct dlb2_hw_dev *handle,
1790                                 uint32_t qm_port_id,
1791                                 uint16_t qm_qid)
1792 {
1793         struct dlb2_unmap_qid_args cfg;
1794         int32_t ret;
1795
1796         if (handle == NULL)
1797                 return -EINVAL;
1798
1799         cfg.port_id = qm_port_id;
1800         cfg.qid = qm_qid;
1801
1802         ret = dlb2_iface_unmap_qid(handle, &cfg);
1803         if (ret < 0)
1804                 DLB2_LOG_ERR("dlb2: unmap qid error, ret=%d (driver status: %s)\n",
1805                              ret, dlb2_error_strings[cfg.response.status]);
1806
1807         return ret;
1808 }
1809
1810 static int
1811 dlb2_event_queue_detach_ldb(struct dlb2_eventdev *dlb2,
1812                             struct dlb2_eventdev_port *ev_port,
1813                             struct dlb2_eventdev_queue *ev_queue)
1814 {
1815         int ret, i;
1816
1817         /* Don't unlink until start time. */
1818         if (dlb2->run_state == DLB2_RUN_STATE_STOPPED)
1819                 return 0;
1820
1821         for (i = 0; i < DLB2_MAX_NUM_QIDS_PER_LDB_CQ; i++) {
1822                 if (ev_port->link[i].valid &&
1823                     ev_port->link[i].queue_id == ev_queue->id)
1824                         break; /* found */
1825         }
1826
1827         /* This is expected with the eventdev API, which blindly
1828          * attempts to unmap all of a port's queues.
1829          */
1830         if (i == DLB2_MAX_NUM_QIDS_PER_LDB_CQ) {
1831                 DLB2_LOG_DBG("dlb2: ignoring LB QID %d not mapped for qm_port %d.\n",
1832                              ev_queue->qm_queue.id,
1833                              ev_port->qm_port.id);
1834                 return 0;
1835         }
1836
1837         ret = dlb2_hw_unmap_ldb_qid_from_port(&dlb2->qm_instance,
1838                                               ev_port->qm_port.id,
1839                                               ev_queue->qm_queue.id);
1840         if (!ret)
1841                 ev_port->link[i].mapped = false;
1842
1843         return ret;
1844 }
1845
1846 static int
1847 dlb2_eventdev_port_unlink(struct rte_eventdev *dev, void *event_port,
1848                           uint8_t queues[], uint16_t nb_unlinks)
1849 {
1850         struct dlb2_eventdev_port *ev_port = event_port;
1851         struct dlb2_eventdev *dlb2;
1852         int i;
1853
1854         RTE_SET_USED(dev);
1855
1856         if (!ev_port->setup_done) {
1857                 DLB2_LOG_ERR("dlb2: evport %d is not configured\n",
1858                              ev_port->id);
1859                 rte_errno = -EINVAL;
1860                 return 0;
1861         }
1862
1863         if (queues == NULL || nb_unlinks == 0) {
1864                 DLB2_LOG_DBG("dlb2: queues is NULL or nb_unlinks is 0\n");
1865                 return 0; /* Ignore and return success */
1866         }
1867
1868         if (ev_port->qm_port.is_directed) {
1869                 DLB2_LOG_DBG("dlb2: ignore unlink from dir port %d\n",
1870                              ev_port->id);
1871                 rte_errno = 0;
1872                 return nb_unlinks; /* as if success */
1873         }
1874
1875         dlb2 = ev_port->dlb2;
1876
1877         for (i = 0; i < nb_unlinks; i++) {
1878                 struct dlb2_eventdev_queue *ev_queue;
1879                 int ret, j;
1880
1881                 if (queues[i] >= dlb2->num_queues) {
1882                         DLB2_LOG_ERR("dlb2: invalid queue id %d\n", queues[i]);
1883                         rte_errno = -EINVAL;
1884                         return i; /* return index of offending queue */
1885                 }
1886
1887                 ev_queue = &dlb2->ev_queues[queues[i]];
1888
1889                 /* Does a link exist? */
1890                 for (j = 0; j < DLB2_MAX_NUM_QIDS_PER_LDB_CQ; j++)
1891                         if (ev_port->link[j].queue_id == queues[i] &&
1892                             ev_port->link[j].valid)
1893                                 break;
1894
1895                 if (j == DLB2_MAX_NUM_QIDS_PER_LDB_CQ)
1896                         continue;
1897
1898                 ret = dlb2_event_queue_detach_ldb(dlb2, ev_port, ev_queue);
1899                 if (ret) {
1900                         DLB2_LOG_ERR("unlink err=%d for port %d queue %d\n",
1901                                      ret, ev_port->id, queues[i]);
1902                         rte_errno = -ENOENT;
1903                         return i; /* return index of offending queue */
1904                 }
1905
1906                 ev_port->link[j].valid = false;
1907                 ev_port->num_links--;
1908                 ev_queue->num_links--;
1909         }
1910
1911         return nb_unlinks;
1912 }
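
/*
 * Usage sketch (application side): queue unmaps can complete asynchronously
 * in hardware, so after rte_event_port_unlink() an application may poll
 * rte_event_port_unlinks_in_progress() (serviced by the function below)
 * before tearing the queue down. Identifiers are illustrative.
 *
 *   uint8_t q = 3;
 *
 *   rte_event_port_unlink(dev_id, port_id, &q, 1);
 *   while (rte_event_port_unlinks_in_progress(dev_id, port_id) > 0)
 *           rte_pause();
 */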
1913
1914 static int
1915 dlb2_eventdev_port_unlinks_in_progress(struct rte_eventdev *dev,
1916                                        void *event_port)
1917 {
1918         struct dlb2_eventdev_port *ev_port = event_port;
1919         struct dlb2_eventdev *dlb2;
1920         struct dlb2_hw_dev *handle;
1921         struct dlb2_pending_port_unmaps_args cfg;
1922         int ret;
1923
1924         RTE_SET_USED(dev);
1925
1926         if (!ev_port->setup_done) {
1927                 DLB2_LOG_ERR("dlb2: evport %d is not configured\n",
1928                              ev_port->id);
1929                 rte_errno = -EINVAL;
1930                 return 0;
1931         }
1932
1933         cfg.port_id = ev_port->qm_port.id;
1934         dlb2 = ev_port->dlb2;
1935         handle = &dlb2->qm_instance;
1936         ret = dlb2_iface_pending_port_unmaps(handle, &cfg);
1937
1938         if (ret < 0) {
1939                 DLB2_LOG_ERR("dlb2: num_unlinks_in_progress ret=%d (driver status: %s)\n",
1940                              ret, dlb2_error_strings[cfg.response.status]);
1941                 return ret;
1942         }
1943
1944         return cfg.response.id;
1945 }
1946
1947 static int
1948 dlb2_eventdev_reapply_configuration(struct rte_eventdev *dev)
1949 {
1950         struct dlb2_eventdev *dlb2 = dlb2_pmd_priv(dev);
1951         int ret, i;
1952
1953         /* If an event queue or port was previously configured, but hasn't been
1954          * reconfigured, reapply its original configuration.
1955          */
1956         for (i = 0; i < dlb2->num_queues; i++) {
1957                 struct dlb2_eventdev_queue *ev_queue;
1958
1959                 ev_queue = &dlb2->ev_queues[i];
1960
1961                 if (ev_queue->qm_queue.config_state != DLB2_PREV_CONFIGURED)
1962                         continue;
1963
1964                 ret = dlb2_eventdev_queue_setup(dev, i, &ev_queue->conf);
1965                 if (ret < 0) {
1966                         DLB2_LOG_ERR("dlb2: failed to reconfigure queue %d", i);
1967                         return ret;
1968                 }
1969         }
1970
1971         for (i = 0; i < dlb2->num_ports; i++) {
1972                 struct dlb2_eventdev_port *ev_port = &dlb2->ev_ports[i];
1973
1974                 if (ev_port->qm_port.config_state != DLB2_PREV_CONFIGURED)
1975                         continue;
1976
1977                 ret = dlb2_eventdev_port_setup(dev, i, &ev_port->conf);
1978                 if (ret < 0) {
1979                         DLB2_LOG_ERR("dlb2: failed to reconfigure ev_port %d",
1980                                      i);
1981                         return ret;
1982                 }
1983         }
1984
1985         return 0;
1986 }
1987
1988 static int
1989 dlb2_eventdev_apply_port_links(struct rte_eventdev *dev)
1990 {
1991         struct dlb2_eventdev *dlb2 = dlb2_pmd_priv(dev);
1992         int i;
1993
1994         /* Perform requested port->queue links */
1995         for (i = 0; i < dlb2->num_ports; i++) {
1996                 struct dlb2_eventdev_port *ev_port = &dlb2->ev_ports[i];
1997                 int j;
1998
1999                 for (j = 0; j < DLB2_MAX_NUM_QIDS_PER_LDB_CQ; j++) {
2000                         struct dlb2_eventdev_queue *ev_queue;
2001                         uint8_t prio, queue_id;
2002
2003                         if (!ev_port->link[j].valid)
2004                                 continue;
2005
2006                         prio = ev_port->link[j].priority;
2007                         queue_id = ev_port->link[j].queue_id;
2008
2009                         if (dlb2_validate_port_link(ev_port, queue_id, true, j))
2010                                 return -EINVAL;
2011
2012                         ev_queue = &dlb2->ev_queues[queue_id];
2013
2014                         if (dlb2_do_port_link(dev, ev_queue, ev_port, prio))
2015                                 return -EINVAL;
2016                 }
2017         }
2018
2019         return 0;
2020 }
2021
2022 static int
2023 dlb2_eventdev_start(struct rte_eventdev *dev)
2024 {
2025         struct dlb2_eventdev *dlb2 = dlb2_pmd_priv(dev);
2026         struct dlb2_hw_dev *handle = &dlb2->qm_instance;
2027         struct dlb2_start_domain_args cfg;
2028         int ret, i;
2029
2030         rte_spinlock_lock(&dlb2->qm_instance.resource_lock);
2031         if (dlb2->run_state != DLB2_RUN_STATE_STOPPED) {
2032                 DLB2_LOG_ERR("bad state %d for dev_start\n",
2033                              (int)dlb2->run_state);
2034                 rte_spinlock_unlock(&dlb2->qm_instance.resource_lock);
2035                 return -EINVAL;
2036         }
2037         dlb2->run_state = DLB2_RUN_STATE_STARTING;
2038         rte_spinlock_unlock(&dlb2->qm_instance.resource_lock);
2039
2040         /* If the device was configured more than once, some event ports and/or
2041          * queues may need to be reconfigured.
2042          */
2043         ret = dlb2_eventdev_reapply_configuration(dev);
2044         if (ret)
2045                 return ret;
2046
2047         /* The DLB PMD delays port links until the device is started. */
2048         ret = dlb2_eventdev_apply_port_links(dev);
2049         if (ret)
2050                 return ret;
2051
2052         for (i = 0; i < dlb2->num_ports; i++) {
2053                 if (!dlb2->ev_ports[i].setup_done) {
2054                         DLB2_LOG_ERR("dlb2: port %d not setup", i);
2055                         return -ESTALE;
2056                 }
2057         }
2058
2059         for (i = 0; i < dlb2->num_queues; i++) {
2060                 if (dlb2->ev_queues[i].num_links == 0) {
2061                         DLB2_LOG_ERR("dlb2: queue %d is not linked", i);
2062                         return -ENOLINK;
2063                 }
2064         }
2065
2066         ret = dlb2_iface_sched_domain_start(handle, &cfg);
2067         if (ret < 0) {
2068                 DLB2_LOG_ERR("dlb2: sched_domain_start ret=%d (driver status: %s)\n",
2069                              ret, dlb2_error_strings[cfg.response.status]);
2070                 return ret;
2071         }
2072
2073         dlb2->run_state = DLB2_RUN_STATE_STARTED;
2074         DLB2_LOG_DBG("dlb2: sched_domain_start completed OK\n");
2075
2076         return 0;
2077 }
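
/*
 * Startup-order sketch (application side, default configs, illustrative
 * dev_id): the checks in dlb2_eventdev_start() above require every port to
 * be set up and every queue to have at least one link before the scheduling
 * domain is started.
 *
 *   rte_event_dev_configure(dev_id, &dev_conf);
 *   for (q = 0; q < nb_queues; q++)
 *           rte_event_queue_setup(dev_id, q, NULL);
 *   for (p = 0; p < nb_ports; p++) {
 *           rte_event_port_setup(dev_id, p, NULL);
 *           rte_event_port_link(dev_id, p, NULL, NULL, 0);
 *   }
 *   rte_event_dev_start(dev_id);
 */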
2078
2079 static uint8_t cmd_byte_map[DLB2_NUM_PORT_TYPES][DLB2_NUM_HW_SCHED_TYPES] = {
2080         {
2081                 /* Load-balanced cmd bytes */
2082                 [RTE_EVENT_OP_NEW] = DLB2_NEW_CMD_BYTE,
2083                 [RTE_EVENT_OP_FORWARD] = DLB2_FWD_CMD_BYTE,
2084                 [RTE_EVENT_OP_RELEASE] = DLB2_COMP_CMD_BYTE,
2085         },
2086         {
2087                 /* Directed cmd bytes */
2088                 [RTE_EVENT_OP_NEW] = DLB2_NEW_CMD_BYTE,
2089                 [RTE_EVENT_OP_FORWARD] = DLB2_NEW_CMD_BYTE,
2090                 [RTE_EVENT_OP_RELEASE] = DLB2_NOOP_CMD_BYTE,
2091         },
2092 };
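
/*
 * Rationale (as the table above implies): directed (single-link) traffic has
 * no load-balanced scheduling context to complete, so a FORWARD is issued to
 * the hardware as a plain NEW enqueue and a RELEASE degenerates to a no-op
 * command byte.
 */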
2093
2094 static inline uint32_t
2095 dlb2_port_credits_get(struct dlb2_port *qm_port,
2096                       enum dlb2_hw_queue_types type)
2097 {
2098         uint32_t credits = *qm_port->credit_pool[type];
2099         uint32_t batch_size = DLB2_SW_CREDIT_BATCH_SZ;
2100
2101         if (unlikely(credits < batch_size))
2102                 batch_size = credits;
2103
2104         if (likely(credits &&
2105                    __atomic_compare_exchange_n(
2106                         qm_port->credit_pool[type],
2107                         &credits, credits - batch_size, false,
2108                         __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST)))
2109                 return batch_size;
2110         else
2111                 return 0;
2112 }
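
/*
 * Worked example for dlb2_port_credits_get() above, assuming a batch size of
 * 32: if only 17 credits remain in the shared pool, batch_size is clamped to
 * 17 and the CAS tries to take all of them; if another port raced and changed
 * the pool value first, the CAS fails and 0 is returned, so the caller sees
 * the credits as exhausted for this attempt.
 */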
2113
2114 static inline void
2115 dlb2_replenish_sw_credits(struct dlb2_eventdev *dlb2,
2116                           struct dlb2_eventdev_port *ev_port)
2117 {
2118         uint16_t quanta = ev_port->credit_update_quanta;
2119
2120         if (ev_port->inflight_credits >= quanta * 2) {
2121                 /* Replenish credits, saving one quantum for enqueues */
2122                 uint16_t val = ev_port->inflight_credits - quanta;
2123
2124                 __atomic_fetch_sub(&dlb2->inflights, val, __ATOMIC_SEQ_CST);
2125                 ev_port->inflight_credits -= val;
2126         }
2127 }
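
/*
 * Worked example for dlb2_replenish_sw_credits() above, assuming a credit
 * update quanta of 32: nothing is returned until the port caches at least 64
 * inflight credits; with 70 cached, 70 - 32 = 38 are subtracted from the
 * device-wide inflight count and 32 stay cached for upcoming enqueues.
 */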
2128
2129 static inline int
2130 dlb2_check_enqueue_sw_credits(struct dlb2_eventdev *dlb2,
2131                               struct dlb2_eventdev_port *ev_port)
2132 {
2133         uint32_t sw_inflights = __atomic_load_n(&dlb2->inflights,
2134                                                 __ATOMIC_SEQ_CST);
2135         const int num = 1;
2136
2137         if (unlikely(ev_port->inflight_max < sw_inflights)) {
2138                 DLB2_INC_STAT(ev_port->stats.traffic.tx_nospc_inflight_max, 1);
2139                 rte_errno = -ENOSPC;
2140                 return 1;
2141         }
2142
2143         if (ev_port->inflight_credits < num) {
2144                 /* check if event enqueue brings ev_port over max threshold */
2145                 uint32_t credit_update_quanta = ev_port->credit_update_quanta;
2146
2147                 if (sw_inflights + credit_update_quanta >
2148                                 dlb2->new_event_limit) {
2149                         DLB2_INC_STAT(
2150                         ev_port->stats.traffic.tx_nospc_new_event_limit,
2151                         1);
2152                         rte_errno = -ENOSPC;
2153                         return 1;
2154                 }
2155
2156                 __atomic_fetch_add(&dlb2->inflights, credit_update_quanta,
2157                                    __ATOMIC_SEQ_CST);
2158                 ev_port->inflight_credits += (credit_update_quanta);
2159
2160                 if (ev_port->inflight_credits < num) {
2161                         DLB2_INC_STAT(
2162                         ev_port->stats.traffic.tx_nospc_inflight_credits,
2163                         1);
2164                         rte_errno = -ENOSPC;
2165                         return 1;
2166                 }
2167         }
2168
2169         return 0;
2170 }
2171
2172 static inline int
2173 dlb2_check_enqueue_hw_ldb_credits(struct dlb2_port *qm_port)
2174 {
2175         if (unlikely(qm_port->cached_ldb_credits == 0)) {
2176                 qm_port->cached_ldb_credits =
2177                         dlb2_port_credits_get(qm_port,
2178                                               DLB2_LDB_QUEUE);
2179                 if (unlikely(qm_port->cached_ldb_credits == 0)) {
2180                         DLB2_INC_STAT(
2181                         qm_port->ev_port->stats.traffic.tx_nospc_ldb_hw_credits,
2182                         1);
2183                         DLB2_LOG_DBG("ldb credits exhausted\n");
2184                         return 1; /* credits exhausted */
2185                 }
2186         }
2187
2188         return 0;
2189 }
2190
2191 static inline int
2192 dlb2_check_enqueue_hw_dir_credits(struct dlb2_port *qm_port)
2193 {
2194         if (unlikely(qm_port->cached_dir_credits == 0)) {
2195                 qm_port->cached_dir_credits =
2196                         dlb2_port_credits_get(qm_port,
2197                                               DLB2_DIR_QUEUE);
2198                 if (unlikely(qm_port->cached_dir_credits == 0)) {
2199                         DLB2_INC_STAT(
2200                         qm_port->ev_port->stats.traffic.tx_nospc_dir_hw_credits,
2201                         1);
2202                         DLB2_LOG_DBG("dir credits exhausted\n");
2203                         return 1; /* credits exhausted */
2204                 }
2205         }
2206
2207         return 0;
2208 }
2209
2210 static __rte_always_inline void
2211 dlb2_pp_write(struct dlb2_enqueue_qe *qe4,
2212               struct process_local_port_data *port_data)
2213 {
2214         dlb2_movdir64b(port_data->pp_addr, qe4);
2215 }
2216
2217 static inline int
2218 dlb2_consume_qe_immediate(struct dlb2_port *qm_port, int num)
2219 {
2220         struct process_local_port_data *port_data;
2221         struct dlb2_cq_pop_qe *qe;
2222
2223         RTE_ASSERT(qm_port->config_state == DLB2_CONFIGURED);
2224
2225         qe = qm_port->consume_qe;
2226
2227         qe->tokens = num - 1;
2228
2229         /* No store fence needed since no pointer is being sent, and CQ token
2230          * pops can be safely reordered with other HCWs.
2231          */
2232         port_data = &dlb2_port[qm_port->id][PORT_TYPE(qm_port)];
2233
2234         dlb2_movntdq_single(port_data->pp_addr, qe);
2235
2236         DLB2_LOG_DBG("dlb2: consume immediate - %d QEs\n", num);
2237
2238         qm_port->owed_tokens = 0;
2239
2240         return 0;
2241 }
2242
2243 static inline void
2244 dlb2_hw_do_enqueue(struct dlb2_port *qm_port,
2245                    bool do_sfence,
2246                    struct process_local_port_data *port_data)
2247 {
2248         /* Since MOVDIR64B is weakly-ordered, use an SFENCE to ensure that
2249          * application writes complete before enqueueing the QE.
2250          */
2251         if (do_sfence)
2252                 rte_wmb();
2253
2254         dlb2_pp_write(qm_port->qe4, port_data);
2255 }
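
/*
 * Note: dlb2_event_enqueue_burst() below passes do_sfence = (i == 0), so the
 * write barrier is issued only for the first 4-QE block of a burst; the
 * application's payload stores are ordered once per burst rather than once
 * per cache line of QEs.
 */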
2256
2257 static inline void
2258 dlb2_construct_token_pop_qe(struct dlb2_port *qm_port, int idx)
2259 {
2260         struct dlb2_cq_pop_qe *qe = (void *)qm_port->qe4;
2261         int num = qm_port->owed_tokens;
2262
2263         qe[idx].cmd_byte = DLB2_POP_CMD_BYTE;
2264         qe[idx].tokens = num - 1;
2265
2266         qm_port->owed_tokens = 0;
2267 }
2268
2269 static inline void
2270 dlb2_event_build_hcws(struct dlb2_port *qm_port,
2271                       const struct rte_event ev[],
2272                       int num,
2273                       uint8_t *sched_type,
2274                       uint8_t *queue_id)
2275 {
2276         struct dlb2_enqueue_qe *qe;
2277         uint16_t sched_word[4];
2278         __m128i sse_qe[2];
2279         int i;
2280
2281         qe = qm_port->qe4;
2282
2283         sse_qe[0] = _mm_setzero_si128();
2284         sse_qe[1] = _mm_setzero_si128();
2285
2286         switch (num) {
2287         case 4:
2288                 /* Construct the metadata portion of two HCWs in one 128b SSE
2289                  * register. HCW metadata is constructed in the SSE registers
2290                  * like so:
2291                  * sse_qe[0][63:0]:   qe[0]'s metadata
2292                  * sse_qe[0][127:64]: qe[1]'s metadata
2293                  * sse_qe[1][63:0]:   qe[2]'s metadata
2294                  * sse_qe[1][127:64]: qe[3]'s metadata
2295                  */
2296
2297                 /* Convert the event operation into a command byte and store it
2298                  * in the metadata:
2299                  * sse_qe[0][63:56]   = cmd_byte_map[is_directed][ev[0].op]
2300                  * sse_qe[0][127:120] = cmd_byte_map[is_directed][ev[1].op]
2301                  * sse_qe[1][63:56]   = cmd_byte_map[is_directed][ev[2].op]
2302                  * sse_qe[1][127:120] = cmd_byte_map[is_directed][ev[3].op]
2303                  */
2304 #define DLB2_QE_CMD_BYTE 7
2305                 sse_qe[0] = _mm_insert_epi8(sse_qe[0],
2306                                 cmd_byte_map[qm_port->is_directed][ev[0].op],
2307                                 DLB2_QE_CMD_BYTE);
2308                 sse_qe[0] = _mm_insert_epi8(sse_qe[0],
2309                                 cmd_byte_map[qm_port->is_directed][ev[1].op],
2310                                 DLB2_QE_CMD_BYTE + 8);
2311                 sse_qe[1] = _mm_insert_epi8(sse_qe[1],
2312                                 cmd_byte_map[qm_port->is_directed][ev[2].op],
2313                                 DLB2_QE_CMD_BYTE);
2314                 sse_qe[1] = _mm_insert_epi8(sse_qe[1],
2315                                 cmd_byte_map[qm_port->is_directed][ev[3].op],
2316                                 DLB2_QE_CMD_BYTE + 8);
2317
2318                 /* Store priority, scheduling type, and queue ID in the sched
2319                  * word array because these values are re-used when the
2320                  * destination is a directed queue.
2321                  */
2322                 sched_word[0] = EV_TO_DLB2_PRIO(ev[0].priority) << 10 |
2323                                 sched_type[0] << 8 |
2324                                 queue_id[0];
2325                 sched_word[1] = EV_TO_DLB2_PRIO(ev[1].priority) << 10 |
2326                                 sched_type[1] << 8 |
2327                                 queue_id[1];
2328                 sched_word[2] = EV_TO_DLB2_PRIO(ev[2].priority) << 10 |
2329                                 sched_type[2] << 8 |
2330                                 queue_id[2];
2331                 sched_word[3] = EV_TO_DLB2_PRIO(ev[3].priority) << 10 |
2332                                 sched_type[3] << 8 |
2333                                 queue_id[3];
2334
2335                 /* Store the event priority, scheduling type, and queue ID in
2336                  * the metadata:
2337                  * sse_qe[0][31:16] = sched_word[0]
2338                  * sse_qe[0][95:80] = sched_word[1]
2339                  * sse_qe[1][31:16] = sched_word[2]
2340                  * sse_qe[1][95:80] = sched_word[3]
2341                  */
2342 #define DLB2_QE_QID_SCHED_WORD 1
2343                 sse_qe[0] = _mm_insert_epi16(sse_qe[0],
2344                                              sched_word[0],
2345                                              DLB2_QE_QID_SCHED_WORD);
2346                 sse_qe[0] = _mm_insert_epi16(sse_qe[0],
2347                                              sched_word[1],
2348                                              DLB2_QE_QID_SCHED_WORD + 4);
2349                 sse_qe[1] = _mm_insert_epi16(sse_qe[1],
2350                                              sched_word[2],
2351                                              DLB2_QE_QID_SCHED_WORD);
2352                 sse_qe[1] = _mm_insert_epi16(sse_qe[1],
2353                                              sched_word[3],
2354                                              DLB2_QE_QID_SCHED_WORD + 4);
2355
2356                 /* If the destination is a load-balanced queue, store the lock
2357                  * ID. If it is a directed queue, DLB places this field in
2358                  * bytes 10-11 of the received QE, so we format it accordingly:
2359                  * sse_qe[0][47:32]  = dir queue ? sched_word[0] : flow_id[0]
2360                  * sse_qe[0][111:96] = dir queue ? sched_word[1] : flow_id[1]
2361                  * sse_qe[1][47:32]  = dir queue ? sched_word[2] : flow_id[2]
2362                  * sse_qe[1][111:96] = dir queue ? sched_word[3] : flow_id[3]
2363                  */
2364 #define DLB2_QE_LOCK_ID_WORD 2
2365                 sse_qe[0] = _mm_insert_epi16(sse_qe[0],
2366                                 (sched_type[0] == DLB2_SCHED_DIRECTED) ?
2367                                         sched_word[0] : ev[0].flow_id,
2368                                 DLB2_QE_LOCK_ID_WORD);
2369                 sse_qe[0] = _mm_insert_epi16(sse_qe[0],
2370                                 (sched_type[1] == DLB2_SCHED_DIRECTED) ?
2371                                         sched_word[1] : ev[1].flow_id,
2372                                 DLB2_QE_LOCK_ID_WORD + 4);
2373                 sse_qe[1] = _mm_insert_epi16(sse_qe[1],
2374                                 (sched_type[2] == DLB2_SCHED_DIRECTED) ?
2375                                         sched_word[2] : ev[2].flow_id,
2376                                 DLB2_QE_LOCK_ID_WORD);
2377                 sse_qe[1] = _mm_insert_epi16(sse_qe[1],
2378                                 (sched_type[3] == DLB2_SCHED_DIRECTED) ?
2379                                         sched_word[3] : ev[3].flow_id,
2380                                 DLB2_QE_LOCK_ID_WORD + 4);
2381
2382                 /* Store the event type and sub event type in the metadata:
2383                  * sse_qe[0][15:0]  = sub_event_type[0] << 8 | event_type[0]
2384                  * sse_qe[0][79:64] = sub_event_type[1] << 8 | event_type[1]
2385                  * sse_qe[1][15:0]  = sub_event_type[2] << 8 | event_type[2]
2386                  * sse_qe[1][79:64] = sub_event_type[3] << 8 | event_type[3]
2387                  */
2388 #define DLB2_QE_EV_TYPE_WORD 0
2389                 sse_qe[0] = _mm_insert_epi16(sse_qe[0],
2390                                              ev[0].sub_event_type << 8 |
2391                                                 ev[0].event_type,
2392                                              DLB2_QE_EV_TYPE_WORD);
2393                 sse_qe[0] = _mm_insert_epi16(sse_qe[0],
2394                                              ev[1].sub_event_type << 8 |
2395                                                 ev[1].event_type,
2396                                              DLB2_QE_EV_TYPE_WORD + 4);
2397                 sse_qe[1] = _mm_insert_epi16(sse_qe[1],
2398                                              ev[2].sub_event_type << 8 |
2399                                                 ev[2].event_type,
2400                                              DLB2_QE_EV_TYPE_WORD);
2401                 sse_qe[1] = _mm_insert_epi16(sse_qe[1],
2402                                              ev[3].sub_event_type << 8 |
2403                                                 ev[3].event_type,
2404                                              DLB2_QE_EV_TYPE_WORD + 4);
2405
2406                 /* Store the metadata to memory (use the double-precision
2407                  * _mm_storeh_pd because there is no integer function for
2408                  * storing the upper 64b):
2409                  * qe[0] metadata = sse_qe[0][63:0]
2410                  * qe[1] metadata = sse_qe[0][127:64]
2411                  * qe[2] metadata = sse_qe[1][63:0]
2412                  * qe[3] metadata = sse_qe[1][127:64]
2413                  */
2414                 _mm_storel_epi64((__m128i *)&qe[0].u.opaque_data, sse_qe[0]);
2415                 _mm_storeh_pd((double *)&qe[1].u.opaque_data,
2416                               (__m128d)sse_qe[0]);
2417                 _mm_storel_epi64((__m128i *)&qe[2].u.opaque_data, sse_qe[1]);
2418                 _mm_storeh_pd((double *)&qe[3].u.opaque_data,
2419                               (__m128d)sse_qe[1]);
2420
2421                 qe[0].data = ev[0].u64;
2422                 qe[1].data = ev[1].u64;
2423                 qe[2].data = ev[2].u64;
2424                 qe[3].data = ev[3].u64;
2425
2426                 break;
2427         case 3:
2428         case 2:
2429         case 1:
2430                 /* At least one QE will be valid, so only zero out three */
2431                 qe[1].cmd_byte = 0;
2432                 qe[2].cmd_byte = 0;
2433                 qe[3].cmd_byte = 0;
2434
2435                 for (i = 0; i < num; i++) {
2436                         qe[i].cmd_byte =
2437                                 cmd_byte_map[qm_port->is_directed][ev[i].op];
2438                         qe[i].sched_type = sched_type[i];
2439                         qe[i].data = ev[i].u64;
2440                         qe[i].qid = queue_id[i];
2441                         qe[i].priority = EV_TO_DLB2_PRIO(ev[i].priority);
2442                         qe[i].lock_id = ev[i].flow_id;
2443                         if (sched_type[i] == DLB2_SCHED_DIRECTED) {
2444                                 struct dlb2_msg_info *info =
2445                                         (struct dlb2_msg_info *)&qe[i].lock_id;
2446
2447                                 info->qid = queue_id[i];
2448                                 info->sched_type = DLB2_SCHED_DIRECTED;
2449                                 info->priority = qe[i].priority;
2450                         }
2451                         qe[i].u.event_type.major = ev[i].event_type;
2452                         qe[i].u.event_type.sub = ev[i].sub_event_type;
2453                 }
2454                 break;
2455         }
2456 }
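
/*
 * Layout implied by the insert offsets above for each 16B enqueue HCW (a
 * sketch, not an authoritative register description): 16-bit word 0 holds
 * event_type | sub_event_type << 8, word 1 holds qid | sched_type << 8 |
 * priority << 10, word 2 holds the lock ID (flow_id, or the qid/sched word
 * again for directed destinations), byte 7 holds the command byte, and
 * bytes 8-15 carry the 64-bit event payload (ev.u64).
 */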
2457
2458 static inline int
2459 dlb2_event_enqueue_prep(struct dlb2_eventdev_port *ev_port,
2460                         struct dlb2_port *qm_port,
2461                         const struct rte_event ev[],
2462                         uint8_t *sched_type,
2463                         uint8_t *queue_id)
2464 {
2465         struct dlb2_eventdev *dlb2 = ev_port->dlb2;
2466         struct dlb2_eventdev_queue *ev_queue;
2467         uint16_t *cached_credits = NULL;
2468         struct dlb2_queue *qm_queue;
2469
2470         ev_queue = &dlb2->ev_queues[ev->queue_id];
2471         qm_queue = &ev_queue->qm_queue;
2472         *queue_id = qm_queue->id;
2473
2474         /* Ignore sched_type and hardware credits on release events */
2475         if (ev->op == RTE_EVENT_OP_RELEASE)
2476                 goto op_check;
2477
2478         if (!qm_queue->is_directed) {
2479                 /* Load balanced destination queue */
2480
2481                 if (dlb2_check_enqueue_hw_ldb_credits(qm_port)) {
2482                         rte_errno = -ENOSPC;
2483                         return 1;
2484                 }
2485                 cached_credits = &qm_port->cached_ldb_credits;
2486
2487                 switch (ev->sched_type) {
2488                 case RTE_SCHED_TYPE_ORDERED:
2489                         DLB2_LOG_DBG("dlb2: put_qe: RTE_SCHED_TYPE_ORDERED\n");
2490                         if (qm_queue->sched_type != RTE_SCHED_TYPE_ORDERED) {
2491                                 DLB2_LOG_ERR("dlb2: tried to send ordered event to unordered queue %d\n",
2492                                              *queue_id);
2493                                 rte_errno = -EINVAL;
2494                                 return 1;
2495                         }
2496                         *sched_type = DLB2_SCHED_ORDERED;
2497                         break;
2498                 case RTE_SCHED_TYPE_ATOMIC:
2499                         DLB2_LOG_DBG("dlb2: put_qe: RTE_SCHED_TYPE_ATOMIC\n");
2500                         *sched_type = DLB2_SCHED_ATOMIC;
2501                         break;
2502                 case RTE_SCHED_TYPE_PARALLEL:
2503                         DLB2_LOG_DBG("dlb2: put_qe: RTE_SCHED_TYPE_PARALLEL\n");
2504                         if (qm_queue->sched_type == RTE_SCHED_TYPE_ORDERED)
2505                                 *sched_type = DLB2_SCHED_ORDERED;
2506                         else
2507                                 *sched_type = DLB2_SCHED_UNORDERED;
2508                         break;
2509                 default:
2510                         DLB2_LOG_ERR("Unsupported LDB sched type in put_qe\n");
2511                         DLB2_INC_STAT(ev_port->stats.tx_invalid, 1);
2512                         rte_errno = -EINVAL;
2513                         return 1;
2514                 }
2515         } else {
2516                 /* Directed destination queue */
2517
2518                 if (dlb2_check_enqueue_hw_dir_credits(qm_port)) {
2519                         rte_errno = -ENOSPC;
2520                         return 1;
2521                 }
2522                 cached_credits = &qm_port->cached_dir_credits;
2523
2524                 DLB2_LOG_DBG("dlb2: put_qe: RTE_SCHED_TYPE_DIRECTED\n");
2525
2526                 *sched_type = DLB2_SCHED_DIRECTED;
2527         }
2528
2529 op_check:
2530         switch (ev->op) {
2531         case RTE_EVENT_OP_NEW:
2532                 /* Check that a sw credit is available */
2533                 if (dlb2_check_enqueue_sw_credits(dlb2, ev_port)) {
2534                         rte_errno = -ENOSPC;
2535                         return 1;
2536                 }
2537                 ev_port->inflight_credits--;
2538                 (*cached_credits)--;
2539                 break;
2540         case RTE_EVENT_OP_FORWARD:
2541                 /* Check for outstanding_releases underflow. If this occurs,
2542                  * the application is not using the EVENT_OPs correctly; for
2543                  * example, forwarding or releasing events that were not
2544                  * dequeued.
2545                  */
2546                 RTE_ASSERT(ev_port->outstanding_releases > 0);
2547                 ev_port->outstanding_releases--;
2548                 qm_port->issued_releases++;
2549                 (*cached_credits)--;
2550                 break;
2551         case RTE_EVENT_OP_RELEASE:
2552                 ev_port->inflight_credits++;
2553                 /* Check for outstanding_releases underflow. If this occurs,
2554                  * the application is not using the EVENT_OPs correctly; for
2555                  * example, forwarding or releasing events that were not
2556                  * dequeued.
2557                  */
2558                 RTE_ASSERT(ev_port->outstanding_releases > 0);
2559                 ev_port->outstanding_releases--;
2560                 qm_port->issued_releases++;
2561
2562                 /* Replenish s/w credits if enough are cached */
2563                 dlb2_replenish_sw_credits(dlb2, ev_port);
2564                 break;
2565         }
2566
2567         DLB2_INC_STAT(ev_port->stats.tx_op_cnt[ev->op], 1);
2568         DLB2_INC_STAT(ev_port->stats.traffic.tx_ok, 1);
2569
2570 #ifndef RTE_LIBRTE_PMD_DLB2_QUELL_STATS
2571         if (ev->op != RTE_EVENT_OP_RELEASE) {
2572                 DLB2_INC_STAT(ev_port->stats.queue[ev->queue_id].enq_ok, 1);
2573                 DLB2_INC_STAT(ev_port->stats.tx_sched_cnt[*sched_type], 1);
2574         }
2575 #endif
2576
2577         return 0;
2578 }
2579
2580 static inline uint16_t
2581 dlb2_event_enqueue_burst(void *event_port,
2582                          const struct rte_event events[],
2583                          uint16_t num)
2584 {
2585         struct dlb2_eventdev_port *ev_port = event_port;
2586         struct dlb2_port *qm_port = &ev_port->qm_port;
2587         struct process_local_port_data *port_data;
2588         int i, cnt;
2589
2590         RTE_ASSERT(ev_port->enq_configured);
2591         RTE_ASSERT(events != NULL);
2592
2593         cnt = 0;
2594
2595         port_data = &dlb2_port[qm_port->id][PORT_TYPE(qm_port)];
2596
2597         for (i = 0; i < num; i += DLB2_NUM_QES_PER_CACHE_LINE) {
2598                 uint8_t sched_types[DLB2_NUM_QES_PER_CACHE_LINE];
2599                 uint8_t queue_ids[DLB2_NUM_QES_PER_CACHE_LINE];
2600                 int j = 0;
2601
2602                 for (; j < DLB2_NUM_QES_PER_CACHE_LINE && (i + j) < num; j++) {
2603                         const struct rte_event *ev = &events[i + j];
2604
2605                         if (dlb2_event_enqueue_prep(ev_port, qm_port, ev,
2606                                                     &sched_types[j],
2607                                                     &queue_ids[j]))
2608                                 break;
2609                 }
2610
2611                 if (j == 0)
2612                         break;
2613
2614                 dlb2_event_build_hcws(qm_port, &events[i], j,
2615                                       sched_types, queue_ids);
2616
2617                 if (qm_port->token_pop_mode == DELAYED_POP && j < 4 &&
2618                     qm_port->issued_releases >= qm_port->token_pop_thresh - 1) {
2619                         dlb2_construct_token_pop_qe(qm_port, j);
2620
2621                         /* Reset the releases counter for the next QE batch */
2622                         qm_port->issued_releases -= qm_port->token_pop_thresh;
2623                 }
2624
2625                 dlb2_hw_do_enqueue(qm_port, i == 0, port_data);
2626
2627                 cnt += j;
2628
2629                 if (j < DLB2_NUM_QES_PER_CACHE_LINE)
2630                         break;
2631         }
2632
2633         if (qm_port->token_pop_mode == DELAYED_POP &&
2634             qm_port->issued_releases >= qm_port->token_pop_thresh - 1) {
2635                 dlb2_consume_qe_immediate(qm_port, qm_port->owed_tokens);
2636                 qm_port->issued_releases -= qm_port->token_pop_thresh;
2637         }
2638         return cnt;
2639 }
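
/*
 * Usage sketch (application side): the burst enqueue above stops at the first
 * event that fails a credit or op check, so callers typically retry the
 * remainder. Identifiers are illustrative.
 *
 *   uint16_t n = 0;
 *
 *   while (n < nb_events) {
 *           uint16_t done = rte_event_enqueue_burst(dev_id, port_id,
 *                                                   &events[n],
 *                                                   nb_events - n);
 *           n += done;
 *           if (done == 0 && rte_errno != -ENOSPC)
 *                   break; /* hard error; -ENOSPC just means back-pressure */
 *   }
 */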
2640
2641 static inline uint16_t
2642 dlb2_event_enqueue(void *event_port,
2643                    const struct rte_event events[])
2644 {
2645         return dlb2_event_enqueue_burst(event_port, events, 1);
2646 }
2647
2648 static uint16_t
2649 dlb2_event_enqueue_new_burst(void *event_port,
2650                              const struct rte_event events[],
2651                              uint16_t num)
2652 {
2653         return dlb2_event_enqueue_burst(event_port, events, num);
2654 }
2655
2656 static uint16_t
2657 dlb2_event_enqueue_forward_burst(void *event_port,
2658                                  const struct rte_event events[],
2659                                  uint16_t num)
2660 {
2661         return dlb2_event_enqueue_burst(event_port, events, num);
2662 }
2663
2664 static inline void
2665 dlb2_port_credits_inc(struct dlb2_port *qm_port, int num)
2666 {
2667         uint32_t batch_size = DLB2_SW_CREDIT_BATCH_SZ;
2668
2669         /* Increment port credits; return a batch to the pool if over threshold */
2670         if (!qm_port->is_directed) {
2671                 qm_port->cached_ldb_credits += num;
2672                 if (qm_port->cached_ldb_credits >= 2 * batch_size) {
2673                         __atomic_fetch_add(
2674                                 qm_port->credit_pool[DLB2_LDB_QUEUE],
2675                                 batch_size, __ATOMIC_SEQ_CST);
2676                         qm_port->cached_ldb_credits -= batch_size;
2677                 }
2678         } else {
2679                 qm_port->cached_dir_credits += num;
2680                 if (qm_port->cached_dir_credits >= 2 * batch_size) {
2681                         __atomic_fetch_add(
2682                                 qm_port->credit_pool[DLB2_DIR_QUEUE],
2683                                 batch_size, __ATOMIC_SEQ_CST);
2684                         qm_port->cached_dir_credits -= batch_size;
2685                 }
2686         }
2687 }
2688
2689 static inline int
2690 dlb2_dequeue_wait(struct dlb2_eventdev *dlb2,
2691                   struct dlb2_eventdev_port *ev_port,
2692                   struct dlb2_port *qm_port,
2693                   uint64_t timeout,
2694                   uint64_t start_ticks)
2695 {
2696         struct process_local_port_data *port_data;
2697         uint64_t elapsed_ticks;
2698
2699         port_data = &dlb2_port[qm_port->id][PORT_TYPE(qm_port)];
2700
2701         elapsed_ticks = rte_get_timer_cycles() - start_ticks;
2702
2703         /* Wait/poll time expired */
2704         if (elapsed_ticks >= timeout) {
2705                 return 1;
2706         } else if (dlb2->umwait_allowed) {
2707                 volatile struct dlb2_dequeue_qe *cq_base;
2708                 union {
2709                         uint64_t raw_qe[2];
2710                         struct dlb2_dequeue_qe qe;
2711                 } qe_mask;
2712                 uint64_t expected_value;
2713                 volatile uint64_t *monitor_addr;
2714
2715                 qe_mask.qe.cq_gen = 1; /* set mask */
2716
2717                 cq_base = port_data->cq_base;
2718                 monitor_addr = (volatile uint64_t *)(volatile void *)
2719                         &cq_base[qm_port->cq_idx];
2720                 monitor_addr++; /* cq_gen bit is in second 64bit location */
2721
2722                 if (qm_port->gen_bit)
2723                         expected_value = qe_mask.raw_qe[1];
2724                 else
2725                         expected_value = 0;
2726
2727                 rte_power_monitor(monitor_addr, expected_value,
2728                                   qe_mask.raw_qe[1], timeout + start_ticks,
2729                                   sizeof(uint64_t));
2730
2731                 DLB2_INC_STAT(ev_port->stats.traffic.rx_umonitor_umwait, 1);
2732         } else {
2733                 uint64_t poll_interval = RTE_LIBRTE_PMD_DLB2_POLL_INTERVAL;
2734                 uint64_t curr_ticks = rte_get_timer_cycles();
2735                 uint64_t init_ticks = curr_ticks;
2736
2737                 while ((curr_ticks - start_ticks < timeout) &&
2738                        (curr_ticks - init_ticks < poll_interval))
2739                         curr_ticks = rte_get_timer_cycles();
2740         }
2741
2742         return 0;
2743 }
2744
2745 static inline int
2746 dlb2_process_dequeue_qes(struct dlb2_eventdev_port *ev_port,
2747                          struct dlb2_port *qm_port,
2748                          struct rte_event *events,
2749                          struct dlb2_dequeue_qe *qes,
2750                          int cnt)
2751 {
2752         uint8_t *qid_mappings = qm_port->qid_mappings;
2753         int i, num, evq_id;
2754
2755         for (i = 0, num = 0; i < cnt; i++) {
2756                 struct dlb2_dequeue_qe *qe = &qes[i];
2757                 int sched_type_map[DLB2_NUM_HW_SCHED_TYPES] = {
2758                         [DLB2_SCHED_ATOMIC] = RTE_SCHED_TYPE_ATOMIC,
2759                         [DLB2_SCHED_UNORDERED] = RTE_SCHED_TYPE_PARALLEL,
2760                         [DLB2_SCHED_ORDERED] = RTE_SCHED_TYPE_ORDERED,
2761                         [DLB2_SCHED_DIRECTED] = RTE_SCHED_TYPE_ATOMIC,
2762                 };
2763
2764                 /* Fill in event information.
2765                  * Note that flow_id must be embedded in the data by
2766                  * the app, such as the mbuf RSS hash field if the data
2767                  * buffer is an mbuf.
2768                  */
2769                 if (unlikely(qe->error)) {
2770                         DLB2_LOG_ERR("QE error bit ON\n");
2771                         DLB2_INC_STAT(ev_port->stats.traffic.rx_drop, 1);
2772                         dlb2_consume_qe_immediate(qm_port, 1);
2773                         continue; /* Ignore */
2774                 }
2775
2776                 events[num].u64 = qe->data;
2777                 events[num].flow_id = qe->flow_id;
2778                 events[num].priority = DLB2_TO_EV_PRIO((uint8_t)qe->priority);
2779                 events[num].event_type = qe->u.event_type.major;
2780                 events[num].sub_event_type = qe->u.event_type.sub;
2781                 events[num].sched_type = sched_type_map[qe->sched_type];
2782                 events[num].impl_opaque = qe->qid_depth;
2783
2784                 /* qid not preserved for directed queues */
2785                 if (qm_port->is_directed)
2786                         evq_id = ev_port->link[0].queue_id;
2787                 else
2788                         evq_id = qid_mappings[qe->qid];
2789
2790                 events[num].queue_id = evq_id;
2791                 DLB2_INC_STAT(
2792                         ev_port->stats.queue[evq_id].qid_depth[qe->qid_depth],
2793                         1);
2794                 DLB2_INC_STAT(ev_port->stats.rx_sched_cnt[qe->sched_type], 1);
2795                 num++;
2796         }
2797
2798         DLB2_INC_STAT(ev_port->stats.traffic.rx_ok, num);
2799
2800         return num;
2801 }
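
/*
 * Application note (inferred from the directed-queue handling in
 * dlb2_event_build_hcws() and the comment above): for directed destinations
 * the lock-ID word is reused for qid/sched/priority, so a flow identifier
 * that must survive the round trip should live in the event payload itself,
 * e.g. by enqueueing an mbuf pointer and reading mbuf->hash.rss on dequeue
 * instead of relying on ev.flow_id.
 */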
2802
2803 static inline int
2804 dlb2_process_dequeue_four_qes(struct dlb2_eventdev_port *ev_port,
2805                               struct dlb2_port *qm_port,
2806                               struct rte_event *events,
2807                               struct dlb2_dequeue_qe *qes)
2808 {
2809         int sched_type_map[] = {
2810                 [DLB2_SCHED_ATOMIC] = RTE_SCHED_TYPE_ATOMIC,
2811                 [DLB2_SCHED_UNORDERED] = RTE_SCHED_TYPE_PARALLEL,
2812                 [DLB2_SCHED_ORDERED] = RTE_SCHED_TYPE_ORDERED,
2813                 [DLB2_SCHED_DIRECTED] = RTE_SCHED_TYPE_ATOMIC,
2814         };
2815         const int num_events = DLB2_NUM_QES_PER_CACHE_LINE;
2816         uint8_t *qid_mappings = qm_port->qid_mappings;
2817         __m128i sse_evt[2];
2818
2819         /* In the unlikely case that any of the QE error bits are set, process
2820          * them one at a time.
2821          */
2822         if (unlikely(qes[0].error || qes[1].error ||
2823                      qes[2].error || qes[3].error))
2824                 return dlb2_process_dequeue_qes(ev_port, qm_port, events,
2825                                                  qes, num_events);
2826
2827         events[0].u64 = qes[0].data;
2828         events[1].u64 = qes[1].data;
2829         events[2].u64 = qes[2].data;
2830         events[3].u64 = qes[3].data;
2831
2832         /* Construct the metadata portion of two struct rte_events
2833          * in one 128b SSE register. Event metadata is constructed in the SSE
2834          * registers like so:
2835          * sse_evt[0][63:0]:   event[0]'s metadata
2836          * sse_evt[0][127:64]: event[1]'s metadata
2837          * sse_evt[1][63:0]:   event[2]'s metadata
2838          * sse_evt[1][127:64]: event[3]'s metadata
2839          */
2840         sse_evt[0] = _mm_setzero_si128();
2841         sse_evt[1] = _mm_setzero_si128();
2842
2843         /* Convert the hardware queue ID to an event queue ID and store it in
2844          * the metadata:
2845          * sse_evt[0][47:40]   = qid_mappings[qes[0].qid]
2846          * sse_evt[0][111:104] = qid_mappings[qes[1].qid]
2847          * sse_evt[1][47:40]   = qid_mappings[qes[2].qid]
2848          * sse_evt[1][111:104] = qid_mappings[qes[3].qid]
2849          */
2850 #define DLB_EVENT_QUEUE_ID_BYTE 5
2851         sse_evt[0] = _mm_insert_epi8(sse_evt[0],
2852                                      qid_mappings[qes[0].qid],
2853                                      DLB_EVENT_QUEUE_ID_BYTE);
2854         sse_evt[0] = _mm_insert_epi8(sse_evt[0],
2855                                      qid_mappings[qes[1].qid],
2856                                      DLB_EVENT_QUEUE_ID_BYTE + 8);
2857         sse_evt[1] = _mm_insert_epi8(sse_evt[1],
2858                                      qid_mappings[qes[2].qid],
2859                                      DLB_EVENT_QUEUE_ID_BYTE);
2860         sse_evt[1] = _mm_insert_epi8(sse_evt[1],
2861                                      qid_mappings[qes[3].qid],
2862                                      DLB_EVENT_QUEUE_ID_BYTE + 8);
2863
2864         /* Convert the hardware priority to an event priority and store it in
2865          * the metadata, while also returning the queue depth status
2866          * value captured by the hardware, storing it in impl_opaque, which can
2867          * be read by the application but not modified
2868          * sse_evt[0][55:48]   = DLB2_TO_EV_PRIO(qes[0].priority)
2869          * sse_evt[0][63:56]   = qes[0].qid_depth
2870          * sse_evt[0][119:112] = DLB2_TO_EV_PRIO(qes[1].priority)
2871          * sse_evt[0][127:120] = qes[1].qid_depth
2872          * sse_evt[1][55:48]   = DLB2_TO_EV_PRIO(qes[2].priority)
2873          * sse_evt[1][63:56]   = qes[2].qid_depth
2874          * sse_evt[1][119:112] = DLB2_TO_EV_PRIO(qes[3].priority)
2875          * sse_evt[1][127:120] = qes[3].qid_depth
2876          */
2877 #define DLB_EVENT_PRIO_IMPL_OPAQUE_WORD 3
2878 #define DLB_BYTE_SHIFT 8
2879         sse_evt[0] =
2880                 _mm_insert_epi16(sse_evt[0],
2881                         DLB2_TO_EV_PRIO((uint8_t)qes[0].priority) |
2882                         (qes[0].qid_depth << DLB_BYTE_SHIFT),
2883                         DLB_EVENT_PRIO_IMPL_OPAQUE_WORD);
2884         sse_evt[0] =
2885                 _mm_insert_epi16(sse_evt[0],
2886                         DLB2_TO_EV_PRIO((uint8_t)qes[1].priority) |
2887                         (qes[1].qid_depth << DLB_BYTE_SHIFT),
2888                         DLB_EVENT_PRIO_IMPL_OPAQUE_WORD + 4);
2889         sse_evt[1] =
2890                 _mm_insert_epi16(sse_evt[1],
2891                         DLB2_TO_EV_PRIO((uint8_t)qes[2].priority) |
2892                         (qes[2].qid_depth << DLB_BYTE_SHIFT),
2893                         DLB_EVENT_PRIO_IMPL_OPAQUE_WORD);
2894         sse_evt[1] =
2895                 _mm_insert_epi16(sse_evt[1],
2896                         DLB2_TO_EV_PRIO((uint8_t)qes[3].priority) |
2897                         (qes[3].qid_depth << DLB_BYTE_SHIFT),
2898                         DLB_EVENT_PRIO_IMPL_OPAQUE_WORD + 4);
2899
2900         /* Write the event type, sub event type, and flow_id to the event
2901          * metadata.
2902          * sse_evt[0][31:0]   = qes[0].flow_id |
2903          *                      qes[0].u.event_type.major << 28 |
2904          *                      qes[0].u.event_type.sub << 20;
2905          * sse_evt[0][95:64]  = qes[1].flow_id |
2906          *                      qes[1].u.event_type.major << 28 |
2907          *                      qes[1].u.event_type.sub << 20;
2908          * sse_evt[1][31:0]   = qes[2].flow_id |
2909          *                      qes[2].u.event_type.major << 28 |
2910          *                      qes[2].u.event_type.sub << 20;
2911          * sse_evt[1][95:64]  = qes[3].flow_id |
2912          *                      qes[3].u.event_type.major << 28 |
2913          *                      qes[3].u.event_type.sub << 20;
2914          */
2915 #define DLB_EVENT_EV_TYPE_DW 0
2916 #define DLB_EVENT_EV_TYPE_SHIFT 28
2917 #define DLB_EVENT_SUB_EV_TYPE_SHIFT 20
2918         sse_evt[0] = _mm_insert_epi32(sse_evt[0],
2919                         qes[0].flow_id |
2920                         qes[0].u.event_type.major << DLB_EVENT_EV_TYPE_SHIFT |
2921                         qes[0].u.event_type.sub <<  DLB_EVENT_SUB_EV_TYPE_SHIFT,
2922                         DLB_EVENT_EV_TYPE_DW);
2923         sse_evt[0] = _mm_insert_epi32(sse_evt[0],
2924                         qes[1].flow_id |
2925                         qes[1].u.event_type.major << DLB_EVENT_EV_TYPE_SHIFT |
2926                         qes[1].u.event_type.sub <<  DLB_EVENT_SUB_EV_TYPE_SHIFT,
2927                         DLB_EVENT_EV_TYPE_DW + 2);
2928         sse_evt[1] = _mm_insert_epi32(sse_evt[1],
2929                         qes[2].flow_id |
2930                         qes[2].u.event_type.major << DLB_EVENT_EV_TYPE_SHIFT |
2931                         qes[2].u.event_type.sub <<  DLB_EVENT_SUB_EV_TYPE_SHIFT,
2932                         DLB_EVENT_EV_TYPE_DW);
2933         sse_evt[1] = _mm_insert_epi32(sse_evt[1],
2934                         qes[3].flow_id |
2935                         qes[3].u.event_type.major << DLB_EVENT_EV_TYPE_SHIFT  |
2936                         qes[3].u.event_type.sub << DLB_EVENT_SUB_EV_TYPE_SHIFT,
2937                         DLB_EVENT_EV_TYPE_DW + 2);
2938
2939         /* Write the sched type to the event metadata. 'op' and 'rsvd' are not
2940          * set:
2941          * sse_evt[0][39:32]  = sched_type_map[qes[0].sched_type] << 6
2942          * sse_evt[0][103:96] = sched_type_map[qes[1].sched_type] << 6
2943          * sse_evt[1][39:32]  = sched_type_map[qes[2].sched_type] << 6
2944          * sse_evt[1][103:96] = sched_type_map[qes[3].sched_type] << 6
2945          */
2946 #define DLB_EVENT_SCHED_TYPE_BYTE 4
2947 #define DLB_EVENT_SCHED_TYPE_SHIFT 6
2948         sse_evt[0] = _mm_insert_epi8(sse_evt[0],
2949                 sched_type_map[qes[0].sched_type] << DLB_EVENT_SCHED_TYPE_SHIFT,
2950                 DLB_EVENT_SCHED_TYPE_BYTE);
2951         sse_evt[0] = _mm_insert_epi8(sse_evt[0],
2952                 sched_type_map[qes[1].sched_type] << DLB_EVENT_SCHED_TYPE_SHIFT,
2953                 DLB_EVENT_SCHED_TYPE_BYTE + 8);
2954         sse_evt[1] = _mm_insert_epi8(sse_evt[1],
2955                 sched_type_map[qes[2].sched_type] << DLB_EVENT_SCHED_TYPE_SHIFT,
2956                 DLB_EVENT_SCHED_TYPE_BYTE);
2957         sse_evt[1] = _mm_insert_epi8(sse_evt[1],
2958                 sched_type_map[qes[3].sched_type] << DLB_EVENT_SCHED_TYPE_SHIFT,
2959                 DLB_EVENT_SCHED_TYPE_BYTE + 8);
2960
2961         /* Store the metadata to the event (use the double-precision
2962          * _mm_storeh_pd because there is no integer function for storing the
2963          * upper 64b):
2964          * events[0].event = sse_evt[0][63:0]
2965          * events[1].event = sse_evt[0][127:64]
2966          * events[2].event = sse_evt[1][63:0]
2967          * events[3].event = sse_evt[1][127:64]
2968          */
2969         _mm_storel_epi64((__m128i *)&events[0].event, sse_evt[0]);
2970         _mm_storeh_pd((double *)&events[1].event, (__m128d) sse_evt[0]);
2971         _mm_storel_epi64((__m128i *)&events[2].event, sse_evt[1]);
2972         _mm_storeh_pd((double *)&events[3].event, (__m128d) sse_evt[1]);
2973
2974         DLB2_INC_STAT(ev_port->stats.rx_sched_cnt[qes[0].sched_type], 1);
2975         DLB2_INC_STAT(ev_port->stats.rx_sched_cnt[qes[1].sched_type], 1);
2976         DLB2_INC_STAT(ev_port->stats.rx_sched_cnt[qes[2].sched_type], 1);
2977         DLB2_INC_STAT(ev_port->stats.rx_sched_cnt[qes[3].sched_type], 1);
2978
2979         DLB2_INC_STAT(
2980                 ev_port->stats.queue[events[0].queue_id].
2981                         qid_depth[qes[0].qid_depth],
2982                 1);
2983         DLB2_INC_STAT(
2984                 ev_port->stats.queue[events[1].queue_id].
2985                         qid_depth[qes[1].qid_depth],
2986                 1);
2987         DLB2_INC_STAT(
2988                 ev_port->stats.queue[events[2].queue_id].
2989                         qid_depth[qes[2].qid_depth],
2990                 1);
2991         DLB2_INC_STAT(
2992                 ev_port->stats.queue[events[3].queue_id].
2993                         qid_depth[qes[3].qid_depth],
2994                 1);
2995
2996         DLB2_INC_STAT(ev_port->stats.traffic.rx_ok, num_events);
2997
2998         return num_events;
2999 }
3000
3001 static __rte_always_inline int
3002 dlb2_recv_qe_sparse(struct dlb2_port *qm_port, struct dlb2_dequeue_qe *qe)
3003 {
3004         volatile struct dlb2_dequeue_qe *cq_addr;
3005         uint8_t xor_mask[2] = {0x0F, 0x00};
3006         const uint8_t and_mask = 0x0F;
3007         __m128i *qes = (__m128i *)qe;
3008         uint8_t gen_bits, gen_bit;
3009         uintptr_t addr[4];
3010         uint16_t idx;
3011
3012         cq_addr = dlb2_port[qm_port->id][PORT_TYPE(qm_port)].cq_base;
3013
3014         idx = qm_port->cq_idx;
3015
3016         /* Load the next 4 QEs */
3017         addr[0] = (uintptr_t)&cq_addr[idx];
3018         addr[1] = (uintptr_t)&cq_addr[(idx +  4) & qm_port->cq_depth_mask];
3019         addr[2] = (uintptr_t)&cq_addr[(idx +  8) & qm_port->cq_depth_mask];
3020         addr[3] = (uintptr_t)&cq_addr[(idx + 12) & qm_port->cq_depth_mask];
3021
3022         /* Prefetch next batch of QEs (all CQs occupy minimum 8 cache lines) */
3023         rte_prefetch0(&cq_addr[(idx + 16) & qm_port->cq_depth_mask]);
3024         rte_prefetch0(&cq_addr[(idx + 20) & qm_port->cq_depth_mask]);
3025         rte_prefetch0(&cq_addr[(idx + 24) & qm_port->cq_depth_mask]);
3026         rte_prefetch0(&cq_addr[(idx + 28) & qm_port->cq_depth_mask]);
3027
3028         /* Correct the xor_mask for wrap-around QEs */
3029         gen_bit = qm_port->gen_bit;
3030         xor_mask[gen_bit] ^= !!((idx +  4) > qm_port->cq_depth_mask) << 1;
3031         xor_mask[gen_bit] ^= !!((idx +  8) > qm_port->cq_depth_mask) << 2;
3032         xor_mask[gen_bit] ^= !!((idx + 12) > qm_port->cq_depth_mask) << 3;
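        /* e.g. with cq_depth_mask == 31 and idx == 24, the QEs at idx + 8 and
         * idx + 12 lie past the wrap and belong to the next generation, so
         * bits 2 and 3 of the expected-gen mask are inverted above.
         */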
3033
3034         /* Read the cache lines backwards to ensure that if QE[N] (N > 0) is
3035          * valid, then QEs[0:N-1] are too.
3036          */
3037         qes[3] = _mm_load_si128((__m128i *)(void *)addr[3]);
3038         rte_compiler_barrier();
3039         qes[2] = _mm_load_si128((__m128i *)(void *)addr[2]);
3040         rte_compiler_barrier();
3041         qes[1] = _mm_load_si128((__m128i *)(void *)addr[1]);
3042         rte_compiler_barrier();
3043         qes[0] = _mm_load_si128((__m128i *)(void *)addr[0]);
3044
3045         /* Extract and combine the gen bits */
3046         gen_bits = ((_mm_extract_epi8(qes[0], 15) & 0x1) << 0) |
3047                    ((_mm_extract_epi8(qes[1], 15) & 0x1) << 1) |
3048                    ((_mm_extract_epi8(qes[2], 15) & 0x1) << 2) |
3049                    ((_mm_extract_epi8(qes[3], 15) & 0x1) << 3);
3050
3051         /* XOR the combined bits such that a 1 represents a valid QE */
3052         gen_bits ^= xor_mask[gen_bit];
3053
3054         /* Mask off gen bits we don't care about */
3055         gen_bits &= and_mask;
3056
3057         return __builtin_popcount(gen_bits);
3058 }
3059
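/* Advance the CQ index by 'cnt' entries. The unmasked index increases
 * monotonically; the gen bit is the inverted 'gen_bit_shift' bit of that
 * index, so it toggles each time the CQ index wraps around.
 */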
3060 static inline void
3061 dlb2_inc_cq_idx(struct dlb2_port *qm_port, int cnt)
3062 {
3063         uint16_t idx = qm_port->cq_idx_unmasked + cnt;
3064
3065         qm_port->cq_idx_unmasked = idx;
3066         qm_port->cq_idx = idx & qm_port->cq_depth_mask;
3067         qm_port->gen_bit = (~(idx >> qm_port->gen_bit_shift)) & 0x1;
3068 }
3069
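/* Return 'n' previously dequeued events to the device by enqueueing release
 * QEs (cmd_byte = DLB2_COMP_CMD_BYTE), one cache line (up to four QEs) at a
 * time. Directed ports do not require hardware releases, so for them only
 * the software credit accounting is updated.
 */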
3070 static int
3071 dlb2_event_release(struct dlb2_eventdev *dlb2,
3072                    uint8_t port_id,
3073                    int n)
3074 {
3075         struct process_local_port_data *port_data;
3076         struct dlb2_eventdev_port *ev_port;
3077         struct dlb2_port *qm_port;
3078         int i, cnt;
3079
3080         if (port_id >= dlb2->num_ports) {
3081                 DLB2_LOG_ERR("Invalid port id %d in dlb2_event_release\n",
3082                              port_id);
3083                 rte_errno = -EINVAL;
3084                 return rte_errno;
3085         }
3086
3087         ev_port = &dlb2->ev_ports[port_id];
3088         qm_port = &ev_port->qm_port;
3089         port_data = &dlb2_port[qm_port->id][PORT_TYPE(qm_port)];
3090
3091         cnt = 0;
3092
3093         if (qm_port->is_directed) {
3094                 cnt = n;
3095                 goto sw_credit_update;
3096         }
3097
3098         for (i = 0; i < n; i += DLB2_NUM_QES_PER_CACHE_LINE) {
3099                 int j;
3100
3101                 /* Zero-out QEs */
3102                 qm_port->qe4[0].cmd_byte = 0;
3103                 qm_port->qe4[1].cmd_byte = 0;
3104                 qm_port->qe4[2].cmd_byte = 0;
3105                 qm_port->qe4[3].cmd_byte = 0;
3106
3107                 for (j = 0; j < DLB2_NUM_QES_PER_CACHE_LINE && (i + j) < n; j++)
3108                         qm_port->qe4[j].cmd_byte = DLB2_COMP_CMD_BYTE;
3109
3110                 qm_port->issued_releases += j;
3111
3112                 if (j == 0)
3113                         break;
3114
3115                 if (qm_port->token_pop_mode == DELAYED_POP && j < 4 &&
3116                     qm_port->issued_releases >= qm_port->token_pop_thresh - 1) {
3117                         dlb2_construct_token_pop_qe(qm_port, j);
3118
3119                         /* Reset the releases counter for the next QE batch */
3120                         qm_port->issued_releases -= qm_port->token_pop_thresh;
3121                 }
3122
3123                 dlb2_hw_do_enqueue(qm_port, i == 0, port_data);
3124
3125                 cnt += j;
3126         }
3127
3128         if (qm_port->token_pop_mode == DELAYED_POP &&
3129             qm_port->issued_releases >= qm_port->token_pop_thresh - 1) {
3130                 dlb2_consume_qe_immediate(qm_port, qm_port->owed_tokens);
3131                 qm_port->issued_releases -= qm_port->token_pop_thresh;
3132         }
3133
3134 sw_credit_update:
3135         /* each release returns one credit */
3136         if (!ev_port->outstanding_releases) {
3137                 DLB2_LOG_ERR("Unrecoverable application error. Outstanding releases underflowed.\n");
3138                 rte_errno = -ENOTRECOVERABLE;
3139                 return rte_errno;
3140         }
3141
3142         ev_port->outstanding_releases -= cnt;
3143         ev_port->inflight_credits += cnt;
3144
3145         /* Replenish s/w credits if enough releases are performed */
3146         dlb2_replenish_sw_credits(dlb2, ev_port);
3147         return 0;
3148 }
3149
3150 static inline int16_t
3151 dlb2_hw_dequeue_sparse(struct dlb2_eventdev *dlb2,
3152                        struct dlb2_eventdev_port *ev_port,
3153                        struct rte_event *events,
3154                        uint16_t max_num,
3155                        uint64_t dequeue_timeout_ticks)
3156 {
3157         uint64_t timeout;
3158         uint64_t start_ticks = 0ULL;
3159         struct dlb2_port *qm_port;
3160         int num = 0;
3161
3162         qm_port = &ev_port->qm_port;
3163
3164         /* We have a special implementation for waiting. Wait can be:
3165          * 1) no waiting at all
3166          * 2) busy poll only
3167          * 3) wait for an interrupt; if woken up and the poll time has
3168          * expired, return to the caller
3169          * 4) umonitor/umwait repeatedly up to the poll time
3170          */
3171
3172         /* If configured for per-dequeue wait, use the wait value provided
3173          * to this API. Otherwise use the global value set at eventdev
3174          * configuration time.
3175          */
3176         if (!dlb2->global_dequeue_wait)
3177                 timeout = dequeue_timeout_ticks;
3178         else
3179                 timeout = dlb2->global_dequeue_wait_ticks;
3180
3181         start_ticks = rte_get_timer_cycles();
3182
3183         while (num < max_num) {
3184                 struct dlb2_dequeue_qe qes[DLB2_NUM_QES_PER_CACHE_LINE];
3185                 int num_avail;
3186
3187                 /* Copy up to 4 QEs from the current cache line into qes */
3188                 num_avail = dlb2_recv_qe_sparse(qm_port, qes);
3189
3190                 /* But don't process more than the user requested */
3191                 num_avail = RTE_MIN(num_avail, max_num - num);
3192
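                /* In sparse CQ mode each QE occupies a full cache line (four
                 * 16B slots), so the CQ index advances by 4 per QE.
                 */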
3193                 dlb2_inc_cq_idx(qm_port, num_avail << 2);
3194
3195                 if (num_avail == DLB2_NUM_QES_PER_CACHE_LINE)
3196                         num += dlb2_process_dequeue_four_qes(ev_port,
3197                                                               qm_port,
3198                                                               &events[num],
3199                                                               &qes[0]);
3200                 else if (num_avail)
3201                         num += dlb2_process_dequeue_qes(ev_port,
3202                                                          qm_port,
3203                                                          &events[num],
3204                                                          &qes[0],
3205                                                          num_avail);
3206                 else if ((timeout == 0) || (num > 0))
3207                         /* Not waiting in any form, or 1+ events received? */
3208                         break;
3209                 else if (dlb2_dequeue_wait(dlb2, ev_port, qm_port,
3210                                            timeout, start_ticks))
3211                         break;
3212         }
3213
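        /* Each dequeued QE consumes one CQ token. In AUTO_POP mode the tokens
         * are returned immediately; otherwise they remain owed until popped
         * per the port's token pop mode.
         */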
3214         qm_port->owed_tokens += num;
3215
3216         if (num) {
3217                 if (qm_port->token_pop_mode == AUTO_POP)
3218                         dlb2_consume_qe_immediate(qm_port, num);
3219
3220                 ev_port->outstanding_releases += num;
3221
3222                 dlb2_port_credits_inc(qm_port, num);
3223         }
3224
3225         return num;
3226 }
3227
3228 static __rte_always_inline int
3229 dlb2_recv_qe(struct dlb2_port *qm_port, struct dlb2_dequeue_qe *qe,
3230              uint8_t *offset)
3231 {
3232         uint8_t xor_mask[2][4] = { {0x0F, 0x0E, 0x0C, 0x08},
3233                                    {0x00, 0x01, 0x03, 0x07} };
3234         uint8_t and_mask[4] = {0x0F, 0x0E, 0x0C, 0x08};
3235         volatile struct dlb2_dequeue_qe *cq_addr;
3236         __m128i *qes = (__m128i *)qe;
3237         uint64_t *cache_line_base;
3238         uint8_t gen_bits;
3239
3240         cq_addr = dlb2_port[qm_port->id][PORT_TYPE(qm_port)].cq_base;
3241         cq_addr = &cq_addr[qm_port->cq_idx];
3242
3243         cache_line_base = (void *)(((uintptr_t)cq_addr) & ~0x3F);
3244         *offset = ((uintptr_t)cq_addr & 0x30) >> 4;
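        /* *offset is the QE's slot within its 64B cache line (each QE is
         * 16B); the xor_mask/and_mask tables above use it so that only the
         * gen bits at or beyond the current CQ index are considered.
         */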
3245
3246         /* Load the next CQ cache line from memory. Pack these reads as tight
3247          * as possible to reduce the chance that DLB invalidates the line while
3248          * the CPU is reading it. Read the cache line backwards to ensure that
3249          * if QE[N] (N > 0) is valid, then QEs[0:N-1] are too.
3250          *
3251          * (Valid QEs start at &qe[offset])
3252          */
3253         qes[3] = _mm_load_si128((__m128i *)&cache_line_base[6]);
3254         qes[2] = _mm_load_si128((__m128i *)&cache_line_base[4]);
3255         qes[1] = _mm_load_si128((__m128i *)&cache_line_base[2]);
3256         qes[0] = _mm_load_si128((__m128i *)&cache_line_base[0]);
3257
3258         /* Evict the cache line ASAP */
3259         rte_cldemote(cache_line_base);
3260
3261         /* Extract and combine the gen bits */
3262         gen_bits = ((_mm_extract_epi8(qes[0], 15) & 0x1) << 0) |
3263                    ((_mm_extract_epi8(qes[1], 15) & 0x1) << 1) |
3264                    ((_mm_extract_epi8(qes[2], 15) & 0x1) << 2) |
3265                    ((_mm_extract_epi8(qes[3], 15) & 0x1) << 3);
3266
3267         /* XOR the combined bits such that a 1 represents a valid QE */
3268         gen_bits ^= xor_mask[qm_port->gen_bit][*offset];
3269
3270         /* Mask off gen bits we don't care about */
3271         gen_bits &= and_mask[*offset];
3272
3273         return __builtin_popcount(gen_bits);
3274 }
3275
3276 static inline int16_t
3277 dlb2_hw_dequeue(struct dlb2_eventdev *dlb2,
3278                 struct dlb2_eventdev_port *ev_port,
3279                 struct rte_event *events,
3280                 uint16_t max_num,
3281                 uint64_t dequeue_timeout_ticks)
3282 {
3283         uint64_t timeout;
3284         uint64_t start_ticks = 0ULL;
3285         struct dlb2_port *qm_port;
3286         int num = 0;
3287
3288         qm_port = &ev_port->qm_port;
3289
3290         /* We have a special implementation for waiting. Wait can be:
3291          * 1) no waiting at all
3292          * 2) busy poll only
3293          * 3) wait for an interrupt; if woken up and the poll time has
3294          * expired, return to the caller
3295          * 4) umonitor/umwait repeatedly up to the poll time
3296          */
3297
3298         /* If configured for per-dequeue wait, use the wait value provided
3299          * to this API. Otherwise use the global value set at eventdev
3300          * configuration time.
3301          */
3302         if (!dlb2->global_dequeue_wait)
3303                 timeout = dequeue_timeout_ticks;
3304         else
3305                 timeout = dlb2->global_dequeue_wait_ticks;
3306
3307         start_ticks = rte_get_timer_cycles();
3308
3309         while (num < max_num) {
3310                 struct dlb2_dequeue_qe qes[DLB2_NUM_QES_PER_CACHE_LINE];
3311                 uint8_t offset;
3312                 int num_avail;
3313
3314                 /* Copy up to 4 QEs from the current cache line into qes */
3315                 num_avail = dlb2_recv_qe(qm_port, qes, &offset);
3316
3317                 /* But don't process more than the user requested */
3318                 num_avail = RTE_MIN(num_avail, max_num - num);
3319
3320                 dlb2_inc_cq_idx(qm_port, num_avail);
3321
3322                 if (num_avail == DLB2_NUM_QES_PER_CACHE_LINE)
3323                         num += dlb2_process_dequeue_four_qes(ev_port,
3324                                                              qm_port,
3325                                                              &events[num],
3326                                                              &qes[offset]);
3327                 else if (num_avail)
3328                         num += dlb2_process_dequeue_qes(ev_port,
3329                                                         qm_port,
3330                                                         &events[num],
3331                                                         &qes[offset],
3332                                                         num_avail);
3333                 else if ((timeout == 0) || (num > 0))
3334                         /* Not waiting in any form, or 1+ events received? */
3335                         break;
3336                 else if (dlb2_dequeue_wait(dlb2, ev_port, qm_port,
3337                                            timeout, start_ticks))
3338                         break;
3339         }
3340
3341         qm_port->owed_tokens += num;
3342
3343         if (num) {
3344                 if (qm_port->token_pop_mode == AUTO_POP)
3345                         dlb2_consume_qe_immediate(qm_port, num);
3346
3347                 ev_port->outstanding_releases += num;
3348
3349                 dlb2_port_credits_inc(qm_port, num);
3350         }
3351
3352         return num;
3353 }
3354
3355 static uint16_t
3356 dlb2_event_dequeue_burst(void *event_port, struct rte_event *ev, uint16_t num,
3357                          uint64_t wait)
3358 {
3359         struct dlb2_eventdev_port *ev_port = event_port;
3360         struct dlb2_port *qm_port = &ev_port->qm_port;
3361         struct dlb2_eventdev *dlb2 = ev_port->dlb2;
3362         uint16_t cnt;
3363
3364         RTE_ASSERT(ev_port->setup_done);
3365         RTE_ASSERT(ev != NULL);
3366
3367         if (ev_port->implicit_release && ev_port->outstanding_releases > 0) {
3368                 uint16_t out_rels = ev_port->outstanding_releases;
3369
3370                 if (dlb2_event_release(dlb2, ev_port->id, out_rels))
3371                         return 0; /* rte_errno is set */
3372
3373                 DLB2_INC_STAT(ev_port->stats.tx_implicit_rel, out_rels);
3374         }
3375
3376         if (qm_port->token_pop_mode == DEFERRED_POP && qm_port->owed_tokens)
3377                 dlb2_consume_qe_immediate(qm_port, qm_port->owed_tokens);
3378
3379         cnt = dlb2_hw_dequeue(dlb2, ev_port, ev, num, wait);
3380
3381         DLB2_INC_STAT(ev_port->stats.traffic.total_polls, 1);
3382         DLB2_INC_STAT(ev_port->stats.traffic.zero_polls, ((cnt == 0) ? 1 : 0));
3383
3384         return cnt;
3385 }
3386
3387 static uint16_t
3388 dlb2_event_dequeue(void *event_port, struct rte_event *ev, uint64_t wait)
3389 {
3390         return dlb2_event_dequeue_burst(event_port, ev, 1, wait);
3391 }
3392
3393 static uint16_t
3394 dlb2_event_dequeue_burst_sparse(void *event_port, struct rte_event *ev,
3395                                 uint16_t num, uint64_t wait)
3396 {
3397         struct dlb2_eventdev_port *ev_port = event_port;
3398         struct dlb2_port *qm_port = &ev_port->qm_port;
3399         struct dlb2_eventdev *dlb2 = ev_port->dlb2;
3400         uint16_t cnt;
3401
3402         RTE_ASSERT(ev_port->setup_done);
3403         RTE_ASSERT(ev != NULL);
3404
3405         if (ev_port->implicit_release && ev_port->outstanding_releases > 0) {
3406                 uint16_t out_rels = ev_port->outstanding_releases;
3407
3408                 if (dlb2_event_release(dlb2, ev_port->id, out_rels))
3409                         return 0; /* rte_errno is set */
3410
3411                 DLB2_INC_STAT(ev_port->stats.tx_implicit_rel, out_rels);
3412         }
3413
3414         if (qm_port->token_pop_mode == DEFERRED_POP && qm_port->owed_tokens)
3415                 dlb2_consume_qe_immediate(qm_port, qm_port->owed_tokens);
3416
3417         cnt = dlb2_hw_dequeue_sparse(dlb2, ev_port, ev, num, wait);
3418
3419         DLB2_INC_STAT(ev_port->stats.traffic.total_polls, 1);
3420         DLB2_INC_STAT(ev_port->stats.traffic.zero_polls, ((cnt == 0) ? 1 : 0));
3421         return cnt;
3422 }
3423
3424 static uint16_t
3425 dlb2_event_dequeue_sparse(void *event_port, struct rte_event *ev,
3426                           uint64_t wait)
3427 {
3428         return dlb2_event_dequeue_burst_sparse(event_port, ev, 1, wait);
3429 }
3430
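/* Dequeue and discard every event held by a port, invoking the application's
 * dev_stop_flush callback (if registered) on each event and releasing
 * load-balanced events back to the device.
 */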
3431 static void
3432 dlb2_flush_port(struct rte_eventdev *dev, int port_id)
3433 {
3434         struct dlb2_eventdev *dlb2 = dlb2_pmd_priv(dev);
3435         eventdev_stop_flush_t flush;
3436         struct rte_event ev;
3437         uint8_t dev_id;
3438         void *arg;
3439         int i;
3440
3441         flush = dev->dev_ops->dev_stop_flush;
3442         dev_id = dev->data->dev_id;
3443         arg = dev->data->dev_stop_flush_arg;
3444
3445         while (rte_event_dequeue_burst(dev_id, port_id, &ev, 1, 0)) {
3446                 if (flush)
3447                         flush(dev_id, ev, arg);
3448
3449                 if (dlb2->ev_ports[port_id].qm_port.is_directed)
3450                         continue;
3451
3452                 ev.op = RTE_EVENT_OP_RELEASE;
3453
3454                 rte_event_enqueue_burst(dev_id, port_id, &ev, 1);
3455         }
3456
3457         /* Enqueue any additional outstanding releases */
3458         ev.op = RTE_EVENT_OP_RELEASE;
3459
3460         for (i = dlb2->ev_ports[port_id].outstanding_releases; i > 0; i--)
3461                 rte_event_enqueue_burst(dev_id, port_id, &ev, 1);
3462 }
3463
3464 static uint32_t
3465 dlb2_get_ldb_queue_depth(struct dlb2_eventdev *dlb2,
3466                          struct dlb2_eventdev_queue *queue)
3467 {
3468         struct dlb2_hw_dev *handle = &dlb2->qm_instance;
3469         struct dlb2_get_ldb_queue_depth_args cfg;
3470         int ret;
3471
3472         cfg.queue_id = queue->qm_queue.id;
3473
3474         ret = dlb2_iface_get_ldb_queue_depth(handle, &cfg);
3475         if (ret < 0) {
3476                 DLB2_LOG_ERR("dlb2: get_ldb_queue_depth ret=%d (driver status: %s)\n",
3477                              ret, dlb2_error_strings[cfg.response.status]);
3478                 return ret;
3479         }
3480
3481         return cfg.response.id;
3482 }
3483
3484 static uint32_t
3485 dlb2_get_dir_queue_depth(struct dlb2_eventdev *dlb2,
3486                          struct dlb2_eventdev_queue *queue)
3487 {
3488         struct dlb2_hw_dev *handle = &dlb2->qm_instance;
3489         struct dlb2_get_dir_queue_depth_args cfg;
3490         int ret;
3491
3492         cfg.queue_id = queue->qm_queue.id;
3493
3494         ret = dlb2_iface_get_dir_queue_depth(handle, &cfg);
3495         if (ret < 0) {
3496                 DLB2_LOG_ERR("dlb2: get_dir_queue_depth ret=%d (driver status: %s)\n",
3497                              ret, dlb2_error_strings[cfg.response.status]);
3498                 return ret;
3499         }
3500
3501         return cfg.response.id;
3502 }
3503
3504 uint32_t
3505 dlb2_get_queue_depth(struct dlb2_eventdev *dlb2,
3506                      struct dlb2_eventdev_queue *queue)
3507 {
3508         if (queue->qm_queue.is_directed)
3509                 return dlb2_get_dir_queue_depth(dlb2, queue);
3510         else
3511                 return dlb2_get_ldb_queue_depth(dlb2, queue);
3512 }
3513
3514 static bool
3515 dlb2_queue_is_empty(struct dlb2_eventdev *dlb2,
3516                     struct dlb2_eventdev_queue *queue)
3517 {
3518         return dlb2_get_queue_depth(dlb2, queue) == 0;
3519 }
3520
3521 static bool
3522 dlb2_linked_queues_empty(struct dlb2_eventdev *dlb2)
3523 {
3524         int i;
3525
3526         for (i = 0; i < dlb2->num_queues; i++) {
3527                 if (dlb2->ev_queues[i].num_links == 0)
3528                         continue;
3529                 if (!dlb2_queue_is_empty(dlb2, &dlb2->ev_queues[i]))
3530                         return false;
3531         }
3532
3533         return true;
3534 }
3535
3536 static bool
3537 dlb2_queues_empty(struct dlb2_eventdev *dlb2)
3538 {
3539         int i;
3540
3541         for (i = 0; i < dlb2->num_queues; i++) {
3542                 if (!dlb2_queue_is_empty(dlb2, &dlb2->ev_queues[i]))
3543                         return false;
3544         }
3545
3546         return true;
3547 }
3548
3549 static void
3550 dlb2_drain(struct rte_eventdev *dev)
3551 {
3552         struct dlb2_eventdev *dlb2 = dlb2_pmd_priv(dev);
3553         struct dlb2_eventdev_port *ev_port = NULL;
3554         uint8_t dev_id;
3555         int i;
3556
3557         dev_id = dev->data->dev_id;
3558
3559         while (!dlb2_linked_queues_empty(dlb2)) {
3560                 /* Flush all the ev_ports, which will drain all their connected
3561                  * queues.
3562                  */
3563                 for (i = 0; i < dlb2->num_ports; i++)
3564                         dlb2_flush_port(dev, i);
3565         }
3566
3567         /* The queues are empty, but there may be events left in the ports. */
3568         for (i = 0; i < dlb2->num_ports; i++)
3569                 dlb2_flush_port(dev, i);
3570
3571         /* If the domain's queues are empty, we're done. */
3572         if (dlb2_queues_empty(dlb2))
3573                 return;
3574
3575         /* Else, there must be at least one unlinked load-balanced queue.
3576          * Select a load-balanced port with which to drain the unlinked
3577          * queue(s).
3578          */
3579         for (i = 0; i < dlb2->num_ports; i++) {
3580                 ev_port = &dlb2->ev_ports[i];
3581
3582                 if (!ev_port->qm_port.is_directed)
3583                         break;
3584         }
3585
3586         if (i == dlb2->num_ports) {
3587                 DLB2_LOG_ERR("internal error: no LDB ev_ports\n");
3588                 return;
3589         }
3590
3591         rte_errno = 0;
3592         rte_event_port_unlink(dev_id, ev_port->id, NULL, 0);
3593
3594         if (rte_errno) {
3595                 DLB2_LOG_ERR("internal error: failed to unlink ev_port %d\n",
3596                              ev_port->id);
3597                 return;
3598         }
3599
3600         for (i = 0; i < dlb2->num_queues; i++) {
3601                 uint8_t qid, prio;
3602                 int ret;
3603
3604                 if (dlb2_queue_is_empty(dlb2, &dlb2->ev_queues[i]))
3605                         continue;
3606
3607                 qid = i;
3608                 prio = 0;
3609
3610                 /* Link the ev_port to the queue */
3611                 ret = rte_event_port_link(dev_id, ev_port->id, &qid, &prio, 1);
3612                 if (ret != 1) {
3613                         DLB2_LOG_ERR("internal error: failed to link ev_port %d to queue %d\n",
3614                                      ev_port->id, qid);
3615                         return;
3616                 }
3617
3618                 /* Flush the queue */
3619                 while (!dlb2_queue_is_empty(dlb2, &dlb2->ev_queues[i]))
3620                         dlb2_flush_port(dev, ev_port->id);
3621
3622                 /* Drain any extant events in the ev_port. */
3623                 dlb2_flush_port(dev, ev_port->id);
3624
3625                 /* Unlink the ev_port from the queue */
3626                 ret = rte_event_port_unlink(dev_id, ev_port->id, &qid, 1);
3627                 if (ret != 1) {
3628                         DLB2_LOG_ERR("internal error: failed to unlink ev_port %d from queue %d\n",
3629                                      ev_port->id, qid);
3630                         return;
3631                 }
3632         }
3633 }
3634
3635 static void
3636 dlb2_eventdev_stop(struct rte_eventdev *dev)
3637 {
3638         struct dlb2_eventdev *dlb2 = dlb2_pmd_priv(dev);
3639
3640         rte_spinlock_lock(&dlb2->qm_instance.resource_lock);
3641
3642         if (dlb2->run_state == DLB2_RUN_STATE_STOPPED) {
3643                 DLB2_LOG_DBG("Internal error: already stopped\n");
3644                 rte_spinlock_unlock(&dlb2->qm_instance.resource_lock);
3645                 return;
3646         } else if (dlb2->run_state != DLB2_RUN_STATE_STARTED) {
3647                 DLB2_LOG_ERR("Internal error: bad state %d for dev_stop\n",
3648                              (int)dlb2->run_state);
3649                 rte_spinlock_unlock(&dlb2->qm_instance.resource_lock);
3650                 return;
3651         }
3652
3653         dlb2->run_state = DLB2_RUN_STATE_STOPPING;
3654
3655         rte_spinlock_unlock(&dlb2->qm_instance.resource_lock);
3656
3657         dlb2_drain(dev);
3658
3659         dlb2->run_state = DLB2_RUN_STATE_STOPPED;
3660 }
3661
3662 static int
3663 dlb2_eventdev_close(struct rte_eventdev *dev)
3664 {
3665         dlb2_hw_reset_sched_domain(dev, false);
3666
3667         return 0;
3668 }
3669
3670 static void
3671 dlb2_eventdev_queue_release(struct rte_eventdev *dev, uint8_t id)
3672 {
3673         RTE_SET_USED(dev);
3674         RTE_SET_USED(id);
3675
3676         /* This function intentionally left blank. */
3677 }
3678
3679 static void
3680 dlb2_eventdev_port_release(void *port)
3681 {
3682         struct dlb2_eventdev_port *ev_port = port;
3683         struct dlb2_port *qm_port;
3684
3685         if (ev_port) {
3686                 qm_port = &ev_port->qm_port;
3687                 if (qm_port->config_state == DLB2_CONFIGURED)
3688                         dlb2_free_qe_mem(qm_port);
3689         }
3690 }
3691
3692 static int
3693 dlb2_eventdev_timeout_ticks(struct rte_eventdev *dev, uint64_t ns,
3694                             uint64_t *timeout_ticks)
3695 {
3696         RTE_SET_USED(dev);
3697         uint64_t cycles_per_ns = rte_get_timer_hz() / 1E9;
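        /* Note: cycles_per_ns is truncated to a whole number, so the
         * conversion is approximate when the timer frequency is not an
         * integer multiple of 1 GHz.
         */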
3698
3699         *timeout_ticks = ns * cycles_per_ns;
3700
3701         return 0;
3702 }
3703
3704 static void
3705 dlb2_entry_points_init(struct rte_eventdev *dev)
3706 {
3707         struct dlb2_eventdev *dlb2;
3708
3709         /* Expose PMD's eventdev interface */
3710         static struct rte_eventdev_ops dlb2_eventdev_entry_ops = {
3711                 .dev_infos_get    = dlb2_eventdev_info_get,
3712                 .dev_configure    = dlb2_eventdev_configure,
3713                 .dev_start        = dlb2_eventdev_start,
3714                 .dev_stop         = dlb2_eventdev_stop,
3715                 .dev_close        = dlb2_eventdev_close,
3716                 .queue_def_conf   = dlb2_eventdev_queue_default_conf_get,
3717                 .queue_setup      = dlb2_eventdev_queue_setup,
3718                 .queue_release    = dlb2_eventdev_queue_release,
3719                 .port_def_conf    = dlb2_eventdev_port_default_conf_get,
3720                 .port_setup       = dlb2_eventdev_port_setup,
3721                 .port_release     = dlb2_eventdev_port_release,
3722                 .port_link        = dlb2_eventdev_port_link,
3723                 .port_unlink      = dlb2_eventdev_port_unlink,
3724                 .port_unlinks_in_progress =
3725                                     dlb2_eventdev_port_unlinks_in_progress,
3726                 .timeout_ticks    = dlb2_eventdev_timeout_ticks,
3727                 .dump             = dlb2_eventdev_dump,
3728                 .xstats_get       = dlb2_eventdev_xstats_get,
3729                 .xstats_get_names = dlb2_eventdev_xstats_get_names,
3730                 .xstats_get_by_name = dlb2_eventdev_xstats_get_by_name,
3731                 .xstats_reset       = dlb2_eventdev_xstats_reset,
3732                 .dev_selftest     = test_dlb2_eventdev,
3733         };
3734
3735         /* Install the eventdev ops and fast-path entry points */
3736
3737         dev->dev_ops = &dlb2_eventdev_entry_ops;
3738         dev->enqueue = dlb2_event_enqueue;
3739         dev->enqueue_burst = dlb2_event_enqueue_burst;
3740         dev->enqueue_new_burst = dlb2_event_enqueue_new_burst;
3741         dev->enqueue_forward_burst = dlb2_event_enqueue_forward_burst;
3742
3743         dlb2 = dev->data->dev_private;
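        /* Sparse CQ mode writes one QE per cache line and therefore uses
         * dedicated dequeue entry points.
         */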
3744         if (dlb2->poll_mode == DLB2_CQ_POLL_MODE_SPARSE) {
3745                 dev->dequeue = dlb2_event_dequeue_sparse;
3746                 dev->dequeue_burst = dlb2_event_dequeue_burst_sparse;
3747         } else {
3748                 dev->dequeue = dlb2_event_dequeue;
3749                 dev->dequeue_burst = dlb2_event_dequeue_burst;
3750         }
3751 }
3752
3753 int
3754 dlb2_primary_eventdev_probe(struct rte_eventdev *dev,
3755                             const char *name,
3756                             struct dlb2_devargs *dlb2_args)
3757 {
3758         struct dlb2_eventdev *dlb2;
3759         int err, i;
3760
3761         dlb2 = dev->data->dev_private;
3762
3763         dlb2->event_dev = dev; /* backlink */
3764
3765         evdev_dlb2_default_info.driver_name = name;
3766
3767         dlb2->max_num_events_override = dlb2_args->max_num_events;
3768         dlb2->num_dir_credits_override = dlb2_args->num_dir_credits_override;
3769         dlb2->qm_instance.cos_id = dlb2_args->cos_id;
3770
3771         err = dlb2_iface_open(&dlb2->qm_instance, name);
3772         if (err < 0) {
3773                 DLB2_LOG_ERR("could not open event hardware device, err=%d\n",
3774                              err);
3775                 return err;
3776         }
3777
3778         err = dlb2_iface_get_device_version(&dlb2->qm_instance,
3779                                             &dlb2->revision);
3780         if (err < 0) {
3781                 DLB2_LOG_ERR("dlb2: failed to get the device version, err=%d\n",
3782                              err);
3783                 return err;
3784         }
3785
3786         err = dlb2_hw_query_resources(dlb2);
3787         if (err) {
3788                 DLB2_LOG_ERR("get resources err=%d for %s\n",
3789                              err, name);
3790                 return err;
3791         }
3792
3793         dlb2_iface_hardware_init(&dlb2->qm_instance);
3794
3795         err = dlb2_iface_get_cq_poll_mode(&dlb2->qm_instance, &dlb2->poll_mode);
3796         if (err < 0) {
3797                 DLB2_LOG_ERR("dlb2: failed to get the poll mode, err=%d\n",
3798                              err);
3799                 return err;
3800         }
3801
3802         /* Complete xstats runtime initialization */
3803         err = dlb2_xstats_init(dlb2);
3804         if (err) {
3805                 DLB2_LOG_ERR("dlb2: failed to init xstats, err=%d\n", err);
3806                 return err;
3807         }
3808
3809         /* Initialize each port's token pop mode */
3810         for (i = 0; i < DLB2_MAX_NUM_PORTS; i++)
3811                 dlb2->ev_ports[i].qm_port.token_pop_mode = AUTO_POP;
3812
3813         rte_spinlock_init(&dlb2->qm_instance.resource_lock);
3814
3815         dlb2_iface_low_level_io_init();
3816
3817         dlb2_entry_points_init(dev);
3818
3819         dlb2_init_queue_depth_thresholds(dlb2,
3820                                          dlb2_args->qid_depth_thresholds.val);
3821
3822         return 0;
3823 }
3824
3825 int
3826 dlb2_secondary_eventdev_probe(struct rte_eventdev *dev,
3827                               const char *name)
3828 {
3829         struct dlb2_eventdev *dlb2;
3830         int err;
3831
3832         dlb2 = dev->data->dev_private;
3833
3834         evdev_dlb2_default_info.driver_name = name;
3835
3836         err = dlb2_iface_open(&dlb2->qm_instance, name);
3837         if (err < 0) {
3838                 DLB2_LOG_ERR("could not open event hardware device, err=%d\n",
3839                              err);
3840                 return err;
3841         }
3842
3843         err = dlb2_hw_query_resources(dlb2);
3844         if (err) {
3845                 DLB2_LOG_ERR("get resources err=%d for %s\n",
3846                              err, name);
3847                 return err;
3848         }
3849
3850         dlb2_iface_low_level_io_init();
3851
3852         dlb2_entry_points_init(dev);
3853
3854         return 0;
3855 }
3856
3857 int
3858 dlb2_parse_params(const char *params,
3859                   const char *name,
3860                   struct dlb2_devargs *dlb2_args)
3861 {
3862         int ret = 0;
3863         static const char * const args[] = { NUMA_NODE_ARG,
3864                                              DLB2_MAX_NUM_EVENTS,
3865                                              DLB2_NUM_DIR_CREDITS,
3866                                              DEV_ID_ARG,
3867                                              DLB2_QID_DEPTH_THRESH_ARG,
3868                                              DLB2_COS_ARG,
3869                                              NULL };
3870
3871         if (params != NULL && params[0] != '\0') {
3872                 struct rte_kvargs *kvlist = rte_kvargs_parse(params, args);
3873
3874                 if (kvlist == NULL) {
3875                         RTE_LOG(INFO, PMD,
3876                                 "Ignoring unsupported parameters when creating device '%s'\n",
3877                                 name);
3878                 } else {
3879                         ret = rte_kvargs_process(kvlist, NUMA_NODE_ARG,
3880                                                  set_numa_node,
3881                                                  &dlb2_args->socket_id);
3882                         if (ret != 0) {
3883                                 DLB2_LOG_ERR("%s: Error parsing numa node parameter",
3884                                              name);
3885                                 rte_kvargs_free(kvlist);
3886                                 return ret;
3887                         }
3888
3889                         ret = rte_kvargs_process(kvlist, DLB2_MAX_NUM_EVENTS,
3890                                                  set_max_num_events,
3891                                                  &dlb2_args->max_num_events);
3892                         if (ret != 0) {
3893                                 DLB2_LOG_ERR("%s: Error parsing max_num_events parameter",
3894                                              name);
3895                                 rte_kvargs_free(kvlist);
3896                                 return ret;
3897                         }
3898
3899                         ret = rte_kvargs_process(kvlist,
3900                                         DLB2_NUM_DIR_CREDITS,
3901                                         set_num_dir_credits,
3902                                         &dlb2_args->num_dir_credits_override);
3903                         if (ret != 0) {
3904                                 DLB2_LOG_ERR("%s: Error parsing num_dir_credits parameter",
3905                                              name);
3906                                 rte_kvargs_free(kvlist);
3907                                 return ret;
3908                         }
3909
3910                         ret = rte_kvargs_process(kvlist, DEV_ID_ARG,
3911                                                  set_dev_id,
3912                                                  &dlb2_args->dev_id);
3913                         if (ret != 0) {
3914                                 DLB2_LOG_ERR("%s: Error parsing dev_id parameter",
3915                                              name);
3916                                 rte_kvargs_free(kvlist);
3917                                 return ret;
3918                         }
3919
3920                         ret = rte_kvargs_process(
3921                                         kvlist,
3922                                         DLB2_QID_DEPTH_THRESH_ARG,
3923                                         set_qid_depth_thresh,
3924                                         &dlb2_args->qid_depth_thresholds);
3925                         if (ret != 0) {
3926                                 DLB2_LOG_ERR("%s: Error parsing qid_depth_thresh parameter",
3927                                              name);
3928                                 rte_kvargs_free(kvlist);
3929                                 return ret;
3930                         }
3931
3932                         ret = rte_kvargs_process(kvlist, DLB2_COS_ARG,
3933                                                  set_cos,
3934                                                  &dlb2_args->cos_id);
3935                         if (ret != 0) {
3936                                 DLB2_LOG_ERR("%s: Error parsing cos parameter",
3937                                              name);
3938                                 rte_kvargs_free(kvlist);
3939                                 return ret;
3940                         }
3941
3942                         rte_kvargs_free(kvlist);
3943                 }
3944         }
3945         return ret;
3946 }
3947 RTE_LOG_REGISTER(eventdev_dlb2_log_level, pmd.event.dlb2, NOTICE);