1 /* SPDX-License-Identifier: BSD-3-Clause
2 * Copyright(c) 2016-2022 Intel Corporation
16 #include <rte_common.h>
17 #include <rte_config.h>
18 #include <rte_cycles.h>
19 #include <rte_debug.h>
21 #include <rte_errno.h>
22 #include <rte_eventdev.h>
23 #include <eventdev_pmd.h>
25 #include <rte_kvargs.h>
27 #include <rte_malloc.h>
29 #include <rte_power_intrinsics.h>
30 #include <rte_prefetch.h>
32 #include <rte_string_fns.h>
34 #include "dlb2_priv.h"
35 #include "dlb2_iface.h"
36 #include "dlb2_inline_fns.h"
39 * Resources exposed to eventdev. Some values overridden at runtime using
40 * values returned by the DLB kernel driver.
42 #if (RTE_EVENT_MAX_QUEUES_PER_DEV > UINT8_MAX)
43 #error "RTE_EVENT_MAX_QUEUES_PER_DEV cannot fit in member max_event_queues"
45 static struct rte_event_dev_info evdev_dlb2_default_info = {
46 .driver_name = "", /* probe will set */
47 .min_dequeue_timeout_ns = DLB2_MIN_DEQUEUE_TIMEOUT_NS,
48 .max_dequeue_timeout_ns = DLB2_MAX_DEQUEUE_TIMEOUT_NS,
49 #if (RTE_EVENT_MAX_QUEUES_PER_DEV < DLB2_MAX_NUM_LDB_QUEUES)
50 .max_event_queues = RTE_EVENT_MAX_QUEUES_PER_DEV,
52 .max_event_queues = DLB2_MAX_NUM_LDB_QUEUES,
54 .max_event_queue_flows = DLB2_MAX_NUM_FLOWS,
55 .max_event_queue_priority_levels = DLB2_QID_PRIORITIES,
56 .max_event_priority_levels = DLB2_QID_PRIORITIES,
57 .max_event_ports = DLB2_MAX_NUM_LDB_PORTS,
58 .max_event_port_dequeue_depth = DLB2_DEFAULT_CQ_DEPTH,
59 .max_event_port_enqueue_depth = DLB2_MAX_ENQUEUE_DEPTH,
60 .max_event_port_links = DLB2_MAX_NUM_QIDS_PER_LDB_CQ,
61 .max_num_events = DLB2_MAX_NUM_LDB_CREDITS,
62 .max_single_link_event_port_queue_pairs =
63 DLB2_MAX_NUM_DIR_PORTS(DLB2_HW_V2),
64 .event_dev_cap = (RTE_EVENT_DEV_CAP_EVENT_QOS |
65 RTE_EVENT_DEV_CAP_DISTRIBUTED_SCHED |
66 RTE_EVENT_DEV_CAP_QUEUE_ALL_TYPES |
67 RTE_EVENT_DEV_CAP_BURST_MODE |
68 RTE_EVENT_DEV_CAP_IMPLICIT_RELEASE_DISABLE |
69 RTE_EVENT_DEV_CAP_RUNTIME_PORT_LINK |
70 RTE_EVENT_DEV_CAP_MULTIPLE_QUEUE_PORT |
71 RTE_EVENT_DEV_CAP_MAINTENANCE_FREE),
74 struct process_local_port_data
75 dlb2_port[DLB2_MAX_NUM_PORTS_ALL][DLB2_NUM_PORT_TYPES];
78 dlb2_free_qe_mem(struct dlb2_port *qm_port)
83 rte_free(qm_port->qe4);
86 rte_free(qm_port->int_arm_qe);
87 qm_port->int_arm_qe = NULL;
89 rte_free(qm_port->consume_qe);
90 qm_port->consume_qe = NULL;
92 rte_memzone_free(dlb2_port[qm_port->id][PORT_TYPE(qm_port)].mz);
93 dlb2_port[qm_port->id][PORT_TYPE(qm_port)].mz = NULL;
96 /* override defaults with value(s) provided on command line */
98 dlb2_init_queue_depth_thresholds(struct dlb2_eventdev *dlb2,
99 int *qid_depth_thresholds)
103 for (q = 0; q < DLB2_MAX_NUM_QUEUES(dlb2->version); q++) {
104 if (qid_depth_thresholds[q] != 0)
105 dlb2->ev_queues[q].depth_threshold =
106 qid_depth_thresholds[q];
110 /* override defaults with value(s) provided on command line */
112 dlb2_init_cq_weight(struct dlb2_eventdev *dlb2, int *cq_weight)
116 for (q = 0; q < DLB2_MAX_NUM_PORTS_ALL; q++)
117 dlb2->ev_ports[q].cq_weight = cq_weight[q];
121 set_cq_weight(const char *key __rte_unused,
125 struct dlb2_cq_weight *cq_weight = opaque;
126 int first, last, weight, i;
128 if (value == NULL || opaque == NULL) {
129 DLB2_LOG_ERR("NULL pointer\n");
133 /* command line override may take one of the following 3 forms:
134 * qid_depth_thresh=all:<threshold_value> ... all queues
135 * qid_depth_thresh=qidA-qidB:<threshold_value> ... a range of queues
136 * qid_depth_thresh=qid:<threshold_value> ... just one queue
138 if (sscanf(value, "all:%d", &weight) == 1) {
140 last = DLB2_MAX_NUM_LDB_PORTS - 1;
141 } else if (sscanf(value, "%d-%d:%d", &first, &last, &weight) == 3) {
142 /* we have everything we need */
143 } else if (sscanf(value, "%d:%d", &first, &weight) == 2) {
146 DLB2_LOG_ERR("Error parsing ldb port qe weight devarg. Should be all:val, qid-qid:val, or qid:val\n");
150 if (first > last || first < 0 ||
151 last >= DLB2_MAX_NUM_LDB_PORTS) {
152 DLB2_LOG_ERR("Error parsing ldb port qe weight arg, invalid port value\n");
156 if (weight < 0 || weight > DLB2_MAX_CQ_DEPTH_OVERRIDE) {
157 DLB2_LOG_ERR("Error parsing ldb port qe weight devarg, must be < cq depth\n");
161 for (i = first; i <= last; i++)
162 cq_weight->limit[i] = weight; /* indexed by qid */
167 /* override defaults with value(s) provided on command line */
169 dlb2_init_port_cos(struct dlb2_eventdev *dlb2, int *port_cos)
173 for (q = 0; q < DLB2_MAX_NUM_PORTS_ALL; q++) {
174 dlb2->ev_ports[q].cos_id = port_cos[q];
175 dlb2->cos_ports[port_cos[q]]++;
180 dlb2_init_cos_bw(struct dlb2_eventdev *dlb2,
181 struct dlb2_cos_bw *cos_bw)
184 for (q = 0; q < DLB2_COS_NUM_VALS; q++)
185 dlb2->cos_bw[q] = cos_bw->val[q];
190 dlb2_hw_query_resources(struct dlb2_eventdev *dlb2)
192 struct dlb2_hw_dev *handle = &dlb2->qm_instance;
193 struct dlb2_hw_resource_info *dlb2_info = &handle->info;
197 /* Query driver resources provisioned for this device */
199 ret = dlb2_iface_get_num_resources(handle,
200 &dlb2->hw_rsrc_query_results);
202 DLB2_LOG_ERR("ioctl get dlb2 num resources, err=%d\n", ret);
206 /* Complete filling in device resource info returned to evdev app,
207 * overriding any default values.
208 * The capabilities (CAPs) were set at compile time.
211 if (dlb2->max_cq_depth != DLB2_DEFAULT_CQ_DEPTH)
212 num_ldb_ports = DLB2_MAX_HL_ENTRIES / dlb2->max_cq_depth;
214 num_ldb_ports = dlb2->hw_rsrc_query_results.num_ldb_ports;
216 evdev_dlb2_default_info.max_event_queues =
217 dlb2->hw_rsrc_query_results.num_ldb_queues;
219 evdev_dlb2_default_info.max_event_ports = num_ldb_ports;
221 if (dlb2->version == DLB2_HW_V2_5) {
222 evdev_dlb2_default_info.max_num_events =
223 dlb2->hw_rsrc_query_results.num_credits;
225 evdev_dlb2_default_info.max_num_events =
226 dlb2->hw_rsrc_query_results.num_ldb_credits;
228 /* Save off values used when creating the scheduling domain. */
230 handle->info.num_sched_domains =
231 dlb2->hw_rsrc_query_results.num_sched_domains;
233 if (dlb2->version == DLB2_HW_V2_5) {
234 handle->info.hw_rsrc_max.nb_events_limit =
235 dlb2->hw_rsrc_query_results.num_credits;
237 handle->info.hw_rsrc_max.nb_events_limit =
238 dlb2->hw_rsrc_query_results.num_ldb_credits;
240 handle->info.hw_rsrc_max.num_queues =
241 dlb2->hw_rsrc_query_results.num_ldb_queues +
242 dlb2->hw_rsrc_query_results.num_dir_ports;
244 handle->info.hw_rsrc_max.num_ldb_queues =
245 dlb2->hw_rsrc_query_results.num_ldb_queues;
247 handle->info.hw_rsrc_max.num_ldb_ports = num_ldb_ports;
249 handle->info.hw_rsrc_max.num_dir_ports =
250 dlb2->hw_rsrc_query_results.num_dir_ports;
252 handle->info.hw_rsrc_max.reorder_window_size =
253 dlb2->hw_rsrc_query_results.num_hist_list_entries;
255 rte_memcpy(dlb2_info, &handle->info.hw_rsrc_max, sizeof(*dlb2_info));
260 #define DLB2_BASE_10 10
263 dlb2_string_to_int(int *result, const char *str)
268 if (str == NULL || result == NULL)
272 ret = strtol(str, &endptr, DLB2_BASE_10);
276 /* long int and int may be different width for some architectures */
277 if (ret < INT_MIN || ret > INT_MAX || endptr == str)
285 set_numa_node(const char *key __rte_unused, const char *value, void *opaque)
287 int *socket_id = opaque;
290 ret = dlb2_string_to_int(socket_id, value);
294 if (*socket_id > RTE_MAX_NUMA_NODES)
301 set_max_cq_depth(const char *key __rte_unused,
305 int *max_cq_depth = opaque;
308 if (value == NULL || opaque == NULL) {
309 DLB2_LOG_ERR("NULL pointer\n");
313 ret = dlb2_string_to_int(max_cq_depth, value);
317 if (*max_cq_depth < DLB2_MIN_CQ_DEPTH_OVERRIDE ||
318 *max_cq_depth > DLB2_MAX_CQ_DEPTH_OVERRIDE ||
319 !rte_is_power_of_2(*max_cq_depth)) {
320 DLB2_LOG_ERR("dlb2: max_cq_depth %d and %d and a power of 2\n",
321 DLB2_MIN_CQ_DEPTH_OVERRIDE,
322 DLB2_MAX_CQ_DEPTH_OVERRIDE);
330 set_max_num_events(const char *key __rte_unused,
334 int *max_num_events = opaque;
337 if (value == NULL || opaque == NULL) {
338 DLB2_LOG_ERR("NULL pointer\n");
342 ret = dlb2_string_to_int(max_num_events, value);
346 if (*max_num_events < 0 || *max_num_events >
347 DLB2_MAX_NUM_LDB_CREDITS) {
348 DLB2_LOG_ERR("dlb2: max_num_events must be between 0 and %d\n",
349 DLB2_MAX_NUM_LDB_CREDITS);
357 set_num_dir_credits(const char *key __rte_unused,
361 int *num_dir_credits = opaque;
364 if (value == NULL || opaque == NULL) {
365 DLB2_LOG_ERR("NULL pointer\n");
369 ret = dlb2_string_to_int(num_dir_credits, value);
373 if (*num_dir_credits < 0 ||
374 *num_dir_credits > DLB2_MAX_NUM_DIR_CREDITS(DLB2_HW_V2)) {
375 DLB2_LOG_ERR("dlb2: num_dir_credits must be between 0 and %d\n",
376 DLB2_MAX_NUM_DIR_CREDITS(DLB2_HW_V2));
384 set_dev_id(const char *key __rte_unused,
388 int *dev_id = opaque;
391 if (value == NULL || opaque == NULL) {
392 DLB2_LOG_ERR("NULL pointer\n");
396 ret = dlb2_string_to_int(dev_id, value);
404 set_poll_interval(const char *key __rte_unused,
408 int *poll_interval = opaque;
411 if (value == NULL || opaque == NULL) {
412 DLB2_LOG_ERR("NULL pointer\n");
416 ret = dlb2_string_to_int(poll_interval, value);
424 set_port_cos(const char *key __rte_unused,
428 struct dlb2_port_cos *port_cos = opaque;
429 int first, last, cos_id, i;
431 if (value == NULL || opaque == NULL) {
432 DLB2_LOG_ERR("NULL pointer\n");
436 /* command line override may take one of the following 3 forms:
437 * port_cos=all:<cos_id> ... all ports
438 * port_cos=port-port:<cos_id> ... a range of ports
439 * port_cos=port:<cos_id> ... just one port
441 if (sscanf(value, "all:%d", &cos_id) == 1) {
443 last = DLB2_MAX_NUM_LDB_PORTS - 1;
444 } else if (sscanf(value, "%d-%d:%d", &first, &last, &cos_id) == 3) {
445 /* we have everything we need */
446 } else if (sscanf(value, "%d:%d", &first, &cos_id) == 2) {
449 DLB2_LOG_ERR("Error parsing ldb port port_cos devarg. Should be all:val, port-port:val, or port:val\n");
453 if (first > last || first < 0 ||
454 last >= DLB2_MAX_NUM_LDB_PORTS) {
455 DLB2_LOG_ERR("Error parsing ldb port cos_id arg, invalid port value\n");
459 if (cos_id < DLB2_COS_0 || cos_id > DLB2_COS_3) {
460 DLB2_LOG_ERR("Error parsing ldb port cos_id devarg, must be between 0 and 4\n");
464 for (i = first; i <= last; i++)
465 port_cos->cos_id[i] = cos_id; /* indexed by port */
471 set_cos_bw(const char *key __rte_unused,
475 struct dlb2_cos_bw *cos_bw = opaque;
477 if (opaque == NULL) {
478 DLB2_LOG_ERR("NULL pointer\n");
482 /* format must be %d,%d,%d,%d */
484 if (sscanf(value, "%d,%d,%d,%d", &cos_bw->val[0], &cos_bw->val[1],
485 &cos_bw->val[2], &cos_bw->val[3]) != 4) {
486 DLB2_LOG_ERR("Error parsing cos bandwidth devarg. Should be bw0,bw1,bw2,bw3 where all values combined are <= 100\n");
489 if (cos_bw->val[0] + cos_bw->val[1] + cos_bw->val[2] + cos_bw->val[3] > 100) {
490 DLB2_LOG_ERR("Error parsing cos bandwidth devarg. Should be bw0,bw1,bw2,bw3 where all values combined are <= 100\n");
498 set_sw_credit_quanta(const char *key __rte_unused,
502 int *sw_credit_quanta = opaque;
505 if (value == NULL || opaque == NULL) {
506 DLB2_LOG_ERR("NULL pointer\n");
510 ret = dlb2_string_to_int(sw_credit_quanta, value);
514 if (*sw_credit_quanta <= 0) {
515 DLB2_LOG_ERR("sw_credit_quanta must be > 0\n");
523 set_hw_credit_quanta(const char *key __rte_unused,
527 int *hw_credit_quanta = opaque;
530 if (value == NULL || opaque == NULL) {
531 DLB2_LOG_ERR("NULL pointer\n");
535 ret = dlb2_string_to_int(hw_credit_quanta, value);
543 set_default_depth_thresh(const char *key __rte_unused,
547 int *default_depth_thresh = opaque;
550 if (value == NULL || opaque == NULL) {
551 DLB2_LOG_ERR("NULL pointer\n");
555 ret = dlb2_string_to_int(default_depth_thresh, value);
563 set_vector_opts_enab(const char *key __rte_unused,
567 bool *dlb2_vector_opts_enabled = opaque;
569 if (value == NULL || opaque == NULL) {
570 DLB2_LOG_ERR("NULL pointer\n");
574 if ((*value == 'y') || (*value == 'Y'))
575 *dlb2_vector_opts_enabled = true;
577 *dlb2_vector_opts_enabled = false;
583 set_qid_depth_thresh(const char *key __rte_unused,
587 struct dlb2_qid_depth_thresholds *qid_thresh = opaque;
588 int first, last, thresh, i;
590 if (value == NULL || opaque == NULL) {
591 DLB2_LOG_ERR("NULL pointer\n");
595 /* command line override may take one of the following 3 forms:
596 * qid_depth_thresh=all:<threshold_value> ... all queues
597 * qid_depth_thresh=qidA-qidB:<threshold_value> ... a range of queues
598 * qid_depth_thresh=qid:<threshold_value> ... just one queue
600 if (sscanf(value, "all:%d", &thresh) == 1) {
602 last = DLB2_MAX_NUM_QUEUES(DLB2_HW_V2) - 1;
603 } else if (sscanf(value, "%d-%d:%d", &first, &last, &thresh) == 3) {
604 /* we have everything we need */
605 } else if (sscanf(value, "%d:%d", &first, &thresh) == 2) {
608 DLB2_LOG_ERR("Error parsing qid depth devarg. Should be all:val, qid-qid:val, or qid:val\n");
612 if (first > last || first < 0 ||
613 last >= DLB2_MAX_NUM_QUEUES(DLB2_HW_V2)) {
614 DLB2_LOG_ERR("Error parsing qid depth devarg, invalid qid value\n");
618 if (thresh < 0 || thresh > DLB2_MAX_QUEUE_DEPTH_THRESHOLD) {
619 DLB2_LOG_ERR("Error parsing qid depth devarg, threshold > %d\n",
620 DLB2_MAX_QUEUE_DEPTH_THRESHOLD);
624 for (i = first; i <= last; i++)
625 qid_thresh->val[i] = thresh; /* indexed by qid */
631 set_qid_depth_thresh_v2_5(const char *key __rte_unused,
635 struct dlb2_qid_depth_thresholds *qid_thresh = opaque;
636 int first, last, thresh, i;
638 if (value == NULL || opaque == NULL) {
639 DLB2_LOG_ERR("NULL pointer\n");
643 /* command line override may take one of the following 3 forms:
644 * qid_depth_thresh=all:<threshold_value> ... all queues
645 * qid_depth_thresh=qidA-qidB:<threshold_value> ... a range of queues
646 * qid_depth_thresh=qid:<threshold_value> ... just one queue
648 if (sscanf(value, "all:%d", &thresh) == 1) {
650 last = DLB2_MAX_NUM_QUEUES(DLB2_HW_V2_5) - 1;
651 } else if (sscanf(value, "%d-%d:%d", &first, &last, &thresh) == 3) {
652 /* we have everything we need */
653 } else if (sscanf(value, "%d:%d", &first, &thresh) == 2) {
656 DLB2_LOG_ERR("Error parsing qid depth devarg. Should be all:val, qid-qid:val, or qid:val\n");
660 if (first > last || first < 0 ||
661 last >= DLB2_MAX_NUM_QUEUES(DLB2_HW_V2_5)) {
662 DLB2_LOG_ERR("Error parsing qid depth devarg, invalid qid value\n");
666 if (thresh < 0 || thresh > DLB2_MAX_QUEUE_DEPTH_THRESHOLD) {
667 DLB2_LOG_ERR("Error parsing qid depth devarg, threshold > %d\n",
668 DLB2_MAX_QUEUE_DEPTH_THRESHOLD);
672 for (i = first; i <= last; i++)
673 qid_thresh->val[i] = thresh; /* indexed by qid */
679 dlb2_eventdev_info_get(struct rte_eventdev *dev,
680 struct rte_event_dev_info *dev_info)
682 struct dlb2_eventdev *dlb2 = dlb2_pmd_priv(dev);
685 ret = dlb2_hw_query_resources(dlb2);
687 const struct rte_eventdev_data *data = dev->data;
689 DLB2_LOG_ERR("get resources err=%d, devid=%d\n",
691 /* fn is void, so fall through and return values set up in
696 /* Add num resources currently owned by this domain.
697 * These would become available if the scheduling domain were reset due
698 * to the application recalling eventdev_configure to *reconfigure* the
701 evdev_dlb2_default_info.max_event_ports += dlb2->num_ldb_ports;
702 evdev_dlb2_default_info.max_event_queues += dlb2->num_ldb_queues;
703 if (dlb2->version == DLB2_HW_V2_5) {
704 evdev_dlb2_default_info.max_num_events +=
707 evdev_dlb2_default_info.max_num_events +=
708 dlb2->max_ldb_credits;
710 evdev_dlb2_default_info.max_event_queues =
711 RTE_MIN(evdev_dlb2_default_info.max_event_queues,
712 RTE_EVENT_MAX_QUEUES_PER_DEV);
714 evdev_dlb2_default_info.max_num_events =
715 RTE_MIN(evdev_dlb2_default_info.max_num_events,
716 dlb2->max_num_events_override);
718 *dev_info = evdev_dlb2_default_info;
722 dlb2_hw_create_sched_domain(struct dlb2_eventdev *dlb2,
723 struct dlb2_hw_dev *handle,
724 const struct dlb2_hw_rsrcs *resources_asked,
725 uint8_t device_version)
728 uint32_t cos_ports = 0;
729 struct dlb2_create_sched_domain_args *cfg;
731 if (resources_asked == NULL) {
732 DLB2_LOG_ERR("dlb2: dlb2_create NULL parameter\n");
737 /* Map generic qm resources to dlb2 resources */
738 cfg = &handle->cfg.resources;
740 /* DIR ports and queues */
742 cfg->num_dir_ports = resources_asked->num_dir_ports;
743 if (device_version == DLB2_HW_V2_5)
744 cfg->num_credits = resources_asked->num_credits;
746 cfg->num_dir_credits = resources_asked->num_dir_credits;
750 cfg->num_ldb_queues = resources_asked->num_ldb_queues;
754 /* tally of ports with non default COS */
755 cos_ports = dlb2->cos_ports[1] + dlb2->cos_ports[2] +
758 if (cos_ports > resources_asked->num_ldb_ports) {
759 DLB2_LOG_ERR("dlb2: num_ldb_ports < nonzero cos_ports\n");
764 cfg->cos_strict = 0; /* Best effort */
765 cfg->num_cos_ldb_ports[0] = resources_asked->num_ldb_ports - cos_ports;
766 cfg->num_cos_ldb_ports[1] = dlb2->cos_ports[1];
767 cfg->num_cos_ldb_ports[2] = dlb2->cos_ports[2];
768 cfg->num_cos_ldb_ports[3] = dlb2->cos_ports[3];
770 if (device_version == DLB2_HW_V2)
771 cfg->num_ldb_credits = resources_asked->num_ldb_credits;
773 cfg->num_atomic_inflights =
774 DLB2_NUM_ATOMIC_INFLIGHTS_PER_QUEUE *
777 cfg->num_hist_list_entries = resources_asked->num_ldb_ports *
778 DLB2_NUM_HIST_LIST_ENTRIES_PER_LDB_PORT;
780 if (device_version == DLB2_HW_V2_5) {
781 DLB2_LOG_DBG("sched domain create - ldb_qs=%d, ldb_ports=%d, dir_ports=%d, atomic_inflights=%d, hist_list_entries=%d, credits=%d\n",
783 resources_asked->num_ldb_ports,
785 cfg->num_atomic_inflights,
786 cfg->num_hist_list_entries,
789 DLB2_LOG_DBG("sched domain create - ldb_qs=%d, ldb_ports=%d, dir_ports=%d, atomic_inflights=%d, hist_list_entries=%d, ldb_credits=%d, dir_credits=%d\n",
791 resources_asked->num_ldb_ports,
793 cfg->num_atomic_inflights,
794 cfg->num_hist_list_entries,
795 cfg->num_ldb_credits,
796 cfg->num_dir_credits);
799 /* Configure the QM */
801 ret = dlb2_iface_sched_domain_create(handle, cfg);
803 DLB2_LOG_ERR("dlb2: domain create failed, ret = %d, extra status: %s\n",
805 dlb2_error_strings[cfg->response.status]);
810 handle->domain_id = cfg->response.id;
811 handle->cfg.configured = true;
819 dlb2_hw_reset_sched_domain(const struct rte_eventdev *dev, bool reconfig)
821 struct dlb2_eventdev *dlb2 = dlb2_pmd_priv(dev);
822 enum dlb2_configuration_state config_state;
825 dlb2_iface_domain_reset(dlb2);
827 /* Free all dynamically allocated port memory */
828 for (i = 0; i < dlb2->num_ports; i++)
829 dlb2_free_qe_mem(&dlb2->ev_ports[i].qm_port);
831 /* If reconfiguring, mark the device's queues and ports as "previously
832 * configured." If the user doesn't reconfigure them, the PMD will
833 * reapply their previous configuration when the device is started.
835 config_state = (reconfig) ? DLB2_PREV_CONFIGURED :
838 for (i = 0; i < dlb2->num_ports; i++) {
839 dlb2->ev_ports[i].qm_port.config_state = config_state;
840 /* Reset setup_done so ports can be reconfigured */
841 dlb2->ev_ports[i].setup_done = false;
842 for (j = 0; j < DLB2_MAX_NUM_QIDS_PER_LDB_CQ; j++)
843 dlb2->ev_ports[i].link[j].mapped = false;
846 for (i = 0; i < dlb2->num_queues; i++)
847 dlb2->ev_queues[i].qm_queue.config_state = config_state;
849 for (i = 0; i < DLB2_MAX_NUM_QUEUES(DLB2_HW_V2_5); i++)
850 dlb2->ev_queues[i].setup_done = false;
853 dlb2->num_ldb_ports = 0;
854 dlb2->num_dir_ports = 0;
855 dlb2->num_queues = 0;
856 dlb2->num_ldb_queues = 0;
857 dlb2->num_dir_queues = 0;
858 dlb2->configured = false;
861 /* Note: 1 QM instance per QM device, QM instance/device == event device */
863 dlb2_eventdev_configure(const struct rte_eventdev *dev)
865 struct dlb2_eventdev *dlb2 = dlb2_pmd_priv(dev);
866 struct dlb2_hw_dev *handle = &dlb2->qm_instance;
867 struct dlb2_hw_rsrcs *rsrcs = &handle->info.hw_rsrc_max;
868 const struct rte_eventdev_data *data = dev->data;
869 const struct rte_event_dev_config *config = &data->dev_conf;
872 /* If this eventdev is already configured, we must release the current
873 * scheduling domain before attempting to configure a new one.
875 if (dlb2->configured) {
876 dlb2_hw_reset_sched_domain(dev, true);
877 ret = dlb2_hw_query_resources(dlb2);
879 DLB2_LOG_ERR("get resources err=%d, devid=%d\n",
885 if (config->nb_event_queues > rsrcs->num_queues) {
886 DLB2_LOG_ERR("nb_event_queues parameter (%d) exceeds the QM device's capabilities (%d).\n",
887 config->nb_event_queues,
891 if (config->nb_event_ports > (rsrcs->num_ldb_ports
892 + rsrcs->num_dir_ports)) {
893 DLB2_LOG_ERR("nb_event_ports parameter (%d) exceeds the QM device's capabilities (%d).\n",
894 config->nb_event_ports,
895 (rsrcs->num_ldb_ports + rsrcs->num_dir_ports));
898 if (config->nb_events_limit > rsrcs->nb_events_limit) {
899 DLB2_LOG_ERR("nb_events_limit parameter (%d) exceeds the QM device's capabilities (%d).\n",
900 config->nb_events_limit,
901 rsrcs->nb_events_limit);
905 if (config->event_dev_cfg & RTE_EVENT_DEV_CFG_PER_DEQUEUE_TIMEOUT)
906 dlb2->global_dequeue_wait = false;
910 dlb2->global_dequeue_wait = true;
912 /* note size mismatch of timeout vals in eventdev lib. */
913 timeout32 = config->dequeue_timeout_ns;
915 dlb2->global_dequeue_wait_ticks =
916 timeout32 * (rte_get_timer_hz() / 1E9);
919 /* Does this platform support umonitor/umwait? */
920 if (rte_cpu_get_flag_enabled(RTE_CPUFLAG_WAITPKG))
921 dlb2->umwait_allowed = true;
923 rsrcs->num_dir_ports = config->nb_single_link_event_port_queues;
924 rsrcs->num_ldb_ports = config->nb_event_ports - rsrcs->num_dir_ports;
925 /* 1 dir queue per dir port */
926 rsrcs->num_ldb_queues = config->nb_event_queues - rsrcs->num_dir_ports;
928 if (dlb2->version == DLB2_HW_V2_5) {
929 rsrcs->num_credits = 0;
930 if (rsrcs->num_ldb_queues || rsrcs->num_dir_ports)
931 rsrcs->num_credits = config->nb_events_limit;
933 /* Scale down nb_events_limit by 4 for directed credits,
934 * since there are 4x as many load-balanced credits.
936 rsrcs->num_ldb_credits = 0;
937 rsrcs->num_dir_credits = 0;
939 if (rsrcs->num_ldb_queues)
940 rsrcs->num_ldb_credits = config->nb_events_limit;
941 if (rsrcs->num_dir_ports)
942 rsrcs->num_dir_credits = config->nb_events_limit / 2;
943 if (dlb2->num_dir_credits_override != -1)
944 rsrcs->num_dir_credits = dlb2->num_dir_credits_override;
947 if (dlb2_hw_create_sched_domain(dlb2, handle, rsrcs,
948 dlb2->version) < 0) {
949 DLB2_LOG_ERR("dlb2_hw_create_sched_domain failed\n");
953 dlb2->new_event_limit = config->nb_events_limit;
954 __atomic_store_n(&dlb2->inflights, 0, __ATOMIC_SEQ_CST);
956 /* Save number of ports/queues for this event dev */
957 dlb2->num_ports = config->nb_event_ports;
958 dlb2->num_queues = config->nb_event_queues;
959 dlb2->num_dir_ports = rsrcs->num_dir_ports;
960 dlb2->num_ldb_ports = dlb2->num_ports - dlb2->num_dir_ports;
961 dlb2->num_ldb_queues = dlb2->num_queues - dlb2->num_dir_ports;
962 dlb2->num_dir_queues = dlb2->num_dir_ports;
963 if (dlb2->version == DLB2_HW_V2_5) {
964 dlb2->credit_pool = rsrcs->num_credits;
965 dlb2->max_credits = rsrcs->num_credits;
967 dlb2->ldb_credit_pool = rsrcs->num_ldb_credits;
968 dlb2->max_ldb_credits = rsrcs->num_ldb_credits;
969 dlb2->dir_credit_pool = rsrcs->num_dir_credits;
970 dlb2->max_dir_credits = rsrcs->num_dir_credits;
973 dlb2->configured = true;
979 dlb2_eventdev_port_default_conf_get(struct rte_eventdev *dev,
981 struct rte_event_port_conf *port_conf)
983 RTE_SET_USED(port_id);
984 struct dlb2_eventdev *dlb2 = dlb2_pmd_priv(dev);
986 port_conf->new_event_threshold = dlb2->new_event_limit;
987 port_conf->dequeue_depth = 32;
988 port_conf->enqueue_depth = DLB2_MAX_ENQUEUE_DEPTH;
989 port_conf->event_port_cfg = 0;
993 dlb2_eventdev_queue_default_conf_get(struct rte_eventdev *dev,
995 struct rte_event_queue_conf *queue_conf)
998 RTE_SET_USED(queue_id);
1000 queue_conf->nb_atomic_flows = 1024;
1001 queue_conf->nb_atomic_order_sequences = 64;
1002 queue_conf->event_queue_cfg = 0;
1003 queue_conf->priority = 0;
1007 dlb2_get_sn_allocation(struct dlb2_eventdev *dlb2, int group)
1009 struct dlb2_hw_dev *handle = &dlb2->qm_instance;
1010 struct dlb2_get_sn_allocation_args cfg;
1015 ret = dlb2_iface_get_sn_allocation(handle, &cfg);
1017 DLB2_LOG_ERR("dlb2: get_sn_allocation ret=%d (driver status: %s)\n",
1018 ret, dlb2_error_strings[cfg.response.status]);
1022 return cfg.response.id;
1026 dlb2_set_sn_allocation(struct dlb2_eventdev *dlb2, int group, int num)
1028 struct dlb2_hw_dev *handle = &dlb2->qm_instance;
1029 struct dlb2_set_sn_allocation_args cfg;
1035 ret = dlb2_iface_set_sn_allocation(handle, &cfg);
1037 DLB2_LOG_ERR("dlb2: set_sn_allocation ret=%d (driver status: %s)\n",
1038 ret, dlb2_error_strings[cfg.response.status]);
1046 dlb2_get_sn_occupancy(struct dlb2_eventdev *dlb2, int group)
1048 struct dlb2_hw_dev *handle = &dlb2->qm_instance;
1049 struct dlb2_get_sn_occupancy_args cfg;
1054 ret = dlb2_iface_get_sn_occupancy(handle, &cfg);
1056 DLB2_LOG_ERR("dlb2: get_sn_occupancy ret=%d (driver status: %s)\n",
1057 ret, dlb2_error_strings[cfg.response.status]);
1061 return cfg.response.id;
1064 /* Query the current sequence number allocations and, if they conflict with the
1065 * requested LDB queue configuration, attempt to re-allocate sequence numbers.
1066 * This is best-effort; if it fails, the PMD will attempt to configure the
1067 * load-balanced queue and return an error.
1070 dlb2_program_sn_allocation(struct dlb2_eventdev *dlb2,
1071 const struct rte_event_queue_conf *queue_conf)
1073 int grp_occupancy[DLB2_NUM_SN_GROUPS];
1074 int grp_alloc[DLB2_NUM_SN_GROUPS];
1075 int i, sequence_numbers;
1077 sequence_numbers = (int)queue_conf->nb_atomic_order_sequences;
1079 for (i = 0; i < DLB2_NUM_SN_GROUPS; i++) {
1082 grp_alloc[i] = dlb2_get_sn_allocation(dlb2, i);
1083 if (grp_alloc[i] < 0)
1086 total_slots = DLB2_MAX_LDB_SN_ALLOC / grp_alloc[i];
1088 grp_occupancy[i] = dlb2_get_sn_occupancy(dlb2, i);
1089 if (grp_occupancy[i] < 0)
1092 /* DLB has at least one available slot for the requested
1093 * sequence numbers, so no further configuration required.
1095 if (grp_alloc[i] == sequence_numbers &&
1096 grp_occupancy[i] < total_slots)
1100 /* None of the sequence number groups are configured for the requested
1101 * sequence numbers, so we have to reconfigure one of them. This is
1102 * only possible if a group is not in use.
1104 for (i = 0; i < DLB2_NUM_SN_GROUPS; i++) {
1105 if (grp_occupancy[i] == 0)
1109 if (i == DLB2_NUM_SN_GROUPS) {
1110 DLB2_LOG_ERR("[%s()] No groups with %d sequence_numbers are available or have free slots\n",
1111 __func__, sequence_numbers);
1115 /* Attempt to configure slot i with the requested number of sequence
1116 * numbers. Ignore the return value -- if this fails, the error will be
1117 * caught during subsequent queue configuration.
1119 dlb2_set_sn_allocation(dlb2, i, sequence_numbers);
1123 dlb2_hw_create_ldb_queue(struct dlb2_eventdev *dlb2,
1124 struct dlb2_eventdev_queue *ev_queue,
1125 const struct rte_event_queue_conf *evq_conf)
1127 struct dlb2_hw_dev *handle = &dlb2->qm_instance;
1128 struct dlb2_queue *queue = &ev_queue->qm_queue;
1129 struct dlb2_create_ldb_queue_args cfg;
1132 int sched_type = -1;
1134 if (evq_conf == NULL)
1137 if (evq_conf->event_queue_cfg & RTE_EVENT_QUEUE_CFG_ALL_TYPES) {
1138 if (evq_conf->nb_atomic_order_sequences != 0)
1139 sched_type = RTE_SCHED_TYPE_ORDERED;
1141 sched_type = RTE_SCHED_TYPE_PARALLEL;
1143 sched_type = evq_conf->schedule_type;
1145 cfg.num_atomic_inflights = DLB2_NUM_ATOMIC_INFLIGHTS_PER_QUEUE;
1146 cfg.num_sequence_numbers = evq_conf->nb_atomic_order_sequences;
1147 cfg.num_qid_inflights = evq_conf->nb_atomic_order_sequences;
1149 if (sched_type != RTE_SCHED_TYPE_ORDERED) {
1150 cfg.num_sequence_numbers = 0;
1151 cfg.num_qid_inflights = 2048;
1154 /* App should set this to the number of hardware flows they want, not
1155 * the overall number of flows they're going to use. E.g. if app is
1156 * using 64 flows and sets compression to 64, best-case they'll get
1157 * 64 unique hashed flows in hardware.
1159 switch (evq_conf->nb_atomic_flows) {
1160 /* Valid DLB2 compression levels */
1165 case (1 * 1024): /* 1K */
1166 case (2 * 1024): /* 2K */
1167 case (4 * 1024): /* 4K */
1168 case (64 * 1024): /* 64K */
1169 cfg.lock_id_comp_level = evq_conf->nb_atomic_flows;
1172 /* Invalid compression level */
1173 cfg.lock_id_comp_level = 0; /* no compression */
1176 if (ev_queue->depth_threshold == 0) {
1177 cfg.depth_threshold = dlb2->default_depth_thresh;
1178 ev_queue->depth_threshold =
1179 dlb2->default_depth_thresh;
1181 cfg.depth_threshold = ev_queue->depth_threshold;
1183 ret = dlb2_iface_ldb_queue_create(handle, &cfg);
1185 DLB2_LOG_ERR("dlb2: create LB event queue error, ret=%d (driver status: %s)\n",
1186 ret, dlb2_error_strings[cfg.response.status]);
1190 qm_qid = cfg.response.id;
1192 /* Save off queue config for debug, resource lookups, and reconfig */
1193 queue->num_qid_inflights = cfg.num_qid_inflights;
1194 queue->num_atm_inflights = cfg.num_atomic_inflights;
1196 queue->sched_type = sched_type;
1197 queue->config_state = DLB2_CONFIGURED;
1199 DLB2_LOG_DBG("Created LB event queue %d, nb_inflights=%d, nb_seq=%d, qid inflights=%d\n",
1201 cfg.num_atomic_inflights,
1202 cfg.num_sequence_numbers,
1203 cfg.num_qid_inflights);
1209 dlb2_eventdev_ldb_queue_setup(struct rte_eventdev *dev,
1210 struct dlb2_eventdev_queue *ev_queue,
1211 const struct rte_event_queue_conf *queue_conf)
1213 struct dlb2_eventdev *dlb2 = dlb2_pmd_priv(dev);
1216 if (queue_conf->nb_atomic_order_sequences)
1217 dlb2_program_sn_allocation(dlb2, queue_conf);
1219 qm_qid = dlb2_hw_create_ldb_queue(dlb2, ev_queue, queue_conf);
1221 DLB2_LOG_ERR("Failed to create the load-balanced queue\n");
1226 dlb2->qm_ldb_to_ev_queue_id[qm_qid] = ev_queue->id;
1228 ev_queue->qm_queue.id = qm_qid;
1233 static int dlb2_num_dir_queues_setup(struct dlb2_eventdev *dlb2)
1237 for (i = 0; i < dlb2->num_queues; i++) {
1238 if (dlb2->ev_queues[i].setup_done &&
1239 dlb2->ev_queues[i].qm_queue.is_directed)
1247 dlb2_queue_link_teardown(struct dlb2_eventdev *dlb2,
1248 struct dlb2_eventdev_queue *ev_queue)
1250 struct dlb2_eventdev_port *ev_port;
1253 for (i = 0; i < dlb2->num_ports; i++) {
1254 ev_port = &dlb2->ev_ports[i];
1256 for (j = 0; j < DLB2_MAX_NUM_QIDS_PER_LDB_CQ; j++) {
1257 if (!ev_port->link[j].valid ||
1258 ev_port->link[j].queue_id != ev_queue->id)
1261 ev_port->link[j].valid = false;
1262 ev_port->num_links--;
1266 ev_queue->num_links = 0;
1270 dlb2_eventdev_queue_setup(struct rte_eventdev *dev,
1272 const struct rte_event_queue_conf *queue_conf)
1274 struct dlb2_eventdev *dlb2 = dlb2_pmd_priv(dev);
1275 struct dlb2_eventdev_queue *ev_queue;
1278 if (queue_conf == NULL)
1281 if (ev_qid >= dlb2->num_queues)
1284 ev_queue = &dlb2->ev_queues[ev_qid];
1286 ev_queue->qm_queue.is_directed = queue_conf->event_queue_cfg &
1287 RTE_EVENT_QUEUE_CFG_SINGLE_LINK;
1288 ev_queue->id = ev_qid;
1289 ev_queue->conf = *queue_conf;
1291 if (!ev_queue->qm_queue.is_directed) {
1292 ret = dlb2_eventdev_ldb_queue_setup(dev, ev_queue, queue_conf);
1294 /* The directed queue isn't setup until link time, at which
1295 * point we know its directed port ID. Directed queue setup
1296 * will only fail if this queue is already setup or there are
1297 * no directed queues left to configure.
1301 ev_queue->qm_queue.config_state = DLB2_NOT_CONFIGURED;
1303 if (ev_queue->setup_done ||
1304 dlb2_num_dir_queues_setup(dlb2) == dlb2->num_dir_queues)
1308 /* Tear down pre-existing port->queue links */
1309 if (!ret && dlb2->run_state == DLB2_RUN_STATE_STOPPED)
1310 dlb2_queue_link_teardown(dlb2, ev_queue);
1313 ev_queue->setup_done = true;
1319 dlb2_init_consume_qe(struct dlb2_port *qm_port, char *mz_name)
1321 struct dlb2_cq_pop_qe *qe;
1323 qe = rte_zmalloc(mz_name,
1324 DLB2_NUM_QES_PER_CACHE_LINE *
1325 sizeof(struct dlb2_cq_pop_qe),
1326 RTE_CACHE_LINE_SIZE);
1329 DLB2_LOG_ERR("dlb2: no memory for consume_qe\n");
1332 qm_port->consume_qe = qe;
1338 /* Tokens value is 0-based; i.e. '0' returns 1 token, '1' returns 2,
1341 qe->tokens = 0; /* set at run time */
1344 /* Completion IDs are disabled */
1351 dlb2_init_int_arm_qe(struct dlb2_port *qm_port, char *mz_name)
1353 struct dlb2_enqueue_qe *qe;
1355 qe = rte_zmalloc(mz_name,
1356 DLB2_NUM_QES_PER_CACHE_LINE *
1357 sizeof(struct dlb2_enqueue_qe),
1358 RTE_CACHE_LINE_SIZE);
1361 DLB2_LOG_ERR("dlb2: no memory for complete_qe\n");
1364 qm_port->int_arm_qe = qe;
1366 /* V2 - INT ARM is CQ_TOKEN + FRAG */
1373 /* Completion IDs are disabled */
1380 dlb2_init_qe_mem(struct dlb2_port *qm_port, char *mz_name)
1384 sz = DLB2_NUM_QES_PER_CACHE_LINE * sizeof(struct dlb2_enqueue_qe);
1386 qm_port->qe4 = rte_zmalloc(mz_name, sz, RTE_CACHE_LINE_SIZE);
1388 if (qm_port->qe4 == NULL) {
1389 DLB2_LOG_ERR("dlb2: no qe4 memory\n");
1394 ret = dlb2_init_int_arm_qe(qm_port, mz_name);
1396 DLB2_LOG_ERR("dlb2: dlb2_init_int_arm_qe ret=%d\n", ret);
1400 ret = dlb2_init_consume_qe(qm_port, mz_name);
1402 DLB2_LOG_ERR("dlb2: dlb2_init_consume_qe ret=%d\n", ret);
1410 dlb2_free_qe_mem(qm_port);
1415 static inline uint16_t
1416 dlb2_event_enqueue_delayed(void *event_port,
1417 const struct rte_event events[]);
1419 static inline uint16_t
1420 dlb2_event_enqueue_burst_delayed(void *event_port,
1421 const struct rte_event events[],
1424 static inline uint16_t
1425 dlb2_event_enqueue_new_burst_delayed(void *event_port,
1426 const struct rte_event events[],
1429 static inline uint16_t
1430 dlb2_event_enqueue_forward_burst_delayed(void *event_port,
1431 const struct rte_event events[],
1434 /* Generate the required bitmask for rotate-style expected QE gen bits.
1435 * This requires a pattern of 1's and zeros, starting with expected as
1436 * 1 bits, so when hardware writes 0's they're "new". This requires the
1437 * ring size to be powers of 2 to wrap correctly.
1440 dlb2_hw_cq_bitmask_init(struct dlb2_port *qm_port, uint32_t cq_depth)
1442 uint64_t cq_build_mask = 0;
1446 return; /* need to fall back to scalar code */
1449 * all 1's in first u64, all zeros in second is correct bit pattern to
1450 * start. Special casing == 64 easier than adapting complex loop logic.
1452 if (cq_depth == 64) {
1453 qm_port->cq_rolling_mask = 0;
1454 qm_port->cq_rolling_mask_2 = -1;
1458 for (i = 0; i < 64; i += (cq_depth * 2))
1459 cq_build_mask |= ((1ULL << cq_depth) - 1) << (i + cq_depth);
1461 qm_port->cq_rolling_mask = cq_build_mask;
1462 qm_port->cq_rolling_mask_2 = cq_build_mask;
1466 dlb2_hw_create_ldb_port(struct dlb2_eventdev *dlb2,
1467 struct dlb2_eventdev_port *ev_port,
1468 uint32_t dequeue_depth,
1469 uint32_t enqueue_depth)
1471 struct dlb2_hw_dev *handle = &dlb2->qm_instance;
1472 struct dlb2_create_ldb_port_args cfg = { {0} };
1474 struct dlb2_port *qm_port = NULL;
1475 char mz_name[RTE_MEMZONE_NAMESIZE];
1476 uint32_t qm_port_id;
1477 uint16_t ldb_credit_high_watermark = 0;
1478 uint16_t dir_credit_high_watermark = 0;
1479 uint16_t credit_high_watermark = 0;
1484 if (dequeue_depth < DLB2_MIN_CQ_DEPTH) {
1485 DLB2_LOG_ERR("dlb2: invalid cq depth, must be at least %d\n",
1490 if (dlb2->version == DLB2_HW_V2 && ev_port->cq_weight != 0 &&
1491 ev_port->cq_weight > dequeue_depth) {
1492 DLB2_LOG_ERR("dlb2: invalid cq depth, must be >= cq weight%d\n",
1493 DLB2_MIN_ENQUEUE_DEPTH);
1497 rte_spinlock_lock(&handle->resource_lock);
1499 /* We round up to the next power of 2 if necessary */
1500 cfg.cq_depth = rte_align32pow2(dequeue_depth);
1501 cfg.cq_depth_threshold = 1;
1503 cfg.cq_history_list_size = DLB2_NUM_HIST_LIST_ENTRIES_PER_LDB_PORT;
1505 cfg.cos_id = ev_port->cos_id;
1506 cfg.cos_strict = 0;/* best effots */
1508 /* User controls the LDB high watermark via enqueue depth. The DIR high
1509 * watermark is equal, unless the directed credit pool is too small.
1511 if (dlb2->version == DLB2_HW_V2) {
1512 ldb_credit_high_watermark = enqueue_depth;
1513 /* If there are no directed ports, the kernel driver will
1514 * ignore this port's directed credit settings. Don't use
1515 * enqueue_depth if it would require more directed credits
1516 * than are available.
1518 dir_credit_high_watermark =
1519 RTE_MIN(enqueue_depth,
1520 handle->cfg.num_dir_credits / dlb2->num_ports);
1522 credit_high_watermark = enqueue_depth;
1526 ret = dlb2_iface_ldb_port_create(handle, &cfg, dlb2->poll_mode);
1528 DLB2_LOG_ERR("dlb2: dlb2_ldb_port_create error, ret=%d (driver status: %s)\n",
1529 ret, dlb2_error_strings[cfg.response.status]);
1533 qm_port_id = cfg.response.id;
1535 DLB2_LOG_DBG("dlb2: ev_port %d uses qm LB port %d <<<<<\n",
1536 ev_port->id, qm_port_id);
1538 qm_port = &ev_port->qm_port;
1539 qm_port->ev_port = ev_port; /* back ptr */
1540 qm_port->dlb2 = dlb2; /* back ptr */
1542 * Allocate and init local qe struct(s).
1543 * Note: MOVDIR64 requires the enqueue QE (qe4) to be aligned.
1546 snprintf(mz_name, sizeof(mz_name), "dlb2_ldb_port%d",
1549 ret = dlb2_init_qe_mem(qm_port, mz_name);
1551 DLB2_LOG_ERR("dlb2: init_qe_mem failed, ret=%d\n", ret);
1555 qm_port->id = qm_port_id;
1557 if (dlb2->version == DLB2_HW_V2) {
1558 qm_port->cached_ldb_credits = 0;
1559 qm_port->cached_dir_credits = 0;
1560 if (ev_port->cq_weight) {
1561 struct dlb2_enable_cq_weight_args
1562 cq_weight_args = { {0} };
1564 cq_weight_args.port_id = qm_port->id;
1565 cq_weight_args.limit = ev_port->cq_weight;
1566 ret = dlb2_iface_enable_cq_weight(handle, &cq_weight_args);
1568 DLB2_LOG_ERR("dlb2: dlb2_dir_port_create error, ret=%d (driver status: %s)\n",
1570 dlb2_error_strings[cfg.response. status]);
1574 qm_port->cq_weight = ev_port->cq_weight;
1576 qm_port->cached_credits = 0;
1577 qm_port->cq_weight = 0;
1580 /* CQs with depth < 8 use an 8-entry queue, but withhold credits so
1581 * the effective depth is smaller.
1583 qm_port->cq_depth = cfg.cq_depth <= 8 ? 8 : cfg.cq_depth;
1584 qm_port->cq_idx = 0;
1585 qm_port->cq_idx_unmasked = 0;
1587 if (dlb2->poll_mode == DLB2_CQ_POLL_MODE_SPARSE)
1588 qm_port->cq_depth_mask = (qm_port->cq_depth * 4) - 1;
1590 qm_port->cq_depth_mask = qm_port->cq_depth - 1;
1592 qm_port->gen_bit_shift = __builtin_popcount(qm_port->cq_depth_mask);
1593 /* starting value of gen bit - it toggles at wrap time */
1594 qm_port->gen_bit = 1;
1596 dlb2_hw_cq_bitmask_init(qm_port, qm_port->cq_depth);
1598 qm_port->int_armed = false;
1600 /* Save off for later use in info and lookup APIs. */
1601 qm_port->qid_mappings = &dlb2->qm_ldb_to_ev_queue_id[0];
1603 qm_port->dequeue_depth = dequeue_depth;
1604 qm_port->token_pop_thresh = dequeue_depth;
1606 /* The default enqueue functions do not include delayed-pop support for
1607 * performance reasons.
1609 if (qm_port->token_pop_mode == DELAYED_POP) {
1610 dlb2->event_dev->enqueue = dlb2_event_enqueue_delayed;
1611 dlb2->event_dev->enqueue_burst =
1612 dlb2_event_enqueue_burst_delayed;
1613 dlb2->event_dev->enqueue_new_burst =
1614 dlb2_event_enqueue_new_burst_delayed;
1615 dlb2->event_dev->enqueue_forward_burst =
1616 dlb2_event_enqueue_forward_burst_delayed;
1619 qm_port->owed_tokens = 0;
1620 qm_port->issued_releases = 0;
1622 /* Save config message too. */
1623 rte_memcpy(&qm_port->cfg.ldb, &cfg, sizeof(qm_port->cfg.ldb));
1626 qm_port->state = PORT_STARTED; /* enabled at create time */
1627 qm_port->config_state = DLB2_CONFIGURED;
1629 if (dlb2->version == DLB2_HW_V2) {
1630 qm_port->dir_credits = dir_credit_high_watermark;
1631 qm_port->ldb_credits = ldb_credit_high_watermark;
1632 qm_port->credit_pool[DLB2_DIR_QUEUE] = &dlb2->dir_credit_pool;
1633 qm_port->credit_pool[DLB2_LDB_QUEUE] = &dlb2->ldb_credit_pool;
1635 DLB2_LOG_DBG("dlb2: created ldb port %d, depth = %d, ldb credits=%d, dir credits=%d\n",
1638 qm_port->ldb_credits,
1639 qm_port->dir_credits);
1641 qm_port->credits = credit_high_watermark;
1642 qm_port->credit_pool[DLB2_COMBINED_POOL] = &dlb2->credit_pool;
1644 DLB2_LOG_DBG("dlb2: created ldb port %d, depth = %d, credits=%d\n",
1650 qm_port->use_scalar = false;
1652 #if (!defined RTE_ARCH_X86_64)
1653 qm_port->use_scalar = true;
1655 if ((qm_port->cq_depth > 64) ||
1656 (!rte_is_power_of_2(qm_port->cq_depth)) ||
1657 (dlb2->vector_opts_enabled == false))
1658 qm_port->use_scalar = true;
1661 rte_spinlock_unlock(&handle->resource_lock);
1668 dlb2_free_qe_mem(qm_port);
1670 rte_spinlock_unlock(&handle->resource_lock);
1672 DLB2_LOG_ERR("dlb2: create ldb port failed!\n");
1678 dlb2_port_link_teardown(struct dlb2_eventdev *dlb2,
1679 struct dlb2_eventdev_port *ev_port)
1681 struct dlb2_eventdev_queue *ev_queue;
1684 for (i = 0; i < DLB2_MAX_NUM_QIDS_PER_LDB_CQ; i++) {
1685 if (!ev_port->link[i].valid)
1688 ev_queue = &dlb2->ev_queues[ev_port->link[i].queue_id];
1690 ev_port->link[i].valid = false;
1691 ev_port->num_links--;
1692 ev_queue->num_links--;
1697 dlb2_hw_create_dir_port(struct dlb2_eventdev *dlb2,
1698 struct dlb2_eventdev_port *ev_port,
1699 uint32_t dequeue_depth,
1700 uint32_t enqueue_depth)
1702 struct dlb2_hw_dev *handle = &dlb2->qm_instance;
1703 struct dlb2_create_dir_port_args cfg = { {0} };
1705 struct dlb2_port *qm_port = NULL;
1706 char mz_name[RTE_MEMZONE_NAMESIZE];
1707 uint32_t qm_port_id;
1708 uint16_t ldb_credit_high_watermark = 0;
1709 uint16_t dir_credit_high_watermark = 0;
1710 uint16_t credit_high_watermark = 0;
1712 if (dlb2 == NULL || handle == NULL)
1715 if (dequeue_depth < DLB2_MIN_CQ_DEPTH) {
1716 DLB2_LOG_ERR("dlb2: invalid dequeue_depth, must be %d-%d\n",
1717 DLB2_MIN_CQ_DEPTH, DLB2_MAX_INPUT_QUEUE_DEPTH);
1721 if (enqueue_depth < DLB2_MIN_ENQUEUE_DEPTH) {
1722 DLB2_LOG_ERR("dlb2: invalid enqueue_depth, must be at least %d\n",
1723 DLB2_MIN_ENQUEUE_DEPTH);
1727 rte_spinlock_lock(&handle->resource_lock);
1729 /* Directed queues are configured at link time. */
1732 /* We round up to the next power of 2 if necessary */
1733 cfg.cq_depth = rte_align32pow2(dequeue_depth);
1734 cfg.cq_depth_threshold = 1;
1736 /* User controls the LDB high watermark via enqueue depth. The DIR high
1737 * watermark is equal, unless the directed credit pool is too small.
1739 if (dlb2->version == DLB2_HW_V2) {
1740 ldb_credit_high_watermark = enqueue_depth;
1741 /* Don't use enqueue_depth if it would require more directed
1742 * credits than are available.
1744 dir_credit_high_watermark =
1745 RTE_MIN(enqueue_depth,
1746 handle->cfg.num_dir_credits / dlb2->num_ports);
1748 credit_high_watermark = enqueue_depth;
1752 ret = dlb2_iface_dir_port_create(handle, &cfg, dlb2->poll_mode);
1754 DLB2_LOG_ERR("dlb2: dlb2_dir_port_create error, ret=%d (driver status: %s)\n",
1755 ret, dlb2_error_strings[cfg.response.status]);
1759 qm_port_id = cfg.response.id;
1761 DLB2_LOG_DBG("dlb2: ev_port %d uses qm DIR port %d <<<<<\n",
1762 ev_port->id, qm_port_id);
1764 qm_port = &ev_port->qm_port;
1765 qm_port->ev_port = ev_port; /* back ptr */
1766 qm_port->dlb2 = dlb2; /* back ptr */
1769 * Init local qe struct(s).
1770 * Note: MOVDIR64 requires the enqueue QE to be aligned
1773 snprintf(mz_name, sizeof(mz_name), "dlb2_dir_port%d",
1776 ret = dlb2_init_qe_mem(qm_port, mz_name);
1779 DLB2_LOG_ERR("dlb2: init_qe_mem failed, ret=%d\n", ret);
1783 qm_port->id = qm_port_id;
1785 if (dlb2->version == DLB2_HW_V2) {
1786 qm_port->cached_ldb_credits = 0;
1787 qm_port->cached_dir_credits = 0;
1789 qm_port->cached_credits = 0;
1791 /* CQs with depth < 8 use an 8-entry queue, but withhold credits so
1792 * the effective depth is smaller.
1794 qm_port->cq_depth = cfg.cq_depth <= 8 ? 8 : cfg.cq_depth;
1795 qm_port->cq_idx = 0;
1796 qm_port->cq_idx_unmasked = 0;
1798 if (dlb2->poll_mode == DLB2_CQ_POLL_MODE_SPARSE)
1799 qm_port->cq_depth_mask = (cfg.cq_depth * 4) - 1;
1801 qm_port->cq_depth_mask = cfg.cq_depth - 1;
1803 qm_port->gen_bit_shift = __builtin_popcount(qm_port->cq_depth_mask);
1804 /* starting value of gen bit - it toggles at wrap time */
1805 qm_port->gen_bit = 1;
1806 dlb2_hw_cq_bitmask_init(qm_port, qm_port->cq_depth);
1808 qm_port->int_armed = false;
1810 /* Save off for later use in info and lookup APIs. */
1811 qm_port->qid_mappings = &dlb2->qm_dir_to_ev_queue_id[0];
1813 qm_port->dequeue_depth = dequeue_depth;
1815 /* Directed ports are auto-pop, by default. */
1816 qm_port->token_pop_mode = AUTO_POP;
1817 qm_port->owed_tokens = 0;
1818 qm_port->issued_releases = 0;
1820 /* Save config message too. */
1821 rte_memcpy(&qm_port->cfg.dir, &cfg, sizeof(qm_port->cfg.dir));
1824 qm_port->state = PORT_STARTED; /* enabled at create time */
1825 qm_port->config_state = DLB2_CONFIGURED;
1827 if (dlb2->version == DLB2_HW_V2) {
1828 qm_port->dir_credits = dir_credit_high_watermark;
1829 qm_port->ldb_credits = ldb_credit_high_watermark;
1830 qm_port->credit_pool[DLB2_DIR_QUEUE] = &dlb2->dir_credit_pool;
1831 qm_port->credit_pool[DLB2_LDB_QUEUE] = &dlb2->ldb_credit_pool;
1833 DLB2_LOG_DBG("dlb2: created dir port %d, depth = %d cr=%d,%d\n",
1836 dir_credit_high_watermark,
1837 ldb_credit_high_watermark);
1839 qm_port->credits = credit_high_watermark;
1840 qm_port->credit_pool[DLB2_COMBINED_POOL] = &dlb2->credit_pool;
1842 DLB2_LOG_DBG("dlb2: created dir port %d, depth = %d cr=%d\n",
1845 credit_high_watermark);
1848 #if (!defined RTE_ARCH_X86_64)
1849 qm_port->use_scalar = true;
1851 if ((qm_port->cq_depth > 64) ||
1852 (!rte_is_power_of_2(qm_port->cq_depth)) ||
1853 (dlb2->vector_opts_enabled == false))
1854 qm_port->use_scalar = true;
1857 rte_spinlock_unlock(&handle->resource_lock);
1864 dlb2_free_qe_mem(qm_port);
1866 rte_spinlock_unlock(&handle->resource_lock);
1868 DLB2_LOG_ERR("dlb2: create dir port failed!\n");
1874 dlb2_eventdev_port_setup(struct rte_eventdev *dev,
1876 const struct rte_event_port_conf *port_conf)
1878 struct dlb2_eventdev *dlb2;
1879 struct dlb2_eventdev_port *ev_port;
1881 uint32_t hw_credit_quanta, sw_credit_quanta;
1883 if (dev == NULL || port_conf == NULL) {
1884 DLB2_LOG_ERR("Null parameter\n");
1888 dlb2 = dlb2_pmd_priv(dev);
1890 if (ev_port_id >= DLB2_MAX_NUM_PORTS(dlb2->version))
1893 if (port_conf->dequeue_depth >
1894 evdev_dlb2_default_info.max_event_port_dequeue_depth ||
1895 port_conf->enqueue_depth >
1896 evdev_dlb2_default_info.max_event_port_enqueue_depth)
1899 ev_port = &dlb2->ev_ports[ev_port_id];
1901 if (ev_port->setup_done) {
1902 DLB2_LOG_ERR("evport %d is already configured\n", ev_port_id);
1906 /* Default for worker ports */
1907 sw_credit_quanta = dlb2->sw_credit_quanta;
1908 hw_credit_quanta = dlb2->hw_credit_quanta;
1910 ev_port->qm_port.is_directed = port_conf->event_port_cfg &
1911 RTE_EVENT_PORT_CFG_SINGLE_LINK;
1914 * Validate credit config before creating port
1917 /* Default for worker ports */
1918 sw_credit_quanta = dlb2->sw_credit_quanta;
1919 hw_credit_quanta = dlb2->hw_credit_quanta;
1921 if (port_conf->event_port_cfg & RTE_EVENT_PORT_CFG_HINT_PRODUCER) {
1922 /* Producer type ports. Mostly enqueue */
1923 sw_credit_quanta = DLB2_SW_CREDIT_P_QUANTA_DEFAULT;
1924 hw_credit_quanta = DLB2_SW_CREDIT_P_BATCH_SZ;
1926 if (port_conf->event_port_cfg & RTE_EVENT_PORT_CFG_HINT_CONSUMER) {
1927 /* Consumer type ports. Mostly dequeue */
1928 sw_credit_quanta = DLB2_SW_CREDIT_C_QUANTA_DEFAULT;
1929 hw_credit_quanta = DLB2_SW_CREDIT_C_BATCH_SZ;
1931 ev_port->credit_update_quanta = sw_credit_quanta;
1932 ev_port->qm_port.hw_credit_quanta = hw_credit_quanta;
1934 if (port_conf->enqueue_depth > sw_credit_quanta ||
1935 port_conf->enqueue_depth > hw_credit_quanta) {
1936 DLB2_LOG_ERR("Invalid port config. Enqueue depth %d must be <= credit quanta %d and batch size %d\n",
1937 port_conf->enqueue_depth,
1942 ev_port->enq_retries = port_conf->enqueue_depth / sw_credit_quanta;
1948 if (!ev_port->qm_port.is_directed) {
1949 ret = dlb2_hw_create_ldb_port(dlb2,
1951 port_conf->dequeue_depth,
1952 port_conf->enqueue_depth);
1954 DLB2_LOG_ERR("Failed to create the lB port ve portId=%d\n",
1960 ret = dlb2_hw_create_dir_port(dlb2,
1962 port_conf->dequeue_depth,
1963 port_conf->enqueue_depth);
1965 DLB2_LOG_ERR("Failed to create the DIR port\n");
1970 /* Save off port config for reconfig */
1971 ev_port->conf = *port_conf;
1973 ev_port->id = ev_port_id;
1974 ev_port->enq_configured = true;
1975 ev_port->setup_done = true;
1976 ev_port->inflight_max = port_conf->new_event_threshold;
1977 ev_port->implicit_release = !(port_conf->event_port_cfg &
1978 RTE_EVENT_PORT_CFG_DISABLE_IMPL_REL);
1979 ev_port->outstanding_releases = 0;
1980 ev_port->inflight_credits = 0;
1981 ev_port->dlb2 = dlb2; /* reverse link */
1983 /* Tear down pre-existing port->queue links */
1984 if (dlb2->run_state == DLB2_RUN_STATE_STOPPED)
1985 dlb2_port_link_teardown(dlb2, &dlb2->ev_ports[ev_port_id]);
1987 dev->data->ports[ev_port_id] = &dlb2->ev_ports[ev_port_id];
1989 if (rte_cpu_get_flag_enabled(RTE_CPUFLAG_AVX512VL) &&
1990 rte_vect_get_max_simd_bitwidth() >= RTE_VECT_SIMD_512)
1991 ev_port->qm_port.use_avx512 = true;
1993 ev_port->qm_port.use_avx512 = false;
1999 dlb2_hw_map_ldb_qid_to_port(struct dlb2_hw_dev *handle,
2000 uint32_t qm_port_id,
2004 struct dlb2_map_qid_args cfg;
2011 cfg.port_id = qm_port_id;
2013 cfg.priority = EV_TO_DLB2_PRIO(priority);
2015 ret = dlb2_iface_map_qid(handle, &cfg);
2017 DLB2_LOG_ERR("dlb2: map qid error, ret=%d (driver status: %s)\n",
2018 ret, dlb2_error_strings[cfg.response.status]);
2019 DLB2_LOG_ERR("dlb2: grp=%d, qm_port=%d, qm_qid=%d prio=%d\n",
2020 handle->domain_id, cfg.port_id,
2024 DLB2_LOG_DBG("dlb2: mapped queue %d to qm_port %d\n",
2025 qm_qid, qm_port_id);
2032 dlb2_event_queue_join_ldb(struct dlb2_eventdev *dlb2,
2033 struct dlb2_eventdev_port *ev_port,
2034 struct dlb2_eventdev_queue *ev_queue,
2037 int first_avail = -1;
2040 for (i = 0; i < DLB2_MAX_NUM_QIDS_PER_LDB_CQ; i++) {
2041 if (ev_port->link[i].valid) {
2042 if (ev_port->link[i].queue_id == ev_queue->id &&
2043 ev_port->link[i].priority == priority) {
2044 if (ev_port->link[i].mapped)
2045 return 0; /* already mapped */
2048 } else if (first_avail == -1)
2051 if (first_avail == -1) {
2052 DLB2_LOG_ERR("dlb2: qm_port %d has no available QID slots.\n",
2053 ev_port->qm_port.id);
2057 ret = dlb2_hw_map_ldb_qid_to_port(&dlb2->qm_instance,
2058 ev_port->qm_port.id,
2059 ev_queue->qm_queue.id,
2063 ev_port->link[first_avail].mapped = true;
2069 dlb2_hw_create_dir_queue(struct dlb2_eventdev *dlb2,
2070 struct dlb2_eventdev_queue *ev_queue,
2073 struct dlb2_hw_dev *handle = &dlb2->qm_instance;
2074 struct dlb2_create_dir_queue_args cfg;
2077 /* The directed port is always configured before its queue */
2078 cfg.port_id = qm_port_id;
2080 if (ev_queue->depth_threshold == 0) {
2081 cfg.depth_threshold = dlb2->default_depth_thresh;
2082 ev_queue->depth_threshold =
2083 dlb2->default_depth_thresh;
2085 cfg.depth_threshold = ev_queue->depth_threshold;
2087 ret = dlb2_iface_dir_queue_create(handle, &cfg);
2089 DLB2_LOG_ERR("dlb2: create DIR event queue error, ret=%d (driver status: %s)\n",
2090 ret, dlb2_error_strings[cfg.response.status]);
2094 return cfg.response.id;
2098 dlb2_eventdev_dir_queue_setup(struct dlb2_eventdev *dlb2,
2099 struct dlb2_eventdev_queue *ev_queue,
2100 struct dlb2_eventdev_port *ev_port)
2104 qm_qid = dlb2_hw_create_dir_queue(dlb2, ev_queue, ev_port->qm_port.id);
2107 DLB2_LOG_ERR("Failed to create the DIR queue\n");
2111 dlb2->qm_dir_to_ev_queue_id[qm_qid] = ev_queue->id;
2113 ev_queue->qm_queue.id = qm_qid;
2119 dlb2_do_port_link(struct rte_eventdev *dev,
2120 struct dlb2_eventdev_queue *ev_queue,
2121 struct dlb2_eventdev_port *ev_port,
2124 struct dlb2_eventdev *dlb2 = dlb2_pmd_priv(dev);
2127 /* Don't link until start time. */
2128 if (dlb2->run_state == DLB2_RUN_STATE_STOPPED)
2131 if (ev_queue->qm_queue.is_directed)
2132 err = dlb2_eventdev_dir_queue_setup(dlb2, ev_queue, ev_port);
2134 err = dlb2_event_queue_join_ldb(dlb2, ev_port, ev_queue, prio);
2137 DLB2_LOG_ERR("port link failure for %s ev_q %d, ev_port %d\n",
2138 ev_queue->qm_queue.is_directed ? "DIR" : "LDB",
2139 ev_queue->id, ev_port->id);
2149 dlb2_validate_port_link(struct dlb2_eventdev_port *ev_port,
2154 struct dlb2_eventdev *dlb2 = ev_port->dlb2;
2155 struct dlb2_eventdev_queue *ev_queue;
2156 bool port_is_dir, queue_is_dir;
2158 if (queue_id > dlb2->num_queues) {
2159 rte_errno = -EINVAL;
2163 ev_queue = &dlb2->ev_queues[queue_id];
2165 if (!ev_queue->setup_done &&
2166 ev_queue->qm_queue.config_state != DLB2_PREV_CONFIGURED) {
2167 rte_errno = -EINVAL;
2171 port_is_dir = ev_port->qm_port.is_directed;
2172 queue_is_dir = ev_queue->qm_queue.is_directed;
2174 if (port_is_dir != queue_is_dir) {
2175 DLB2_LOG_ERR("%s queue %u can't link to %s port %u\n",
2176 queue_is_dir ? "DIR" : "LDB", ev_queue->id,
2177 port_is_dir ? "DIR" : "LDB", ev_port->id);
2179 rte_errno = -EINVAL;
2183 /* Check if there is space for the requested link */
2184 if (!link_exists && index == -1) {
2185 DLB2_LOG_ERR("no space for new link\n");
2186 rte_errno = -ENOSPC;
2190 /* Check if the directed port is already linked */
2191 if (ev_port->qm_port.is_directed && ev_port->num_links > 0 &&
2193 DLB2_LOG_ERR("Can't link DIR port %d to >1 queues\n",
2195 rte_errno = -EINVAL;
2199 /* Check if the directed queue is already linked */
2200 if (ev_queue->qm_queue.is_directed && ev_queue->num_links > 0 &&
2202 DLB2_LOG_ERR("Can't link DIR queue %d to >1 ports\n",
2204 rte_errno = -EINVAL;
2212 dlb2_eventdev_port_link(struct rte_eventdev *dev, void *event_port,
2213 const uint8_t queues[], const uint8_t priorities[],
2217 struct dlb2_eventdev_port *ev_port = event_port;
2218 struct dlb2_eventdev *dlb2;
2223 if (ev_port == NULL) {
2224 DLB2_LOG_ERR("dlb2: evport not setup\n");
2225 rte_errno = -EINVAL;
2229 if (!ev_port->setup_done &&
2230 ev_port->qm_port.config_state != DLB2_PREV_CONFIGURED) {
2231 DLB2_LOG_ERR("dlb2: evport not setup\n");
2232 rte_errno = -EINVAL;
2236 /* Note: rte_event_port_link() ensures the PMD won't receive a NULL
2239 if (nb_links == 0) {
2240 DLB2_LOG_DBG("dlb2: nb_links is 0\n");
2241 return 0; /* Ignore and return success */
2244 dlb2 = ev_port->dlb2;
2246 DLB2_LOG_DBG("Linking %u queues to %s port %d\n",
2248 ev_port->qm_port.is_directed ? "DIR" : "LDB",
2251 for (i = 0; i < nb_links; i++) {
2252 struct dlb2_eventdev_queue *ev_queue;
2253 uint8_t queue_id, prio;
2257 queue_id = queues[i];
2258 prio = priorities[i];
2260 /* Check if the link already exists. */
2261 for (j = 0; j < DLB2_MAX_NUM_QIDS_PER_LDB_CQ; j++)
2262 if (ev_port->link[j].valid) {
2263 if (ev_port->link[j].queue_id == queue_id) {
2268 } else if (index == -1) {
2272 /* could not link */
2276 /* Check if already linked at the requested priority */
2277 if (found && ev_port->link[j].priority == prio)
2280 if (dlb2_validate_port_link(ev_port, queue_id, found, index))
2281 break; /* return index of offending queue */
2283 ev_queue = &dlb2->ev_queues[queue_id];
2285 if (dlb2_do_port_link(dev, ev_queue, ev_port, prio))
2286 break; /* return index of offending queue */
2288 ev_queue->num_links++;
2290 ev_port->link[index].queue_id = queue_id;
2291 ev_port->link[index].priority = prio;
2292 ev_port->link[index].valid = true;
2293 /* Entry already exists? If so, then must be prio change */
2295 ev_port->num_links++;
2301 dlb2_hw_unmap_ldb_qid_from_port(struct dlb2_hw_dev *handle,
2302 uint32_t qm_port_id,
2305 struct dlb2_unmap_qid_args cfg;
2311 cfg.port_id = qm_port_id;
2314 ret = dlb2_iface_unmap_qid(handle, &cfg);
2316 DLB2_LOG_ERR("dlb2: unmap qid error, ret=%d (driver status: %s)\n",
2317 ret, dlb2_error_strings[cfg.response.status]);
2323 dlb2_event_queue_detach_ldb(struct dlb2_eventdev *dlb2,
2324 struct dlb2_eventdev_port *ev_port,
2325 struct dlb2_eventdev_queue *ev_queue)
2329 /* Don't unlink until start time. */
2330 if (dlb2->run_state == DLB2_RUN_STATE_STOPPED)
2333 for (i = 0; i < DLB2_MAX_NUM_QIDS_PER_LDB_CQ; i++) {
2334 if (ev_port->link[i].valid &&
2335 ev_port->link[i].queue_id == ev_queue->id)
2339 /* This is expected with eventdev API!
2340 * It blindly attempts to unmap all queues.
2342 if (i == DLB2_MAX_NUM_QIDS_PER_LDB_CQ) {
2343 DLB2_LOG_DBG("dlb2: ignoring LB QID %d not mapped for qm_port %d.\n",
2344 ev_queue->qm_queue.id,
2345 ev_port->qm_port.id);
2349 ret = dlb2_hw_unmap_ldb_qid_from_port(&dlb2->qm_instance,
2350 ev_port->qm_port.id,
2351 ev_queue->qm_queue.id);
2353 ev_port->link[i].mapped = false;
2359 dlb2_eventdev_port_unlink(struct rte_eventdev *dev, void *event_port,
2360 uint8_t queues[], uint16_t nb_unlinks)
2362 struct dlb2_eventdev_port *ev_port = event_port;
2363 struct dlb2_eventdev *dlb2;
2368 if (!ev_port->setup_done) {
2369 DLB2_LOG_ERR("dlb2: evport %d is not configured\n",
2371 rte_errno = -EINVAL;
2375 if (queues == NULL || nb_unlinks == 0) {
2376 DLB2_LOG_DBG("dlb2: queues is NULL or nb_unlinks is 0\n");
2377 return 0; /* Ignore and return success */
2380 if (ev_port->qm_port.is_directed) {
2381 DLB2_LOG_DBG("dlb2: ignore unlink from dir port %d\n",
2384 return nb_unlinks; /* as if success */
2387 dlb2 = ev_port->dlb2;
2389 for (i = 0; i < nb_unlinks; i++) {
2390 struct dlb2_eventdev_queue *ev_queue;
2393 if (queues[i] >= dlb2->num_queues) {
2394 DLB2_LOG_ERR("dlb2: invalid queue id %d\n", queues[i]);
2395 rte_errno = -EINVAL;
2396 return i; /* return index of offending queue */
2399 ev_queue = &dlb2->ev_queues[queues[i]];
2401 /* Does a link exist? */
2402 for (j = 0; j < DLB2_MAX_NUM_QIDS_PER_LDB_CQ; j++)
2403 if (ev_port->link[j].queue_id == queues[i] &&
2404 ev_port->link[j].valid)
2407 if (j == DLB2_MAX_NUM_QIDS_PER_LDB_CQ)
2410 ret = dlb2_event_queue_detach_ldb(dlb2, ev_port, ev_queue);
2412 DLB2_LOG_ERR("unlink err=%d for port %d queue %d\n",
2413 ret, ev_port->id, queues[i]);
2414 rte_errno = -ENOENT;
2415 return i; /* return index of offending queue */
2418 ev_port->link[j].valid = false;
2419 ev_port->num_links--;
2420 ev_queue->num_links--;
2427 dlb2_eventdev_port_unlinks_in_progress(struct rte_eventdev *dev,
2430 struct dlb2_eventdev_port *ev_port = event_port;
2431 struct dlb2_eventdev *dlb2;
2432 struct dlb2_hw_dev *handle;
2433 struct dlb2_pending_port_unmaps_args cfg;
2438 if (!ev_port->setup_done) {
2439 DLB2_LOG_ERR("dlb2: evport %d is not configured\n",
2441 rte_errno = -EINVAL;
2445 cfg.port_id = ev_port->qm_port.id;
2446 dlb2 = ev_port->dlb2;
2447 handle = &dlb2->qm_instance;
2448 ret = dlb2_iface_pending_port_unmaps(handle, &cfg);
2451 DLB2_LOG_ERR("dlb2: num_unlinks_in_progress ret=%d (driver status: %s)\n",
2452 ret, dlb2_error_strings[cfg.response.status]);
2456 return cfg.response.id;
2460 dlb2_eventdev_reapply_configuration(struct rte_eventdev *dev)
2462 struct dlb2_eventdev *dlb2 = dlb2_pmd_priv(dev);
2465 /* If an event queue or port was previously configured, but hasn't been
2466 * reconfigured, reapply its original configuration.
2468 for (i = 0; i < dlb2->num_queues; i++) {
2469 struct dlb2_eventdev_queue *ev_queue;
2471 ev_queue = &dlb2->ev_queues[i];
2473 if (ev_queue->qm_queue.config_state != DLB2_PREV_CONFIGURED)
2476 ret = dlb2_eventdev_queue_setup(dev, i, &ev_queue->conf);
2478 DLB2_LOG_ERR("dlb2: failed to reconfigure queue %d", i);
2483 for (i = 0; i < dlb2->num_ports; i++) {
2484 struct dlb2_eventdev_port *ev_port = &dlb2->ev_ports[i];
2486 if (ev_port->qm_port.config_state != DLB2_PREV_CONFIGURED)
2489 ret = dlb2_eventdev_port_setup(dev, i, &ev_port->conf);
2491 DLB2_LOG_ERR("dlb2: failed to reconfigure ev_port %d",
2501 dlb2_eventdev_apply_port_links(struct rte_eventdev *dev)
2503 struct dlb2_eventdev *dlb2 = dlb2_pmd_priv(dev);
2506 /* Perform requested port->queue links */
2507 for (i = 0; i < dlb2->num_ports; i++) {
2508 struct dlb2_eventdev_port *ev_port = &dlb2->ev_ports[i];
2511 for (j = 0; j < DLB2_MAX_NUM_QIDS_PER_LDB_CQ; j++) {
2512 struct dlb2_eventdev_queue *ev_queue;
2513 uint8_t prio, queue_id;
2515 if (!ev_port->link[j].valid)
2518 prio = ev_port->link[j].priority;
2519 queue_id = ev_port->link[j].queue_id;
2521 if (dlb2_validate_port_link(ev_port, queue_id, true, j))
2524 ev_queue = &dlb2->ev_queues[queue_id];
2526 if (dlb2_do_port_link(dev, ev_queue, ev_port, prio))
2535 dlb2_eventdev_start(struct rte_eventdev *dev)
2537 struct dlb2_eventdev *dlb2 = dlb2_pmd_priv(dev);
2538 struct dlb2_hw_dev *handle = &dlb2->qm_instance;
2539 struct dlb2_start_domain_args cfg;
2542 rte_spinlock_lock(&dlb2->qm_instance.resource_lock);
2543 if (dlb2->run_state != DLB2_RUN_STATE_STOPPED) {
2544 DLB2_LOG_ERR("bad state %d for dev_start\n",
2545 (int)dlb2->run_state);
2546 rte_spinlock_unlock(&dlb2->qm_instance.resource_lock);
2549 dlb2->run_state = DLB2_RUN_STATE_STARTING;
2550 rte_spinlock_unlock(&dlb2->qm_instance.resource_lock);
2552 /* If the device was configured more than once, some event ports and/or
2553 * queues may need to be reconfigured.
2555 ret = dlb2_eventdev_reapply_configuration(dev);
2559 /* The DLB PMD delays port links until the device is started. */
2560 ret = dlb2_eventdev_apply_port_links(dev);
2564 for (i = 0; i < dlb2->num_ports; i++) {
2565 if (!dlb2->ev_ports[i].setup_done) {
2566 DLB2_LOG_ERR("dlb2: port %d not setup", i);
2571 for (i = 0; i < dlb2->num_queues; i++) {
2572 if (dlb2->ev_queues[i].num_links == 0) {
2573 DLB2_LOG_ERR("dlb2: queue %d is not linked", i);
2578 ret = dlb2_iface_sched_domain_start(handle, &cfg);
2580 DLB2_LOG_ERR("dlb2: sched_domain_start ret=%d (driver status: %s)\n",
2581 ret, dlb2_error_strings[cfg.response.status]);
2585 dlb2->run_state = DLB2_RUN_STATE_STARTED;
2586 DLB2_LOG_DBG("dlb2: sched_domain_start completed OK\n");
2591 static inline uint32_t
2592 dlb2_port_credits_get(struct dlb2_port *qm_port,
2593 enum dlb2_hw_queue_types type)
2595 uint32_t credits = *qm_port->credit_pool[type];
2596 /* By default hw_credit_quanta is DLB2_SW_CREDIT_BATCH_SZ */
2597 uint32_t batch_size = qm_port->hw_credit_quanta;
2599 if (unlikely(credits < batch_size))
2600 batch_size = credits;
2602 if (likely(credits &&
2603 __atomic_compare_exchange_n(
2604 qm_port->credit_pool[type],
2605 &credits, credits - batch_size, false,
2606 __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST)))
2613 dlb2_replenish_sw_credits(struct dlb2_eventdev *dlb2,
2614 struct dlb2_eventdev_port *ev_port)
2616 uint16_t quanta = ev_port->credit_update_quanta;
2618 if (ev_port->inflight_credits >= quanta * 2) {
2619 /* Replenish credits, saving one quanta for enqueues */
2620 uint16_t val = ev_port->inflight_credits - quanta;
2622 __atomic_fetch_sub(&dlb2->inflights, val, __ATOMIC_SEQ_CST);
2623 ev_port->inflight_credits -= val;
2628 dlb2_check_enqueue_sw_credits(struct dlb2_eventdev *dlb2,
2629 struct dlb2_eventdev_port *ev_port)
2631 uint32_t sw_inflights = __atomic_load_n(&dlb2->inflights,
2635 if (unlikely(ev_port->inflight_max < sw_inflights)) {
2636 DLB2_INC_STAT(ev_port->stats.traffic.tx_nospc_inflight_max, 1);
2637 rte_errno = -ENOSPC;
2641 if (ev_port->inflight_credits < num) {
2642 /* check if event enqueue brings ev_port over max threshold */
2643 uint32_t credit_update_quanta = ev_port->credit_update_quanta;
2645 if (sw_inflights + credit_update_quanta >
2646 dlb2->new_event_limit) {
2648 ev_port->stats.traffic.tx_nospc_new_event_limit,
2650 rte_errno = -ENOSPC;
2654 __atomic_fetch_add(&dlb2->inflights, credit_update_quanta,
2656 ev_port->inflight_credits += (credit_update_quanta);
2658 if (ev_port->inflight_credits < num) {
2660 ev_port->stats.traffic.tx_nospc_inflight_credits,
2662 rte_errno = -ENOSPC;
2671 dlb2_check_enqueue_hw_ldb_credits(struct dlb2_port *qm_port)
2673 if (unlikely(qm_port->cached_ldb_credits == 0)) {
2674 qm_port->cached_ldb_credits =
2675 dlb2_port_credits_get(qm_port,
2677 if (unlikely(qm_port->cached_ldb_credits == 0)) {
2679 qm_port->ev_port->stats.traffic.tx_nospc_ldb_hw_credits,
2681 DLB2_LOG_DBG("ldb credits exhausted\n");
2682 return 1; /* credits exhausted */
2690 dlb2_check_enqueue_hw_dir_credits(struct dlb2_port *qm_port)
2692 if (unlikely(qm_port->cached_dir_credits == 0)) {
2693 qm_port->cached_dir_credits =
2694 dlb2_port_credits_get(qm_port,
2696 if (unlikely(qm_port->cached_dir_credits == 0)) {
2698 qm_port->ev_port->stats.traffic.tx_nospc_dir_hw_credits,
2700 DLB2_LOG_DBG("dir credits exhausted\n");
2701 return 1; /* credits exhausted */
2709 dlb2_check_enqueue_hw_credits(struct dlb2_port *qm_port)
2711 if (unlikely(qm_port->cached_credits == 0)) {
2712 qm_port->cached_credits =
2713 dlb2_port_credits_get(qm_port,
2714 DLB2_COMBINED_POOL);
2715 if (unlikely(qm_port->cached_credits == 0)) {
2717 qm_port->ev_port->stats.traffic.tx_nospc_hw_credits, 1);
2718 DLB2_LOG_DBG("credits exhausted\n");
2719 return 1; /* credits exhausted */
2726 static __rte_always_inline void
2727 dlb2_pp_write(struct dlb2_enqueue_qe *qe4,
2728 struct process_local_port_data *port_data)
2730 dlb2_movdir64b(port_data->pp_addr, qe4);
2734 dlb2_consume_qe_immediate(struct dlb2_port *qm_port, int num)
2736 struct process_local_port_data *port_data;
2737 struct dlb2_cq_pop_qe *qe;
2739 RTE_ASSERT(qm_port->config_state == DLB2_CONFIGURED);
2741 qe = qm_port->consume_qe;
2743 qe->tokens = num - 1;
2745 /* No store fence needed since no pointer is being sent, and CQ token
2746 * pops can be safely reordered with other HCWs.
2748 port_data = &dlb2_port[qm_port->id][PORT_TYPE(qm_port)];
2750 dlb2_movntdq_single(port_data->pp_addr, qe);
2752 DLB2_LOG_DBG("dlb2: consume immediate - %d QEs\n", num);
2754 qm_port->owed_tokens = 0;
2760 dlb2_hw_do_enqueue(struct dlb2_port *qm_port,
2762 struct process_local_port_data *port_data)
2764 /* Since MOVDIR64B is weakly-ordered, use an SFENCE to ensure that
2765 * application writes complete before enqueueing the QE.
2770 dlb2_pp_write(qm_port->qe4, port_data);
2774 dlb2_construct_token_pop_qe(struct dlb2_port *qm_port, int idx)
2776 struct dlb2_cq_pop_qe *qe = (void *)qm_port->qe4;
2777 int num = qm_port->owed_tokens;
2779 qe[idx].cmd_byte = DLB2_POP_CMD_BYTE;
2780 qe[idx].tokens = num - 1;
2782 qm_port->owed_tokens = 0;
2786 dlb2_event_enqueue_prep(struct dlb2_eventdev_port *ev_port,
2787 struct dlb2_port *qm_port,
2788 const struct rte_event ev[],
2789 uint8_t *sched_type,
2792 struct dlb2_eventdev *dlb2 = ev_port->dlb2;
2793 struct dlb2_eventdev_queue *ev_queue;
2794 uint16_t *cached_credits = NULL;
2795 struct dlb2_queue *qm_queue;
2797 ev_queue = &dlb2->ev_queues[ev->queue_id];
2798 qm_queue = &ev_queue->qm_queue;
2799 *queue_id = qm_queue->id;
2801 /* Ignore sched_type and hardware credits on release events */
2802 if (ev->op == RTE_EVENT_OP_RELEASE)
2805 if (!qm_queue->is_directed) {
2806 /* Load balanced destination queue */
2808 if (dlb2->version == DLB2_HW_V2) {
2809 if (dlb2_check_enqueue_hw_ldb_credits(qm_port)) {
2810 rte_errno = -ENOSPC;
2813 cached_credits = &qm_port->cached_ldb_credits;
2815 if (dlb2_check_enqueue_hw_credits(qm_port)) {
2816 rte_errno = -ENOSPC;
2819 cached_credits = &qm_port->cached_credits;
2821 switch (ev->sched_type) {
2822 case RTE_SCHED_TYPE_ORDERED:
2823 DLB2_LOG_DBG("dlb2: put_qe: RTE_SCHED_TYPE_ORDERED\n");
2824 if (qm_queue->sched_type != RTE_SCHED_TYPE_ORDERED) {
2825 DLB2_LOG_ERR("dlb2: tried to send ordered event to unordered queue %d\n",
2827 rte_errno = -EINVAL;
2830 *sched_type = DLB2_SCHED_ORDERED;
2832 case RTE_SCHED_TYPE_ATOMIC:
2833 DLB2_LOG_DBG("dlb2: put_qe: RTE_SCHED_TYPE_ATOMIC\n");
2834 *sched_type = DLB2_SCHED_ATOMIC;
2836 case RTE_SCHED_TYPE_PARALLEL:
2837 DLB2_LOG_DBG("dlb2: put_qe: RTE_SCHED_TYPE_PARALLEL\n");
2838 if (qm_queue->sched_type == RTE_SCHED_TYPE_ORDERED)
2839 *sched_type = DLB2_SCHED_ORDERED;
2841 *sched_type = DLB2_SCHED_UNORDERED;
2844 DLB2_LOG_ERR("Unsupported LDB sched type in put_qe\n");
2845 DLB2_INC_STAT(ev_port->stats.tx_invalid, 1);
2846 rte_errno = -EINVAL;
2850 /* Directed destination queue */
2852 if (dlb2->version == DLB2_HW_V2) {
2853 if (dlb2_check_enqueue_hw_dir_credits(qm_port)) {
2854 rte_errno = -ENOSPC;
2857 cached_credits = &qm_port->cached_dir_credits;
2859 if (dlb2_check_enqueue_hw_credits(qm_port)) {
2860 rte_errno = -ENOSPC;
2863 cached_credits = &qm_port->cached_credits;
2865 DLB2_LOG_DBG("dlb2: put_qe: RTE_SCHED_TYPE_DIRECTED\n");
2867 *sched_type = DLB2_SCHED_DIRECTED;
2872 case RTE_EVENT_OP_NEW:
2873 /* Check that a sw credit is available */
2874 if (dlb2_check_enqueue_sw_credits(dlb2, ev_port)) {
2875 rte_errno = -ENOSPC;
2878 ev_port->inflight_credits--;
2879 (*cached_credits)--;
2881 case RTE_EVENT_OP_FORWARD:
2882 /* Check for outstanding_releases underflow. If this occurs,
2883 * the application is not using the EVENT_OPs correctly; for
2884 * example, forwarding or releasing events that were not
2887 RTE_ASSERT(ev_port->outstanding_releases > 0);
2888 ev_port->outstanding_releases--;
2889 qm_port->issued_releases++;
2890 (*cached_credits)--;
2892 case RTE_EVENT_OP_RELEASE:
2893 ev_port->inflight_credits++;
2894 /* Check for outstanding_releases underflow. If this occurs,
2895 * the application is not using the EVENT_OPs correctly; for
2896 * example, forwarding or releasing events that were not
2899 RTE_ASSERT(ev_port->outstanding_releases > 0);
2900 ev_port->outstanding_releases--;
2901 qm_port->issued_releases++;
2903 /* Replenish s/w credits if enough are cached */
2904 dlb2_replenish_sw_credits(dlb2, ev_port);
2908 DLB2_INC_STAT(ev_port->stats.tx_op_cnt[ev->op], 1);
2909 DLB2_INC_STAT(ev_port->stats.traffic.tx_ok, 1);
2911 #ifndef RTE_LIBRTE_PMD_DLB_QUELL_STATS
2912 if (ev->op != RTE_EVENT_OP_RELEASE) {
2913 DLB2_INC_STAT(ev_port->stats.queue[ev->queue_id].enq_ok, 1);
2914 DLB2_INC_STAT(ev_port->stats.tx_sched_cnt[*sched_type], 1);
2921 static inline uint16_t
2922 __dlb2_event_enqueue_burst(void *event_port,
2923 const struct rte_event events[],
2927 struct dlb2_eventdev_port *ev_port = event_port;
2928 struct dlb2_port *qm_port = &ev_port->qm_port;
2929 struct process_local_port_data *port_data;
2930 int retries = ev_port->enq_retries;
2933 RTE_ASSERT(ev_port->enq_configured);
2934 RTE_ASSERT(events != NULL);
2938 port_data = &dlb2_port[qm_port->id][PORT_TYPE(qm_port)];
2941 uint8_t sched_types[DLB2_NUM_QES_PER_CACHE_LINE];
2942 uint8_t queue_ids[DLB2_NUM_QES_PER_CACHE_LINE];
2946 memset(qm_port->qe4,
2948 DLB2_NUM_QES_PER_CACHE_LINE *
2949 sizeof(struct dlb2_enqueue_qe));
2951 for (; j < DLB2_NUM_QES_PER_CACHE_LINE && (i + j) < num; j++) {
2952 const struct rte_event *ev = &events[i + j];
2953 int16_t thresh = qm_port->token_pop_thresh;
2957 qm_port->token_pop_mode == DELAYED_POP &&
2958 (ev->op == RTE_EVENT_OP_FORWARD ||
2959 ev->op == RTE_EVENT_OP_RELEASE) &&
2960 qm_port->issued_releases >= thresh - 1) {
2961 /* Insert the token pop QE and break out. This
2962 * may result in a partial HCW, but that is
2963 * simpler than supporting arbitrary QE
2966 dlb2_construct_token_pop_qe(qm_port, j);
2968 /* Reset the releases for the next QE batch */
2969 qm_port->issued_releases -= thresh;
2977 * Retry if insufficient credits
2980 ret = dlb2_event_enqueue_prep(ev_port,
2985 } while ((ret == -ENOSPC) && (retries-- > 0));
2994 dlb2_event_build_hcws(qm_port, &events[i], j - pop_offs,
2995 sched_types, queue_ids);
2997 dlb2_hw_do_enqueue(qm_port, i == 0, port_data);
2999 /* Don't include the token pop QE in the enqueue count */
3002 /* Don't interpret j < DLB2_NUM_... as out-of-credits if
3005 if (j < DLB2_NUM_QES_PER_CACHE_LINE && pop_offs == 0)
3013 dlb2_event_enqueue_burst(void *event_port,
3014 const struct rte_event events[],
3017 return __dlb2_event_enqueue_burst(event_port, events, num, false);
3021 dlb2_event_enqueue_burst_delayed(void *event_port,
3022 const struct rte_event events[],
3025 return __dlb2_event_enqueue_burst(event_port, events, num, true);
3028 static inline uint16_t
3029 dlb2_event_enqueue(void *event_port,
3030 const struct rte_event events[])
3032 return __dlb2_event_enqueue_burst(event_port, events, 1, false);
3035 static inline uint16_t
3036 dlb2_event_enqueue_delayed(void *event_port,
3037 const struct rte_event events[])
3039 return __dlb2_event_enqueue_burst(event_port, events, 1, true);
3043 dlb2_event_enqueue_new_burst(void *event_port,
3044 const struct rte_event events[],
3047 return __dlb2_event_enqueue_burst(event_port, events, num, false);
3051 dlb2_event_enqueue_new_burst_delayed(void *event_port,
3052 const struct rte_event events[],
3055 return __dlb2_event_enqueue_burst(event_port, events, num, true);
3059 dlb2_event_enqueue_forward_burst(void *event_port,
3060 const struct rte_event events[],
3063 return __dlb2_event_enqueue_burst(event_port, events, num, false);
3067 dlb2_event_enqueue_forward_burst_delayed(void *event_port,
3068 const struct rte_event events[],
3071 return __dlb2_event_enqueue_burst(event_port, events, num, true);
3075 dlb2_event_release(struct dlb2_eventdev *dlb2,
3079 struct process_local_port_data *port_data;
3080 struct dlb2_eventdev_port *ev_port;
3081 struct dlb2_port *qm_port;
3084 if (port_id > dlb2->num_ports) {
3085 DLB2_LOG_ERR("Invalid port id %d in dlb2-event_release\n",
3087 rte_errno = -EINVAL;
3091 ev_port = &dlb2->ev_ports[port_id];
3092 qm_port = &ev_port->qm_port;
3093 port_data = &dlb2_port[qm_port->id][PORT_TYPE(qm_port)];
3097 if (qm_port->is_directed) {
3099 goto sw_credit_update;
3107 _mm_storeu_si128((void *)&qm_port->qe4[0], _mm_setzero_si128());
3108 _mm_storeu_si128((void *)&qm_port->qe4[1], _mm_setzero_si128());
3109 _mm_storeu_si128((void *)&qm_port->qe4[2], _mm_setzero_si128());
3110 _mm_storeu_si128((void *)&qm_port->qe4[3], _mm_setzero_si128());
3113 for (; j < DLB2_NUM_QES_PER_CACHE_LINE && (i + j) < n; j++) {
3114 int16_t thresh = qm_port->token_pop_thresh;
3116 if (qm_port->token_pop_mode == DELAYED_POP &&
3117 qm_port->issued_releases >= thresh - 1) {
3118 /* Insert the token pop QE */
3119 dlb2_construct_token_pop_qe(qm_port, j);
3121 /* Reset the releases for the next QE batch */
3122 qm_port->issued_releases -= thresh;
3129 qm_port->qe4[j].cmd_byte = DLB2_COMP_CMD_BYTE;
3130 qm_port->issued_releases++;
3133 dlb2_hw_do_enqueue(qm_port, i == 0, port_data);
3135 /* Don't include the token pop QE in the release count */
3140 /* each release returns one credit */
3141 if (unlikely(!ev_port->outstanding_releases)) {
3142 DLB2_LOG_ERR("%s: Outstanding releases underflowed.\n",
3146 ev_port->outstanding_releases -= i;
3147 ev_port->inflight_credits += i;
3149 /* Replenish s/w credits if enough releases are performed */
3150 dlb2_replenish_sw_credits(dlb2, ev_port);
3154 dlb2_port_credits_inc(struct dlb2_port *qm_port, int num)
3156 uint32_t batch_size = qm_port->hw_credit_quanta;
3158 /* increment port credits, and return to pool if exceeds threshold */
3159 if (!qm_port->is_directed) {
3160 if (qm_port->dlb2->version == DLB2_HW_V2) {
3161 qm_port->cached_ldb_credits += num;
3162 if (qm_port->cached_ldb_credits >= 2 * batch_size) {
3164 qm_port->credit_pool[DLB2_LDB_QUEUE],
3165 batch_size, __ATOMIC_SEQ_CST);
3166 qm_port->cached_ldb_credits -= batch_size;
3169 qm_port->cached_credits += num;
3170 if (qm_port->cached_credits >= 2 * batch_size) {
3172 qm_port->credit_pool[DLB2_COMBINED_POOL],
3173 batch_size, __ATOMIC_SEQ_CST);
3174 qm_port->cached_credits -= batch_size;
3178 if (qm_port->dlb2->version == DLB2_HW_V2) {
3179 qm_port->cached_dir_credits += num;
3180 if (qm_port->cached_dir_credits >= 2 * batch_size) {
3182 qm_port->credit_pool[DLB2_DIR_QUEUE],
3183 batch_size, __ATOMIC_SEQ_CST);
3184 qm_port->cached_dir_credits -= batch_size;
3187 qm_port->cached_credits += num;
3188 if (qm_port->cached_credits >= 2 * batch_size) {
3190 qm_port->credit_pool[DLB2_COMBINED_POOL],
3191 batch_size, __ATOMIC_SEQ_CST);
3192 qm_port->cached_credits -= batch_size;
3198 #define CLB_MASK_IDX 0
3199 #define CLB_VAL_IDX 1
3201 dlb2_monitor_callback(const uint64_t val,
3202 const uint64_t opaque[RTE_POWER_MONITOR_OPAQUE_SZ])
3204 /* abort if the value matches */
3205 return (val & opaque[CLB_MASK_IDX]) == opaque[CLB_VAL_IDX] ? -1 : 0;
3209 dlb2_dequeue_wait(struct dlb2_eventdev *dlb2,
3210 struct dlb2_eventdev_port *ev_port,
3211 struct dlb2_port *qm_port,
3213 uint64_t start_ticks)
3215 struct process_local_port_data *port_data;
3216 uint64_t elapsed_ticks;
3218 port_data = &dlb2_port[qm_port->id][PORT_TYPE(qm_port)];
3220 elapsed_ticks = rte_get_timer_cycles() - start_ticks;
3222 /* Wait/poll time expired */
3223 if (elapsed_ticks >= timeout) {
3225 } else if (dlb2->umwait_allowed) {
3226 struct rte_power_monitor_cond pmc;
3227 volatile struct dlb2_dequeue_qe *cq_base;
3230 struct dlb2_dequeue_qe qe;
3232 uint64_t expected_value;
3233 volatile uint64_t *monitor_addr;
3235 qe_mask.qe.cq_gen = 1; /* set mask */
3237 cq_base = port_data->cq_base;
3238 monitor_addr = (volatile uint64_t *)(volatile void *)
3239 &cq_base[qm_port->cq_idx];
3240 monitor_addr++; /* cq_gen bit is in second 64bit location */
3242 if (qm_port->gen_bit)
3243 expected_value = qe_mask.raw_qe[1];
3247 pmc.addr = monitor_addr;
3248 /* store expected value and comparison mask in opaque data */
3249 pmc.opaque[CLB_VAL_IDX] = expected_value;
3250 pmc.opaque[CLB_MASK_IDX] = qe_mask.raw_qe[1];
3251 /* set up callback */
3252 pmc.fn = dlb2_monitor_callback;
3253 pmc.size = sizeof(uint64_t);
3255 rte_power_monitor(&pmc, timeout + start_ticks);
3257 DLB2_INC_STAT(ev_port->stats.traffic.rx_umonitor_umwait, 1);
3259 uint64_t poll_interval = dlb2->poll_interval;
3260 uint64_t curr_ticks = rte_get_timer_cycles();
3261 uint64_t init_ticks = curr_ticks;
3263 while ((curr_ticks - start_ticks < timeout) &&
3264 (curr_ticks - init_ticks < poll_interval))
3265 curr_ticks = rte_get_timer_cycles();
3271 static __rte_noinline int
3272 dlb2_process_dequeue_qes(struct dlb2_eventdev_port *ev_port,
3273 struct dlb2_port *qm_port,
3274 struct rte_event *events,
3275 struct dlb2_dequeue_qe *qes,
3278 uint8_t *qid_mappings = qm_port->qid_mappings;
3281 for (i = 0, num = 0; i < cnt; i++) {
3282 struct dlb2_dequeue_qe *qe = &qes[i];
3283 int sched_type_map[DLB2_NUM_HW_SCHED_TYPES] = {
3284 [DLB2_SCHED_ATOMIC] = RTE_SCHED_TYPE_ATOMIC,
3285 [DLB2_SCHED_UNORDERED] = RTE_SCHED_TYPE_PARALLEL,
3286 [DLB2_SCHED_ORDERED] = RTE_SCHED_TYPE_ORDERED,
3287 [DLB2_SCHED_DIRECTED] = RTE_SCHED_TYPE_ATOMIC,
3290 /* Fill in event information.
3291 * Note that flow_id must be embedded in the data by
3292 * the app, such as the mbuf RSS hash field if the data
3295 if (unlikely(qe->error)) {
3296 DLB2_LOG_ERR("QE error bit ON\n");
3297 DLB2_INC_STAT(ev_port->stats.traffic.rx_drop, 1);
3298 dlb2_consume_qe_immediate(qm_port, 1);
3299 continue; /* Ignore */
3302 events[num].u64 = qe->data;
3303 events[num].flow_id = qe->flow_id;
3304 events[num].priority = DLB2_TO_EV_PRIO((uint8_t)qe->priority);
3305 events[num].event_type = qe->u.event_type.major;
3306 events[num].sub_event_type = qe->u.event_type.sub;
3307 events[num].sched_type = sched_type_map[qe->sched_type];
3308 events[num].impl_opaque = qe->qid_depth;
3310 /* qid not preserved for directed queues */
3311 if (qm_port->is_directed)
3312 evq_id = ev_port->link[0].queue_id;
3314 evq_id = qid_mappings[qe->qid];
3316 events[num].queue_id = evq_id;
3318 ev_port->stats.queue[evq_id].qid_depth[qe->qid_depth],
3320 DLB2_INC_STAT(ev_port->stats.rx_sched_cnt[qe->sched_type], 1);
3324 DLB2_INC_STAT(ev_port->stats.traffic.rx_ok, num);
3330 dlb2_process_dequeue_four_qes(struct dlb2_eventdev_port *ev_port,
3331 struct dlb2_port *qm_port,
3332 struct rte_event *events,
3333 struct dlb2_dequeue_qe *qes)
3335 int sched_type_map[] = {
3336 [DLB2_SCHED_ATOMIC] = RTE_SCHED_TYPE_ATOMIC,
3337 [DLB2_SCHED_UNORDERED] = RTE_SCHED_TYPE_PARALLEL,
3338 [DLB2_SCHED_ORDERED] = RTE_SCHED_TYPE_ORDERED,
3339 [DLB2_SCHED_DIRECTED] = RTE_SCHED_TYPE_ATOMIC,
3341 const int num_events = DLB2_NUM_QES_PER_CACHE_LINE;
3342 uint8_t *qid_mappings = qm_port->qid_mappings;
3345 /* In the unlikely case that any of the QE error bits are set, process
3346 * them one at a time.
3348 if (unlikely(qes[0].error || qes[1].error ||
3349 qes[2].error || qes[3].error))
3350 return dlb2_process_dequeue_qes(ev_port, qm_port, events,
3353 events[0].u64 = qes[0].data;
3354 events[1].u64 = qes[1].data;
3355 events[2].u64 = qes[2].data;
3356 events[3].u64 = qes[3].data;
3358 /* Construct the metadata portion of two struct rte_events
3359 * in one 128b SSE register. Event metadata is constructed in the SSE
3360 * registers like so:
3361 * sse_evt[0][63:0]: event[0]'s metadata
3362 * sse_evt[0][127:64]: event[1]'s metadata
3363 * sse_evt[1][63:0]: event[2]'s metadata
3364 * sse_evt[1][127:64]: event[3]'s metadata
3366 sse_evt[0] = _mm_setzero_si128();
3367 sse_evt[1] = _mm_setzero_si128();
3369 /* Convert the hardware queue ID to an event queue ID and store it in
3371 * sse_evt[0][47:40] = qid_mappings[qes[0].qid]
3372 * sse_evt[0][111:104] = qid_mappings[qes[1].qid]
3373 * sse_evt[1][47:40] = qid_mappings[qes[2].qid]
3374 * sse_evt[1][111:104] = qid_mappings[qes[3].qid]
3376 #define DLB_EVENT_QUEUE_ID_BYTE 5
3377 sse_evt[0] = _mm_insert_epi8(sse_evt[0],
3378 qid_mappings[qes[0].qid],
3379 DLB_EVENT_QUEUE_ID_BYTE);
3380 sse_evt[0] = _mm_insert_epi8(sse_evt[0],
3381 qid_mappings[qes[1].qid],
3382 DLB_EVENT_QUEUE_ID_BYTE + 8);
3383 sse_evt[1] = _mm_insert_epi8(sse_evt[1],
3384 qid_mappings[qes[2].qid],
3385 DLB_EVENT_QUEUE_ID_BYTE);
3386 sse_evt[1] = _mm_insert_epi8(sse_evt[1],
3387 qid_mappings[qes[3].qid],
3388 DLB_EVENT_QUEUE_ID_BYTE + 8);
3390 /* Convert the hardware priority to an event priority and store it in
3391 * the metadata, while also returning the queue depth status
3392 * value captured by the hardware, storing it in impl_opaque, which can
3393 * be read by the application but not modified
3394 * sse_evt[0][55:48] = DLB2_TO_EV_PRIO(qes[0].priority)
3395 * sse_evt[0][63:56] = qes[0].qid_depth
3396 * sse_evt[0][119:112] = DLB2_TO_EV_PRIO(qes[1].priority)
3397 * sse_evt[0][127:120] = qes[1].qid_depth
3398 * sse_evt[1][55:48] = DLB2_TO_EV_PRIO(qes[2].priority)
3399 * sse_evt[1][63:56] = qes[2].qid_depth
3400 * sse_evt[1][119:112] = DLB2_TO_EV_PRIO(qes[3].priority)
3401 * sse_evt[1][127:120] = qes[3].qid_depth
3403 #define DLB_EVENT_PRIO_IMPL_OPAQUE_WORD 3
3404 #define DLB_BYTE_SHIFT 8
3406 _mm_insert_epi16(sse_evt[0],
3407 DLB2_TO_EV_PRIO((uint8_t)qes[0].priority) |
3408 (qes[0].qid_depth << DLB_BYTE_SHIFT),
3409 DLB_EVENT_PRIO_IMPL_OPAQUE_WORD);
3411 _mm_insert_epi16(sse_evt[0],
3412 DLB2_TO_EV_PRIO((uint8_t)qes[1].priority) |
3413 (qes[1].qid_depth << DLB_BYTE_SHIFT),
3414 DLB_EVENT_PRIO_IMPL_OPAQUE_WORD + 4);
3416 _mm_insert_epi16(sse_evt[1],
3417 DLB2_TO_EV_PRIO((uint8_t)qes[2].priority) |
3418 (qes[2].qid_depth << DLB_BYTE_SHIFT),
3419 DLB_EVENT_PRIO_IMPL_OPAQUE_WORD);
3421 _mm_insert_epi16(sse_evt[1],
3422 DLB2_TO_EV_PRIO((uint8_t)qes[3].priority) |
3423 (qes[3].qid_depth << DLB_BYTE_SHIFT),
3424 DLB_EVENT_PRIO_IMPL_OPAQUE_WORD + 4);
3426 /* Write the event type, sub event type, and flow_id to the event
3428 * sse_evt[0][31:0] = qes[0].flow_id |
3429 * qes[0].u.event_type.major << 28 |
3430 * qes[0].u.event_type.sub << 20;
3431 * sse_evt[0][95:64] = qes[1].flow_id |
3432 * qes[1].u.event_type.major << 28 |
3433 * qes[1].u.event_type.sub << 20;
3434 * sse_evt[1][31:0] = qes[2].flow_id |
3435 * qes[2].u.event_type.major << 28 |
3436 * qes[2].u.event_type.sub << 20;
3437 * sse_evt[1][95:64] = qes[3].flow_id |
3438 * qes[3].u.event_type.major << 28 |
3439 * qes[3].u.event_type.sub << 20;
3441 #define DLB_EVENT_EV_TYPE_DW 0
3442 #define DLB_EVENT_EV_TYPE_SHIFT 28
3443 #define DLB_EVENT_SUB_EV_TYPE_SHIFT 20
3444 sse_evt[0] = _mm_insert_epi32(sse_evt[0],
3446 qes[0].u.event_type.major << DLB_EVENT_EV_TYPE_SHIFT |
3447 qes[0].u.event_type.sub << DLB_EVENT_SUB_EV_TYPE_SHIFT,
3448 DLB_EVENT_EV_TYPE_DW);
3449 sse_evt[0] = _mm_insert_epi32(sse_evt[0],
3451 qes[1].u.event_type.major << DLB_EVENT_EV_TYPE_SHIFT |
3452 qes[1].u.event_type.sub << DLB_EVENT_SUB_EV_TYPE_SHIFT,
3453 DLB_EVENT_EV_TYPE_DW + 2);
3454 sse_evt[1] = _mm_insert_epi32(sse_evt[1],
3456 qes[2].u.event_type.major << DLB_EVENT_EV_TYPE_SHIFT |
3457 qes[2].u.event_type.sub << DLB_EVENT_SUB_EV_TYPE_SHIFT,
3458 DLB_EVENT_EV_TYPE_DW);
3459 sse_evt[1] = _mm_insert_epi32(sse_evt[1],
3461 qes[3].u.event_type.major << DLB_EVENT_EV_TYPE_SHIFT |
3462 qes[3].u.event_type.sub << DLB_EVENT_SUB_EV_TYPE_SHIFT,
3463 DLB_EVENT_EV_TYPE_DW + 2);
3465 /* Write the sched type to the event metadata. 'op' and 'rsvd' are not
3467 * sse_evt[0][39:32] = sched_type_map[qes[0].sched_type] << 6
3468 * sse_evt[0][103:96] = sched_type_map[qes[1].sched_type] << 6
3469 * sse_evt[1][39:32] = sched_type_map[qes[2].sched_type] << 6
3470 * sse_evt[1][103:96] = sched_type_map[qes[3].sched_type] << 6
3472 #define DLB_EVENT_SCHED_TYPE_BYTE 4
3473 #define DLB_EVENT_SCHED_TYPE_SHIFT 6
3474 sse_evt[0] = _mm_insert_epi8(sse_evt[0],
3475 sched_type_map[qes[0].sched_type] << DLB_EVENT_SCHED_TYPE_SHIFT,
3476 DLB_EVENT_SCHED_TYPE_BYTE);
3477 sse_evt[0] = _mm_insert_epi8(sse_evt[0],
3478 sched_type_map[qes[1].sched_type] << DLB_EVENT_SCHED_TYPE_SHIFT,
3479 DLB_EVENT_SCHED_TYPE_BYTE + 8);
3480 sse_evt[1] = _mm_insert_epi8(sse_evt[1],
3481 sched_type_map[qes[2].sched_type] << DLB_EVENT_SCHED_TYPE_SHIFT,
3482 DLB_EVENT_SCHED_TYPE_BYTE);
3483 sse_evt[1] = _mm_insert_epi8(sse_evt[1],
3484 sched_type_map[qes[3].sched_type] << DLB_EVENT_SCHED_TYPE_SHIFT,
3485 DLB_EVENT_SCHED_TYPE_BYTE + 8);
3487 /* Store the metadata to the event (use the double-precision
3488 * _mm_storeh_pd because there is no integer function for storing the
3490 * events[0].event = sse_evt[0][63:0]
3491 * events[1].event = sse_evt[0][127:64]
3492 * events[2].event = sse_evt[1][63:0]
3493 * events[3].event = sse_evt[1][127:64]
3495 _mm_storel_epi64((__m128i *)&events[0].event, sse_evt[0]);
3496 _mm_storeh_pd((double *)&events[1].event, (__m128d) sse_evt[0]);
3497 _mm_storel_epi64((__m128i *)&events[2].event, sse_evt[1]);
3498 _mm_storeh_pd((double *)&events[3].event, (__m128d) sse_evt[1]);
3500 DLB2_INC_STAT(ev_port->stats.rx_sched_cnt[qes[0].sched_type], 1);
3501 DLB2_INC_STAT(ev_port->stats.rx_sched_cnt[qes[1].sched_type], 1);
3502 DLB2_INC_STAT(ev_port->stats.rx_sched_cnt[qes[2].sched_type], 1);
3503 DLB2_INC_STAT(ev_port->stats.rx_sched_cnt[qes[3].sched_type], 1);
3506 ev_port->stats.queue[events[0].queue_id].
3507 qid_depth[qes[0].qid_depth],
3510 ev_port->stats.queue[events[1].queue_id].
3511 qid_depth[qes[1].qid_depth],
3514 ev_port->stats.queue[events[2].queue_id].
3515 qid_depth[qes[2].qid_depth],
3518 ev_port->stats.queue[events[3].queue_id].
3519 qid_depth[qes[3].qid_depth],
3522 DLB2_INC_STAT(ev_port->stats.traffic.rx_ok, num_events);
3527 static __rte_always_inline int
3528 dlb2_recv_qe_sparse(struct dlb2_port *qm_port, struct dlb2_dequeue_qe *qe)
3530 volatile struct dlb2_dequeue_qe *cq_addr;
3531 uint8_t xor_mask[2] = {0x0F, 0x00};
3532 const uint8_t and_mask = 0x0F;
3533 __m128i *qes = (__m128i *)qe;
3534 uint8_t gen_bits, gen_bit;
3538 cq_addr = dlb2_port[qm_port->id][PORT_TYPE(qm_port)].cq_base;
3540 idx = qm_port->cq_idx_unmasked & qm_port->cq_depth_mask;
3541 /* Load the next 4 QEs */
3542 addr[0] = (uintptr_t)&cq_addr[idx];
3543 addr[1] = (uintptr_t)&cq_addr[(idx + 4) & qm_port->cq_depth_mask];
3544 addr[2] = (uintptr_t)&cq_addr[(idx + 8) & qm_port->cq_depth_mask];
3545 addr[3] = (uintptr_t)&cq_addr[(idx + 12) & qm_port->cq_depth_mask];
3547 /* Prefetch next batch of QEs (all CQs occupy minimum 8 cache lines) */
3548 rte_prefetch0(&cq_addr[(idx + 16) & qm_port->cq_depth_mask]);
3549 rte_prefetch0(&cq_addr[(idx + 20) & qm_port->cq_depth_mask]);
3550 rte_prefetch0(&cq_addr[(idx + 24) & qm_port->cq_depth_mask]);
3551 rte_prefetch0(&cq_addr[(idx + 28) & qm_port->cq_depth_mask]);
3553 /* Correct the xor_mask for wrap-around QEs */
3554 gen_bit = qm_port->gen_bit;
3555 xor_mask[gen_bit] ^= !!((idx + 4) > qm_port->cq_depth_mask) << 1;
3556 xor_mask[gen_bit] ^= !!((idx + 8) > qm_port->cq_depth_mask) << 2;
3557 xor_mask[gen_bit] ^= !!((idx + 12) > qm_port->cq_depth_mask) << 3;
3559 /* Read the cache lines backwards to ensure that if QE[N] (N > 0) is
3560 * valid, then QEs[0:N-1] are too.
3562 qes[3] = _mm_load_si128((__m128i *)(void *)addr[3]);
3563 rte_compiler_barrier();
3564 qes[2] = _mm_load_si128((__m128i *)(void *)addr[2]);
3565 rte_compiler_barrier();
3566 qes[1] = _mm_load_si128((__m128i *)(void *)addr[1]);
3567 rte_compiler_barrier();
3568 qes[0] = _mm_load_si128((__m128i *)(void *)addr[0]);
3570 /* Extract and combine the gen bits */
3571 gen_bits = ((_mm_extract_epi8(qes[0], 15) & 0x1) << 0) |
3572 ((_mm_extract_epi8(qes[1], 15) & 0x1) << 1) |
3573 ((_mm_extract_epi8(qes[2], 15) & 0x1) << 2) |
3574 ((_mm_extract_epi8(qes[3], 15) & 0x1) << 3);
3576 /* XOR the combined bits such that a 1 represents a valid QE */
3577 gen_bits ^= xor_mask[gen_bit];
3579 /* Mask off gen bits we don't care about */
3580 gen_bits &= and_mask;
3582 return __builtin_popcount(gen_bits);
3586 _process_deq_qes_vec_impl(struct dlb2_port *qm_port,
3587 struct rte_event *events,
3593 __m128i v_qe_status,
3594 uint32_t valid_events)
3596 /* Look up the event QIDs, using the hardware QIDs to index the
3597 * port's QID mapping.
3599 * Each v_qe_[0-4] is just a 16-byte load of the whole QE. It is
3600 * passed along in registers as the QE data is required later.
3602 * v_qe_meta is an u32 unpack of all 4x QEs. A.k.a, it contains one
3603 * 32-bit slice of each QE, so makes up a full SSE register. This
3604 * allows parallel processing of 4x QEs in a single register.
3607 __m128i v_qid_done = {0};
3608 int hw_qid0 = _mm_extract_epi8(v_qe_meta, 2);
3609 int hw_qid1 = _mm_extract_epi8(v_qe_meta, 6);
3610 int hw_qid2 = _mm_extract_epi8(v_qe_meta, 10);
3611 int hw_qid3 = _mm_extract_epi8(v_qe_meta, 14);
3613 int ev_qid0 = qm_port->qid_mappings[hw_qid0];
3614 int ev_qid1 = qm_port->qid_mappings[hw_qid1];
3615 int ev_qid2 = qm_port->qid_mappings[hw_qid2];
3616 int ev_qid3 = qm_port->qid_mappings[hw_qid3];
3618 int hw_sched0 = _mm_extract_epi8(v_qe_meta, 3) & 3ul;
3619 int hw_sched1 = _mm_extract_epi8(v_qe_meta, 7) & 3ul;
3620 int hw_sched2 = _mm_extract_epi8(v_qe_meta, 11) & 3ul;
3621 int hw_sched3 = _mm_extract_epi8(v_qe_meta, 15) & 3ul;
3623 v_qid_done = _mm_insert_epi8(v_qid_done, ev_qid0, 2);
3624 v_qid_done = _mm_insert_epi8(v_qid_done, ev_qid1, 6);
3625 v_qid_done = _mm_insert_epi8(v_qid_done, ev_qid2, 10);
3626 v_qid_done = _mm_insert_epi8(v_qid_done, ev_qid3, 14);
3628 /* Schedule field remapping using byte shuffle
3629 * - Full byte containing sched field handled here (op, rsvd are zero)
3630 * - Note sanitizing the register requires two masking ANDs:
3631 * 1) to strip prio/msg_type from byte for correct shuffle lookup
3632 * 2) to strip any non-sched-field lanes from any results to OR later
3633 * - Final byte result is >> 10 to another byte-lane inside the u32.
3634 * This makes the final combination OR easier to make the rte_event.
3636 __m128i v_sched_done;
3637 __m128i v_sched_bits;
3639 static const uint8_t sched_type_map[16] = {
3640 [DLB2_SCHED_ATOMIC] = RTE_SCHED_TYPE_ATOMIC,
3641 [DLB2_SCHED_UNORDERED] = RTE_SCHED_TYPE_PARALLEL,
3642 [DLB2_SCHED_ORDERED] = RTE_SCHED_TYPE_ORDERED,
3643 [DLB2_SCHED_DIRECTED] = RTE_SCHED_TYPE_ATOMIC,
3645 static const uint8_t sched_and_mask[16] = {
3646 0x00, 0x00, 0x00, 0x03,
3647 0x00, 0x00, 0x00, 0x03,
3648 0x00, 0x00, 0x00, 0x03,
3649 0x00, 0x00, 0x00, 0x03,
3651 const __m128i v_sched_map = _mm_loadu_si128(
3652 (const __m128i *)sched_type_map);
3653 __m128i v_sched_mask = _mm_loadu_si128(
3654 (const __m128i *)&sched_and_mask);
3655 v_sched_bits = _mm_and_si128(v_qe_meta, v_sched_mask);
3656 __m128i v_sched_remapped = _mm_shuffle_epi8(v_sched_map,
3658 __m128i v_preshift = _mm_and_si128(v_sched_remapped,
3660 v_sched_done = _mm_srli_epi32(v_preshift, 10);
3663 /* Priority handling
3664 * - QE provides 3 bits of priority
3665 * - Shift << 3 to move to MSBs for byte-prio in rte_event
3666 * - Mask bits to avoid pollution, leaving only 3 prio MSBs in reg
3668 __m128i v_prio_done;
3670 static const uint8_t prio_mask[16] = {
3671 0x00, 0x00, 0x00, 0x07 << 5,
3672 0x00, 0x00, 0x00, 0x07 << 5,
3673 0x00, 0x00, 0x00, 0x07 << 5,
3674 0x00, 0x00, 0x00, 0x07 << 5,
3676 __m128i v_prio_mask = _mm_loadu_si128(
3677 (const __m128i *)prio_mask);
3678 __m128i v_prio_shifted = _mm_slli_epi32(v_qe_meta, 3);
3679 v_prio_done = _mm_and_si128(v_prio_shifted, v_prio_mask);
3682 /* Event Sub/Type handling:
3683 * we want to keep the lower 12 bits of each QE. Shift up by 20 bits
3684 * to get the sub/ev type data into rte_event location, clearing the
3685 * lower 20 bits in the process.
3687 __m128i v_types_done;
3689 static const uint8_t event_mask[16] = {
3690 0x0f, 0x00, 0x00, 0x00,
3691 0x0f, 0x00, 0x00, 0x00,
3692 0x0f, 0x00, 0x00, 0x00,
3693 0x0f, 0x00, 0x00, 0x00,
3695 static const uint8_t sub_event_mask[16] = {
3696 0xff, 0x00, 0x00, 0x00,
3697 0xff, 0x00, 0x00, 0x00,
3698 0xff, 0x00, 0x00, 0x00,
3699 0xff, 0x00, 0x00, 0x00,
3701 static const uint8_t flow_mask[16] = {
3702 0xff, 0xff, 0x00, 0x00,
3703 0xff, 0xff, 0x00, 0x00,
3704 0xff, 0xff, 0x00, 0x00,
3705 0xff, 0xff, 0x00, 0x00,
3707 __m128i v_event_mask = _mm_loadu_si128(
3708 (const __m128i *)event_mask);
3709 __m128i v_sub_event_mask = _mm_loadu_si128(
3710 (const __m128i *)sub_event_mask);
3711 __m128i v_flow_mask = _mm_loadu_si128(
3712 (const __m128i *)flow_mask);
3713 __m128i v_sub = _mm_srli_epi32(v_qe_meta, 8);
3714 v_sub = _mm_and_si128(v_sub, v_sub_event_mask);
3715 __m128i v_type = _mm_and_si128(v_qe_meta, v_event_mask);
3716 v_type = _mm_slli_epi32(v_type, 8);
3717 v_types_done = _mm_or_si128(v_type, v_sub);
3718 v_types_done = _mm_slli_epi32(v_types_done, 20);
3719 __m128i v_flow = _mm_and_si128(v_qe_status, v_flow_mask);
3720 v_types_done = _mm_or_si128(v_types_done, v_flow);
3723 /* Combine QID, Sched and Prio fields, then Shift >> 8 bits to align
3724 * with the rte_event, allowing unpacks to move/blend with payload.
3726 __m128i v_q_s_p_done;
3728 __m128i v_qid_sched = _mm_or_si128(v_qid_done, v_sched_done);
3729 __m128i v_q_s_prio = _mm_or_si128(v_qid_sched, v_prio_done);
3730 v_q_s_p_done = _mm_srli_epi32(v_q_s_prio, 8);
3733 __m128i v_unpk_ev_23, v_unpk_ev_01, v_ev_2, v_ev_3, v_ev_0, v_ev_1;
3735 /* Unpack evs into u64 metadata, then indiv events */
3736 v_unpk_ev_23 = _mm_unpackhi_epi32(v_types_done, v_q_s_p_done);
3737 v_unpk_ev_01 = _mm_unpacklo_epi32(v_types_done, v_q_s_p_done);
3739 switch (valid_events) {
3741 v_ev_3 = _mm_blend_epi16(v_unpk_ev_23, v_qe_3, 0x0F);
3742 v_ev_3 = _mm_alignr_epi8(v_ev_3, v_ev_3, 8);
3743 _mm_storeu_si128((__m128i *)&events[3], v_ev_3);
3744 DLB2_INC_STAT(qm_port->ev_port->stats.rx_sched_cnt[hw_sched3],
3748 v_ev_2 = _mm_unpacklo_epi64(v_unpk_ev_23, v_qe_2);
3749 _mm_storeu_si128((__m128i *)&events[2], v_ev_2);
3750 DLB2_INC_STAT(qm_port->ev_port->stats.rx_sched_cnt[hw_sched2],
3754 v_ev_1 = _mm_blend_epi16(v_unpk_ev_01, v_qe_1, 0x0F);
3755 v_ev_1 = _mm_alignr_epi8(v_ev_1, v_ev_1, 8);
3756 _mm_storeu_si128((__m128i *)&events[1], v_ev_1);
3757 DLB2_INC_STAT(qm_port->ev_port->stats.rx_sched_cnt[hw_sched1],
3761 v_ev_0 = _mm_unpacklo_epi64(v_unpk_ev_01, v_qe_0);
3762 _mm_storeu_si128((__m128i *)&events[0], v_ev_0);
3763 DLB2_INC_STAT(qm_port->ev_port->stats.rx_sched_cnt[hw_sched0],
3768 static __rte_always_inline int
3769 dlb2_recv_qe_sparse_vec(struct dlb2_port *qm_port, void *events,
3770 uint32_t max_events)
3772 /* Using unmasked idx for perf, and masking manually */
3773 uint16_t idx = qm_port->cq_idx_unmasked;
3774 volatile struct dlb2_dequeue_qe *cq_addr;
3776 cq_addr = dlb2_port[qm_port->id][PORT_TYPE(qm_port)].cq_base;
3778 uintptr_t qe_ptr_3 = (uintptr_t)&cq_addr[(idx + 12) &
3779 qm_port->cq_depth_mask];
3780 uintptr_t qe_ptr_2 = (uintptr_t)&cq_addr[(idx + 8) &
3781 qm_port->cq_depth_mask];
3782 uintptr_t qe_ptr_1 = (uintptr_t)&cq_addr[(idx + 4) &
3783 qm_port->cq_depth_mask];
3784 uintptr_t qe_ptr_0 = (uintptr_t)&cq_addr[(idx + 0) &
3785 qm_port->cq_depth_mask];
3787 /* Load QEs from CQ: use compiler barriers to avoid load reordering */
3788 __m128i v_qe_3 = _mm_loadu_si128((const __m128i *)qe_ptr_3);
3789 rte_compiler_barrier();
3790 __m128i v_qe_2 = _mm_loadu_si128((const __m128i *)qe_ptr_2);
3791 rte_compiler_barrier();
3792 __m128i v_qe_1 = _mm_loadu_si128((const __m128i *)qe_ptr_1);
3793 rte_compiler_barrier();
3794 __m128i v_qe_0 = _mm_loadu_si128((const __m128i *)qe_ptr_0);
3796 /* Generate the pkt_shuffle mask;
3797 * - Avoids load in otherwise load-heavy section of code
3798 * - Moves bytes 3,7,11,15 (gen bit bytes) to LSB bytes in XMM
3800 const uint32_t stat_shuf_bytes = (15 << 24) | (11 << 16) | (7 << 8) | 3;
3801 __m128i v_zeros = _mm_setzero_si128();
3802 __m128i v_ffff = _mm_cmpeq_epi8(v_zeros, v_zeros);
3803 __m128i v_stat_shuf_mask = _mm_insert_epi32(v_ffff, stat_shuf_bytes, 0);
3805 /* Extract u32 components required from the QE
3806 * - QE[64 to 95 ] for metadata (qid, sched, prio, event type, ...)
3807 * - QE[96 to 127] for status (cq gen bit, error)
3809 * Note that stage 1 of the unpacking is re-used for both u32 extracts
3811 __m128i v_qe_02 = _mm_unpackhi_epi32(v_qe_0, v_qe_2);
3812 __m128i v_qe_13 = _mm_unpackhi_epi32(v_qe_1, v_qe_3);
3813 __m128i v_qe_status = _mm_unpackhi_epi32(v_qe_02, v_qe_13);
3814 __m128i v_qe_meta = _mm_unpacklo_epi32(v_qe_02, v_qe_13);
3816 /* Status byte (gen_bit, error) handling:
3817 * - Shuffle to lanes 0,1,2,3, clear all others
3818 * - Shift right by 7 for gen bit to MSB, movemask to scalar
3819 * - Shift right by 2 for error bit to MSB, movemask to scalar
3821 __m128i v_qe_shuffled = _mm_shuffle_epi8(v_qe_status, v_stat_shuf_mask);
3822 __m128i v_qes_shift_gen_bit = _mm_slli_epi32(v_qe_shuffled, 7);
3823 int32_t qe_gen_bits = _mm_movemask_epi8(v_qes_shift_gen_bit) & 0xf;
3825 /* Expected vs Reality of QE Gen bits
3826 * - cq_rolling_mask provides expected bits
3827 * - QE loads, unpacks/shuffle and movemask provides reality
3828 * - XOR of the two gives bitmask of new packets
3829 * - POPCNT to get the number of new events
3831 uint64_t rolling = qm_port->cq_rolling_mask & 0xF;
3832 uint64_t qe_xor_bits = (qe_gen_bits ^ rolling);
3833 uint32_t count_new = __builtin_popcount(qe_xor_bits);
3834 count_new = RTE_MIN(count_new, max_events);
3838 /* emulate a 128 bit rotate using 2x 64-bit numbers and bit-shifts */
3840 uint64_t m_rshift = qm_port->cq_rolling_mask >> count_new;
3841 uint64_t m_lshift = qm_port->cq_rolling_mask << (64 - count_new);
3842 uint64_t m2_rshift = qm_port->cq_rolling_mask_2 >> count_new;
3843 uint64_t m2_lshift = qm_port->cq_rolling_mask_2 << (64 - count_new);
3845 /* shifted out of m2 into MSB of m */
3846 qm_port->cq_rolling_mask = (m_rshift | m2_lshift);
3848 /* shifted out of m "looped back" into MSB of m2 */
3849 qm_port->cq_rolling_mask_2 = (m2_rshift | m_lshift);
3851 /* Prefetch the next QEs - should run as IPC instead of cycles */
3852 rte_prefetch0(&cq_addr[(idx + 16) & qm_port->cq_depth_mask]);
3853 rte_prefetch0(&cq_addr[(idx + 20) & qm_port->cq_depth_mask]);
3854 rte_prefetch0(&cq_addr[(idx + 24) & qm_port->cq_depth_mask]);
3855 rte_prefetch0(&cq_addr[(idx + 28) & qm_port->cq_depth_mask]);
3857 /* Convert QEs from XMM regs to events and store events directly */
3858 _process_deq_qes_vec_impl(qm_port, events, v_qe_3, v_qe_2, v_qe_1,
3859 v_qe_0, v_qe_meta, v_qe_status, count_new);
3865 dlb2_inc_cq_idx(struct dlb2_port *qm_port, int cnt)
3867 uint16_t idx = qm_port->cq_idx_unmasked + cnt;
3869 qm_port->cq_idx_unmasked = idx;
3870 qm_port->cq_idx = idx & qm_port->cq_depth_mask;
3871 qm_port->gen_bit = (~(idx >> qm_port->gen_bit_shift)) & 0x1;
3874 static inline int16_t
3875 dlb2_hw_dequeue_sparse(struct dlb2_eventdev *dlb2,
3876 struct dlb2_eventdev_port *ev_port,
3877 struct rte_event *events,
3879 uint64_t dequeue_timeout_ticks)
3881 uint64_t start_ticks = 0ULL;
3882 struct dlb2_port *qm_port;
3887 qm_port = &ev_port->qm_port;
3888 use_scalar = qm_port->use_scalar;
3890 if (!dlb2->global_dequeue_wait)
3891 timeout = dequeue_timeout_ticks;
3893 timeout = dlb2->global_dequeue_wait_ticks;
3895 start_ticks = rte_get_timer_cycles();
3897 use_scalar = use_scalar || (max_num & 0x3);
3899 while (num < max_num) {
3900 struct dlb2_dequeue_qe qes[DLB2_NUM_QES_PER_CACHE_LINE];
3905 uint64_t m_rshift, m_lshift, m2_rshift, m2_lshift;
3907 num_avail = dlb2_recv_qe_sparse(qm_port, qes);
3908 num_avail = RTE_MIN(num_avail, max_num - num);
3909 dlb2_inc_cq_idx(qm_port, num_avail << 2);
3910 if (num_avail == DLB2_NUM_QES_PER_CACHE_LINE)
3911 n_iter = dlb2_process_dequeue_four_qes(ev_port,
3916 n_iter = dlb2_process_dequeue_qes(ev_port,
3923 /* update rolling_mask for vector code support */
3924 m_rshift = qm_port->cq_rolling_mask >> n_iter;
3925 m_lshift = qm_port->cq_rolling_mask << (64 - n_iter);
3926 m2_rshift = qm_port->cq_rolling_mask_2 >> n_iter;
3927 m2_lshift = qm_port->cq_rolling_mask_2 <<
3929 qm_port->cq_rolling_mask = (m_rshift | m2_lshift);
3930 qm_port->cq_rolling_mask_2 = (m2_rshift | m_lshift);
3932 } else { /* !use_scalar */
3933 num_avail = dlb2_recv_qe_sparse_vec(qm_port,
3936 dlb2_inc_cq_idx(qm_port, num_avail << 2);
3938 DLB2_INC_STAT(ev_port->stats.traffic.rx_ok, num_avail);
3941 if ((timeout == 0) || (num > 0))
3942 /* Not waiting in any form or 1+ events recd */
3944 else if (dlb2_dequeue_wait(dlb2, ev_port, qm_port,
3945 timeout, start_ticks))
3950 qm_port->owed_tokens += num;
3953 if (qm_port->token_pop_mode == AUTO_POP)
3954 dlb2_consume_qe_immediate(qm_port, num);
3956 ev_port->outstanding_releases += num;
3958 dlb2_port_credits_inc(qm_port, num);
3964 static __rte_always_inline int
3965 dlb2_recv_qe(struct dlb2_port *qm_port, struct dlb2_dequeue_qe *qe,
3968 uint8_t xor_mask[2][4] = { {0x0F, 0x0E, 0x0C, 0x08},
3969 {0x00, 0x01, 0x03, 0x07} };
3970 uint8_t and_mask[4] = {0x0F, 0x0E, 0x0C, 0x08};
3971 volatile struct dlb2_dequeue_qe *cq_addr;
3972 __m128i *qes = (__m128i *)qe;
3973 uint64_t *cache_line_base;
3976 cq_addr = dlb2_port[qm_port->id][PORT_TYPE(qm_port)].cq_base;
3977 cq_addr = &cq_addr[qm_port->cq_idx];
3979 cache_line_base = (void *)(((uintptr_t)cq_addr) & ~0x3F);
3980 *offset = ((uintptr_t)cq_addr & 0x30) >> 4;
3982 /* Load the next CQ cache line from memory. Pack these reads as tight
3983 * as possible to reduce the chance that DLB invalidates the line while
3984 * the CPU is reading it. Read the cache line backwards to ensure that
3985 * if QE[N] (N > 0) is valid, then QEs[0:N-1] are too.
3987 * (Valid QEs start at &qe[offset])
3989 qes[3] = _mm_load_si128((__m128i *)&cache_line_base[6]);
3990 qes[2] = _mm_load_si128((__m128i *)&cache_line_base[4]);
3991 qes[1] = _mm_load_si128((__m128i *)&cache_line_base[2]);
3992 qes[0] = _mm_load_si128((__m128i *)&cache_line_base[0]);
3994 /* Evict the cache line ASAP */
3995 rte_cldemote(cache_line_base);
3997 /* Extract and combine the gen bits */
3998 gen_bits = ((_mm_extract_epi8(qes[0], 15) & 0x1) << 0) |
3999 ((_mm_extract_epi8(qes[1], 15) & 0x1) << 1) |
4000 ((_mm_extract_epi8(qes[2], 15) & 0x1) << 2) |
4001 ((_mm_extract_epi8(qes[3], 15) & 0x1) << 3);
4003 /* XOR the combined bits such that a 1 represents a valid QE */
4004 gen_bits ^= xor_mask[qm_port->gen_bit][*offset];
4006 /* Mask off gen bits we don't care about */
4007 gen_bits &= and_mask[*offset];
4009 return __builtin_popcount(gen_bits);
4012 static inline int16_t
4013 dlb2_hw_dequeue(struct dlb2_eventdev *dlb2,
4014 struct dlb2_eventdev_port *ev_port,
4015 struct rte_event *events,
4017 uint64_t dequeue_timeout_ticks)
4020 uint64_t start_ticks = 0ULL;
4021 struct dlb2_port *qm_port;
4024 qm_port = &ev_port->qm_port;
4026 /* We have a special implementation for waiting. Wait can be:
4027 * 1) no waiting at all
4029 * 3) wait for interrupt. If wakeup and poll time
4030 * has expired, then return to caller
4031 * 4) umonitor/umwait repeatedly up to poll time
4034 /* If configured for per dequeue wait, then use wait value provided
4035 * to this API. Otherwise we must use the global
4036 * value from eventdev config time.
4038 if (!dlb2->global_dequeue_wait)
4039 timeout = dequeue_timeout_ticks;
4041 timeout = dlb2->global_dequeue_wait_ticks;
4043 start_ticks = rte_get_timer_cycles();
4045 while (num < max_num) {
4046 struct dlb2_dequeue_qe qes[DLB2_NUM_QES_PER_CACHE_LINE];
4050 /* Copy up to 4 QEs from the current cache line into qes */
4051 num_avail = dlb2_recv_qe(qm_port, qes, &offset);
4053 /* But don't process more than the user requested */
4054 num_avail = RTE_MIN(num_avail, max_num - num);
4056 dlb2_inc_cq_idx(qm_port, num_avail);
4058 if (num_avail == DLB2_NUM_QES_PER_CACHE_LINE)
4059 num += dlb2_process_dequeue_four_qes(ev_port,
4064 num += dlb2_process_dequeue_qes(ev_port,
4069 else if ((timeout == 0) || (num > 0))
4070 /* Not waiting in any form, or 1+ events received? */
4072 else if (dlb2_dequeue_wait(dlb2, ev_port, qm_port,
4073 timeout, start_ticks))
4077 qm_port->owed_tokens += num;
4080 if (qm_port->token_pop_mode == AUTO_POP)
4081 dlb2_consume_qe_immediate(qm_port, num);
4083 ev_port->outstanding_releases += num;
4085 dlb2_port_credits_inc(qm_port, num);
4092 dlb2_event_dequeue_burst(void *event_port, struct rte_event *ev, uint16_t num,
4095 struct dlb2_eventdev_port *ev_port = event_port;
4096 struct dlb2_port *qm_port = &ev_port->qm_port;
4097 struct dlb2_eventdev *dlb2 = ev_port->dlb2;
4100 RTE_ASSERT(ev_port->setup_done);
4101 RTE_ASSERT(ev != NULL);
4103 if (ev_port->implicit_release && ev_port->outstanding_releases > 0) {
4104 uint16_t out_rels = ev_port->outstanding_releases;
4106 dlb2_event_release(dlb2, ev_port->id, out_rels);
4108 DLB2_INC_STAT(ev_port->stats.tx_implicit_rel, out_rels);
4111 if (qm_port->token_pop_mode == DEFERRED_POP && qm_port->owed_tokens)
4112 dlb2_consume_qe_immediate(qm_port, qm_port->owed_tokens);
4114 cnt = dlb2_hw_dequeue(dlb2, ev_port, ev, num, wait);
4116 DLB2_INC_STAT(ev_port->stats.traffic.total_polls, 1);
4117 DLB2_INC_STAT(ev_port->stats.traffic.zero_polls, ((cnt == 0) ? 1 : 0));
4123 dlb2_event_dequeue(void *event_port, struct rte_event *ev, uint64_t wait)
4125 return dlb2_event_dequeue_burst(event_port, ev, 1, wait);
4129 dlb2_event_dequeue_burst_sparse(void *event_port, struct rte_event *ev,
4130 uint16_t num, uint64_t wait)
4132 struct dlb2_eventdev_port *ev_port = event_port;
4133 struct dlb2_port *qm_port = &ev_port->qm_port;
4134 struct dlb2_eventdev *dlb2 = ev_port->dlb2;
4137 RTE_ASSERT(ev_port->setup_done);
4138 RTE_ASSERT(ev != NULL);
4140 if (ev_port->implicit_release && ev_port->outstanding_releases > 0) {
4141 uint16_t out_rels = ev_port->outstanding_releases;
4143 dlb2_event_release(dlb2, ev_port->id, out_rels);
4145 DLB2_INC_STAT(ev_port->stats.tx_implicit_rel, out_rels);
4148 if (qm_port->token_pop_mode == DEFERRED_POP && qm_port->owed_tokens)
4149 dlb2_consume_qe_immediate(qm_port, qm_port->owed_tokens);
4151 cnt = dlb2_hw_dequeue_sparse(dlb2, ev_port, ev, num, wait);
4153 DLB2_INC_STAT(ev_port->stats.traffic.total_polls, 1);
4154 DLB2_INC_STAT(ev_port->stats.traffic.zero_polls, ((cnt == 0) ? 1 : 0));
4159 dlb2_event_dequeue_sparse(void *event_port, struct rte_event *ev,
4162 return dlb2_event_dequeue_burst_sparse(event_port, ev, 1, wait);
4166 dlb2_flush_port(struct rte_eventdev *dev, int port_id)
4168 struct dlb2_eventdev *dlb2 = dlb2_pmd_priv(dev);
4169 eventdev_stop_flush_t flush;
4170 struct rte_event ev;
4175 flush = dev->dev_ops->dev_stop_flush;
4176 dev_id = dev->data->dev_id;
4177 arg = dev->data->dev_stop_flush_arg;
4179 while (rte_event_dequeue_burst(dev_id, port_id, &ev, 1, 0)) {
4181 flush(dev_id, ev, arg);
4183 if (dlb2->ev_ports[port_id].qm_port.is_directed)
4186 ev.op = RTE_EVENT_OP_RELEASE;
4188 rte_event_enqueue_burst(dev_id, port_id, &ev, 1);
4191 /* Enqueue any additional outstanding releases */
4192 ev.op = RTE_EVENT_OP_RELEASE;
4194 for (i = dlb2->ev_ports[port_id].outstanding_releases; i > 0; i--)
4195 rte_event_enqueue_burst(dev_id, port_id, &ev, 1);
4199 dlb2_get_ldb_queue_depth(struct dlb2_eventdev *dlb2,
4200 struct dlb2_eventdev_queue *queue)
4202 struct dlb2_hw_dev *handle = &dlb2->qm_instance;
4203 struct dlb2_get_ldb_queue_depth_args cfg;
4206 cfg.queue_id = queue->qm_queue.id;
4208 ret = dlb2_iface_get_ldb_queue_depth(handle, &cfg);
4210 DLB2_LOG_ERR("dlb2: get_ldb_queue_depth ret=%d (driver status: %s)\n",
4211 ret, dlb2_error_strings[cfg.response.status]);
4215 return cfg.response.id;
4219 dlb2_get_dir_queue_depth(struct dlb2_eventdev *dlb2,
4220 struct dlb2_eventdev_queue *queue)
4222 struct dlb2_hw_dev *handle = &dlb2->qm_instance;
4223 struct dlb2_get_dir_queue_depth_args cfg;
4226 cfg.queue_id = queue->qm_queue.id;
4228 ret = dlb2_iface_get_dir_queue_depth(handle, &cfg);
4230 DLB2_LOG_ERR("dlb2: get_dir_queue_depth ret=%d (driver status: %s)\n",
4231 ret, dlb2_error_strings[cfg.response.status]);
4235 return cfg.response.id;
4239 dlb2_get_queue_depth(struct dlb2_eventdev *dlb2,
4240 struct dlb2_eventdev_queue *queue)
4242 if (queue->qm_queue.is_directed)
4243 return dlb2_get_dir_queue_depth(dlb2, queue);
4245 return dlb2_get_ldb_queue_depth(dlb2, queue);
4249 dlb2_queue_is_empty(struct dlb2_eventdev *dlb2,
4250 struct dlb2_eventdev_queue *queue)
4252 return dlb2_get_queue_depth(dlb2, queue) == 0;
4256 dlb2_linked_queues_empty(struct dlb2_eventdev *dlb2)
4260 for (i = 0; i < dlb2->num_queues; i++) {
4261 if (dlb2->ev_queues[i].num_links == 0)
4263 if (!dlb2_queue_is_empty(dlb2, &dlb2->ev_queues[i]))
4271 dlb2_queues_empty(struct dlb2_eventdev *dlb2)
4275 for (i = 0; i < dlb2->num_queues; i++) {
4276 if (!dlb2_queue_is_empty(dlb2, &dlb2->ev_queues[i]))
4284 dlb2_drain(struct rte_eventdev *dev)
4286 struct dlb2_eventdev *dlb2 = dlb2_pmd_priv(dev);
4287 struct dlb2_eventdev_port *ev_port = NULL;
4291 dev_id = dev->data->dev_id;
4293 while (!dlb2_linked_queues_empty(dlb2)) {
4294 /* Flush all the ev_ports, which will drain all their connected
4297 for (i = 0; i < dlb2->num_ports; i++)
4298 dlb2_flush_port(dev, i);
4301 /* The queues are empty, but there may be events left in the ports. */
4302 for (i = 0; i < dlb2->num_ports; i++)
4303 dlb2_flush_port(dev, i);
4305 /* If the domain's queues are empty, we're done. */
4306 if (dlb2_queues_empty(dlb2))
4309 /* Else, there must be at least one unlinked load-balanced queue.
4310 * Select a load-balanced port with which to drain the unlinked
4313 for (i = 0; i < dlb2->num_ports; i++) {
4314 ev_port = &dlb2->ev_ports[i];
4316 if (!ev_port->qm_port.is_directed)
4320 if (i == dlb2->num_ports) {
4321 DLB2_LOG_ERR("internal error: no LDB ev_ports\n");
4326 rte_event_port_unlink(dev_id, ev_port->id, NULL, 0);
4329 DLB2_LOG_ERR("internal error: failed to unlink ev_port %d\n",
4334 for (i = 0; i < dlb2->num_queues; i++) {
4338 if (dlb2_queue_is_empty(dlb2, &dlb2->ev_queues[i]))
4344 /* Link the ev_port to the queue */
4345 ret = rte_event_port_link(dev_id, ev_port->id, &qid, &prio, 1);
4347 DLB2_LOG_ERR("internal error: failed to link ev_port %d to queue %d\n",
4352 /* Flush the queue */
4353 while (!dlb2_queue_is_empty(dlb2, &dlb2->ev_queues[i]))
4354 dlb2_flush_port(dev, ev_port->id);
4356 /* Drain any extant events in the ev_port. */
4357 dlb2_flush_port(dev, ev_port->id);
4359 /* Unlink the ev_port from the queue */
4360 ret = rte_event_port_unlink(dev_id, ev_port->id, &qid, 1);
4362 DLB2_LOG_ERR("internal error: failed to unlink ev_port %d to queue %d\n",
4370 dlb2_eventdev_stop(struct rte_eventdev *dev)
4372 struct dlb2_eventdev *dlb2 = dlb2_pmd_priv(dev);
4374 rte_spinlock_lock(&dlb2->qm_instance.resource_lock);
4376 if (dlb2->run_state == DLB2_RUN_STATE_STOPPED) {
4377 DLB2_LOG_DBG("Internal error: already stopped\n");
4378 rte_spinlock_unlock(&dlb2->qm_instance.resource_lock);
4380 } else if (dlb2->run_state != DLB2_RUN_STATE_STARTED) {
4381 DLB2_LOG_ERR("Internal error: bad state %d for dev_stop\n",
4382 (int)dlb2->run_state);
4383 rte_spinlock_unlock(&dlb2->qm_instance.resource_lock);
4387 dlb2->run_state = DLB2_RUN_STATE_STOPPING;
4389 rte_spinlock_unlock(&dlb2->qm_instance.resource_lock);
4393 dlb2->run_state = DLB2_RUN_STATE_STOPPED;
4397 dlb2_eventdev_close(struct rte_eventdev *dev)
4399 dlb2_hw_reset_sched_domain(dev, false);
4405 dlb2_eventdev_queue_release(struct rte_eventdev *dev, uint8_t id)
4410 /* This function intentionally left blank. */
4414 dlb2_eventdev_port_release(void *port)
4416 struct dlb2_eventdev_port *ev_port = port;
4417 struct dlb2_port *qm_port;
4420 qm_port = &ev_port->qm_port;
4421 if (qm_port->config_state == DLB2_CONFIGURED)
4422 dlb2_free_qe_mem(qm_port);
4427 dlb2_eventdev_timeout_ticks(struct rte_eventdev *dev, uint64_t ns,
4428 uint64_t *timeout_ticks)
4431 uint64_t cycles_per_ns = rte_get_timer_hz() / 1E9;
4433 *timeout_ticks = ns * cycles_per_ns;
4439 dlb2_entry_points_init(struct rte_eventdev *dev)
4441 struct dlb2_eventdev *dlb2;
4443 /* Expose PMD's eventdev interface */
4444 static struct eventdev_ops dlb2_eventdev_entry_ops = {
4445 .dev_infos_get = dlb2_eventdev_info_get,
4446 .dev_configure = dlb2_eventdev_configure,
4447 .dev_start = dlb2_eventdev_start,
4448 .dev_stop = dlb2_eventdev_stop,
4449 .dev_close = dlb2_eventdev_close,
4450 .queue_def_conf = dlb2_eventdev_queue_default_conf_get,
4451 .queue_setup = dlb2_eventdev_queue_setup,
4452 .queue_release = dlb2_eventdev_queue_release,
4453 .port_def_conf = dlb2_eventdev_port_default_conf_get,
4454 .port_setup = dlb2_eventdev_port_setup,
4455 .port_release = dlb2_eventdev_port_release,
4456 .port_link = dlb2_eventdev_port_link,
4457 .port_unlink = dlb2_eventdev_port_unlink,
4458 .port_unlinks_in_progress =
4459 dlb2_eventdev_port_unlinks_in_progress,
4460 .timeout_ticks = dlb2_eventdev_timeout_ticks,
4461 .dump = dlb2_eventdev_dump,
4462 .xstats_get = dlb2_eventdev_xstats_get,
4463 .xstats_get_names = dlb2_eventdev_xstats_get_names,
4464 .xstats_get_by_name = dlb2_eventdev_xstats_get_by_name,
4465 .xstats_reset = dlb2_eventdev_xstats_reset,
4466 .dev_selftest = test_dlb2_eventdev,
4469 /* Expose PMD's eventdev interface */
4471 dev->dev_ops = &dlb2_eventdev_entry_ops;
4472 dev->enqueue = dlb2_event_enqueue;
4473 dev->enqueue_burst = dlb2_event_enqueue_burst;
4474 dev->enqueue_new_burst = dlb2_event_enqueue_new_burst;
4475 dev->enqueue_forward_burst = dlb2_event_enqueue_forward_burst;
4477 dlb2 = dev->data->dev_private;
4478 if (dlb2->poll_mode == DLB2_CQ_POLL_MODE_SPARSE) {
4479 dev->dequeue = dlb2_event_dequeue_sparse;
4480 dev->dequeue_burst = dlb2_event_dequeue_burst_sparse;
4482 dev->dequeue = dlb2_event_dequeue;
4483 dev->dequeue_burst = dlb2_event_dequeue_burst;
4488 dlb2_primary_eventdev_probe(struct rte_eventdev *dev,
4490 struct dlb2_devargs *dlb2_args)
4492 struct dlb2_eventdev *dlb2;
4495 dlb2 = dev->data->dev_private;
4497 dlb2->event_dev = dev; /* backlink */
4499 evdev_dlb2_default_info.driver_name = name;
4501 dlb2->max_num_events_override = dlb2_args->max_num_events;
4502 dlb2->num_dir_credits_override = dlb2_args->num_dir_credits_override;
4503 dlb2->poll_interval = dlb2_args->poll_interval;
4504 dlb2->sw_credit_quanta = dlb2_args->sw_credit_quanta;
4505 dlb2->hw_credit_quanta = dlb2_args->hw_credit_quanta;
4506 dlb2->default_depth_thresh = dlb2_args->default_depth_thresh;
4507 dlb2->vector_opts_enabled = dlb2_args->vector_opts_enabled;
4510 if (dlb2_args->max_cq_depth != 0)
4511 dlb2->max_cq_depth = dlb2_args->max_cq_depth;
4513 dlb2->max_cq_depth = DLB2_DEFAULT_CQ_DEPTH;
4515 evdev_dlb2_default_info.max_event_port_dequeue_depth = dlb2->max_cq_depth;
4517 err = dlb2_iface_open(&dlb2->qm_instance, name);
4519 DLB2_LOG_ERR("could not open event hardware device, err=%d\n",
4524 err = dlb2_iface_get_device_version(&dlb2->qm_instance,
4527 DLB2_LOG_ERR("dlb2: failed to get the device version, err=%d\n",
4532 err = dlb2_hw_query_resources(dlb2);
4534 DLB2_LOG_ERR("get resources err=%d for %s\n",
4539 dlb2_iface_hardware_init(&dlb2->qm_instance);
4541 /* configure class of service */
4543 struct dlb2_set_cos_bw_args
4544 set_cos_bw_args = { {0} };
4548 for (id = 0; id < DLB2_COS_NUM_VALS; id++) {
4549 set_cos_bw_args.cos_id = id;
4550 set_cos_bw_args.bandwidth = dlb2->cos_bw[id];
4551 ret = dlb2_iface_set_cos_bw(&dlb2->qm_instance,
4557 DLB2_LOG_ERR("dlb2: failed to configure class of service, err=%d\n",
4563 err = dlb2_iface_get_cq_poll_mode(&dlb2->qm_instance, &dlb2->poll_mode);
4565 DLB2_LOG_ERR("dlb2: failed to get the poll mode, err=%d\n",
4570 /* Complete xtstats runtime initialization */
4571 err = dlb2_xstats_init(dlb2);
4573 DLB2_LOG_ERR("dlb2: failed to init xstats, err=%d\n", err);
4577 /* Initialize each port's token pop mode */
4578 for (i = 0; i < DLB2_MAX_NUM_PORTS(dlb2->version); i++)
4579 dlb2->ev_ports[i].qm_port.token_pop_mode = AUTO_POP;
4581 rte_spinlock_init(&dlb2->qm_instance.resource_lock);
4583 dlb2_iface_low_level_io_init();
4585 dlb2_entry_points_init(dev);
4587 dlb2_init_queue_depth_thresholds(dlb2,
4588 dlb2_args->qid_depth_thresholds.val);
4590 dlb2_init_cq_weight(dlb2,
4591 dlb2_args->cq_weight.limit);
4593 dlb2_init_port_cos(dlb2,
4594 dlb2_args->port_cos.cos_id);
4596 dlb2_init_cos_bw(dlb2,
4597 &dlb2_args->cos_bw);
4603 dlb2_secondary_eventdev_probe(struct rte_eventdev *dev,
4606 struct dlb2_eventdev *dlb2;
4609 dlb2 = dev->data->dev_private;
4611 evdev_dlb2_default_info.driver_name = name;
4613 err = dlb2_iface_open(&dlb2->qm_instance, name);
4615 DLB2_LOG_ERR("could not open event hardware device, err=%d\n",
4620 err = dlb2_hw_query_resources(dlb2);
4622 DLB2_LOG_ERR("get resources err=%d for %s\n",
4627 dlb2_iface_low_level_io_init();
4629 dlb2_entry_points_init(dev);
4635 dlb2_parse_params(const char *params,
4637 struct dlb2_devargs *dlb2_args,
4641 static const char * const args[] = { NUMA_NODE_ARG,
4642 DLB2_MAX_NUM_EVENTS,
4643 DLB2_NUM_DIR_CREDITS,
4645 DLB2_QID_DEPTH_THRESH_ARG,
4647 DLB2_POLL_INTERVAL_ARG,
4648 DLB2_SW_CREDIT_QUANTA_ARG,
4649 DLB2_HW_CREDIT_QUANTA_ARG,
4650 DLB2_DEPTH_THRESH_ARG,
4651 DLB2_VECTOR_OPTS_ENAB_ARG,
4658 if (params != NULL && params[0] != '\0') {
4659 struct rte_kvargs *kvlist = rte_kvargs_parse(params, args);
4661 if (kvlist == NULL) {
4663 "Ignoring unsupported parameters when creating device '%s'\n",
4666 int ret = rte_kvargs_process(kvlist, NUMA_NODE_ARG,
4668 &dlb2_args->socket_id);
4670 DLB2_LOG_ERR("%s: Error parsing numa node parameter",
4672 rte_kvargs_free(kvlist);
4676 ret = rte_kvargs_process(kvlist, DLB2_MAX_NUM_EVENTS,
4678 &dlb2_args->max_num_events);
4680 DLB2_LOG_ERR("%s: Error parsing max_num_events parameter",
4682 rte_kvargs_free(kvlist);
4686 if (version == DLB2_HW_V2) {
4687 ret = rte_kvargs_process(kvlist,
4688 DLB2_NUM_DIR_CREDITS,
4689 set_num_dir_credits,
4690 &dlb2_args->num_dir_credits_override);
4692 DLB2_LOG_ERR("%s: Error parsing num_dir_credits parameter",
4694 rte_kvargs_free(kvlist);
4698 ret = rte_kvargs_process(kvlist, DEV_ID_ARG,
4700 &dlb2_args->dev_id);
4702 DLB2_LOG_ERR("%s: Error parsing dev_id parameter",
4704 rte_kvargs_free(kvlist);
4708 if (version == DLB2_HW_V2) {
4709 ret = rte_kvargs_process(
4711 DLB2_QID_DEPTH_THRESH_ARG,
4712 set_qid_depth_thresh,
4713 &dlb2_args->qid_depth_thresholds);
4715 ret = rte_kvargs_process(
4717 DLB2_QID_DEPTH_THRESH_ARG,
4718 set_qid_depth_thresh_v2_5,
4719 &dlb2_args->qid_depth_thresholds);
4722 DLB2_LOG_ERR("%s: Error parsing qid_depth_thresh parameter",
4724 rte_kvargs_free(kvlist);
4728 ret = rte_kvargs_process(kvlist, DLB2_POLL_INTERVAL_ARG,
4730 &dlb2_args->poll_interval);
4732 DLB2_LOG_ERR("%s: Error parsing poll interval parameter",
4734 rte_kvargs_free(kvlist);
4738 ret = rte_kvargs_process(kvlist,
4739 DLB2_SW_CREDIT_QUANTA_ARG,
4740 set_sw_credit_quanta,
4741 &dlb2_args->sw_credit_quanta);
4743 DLB2_LOG_ERR("%s: Error parsing sw credit quanta parameter",
4745 rte_kvargs_free(kvlist);
4749 ret = rte_kvargs_process(kvlist,
4750 DLB2_HW_CREDIT_QUANTA_ARG,
4751 set_hw_credit_quanta,
4752 &dlb2_args->hw_credit_quanta);
4754 DLB2_LOG_ERR("%s: Error parsing hw credit quanta parameter",
4756 rte_kvargs_free(kvlist);
4760 ret = rte_kvargs_process(kvlist, DLB2_DEPTH_THRESH_ARG,
4761 set_default_depth_thresh,
4762 &dlb2_args->default_depth_thresh);
4764 DLB2_LOG_ERR("%s: Error parsing set depth thresh parameter",
4766 rte_kvargs_free(kvlist);
4770 ret = rte_kvargs_process(kvlist,
4771 DLB2_VECTOR_OPTS_ENAB_ARG,
4772 set_vector_opts_enab,
4773 &dlb2_args->vector_opts_enabled);
4775 DLB2_LOG_ERR("%s: Error parsing vector opts enabled",
4777 rte_kvargs_free(kvlist);
4781 ret = rte_kvargs_process(kvlist,
4784 &dlb2_args->max_cq_depth);
4786 DLB2_LOG_ERR("%s: Error parsing max cq depth",
4788 rte_kvargs_free(kvlist);
4792 ret = rte_kvargs_process(kvlist,
4795 &dlb2_args->cq_weight);
4797 DLB2_LOG_ERR("%s: Error parsing cq weight on",
4799 rte_kvargs_free(kvlist);
4803 ret = rte_kvargs_process(kvlist,
4806 &dlb2_args->port_cos);
4808 DLB2_LOG_ERR("%s: Error parsing port cos",
4810 rte_kvargs_free(kvlist);
4814 ret = rte_kvargs_process(kvlist,
4817 &dlb2_args->cos_bw);
4819 DLB2_LOG_ERR("%s: Error parsing cos_bw",
4821 rte_kvargs_free(kvlist);
4826 rte_kvargs_free(kvlist);
4831 RTE_LOG_REGISTER_DEFAULT(eventdev_dlb2_log_level, NOTICE);