1 /* SPDX-License-Identifier: BSD-3-Clause
2 * Copyright(c) 2016-2020 Intel Corporation
16 #include <rte_common.h>
17 #include <rte_config.h>
18 #include <rte_cycles.h>
19 #include <rte_debug.h>
21 #include <rte_errno.h>
22 #include <rte_eventdev.h>
23 #include <eventdev_pmd.h>
25 #include <rte_kvargs.h>
27 #include <rte_malloc.h>
29 #include <rte_power_intrinsics.h>
30 #include <rte_prefetch.h>
32 #include <rte_string_fns.h>
34 #include "dlb2_priv.h"
35 #include "dlb2_iface.h"
36 #include "dlb2_inline_fns.h"
39 * Resources exposed to eventdev. Some values overridden at runtime using
40 * values returned by the DLB kernel driver.
42 #if (RTE_EVENT_MAX_QUEUES_PER_DEV > UINT8_MAX)
43 #error "RTE_EVENT_MAX_QUEUES_PER_DEV cannot fit in member max_event_queues"
/* Default device info reported to applications via rte_event_dev_info_get().
 * Queue/port/event limits here are compile-time maxima; several fields
 * (max_event_queues, max_event_ports, max_num_events) are overwritten at
 * runtime by dlb2_hw_query_resources() with what the driver actually owns.
 */
45 static struct rte_event_dev_info evdev_dlb2_default_info = {
46 .driver_name = "", /* probe will set */
47 .min_dequeue_timeout_ns = DLB2_MIN_DEQUEUE_TIMEOUT_NS,
48 .max_dequeue_timeout_ns = DLB2_MAX_DEQUEUE_TIMEOUT_NS,
49 #if (RTE_EVENT_MAX_QUEUES_PER_DEV < DLB2_MAX_NUM_LDB_QUEUES)
50 .max_event_queues = RTE_EVENT_MAX_QUEUES_PER_DEV,
/* else branch: hardware supports fewer queues than the eventdev max */
52 .max_event_queues = DLB2_MAX_NUM_LDB_QUEUES,
54 .max_event_queue_flows = DLB2_MAX_NUM_FLOWS,
55 .max_event_queue_priority_levels = DLB2_QID_PRIORITIES,
56 .max_event_priority_levels = DLB2_QID_PRIORITIES,
57 .max_event_ports = DLB2_MAX_NUM_LDB_PORTS,
58 .max_event_port_dequeue_depth = DLB2_MAX_CQ_DEPTH,
59 .max_event_port_enqueue_depth = DLB2_MAX_ENQUEUE_DEPTH,
60 .max_event_port_links = DLB2_MAX_NUM_QIDS_PER_LDB_CQ,
61 .max_num_events = DLB2_MAX_NUM_LDB_CREDITS,
62 .max_single_link_event_port_queue_pairs =
63 DLB2_MAX_NUM_DIR_PORTS(DLB2_HW_V2),
64 .event_dev_cap = (RTE_EVENT_DEV_CAP_QUEUE_QOS |
65 RTE_EVENT_DEV_CAP_EVENT_QOS |
66 RTE_EVENT_DEV_CAP_BURST_MODE |
67 RTE_EVENT_DEV_CAP_DISTRIBUTED_SCHED |
68 RTE_EVENT_DEV_CAP_IMPLICIT_RELEASE_DISABLE |
69 RTE_EVENT_DEV_CAP_QUEUE_ALL_TYPES),
/* Per-process port state (e.g. the memzone freed in dlb2_free_qe_mem()),
 * indexed by hardware port id and port type (load-balanced vs directed).
 */
72 struct process_local_port_data
73 dlb2_port[DLB2_MAX_NUM_PORTS_ALL][DLB2_NUM_PORT_TYPES];
/* Free all per-port QE (queue entry) allocations: the 4-entry enqueue QE
 * buffer, the interrupt-arm QE, the CQ-pop ("consume") QE, and the
 * process-local memzone for this port. Freed pointers are reset to NULL
 * so a repeated call is harmless.
 */
76 dlb2_free_qe_mem(struct dlb2_port *qm_port)
81 rte_free(qm_port->qe4);
84 rte_free(qm_port->int_arm_qe);
85 qm_port->int_arm_qe = NULL;
87 rte_free(qm_port->consume_qe);
88 qm_port->consume_qe = NULL;
90 rte_memzone_free(dlb2_port[qm_port->id][PORT_TYPE(qm_port)].mz);
91 dlb2_port[qm_port->id][PORT_TYPE(qm_port)].mz = NULL;
94 /* override defaults with value(s) provided on command line */
/* Apply per-queue depth thresholds collected from the qid_depth_thresh
 * devarg. A zero entry means "not specified"; the queue keeps its default.
 */
96 dlb2_init_queue_depth_thresholds(struct dlb2_eventdev *dlb2,
97 int *qid_depth_thresholds)
101 for (q = 0; q < DLB2_MAX_NUM_QUEUES(dlb2->version); q++) {
102 if (qid_depth_thresholds[q] != 0)
103 dlb2->ev_queues[q].depth_threshold =
104 qid_depth_thresholds[q];
/* Query the kernel driver for the resources provisioned to this device and
 * fold the results into both the eventdev default info (reported to apps)
 * and handle->info (used when creating the scheduling domain).
 * V2.5 hardware uses a single combined credit pool ("num_credits"); V2
 * keeps separate LDB/DIR pools, hence the version checks below.
 */
109 dlb2_hw_query_resources(struct dlb2_eventdev *dlb2)
111 struct dlb2_hw_dev *handle = &dlb2->qm_instance;
112 struct dlb2_hw_resource_info *dlb2_info = &handle->info;
115 /* Query driver resources provisioned for this device */
117 ret = dlb2_iface_get_num_resources(handle,
118 &dlb2->hw_rsrc_query_results);
120 DLB2_LOG_ERR("ioctl get dlb2 num resources, err=%d\n", ret);
124 /* Complete filling in device resource info returned to evdev app,
125 * overriding any default values.
126 * The capabilities (CAPs) were set at compile time.
129 evdev_dlb2_default_info.max_event_queues =
130 dlb2->hw_rsrc_query_results.num_ldb_queues;
132 evdev_dlb2_default_info.max_event_ports =
133 dlb2->hw_rsrc_query_results.num_ldb_ports;
135 if (dlb2->version == DLB2_HW_V2_5) {
136 evdev_dlb2_default_info.max_num_events =
137 dlb2->hw_rsrc_query_results.num_credits;
/* V2: load-balanced credits bound the event limit */
139 evdev_dlb2_default_info.max_num_events =
140 dlb2->hw_rsrc_query_results.num_ldb_credits;
142 /* Save off values used when creating the scheduling domain. */
144 handle->info.num_sched_domains =
145 dlb2->hw_rsrc_query_results.num_sched_domains;
147 if (dlb2->version == DLB2_HW_V2_5) {
148 handle->info.hw_rsrc_max.nb_events_limit =
149 dlb2->hw_rsrc_query_results.num_credits;
151 handle->info.hw_rsrc_max.nb_events_limit =
152 dlb2->hw_rsrc_query_results.num_ldb_credits;
/* Total queues = LDB queues + one DIR queue per DIR port */
154 handle->info.hw_rsrc_max.num_queues =
155 dlb2->hw_rsrc_query_results.num_ldb_queues +
156 dlb2->hw_rsrc_query_results.num_dir_ports;
158 handle->info.hw_rsrc_max.num_ldb_queues =
159 dlb2->hw_rsrc_query_results.num_ldb_queues;
161 handle->info.hw_rsrc_max.num_ldb_ports =
162 dlb2->hw_rsrc_query_results.num_ldb_ports;
164 handle->info.hw_rsrc_max.num_dir_ports =
165 dlb2->hw_rsrc_query_results.num_dir_ports;
167 handle->info.hw_rsrc_max.reorder_window_size =
168 dlb2->hw_rsrc_query_results.num_hist_list_entries;
170 rte_memcpy(dlb2_info, &handle->info.hw_rsrc_max, sizeof(*dlb2_info));
175 #define DLB2_BASE_10 10
/* Parse a decimal string into *result using strtol(), rejecting NULL
 * arguments, non-numeric input (endptr == str), and values outside the
 * range of int (strtol returns long, which may be wider than int).
 */
178 dlb2_string_to_int(int *result, const char *str)
183 if (str == NULL || result == NULL)
187 ret = strtol(str, &endptr, DLB2_BASE_10);
191 /* long int and int may be different width for some architectures */
192 if (ret < INT_MIN || ret > INT_MAX || endptr == str)
/* rte_kvargs callback for the numa_node devarg: parse the value and
 * reject socket ids above RTE_MAX_NUMA_NODES.
 */
200 set_numa_node(const char *key __rte_unused, const char *value, void *opaque)
202 int *socket_id = opaque;
205 ret = dlb2_string_to_int(socket_id, value);
209 if (*socket_id > RTE_MAX_NUMA_NODES)
/* rte_kvargs callback for the max_num_events devarg: parse and range-check
 * against the V2 LDB credit maximum.
 */
215 set_max_num_events(const char *key __rte_unused,
219 int *max_num_events = opaque;
222 if (value == NULL || opaque == NULL) {
223 DLB2_LOG_ERR("NULL pointer\n");
227 ret = dlb2_string_to_int(max_num_events, value);
231 if (*max_num_events < 0 || *max_num_events >
232 DLB2_MAX_NUM_LDB_CREDITS) {
233 DLB2_LOG_ERR("dlb2: max_num_events must be between 0 and %d\n",
234 DLB2_MAX_NUM_LDB_CREDITS);
/* rte_kvargs callback for the num_dir_credits devarg: parse and range-check
 * against the V2 directed-credit maximum (devarg only meaningful on V2).
 */
242 set_num_dir_credits(const char *key __rte_unused,
246 int *num_dir_credits = opaque;
249 if (value == NULL || opaque == NULL) {
250 DLB2_LOG_ERR("NULL pointer\n");
254 ret = dlb2_string_to_int(num_dir_credits, value);
258 if (*num_dir_credits < 0 ||
259 *num_dir_credits > DLB2_MAX_NUM_DIR_CREDITS(DLB2_HW_V2)) {
260 DLB2_LOG_ERR("dlb2: num_dir_credits must be between 0 and %d\n",
261 DLB2_MAX_NUM_DIR_CREDITS(DLB2_HW_V2));
/* rte_kvargs callback for the dev_id devarg: parse into an int; no range
 * check is visible here (presumably validated by the driver — confirm).
 */
269 set_dev_id(const char *key __rte_unused,
273 int *dev_id = opaque;
276 if (value == NULL || opaque == NULL) {
277 DLB2_LOG_ERR("NULL pointer\n");
281 ret = dlb2_string_to_int(dev_id, value);
/* rte_kvargs callback for the cos (class-of-service) devarg: accept
 * DLB2_COS_DEFAULT or an explicit class in [DLB2_COS_0, DLB2_COS_3].
 * Parsed into a local int first, then validated before use as an enum.
 */
289 set_cos(const char *key __rte_unused,
293 enum dlb2_cos *cos_id = opaque;
297 if (value == NULL || opaque == NULL) {
298 DLB2_LOG_ERR("NULL pointer\n");
302 ret = dlb2_string_to_int(&x, value);
306 if (x != DLB2_COS_DEFAULT && (x < DLB2_COS_0 || x > DLB2_COS_3)) {
308 "COS %d out of range, must be DLB2_COS_DEFAULT or 0-3\n",
/* rte_kvargs callback for the poll_interval devarg: parse into an int. */
319 set_poll_interval(const char *key __rte_unused,
323 int *poll_interval = opaque;
326 if (value == NULL || opaque == NULL) {
327 DLB2_LOG_ERR("NULL pointer\n");
331 ret = dlb2_string_to_int(poll_interval, value);
/* rte_kvargs callback for the sw_credit_quanta devarg: parse into an int. */
339 set_sw_credit_quanta(const char *key __rte_unused,
343 int *sw_credit_quanta = opaque;
346 if (value == NULL || opaque == NULL) {
347 DLB2_LOG_ERR("NULL pointer\n");
351 ret = dlb2_string_to_int(sw_credit_quanta, value);
/* rte_kvargs callback for the default_depth_thresh devarg: parse into an
 * int; consumed later by dlb2_hw_create_ldb_queue() for queues with no
 * explicit threshold.
 */
359 set_default_depth_thresh(const char *key __rte_unused,
363 int *default_depth_thresh = opaque;
366 if (value == NULL || opaque == NULL) {
367 DLB2_LOG_ERR("NULL pointer\n");
371 ret = dlb2_string_to_int(default_depth_thresh, value);
/* rte_kvargs callback for the vector_opts_enable devarg: 'y'/'Y' enables
 * vectorized datapath options, anything else disables them.
 */
379 set_vector_opts_enab(const char *key __rte_unused,
383 bool *dlb2_vector_opts_enabled = opaque;
385 if (value == NULL || opaque == NULL) {
386 DLB2_LOG_ERR("NULL pointer\n");
390 if ((*value == 'y') || (*value == 'Y'))
391 *dlb2_vector_opts_enabled = true;
393 *dlb2_vector_opts_enabled = false;
/* rte_kvargs callback for the qid_depth_thresh devarg (V2 hardware).
 * Parses "all:T", "A-B:T", or "Q:T", validates the qid range and the
 * threshold against DLB2_MAX_QUEUE_DEPTH_THRESHOLD, then records T for
 * each qid in the range.
 */
399 set_qid_depth_thresh(const char *key __rte_unused,
403 struct dlb2_qid_depth_thresholds *qid_thresh = opaque;
404 int first, last, thresh, i;
406 if (value == NULL || opaque == NULL) {
407 DLB2_LOG_ERR("NULL pointer\n");
411 /* command line override may take one of the following 3 forms:
412 * qid_depth_thresh=all:<threshold_value> ... all queues
413 * qid_depth_thresh=qidA-qidB:<threshold_value> ... a range of queues
414 * qid_depth_thresh=qid:<threshold_value> ... just one queue
416 if (sscanf(value, "all:%d", &thresh) == 1) {
/* "all" covers qid 0 through the V2 maximum */
418 last = DLB2_MAX_NUM_QUEUES(DLB2_HW_V2) - 1;
419 } else if (sscanf(value, "%d-%d:%d", &first, &last, &thresh) == 3) {
420 /* we have everything we need */
421 } else if (sscanf(value, "%d:%d", &first, &thresh) == 2) {
424 DLB2_LOG_ERR("Error parsing qid depth devarg. Should be all:val, qid-qid:val, or qid:val\n");
428 if (first > last || first < 0 ||
429 last >= DLB2_MAX_NUM_QUEUES(DLB2_HW_V2)) {
430 DLB2_LOG_ERR("Error parsing qid depth devarg, invalid qid value\n");
434 if (thresh < 0 || thresh > DLB2_MAX_QUEUE_DEPTH_THRESHOLD) {
435 DLB2_LOG_ERR("Error parsing qid depth devarg, threshold > %d\n",
436 DLB2_MAX_QUEUE_DEPTH_THRESHOLD);
440 for (i = first; i <= last; i++)
441 qid_thresh->val[i] = thresh; /* indexed by qid */
/* V2.5 variant of set_qid_depth_thresh(): identical parsing and
 * validation, but the qid range is bounded by the V2.5 queue maximum.
 */
447 set_qid_depth_thresh_v2_5(const char *key __rte_unused,
451 struct dlb2_qid_depth_thresholds *qid_thresh = opaque;
452 int first, last, thresh, i;
454 if (value == NULL || opaque == NULL) {
455 DLB2_LOG_ERR("NULL pointer\n");
459 /* command line override may take one of the following 3 forms:
460 * qid_depth_thresh=all:<threshold_value> ... all queues
461 * qid_depth_thresh=qidA-qidB:<threshold_value> ... a range of queues
462 * qid_depth_thresh=qid:<threshold_value> ... just one queue
464 if (sscanf(value, "all:%d", &thresh) == 1) {
466 last = DLB2_MAX_NUM_QUEUES(DLB2_HW_V2_5) - 1;
467 } else if (sscanf(value, "%d-%d:%d", &first, &last, &thresh) == 3) {
468 /* we have everything we need */
469 } else if (sscanf(value, "%d:%d", &first, &thresh) == 2) {
472 DLB2_LOG_ERR("Error parsing qid depth devarg. Should be all:val, qid-qid:val, or qid:val\n");
476 if (first > last || first < 0 ||
477 last >= DLB2_MAX_NUM_QUEUES(DLB2_HW_V2_5)) {
478 DLB2_LOG_ERR("Error parsing qid depth devarg, invalid qid value\n");
482 if (thresh < 0 || thresh > DLB2_MAX_QUEUE_DEPTH_THRESHOLD) {
483 DLB2_LOG_ERR("Error parsing qid depth devarg, threshold > %d\n",
484 DLB2_MAX_QUEUE_DEPTH_THRESHOLD);
488 for (i = first; i <= last; i++)
489 qid_thresh->val[i] = thresh; /* indexed by qid */
/* Eventdev info_get op. Re-queries hardware resources, then adds back the
 * queues/ports/credits currently owned by this domain (these become free
 * again if the application reconfigures) before clamping to the eventdev
 * library limits and the user's max_num_events override.
 * NOTE(review): this op is void, so a query failure falls through and
 * returns the previously cached defaults.
 */
495 dlb2_eventdev_info_get(struct rte_eventdev *dev,
496 struct rte_event_dev_info *dev_info)
498 struct dlb2_eventdev *dlb2 = dlb2_pmd_priv(dev);
501 ret = dlb2_hw_query_resources(dlb2);
503 const struct rte_eventdev_data *data = dev->data;
505 DLB2_LOG_ERR("get resources err=%d, devid=%d\n",
507 /* fn is void, so fall through and return values set up in
512 /* Add num resources currently owned by this domain.
513 * These would become available if the scheduling domain were reset due
514 * to the application recalling eventdev_configure to *reconfigure* the
517 evdev_dlb2_default_info.max_event_ports += dlb2->num_ldb_ports;
518 evdev_dlb2_default_info.max_event_queues += dlb2->num_ldb_queues;
519 if (dlb2->version == DLB2_HW_V2_5) {
520 evdev_dlb2_default_info.max_num_events +=
/* V2 path: add back the owned load-balanced credits */
523 evdev_dlb2_default_info.max_num_events +=
524 dlb2->max_ldb_credits;
526 evdev_dlb2_default_info.max_event_queues =
527 RTE_MIN(evdev_dlb2_default_info.max_event_queues,
528 RTE_EVENT_MAX_QUEUES_PER_DEV);
530 evdev_dlb2_default_info.max_num_events =
531 RTE_MIN(evdev_dlb2_default_info.max_num_events,
532 dlb2->max_num_events_override);
534 *dev_info = evdev_dlb2_default_info;
/* Translate the generic resource request into a driver sched-domain create
 * request and issue it. On success the returned domain id is cached in the
 * handle and the handle is marked configured.
 * Class-of-service handling: with an explicit cos_id all LDB ports are
 * pinned to that class; with DLB2_COS_DEFAULT they are "don't care" ports.
 * Credit fields differ by hardware version (combined pool on V2.5,
 * separate LDB/DIR pools on V2).
 */
538 dlb2_hw_create_sched_domain(struct dlb2_hw_dev *handle,
539 const struct dlb2_hw_rsrcs *resources_asked,
540 uint8_t device_version)
543 struct dlb2_create_sched_domain_args *cfg;
545 if (resources_asked == NULL) {
546 DLB2_LOG_ERR("dlb2: dlb2_create NULL parameter\n");
551 /* Map generic qm resources to dlb2 resources */
552 cfg = &handle->cfg.resources;
554 /* DIR ports and queues */
556 cfg->num_dir_ports = resources_asked->num_dir_ports;
557 if (device_version == DLB2_HW_V2_5)
558 cfg->num_credits = resources_asked->num_credits;
560 cfg->num_dir_credits = resources_asked->num_dir_credits;
564 cfg->num_ldb_queues = resources_asked->num_ldb_queues;
568 cfg->cos_strict = 0; /* Best effort */
569 cfg->num_cos_ldb_ports[0] = 0;
570 cfg->num_cos_ldb_ports[1] = 0;
571 cfg->num_cos_ldb_ports[2] = 0;
572 cfg->num_cos_ldb_ports[3] = 0;
574 switch (handle->cos_id) {
576 cfg->num_ldb_ports = 0; /* no don't care ports */
577 cfg->num_cos_ldb_ports[0] =
578 resources_asked->num_ldb_ports;
581 cfg->num_ldb_ports = 0; /* no don't care ports */
582 cfg->num_cos_ldb_ports[1] = resources_asked->num_ldb_ports;
585 cfg->num_ldb_ports = 0; /* no don't care ports */
586 cfg->num_cos_ldb_ports[2] = resources_asked->num_ldb_ports;
589 cfg->num_ldb_ports = 0; /* no don't care ports */
590 cfg->num_cos_ldb_ports[3] =
591 resources_asked->num_ldb_ports;
593 case DLB2_COS_DEFAULT:
594 /* all ldb ports are don't care ports from a cos perspective */
596 resources_asked->num_ldb_ports;
600 if (device_version == DLB2_HW_V2)
601 cfg->num_ldb_credits = resources_asked->num_ldb_credits;
/* Atomic inflights and history-list entries scale with the request */
603 cfg->num_atomic_inflights =
604 DLB2_NUM_ATOMIC_INFLIGHTS_PER_QUEUE *
607 cfg->num_hist_list_entries = resources_asked->num_ldb_ports *
608 DLB2_NUM_HIST_LIST_ENTRIES_PER_LDB_PORT;
610 if (device_version == DLB2_HW_V2_5) {
611 DLB2_LOG_DBG("sched domain create - ldb_qs=%d, ldb_ports=%d, dir_ports=%d, atomic_inflights=%d, hist_list_entries=%d, credits=%d\n",
613 resources_asked->num_ldb_ports,
615 cfg->num_atomic_inflights,
616 cfg->num_hist_list_entries,
619 DLB2_LOG_DBG("sched domain create - ldb_qs=%d, ldb_ports=%d, dir_ports=%d, atomic_inflights=%d, hist_list_entries=%d, ldb_credits=%d, dir_credits=%d\n",
621 resources_asked->num_ldb_ports,
623 cfg->num_atomic_inflights,
624 cfg->num_hist_list_entries,
625 cfg->num_ldb_credits,
626 cfg->num_dir_credits);
629 /* Configure the QM */
631 ret = dlb2_iface_sched_domain_create(handle, cfg);
633 DLB2_LOG_ERR("dlb2: domain create failed, ret = %d, extra status: %s\n",
635 dlb2_error_strings[cfg->response.status]);
/* Success: remember the domain and mark the handle configured */
640 handle->domain_id = cfg->response.id;
641 handle->cfg.configured = true;
/* Reset the scheduling domain: tell the driver to reset, free all port QE
 * memory, and mark every port/queue either "previously configured" (when
 * reconfiguring, so their config is reapplied at start) or unconfigured.
 * All cached resource counts are zeroed at the end.
 */
649 dlb2_hw_reset_sched_domain(const struct rte_eventdev *dev, bool reconfig)
651 struct dlb2_eventdev *dlb2 = dlb2_pmd_priv(dev);
652 enum dlb2_configuration_state config_state;
655 dlb2_iface_domain_reset(dlb2);
657 /* Free all dynamically allocated port memory */
658 for (i = 0; i < dlb2->num_ports; i++)
659 dlb2_free_qe_mem(&dlb2->ev_ports[i].qm_port);
661 /* If reconfiguring, mark the device's queues and ports as "previously
662 * configured." If the user doesn't reconfigure them, the PMD will
663 * reapply their previous configuration when the device is started.
665 config_state = (reconfig) ? DLB2_PREV_CONFIGURED :
668 for (i = 0; i < dlb2->num_ports; i++) {
669 dlb2->ev_ports[i].qm_port.config_state = config_state;
670 /* Reset setup_done so ports can be reconfigured */
671 dlb2->ev_ports[i].setup_done = false;
672 for (j = 0; j < DLB2_MAX_NUM_QIDS_PER_LDB_CQ; j++)
673 dlb2->ev_ports[i].link[j].mapped = false;
676 for (i = 0; i < dlb2->num_queues; i++)
677 dlb2->ev_queues[i].qm_queue.config_state = config_state;
/* Clear setup_done over the full V2.5-sized queue array (superset) */
679 for (i = 0; i < DLB2_MAX_NUM_QUEUES(DLB2_HW_V2_5); i++)
680 dlb2->ev_queues[i].setup_done = false;
683 dlb2->num_ldb_ports = 0;
684 dlb2->num_dir_ports = 0;
685 dlb2->num_queues = 0;
686 dlb2->num_ldb_queues = 0;
687 dlb2->num_dir_queues = 0;
688 dlb2->configured = false;
691 /* Note: 1 QM instance per QM device, QM instance/device == event device */
/* Eventdev configure op. If already configured, resets the domain first
 * and re-queries resources. Validates the requested queue/port/event
 * counts against hardware limits, derives credit pools from
 * nb_events_limit (split LDB/DIR on V2, combined on V2.5, DIR credits
 * scaled down by 4 unless overridden by devarg), creates the scheduling
 * domain, and caches the final counts on the dlb2 instance.
 */
693 dlb2_eventdev_configure(const struct rte_eventdev *dev)
695 struct dlb2_eventdev *dlb2 = dlb2_pmd_priv(dev);
696 struct dlb2_hw_dev *handle = &dlb2->qm_instance;
697 struct dlb2_hw_rsrcs *rsrcs = &handle->info.hw_rsrc_max;
698 const struct rte_eventdev_data *data = dev->data;
699 const struct rte_event_dev_config *config = &data->dev_conf;
702 /* If this eventdev is already configured, we must release the current
703 * scheduling domain before attempting to configure a new one.
705 if (dlb2->configured) {
706 dlb2_hw_reset_sched_domain(dev, true);
707 ret = dlb2_hw_query_resources(dlb2);
709 DLB2_LOG_ERR("get resources err=%d, devid=%d\n",
715 if (config->nb_event_queues > rsrcs->num_queues) {
716 DLB2_LOG_ERR("nb_event_queues parameter (%d) exceeds the QM device's capabilities (%d).\n",
717 config->nb_event_queues,
721 if (config->nb_event_ports > (rsrcs->num_ldb_ports
722 + rsrcs->num_dir_ports)) {
723 DLB2_LOG_ERR("nb_event_ports parameter (%d) exceeds the QM device's capabilities (%d).\n",
724 config->nb_event_ports,
725 (rsrcs->num_ldb_ports + rsrcs->num_dir_ports));
728 if (config->nb_events_limit > rsrcs->nb_events_limit) {
729 DLB2_LOG_ERR("nb_events_limit parameter (%d) exceeds the QM device's capabilities (%d).\n",
730 config->nb_events_limit,
731 rsrcs->nb_events_limit);
/* Global dequeue wait applies when a single timeout is configured for
 * the whole device rather than per-dequeue.
 */
735 if (config->event_dev_cfg & RTE_EVENT_DEV_CFG_PER_DEQUEUE_TIMEOUT)
736 dlb2->global_dequeue_wait = false;
740 dlb2->global_dequeue_wait = true;
742 /* note size mismatch of timeout vals in eventdev lib. */
743 timeout32 = config->dequeue_timeout_ns;
745 dlb2->global_dequeue_wait_ticks =
746 timeout32 * (rte_get_timer_hz() / 1E9);
749 /* Does this platform support umonitor/umwait? */
750 if (rte_cpu_get_flag_enabled(RTE_CPUFLAG_WAITPKG))
751 dlb2->umwait_allowed = true;
753 rsrcs->num_dir_ports = config->nb_single_link_event_port_queues;
754 rsrcs->num_ldb_ports = config->nb_event_ports - rsrcs->num_dir_ports;
755 /* 1 dir queue per dir port */
756 rsrcs->num_ldb_queues = config->nb_event_queues - rsrcs->num_dir_ports;
758 if (dlb2->version == DLB2_HW_V2_5) {
759 rsrcs->num_credits = 0;
760 if (rsrcs->num_ldb_queues || rsrcs->num_dir_ports)
761 rsrcs->num_credits = config->nb_events_limit;
763 /* Scale down nb_events_limit by 4 for directed credits,
764 * since there are 4x as many load-balanced credits.
766 rsrcs->num_ldb_credits = 0;
767 rsrcs->num_dir_credits = 0;
769 if (rsrcs->num_ldb_queues)
770 rsrcs->num_ldb_credits = config->nb_events_limit;
771 if (rsrcs->num_dir_ports)
772 rsrcs->num_dir_credits = config->nb_events_limit / 4;
773 if (dlb2->num_dir_credits_override != -1)
774 rsrcs->num_dir_credits = dlb2->num_dir_credits_override;
777 if (dlb2_hw_create_sched_domain(handle, rsrcs, dlb2->version) < 0) {
778 DLB2_LOG_ERR("dlb2_hw_create_sched_domain failed\n");
782 dlb2->new_event_limit = config->nb_events_limit;
783 __atomic_store_n(&dlb2->inflights, 0, __ATOMIC_SEQ_CST);
785 /* Save number of ports/queues for this event dev */
786 dlb2->num_ports = config->nb_event_ports;
787 dlb2->num_queues = config->nb_event_queues;
788 dlb2->num_dir_ports = rsrcs->num_dir_ports;
789 dlb2->num_ldb_ports = dlb2->num_ports - dlb2->num_dir_ports;
790 dlb2->num_ldb_queues = dlb2->num_queues - dlb2->num_dir_ports;
791 dlb2->num_dir_queues = dlb2->num_dir_ports;
792 if (dlb2->version == DLB2_HW_V2_5) {
793 dlb2->credit_pool = rsrcs->num_credits;
794 dlb2->max_credits = rsrcs->num_credits;
796 dlb2->ldb_credit_pool = rsrcs->num_ldb_credits;
797 dlb2->max_ldb_credits = rsrcs->num_ldb_credits;
798 dlb2->dir_credit_pool = rsrcs->num_dir_credits;
799 dlb2->max_dir_credits = rsrcs->num_dir_credits;
802 dlb2->configured = true;
/* Eventdev port_def_conf op: report default port settings. The new-event
 * threshold defaults to the device-wide event limit; dequeue depth 32 is
 * a driver-chosen default, enqueue depth is the hardware maximum.
 */
808 dlb2_eventdev_port_default_conf_get(struct rte_eventdev *dev,
810 struct rte_event_port_conf *port_conf)
812 RTE_SET_USED(port_id);
813 struct dlb2_eventdev *dlb2 = dlb2_pmd_priv(dev);
815 port_conf->new_event_threshold = dlb2->new_event_limit;
816 port_conf->dequeue_depth = 32;
817 port_conf->enqueue_depth = DLB2_MAX_ENQUEUE_DEPTH;
818 port_conf->event_port_cfg = 0;
/* Eventdev queue_def_conf op: report default queue settings (1024 atomic
 * flows, 64 ordered sequences, no special cfg flags, priority 0).
 */
822 dlb2_eventdev_queue_default_conf_get(struct rte_eventdev *dev,
824 struct rte_event_queue_conf *queue_conf)
827 RTE_SET_USED(queue_id);
829 queue_conf->nb_atomic_flows = 1024;
830 queue_conf->nb_atomic_order_sequences = 64;
831 queue_conf->event_queue_cfg = 0;
832 queue_conf->priority = 0;
/* Query the sequence-number allocation of an SN group via the driver.
 * Returns the allocation (cfg.response.id) on success; on failure the
 * driver's negative status is logged (elided return path — presumably a
 * negative value, consistent with callers checking < 0).
 */
836 dlb2_get_sn_allocation(struct dlb2_eventdev *dlb2, int group)
838 struct dlb2_hw_dev *handle = &dlb2->qm_instance;
839 struct dlb2_get_sn_allocation_args cfg;
844 ret = dlb2_iface_get_sn_allocation(handle, &cfg);
846 DLB2_LOG_ERR("dlb2: get_sn_allocation ret=%d (driver status: %s)\n",
847 ret, dlb2_error_strings[cfg.response.status]);
851 return cfg.response.id;
/* Program an SN group's sequence-number allocation via the driver.
 * Failures are logged with the driver status string.
 */
855 dlb2_set_sn_allocation(struct dlb2_eventdev *dlb2, int group, int num)
857 struct dlb2_hw_dev *handle = &dlb2->qm_instance;
858 struct dlb2_set_sn_allocation_args cfg;
864 ret = dlb2_iface_set_sn_allocation(handle, &cfg);
866 DLB2_LOG_ERR("dlb2: set_sn_allocation ret=%d (driver status: %s)\n",
867 ret, dlb2_error_strings[cfg.response.status]);
/* Query how many slots of an SN group are currently in use. Returns the
 * occupancy (cfg.response.id) on success; failures are logged.
 */
875 dlb2_get_sn_occupancy(struct dlb2_eventdev *dlb2, int group)
877 struct dlb2_hw_dev *handle = &dlb2->qm_instance;
878 struct dlb2_get_sn_occupancy_args cfg;
883 ret = dlb2_iface_get_sn_occupancy(handle, &cfg);
885 DLB2_LOG_ERR("dlb2: get_sn_occupancy ret=%d (driver status: %s)\n",
886 ret, dlb2_error_strings[cfg.response.status]);
890 return cfg.response.id;
893 /* Query the current sequence number allocations and, if they conflict with the
894 * requested LDB queue configuration, attempt to re-allocate sequence numbers.
895 * This is best-effort; if it fails, the PMD will attempt to configure the
896 * load-balanced queue and return an error.
899 dlb2_program_sn_allocation(struct dlb2_eventdev *dlb2,
900 const struct rte_event_queue_conf *queue_conf)
902 int grp_occupancy[DLB2_NUM_SN_GROUPS];
903 int grp_alloc[DLB2_NUM_SN_GROUPS];
904 int i, sequence_numbers;
906 sequence_numbers = (int)queue_conf->nb_atomic_order_sequences;
/* Pass 1: look for a group already configured with the requested SN
 * count that still has a free slot.
 */
908 for (i = 0; i < DLB2_NUM_SN_GROUPS; i++) {
911 grp_alloc[i] = dlb2_get_sn_allocation(dlb2, i);
912 if (grp_alloc[i] < 0)
/* Slots per group = total SN space / per-slot allocation */
915 total_slots = DLB2_MAX_LDB_SN_ALLOC / grp_alloc[i];
917 grp_occupancy[i] = dlb2_get_sn_occupancy(dlb2, i);
918 if (grp_occupancy[i] < 0)
921 /* DLB has at least one available slot for the requested
922 * sequence numbers, so no further configuration required.
924 if (grp_alloc[i] == sequence_numbers &&
925 grp_occupancy[i] < total_slots)
929 /* None of the sequence number groups are configured for the requested
930 * sequence numbers, so we have to reconfigure one of them. This is
931 * only possible if a group is not in use.
933 for (i = 0; i < DLB2_NUM_SN_GROUPS; i++) {
934 if (grp_occupancy[i] == 0)
938 if (i == DLB2_NUM_SN_GROUPS) {
939 DLB2_LOG_ERR("[%s()] No groups with %d sequence_numbers are available or have free slots\n",
940 __func__, sequence_numbers);
944 /* Attempt to configure slot i with the requested number of sequence
945 * numbers. Ignore the return value -- if this fails, the error will be
946 * caught during subsequent queue configuration.
948 dlb2_set_sn_allocation(dlb2, i, sequence_numbers);
/* Build and issue a driver request to create one load-balanced hardware
 * queue from the eventdev queue config. Ordered queues get sequence
 * numbers/QID inflights from nb_atomic_order_sequences; non-ordered
 * queues get zero SNs and a fixed 2048 QID-inflight budget. The atomic
 * lock-id compression level is taken from nb_atomic_flows only when it
 * matches a hardware-supported level, else compression is disabled.
 * Returns the hardware queue id on success.
 */
952 dlb2_hw_create_ldb_queue(struct dlb2_eventdev *dlb2,
953 struct dlb2_eventdev_queue *ev_queue,
954 const struct rte_event_queue_conf *evq_conf)
956 struct dlb2_hw_dev *handle = &dlb2->qm_instance;
957 struct dlb2_queue *queue = &ev_queue->qm_queue;
958 struct dlb2_create_ldb_queue_args cfg;
963 if (evq_conf == NULL)
/* ALL_TYPES queues are ordered only if they request order sequences */
966 if (evq_conf->event_queue_cfg & RTE_EVENT_QUEUE_CFG_ALL_TYPES) {
967 if (evq_conf->nb_atomic_order_sequences != 0)
968 sched_type = RTE_SCHED_TYPE_ORDERED;
970 sched_type = RTE_SCHED_TYPE_PARALLEL;
972 sched_type = evq_conf->schedule_type;
974 cfg.num_atomic_inflights = DLB2_NUM_ATOMIC_INFLIGHTS_PER_QUEUE;
975 cfg.num_sequence_numbers = evq_conf->nb_atomic_order_sequences;
976 cfg.num_qid_inflights = evq_conf->nb_atomic_order_sequences;
978 if (sched_type != RTE_SCHED_TYPE_ORDERED) {
979 cfg.num_sequence_numbers = 0;
980 cfg.num_qid_inflights = 2048;
983 /* App should set this to the number of hardware flows they want, not
984 * the overall number of flows they're going to use. E.g. if app is
985 * using 64 flows and sets compression to 64, best-case they'll get
986 * 64 unique hashed flows in hardware.
988 switch (evq_conf->nb_atomic_flows) {
989 /* Valid DLB2 compression levels */
994 case (1 * 1024): /* 1K */
995 case (2 * 1024): /* 2K */
996 case (4 * 1024): /* 4K */
997 case (64 * 1024): /* 64K */
998 cfg.lock_id_comp_level = evq_conf->nb_atomic_flows;
1001 /* Invalid compression level */
1002 cfg.lock_id_comp_level = 0; /* no compression */
/* Fall back to the devarg-provided default depth threshold when the
 * queue has none, and record it on the ev_queue for stats/xstats.
 */
1005 if (ev_queue->depth_threshold == 0) {
1006 cfg.depth_threshold = dlb2->default_depth_thresh;
1007 ev_queue->depth_threshold =
1008 dlb2->default_depth_thresh;
1010 cfg.depth_threshold = ev_queue->depth_threshold;
1012 ret = dlb2_iface_ldb_queue_create(handle, &cfg);
1014 DLB2_LOG_ERR("dlb2: create LB event queue error, ret=%d (driver status: %s)\n",
1015 ret, dlb2_error_strings[cfg.response.status]);
1019 qm_qid = cfg.response.id;
1021 /* Save off queue config for debug, resource lookups, and reconfig */
1022 queue->num_qid_inflights = cfg.num_qid_inflights;
1023 queue->num_atm_inflights = cfg.num_atomic_inflights;
1025 queue->sched_type = sched_type;
1026 queue->config_state = DLB2_CONFIGURED;
1028 DLB2_LOG_DBG("Created LB event queue %d, nb_inflights=%d, nb_seq=%d, qid inflights=%d\n",
1030 cfg.num_atomic_inflights,
1031 cfg.num_sequence_numbers,
1032 cfg.num_qid_inflights);
/* Set up one load-balanced eventdev queue: (best-effort) program sequence
 * numbers if ordering is requested, create the hardware queue, then record
 * the qm_qid <-> ev_queue id mapping used by the datapath.
 */
1038 dlb2_eventdev_ldb_queue_setup(struct rte_eventdev *dev,
1039 struct dlb2_eventdev_queue *ev_queue,
1040 const struct rte_event_queue_conf *queue_conf)
1042 struct dlb2_eventdev *dlb2 = dlb2_pmd_priv(dev);
1045 if (queue_conf->nb_atomic_order_sequences)
1046 dlb2_program_sn_allocation(dlb2, queue_conf);
1048 qm_qid = dlb2_hw_create_ldb_queue(dlb2, ev_queue, queue_conf);
1050 DLB2_LOG_ERR("Failed to create the load-balanced queue\n");
/* Record the mapping from hardware qid to eventdev queue id */
1055 dlb2->qm_ldb_to_ev_queue_id[qm_qid] = ev_queue->id;
1057 ev_queue->qm_queue.id = qm_qid;
/* Count the directed queues that have already completed setup (used to
 * detect when no directed queues remain to be configured).
 */
1062 static int dlb2_num_dir_queues_setup(struct dlb2_eventdev *dlb2)
1066 for (i = 0; i < dlb2->num_queues; i++) {
1067 if (dlb2->ev_queues[i].setup_done &&
1068 dlb2->ev_queues[i].qm_queue.is_directed)
/* Invalidate every port->queue link that targets ev_queue, decrementing
 * each affected port's link count and zeroing the queue's own count.
 * Only the software link bookkeeping is touched here.
 */
1076 dlb2_queue_link_teardown(struct dlb2_eventdev *dlb2,
1077 struct dlb2_eventdev_queue *ev_queue)
1079 struct dlb2_eventdev_port *ev_port;
1082 for (i = 0; i < dlb2->num_ports; i++) {
1083 ev_port = &dlb2->ev_ports[i];
1085 for (j = 0; j < DLB2_MAX_NUM_QIDS_PER_LDB_CQ; j++) {
1086 if (!ev_port->link[j].valid ||
1087 ev_port->link[j].queue_id != ev_queue->id)
1090 ev_port->link[j].valid = false;
1091 ev_port->num_links--;
1095 ev_queue->num_links = 0;
/* Eventdev queue_setup op. Load-balanced queues are created immediately;
 * directed queues are deferred to link time (their port id is unknown
 * here) and merely marked not-configured. Pre-existing links are torn
 * down when the device is stopped.
 */
1099 dlb2_eventdev_queue_setup(struct rte_eventdev *dev,
1101 const struct rte_event_queue_conf *queue_conf)
1103 struct dlb2_eventdev *dlb2 = dlb2_pmd_priv(dev);
1104 struct dlb2_eventdev_queue *ev_queue;
1107 if (queue_conf == NULL)
1110 if (ev_qid >= dlb2->num_queues)
1113 ev_queue = &dlb2->ev_queues[ev_qid];
1115 ev_queue->qm_queue.is_directed = queue_conf->event_queue_cfg &
1116 RTE_EVENT_QUEUE_CFG_SINGLE_LINK;
1117 ev_queue->id = ev_qid;
1118 ev_queue->conf = *queue_conf;
1120 if (!ev_queue->qm_queue.is_directed) {
1121 ret = dlb2_eventdev_ldb_queue_setup(dev, ev_queue, queue_conf);
1123 /* The directed queue isn't setup until link time, at which
1124 * point we know its directed port ID. Directed queue setup
1125 * will only fail if this queue is already setup or there are
1126 * no directed queues left to configure.
1130 ev_queue->qm_queue.config_state = DLB2_NOT_CONFIGURED;
1132 if (ev_queue->setup_done ||
1133 dlb2_num_dir_queues_setup(dlb2) == dlb2->num_dir_queues)
1137 /* Tear down pre-existing port->queue links */
1138 if (!ret && dlb2->run_state == DLB2_RUN_STATE_STOPPED)
1139 dlb2_queue_link_teardown(dlb2, ev_queue);
/* Mark success so reconfiguration/link code can rely on it */
1142 ev_queue->setup_done = true;
/* Allocate the cache-line of CQ-pop ("consume") QEs for a port. The
 * token count is filled in at run time; completion IDs are disabled.
 */
1148 dlb2_init_consume_qe(struct dlb2_port *qm_port, char *mz_name)
1150 struct dlb2_cq_pop_qe *qe;
1152 qe = rte_zmalloc(mz_name,
1153 DLB2_NUM_QES_PER_CACHE_LINE *
1154 sizeof(struct dlb2_cq_pop_qe),
1155 RTE_CACHE_LINE_SIZE);
1158 DLB2_LOG_ERR("dlb2: no memory for consume_qe\n");
1161 qm_port->consume_qe = qe;
1167 /* Tokens value is 0-based; i.e. '0' returns 1 token, '1' returns 2,
1170 qe->tokens = 0; /* set at run time */
1173 /* Completion IDs are disabled */
1173 /* Completion IDs are disabled */
/* Allocate the cache-line of interrupt-arm QEs for a port (V2 encodes
 * INT ARM as CQ_TOKEN + FRAG). Completion IDs are disabled.
 */
1180 dlb2_init_int_arm_qe(struct dlb2_port *qm_port, char *mz_name)
1182 struct dlb2_enqueue_qe *qe;
1184 qe = rte_zmalloc(mz_name,
1185 DLB2_NUM_QES_PER_CACHE_LINE *
1186 sizeof(struct dlb2_enqueue_qe),
1187 RTE_CACHE_LINE_SIZE);
1190 DLB2_LOG_ERR("dlb2: no memory for complete_qe\n");
1193 qm_port->int_arm_qe = qe;
1195 /* V2 - INT ARM is CQ_TOKEN + FRAG */
1202 /* Completion IDs are disabled */
/* Allocate all per-port QE memory: the 4-QE enqueue buffer, the int-arm
 * QE, and the consume QE. On any failure all partial allocations are
 * released via dlb2_free_qe_mem().
 */
1209 dlb2_init_qe_mem(struct dlb2_port *qm_port, char *mz_name)
1213 sz = DLB2_NUM_QES_PER_CACHE_LINE * sizeof(struct dlb2_enqueue_qe);
1215 qm_port->qe4 = rte_zmalloc(mz_name, sz, RTE_CACHE_LINE_SIZE);
1217 if (qm_port->qe4 == NULL) {
1218 DLB2_LOG_ERR("dlb2: no qe4 memory\n");
1223 ret = dlb2_init_int_arm_qe(qm_port, mz_name);
1225 DLB2_LOG_ERR("dlb2: dlb2_init_int_arm_qe ret=%d\n", ret);
1229 ret = dlb2_init_consume_qe(qm_port, mz_name);
1231 DLB2_LOG_ERR("dlb2: dlb2_init_consume_qe ret=%d\n", ret);
/* Error path: release whatever was allocated above */
1239 dlb2_free_qe_mem(qm_port);
/* Forward declarations for the delayed-pop enqueue variants, installed by
 * dlb2_hw_create_ldb_port() when the port uses DELAYED_POP token mode.
 */
1244 static inline uint16_t
1245 dlb2_event_enqueue_delayed(void *event_port,
1246 const struct rte_event events[]);
1248 static inline uint16_t
1249 dlb2_event_enqueue_burst_delayed(void *event_port,
1250 const struct rte_event events[],
1253 static inline uint16_t
1254 dlb2_event_enqueue_new_burst_delayed(void *event_port,
1255 const struct rte_event events[],
1258 static inline uint16_t
1259 dlb2_event_enqueue_forward_burst_delayed(void *event_port,
1260 const struct rte_event events[],
1263 /* Generate the required bitmask for rotate-style expected QE gen bits.
1264 * This requires a pattern of 1's and zeros, starting with expected as
1265 * 1 bits, so when hardware writes 0's they're "new". This requires the
1266 * ring size to be powers of 2 to wrap correctly.
1269 dlb2_hw_cq_bitmask_init(struct dlb2_port *qm_port, uint32_t cq_depth)
1271 uint64_t cq_build_mask = 0;
/* Unsupported depth: leave masks unset and let scalar code handle it */
1275 return; /* need to fall back to scalar code */
1278 * all 1's in first u64, all zeros in second is correct bit pattern to
1279 * start. Special casing == 64 easier than adapting complex loop logic.
1281 if (cq_depth == 64) {
1282 qm_port->cq_rolling_mask = 0;
1283 qm_port->cq_rolling_mask_2 = -1;
/* Build alternating runs of cq_depth zeros then cq_depth ones */
1287 for (i = 0; i < 64; i += (cq_depth * 2))
1288 cq_build_mask |= ((1ULL << cq_depth) - 1) << (i + cq_depth);
1290 qm_port->cq_rolling_mask = cq_build_mask;
1291 qm_port->cq_rolling_mask_2 = cq_build_mask;
1295 dlb2_hw_create_ldb_port(struct dlb2_eventdev *dlb2,
1296 struct dlb2_eventdev_port *ev_port,
1297 uint32_t dequeue_depth,
1298 uint32_t enqueue_depth)
1300 struct dlb2_hw_dev *handle = &dlb2->qm_instance;
1301 struct dlb2_create_ldb_port_args cfg = { {0} };
1303 struct dlb2_port *qm_port = NULL;
1304 char mz_name[RTE_MEMZONE_NAMESIZE];
1305 uint32_t qm_port_id;
1306 uint16_t ldb_credit_high_watermark = 0;
1307 uint16_t dir_credit_high_watermark = 0;
1308 uint16_t credit_high_watermark = 0;
1313 if (dequeue_depth < DLB2_MIN_CQ_DEPTH) {
1314 DLB2_LOG_ERR("dlb2: invalid enqueue_depth, must be at least %d\n",
1319 if (enqueue_depth < DLB2_MIN_ENQUEUE_DEPTH) {
1320 DLB2_LOG_ERR("dlb2: invalid enqueue_depth, must be at least %d\n",
1321 DLB2_MIN_ENQUEUE_DEPTH);
1325 rte_spinlock_lock(&handle->resource_lock);
1327 /* We round up to the next power of 2 if necessary */
1328 cfg.cq_depth = rte_align32pow2(dequeue_depth);
1329 cfg.cq_depth_threshold = 1;
1331 cfg.cq_history_list_size = DLB2_NUM_HIST_LIST_ENTRIES_PER_LDB_PORT;
1333 if (handle->cos_id == DLB2_COS_DEFAULT)
1336 cfg.cos_id = handle->cos_id;
1340 /* User controls the LDB high watermark via enqueue depth. The DIR high
1341 * watermark is equal, unless the directed credit pool is too small.
1343 if (dlb2->version == DLB2_HW_V2) {
1344 ldb_credit_high_watermark = enqueue_depth;
1345 /* If there are no directed ports, the kernel driver will
1346 * ignore this port's directed credit settings. Don't use
1347 * enqueue_depth if it would require more directed credits
1348 * than are available.
1350 dir_credit_high_watermark =
1351 RTE_MIN(enqueue_depth,
1352 handle->cfg.num_dir_credits / dlb2->num_ports);
1354 credit_high_watermark = enqueue_depth;
1358 ret = dlb2_iface_ldb_port_create(handle, &cfg, dlb2->poll_mode);
1360 DLB2_LOG_ERR("dlb2: dlb2_ldb_port_create error, ret=%d (driver status: %s)\n",
1361 ret, dlb2_error_strings[cfg.response.status]);
1365 qm_port_id = cfg.response.id;
1367 DLB2_LOG_DBG("dlb2: ev_port %d uses qm LB port %d <<<<<\n",
1368 ev_port->id, qm_port_id);
1370 qm_port = &ev_port->qm_port;
1371 qm_port->ev_port = ev_port; /* back ptr */
1372 qm_port->dlb2 = dlb2; /* back ptr */
1374 * Allocate and init local qe struct(s).
1375 * Note: MOVDIR64 requires the enqueue QE (qe4) to be aligned.
1378 snprintf(mz_name, sizeof(mz_name), "dlb2_ldb_port%d",
1381 ret = dlb2_init_qe_mem(qm_port, mz_name);
1383 DLB2_LOG_ERR("dlb2: init_qe_mem failed, ret=%d\n", ret);
1387 qm_port->id = qm_port_id;
1389 if (dlb2->version == DLB2_HW_V2) {
1390 qm_port->cached_ldb_credits = 0;
1391 qm_port->cached_dir_credits = 0;
1393 qm_port->cached_credits = 0;
1395 /* CQs with depth < 8 use an 8-entry queue, but withhold credits so
1396 * the effective depth is smaller.
1398 qm_port->cq_depth = cfg.cq_depth <= 8 ? 8 : cfg.cq_depth;
1399 qm_port->cq_idx = 0;
1400 qm_port->cq_idx_unmasked = 0;
1402 if (dlb2->poll_mode == DLB2_CQ_POLL_MODE_SPARSE)
1403 qm_port->cq_depth_mask = (qm_port->cq_depth * 4) - 1;
1405 qm_port->cq_depth_mask = qm_port->cq_depth - 1;
1407 qm_port->gen_bit_shift = __builtin_popcount(qm_port->cq_depth_mask);
1408 /* starting value of gen bit - it toggles at wrap time */
1409 qm_port->gen_bit = 1;
1411 dlb2_hw_cq_bitmask_init(qm_port, qm_port->cq_depth);
1413 qm_port->int_armed = false;
1415 /* Save off for later use in info and lookup APIs. */
1416 qm_port->qid_mappings = &dlb2->qm_ldb_to_ev_queue_id[0];
1418 qm_port->dequeue_depth = dequeue_depth;
1419 qm_port->token_pop_thresh = dequeue_depth;
1421 /* The default enqueue functions do not include delayed-pop support for
1422 * performance reasons.
1424 if (qm_port->token_pop_mode == DELAYED_POP) {
1425 dlb2->event_dev->enqueue = dlb2_event_enqueue_delayed;
1426 dlb2->event_dev->enqueue_burst =
1427 dlb2_event_enqueue_burst_delayed;
1428 dlb2->event_dev->enqueue_new_burst =
1429 dlb2_event_enqueue_new_burst_delayed;
1430 dlb2->event_dev->enqueue_forward_burst =
1431 dlb2_event_enqueue_forward_burst_delayed;
1434 qm_port->owed_tokens = 0;
1435 qm_port->issued_releases = 0;
1437 /* Save config message too. */
1438 rte_memcpy(&qm_port->cfg.ldb, &cfg, sizeof(qm_port->cfg.ldb));
1441 qm_port->state = PORT_STARTED; /* enabled at create time */
1442 qm_port->config_state = DLB2_CONFIGURED;
1444 if (dlb2->version == DLB2_HW_V2) {
1445 qm_port->dir_credits = dir_credit_high_watermark;
1446 qm_port->ldb_credits = ldb_credit_high_watermark;
1447 qm_port->credit_pool[DLB2_DIR_QUEUE] = &dlb2->dir_credit_pool;
1448 qm_port->credit_pool[DLB2_LDB_QUEUE] = &dlb2->ldb_credit_pool;
1450 DLB2_LOG_DBG("dlb2: created ldb port %d, depth = %d, ldb credits=%d, dir credits=%d\n",
1453 qm_port->ldb_credits,
1454 qm_port->dir_credits);
1456 qm_port->credits = credit_high_watermark;
1457 qm_port->credit_pool[DLB2_COMBINED_POOL] = &dlb2->credit_pool;
1459 DLB2_LOG_DBG("dlb2: created ldb port %d, depth = %d, credits=%d\n",
1465 qm_port->use_scalar = false;
1467 #if (!defined RTE_ARCH_X86_64)
1468 qm_port->use_scalar = true;
1470 if ((qm_port->cq_depth > 64) ||
1471 (!rte_is_power_of_2(qm_port->cq_depth)) ||
1472 (dlb2->vector_opts_enabled == false))
1473 qm_port->use_scalar = true;
1476 rte_spinlock_unlock(&handle->resource_lock);
1483 dlb2_free_qe_mem(qm_port);
1485 rte_spinlock_unlock(&handle->resource_lock);
1487 DLB2_LOG_ERR("dlb2: create ldb port failed!\n");
/* dlb2_port_link_teardown() - drop all queue links held by an event port.
 *
 * Walks every CQ link slot of @ev_port, marks each valid slot invalid and
 * decrements the link counters on both the port and the linked queue, so a
 * reconfigured port does not reapply stale links.
 *
 * NOTE(review): chunk is truncated here — the return type, braces, and the
 * loop's skip for invalid slots are not visible in this view.
 */
dlb2_port_link_teardown(struct dlb2_eventdev *dlb2,
			struct dlb2_eventdev_port *ev_port)
	struct dlb2_eventdev_queue *ev_queue;
	/* Scan every possible QID link slot for this LDB CQ */
	for (i = 0; i < DLB2_MAX_NUM_QIDS_PER_LDB_CQ; i++) {
		if (!ev_port->link[i].valid)
		ev_queue = &dlb2->ev_queues[ev_port->link[i].queue_id];
		ev_port->link[i].valid = false;
		ev_port->num_links--;
		ev_queue->num_links--;
/* dlb2_hw_create_dir_port() - create and initialize a directed HW port.
 *
 * Validates the requested CQ/enqueue depths, asks the DLB2 driver to create
 * a directed port, allocates the per-port QE memory, and fills in the
 * software-side dlb2_port bookkeeping (CQ indexing, credits, config cache).
 * The port's paired directed queue is configured later, at link time.
 *
 * NOTE(review): chunk is truncated — return statements, braces and goto
 * error labels between the visible lines are not shown here.
 */
dlb2_hw_create_dir_port(struct dlb2_eventdev *dlb2,
			struct dlb2_eventdev_port *ev_port,
			uint32_t dequeue_depth,
			uint32_t enqueue_depth)
	struct dlb2_hw_dev *handle = &dlb2->qm_instance;
	struct dlb2_create_dir_port_args cfg = { {0} };
	struct dlb2_port *qm_port = NULL;
	char mz_name[RTE_MEMZONE_NAMESIZE];
	uint32_t qm_port_id;
	uint16_t ldb_credit_high_watermark = 0;
	uint16_t dir_credit_high_watermark = 0;
	uint16_t credit_high_watermark = 0;
	if (dlb2 == NULL || handle == NULL)
	if (dequeue_depth < DLB2_MIN_CQ_DEPTH) {
		DLB2_LOG_ERR("dlb2: invalid dequeue_depth, must be %d-%d\n",
			     DLB2_MIN_CQ_DEPTH, DLB2_MAX_INPUT_QUEUE_DEPTH);
	if (enqueue_depth < DLB2_MIN_ENQUEUE_DEPTH) {
		DLB2_LOG_ERR("dlb2: invalid enqueue_depth, must be at least %d\n",
			     DLB2_MIN_ENQUEUE_DEPTH);
	/* Serialize against other resource-allocating callers */
	rte_spinlock_lock(&handle->resource_lock);
	/* Directed queues are configured at link time. */
	/* We round up to the next power of 2 if necessary */
	cfg.cq_depth = rte_align32pow2(dequeue_depth);
	cfg.cq_depth_threshold = 1;
	/* User controls the LDB high watermark via enqueue depth. The DIR high
	 * watermark is equal, unless the directed credit pool is too small.
	 */
	if (dlb2->version == DLB2_HW_V2) {
		ldb_credit_high_watermark = enqueue_depth;
		/* Don't use enqueue_depth if it would require more directed
		 * credits than are available.
		 */
		dir_credit_high_watermark =
			RTE_MIN(enqueue_depth,
				handle->cfg.num_dir_credits / dlb2->num_ports);
		credit_high_watermark = enqueue_depth;
	ret = dlb2_iface_dir_port_create(handle, &cfg, dlb2->poll_mode);
		DLB2_LOG_ERR("dlb2: dlb2_dir_port_create error, ret=%d (driver status: %s)\n",
			     ret, dlb2_error_strings[cfg.response.status]);
	qm_port_id = cfg.response.id;
	DLB2_LOG_DBG("dlb2: ev_port %d uses qm DIR port %d <<<<<\n",
		     ev_port->id, qm_port_id);
	qm_port = &ev_port->qm_port;
	qm_port->ev_port = ev_port; /* back ptr */
	qm_port->dlb2 = dlb2;  /* back ptr */
	/*
	 * Init local qe struct(s).
	 * Note: MOVDIR64 requires the enqueue QE to be aligned
	 */
	snprintf(mz_name, sizeof(mz_name), "dlb2_dir_port%d",
	ret = dlb2_init_qe_mem(qm_port, mz_name);
		DLB2_LOG_ERR("dlb2: init_qe_mem failed, ret=%d\n", ret);
	qm_port->id = qm_port_id;
	/* V2 uses split LDB/DIR credit pools; V2.5 uses one combined pool */
	if (dlb2->version == DLB2_HW_V2) {
		qm_port->cached_ldb_credits = 0;
		qm_port->cached_dir_credits = 0;
		qm_port->cached_credits = 0;
	/* CQs with depth < 8 use an 8-entry queue, but withhold credits so
	 * the effective depth is smaller.
	 */
	qm_port->cq_depth = cfg.cq_depth <= 8 ? 8 : cfg.cq_depth;
	qm_port->cq_idx = 0;
	qm_port->cq_idx_unmasked = 0;
	/* Sparse mode spreads CQ entries, so the mask covers 4x the depth.
	 * NOTE(review): this uses cfg.cq_depth while the LDB-port path uses
	 * qm_port->cq_depth (which is clamped to >= 8) — confirm the
	 * difference is intentional for CQs shallower than 8.
	 */
	if (dlb2->poll_mode == DLB2_CQ_POLL_MODE_SPARSE)
		qm_port->cq_depth_mask = (cfg.cq_depth * 4) - 1;
		qm_port->cq_depth_mask = cfg.cq_depth - 1;
	qm_port->gen_bit_shift = __builtin_popcount(qm_port->cq_depth_mask);
	/* starting value of gen bit - it toggles at wrap time */
	qm_port->gen_bit = 1;
	dlb2_hw_cq_bitmask_init(qm_port, qm_port->cq_depth);
	qm_port->int_armed = false;
	/* Save off for later use in info and lookup APIs. */
	qm_port->qid_mappings = &dlb2->qm_dir_to_ev_queue_id[0];
	qm_port->dequeue_depth = dequeue_depth;
	/* Directed ports are auto-pop, by default. */
	qm_port->token_pop_mode = AUTO_POP;
	qm_port->owed_tokens = 0;
	qm_port->issued_releases = 0;
	/* Save config message too. */
	rte_memcpy(&qm_port->cfg.dir, &cfg, sizeof(qm_port->cfg.dir));
	qm_port->state = PORT_STARTED; /* enabled at create time */
	qm_port->config_state = DLB2_CONFIGURED;
	if (dlb2->version == DLB2_HW_V2) {
		qm_port->dir_credits = dir_credit_high_watermark;
		qm_port->ldb_credits = ldb_credit_high_watermark;
		qm_port->credit_pool[DLB2_DIR_QUEUE] = &dlb2->dir_credit_pool;
		qm_port->credit_pool[DLB2_LDB_QUEUE] = &dlb2->ldb_credit_pool;
		DLB2_LOG_DBG("dlb2: created dir port %d, depth = %d cr=%d,%d\n",
			     dir_credit_high_watermark,
			     ldb_credit_high_watermark);
		qm_port->credits = credit_high_watermark;
		qm_port->credit_pool[DLB2_COMBINED_POOL] = &dlb2->credit_pool;
		DLB2_LOG_DBG("dlb2: created dir port %d, depth = %d cr=%d\n",
			     credit_high_watermark);
/* Non-x86 builds have no SSE enqueue path; always use the scalar one */
#if (!defined RTE_ARCH_X86_64)
	qm_port->use_scalar = true;
	/* Vector path requires a power-of-2 CQ depth <= 64 */
	if ((qm_port->cq_depth > 64) ||
	    (!rte_is_power_of_2(qm_port->cq_depth)) ||
	    (dlb2->vector_opts_enabled == false))
		qm_port->use_scalar = true;
	rte_spinlock_unlock(&handle->resource_lock);
	/* Error path: release QE memory and the resource lock */
	dlb2_free_qe_mem(qm_port);
	rte_spinlock_unlock(&handle->resource_lock);
	DLB2_LOG_ERR("dlb2: create dir port failed!\n");
/* dlb2_eventdev_port_setup() - eventdev PMD port_setup callback.
 *
 * Validates the port configuration, creates either a load-balanced or a
 * directed HW port depending on RTE_EVENT_PORT_CFG_SINGLE_LINK, saves the
 * configuration for later reconfig, and publishes the port in dev->data.
 *
 * NOTE(review): chunk is truncated — return statements and braces between
 * the visible lines are not shown in this view.
 */
dlb2_eventdev_port_setup(struct rte_eventdev *dev,
			 const struct rte_event_port_conf *port_conf)
	struct dlb2_eventdev *dlb2;
	struct dlb2_eventdev_port *ev_port;
	if (dev == NULL || port_conf == NULL) {
		DLB2_LOG_ERR("Null parameter\n");
	dlb2 = dlb2_pmd_priv(dev);
	/* Reject out-of-range port ids for this HW version */
	if (ev_port_id >= DLB2_MAX_NUM_PORTS(dlb2->version))
	if (port_conf->dequeue_depth >
	    evdev_dlb2_default_info.max_event_port_dequeue_depth ||
	    port_conf->enqueue_depth >
	    evdev_dlb2_default_info.max_event_port_enqueue_depth)
	ev_port = &dlb2->ev_ports[ev_port_id];
	/* A port may only be set up once between device configurations */
	if (ev_port->setup_done) {
		DLB2_LOG_ERR("evport %d is already configured\n", ev_port_id);
	ev_port->qm_port.is_directed = port_conf->event_port_cfg &
		RTE_EVENT_PORT_CFG_SINGLE_LINK;
	if (!ev_port->qm_port.is_directed) {
		ret = dlb2_hw_create_ldb_port(dlb2,
					      port_conf->dequeue_depth,
					      port_conf->enqueue_depth);
			/* NOTE(review): log text "lB port ve portId" looks
			 * garbled — probably meant "LDB port, ev_port_id".
			 */
			DLB2_LOG_ERR("Failed to create the lB port ve portId=%d\n",
		ret = dlb2_hw_create_dir_port(dlb2,
					      port_conf->dequeue_depth,
					      port_conf->enqueue_depth);
			DLB2_LOG_ERR("Failed to create the DIR port\n");
	/* Save off port config for reconfig */
	ev_port->conf = *port_conf;
	ev_port->id = ev_port_id;
	ev_port->enq_configured = true;
	ev_port->setup_done = true;
	ev_port->inflight_max = port_conf->new_event_threshold;
	/* Implicit release is on unless the app explicitly disabled it */
	ev_port->implicit_release = !(port_conf->event_port_cfg &
		  RTE_EVENT_PORT_CFG_DISABLE_IMPL_REL);
	ev_port->outstanding_releases = 0;
	ev_port->inflight_credits = 0;
	ev_port->credit_update_quanta = dlb2->sw_credit_quanta;
	ev_port->dlb2 = dlb2; /* reverse link */
	/* Tear down pre-existing port->queue links */
	if (dlb2->run_state == DLB2_RUN_STATE_STOPPED)
		dlb2_port_link_teardown(dlb2, &dlb2->ev_ports[ev_port_id]);
	dev->data->ports[ev_port_id] = &dlb2->ev_ports[ev_port_id];
/* dlb2_hw_map_ldb_qid_to_port() - ask the driver to map an LDB queue to a
 * port's CQ at the given priority.
 *
 * Converts the eventdev priority to a DLB2 priority and issues the map_qid
 * command through the iface layer; logs driver status on failure.
 */
dlb2_hw_map_ldb_qid_to_port(struct dlb2_hw_dev *handle,
			    uint32_t qm_port_id,
	struct dlb2_map_qid_args cfg;
	cfg.port_id = qm_port_id;
	cfg.priority = EV_TO_DLB2_PRIO(priority);
	ret = dlb2_iface_map_qid(handle, &cfg);
		DLB2_LOG_ERR("dlb2: map qid error, ret=%d (driver status: %s)\n",
			     ret, dlb2_error_strings[cfg.response.status]);
		DLB2_LOG_ERR("dlb2: grp=%d, qm_port=%d, qm_qid=%d prio=%d\n",
			     handle->domain_id, cfg.port_id,
	DLB2_LOG_DBG("dlb2: mapped queue %d to qm_port %d\n",
		     qm_qid, qm_port_id);
/* dlb2_event_queue_join_ldb() - link a load-balanced queue to an event port.
 *
 * If the (queue, priority) pair is already mapped, returns success early.
 * Otherwise records the link in the first free slot and issues the HW map
 * via dlb2_hw_map_ldb_qid_to_port().
 */
dlb2_event_queue_join_ldb(struct dlb2_eventdev *dlb2,
			  struct dlb2_eventdev_port *ev_port,
			  struct dlb2_eventdev_queue *ev_queue,
	int first_avail = -1;
	/* Look for an existing mapping or the first free link slot */
	for (i = 0; i < DLB2_MAX_NUM_QIDS_PER_LDB_CQ; i++) {
		if (ev_port->link[i].valid) {
			if (ev_port->link[i].queue_id == ev_queue->id &&
			    ev_port->link[i].priority == priority) {
				if (ev_port->link[i].mapped)
					return 0; /* already mapped */
		} else if (first_avail == -1)
	if (first_avail == -1) {
		DLB2_LOG_ERR("dlb2: qm_port %d has no available QID slots.\n",
			     ev_port->qm_port.id);
	ret = dlb2_hw_map_ldb_qid_to_port(&dlb2->qm_instance,
					  ev_port->qm_port.id,
					  ev_queue->qm_queue.id,
	/* Mark the slot mapped only after the HW map succeeded */
	ev_port->link[first_avail].mapped = true;
/* dlb2_hw_create_dir_queue() - create the HW directed queue paired with a
 * directed port.
 *
 * Uses the queue's depth threshold if set, otherwise falls back to (and
 * records) the device default. Returns the driver-assigned queue id on
 * success.
 */
dlb2_hw_create_dir_queue(struct dlb2_eventdev *dlb2,
			 struct dlb2_eventdev_queue *ev_queue,
	struct dlb2_hw_dev *handle = &dlb2->qm_instance;
	struct dlb2_create_dir_queue_args cfg;
	/* The directed port is always configured before its queue */
	cfg.port_id = qm_port_id;
	if (ev_queue->depth_threshold == 0) {
		cfg.depth_threshold = dlb2->default_depth_thresh;
		ev_queue->depth_threshold =
			dlb2->default_depth_thresh;
		cfg.depth_threshold = ev_queue->depth_threshold;
	ret = dlb2_iface_dir_queue_create(handle, &cfg);
		DLB2_LOG_ERR("dlb2: create DIR event queue error, ret=%d (driver status: %s)\n",
			     ret, dlb2_error_strings[cfg.response.status]);
	return cfg.response.id;
/* dlb2_eventdev_dir_queue_setup() - create a directed queue for @ev_port
 * and record the qm-queue-id -> eventdev-queue-id mapping.
 */
dlb2_eventdev_dir_queue_setup(struct dlb2_eventdev *dlb2,
			      struct dlb2_eventdev_queue *ev_queue,
			      struct dlb2_eventdev_port *ev_port)
	qm_qid = dlb2_hw_create_dir_queue(dlb2, ev_queue, ev_port->qm_port.id);
		DLB2_LOG_ERR("Failed to create the DIR queue\n");
	/* Save the reverse mapping used by info/lookup APIs */
	dlb2->qm_dir_to_ev_queue_id[qm_qid] = ev_queue->id;
	ev_queue->qm_queue.id = qm_qid;
/* dlb2_do_port_link() - perform the actual port->queue link in hardware.
 *
 * Deferred while the device is stopped (links are applied at start time).
 * Dispatches to the directed-queue setup or the LDB join depending on the
 * queue type.
 */
dlb2_do_port_link(struct rte_eventdev *dev,
		  struct dlb2_eventdev_queue *ev_queue,
		  struct dlb2_eventdev_port *ev_port,
	struct dlb2_eventdev *dlb2 = dlb2_pmd_priv(dev);
	/* Don't link until start time. */
	if (dlb2->run_state == DLB2_RUN_STATE_STOPPED)
	if (ev_queue->qm_queue.is_directed)
		err = dlb2_eventdev_dir_queue_setup(dlb2, ev_queue, ev_port);
		err = dlb2_event_queue_join_ldb(dlb2, ev_port, ev_queue, prio);
		DLB2_LOG_ERR("port link failure for %s ev_q %d, ev_port %d\n",
			     ev_queue->qm_queue.is_directed ? "DIR" : "LDB",
			     ev_queue->id, ev_port->id);
/* dlb2_validate_port_link() - sanity-check a requested port->queue link.
 *
 * Rejects out-of-range or unconfigured queues, LDB/DIR type mismatches,
 * full link tables, and attempts to link a directed port or queue more
 * than once. Sets rte_errno on failure.
 */
dlb2_validate_port_link(struct dlb2_eventdev_port *ev_port,
	struct dlb2_eventdev *dlb2 = ev_port->dlb2;
	struct dlb2_eventdev_queue *ev_queue;
	bool port_is_dir, queue_is_dir;
	/* NOTE(review): valid ids are 0..num_queues-1, so this check looks
	 * off-by-one (queue_id == num_queues passes) — confirm intent.
	 */
	if (queue_id > dlb2->num_queues) {
		rte_errno = -EINVAL;
	ev_queue = &dlb2->ev_queues[queue_id];
	/* A previously-configured queue is acceptable during reconfig */
	if (!ev_queue->setup_done &&
	    ev_queue->qm_queue.config_state != DLB2_PREV_CONFIGURED) {
		rte_errno = -EINVAL;
	port_is_dir = ev_port->qm_port.is_directed;
	queue_is_dir = ev_queue->qm_queue.is_directed;
	/* DIR queues may only link to DIR ports, and LDB to LDB */
	if (port_is_dir != queue_is_dir) {
		DLB2_LOG_ERR("%s queue %u can't link to %s port %u\n",
			     queue_is_dir ? "DIR" : "LDB", ev_queue->id,
			     port_is_dir ? "DIR" : "LDB", ev_port->id);
		rte_errno = -EINVAL;
	/* Check if there is space for the requested link */
	if (!link_exists && index == -1) {
		DLB2_LOG_ERR("no space for new link\n");
		rte_errno = -ENOSPC;
	/* Check if the directed port is already linked */
	if (ev_port->qm_port.is_directed && ev_port->num_links > 0 &&
		DLB2_LOG_ERR("Can't link DIR port %d to >1 queues\n",
		rte_errno = -EINVAL;
	/* Check if the directed queue is already linked */
	if (ev_queue->qm_queue.is_directed && ev_queue->num_links > 0 &&
		DLB2_LOG_ERR("Can't link DIR queue %d to >1 ports\n",
		rte_errno = -EINVAL;
/* dlb2_eventdev_port_link() - eventdev PMD port_link callback.
 *
 * For each requested (queue, priority) pair: skips links that already exist
 * at the same priority, validates the request, performs the HW link (or
 * defers it if the device is stopped), and updates the software link table.
 * On error, breaks out so the return value is the index of the offending
 * queue, per the eventdev API contract.
 *
 * NOTE(review): chunk is truncated — returns, braces and some loop
 * bookkeeping between the visible lines are not shown.
 */
dlb2_eventdev_port_link(struct rte_eventdev *dev, void *event_port,
			const uint8_t queues[], const uint8_t priorities[],
	struct dlb2_eventdev_port *ev_port = event_port;
	struct dlb2_eventdev *dlb2;
	if (ev_port == NULL) {
		DLB2_LOG_ERR("dlb2: evport not setup\n");
		rte_errno = -EINVAL;
	/* A previously-configured port is acceptable during reconfig */
	if (!ev_port->setup_done &&
	    ev_port->qm_port.config_state != DLB2_PREV_CONFIGURED) {
		DLB2_LOG_ERR("dlb2: evport not setup\n");
		rte_errno = -EINVAL;
	/* Note: rte_event_port_link() ensures the PMD won't receive a NULL
	 */
	if (nb_links == 0) {
		DLB2_LOG_DBG("dlb2: nb_links is 0\n");
		return 0; /* Ignore and return success */
	dlb2 = ev_port->dlb2;
	DLB2_LOG_DBG("Linking %u queues to %s port %d\n",
		     ev_port->qm_port.is_directed ? "DIR" : "LDB",
	for (i = 0; i < nb_links; i++) {
		struct dlb2_eventdev_queue *ev_queue;
		uint8_t queue_id, prio;
		queue_id = queues[i];
		prio = priorities[i];
		/* Check if the link already exists. */
		for (j = 0; j < DLB2_MAX_NUM_QIDS_PER_LDB_CQ; j++)
			if (ev_port->link[j].valid) {
				if (ev_port->link[j].queue_id == queue_id) {
			} else if (index == -1) {
		/* could not link */
		/* Check if already linked at the requested priority */
		if (found && ev_port->link[j].priority == prio)
		if (dlb2_validate_port_link(ev_port, queue_id, found, index))
			break; /* return index of offending queue */
		ev_queue = &dlb2->ev_queues[queue_id];
		if (dlb2_do_port_link(dev, ev_queue, ev_port, prio))
			break; /* return index of offending queue */
		ev_queue->num_links++;
		ev_port->link[index].queue_id = queue_id;
		ev_port->link[index].priority = prio;
		ev_port->link[index].valid = true;
		/* Entry already exists?  If so, then must be prio change */
		ev_port->num_links++;
/* dlb2_hw_unmap_ldb_qid_from_port() - ask the driver to unmap an LDB queue
 * from a port's CQ; logs the driver status string on failure.
 */
dlb2_hw_unmap_ldb_qid_from_port(struct dlb2_hw_dev *handle,
				uint32_t qm_port_id,
	struct dlb2_unmap_qid_args cfg;
	cfg.port_id = qm_port_id;
	ret = dlb2_iface_unmap_qid(handle, &cfg);
		DLB2_LOG_ERR("dlb2: unmap qid error, ret=%d (driver status: %s)\n",
			     ret, dlb2_error_strings[cfg.response.status]);
/* dlb2_event_queue_detach_ldb() - unmap a load-balanced queue from a port.
 *
 * A no-op while the device is stopped (links are deferred until start), and
 * silently succeeds if the queue was never mapped — the eventdev unlink API
 * blindly asks to unmap all queues.
 */
dlb2_event_queue_detach_ldb(struct dlb2_eventdev *dlb2,
			    struct dlb2_eventdev_port *ev_port,
			    struct dlb2_eventdev_queue *ev_queue)
	/* Don't unlink until start time. */
	if (dlb2->run_state == DLB2_RUN_STATE_STOPPED)
	/* Find the link-table slot for this queue */
	for (i = 0; i < DLB2_MAX_NUM_QIDS_PER_LDB_CQ; i++) {
		if (ev_port->link[i].valid &&
		    ev_port->link[i].queue_id == ev_queue->id)
	/* This is expected with eventdev API!
	 * It blindly attempts to unmap all queues.
	 */
	if (i == DLB2_MAX_NUM_QIDS_PER_LDB_CQ) {
		DLB2_LOG_DBG("dlb2: ignoring LB QID %d not mapped for qm_port %d.\n",
			     ev_queue->qm_queue.id,
			     ev_port->qm_port.id);
	ret = dlb2_hw_unmap_ldb_qid_from_port(&dlb2->qm_instance,
					      ev_port->qm_port.id,
					      ev_queue->qm_queue.id);
		ev_port->link[i].mapped = false;
/* dlb2_eventdev_port_unlink() - eventdev PMD port_unlink callback.
 *
 * Directed ports ignore unlinks (their single link is fixed). For LDB
 * ports, each requested queue that is actually linked is detached in HW
 * and removed from the software link table. Returns the number of queues
 * processed, or the index of the offending queue on error.
 */
dlb2_eventdev_port_unlink(struct rte_eventdev *dev, void *event_port,
			  uint8_t queues[], uint16_t nb_unlinks)
	struct dlb2_eventdev_port *ev_port = event_port;
	struct dlb2_eventdev *dlb2;
	if (!ev_port->setup_done) {
		DLB2_LOG_ERR("dlb2: evport %d is not configured\n",
		rte_errno = -EINVAL;
	if (queues == NULL || nb_unlinks == 0) {
		DLB2_LOG_DBG("dlb2: queues is NULL or nb_unlinks is 0\n");
		return 0; /* Ignore and return success */
	/* Directed ports cannot be unlinked; report success anyway */
	if (ev_port->qm_port.is_directed) {
		DLB2_LOG_DBG("dlb2: ignore unlink from dir port %d\n",
		return nb_unlinks; /* as if success */
	dlb2 = ev_port->dlb2;
	for (i = 0; i < nb_unlinks; i++) {
		struct dlb2_eventdev_queue *ev_queue;
		if (queues[i] >= dlb2->num_queues) {
			DLB2_LOG_ERR("dlb2: invalid queue id %d\n", queues[i]);
			rte_errno = -EINVAL;
			return i; /* return index of offending queue */
		ev_queue = &dlb2->ev_queues[queues[i]];
		/* Does a link exist? */
		for (j = 0; j < DLB2_MAX_NUM_QIDS_PER_LDB_CQ; j++)
			if (ev_port->link[j].queue_id == queues[i] &&
			    ev_port->link[j].valid)
		/* Not linked: nothing to do for this queue */
		if (j == DLB2_MAX_NUM_QIDS_PER_LDB_CQ)
		ret = dlb2_event_queue_detach_ldb(dlb2, ev_port, ev_queue);
			DLB2_LOG_ERR("unlink err=%d for port %d queue %d\n",
				     ret, ev_port->id, queues[i]);
			rte_errno = -ENOENT;
			return i; /* return index of offending queue */
		ev_port->link[j].valid = false;
		ev_port->num_links--;
		ev_queue->num_links--;
/* dlb2_eventdev_port_unlinks_in_progress() - eventdev PMD callback that
 * reports how many queue unmaps are still pending for this port.
 *
 * Queries the driver via pending_port_unmaps; the count comes back in
 * cfg.response.id.
 */
dlb2_eventdev_port_unlinks_in_progress(struct rte_eventdev *dev,
	struct dlb2_eventdev_port *ev_port = event_port;
	struct dlb2_eventdev *dlb2;
	struct dlb2_hw_dev *handle;
	struct dlb2_pending_port_unmaps_args cfg;
	if (!ev_port->setup_done) {
		DLB2_LOG_ERR("dlb2: evport %d is not configured\n",
		rte_errno = -EINVAL;
	cfg.port_id = ev_port->qm_port.id;
	dlb2 = ev_port->dlb2;
	handle = &dlb2->qm_instance;
	ret = dlb2_iface_pending_port_unmaps(handle, &cfg);
		DLB2_LOG_ERR("dlb2: num_unlinks_in_progress ret=%d (driver status: %s)\n",
			     ret, dlb2_error_strings[cfg.response.status]);
	return cfg.response.id;
/* dlb2_eventdev_reapply_configuration() - re-run queue/port setup for any
 * object configured before the last device reconfiguration.
 *
 * Queues and ports in state DLB2_PREV_CONFIGURED had their HW resources
 * torn down by the reconfig; this replays their saved configurations.
 */
dlb2_eventdev_reapply_configuration(struct rte_eventdev *dev)
	struct dlb2_eventdev *dlb2 = dlb2_pmd_priv(dev);
	/* If an event queue or port was previously configured, but hasn't been
	 * reconfigured, reapply its original configuration.
	 */
	for (i = 0; i < dlb2->num_queues; i++) {
		struct dlb2_eventdev_queue *ev_queue;
		ev_queue = &dlb2->ev_queues[i];
		if (ev_queue->qm_queue.config_state != DLB2_PREV_CONFIGURED)
		ret = dlb2_eventdev_queue_setup(dev, i, &ev_queue->conf);
			DLB2_LOG_ERR("dlb2: failed to reconfigure queue %d", i);
	for (i = 0; i < dlb2->num_ports; i++) {
		struct dlb2_eventdev_port *ev_port = &dlb2->ev_ports[i];
		if (ev_port->qm_port.config_state != DLB2_PREV_CONFIGURED)
		ret = dlb2_eventdev_port_setup(dev, i, &ev_port->conf);
			DLB2_LOG_ERR("dlb2: failed to reconfigure ev_port %d",
/* dlb2_eventdev_apply_port_links() - apply all deferred port->queue links.
 *
 * Called at device start: links requested while the device was stopped were
 * only recorded in ev_port->link[]; this validates and performs each one in
 * hardware.
 */
dlb2_eventdev_apply_port_links(struct rte_eventdev *dev)
	struct dlb2_eventdev *dlb2 = dlb2_pmd_priv(dev);
	/* Perform requested port->queue links */
	for (i = 0; i < dlb2->num_ports; i++) {
		struct dlb2_eventdev_port *ev_port = &dlb2->ev_ports[i];
		for (j = 0; j < DLB2_MAX_NUM_QIDS_PER_LDB_CQ; j++) {
			struct dlb2_eventdev_queue *ev_queue;
			uint8_t prio, queue_id;
			if (!ev_port->link[j].valid)
			prio = ev_port->link[j].priority;
			queue_id = ev_port->link[j].queue_id;
			/* link_exists=true: the slot is already recorded */
			if (dlb2_validate_port_link(ev_port, queue_id, true, j))
			ev_queue = &dlb2->ev_queues[queue_id];
			if (dlb2_do_port_link(dev, ev_queue, ev_port, prio))
/* dlb2_eventdev_start() - eventdev PMD dev_start callback.
 *
 * Moves the device from STOPPED to STARTING under the resource lock,
 * replays any stale configuration, applies deferred port links, verifies
 * every port is set up and every queue is linked, then starts the HW
 * scheduling domain and marks the device STARTED.
 */
dlb2_eventdev_start(struct rte_eventdev *dev)
	struct dlb2_eventdev *dlb2 = dlb2_pmd_priv(dev);
	struct dlb2_hw_dev *handle = &dlb2->qm_instance;
	struct dlb2_start_domain_args cfg;
	/* State transition must be atomic w.r.t. other control-path calls */
	rte_spinlock_lock(&dlb2->qm_instance.resource_lock);
	if (dlb2->run_state != DLB2_RUN_STATE_STOPPED) {
		DLB2_LOG_ERR("bad state %d for dev_start\n",
			     (int)dlb2->run_state);
		rte_spinlock_unlock(&dlb2->qm_instance.resource_lock);
	dlb2->run_state = DLB2_RUN_STATE_STARTING;
	rte_spinlock_unlock(&dlb2->qm_instance.resource_lock);
	/* If the device was configured more than once, some event ports and/or
	 * queues may need to be reconfigured.
	 */
	ret = dlb2_eventdev_reapply_configuration(dev);
	/* The DLB PMD delays port links until the device is started. */
	ret = dlb2_eventdev_apply_port_links(dev);
	for (i = 0; i < dlb2->num_ports; i++) {
		if (!dlb2->ev_ports[i].setup_done) {
			DLB2_LOG_ERR("dlb2: port %d not setup", i);
	for (i = 0; i < dlb2->num_queues; i++) {
		if (dlb2->ev_queues[i].num_links == 0) {
			DLB2_LOG_ERR("dlb2: queue %d is not linked", i);
	ret = dlb2_iface_sched_domain_start(handle, &cfg);
		DLB2_LOG_ERR("dlb2: sched_domain_start ret=%d (driver status: %s)\n",
			     ret, dlb2_error_strings[cfg.response.status]);
	dlb2->run_state = DLB2_RUN_STATE_STARTED;
	DLB2_LOG_DBG("dlb2: sched_domain_start completed OK\n");
/* Map (port type, rte_event op) -> HCW command byte.  Directed ports have
 * no FORWARD/RELEASE semantics: forwards become NEW and releases become
 * NOOP.  Indexed by RTE_EVENT_OP_* values, which fit within the second
 * dimension.
 */
static uint8_t cmd_byte_map[DLB2_NUM_PORT_TYPES][DLB2_NUM_HW_SCHED_TYPES] = {
		/* Load-balanced cmd bytes */
		[RTE_EVENT_OP_NEW] = DLB2_NEW_CMD_BYTE,
		[RTE_EVENT_OP_FORWARD] = DLB2_FWD_CMD_BYTE,
		[RTE_EVENT_OP_RELEASE] = DLB2_COMP_CMD_BYTE,
		/* Directed cmd bytes */
		[RTE_EVENT_OP_NEW] = DLB2_NEW_CMD_BYTE,
		[RTE_EVENT_OP_FORWARD] = DLB2_NEW_CMD_BYTE,
		[RTE_EVENT_OP_RELEASE] = DLB2_NOOP_CMD_BYTE,
/* dlb2_port_credits_get() - atomically take a batch of HW credits from the
 * shared per-device pool of the given type.
 *
 * Takes up to DLB2_SW_CREDIT_BATCH_SZ credits (less if the pool is low)
 * using a single compare-exchange; on CAS failure or an empty pool the
 * caller gets nothing this round and may retry later.
 */
static inline uint32_t
dlb2_port_credits_get(struct dlb2_port *qm_port,
		      enum dlb2_hw_queue_types type)
	uint32_t credits = *qm_port->credit_pool[type];
	uint32_t batch_size = DLB2_SW_CREDIT_BATCH_SZ;
	if (unlikely(credits < batch_size))
		batch_size = credits;
	/* Single CAS; no retry loop — contention just means a smaller or
	 * failed grab this call.
	 */
	if (likely(credits &&
		   __atomic_compare_exchange_n(
			qm_port->credit_pool[type],
			&credits, credits - batch_size, false,
			__ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST)))
/* dlb2_replenish_sw_credits() - return excess software credits to the
 * device-wide inflight counter.
 *
 * Only triggers once the port has accumulated two quanta, and keeps one
 * quantum locally so the port can keep enqueueing without immediately
 * re-acquiring credits.
 */
dlb2_replenish_sw_credits(struct dlb2_eventdev *dlb2,
			  struct dlb2_eventdev_port *ev_port)
	uint16_t quanta = ev_port->credit_update_quanta;
	if (ev_port->inflight_credits >= quanta * 2) {
		/* Replenish credits, saving one quanta for enqueues */
		uint16_t val = ev_port->inflight_credits - quanta;
		__atomic_fetch_sub(&dlb2->inflights, val, __ATOMIC_SEQ_CST);
		ev_port->inflight_credits -= val;
/* dlb2_check_enqueue_sw_credits() - ensure the port holds enough software
 * (new-event) credits before enqueueing.
 *
 * Fails with -ENOSPC (and bumps the matching stat) when the device-wide
 * inflight count exceeds the port's max, when acquiring a quantum would
 * exceed the device's new_event_limit, or when even the refreshed local
 * credits are insufficient.
 */
dlb2_check_enqueue_sw_credits(struct dlb2_eventdev *dlb2,
			      struct dlb2_eventdev_port *ev_port)
	uint32_t sw_inflights = __atomic_load_n(&dlb2->inflights,
	if (unlikely(ev_port->inflight_max < sw_inflights)) {
		DLB2_INC_STAT(ev_port->stats.traffic.tx_nospc_inflight_max, 1);
		rte_errno = -ENOSPC;
	if (ev_port->inflight_credits < num) {
		/* check if event enqueue brings ev_port over max threshold */
		uint32_t credit_update_quanta = ev_port->credit_update_quanta;
		if (sw_inflights + credit_update_quanta >
				dlb2->new_event_limit) {
				ev_port->stats.traffic.tx_nospc_new_event_limit,
			rte_errno = -ENOSPC;
		/* Grab a whole quantum from the shared inflight budget */
		__atomic_fetch_add(&dlb2->inflights, credit_update_quanta,
		ev_port->inflight_credits += (credit_update_quanta);
		if (ev_port->inflight_credits < num) {
				ev_port->stats.traffic.tx_nospc_inflight_credits,
			rte_errno = -ENOSPC;
/* dlb2_check_enqueue_hw_ldb_credits() - top up the port's cached LDB HW
 * credits from the shared pool when empty; returns 1 if none available.
 */
dlb2_check_enqueue_hw_ldb_credits(struct dlb2_port *qm_port)
	if (unlikely(qm_port->cached_ldb_credits == 0)) {
		qm_port->cached_ldb_credits =
			dlb2_port_credits_get(qm_port,
		if (unlikely(qm_port->cached_ldb_credits == 0)) {
				qm_port->ev_port->stats.traffic.tx_nospc_ldb_hw_credits,
			DLB2_LOG_DBG("ldb credits exhausted\n");
			return 1; /* credits exhausted */
/* dlb2_check_enqueue_hw_dir_credits() - top up the port's cached DIR HW
 * credits from the shared pool when empty; returns 1 if none available.
 */
dlb2_check_enqueue_hw_dir_credits(struct dlb2_port *qm_port)
	if (unlikely(qm_port->cached_dir_credits == 0)) {
		qm_port->cached_dir_credits =
			dlb2_port_credits_get(qm_port,
		if (unlikely(qm_port->cached_dir_credits == 0)) {
				qm_port->ev_port->stats.traffic.tx_nospc_dir_hw_credits,
			DLB2_LOG_DBG("dir credits exhausted\n");
			return 1; /* credits exhausted */
/* dlb2_check_enqueue_hw_credits() - V2.5 variant: top up the port's cached
 * credits from the combined pool when empty; returns 1 if none available.
 */
dlb2_check_enqueue_hw_credits(struct dlb2_port *qm_port)
	if (unlikely(qm_port->cached_credits == 0)) {
		qm_port->cached_credits =
			dlb2_port_credits_get(qm_port,
					      DLB2_COMBINED_POOL);
		if (unlikely(qm_port->cached_credits == 0)) {
				qm_port->ev_port->stats.traffic.tx_nospc_hw_credits, 1);
			DLB2_LOG_DBG("credits exhausted\n");
			return 1; /* credits exhausted */
/* dlb2_pp_write() - push a 64B block of four enqueue QEs to the port's
 * producer-port MMIO address with a single MOVDIR64B store.
 */
static __rte_always_inline void
dlb2_pp_write(struct dlb2_enqueue_qe *qe4,
	      struct process_local_port_data *port_data)
	dlb2_movdir64b(port_data->pp_addr, qe4);
/* dlb2_consume_qe_immediate() - immediately return @num CQ tokens to HW.
 *
 * Writes a pop QE (tokens field is zero-based, hence num - 1) to the
 * producer port with a non-temporal store and clears the owed-token count.
 */
dlb2_consume_qe_immediate(struct dlb2_port *qm_port, int num)
	struct process_local_port_data *port_data;
	struct dlb2_cq_pop_qe *qe;
	RTE_ASSERT(qm_port->config_state == DLB2_CONFIGURED);
	qe = qm_port->consume_qe;
	/* HW token counts are zero-based */
	qe->tokens = num - 1;
	/* No store fence needed since no pointer is being sent, and CQ token
	 * pops can be safely reordered with other HCWs.
	 */
	port_data = &dlb2_port[qm_port->id][PORT_TYPE(qm_port)];
	dlb2_movntdq_single(port_data->pp_addr, qe);
	DLB2_LOG_DBG("dlb2: consume immediate - %d QEs\n", num);
	qm_port->owed_tokens = 0;
/* dlb2_hw_do_enqueue() - issue the port's staged 4-QE block to hardware.
 *
 * NOTE(review): the SFENCE the comment refers to is emitted on a line not
 * visible in this chunk.
 */
dlb2_hw_do_enqueue(struct dlb2_port *qm_port,
		   struct process_local_port_data *port_data)
	/* Since MOVDIR64B is weakly-ordered, use an SFENCE to ensure that
	 * application writes complete before enqueueing the QE.
	 */
	dlb2_pp_write(qm_port->qe4, port_data);
/* dlb2_construct_token_pop_qe() - turn slot @idx of the staged QE block
 * into a token-pop command returning all owed tokens (zero-based count),
 * then clear the owed-token counter.
 */
dlb2_construct_token_pop_qe(struct dlb2_port *qm_port, int idx)
	struct dlb2_cq_pop_qe *qe = (void *)qm_port->qe4;
	int num = qm_port->owed_tokens;
	qe[idx].cmd_byte = DLB2_POP_CMD_BYTE;
	qe[idx].tokens = num - 1;
	qm_port->owed_tokens = 0;
/* dlb2_event_build_hcws() - convert up to four rte_events into DLB2 HCWs
 * (hardware control words) in the port's staged QE block.
 *
 * The SSE path builds the metadata for four QEs in two 128-bit registers
 * and stores them with two 64-bit stores each; the scalar fallback fills
 * each QE field-by-field.  For directed destinations the qid/sched/prio
 * word is duplicated into the lock ID position, since DLB delivers it in
 * bytes 10-11 of the received QE.
 *
 * NOTE(review): chunk is truncated — the function's braces, the scalar/SSE
 * branch selection, and some closing lines are not visible here.
 */
dlb2_event_build_hcws(struct dlb2_port *qm_port,
		      const struct rte_event ev[],
		      uint8_t *sched_type,
	struct dlb2_enqueue_qe *qe;
	uint16_t sched_word[4];
		sse_qe[0] = _mm_setzero_si128();
		sse_qe[1] = _mm_setzero_si128();
		/* Construct the metadata portion of two HCWs in one 128b SSE
		 * register. HCW metadata is constructed in the SSE registers
		 * sse_qe[0][63:0]:   qe[0]'s metadata
		 * sse_qe[0][127:64]: qe[1]'s metadata
		 * sse_qe[1][63:0]:   qe[2]'s metadata
		 * sse_qe[1][127:64]: qe[3]'s metadata
		 */
		/* Convert the event operation into a command byte and store it
		 * sse_qe[0][63:56]   = cmd_byte_map[is_directed][ev[0].op]
		 * sse_qe[0][127:120] = cmd_byte_map[is_directed][ev[1].op]
		 * sse_qe[1][63:56]   = cmd_byte_map[is_directed][ev[2].op]
		 * sse_qe[1][127:120] = cmd_byte_map[is_directed][ev[3].op]
		 */
#define DLB2_QE_CMD_BYTE 7
		sse_qe[0] = _mm_insert_epi8(sse_qe[0],
				cmd_byte_map[qm_port->is_directed][ev[0].op],
		sse_qe[0] = _mm_insert_epi8(sse_qe[0],
				cmd_byte_map[qm_port->is_directed][ev[1].op],
				DLB2_QE_CMD_BYTE + 8);
		sse_qe[1] = _mm_insert_epi8(sse_qe[1],
				cmd_byte_map[qm_port->is_directed][ev[2].op],
		sse_qe[1] = _mm_insert_epi8(sse_qe[1],
				cmd_byte_map[qm_port->is_directed][ev[3].op],
				DLB2_QE_CMD_BYTE + 8);
		/* Store priority, scheduling type, and queue ID in the sched
		 * word array because these values are re-used when the
		 * destination is a directed queue.
		 */
		sched_word[0] = EV_TO_DLB2_PRIO(ev[0].priority) << 10 |
				sched_type[0] << 8 |
		sched_word[1] = EV_TO_DLB2_PRIO(ev[1].priority) << 10 |
				sched_type[1] << 8 |
		sched_word[2] = EV_TO_DLB2_PRIO(ev[2].priority) << 10 |
				sched_type[2] << 8 |
		sched_word[3] = EV_TO_DLB2_PRIO(ev[3].priority) << 10 |
				sched_type[3] << 8 |
		/* Store the event priority, scheduling type, and queue ID in
		 * sse_qe[0][31:16] = sched_word[0]
		 * sse_qe[0][95:80] = sched_word[1]
		 * sse_qe[1][31:16] = sched_word[2]
		 * sse_qe[1][95:80] = sched_word[3]
		 */
#define DLB2_QE_QID_SCHED_WORD 1
		sse_qe[0] = _mm_insert_epi16(sse_qe[0],
					     DLB2_QE_QID_SCHED_WORD);
		sse_qe[0] = _mm_insert_epi16(sse_qe[0],
					     DLB2_QE_QID_SCHED_WORD + 4);
		sse_qe[1] = _mm_insert_epi16(sse_qe[1],
					     DLB2_QE_QID_SCHED_WORD);
		sse_qe[1] = _mm_insert_epi16(sse_qe[1],
					     DLB2_QE_QID_SCHED_WORD + 4);
		/* If the destination is a load-balanced queue, store the lock
		 * ID. If it is a directed queue, DLB places this field in
		 * bytes 10-11 of the received QE, so we format it accordingly:
		 * sse_qe[0][47:32]  = dir queue ? sched_word[0] : flow_id[0]
		 * sse_qe[0][111:96] = dir queue ? sched_word[1] : flow_id[1]
		 * sse_qe[1][47:32]  = dir queue ? sched_word[2] : flow_id[2]
		 * sse_qe[1][111:96] = dir queue ? sched_word[3] : flow_id[3]
		 */
#define DLB2_QE_LOCK_ID_WORD 2
		sse_qe[0] = _mm_insert_epi16(sse_qe[0],
				(sched_type[0] == DLB2_SCHED_DIRECTED) ?
					sched_word[0] : ev[0].flow_id,
				DLB2_QE_LOCK_ID_WORD);
		sse_qe[0] = _mm_insert_epi16(sse_qe[0],
				(sched_type[1] == DLB2_SCHED_DIRECTED) ?
					sched_word[1] : ev[1].flow_id,
				DLB2_QE_LOCK_ID_WORD + 4);
		sse_qe[1] = _mm_insert_epi16(sse_qe[1],
				(sched_type[2] == DLB2_SCHED_DIRECTED) ?
					sched_word[2] : ev[2].flow_id,
				DLB2_QE_LOCK_ID_WORD);
		sse_qe[1] = _mm_insert_epi16(sse_qe[1],
				(sched_type[3] == DLB2_SCHED_DIRECTED) ?
					sched_word[3] : ev[3].flow_id,
				DLB2_QE_LOCK_ID_WORD + 4);
		/* Store the event type and sub event type in the metadata:
		 * sse_qe[0][15:0]  = flow_id[0]
		 * sse_qe[0][79:64] = flow_id[1]
		 * sse_qe[1][15:0]  = flow_id[2]
		 * sse_qe[1][79:64] = flow_id[3]
		 */
#define DLB2_QE_EV_TYPE_WORD 0
		sse_qe[0] = _mm_insert_epi16(sse_qe[0],
					     ev[0].sub_event_type << 8 |
					     DLB2_QE_EV_TYPE_WORD);
		sse_qe[0] = _mm_insert_epi16(sse_qe[0],
					     ev[1].sub_event_type << 8 |
					     DLB2_QE_EV_TYPE_WORD + 4);
		sse_qe[1] = _mm_insert_epi16(sse_qe[1],
					     ev[2].sub_event_type << 8 |
					     DLB2_QE_EV_TYPE_WORD);
		sse_qe[1] = _mm_insert_epi16(sse_qe[1],
					     ev[3].sub_event_type << 8 |
					     DLB2_QE_EV_TYPE_WORD + 4);
		/* Store the metadata to memory (use the double-precision
		 * _mm_storeh_pd because there is no integer function for
		 * storing the upper 64b):
		 * qe[0] metadata = sse_qe[0][63:0]
		 * qe[1] metadata = sse_qe[0][127:64]
		 * qe[2] metadata = sse_qe[1][63:0]
		 * qe[3] metadata = sse_qe[1][127:64]
		 */
		_mm_storel_epi64((__m128i *)&qe[0].u.opaque_data, sse_qe[0]);
		_mm_storeh_pd((double *)&qe[1].u.opaque_data,
			      (__m128d)sse_qe[0]);
		_mm_storel_epi64((__m128i *)&qe[2].u.opaque_data, sse_qe[1]);
		_mm_storeh_pd((double *)&qe[3].u.opaque_data,
			      (__m128d)sse_qe[1]);
		/* Event payloads are copied unmodified */
		qe[0].data = ev[0].u64;
		qe[1].data = ev[1].u64;
		qe[2].data = ev[2].u64;
		qe[3].data = ev[3].u64;
		/* Scalar fallback: fill each QE field-by-field */
		for (i = 0; i < num; i++) {
				cmd_byte_map[qm_port->is_directed][ev[i].op];
			qe[i].sched_type = sched_type[i];
			qe[i].data = ev[i].u64;
			qe[i].qid = queue_id[i];
			qe[i].priority = EV_TO_DLB2_PRIO(ev[i].priority);
			qe[i].lock_id = ev[i].flow_id;
			if (sched_type[i] == DLB2_SCHED_DIRECTED) {
				struct dlb2_msg_info *info =
					(struct dlb2_msg_info *)&qe[i].lock_id;
				info->qid = queue_id[i];
				info->sched_type = DLB2_SCHED_DIRECTED;
				info->priority = qe[i].priority;
			qe[i].u.event_type.major = ev[i].event_type;
			qe[i].u.event_type.sub = ev[i].sub_event_type;
/* Prepare a single event for enqueue: resolve the destination hardware
 * queue ID, translate the eventdev sched type into a DLB2 sched type, and
 * account for the hardware and software credits the event consumes based
 * on its op (NEW/FORWARD/RELEASE). On failure, sets rte_errno (negative
 * errno convention used throughout this driver) and bumps tx stats.
 */
2756 dlb2_event_enqueue_prep(struct dlb2_eventdev_port *ev_port,
2757 struct dlb2_port *qm_port,
2758 const struct rte_event ev[],
2759 uint8_t *sched_type,
2762 struct dlb2_eventdev *dlb2 = ev_port->dlb2;
2763 struct dlb2_eventdev_queue *ev_queue;
2764 uint16_t *cached_credits = NULL;
2765 struct dlb2_queue *qm_queue;
2767 ev_queue = &dlb2->ev_queues[ev->queue_id];
2768 qm_queue = &ev_queue->qm_queue;
2769 *queue_id = qm_queue->id;
2771 /* Ignore sched_type and hardware credits on release events */
2772 if (ev->op == RTE_EVENT_OP_RELEASE)
2775 if (!qm_queue->is_directed) {
2776 /* Load balanced destination queue */
/* DLB 2.0 (V2) keeps separate LDB/DIR hardware credit pools; later
 * hardware uses a single combined pool (the else path below).
 */
2778 if (dlb2->version == DLB2_HW_V2) {
2779 if (dlb2_check_enqueue_hw_ldb_credits(qm_port)) {
2780 rte_errno = -ENOSPC;
2783 cached_credits = &qm_port->cached_ldb_credits;
2785 if (dlb2_check_enqueue_hw_credits(qm_port)) {
2786 rte_errno = -ENOSPC;
2789 cached_credits = &qm_port->cached_credits;
/* Map the RTE sched type onto the DLB2 hardware sched type, validating
 * it against the destination queue's configured sched type.
 */
2791 switch (ev->sched_type) {
2792 case RTE_SCHED_TYPE_ORDERED:
2793 DLB2_LOG_DBG("dlb2: put_qe: RTE_SCHED_TYPE_ORDERED\n");
2794 if (qm_queue->sched_type != RTE_SCHED_TYPE_ORDERED) {
2795 DLB2_LOG_ERR("dlb2: tried to send ordered event to unordered queue %d\n",
2797 rte_errno = -EINVAL;
2800 *sched_type = DLB2_SCHED_ORDERED;
2802 case RTE_SCHED_TYPE_ATOMIC:
2803 DLB2_LOG_DBG("dlb2: put_qe: RTE_SCHED_TYPE_ATOMIC\n");
2804 *sched_type = DLB2_SCHED_ATOMIC;
2806 case RTE_SCHED_TYPE_PARALLEL:
2807 DLB2_LOG_DBG("dlb2: put_qe: RTE_SCHED_TYPE_PARALLEL\n");
/* Parallel events sent to an ordered queue must still use the
 * ordered sched type so the queue's ordering domain is honored.
 */
2808 if (qm_queue->sched_type == RTE_SCHED_TYPE_ORDERED)
2809 *sched_type = DLB2_SCHED_ORDERED;
2811 *sched_type = DLB2_SCHED_UNORDERED;
2814 DLB2_LOG_ERR("Unsupported LDB sched type in put_qe\n");
2815 DLB2_INC_STAT(ev_port->stats.tx_invalid, 1);
2816 rte_errno = -EINVAL;
2820 /* Directed destination queue */
2822 if (dlb2->version == DLB2_HW_V2) {
2823 if (dlb2_check_enqueue_hw_dir_credits(qm_port)) {
2824 rte_errno = -ENOSPC;
2827 cached_credits = &qm_port->cached_dir_credits;
2829 if (dlb2_check_enqueue_hw_credits(qm_port)) {
2830 rte_errno = -ENOSPC;
2833 cached_credits = &qm_port->cached_credits;
2835 DLB2_LOG_DBG("dlb2: put_qe: RTE_SCHED_TYPE_DIRECTED\n");
2837 *sched_type = DLB2_SCHED_DIRECTED;
/* Per-op credit accounting: NEW consumes one sw inflight credit and one
 * cached hw credit; FORWARD consumes a cached hw credit and retires one
 * outstanding release; RELEASE returns a sw credit and retires a release.
 */
2842 case RTE_EVENT_OP_NEW:
2843 /* Check that a sw credit is available */
2844 if (dlb2_check_enqueue_sw_credits(dlb2, ev_port)) {
2845 rte_errno = -ENOSPC;
2848 ev_port->inflight_credits--;
2849 (*cached_credits)--;
2851 case RTE_EVENT_OP_FORWARD:
2852 /* Check for outstanding_releases underflow. If this occurs,
2853 * the application is not using the EVENT_OPs correctly; for
2854 * example, forwarding or releasing events that were not
2857 RTE_ASSERT(ev_port->outstanding_releases > 0);
2858 ev_port->outstanding_releases--;
2859 qm_port->issued_releases++;
2860 (*cached_credits)--;
2862 case RTE_EVENT_OP_RELEASE:
2863 ev_port->inflight_credits++;
2864 /* Check for outstanding_releases underflow. If this occurs,
2865 * the application is not using the EVENT_OPs correctly; for
2866 * example, forwarding or releasing events that were not
2869 RTE_ASSERT(ev_port->outstanding_releases > 0);
2870 ev_port->outstanding_releases--;
2871 qm_port->issued_releases++;
2873 /* Replenish s/w credits if enough are cached */
2874 dlb2_replenish_sw_credits(dlb2, ev_port);
2878 DLB2_INC_STAT(ev_port->stats.tx_op_cnt[ev->op], 1);
2879 DLB2_INC_STAT(ev_port->stats.traffic.tx_ok, 1);
/* NOTE(review): guard reads RTE_LIBRTE_PMD_DLB_QUELL_STATS ("DLB", not
 * "DLB2") — verify this matches the quell-stats option name used by the
 * rest of this driver; it looks like a leftover from the DLB v1 PMD.
 */
2881 #ifndef RTE_LIBRTE_PMD_DLB_QUELL_STATS
2882 if (ev->op != RTE_EVENT_OP_RELEASE) {
2883 DLB2_INC_STAT(ev_port->stats.queue[ev->queue_id].enq_ok, 1);
2884 DLB2_INC_STAT(ev_port->stats.tx_sched_cnt[*sched_type], 1);
/* Common enqueue-burst implementation shared by all public enqueue entry
 * points. Processes events in cache-line-sized batches of
 * DLB2_NUM_QES_PER_CACHE_LINE QEs: each batch is prepped
 * (dlb2_event_enqueue_prep), formatted into hardware control words
 * (dlb2_event_build_hcws), and written to the port's MMIO window
 * (dlb2_hw_do_enqueue). In DELAYED_POP token mode a token-pop QE is
 * injected mid-batch once enough releases have been issued, which may
 * produce a partial HCW.
 */
2891 static inline uint16_t
2892 __dlb2_event_enqueue_burst(void *event_port,
2893 const struct rte_event events[],
2897 struct dlb2_eventdev_port *ev_port = event_port;
2898 struct dlb2_port *qm_port = &ev_port->qm_port;
2899 struct process_local_port_data *port_data;
2902 RTE_ASSERT(ev_port->enq_configured);
2903 RTE_ASSERT(events != NULL);
2907 port_data = &dlb2_port[qm_port->id][PORT_TYPE(qm_port)];
2910 uint8_t sched_types[DLB2_NUM_QES_PER_CACHE_LINE];
2911 uint8_t queue_ids[DLB2_NUM_QES_PER_CACHE_LINE];
/* Zero the staging QEs so partially-filled batches carry no stale data */
2915 memset(qm_port->qe4,
2917 DLB2_NUM_QES_PER_CACHE_LINE *
2918 sizeof(struct dlb2_enqueue_qe));
2920 for (; j < DLB2_NUM_QES_PER_CACHE_LINE && (i + j) < num; j++) {
2921 const struct rte_event *ev = &events[i + j];
2922 int16_t thresh = qm_port->token_pop_thresh;
2925 qm_port->token_pop_mode == DELAYED_POP &&
2926 (ev->op == RTE_EVENT_OP_FORWARD ||
2927 ev->op == RTE_EVENT_OP_RELEASE) &&
2928 qm_port->issued_releases >= thresh - 1) {
2929 /* Insert the token pop QE and break out. This
2930 * may result in a partial HCW, but that is
2931 * simpler than supporting arbitrary QE
2934 dlb2_construct_token_pop_qe(qm_port, j);
2936 /* Reset the releases for the next QE batch */
2937 qm_port->issued_releases -= thresh;
2944 if (dlb2_event_enqueue_prep(ev_port, qm_port, ev,
2953 dlb2_event_build_hcws(qm_port, &events[i], j - pop_offs,
2954 sched_types, queue_ids);
2956 dlb2_hw_do_enqueue(qm_port, i == 0, port_data);
2958 /* Don't include the token pop QE in the enqueue count */
2961 /* Don't interpret j < DLB2_NUM_... as out-of-credits if
2964 if (j < DLB2_NUM_QES_PER_CACHE_LINE && pop_offs == 0)
/* Burst enqueue entry point for ports not using delayed token pop. */
2972 dlb2_event_enqueue_burst(void *event_port,
2973 const struct rte_event events[],
2976 return __dlb2_event_enqueue_burst(event_port, events, num, false);
/* Burst enqueue entry point for ports using delayed token pop mode. */
2980 dlb2_event_enqueue_burst_delayed(void *event_port,
2981 const struct rte_event events[],
2984 return __dlb2_event_enqueue_burst(event_port, events, num, true);
/* Single-event enqueue: a burst of exactly one, no delayed token pop. */
2987 static inline uint16_t
2988 dlb2_event_enqueue(void *event_port,
2989 const struct rte_event events[])
2991 return __dlb2_event_enqueue_burst(event_port, events, 1, false);
/* Single-event enqueue for ports using delayed token pop mode. */
2994 static inline uint16_t
2995 dlb2_event_enqueue_delayed(void *event_port,
2996 const struct rte_event events[])
2998 return __dlb2_event_enqueue_burst(event_port, events, 1, true);
/* OP_NEW burst entry point; shares the generic burst path (no delayed
 * token pop), since the op type is carried per-event.
 */
3002 dlb2_event_enqueue_new_burst(void *event_port,
3003 const struct rte_event events[],
3006 return __dlb2_event_enqueue_burst(event_port, events, num, false);
/* OP_NEW burst entry point for ports using delayed token pop mode. */
3010 dlb2_event_enqueue_new_burst_delayed(void *event_port,
3011 const struct rte_event events[],
3014 return __dlb2_event_enqueue_burst(event_port, events, num, true);
/* OP_FORWARD burst entry point; shares the generic burst path. */
3018 dlb2_event_enqueue_forward_burst(void *event_port,
3019 const struct rte_event events[],
3022 return __dlb2_event_enqueue_burst(event_port, events, num, false);
/* OP_FORWARD burst entry point for ports using delayed token pop mode. */
3026 dlb2_event_enqueue_forward_burst_delayed(void *event_port,
3027 const struct rte_event events[],
3030 return __dlb2_event_enqueue_burst(event_port, events, num, true);
/* Issue n release (completion) QEs on behalf of a port, in batches of
 * DLB2_NUM_QES_PER_CACHE_LINE. Directed ports do not track releases in
 * hardware, so they skip straight to the software credit update. Each
 * completed release returns one software credit to the port.
 */
3034 dlb2_event_release(struct dlb2_eventdev *dlb2,
3038 struct process_local_port_data *port_data;
3039 struct dlb2_eventdev_port *ev_port;
3040 struct dlb2_port *qm_port;
/* NOTE(review): bounds check uses '>' — port_id == dlb2->num_ports
 * passes the check, yet ev_ports is presumably indexed 0..num_ports-1.
 * Looks like this should be '>='; confirm against ev_ports sizing.
 */
3043 if (port_id > dlb2->num_ports) {
3044 DLB2_LOG_ERR("Invalid port id %d in dlb2-event_release\n",
3046 rte_errno = -EINVAL;
3050 ev_port = &dlb2->ev_ports[port_id];
3051 qm_port = &ev_port->qm_port;
3052 port_data = &dlb2_port[qm_port->id][PORT_TYPE(qm_port)];
3056 if (qm_port->is_directed) {
3058 goto sw_credit_update;
/* Zero the four staging QEs so stale fields are not re-enqueued */
3066 _mm_storeu_si128((void *)&qm_port->qe4[0], _mm_setzero_si128());
3067 _mm_storeu_si128((void *)&qm_port->qe4[1], _mm_setzero_si128());
3068 _mm_storeu_si128((void *)&qm_port->qe4[2], _mm_setzero_si128());
3069 _mm_storeu_si128((void *)&qm_port->qe4[3], _mm_setzero_si128());
3072 for (; j < DLB2_NUM_QES_PER_CACHE_LINE && (i + j) < n; j++) {
3073 int16_t thresh = qm_port->token_pop_thresh;
3075 if (qm_port->token_pop_mode == DELAYED_POP &&
3076 qm_port->issued_releases >= thresh - 1) {
3077 /* Insert the token pop QE */
3078 dlb2_construct_token_pop_qe(qm_port, j);
3080 /* Reset the releases for the next QE batch */
3081 qm_port->issued_releases -= thresh;
3088 qm_port->qe4[j].cmd_byte = DLB2_COMP_CMD_BYTE;
3089 qm_port->issued_releases++;
3092 dlb2_hw_do_enqueue(qm_port, i == 0, port_data);
3094 /* Don't include the token pop QE in the release count */
3099 /* each release returns one credit */
3100 if (unlikely(!ev_port->outstanding_releases)) {
3101 DLB2_LOG_ERR("%s: Outstanding releases underflowed.\n",
3105 ev_port->outstanding_releases -= i;
3106 ev_port->inflight_credits += i;
3108 /* Replenish s/w credits if enough releases are performed */
3109 dlb2_replenish_sw_credits(dlb2, ev_port);
/* Return num hardware credits to the port's local cache, and flush one
 * batch (DLB2_SW_CREDIT_BATCH_SZ) back to the shared credit pool whenever
 * the cache grows to 2x the batch size. The pool selection mirrors the
 * enqueue path: V2 hardware has split LDB/DIR pools, later hardware a
 * single combined pool; directed vs load-balanced ports pick accordingly.
 */
3113 dlb2_port_credits_inc(struct dlb2_port *qm_port, int num)
3115 uint32_t batch_size = DLB2_SW_CREDIT_BATCH_SZ;
3117 /* increment port credits, and return to pool if exceeds threshold */
3118 if (!qm_port->is_directed) {
3119 if (qm_port->dlb2->version == DLB2_HW_V2) {
3120 qm_port->cached_ldb_credits += num;
3121 if (qm_port->cached_ldb_credits >= 2 * batch_size) {
3123 qm_port->credit_pool[DLB2_LDB_QUEUE],
3124 batch_size, __ATOMIC_SEQ_CST);
3125 qm_port->cached_ldb_credits -= batch_size;
3128 qm_port->cached_credits += num;
3129 if (qm_port->cached_credits >= 2 * batch_size) {
3131 qm_port->credit_pool[DLB2_COMBINED_POOL],
3132 batch_size, __ATOMIC_SEQ_CST);
3133 qm_port->cached_credits -= batch_size;
3137 if (qm_port->dlb2->version == DLB2_HW_V2) {
3138 qm_port->cached_dir_credits += num;
3139 if (qm_port->cached_dir_credits >= 2 * batch_size) {
3141 qm_port->credit_pool[DLB2_DIR_QUEUE],
3142 batch_size, __ATOMIC_SEQ_CST);
3143 qm_port->cached_dir_credits -= batch_size;
3146 qm_port->cached_credits += num;
3147 if (qm_port->cached_credits >= 2 * batch_size) {
3149 qm_port->credit_pool[DLB2_COMBINED_POOL],
3150 batch_size, __ATOMIC_SEQ_CST);
3151 qm_port->cached_credits -= batch_size;
3157 #define CLB_MASK_IDX 0
3158 #define CLB_VAL_IDX 1
/* rte_power_monitor condition callback: opaque[] carries the mask
 * (CLB_MASK_IDX) and expected value (CLB_VAL_IDX) set up by
 * dlb2_dequeue_wait. Returning -1 aborts the wait (a matching QE gen-bit
 * word has arrived); 0 keeps the core sleeping.
 */
3160 dlb2_monitor_callback(const uint64_t val,
3161 const uint64_t opaque[RTE_POWER_MONITOR_OPAQUE_SZ])
3163 /* abort if the value matches */
3164 return (val & opaque[CLB_MASK_IDX]) == opaque[CLB_VAL_IDX] ? -1 : 0;
/* Wait for a new QE to arrive on the port's CQ, bounded by the dequeue
 * timeout. Three outcomes: the timeout has already elapsed; the device
 * allows umwait, in which case we arm rte_power_monitor() on the 64-bit
 * word of the next CQ entry that holds the cq_gen bit; otherwise we
 * busy-poll the timer for at most dlb2->poll_interval ticks per call.
 */
3168 dlb2_dequeue_wait(struct dlb2_eventdev *dlb2,
3169 struct dlb2_eventdev_port *ev_port,
3170 struct dlb2_port *qm_port,
3172 uint64_t start_ticks)
3174 struct process_local_port_data *port_data;
3175 uint64_t elapsed_ticks;
3177 port_data = &dlb2_port[qm_port->id][PORT_TYPE(qm_port)];
3179 elapsed_ticks = rte_get_timer_cycles() - start_ticks;
3181 /* Wait/poll time expired */
3182 if (elapsed_ticks >= timeout) {
3184 } else if (dlb2->umwait_allowed) {
3185 struct rte_power_monitor_cond pmc;
3186 volatile struct dlb2_dequeue_qe *cq_base;
3189 struct dlb2_dequeue_qe qe;
3191 uint64_t expected_value;
3192 volatile uint64_t *monitor_addr;
3194 qe_mask.qe.cq_gen = 1; /* set mask */
3196 cq_base = port_data->cq_base;
3197 monitor_addr = (volatile uint64_t *)(volatile void *)
3198 &cq_base[qm_port->cq_idx];
3199 monitor_addr++; /* cq_gen bit is in second 64bit location */
/* The expected gen-bit value alternates each CQ wrap; match the
 * port's current phase.
 */
3201 if (qm_port->gen_bit)
3202 expected_value = qe_mask.raw_qe[1];
3206 pmc.addr = monitor_addr;
3207 /* store expected value and comparison mask in opaque data */
3208 pmc.opaque[CLB_VAL_IDX] = expected_value;
3209 pmc.opaque[CLB_MASK_IDX] = qe_mask.raw_qe[1];
3210 /* set up callback */
3211 pmc.fn = dlb2_monitor_callback;
3212 pmc.size = sizeof(uint64_t);
3214 rte_power_monitor(&pmc, timeout + start_ticks);
3216 DLB2_INC_STAT(ev_port->stats.traffic.rx_umonitor_umwait, 1);
3218 uint64_t poll_interval = dlb2->poll_interval;
3219 uint64_t curr_ticks = rte_get_timer_cycles();
3220 uint64_t init_ticks = curr_ticks;
/* Busy-poll, bounded by both the overall timeout and one poll interval */
3222 while ((curr_ticks - start_ticks < timeout) &&
3223 (curr_ticks - init_ticks < poll_interval))
3224 curr_ticks = rte_get_timer_cycles();
3230 static __rte_noinline int
3231 dlb2_process_dequeue_qes(struct dlb2_eventdev_port *ev_port,
3232 struct dlb2_port *qm_port,
3233 struct rte_event *events,
3234 struct dlb2_dequeue_qe *qes,
3237 uint8_t *qid_mappings = qm_port->qid_mappings;
3240 for (i = 0, num = 0; i < cnt; i++) {
3241 struct dlb2_dequeue_qe *qe = &qes[i];
3242 int sched_type_map[DLB2_NUM_HW_SCHED_TYPES] = {
3243 [DLB2_SCHED_ATOMIC] = RTE_SCHED_TYPE_ATOMIC,
3244 [DLB2_SCHED_UNORDERED] = RTE_SCHED_TYPE_PARALLEL,
3245 [DLB2_SCHED_ORDERED] = RTE_SCHED_TYPE_ORDERED,
3246 [DLB2_SCHED_DIRECTED] = RTE_SCHED_TYPE_ATOMIC,
3249 /* Fill in event information.
3250 * Note that flow_id must be embedded in the data by
3251 * the app, such as the mbuf RSS hash field if the data
3254 if (unlikely(qe->error)) {
3255 DLB2_LOG_ERR("QE error bit ON\n");
3256 DLB2_INC_STAT(ev_port->stats.traffic.rx_drop, 1);
3257 dlb2_consume_qe_immediate(qm_port, 1);
3258 continue; /* Ignore */
3261 events[num].u64 = qe->data;
3262 events[num].flow_id = qe->flow_id;
3263 events[num].priority = DLB2_TO_EV_PRIO((uint8_t)qe->priority);
3264 events[num].event_type = qe->u.event_type.major;
3265 events[num].sub_event_type = qe->u.event_type.sub;
3266 events[num].sched_type = sched_type_map[qe->sched_type];
3267 events[num].impl_opaque = qe->qid_depth;
3269 /* qid not preserved for directed queues */
3270 if (qm_port->is_directed)
3271 evq_id = ev_port->link[0].queue_id;
3273 evq_id = qid_mappings[qe->qid];
3275 events[num].queue_id = evq_id;
3277 ev_port->stats.queue[evq_id].qid_depth[qe->qid_depth],
3279 DLB2_INC_STAT(ev_port->stats.rx_sched_cnt[qe->sched_type], 1);
3283 DLB2_INC_STAT(ev_port->stats.traffic.rx_ok, num);
/* SSE fast-path conversion of exactly four dequeued QEs into four
 * rte_events. The 64-bit event metadata for two events is assembled per
 * 128-bit register via byte/word/dword inserts, then stored with
 * _mm_storel_epi64 / _mm_storeh_pd. If any of the four QEs carries the
 * error bit, the whole group is punted to the scalar
 * dlb2_process_dequeue_qes() slow path instead.
 */
3289 dlb2_process_dequeue_four_qes(struct dlb2_eventdev_port *ev_port,
3290 struct dlb2_port *qm_port,
3291 struct rte_event *events,
3292 struct dlb2_dequeue_qe *qes)
3294 int sched_type_map[] = {
3295 [DLB2_SCHED_ATOMIC] = RTE_SCHED_TYPE_ATOMIC,
3296 [DLB2_SCHED_UNORDERED] = RTE_SCHED_TYPE_PARALLEL,
3297 [DLB2_SCHED_ORDERED] = RTE_SCHED_TYPE_ORDERED,
3298 [DLB2_SCHED_DIRECTED] = RTE_SCHED_TYPE_ATOMIC,
3300 const int num_events = DLB2_NUM_QES_PER_CACHE_LINE;
3301 uint8_t *qid_mappings = qm_port->qid_mappings;
3304 /* In the unlikely case that any of the QE error bits are set, process
3305 * them one at a time.
3307 if (unlikely(qes[0].error || qes[1].error ||
3308 qes[2].error || qes[3].error))
3309 return dlb2_process_dequeue_qes(ev_port, qm_port, events,
/* Event payloads are copied directly; only metadata needs assembly */
3312 events[0].u64 = qes[0].data;
3313 events[1].u64 = qes[1].data;
3314 events[2].u64 = qes[2].data;
3315 events[3].u64 = qes[3].data;
3317 /* Construct the metadata portion of two struct rte_events
3318 * in one 128b SSE register. Event metadata is constructed in the SSE
3319 * registers like so:
3320 * sse_evt[0][63:0]: event[0]'s metadata
3321 * sse_evt[0][127:64]: event[1]'s metadata
3322 * sse_evt[1][63:0]: event[2]'s metadata
3323 * sse_evt[1][127:64]: event[3]'s metadata
3325 sse_evt[0] = _mm_setzero_si128();
3326 sse_evt[1] = _mm_setzero_si128();
3328 /* Convert the hardware queue ID to an event queue ID and store it in
3330 * sse_evt[0][47:40] = qid_mappings[qes[0].qid]
3331 * sse_evt[0][111:104] = qid_mappings[qes[1].qid]
3332 * sse_evt[1][47:40] = qid_mappings[qes[2].qid]
3333 * sse_evt[1][111:104] = qid_mappings[qes[3].qid]
3335 #define DLB_EVENT_QUEUE_ID_BYTE 5
3336 sse_evt[0] = _mm_insert_epi8(sse_evt[0],
3337 qid_mappings[qes[0].qid],
3338 DLB_EVENT_QUEUE_ID_BYTE);
3339 sse_evt[0] = _mm_insert_epi8(sse_evt[0],
3340 qid_mappings[qes[1].qid],
3341 DLB_EVENT_QUEUE_ID_BYTE + 8);
3342 sse_evt[1] = _mm_insert_epi8(sse_evt[1],
3343 qid_mappings[qes[2].qid],
3344 DLB_EVENT_QUEUE_ID_BYTE);
3345 sse_evt[1] = _mm_insert_epi8(sse_evt[1],
3346 qid_mappings[qes[3].qid],
3347 DLB_EVENT_QUEUE_ID_BYTE + 8);
3349 /* Convert the hardware priority to an event priority and store it in
3350 * the metadata, while also returning the queue depth status
3351 * value captured by the hardware, storing it in impl_opaque, which can
3352 * be read by the application but not modified
3353 * sse_evt[0][55:48] = DLB2_TO_EV_PRIO(qes[0].priority)
3354 * sse_evt[0][63:56] = qes[0].qid_depth
3355 * sse_evt[0][119:112] = DLB2_TO_EV_PRIO(qes[1].priority)
3356 * sse_evt[0][127:120] = qes[1].qid_depth
3357 * sse_evt[1][55:48] = DLB2_TO_EV_PRIO(qes[2].priority)
3358 * sse_evt[1][63:56] = qes[2].qid_depth
3359 * sse_evt[1][119:112] = DLB2_TO_EV_PRIO(qes[3].priority)
3360 * sse_evt[1][127:120] = qes[3].qid_depth
3362 #define DLB_EVENT_PRIO_IMPL_OPAQUE_WORD 3
3363 #define DLB_BYTE_SHIFT 8
3365 _mm_insert_epi16(sse_evt[0],
3366 DLB2_TO_EV_PRIO((uint8_t)qes[0].priority) |
3367 (qes[0].qid_depth << DLB_BYTE_SHIFT),
3368 DLB_EVENT_PRIO_IMPL_OPAQUE_WORD);
3370 _mm_insert_epi16(sse_evt[0],
3371 DLB2_TO_EV_PRIO((uint8_t)qes[1].priority) |
3372 (qes[1].qid_depth << DLB_BYTE_SHIFT),
3373 DLB_EVENT_PRIO_IMPL_OPAQUE_WORD + 4);
3375 _mm_insert_epi16(sse_evt[1],
3376 DLB2_TO_EV_PRIO((uint8_t)qes[2].priority) |
3377 (qes[2].qid_depth << DLB_BYTE_SHIFT),
3378 DLB_EVENT_PRIO_IMPL_OPAQUE_WORD);
3380 _mm_insert_epi16(sse_evt[1],
3381 DLB2_TO_EV_PRIO((uint8_t)qes[3].priority) |
3382 (qes[3].qid_depth << DLB_BYTE_SHIFT),
3383 DLB_EVENT_PRIO_IMPL_OPAQUE_WORD + 4);
3385 /* Write the event type, sub event type, and flow_id to the event
3387 * sse_evt[0][31:0] = qes[0].flow_id |
3388 * qes[0].u.event_type.major << 28 |
3389 * qes[0].u.event_type.sub << 20;
3390 * sse_evt[0][95:64] = qes[1].flow_id |
3391 * qes[1].u.event_type.major << 28 |
3392 * qes[1].u.event_type.sub << 20;
3393 * sse_evt[1][31:0] = qes[2].flow_id |
3394 * qes[2].u.event_type.major << 28 |
3395 * qes[2].u.event_type.sub << 20;
3396 * sse_evt[1][95:64] = qes[3].flow_id |
3397 * qes[3].u.event_type.major << 28 |
3398 * qes[3].u.event_type.sub << 20;
3400 #define DLB_EVENT_EV_TYPE_DW 0
3401 #define DLB_EVENT_EV_TYPE_SHIFT 28
3402 #define DLB_EVENT_SUB_EV_TYPE_SHIFT 20
3403 sse_evt[0] = _mm_insert_epi32(sse_evt[0],
3405 qes[0].u.event_type.major << DLB_EVENT_EV_TYPE_SHIFT |
3406 qes[0].u.event_type.sub << DLB_EVENT_SUB_EV_TYPE_SHIFT,
3407 DLB_EVENT_EV_TYPE_DW);
3408 sse_evt[0] = _mm_insert_epi32(sse_evt[0],
3410 qes[1].u.event_type.major << DLB_EVENT_EV_TYPE_SHIFT |
3411 qes[1].u.event_type.sub << DLB_EVENT_SUB_EV_TYPE_SHIFT,
3412 DLB_EVENT_EV_TYPE_DW + 2);
3413 sse_evt[1] = _mm_insert_epi32(sse_evt[1],
3415 qes[2].u.event_type.major << DLB_EVENT_EV_TYPE_SHIFT |
3416 qes[2].u.event_type.sub << DLB_EVENT_SUB_EV_TYPE_SHIFT,
3417 DLB_EVENT_EV_TYPE_DW);
3418 sse_evt[1] = _mm_insert_epi32(sse_evt[1],
3420 qes[3].u.event_type.major << DLB_EVENT_EV_TYPE_SHIFT |
3421 qes[3].u.event_type.sub << DLB_EVENT_SUB_EV_TYPE_SHIFT,
3422 DLB_EVENT_EV_TYPE_DW + 2);
3424 /* Write the sched type to the event metadata. 'op' and 'rsvd' are not
3426 * sse_evt[0][39:32] = sched_type_map[qes[0].sched_type] << 6
3427 * sse_evt[0][103:96] = sched_type_map[qes[1].sched_type] << 6
3428 * sse_evt[1][39:32] = sched_type_map[qes[2].sched_type] << 6
3429 * sse_evt[1][103:96] = sched_type_map[qes[3].sched_type] << 6
3431 #define DLB_EVENT_SCHED_TYPE_BYTE 4
3432 #define DLB_EVENT_SCHED_TYPE_SHIFT 6
3433 sse_evt[0] = _mm_insert_epi8(sse_evt[0],
3434 sched_type_map[qes[0].sched_type] << DLB_EVENT_SCHED_TYPE_SHIFT,
3435 DLB_EVENT_SCHED_TYPE_BYTE);
3436 sse_evt[0] = _mm_insert_epi8(sse_evt[0],
3437 sched_type_map[qes[1].sched_type] << DLB_EVENT_SCHED_TYPE_SHIFT,
3438 DLB_EVENT_SCHED_TYPE_BYTE + 8);
3439 sse_evt[1] = _mm_insert_epi8(sse_evt[1],
3440 sched_type_map[qes[2].sched_type] << DLB_EVENT_SCHED_TYPE_SHIFT,
3441 DLB_EVENT_SCHED_TYPE_BYTE);
3442 sse_evt[1] = _mm_insert_epi8(sse_evt[1],
3443 sched_type_map[qes[3].sched_type] << DLB_EVENT_SCHED_TYPE_SHIFT,
3444 DLB_EVENT_SCHED_TYPE_BYTE + 8);
3446 /* Store the metadata to the event (use the double-precision
3447 * _mm_storeh_pd because there is no integer function for storing the
3449 * events[0].event = sse_evt[0][63:0]
3450 * events[1].event = sse_evt[0][127:64]
3451 * events[2].event = sse_evt[1][63:0]
3452 * events[3].event = sse_evt[1][127:64]
3454 _mm_storel_epi64((__m128i *)&events[0].event, sse_evt[0]);
3455 _mm_storeh_pd((double *)&events[1].event, (__m128d) sse_evt[0]);
3456 _mm_storel_epi64((__m128i *)&events[2].event, sse_evt[1]);
3457 _mm_storeh_pd((double *)&events[3].event, (__m128d) sse_evt[1]);
3459 DLB2_INC_STAT(ev_port->stats.rx_sched_cnt[qes[0].sched_type], 1);
3460 DLB2_INC_STAT(ev_port->stats.rx_sched_cnt[qes[1].sched_type], 1);
3461 DLB2_INC_STAT(ev_port->stats.rx_sched_cnt[qes[2].sched_type], 1);
3462 DLB2_INC_STAT(ev_port->stats.rx_sched_cnt[qes[3].sched_type], 1);
3465 ev_port->stats.queue[events[0].queue_id].
3466 qid_depth[qes[0].qid_depth],
3469 ev_port->stats.queue[events[1].queue_id].
3470 qid_depth[qes[1].qid_depth],
3473 ev_port->stats.queue[events[2].queue_id].
3474 qid_depth[qes[2].qid_depth],
3477 ev_port->stats.queue[events[3].queue_id].
3478 qid_depth[qes[3].qid_depth],
3481 DLB2_INC_STAT(ev_port->stats.traffic.rx_ok, num_events);
/* Sparse-CQ receive: load the next four QEs (spaced 4 entries apart in
 * the CQ ring), validate them via their gen bits, and return how many are
 * new. The xor_mask is corrected for ring wrap-around so QEs written in
 * the previous gen-bit phase still validate correctly; the popcount of
 * the XORed gen bits is the count of valid (contiguous) new QEs.
 */
3486 static __rte_always_inline int
3487 dlb2_recv_qe_sparse(struct dlb2_port *qm_port, struct dlb2_dequeue_qe *qe)
3489 volatile struct dlb2_dequeue_qe *cq_addr;
3490 uint8_t xor_mask[2] = {0x0F, 0x00};
3491 const uint8_t and_mask = 0x0F;
3492 __m128i *qes = (__m128i *)qe;
3493 uint8_t gen_bits, gen_bit;
3497 cq_addr = dlb2_port[qm_port->id][PORT_TYPE(qm_port)].cq_base;
3499 idx = qm_port->cq_idx_unmasked & qm_port->cq_depth_mask;
3500 /* Load the next 4 QEs */
3501 addr[0] = (uintptr_t)&cq_addr[idx];
3502 addr[1] = (uintptr_t)&cq_addr[(idx + 4) & qm_port->cq_depth_mask];
3503 addr[2] = (uintptr_t)&cq_addr[(idx + 8) & qm_port->cq_depth_mask];
3504 addr[3] = (uintptr_t)&cq_addr[(idx + 12) & qm_port->cq_depth_mask];
3506 /* Prefetch next batch of QEs (all CQs occupy minimum 8 cache lines) */
3507 rte_prefetch0(&cq_addr[(idx + 16) & qm_port->cq_depth_mask]);
3508 rte_prefetch0(&cq_addr[(idx + 20) & qm_port->cq_depth_mask]);
3509 rte_prefetch0(&cq_addr[(idx + 24) & qm_port->cq_depth_mask]);
3510 rte_prefetch0(&cq_addr[(idx + 28) & qm_port->cq_depth_mask]);
3512 /* Correct the xor_mask for wrap-around QEs */
3513 gen_bit = qm_port->gen_bit;
3514 xor_mask[gen_bit] ^= !!((idx + 4) > qm_port->cq_depth_mask) << 1;
3515 xor_mask[gen_bit] ^= !!((idx + 8) > qm_port->cq_depth_mask) << 2;
3516 xor_mask[gen_bit] ^= !!((idx + 12) > qm_port->cq_depth_mask) << 3;
3518 /* Read the cache lines backwards to ensure that if QE[N] (N > 0) is
3519 * valid, then QEs[0:N-1] are too.
3521 qes[3] = _mm_load_si128((__m128i *)(void *)addr[3]);
3522 rte_compiler_barrier();
3523 qes[2] = _mm_load_si128((__m128i *)(void *)addr[2]);
3524 rte_compiler_barrier();
3525 qes[1] = _mm_load_si128((__m128i *)(void *)addr[1]);
3526 rte_compiler_barrier();
3527 qes[0] = _mm_load_si128((__m128i *)(void *)addr[0]);
/* The gen bit lives in the MSB of byte 15 of each 16B QE */
3529 /* Extract and combine the gen bits */
3530 gen_bits = ((_mm_extract_epi8(qes[0], 15) & 0x1) << 0) |
3531 ((_mm_extract_epi8(qes[1], 15) & 0x1) << 1) |
3532 ((_mm_extract_epi8(qes[2], 15) & 0x1) << 2) |
3533 ((_mm_extract_epi8(qes[3], 15) & 0x1) << 3);
3535 /* XOR the combined bits such that a 1 represents a valid QE */
3536 gen_bits ^= xor_mask[gen_bit];
3538 /* Mask off gen bits we don't care about */
3539 gen_bits &= and_mask;
3541 return __builtin_popcount(gen_bits);
/* Vectorized QE-to-rte_event conversion for up to four QEs at once.
 * v_qe_meta / v_qe_status are 32-bit slices gathered from the four raw
 * QEs (v_qe_0..3); all field remapping (QID translation, sched-type
 * shuffle, priority shift, ev/sub type packing, flow_id merge) happens
 * in SSE registers, and the final events are blended with the QE payload
 * and stored. The switch on valid_events stores only the valid tail.
 */
3545 _process_deq_qes_vec_impl(struct dlb2_port *qm_port,
3546 struct rte_event *events,
3552 __m128i v_qe_status,
3553 uint32_t valid_events)
3555 /* Look up the event QIDs, using the hardware QIDs to index the
3556 * port's QID mapping.
3558 * Each v_qe_[0-4] is just a 16-byte load of the whole QE. It is
3559 * passed along in registers as the QE data is required later.
3561 * v_qe_meta is an u32 unpack of all 4x QEs. A.k.a, it contains one
3562 * 32-bit slice of each QE, so makes up a full SSE register. This
3563 * allows parallel processing of 4x QEs in a single register.
3566 __m128i v_qid_done = {0};
3567 int hw_qid0 = _mm_extract_epi8(v_qe_meta, 2);
3568 int hw_qid1 = _mm_extract_epi8(v_qe_meta, 6);
3569 int hw_qid2 = _mm_extract_epi8(v_qe_meta, 10);
3570 int hw_qid3 = _mm_extract_epi8(v_qe_meta, 14);
3572 int ev_qid0 = qm_port->qid_mappings[hw_qid0];
3573 int ev_qid1 = qm_port->qid_mappings[hw_qid1];
3574 int ev_qid2 = qm_port->qid_mappings[hw_qid2];
3575 int ev_qid3 = qm_port->qid_mappings[hw_qid3];
/* Low 2 bits of the sched byte select the hardware sched type */
3577 int hw_sched0 = _mm_extract_epi8(v_qe_meta, 3) & 3ul;
3578 int hw_sched1 = _mm_extract_epi8(v_qe_meta, 7) & 3ul;
3579 int hw_sched2 = _mm_extract_epi8(v_qe_meta, 11) & 3ul;
3580 int hw_sched3 = _mm_extract_epi8(v_qe_meta, 15) & 3ul;
3582 v_qid_done = _mm_insert_epi8(v_qid_done, ev_qid0, 2);
3583 v_qid_done = _mm_insert_epi8(v_qid_done, ev_qid1, 6);
3584 v_qid_done = _mm_insert_epi8(v_qid_done, ev_qid2, 10);
3585 v_qid_done = _mm_insert_epi8(v_qid_done, ev_qid3, 14);
3587 /* Schedule field remapping using byte shuffle
3588 * - Full byte containing sched field handled here (op, rsvd are zero)
3589 * - Note sanitizing the register requires two masking ANDs:
3590 * 1) to strip prio/msg_type from byte for correct shuffle lookup
3591 * 2) to strip any non-sched-field lanes from any results to OR later
3592 * - Final byte result is >> 10 to another byte-lane inside the u32.
3593 * This makes the final combination OR easier to make the rte_event.
3595 __m128i v_sched_done;
3596 __m128i v_sched_bits;
3598 static const uint8_t sched_type_map[16] = {
3599 [DLB2_SCHED_ATOMIC] = RTE_SCHED_TYPE_ATOMIC,
3600 [DLB2_SCHED_UNORDERED] = RTE_SCHED_TYPE_PARALLEL,
3601 [DLB2_SCHED_ORDERED] = RTE_SCHED_TYPE_ORDERED,
3602 [DLB2_SCHED_DIRECTED] = RTE_SCHED_TYPE_ATOMIC,
3604 static const uint8_t sched_and_mask[16] = {
3605 0x00, 0x00, 0x00, 0x03,
3606 0x00, 0x00, 0x00, 0x03,
3607 0x00, 0x00, 0x00, 0x03,
3608 0x00, 0x00, 0x00, 0x03,
3610 const __m128i v_sched_map = _mm_loadu_si128(
3611 (const __m128i *)sched_type_map);
3612 __m128i v_sched_mask = _mm_loadu_si128(
3613 (const __m128i *)&sched_and_mask);
3614 v_sched_bits = _mm_and_si128(v_qe_meta, v_sched_mask);
3615 __m128i v_sched_remapped = _mm_shuffle_epi8(v_sched_map,
3617 __m128i v_preshift = _mm_and_si128(v_sched_remapped,
3619 v_sched_done = _mm_srli_epi32(v_preshift, 10);
3622 /* Priority handling
3623 * - QE provides 3 bits of priority
3624 * - Shift << 3 to move to MSBs for byte-prio in rte_event
3625 * - Mask bits to avoid pollution, leaving only 3 prio MSBs in reg
3627 __m128i v_prio_done;
3629 static const uint8_t prio_mask[16] = {
3630 0x00, 0x00, 0x00, 0x07 << 5,
3631 0x00, 0x00, 0x00, 0x07 << 5,
3632 0x00, 0x00, 0x00, 0x07 << 5,
3633 0x00, 0x00, 0x00, 0x07 << 5,
3635 __m128i v_prio_mask = _mm_loadu_si128(
3636 (const __m128i *)prio_mask);
3637 __m128i v_prio_shifted = _mm_slli_epi32(v_qe_meta, 3);
3638 v_prio_done = _mm_and_si128(v_prio_shifted, v_prio_mask);
3641 /* Event Sub/Type handling:
3642 * we want to keep the lower 12 bits of each QE. Shift up by 20 bits
3643 * to get the sub/ev type data into rte_event location, clearing the
3644 * lower 20 bits in the process.
3646 __m128i v_types_done;
3648 static const uint8_t event_mask[16] = {
3649 0x0f, 0x00, 0x00, 0x00,
3650 0x0f, 0x00, 0x00, 0x00,
3651 0x0f, 0x00, 0x00, 0x00,
3652 0x0f, 0x00, 0x00, 0x00,
3654 static const uint8_t sub_event_mask[16] = {
3655 0xff, 0x00, 0x00, 0x00,
3656 0xff, 0x00, 0x00, 0x00,
3657 0xff, 0x00, 0x00, 0x00,
3658 0xff, 0x00, 0x00, 0x00,
3660 static const uint8_t flow_mask[16] = {
3661 0xff, 0xff, 0x00, 0x00,
3662 0xff, 0xff, 0x00, 0x00,
3663 0xff, 0xff, 0x00, 0x00,
3664 0xff, 0xff, 0x00, 0x00,
3666 __m128i v_event_mask = _mm_loadu_si128(
3667 (const __m128i *)event_mask);
3668 __m128i v_sub_event_mask = _mm_loadu_si128(
3669 (const __m128i *)sub_event_mask);
3670 __m128i v_flow_mask = _mm_loadu_si128(
3671 (const __m128i *)flow_mask);
3672 __m128i v_sub = _mm_srli_epi32(v_qe_meta, 8);
3673 v_sub = _mm_and_si128(v_sub, v_sub_event_mask);
3674 __m128i v_type = _mm_and_si128(v_qe_meta, v_event_mask);
3675 v_type = _mm_slli_epi32(v_type, 8);
3676 v_types_done = _mm_or_si128(v_type, v_sub);
3677 v_types_done = _mm_slli_epi32(v_types_done, 20);
/* flow_id comes from the QE status slice, 16 bits per lane */
3678 __m128i v_flow = _mm_and_si128(v_qe_status, v_flow_mask);
3679 v_types_done = _mm_or_si128(v_types_done, v_flow);
3682 /* Combine QID, Sched and Prio fields, then Shift >> 8 bits to align
3683 * with the rte_event, allowing unpacks to move/blend with payload.
3685 __m128i v_q_s_p_done;
3687 __m128i v_qid_sched = _mm_or_si128(v_qid_done, v_sched_done);
3688 __m128i v_q_s_prio = _mm_or_si128(v_qid_sched, v_prio_done);
3689 v_q_s_p_done = _mm_srli_epi32(v_q_s_prio, 8);
3692 __m128i v_unpk_ev_23, v_unpk_ev_01, v_ev_2, v_ev_3, v_ev_0, v_ev_1;
3694 /* Unpack evs into u64 metadata, then indiv events */
3695 v_unpk_ev_23 = _mm_unpackhi_epi32(v_types_done, v_q_s_p_done);
3696 v_unpk_ev_01 = _mm_unpacklo_epi32(v_types_done, v_q_s_p_done);
/* Cases appear to fall through deliberately: storing event[k] implies
 * events[0..k-1] are also valid and must be stored too.
 */
3698 switch (valid_events) {
3700 v_ev_3 = _mm_blend_epi16(v_unpk_ev_23, v_qe_3, 0x0F);
3701 v_ev_3 = _mm_alignr_epi8(v_ev_3, v_ev_3, 8);
3702 _mm_storeu_si128((__m128i *)&events[3], v_ev_3);
3703 DLB2_INC_STAT(qm_port->ev_port->stats.rx_sched_cnt[hw_sched3],
3707 v_ev_2 = _mm_unpacklo_epi64(v_unpk_ev_23, v_qe_2);
3708 _mm_storeu_si128((__m128i *)&events[2], v_ev_2);
3709 DLB2_INC_STAT(qm_port->ev_port->stats.rx_sched_cnt[hw_sched2],
3713 v_ev_1 = _mm_blend_epi16(v_unpk_ev_01, v_qe_1, 0x0F);
3714 v_ev_1 = _mm_alignr_epi8(v_ev_1, v_ev_1, 8);
3715 _mm_storeu_si128((__m128i *)&events[1], v_ev_1);
3716 DLB2_INC_STAT(qm_port->ev_port->stats.rx_sched_cnt[hw_sched1],
3720 v_ev_0 = _mm_unpacklo_epi64(v_unpk_ev_01, v_qe_0);
3721 _mm_storeu_si128((__m128i *)&events[0], v_ev_0);
3722 DLB2_INC_STAT(qm_port->ev_port->stats.rx_sched_cnt[hw_sched0],
/* Vectorized receive for a sparse-mode CQ: load up to four QEs with SSE,
 * detect how many are newly valid via the port's rolling gen-bit mask,
 * and convert/store them as rte_events into 'events'.
 *
 * In sparse mode each valid QE occupies its own 64B cache line, so the
 * four candidate QEs live at CQ slots idx+0/4/8/12.
 */
3727 static __rte_always_inline int
3728 dlb2_recv_qe_sparse_vec(struct dlb2_port *qm_port, void *events,
3729 uint32_t max_events)
3731 /* Using unmasked idx for perf, and masking manually */
3732 uint16_t idx = qm_port->cq_idx_unmasked;
3733 volatile struct dlb2_dequeue_qe *cq_addr;
3735 cq_addr = dlb2_port[qm_port->id][PORT_TYPE(qm_port)].cq_base;
/* Addresses of the 4 candidate QEs, each masked back into the ring. */
3737 uintptr_t qe_ptr_3 = (uintptr_t)&cq_addr[(idx + 12) &
3738 qm_port->cq_depth_mask];
3739 uintptr_t qe_ptr_2 = (uintptr_t)&cq_addr[(idx + 8) &
3740 qm_port->cq_depth_mask];
3741 uintptr_t qe_ptr_1 = (uintptr_t)&cq_addr[(idx + 4) &
3742 qm_port->cq_depth_mask];
3743 uintptr_t qe_ptr_0 = (uintptr_t)&cq_addr[(idx + 0) &
3744 qm_port->cq_depth_mask];
3746 /* Load QEs from CQ: use compiler barriers to avoid load reordering.
 * Loaded highest-index first so that if QE[N] is valid, QE[0..N-1]
 * were read no earlier than it.
 */
3747 __m128i v_qe_3 = _mm_loadu_si128((const __m128i *)qe_ptr_3);
3748 rte_compiler_barrier();
3749 __m128i v_qe_2 = _mm_loadu_si128((const __m128i *)qe_ptr_2);
3750 rte_compiler_barrier();
3751 __m128i v_qe_1 = _mm_loadu_si128((const __m128i *)qe_ptr_1);
3752 rte_compiler_barrier();
3753 __m128i v_qe_0 = _mm_loadu_si128((const __m128i *)qe_ptr_0);
3755 /* Generate the pkt_shuffle mask;
3756 * - Avoids load in otherwise load-heavy section of code
3757 * - Moves bytes 3,7,11,15 (gen bit bytes) to LSB bytes in XMM
 * (all other lanes are set to 0xFF so _mm_shuffle_epi8 zeroes them)
 */
3759 const uint32_t stat_shuf_bytes = (15 << 24) | (11 << 16) | (7 << 8) | 3;
3760 __m128i v_zeros = _mm_setzero_si128();
3761 __m128i v_ffff = _mm_cmpeq_epi8(v_zeros, v_zeros);
3762 __m128i v_stat_shuf_mask = _mm_insert_epi32(v_ffff, stat_shuf_bytes, 0);
3764 /* Extract u32 components required from the QE
3765 * - QE[64 to 95 ] for metadata (qid, sched, prio, event type, ...)
3766 * - QE[96 to 127] for status (cq gen bit, error)
3768 * Note that stage 1 of the unpacking is re-used for both u32 extracts
 */
3770 __m128i v_qe_02 = _mm_unpackhi_epi32(v_qe_0, v_qe_2);
3771 __m128i v_qe_13 = _mm_unpackhi_epi32(v_qe_1, v_qe_3);
3772 __m128i v_qe_status = _mm_unpackhi_epi32(v_qe_02, v_qe_13);
3773 __m128i v_qe_meta = _mm_unpacklo_epi32(v_qe_02, v_qe_13);
3775 /* Status byte (gen_bit, error) handling:
3776 * - Shuffle to lanes 0,1,2,3, clear all others
3777 * - Shift right by 7 for gen bit to MSB, movemask to scalar
3778 * - Shift right by 2 for error bit to MSB, movemask to scalar
 */
3780 __m128i v_qe_shuffled = _mm_shuffle_epi8(v_qe_status, v_stat_shuf_mask);
3781 __m128i v_qes_shift_gen_bit = _mm_slli_epi32(v_qe_shuffled, 7);
3782 int32_t qe_gen_bits = _mm_movemask_epi8(v_qes_shift_gen_bit) & 0xf;
3784 /* Expected vs Reality of QE Gen bits
3785 * - cq_rolling_mask provides expected bits
3786 * - QE loads, unpacks/shuffle and movemask provides reality
3787 * - XOR of the two gives bitmask of new packets
3788 * - POPCNT to get the number of new events
 */
3790 uint64_t rolling = qm_port->cq_rolling_mask & 0xF;
3791 uint64_t qe_xor_bits = (qe_gen_bits ^ rolling);
3792 uint32_t count_new = __builtin_popcount(qe_xor_bits);
3793 count_new = RTE_MIN(count_new, max_events);
/* NOTE(review): the (64 - count_new) shifts below require count_new >= 1;
 * an early "if (!count_new) return 0;" is assumed to exist just above
 * (not visible in this excerpt) — confirm, otherwise this is a UB shift.
 */
3797 /* emulate a 128 bit rotate using 2x 64-bit numbers and bit-shifts */
3799 uint64_t m_rshift = qm_port->cq_rolling_mask >> count_new;
3800 uint64_t m_lshift = qm_port->cq_rolling_mask << (64 - count_new);
3801 uint64_t m2_rshift = qm_port->cq_rolling_mask_2 >> count_new;
3802 uint64_t m2_lshift = qm_port->cq_rolling_mask_2 << (64 - count_new);
3804 /* shifted out of m2 into MSB of m */
3805 qm_port->cq_rolling_mask = (m_rshift | m2_lshift);
3807 /* shifted out of m "looped back" into MSB of m2 */
3808 qm_port->cq_rolling_mask_2 = (m2_rshift | m_lshift);
3810 /* Prefetch the next QEs - should run as IPC instead of cycles */
3811 rte_prefetch0(&cq_addr[(idx + 16) & qm_port->cq_depth_mask]);
3812 rte_prefetch0(&cq_addr[(idx + 20) & qm_port->cq_depth_mask]);
3813 rte_prefetch0(&cq_addr[(idx + 24) & qm_port->cq_depth_mask]);
3814 rte_prefetch0(&cq_addr[(idx + 28) & qm_port->cq_depth_mask]);
3816 /* Convert QEs from XMM regs to events and store events directly */
3817 _process_deq_qes_vec_impl(qm_port, events, v_qe_3, v_qe_2, v_qe_1,
3818 v_qe_0, v_qe_meta, v_qe_status, count_new);
/* Advance the CQ consumer index by 'cnt' slots. Keeps the free-running
 * (unmasked) index for wrap tracking, derives the masked ring index, and
 * recomputes the expected gen bit, which flips each time the unmasked
 * index crosses a multiple of the CQ depth (gen_bit_shift = log2(depth)).
 */
3824 dlb2_inc_cq_idx(struct dlb2_port *qm_port, int cnt)
3826 uint16_t idx = qm_port->cq_idx_unmasked + cnt;
3828 qm_port->cq_idx_unmasked = idx;
3829 qm_port->cq_idx = idx & qm_port->cq_depth_mask;
3830 qm_port->gen_bit = (~(idx >> qm_port->gen_bit_shift)) & 0x1;
/* Dequeue up to 'max_num' events from a sparse-mode CQ, choosing between
 * the scalar and vector receive paths per port configuration. Handles the
 * optional dequeue wait, then performs post-dequeue bookkeeping: token
 * accounting, optional immediate token pop, release tracking, and
 * software credit return.
 */
3833 static inline int16_t
3834 dlb2_hw_dequeue_sparse(struct dlb2_eventdev *dlb2,
3835 struct dlb2_eventdev_port *ev_port,
3836 struct rte_event *events,
3838 uint64_t dequeue_timeout_ticks)
3840 uint64_t start_ticks = 0ULL;
3841 struct dlb2_port *qm_port;
3846 qm_port = &ev_port->qm_port;
3847 use_scalar = qm_port->use_scalar;
/* Per-dequeue timeout only applies when no global wait was configured. */
3849 if (!dlb2->global_dequeue_wait)
3850 timeout = dequeue_timeout_ticks;
3852 timeout = dlb2->global_dequeue_wait_ticks;
3854 start_ticks = rte_get_timer_cycles();
/* Vector path processes 4 QEs at a time; fall back to scalar when the
 * request is not a multiple of 4.
 */
3856 use_scalar = use_scalar || (max_num & 0x3);
3858 while (num < max_num) {
3859 struct dlb2_dequeue_qe qes[DLB2_NUM_QES_PER_CACHE_LINE];
3862 num_avail = dlb2_recv_qe_sparse(qm_port, qes);
3863 num_avail = RTE_MIN(num_avail, max_num - num);
/* Sparse CQ: each QE occupies 4 slots, hence the << 2. */
3864 dlb2_inc_cq_idx(qm_port, num_avail << 2);
3865 if (num_avail == DLB2_NUM_QES_PER_CACHE_LINE)
3866 num += dlb2_process_dequeue_four_qes(ev_port,
3871 num += dlb2_process_dequeue_qes(ev_port,
3876 } else { /* !use_scalar */
3877 num_avail = dlb2_recv_qe_sparse_vec(qm_port,
3881 dlb2_inc_cq_idx(qm_port, num_avail << 2);
3882 DLB2_INC_STAT(ev_port->stats.traffic.rx_ok, num_avail);
/* Wait path: returns true when the wait expired / should stop polling. */
3887 else if (dlb2_dequeue_wait(dlb2, ev_port, qm_port,
3888 timeout, start_ticks))
/* Post-loop bookkeeping: tokens owed to HW for each dequeued event. */
3893 qm_port->owed_tokens += num;
3896 if (qm_port->token_pop_mode == AUTO_POP)
3897 dlb2_consume_qe_immediate(qm_port, num);
3899 ev_port->outstanding_releases += num;
3901 dlb2_port_credits_inc(qm_port, num);
/* Scalar (non-sparse) receive: copy the current CQ cache line (4 QEs)
 * into 'qe' and return how many of them are valid, judged by their gen
 * bits. '*offset' receives the index (0-3) of the first valid QE within
 * the copied cache line.
 */
3907 static __rte_always_inline int
3908 dlb2_recv_qe(struct dlb2_port *qm_port, struct dlb2_dequeue_qe *qe,
/* xor_mask is indexed by the expected gen bit, and_mask by the QE offset
 * within the cache line; together they convert raw gen bits into a
 * "valid QE" bitmap.
 */
3911 uint8_t xor_mask[2][4] = { {0x0F, 0x0E, 0x0C, 0x08},
3912 {0x00, 0x01, 0x03, 0x07} };
3913 uint8_t and_mask[4] = {0x0F, 0x0E, 0x0C, 0x08};
3914 volatile struct dlb2_dequeue_qe *cq_addr;
3915 __m128i *qes = (__m128i *)qe;
3916 uint64_t *cache_line_base;
3919 cq_addr = dlb2_port[qm_port->id][PORT_TYPE(qm_port)].cq_base;
3920 cq_addr = &cq_addr[qm_port->cq_idx];
/* Align down to the 64B cache line; bits [5:4] give the QE's slot. */
3922 cache_line_base = (void *)(((uintptr_t)cq_addr) & ~0x3F);
3923 *offset = ((uintptr_t)cq_addr & 0x30) >> 4;
3925 /* Load the next CQ cache line from memory. Pack these reads as tight
3926 * as possible to reduce the chance that DLB invalidates the line while
3927 * the CPU is reading it. Read the cache line backwards to ensure that
3928 * if QE[N] (N > 0) is valid, then QEs[0:N-1] are too.
3930 * (Valid QEs start at &qe[offset])
 */
3932 qes[3] = _mm_load_si128((__m128i *)&cache_line_base[6]);
3933 qes[2] = _mm_load_si128((__m128i *)&cache_line_base[4]);
3934 qes[1] = _mm_load_si128((__m128i *)&cache_line_base[2]);
3935 qes[0] = _mm_load_si128((__m128i *)&cache_line_base[0]);
3937 /* Evict the cache line ASAP */
3938 rte_cldemote(cache_line_base);
3940 /* Extract and combine the gen bits */
3941 gen_bits = ((_mm_extract_epi8(qes[0], 15) & 0x1) << 0) |
3942 ((_mm_extract_epi8(qes[1], 15) & 0x1) << 1) |
3943 ((_mm_extract_epi8(qes[2], 15) & 0x1) << 2) |
3944 ((_mm_extract_epi8(qes[3], 15) & 0x1) << 3);
3946 /* XOR the combined bits such that a 1 represents a valid QE */
3947 gen_bits ^= xor_mask[qm_port->gen_bit][*offset];
3949 /* Mask off gen bits we don't care about */
3950 gen_bits &= and_mask[*offset];
/* Popcount of the valid bitmap == number of valid QEs. */
3952 return __builtin_popcount(gen_bits);
/* Dequeue up to 'max_num' events from a non-sparse CQ using the scalar
 * cache-line receive path. Mirrors dlb2_hw_dequeue_sparse() but each QE
 * occupies a single CQ slot, so the index advances by num_avail (no <<2).
 */
3955 static inline int16_t
3956 dlb2_hw_dequeue(struct dlb2_eventdev *dlb2,
3957 struct dlb2_eventdev_port *ev_port,
3958 struct rte_event *events,
3960 uint64_t dequeue_timeout_ticks)
3963 uint64_t start_ticks = 0ULL;
3964 struct dlb2_port *qm_port;
3967 qm_port = &ev_port->qm_port;
3969 /* We have a special implementation for waiting. Wait can be:
3970 * 1) no waiting at all
3972 * 3) wait for interrupt. If wakeup and poll time
3973 * has expired, then return to caller
3974 * 4) umonitor/umwait repeatedly up to poll time
 */
3977 /* If configured for per dequeue wait, then use wait value provided
3978 * to this API. Otherwise we must use the global
3979 * value from eventdev config time.
 */
3981 if (!dlb2->global_dequeue_wait)
3982 timeout = dequeue_timeout_ticks;
3984 timeout = dlb2->global_dequeue_wait_ticks;
3986 start_ticks = rte_get_timer_cycles();
3988 while (num < max_num) {
3989 struct dlb2_dequeue_qe qes[DLB2_NUM_QES_PER_CACHE_LINE];
3993 /* Copy up to 4 QEs from the current cache line into qes */
3994 num_avail = dlb2_recv_qe(qm_port, qes, &offset);
3996 /* But don't process more than the user requested */
3997 num_avail = RTE_MIN(num_avail, max_num - num);
3999 dlb2_inc_cq_idx(qm_port, num_avail);
4001 if (num_avail == DLB2_NUM_QES_PER_CACHE_LINE)
4002 num += dlb2_process_dequeue_four_qes(ev_port,
4007 num += dlb2_process_dequeue_qes(ev_port,
4012 else if ((timeout == 0) || (num > 0))
4013 /* Not waiting in any form, or 1+ events received? */
4015 else if (dlb2_dequeue_wait(dlb2, ev_port, qm_port,
4016 timeout, start_ticks))
/* Post-loop bookkeeping, identical to the sparse path. */
4020 qm_port->owed_tokens += num;
4023 if (qm_port->token_pop_mode == AUTO_POP)
4024 dlb2_consume_qe_immediate(qm_port, num);
4026 ev_port->outstanding_releases += num;
4028 dlb2_port_credits_inc(qm_port, num);
/* Eventdev dequeue-burst entry point (non-sparse CQ mode). Performs any
 * pending implicit releases and deferred token pops before polling HW,
 * then updates poll statistics.
 */
4035 dlb2_event_dequeue_burst(void *event_port, struct rte_event *ev, uint16_t num,
4038 struct dlb2_eventdev_port *ev_port = event_port;
4039 struct dlb2_port *qm_port = &ev_port->qm_port;
4040 struct dlb2_eventdev *dlb2 = ev_port->dlb2;
4043 RTE_ASSERT(ev_port->setup_done);
4044 RTE_ASSERT(ev != NULL);
/* Implicit-release mode: release all events still held from the
 * previous dequeue before fetching new ones.
 */
4046 if (ev_port->implicit_release && ev_port->outstanding_releases > 0) {
4047 uint16_t out_rels = ev_port->outstanding_releases;
4049 dlb2_event_release(dlb2, ev_port->id, out_rels);
4051 DLB2_INC_STAT(ev_port->stats.tx_implicit_rel, out_rels);
/* Deferred token pop: return tokens owed from earlier dequeues now. */
4054 if (qm_port->token_pop_mode == DEFERRED_POP && qm_port->owed_tokens)
4055 dlb2_consume_qe_immediate(qm_port, qm_port->owed_tokens);
4057 cnt = dlb2_hw_dequeue(dlb2, ev_port, ev, num, wait);
4059 DLB2_INC_STAT(ev_port->stats.traffic.total_polls, 1);
4060 DLB2_INC_STAT(ev_port->stats.traffic.zero_polls, ((cnt == 0) ? 1 : 0));
/* Single-event dequeue: thin wrapper over the burst API with num = 1. */
4066 dlb2_event_dequeue(void *event_port, struct rte_event *ev, uint64_t wait)
4068 return dlb2_event_dequeue_burst(event_port, ev, 1, wait);
/* Eventdev dequeue-burst entry point for sparse CQ mode. Identical
 * pre/post processing to dlb2_event_dequeue_burst(), but dispatches to
 * the sparse HW dequeue path.
 */
4072 dlb2_event_dequeue_burst_sparse(void *event_port, struct rte_event *ev,
4073 uint16_t num, uint64_t wait)
4075 struct dlb2_eventdev_port *ev_port = event_port;
4076 struct dlb2_port *qm_port = &ev_port->qm_port;
4077 struct dlb2_eventdev *dlb2 = ev_port->dlb2;
4080 RTE_ASSERT(ev_port->setup_done);
4081 RTE_ASSERT(ev != NULL);
/* Release events still held from the previous dequeue, if configured. */
4083 if (ev_port->implicit_release && ev_port->outstanding_releases > 0) {
4084 uint16_t out_rels = ev_port->outstanding_releases;
4086 dlb2_event_release(dlb2, ev_port->id, out_rels);
4088 DLB2_INC_STAT(ev_port->stats.tx_implicit_rel, out_rels);
/* Deferred token pop: return tokens owed from earlier dequeues now. */
4091 if (qm_port->token_pop_mode == DEFERRED_POP && qm_port->owed_tokens)
4092 dlb2_consume_qe_immediate(qm_port, qm_port->owed_tokens);
4094 cnt = dlb2_hw_dequeue_sparse(dlb2, ev_port, ev, num, wait);
4096 DLB2_INC_STAT(ev_port->stats.traffic.total_polls, 1);
4097 DLB2_INC_STAT(ev_port->stats.traffic.zero_polls, ((cnt == 0) ? 1 : 0));
/* Single-event dequeue for sparse CQ mode: wrapper with num = 1. */
4102 dlb2_event_dequeue_sparse(void *event_port, struct rte_event *ev,
4105 return dlb2_event_dequeue_burst_sparse(event_port, ev, 1, wait);
/* Drain all events from one port: dequeue everything it can see, invoke
 * the application's stop-flush callback on each event, release LDB
 * events back to HW, then enqueue RELEASE ops for any releases still
 * outstanding on the port.
 */
4109 dlb2_flush_port(struct rte_eventdev *dev, int port_id)
4111 struct dlb2_eventdev *dlb2 = dlb2_pmd_priv(dev);
4112 eventdev_stop_flush_t flush;
4113 struct rte_event ev;
4118 flush = dev->dev_ops->dev_stop_flush;
4119 dev_id = dev->data->dev_id;
4120 arg = dev->data->dev_stop_flush_arg;
4122 while (rte_event_dequeue_burst(dev_id, port_id, &ev, 1, 0)) {
/* NOTE(review): 'flush' may be NULL when no stop-flush callback is
 * registered; a NULL guard is presumed on the elided line above —
 * confirm against the full source.
 */
4124 flush(dev_id, ev, arg);
/* Directed ports do not track releases; skip the RELEASE enqueue. */
4126 if (dlb2->ev_ports[port_id].qm_port.is_directed)
4129 ev.op = RTE_EVENT_OP_RELEASE;
4131 rte_event_enqueue_burst(dev_id, port_id, &ev, 1);
4134 /* Enqueue any additional outstanding releases */
4135 ev.op = RTE_EVENT_OP_RELEASE;
4137 for (i = dlb2->ev_ports[port_id].outstanding_releases; i > 0; i--)
4138 rte_event_enqueue_burst(dev_id, port_id, &ev, 1);
/* Query the HW depth (occupancy) of a load-balanced queue via the driver
 * interface. On success the depth is carried back in cfg.response.id.
 */
4142 dlb2_get_ldb_queue_depth(struct dlb2_eventdev *dlb2,
4143 struct dlb2_eventdev_queue *queue)
4145 struct dlb2_hw_dev *handle = &dlb2->qm_instance;
4146 struct dlb2_get_ldb_queue_depth_args cfg;
4149 cfg.queue_id = queue->qm_queue.id;
4151 ret = dlb2_iface_get_ldb_queue_depth(handle, &cfg);
4153 DLB2_LOG_ERR("dlb2: get_ldb_queue_depth ret=%d (driver status: %s)\n",
4154 ret, dlb2_error_strings[cfg.response.status]);
/* response.id is overloaded by the driver to return the queue depth. */
4158 return cfg.response.id;
/* Query the HW depth (occupancy) of a directed queue via the driver
 * interface. Mirrors dlb2_get_ldb_queue_depth() for the directed case.
 */
4162 dlb2_get_dir_queue_depth(struct dlb2_eventdev *dlb2,
4163 struct dlb2_eventdev_queue *queue)
4165 struct dlb2_hw_dev *handle = &dlb2->qm_instance;
4166 struct dlb2_get_dir_queue_depth_args cfg;
4169 cfg.queue_id = queue->qm_queue.id;
4171 ret = dlb2_iface_get_dir_queue_depth(handle, &cfg);
4173 DLB2_LOG_ERR("dlb2: get_dir_queue_depth ret=%d (driver status: %s)\n",
4174 ret, dlb2_error_strings[cfg.response.status]);
/* response.id is overloaded by the driver to return the queue depth. */
4178 return cfg.response.id;
4182 dlb2_get_queue_depth(struct dlb2_eventdev *dlb2,
4183 struct dlb2_eventdev_queue *queue)
4185 if (queue->qm_queue.is_directed)
4186 return dlb2_get_dir_queue_depth(dlb2, queue);
4188 return dlb2_get_ldb_queue_depth(dlb2, queue);
4192 dlb2_queue_is_empty(struct dlb2_eventdev *dlb2,
4193 struct dlb2_eventdev_queue *queue)
4195 return dlb2_get_queue_depth(dlb2, queue) == 0;
/* True when every queue that has at least one port link is empty.
 * Unlinked queues are skipped — they are drained separately in
 * dlb2_drain().
 */
4199 dlb2_linked_queues_empty(struct dlb2_eventdev *dlb2)
4203 for (i = 0; i < dlb2->num_queues; i++) {
4204 if (dlb2->ev_queues[i].num_links == 0)
4206 if (!dlb2_queue_is_empty(dlb2, &dlb2->ev_queues[i]))
/* True when every configured queue (linked or not) is empty. */
4214 dlb2_queues_empty(struct dlb2_eventdev *dlb2)
4218 for (i = 0; i < dlb2->num_queues; i++) {
4219 if (!dlb2_queue_is_empty(dlb2, &dlb2->ev_queues[i]))
/* Drain the whole device at stop time:
 *  1) flush all ports until every linked queue is empty;
 *  2) flush ports once more to catch events held in the ports;
 *  3) for any remaining (necessarily unlinked, load-balanced) non-empty
 *     queues, temporarily link a load-balanced port to each, drain it,
 *     then unlink again.
 */
4227 dlb2_drain(struct rte_eventdev *dev)
4229 struct dlb2_eventdev *dlb2 = dlb2_pmd_priv(dev);
4230 struct dlb2_eventdev_port *ev_port = NULL;
4234 dev_id = dev->data->dev_id;
4236 while (!dlb2_linked_queues_empty(dlb2)) {
4237 /* Flush all the ev_ports, which will drain all their connected
 * queues.
 */
4240 for (i = 0; i < dlb2->num_ports; i++)
4241 dlb2_flush_port(dev, i);
4244 /* The queues are empty, but there may be events left in the ports. */
4245 for (i = 0; i < dlb2->num_ports; i++)
4246 dlb2_flush_port(dev, i);
4248 /* If the domain's queues are empty, we're done. */
4249 if (dlb2_queues_empty(dlb2))
4252 /* Else, there must be at least one unlinked load-balanced queue.
4253 * Select a load-balanced port with which to drain the unlinked
 * queue(s).
 */
4256 for (i = 0; i < dlb2->num_ports; i++) {
4257 ev_port = &dlb2->ev_ports[i];
/* First non-directed (load-balanced) port wins. */
4259 if (!ev_port->qm_port.is_directed)
4263 if (i == dlb2->num_ports) {
4264 DLB2_LOG_ERR("internal error: no LDB ev_ports\n");
/* Detach the chosen port from all its current links. */
4269 rte_event_port_unlink(dev_id, ev_port->id, NULL, 0);
4272 DLB2_LOG_ERR("internal error: failed to unlink ev_port %d\n",
4277 for (i = 0; i < dlb2->num_queues; i++) {
4281 if (dlb2_queue_is_empty(dlb2, &dlb2->ev_queues[i]))
4287 /* Link the ev_port to the queue */
4288 ret = rte_event_port_link(dev_id, ev_port->id, &qid, &prio, 1);
4290 DLB2_LOG_ERR("internal error: failed to link ev_port %d to queue %d\n",
4295 /* Flush the queue */
4296 while (!dlb2_queue_is_empty(dlb2, &dlb2->ev_queues[i]))
4297 dlb2_flush_port(dev, ev_port->id);
4299 /* Drain any extant events in the ev_port. */
4300 dlb2_flush_port(dev, ev_port->id);
4302 /* Unlink the ev_port from the queue */
4303 ret = rte_event_port_unlink(dev_id, ev_port->id, &qid, 1);
4305 DLB2_LOG_ERR("internal error: failed to unlink ev_port %d to queue %d\n",
/* Eventdev 'dev_stop' op: transition STARTED -> STOPPING (under the
 * resource lock), drain all queues/ports, then mark STOPPED. A repeated
 * stop is a no-op; any other state is an internal error.
 */
4313 dlb2_eventdev_stop(struct rte_eventdev *dev)
4315 struct dlb2_eventdev *dlb2 = dlb2_pmd_priv(dev);
4317 rte_spinlock_lock(&dlb2->qm_instance.resource_lock);
4319 if (dlb2->run_state == DLB2_RUN_STATE_STOPPED) {
4320 DLB2_LOG_DBG("Internal error: already stopped\n");
4321 rte_spinlock_unlock(&dlb2->qm_instance.resource_lock);
4323 } else if (dlb2->run_state != DLB2_RUN_STATE_STARTED) {
4324 DLB2_LOG_ERR("Internal error: bad state %d for dev_stop\n",
4325 (int)dlb2->run_state);
4326 rte_spinlock_unlock(&dlb2->qm_instance.resource_lock);
4330 dlb2->run_state = DLB2_RUN_STATE_STOPPING;
4332 rte_spinlock_unlock(&dlb2->qm_instance.resource_lock);
/* The drain itself (elided dlb2_drain(dev) call) runs unlocked. */
4336 dlb2->run_state = DLB2_RUN_STATE_STOPPED;
/* Eventdev 'dev_close' op: tear down the scheduling domain (without
 * re-initializing it — hence the 'false' reconfig flag).
 */
4340 dlb2_eventdev_close(struct rte_eventdev *dev)
4342 dlb2_hw_reset_sched_domain(dev, false);
/* Eventdev 'queue_release' op: nothing to free per queue — all queue
 * resources are owned by the scheduling domain and released at close.
 */
4348 dlb2_eventdev_queue_release(struct rte_eventdev *dev, uint8_t id)
4353 /* This function intentionally left blank. */
/* Eventdev 'port_release' op: free the port's QE memory if the port was
 * ever configured; otherwise there is nothing to release.
 */
4357 dlb2_eventdev_port_release(void *port)
4359 struct dlb2_eventdev_port *ev_port = port;
4360 struct dlb2_port *qm_port;
4363 qm_port = &ev_port->qm_port;
4364 if (qm_port->config_state == DLB2_CONFIGURED)
4365 dlb2_free_qe_mem(qm_port);
4370 dlb2_eventdev_timeout_ticks(struct rte_eventdev *dev, uint64_t ns,
4371 uint64_t *timeout_ticks)
4374 uint64_t cycles_per_ns = rte_get_timer_hz() / 1E9;
4376 *timeout_ticks = ns * cycles_per_ns;
/* Install the PMD's ops table and fast-path function pointers on 'dev'.
 * The dequeue pair is selected at runtime based on the CQ poll mode
 * reported by the driver (sparse vs. standard).
 */
4382 dlb2_entry_points_init(struct rte_eventdev *dev)
4384 struct dlb2_eventdev *dlb2;
4386 /* Expose PMD's eventdev interface */
4387 static struct eventdev_ops dlb2_eventdev_entry_ops = {
4388 .dev_infos_get = dlb2_eventdev_info_get,
4389 .dev_configure = dlb2_eventdev_configure,
4390 .dev_start = dlb2_eventdev_start,
4391 .dev_stop = dlb2_eventdev_stop,
4392 .dev_close = dlb2_eventdev_close,
4393 .queue_def_conf = dlb2_eventdev_queue_default_conf_get,
4394 .queue_setup = dlb2_eventdev_queue_setup,
4395 .queue_release = dlb2_eventdev_queue_release,
4396 .port_def_conf = dlb2_eventdev_port_default_conf_get,
4397 .port_setup = dlb2_eventdev_port_setup,
4398 .port_release = dlb2_eventdev_port_release,
4399 .port_link = dlb2_eventdev_port_link,
4400 .port_unlink = dlb2_eventdev_port_unlink,
4401 .port_unlinks_in_progress =
4402 dlb2_eventdev_port_unlinks_in_progress,
4403 .timeout_ticks = dlb2_eventdev_timeout_ticks,
4404 .dump = dlb2_eventdev_dump,
4405 .xstats_get = dlb2_eventdev_xstats_get,
4406 .xstats_get_names = dlb2_eventdev_xstats_get_names,
4407 .xstats_get_by_name = dlb2_eventdev_xstats_get_by_name,
4408 .xstats_reset = dlb2_eventdev_xstats_reset,
4409 .dev_selftest = test_dlb2_eventdev,
4412 /* Expose PMD's eventdev interface */
4414 dev->dev_ops = &dlb2_eventdev_entry_ops;
4415 dev->enqueue = dlb2_event_enqueue;
4416 dev->enqueue_burst = dlb2_event_enqueue_burst;
4417 dev->enqueue_new_burst = dlb2_event_enqueue_new_burst;
4418 dev->enqueue_forward_burst = dlb2_event_enqueue_forward_burst;
4420 dlb2 = dev->data->dev_private;
/* Sparse CQ mode (one QE per cache line) needs its own dequeue path. */
4421 if (dlb2->poll_mode == DLB2_CQ_POLL_MODE_SPARSE) {
4422 dev->dequeue = dlb2_event_dequeue_sparse;
4423 dev->dequeue_burst = dlb2_event_dequeue_burst_sparse;
4425 dev->dequeue = dlb2_event_dequeue;
4426 dev->dequeue_burst = dlb2_event_dequeue_burst;
/* Primary-process probe: apply devargs overrides, open the HW interface,
 * query device version and resources, initialize HW, xstats and
 * per-port defaults, then install the fast-path entry points.
 */
4431 dlb2_primary_eventdev_probe(struct rte_eventdev *dev,
4433 struct dlb2_devargs *dlb2_args)
4435 struct dlb2_eventdev *dlb2;
4438 dlb2 = dev->data->dev_private;
4440 dlb2->event_dev = dev; /* backlink */
4442 evdev_dlb2_default_info.driver_name = name;
/* Copy devargs-provided overrides into the device state. */
4444 dlb2->max_num_events_override = dlb2_args->max_num_events;
4445 dlb2->num_dir_credits_override = dlb2_args->num_dir_credits_override;
4446 dlb2->qm_instance.cos_id = dlb2_args->cos_id;
4447 dlb2->poll_interval = dlb2_args->poll_interval;
4448 dlb2->sw_credit_quanta = dlb2_args->sw_credit_quanta;
4449 dlb2->default_depth_thresh = dlb2_args->default_depth_thresh;
4450 dlb2->vector_opts_enabled = dlb2_args->vector_opts_enabled;
4452 err = dlb2_iface_open(&dlb2->qm_instance, name);
4454 DLB2_LOG_ERR("could not open event hardware device, err=%d\n",
4459 err = dlb2_iface_get_device_version(&dlb2->qm_instance,
4462 DLB2_LOG_ERR("dlb2: failed to get the device version, err=%d\n",
4467 err = dlb2_hw_query_resources(dlb2);
4469 DLB2_LOG_ERR("get resources err=%d for %s\n",
4474 dlb2_iface_hardware_init(&dlb2->qm_instance);
4476 err = dlb2_iface_get_cq_poll_mode(&dlb2->qm_instance, &dlb2->poll_mode);
4478 DLB2_LOG_ERR("dlb2: failed to get the poll mode, err=%d\n",
4483 /* Complete xtstats runtime initialization */
4484 err = dlb2_xstats_init(dlb2);
4486 DLB2_LOG_ERR("dlb2: failed to init xstats, err=%d\n", err);
4490 /* Initialize each port's token pop mode */
4491 for (i = 0; i < DLB2_MAX_NUM_PORTS(dlb2->version); i++)
4492 dlb2->ev_ports[i].qm_port.token_pop_mode = AUTO_POP;
4494 rte_spinlock_init(&dlb2->qm_instance.resource_lock);
4496 dlb2_iface_low_level_io_init();
4498 dlb2_entry_points_init(dev);
4500 dlb2_init_queue_depth_thresholds(dlb2,
4501 dlb2_args->qid_depth_thresholds.val);
/* Secondary-process probe: the primary already configured HW, so only
 * open the interface, query resources, and install entry points.
 */
4507 dlb2_secondary_eventdev_probe(struct rte_eventdev *dev,
4510 struct dlb2_eventdev *dlb2;
4513 dlb2 = dev->data->dev_private;
4515 evdev_dlb2_default_info.driver_name = name;
4517 err = dlb2_iface_open(&dlb2->qm_instance, name);
4519 DLB2_LOG_ERR("could not open event hardware device, err=%d\n",
4524 err = dlb2_hw_query_resources(dlb2);
4526 DLB2_LOG_ERR("get resources err=%d for %s\n",
4531 dlb2_iface_low_level_io_init();
4533 dlb2_entry_points_init(dev);
/* Parse the vdev/devargs parameter string into 'dlb2_args'. Each known
 * key is processed with rte_kvargs_process(); the first failing key logs
 * an error, frees the kvlist, and returns. Some keys (dir credits, the
 * QID depth-threshold format) are version-dependent (V2 vs V2.5).
 */
4539 dlb2_parse_params(const char *params,
4541 struct dlb2_devargs *dlb2_args,
4545 static const char * const args[] = { NUMA_NODE_ARG,
4546 DLB2_MAX_NUM_EVENTS,
4547 DLB2_NUM_DIR_CREDITS,
4549 DLB2_QID_DEPTH_THRESH_ARG,
4551 DLB2_POLL_INTERVAL_ARG,
4552 DLB2_SW_CREDIT_QUANTA_ARG,
4553 DLB2_DEPTH_THRESH_ARG,
4554 DLB2_VECTOR_OPTS_ENAB_ARG,
4557 if (params != NULL && params[0] != '\0') {
4558 struct rte_kvargs *kvlist = rte_kvargs_parse(params, args);
/* Unknown keys make rte_kvargs_parse() fail; warn and continue. */
4560 if (kvlist == NULL) {
4562 "Ignoring unsupported parameters when creating device '%s'\n",
4565 int ret = rte_kvargs_process(kvlist, NUMA_NODE_ARG,
4567 &dlb2_args->socket_id);
4569 DLB2_LOG_ERR("%s: Error parsing numa node parameter",
4571 rte_kvargs_free(kvlist);
4575 ret = rte_kvargs_process(kvlist, DLB2_MAX_NUM_EVENTS,
4577 &dlb2_args->max_num_events);
4579 DLB2_LOG_ERR("%s: Error parsing max_num_events parameter",
4581 rte_kvargs_free(kvlist);
/* Directed credits are a separate pool only on DLB v2.0 hardware. */
4585 if (version == DLB2_HW_V2) {
4586 ret = rte_kvargs_process(kvlist,
4587 DLB2_NUM_DIR_CREDITS,
4588 set_num_dir_credits,
4589 &dlb2_args->num_dir_credits_override);
4591 DLB2_LOG_ERR("%s: Error parsing num_dir_credits parameter",
4593 rte_kvargs_free(kvlist);
4597 ret = rte_kvargs_process(kvlist, DEV_ID_ARG,
4599 &dlb2_args->dev_id);
4601 DLB2_LOG_ERR("%s: Error parsing dev_id parameter",
4603 rte_kvargs_free(kvlist);
/* QID depth thresholds use different parsers for V2 vs V2.5 (the
 * queue-count differs between hardware versions).
 */
4607 if (version == DLB2_HW_V2) {
4608 ret = rte_kvargs_process(
4610 DLB2_QID_DEPTH_THRESH_ARG,
4611 set_qid_depth_thresh,
4612 &dlb2_args->qid_depth_thresholds);
4614 ret = rte_kvargs_process(
4616 DLB2_QID_DEPTH_THRESH_ARG,
4617 set_qid_depth_thresh_v2_5,
4618 &dlb2_args->qid_depth_thresholds);
4621 DLB2_LOG_ERR("%s: Error parsing qid_depth_thresh parameter",
4623 rte_kvargs_free(kvlist);
4627 ret = rte_kvargs_process(kvlist, DLB2_COS_ARG,
4629 &dlb2_args->cos_id);
4631 DLB2_LOG_ERR("%s: Error parsing cos parameter",
4633 rte_kvargs_free(kvlist);
4637 ret = rte_kvargs_process(kvlist, DLB2_POLL_INTERVAL_ARG,
4639 &dlb2_args->poll_interval);
4641 DLB2_LOG_ERR("%s: Error parsing poll interval parameter",
4643 rte_kvargs_free(kvlist);
4647 ret = rte_kvargs_process(kvlist,
4648 DLB2_SW_CREDIT_QUANTA_ARG,
4649 set_sw_credit_quanta,
4650 &dlb2_args->sw_credit_quanta);
/* NOTE(review): "sw xredit quanta" below is a typo for "sw credit
 * quanta" — fix the log string in a dedicated change.
 */
4652 DLB2_LOG_ERR("%s: Error parsing sw xredit quanta parameter",
4654 rte_kvargs_free(kvlist);
4658 ret = rte_kvargs_process(kvlist, DLB2_DEPTH_THRESH_ARG,
4659 set_default_depth_thresh,
4660 &dlb2_args->default_depth_thresh);
4662 DLB2_LOG_ERR("%s: Error parsing set depth thresh parameter",
4664 rte_kvargs_free(kvlist);
4668 ret = rte_kvargs_process(kvlist,
4669 DLB2_VECTOR_OPTS_ENAB_ARG,
4670 set_vector_opts_enab,
4671 &dlb2_args->vector_opts_enabled);
4673 DLB2_LOG_ERR("%s: Error parsing vector opts enabled",
4675 rte_kvargs_free(kvlist);
4679 rte_kvargs_free(kvlist);
/* Register this PMD's dynamic log type with a default level of NOTICE. */
4684 RTE_LOG_REGISTER_DEFAULT(eventdev_dlb2_log_level, NOTICE);