1 /* SPDX-License-Identifier: BSD-3-Clause
2 * Copyright(c) 2016-2020 Intel Corporation
16 #include <rte_common.h>
17 #include <rte_config.h>
18 #include <rte_cycles.h>
19 #include <rte_debug.h>
21 #include <rte_errno.h>
22 #include <rte_eventdev.h>
23 #include <eventdev_pmd.h>
25 #include <rte_kvargs.h>
27 #include <rte_malloc.h>
29 #include <rte_power_intrinsics.h>
30 #include <rte_prefetch.h>
32 #include <rte_string_fns.h>
34 #include "dlb2_priv.h"
35 #include "dlb2_iface.h"
36 #include "dlb2_inline_fns.h"
39 * Resources exposed to eventdev. Some values are overridden at runtime using
40 * values returned by the DLB2 kernel driver.
42 #if (RTE_EVENT_MAX_QUEUES_PER_DEV > UINT8_MAX)
43 #error "RTE_EVENT_MAX_QUEUES_PER_DEV cannot fit in member max_event_queues"
45 static struct rte_event_dev_info evdev_dlb2_default_info = {
46 .driver_name = "", /* probe will set */
47 .min_dequeue_timeout_ns = DLB2_MIN_DEQUEUE_TIMEOUT_NS,
48 .max_dequeue_timeout_ns = DLB2_MAX_DEQUEUE_TIMEOUT_NS,
49 #if (RTE_EVENT_MAX_QUEUES_PER_DEV < DLB2_MAX_NUM_LDB_QUEUES)
50 .max_event_queues = RTE_EVENT_MAX_QUEUES_PER_DEV,
52 .max_event_queues = DLB2_MAX_NUM_LDB_QUEUES,
54 .max_event_queue_flows = DLB2_MAX_NUM_FLOWS,
55 .max_event_queue_priority_levels = DLB2_QID_PRIORITIES,
56 .max_event_priority_levels = DLB2_QID_PRIORITIES,
57 .max_event_ports = DLB2_MAX_NUM_LDB_PORTS,
58 .max_event_port_dequeue_depth = DLB2_MAX_CQ_DEPTH,
59 .max_event_port_enqueue_depth = DLB2_MAX_ENQUEUE_DEPTH,
60 .max_event_port_links = DLB2_MAX_NUM_QIDS_PER_LDB_CQ,
61 .max_num_events = DLB2_MAX_NUM_LDB_CREDITS,
62 .max_single_link_event_port_queue_pairs =
63 DLB2_MAX_NUM_DIR_PORTS(DLB2_HW_V2),
64 .event_dev_cap = (RTE_EVENT_DEV_CAP_QUEUE_QOS |
65 RTE_EVENT_DEV_CAP_EVENT_QOS |
66 RTE_EVENT_DEV_CAP_BURST_MODE |
67 RTE_EVENT_DEV_CAP_DISTRIBUTED_SCHED |
68 RTE_EVENT_DEV_CAP_IMPLICIT_RELEASE_DISABLE |
69 RTE_EVENT_DEV_CAP_QUEUE_ALL_TYPES),
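/* Process-local port data (the producer port address used for enqueue and the
 * CQ memzone), indexed by hardware port ID and port type.
 */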
72 struct process_local_port_data
73 dlb2_port[DLB2_MAX_NUM_PORTS_ALL][DLB2_NUM_PORT_TYPES];
76 dlb2_free_qe_mem(struct dlb2_port *qm_port)
81 rte_free(qm_port->qe4);
84 rte_free(qm_port->int_arm_qe);
85 qm_port->int_arm_qe = NULL;
87 rte_free(qm_port->consume_qe);
88 qm_port->consume_qe = NULL;
90 rte_memzone_free(dlb2_port[qm_port->id][PORT_TYPE(qm_port)].mz);
91 dlb2_port[qm_port->id][PORT_TYPE(qm_port)].mz = NULL;
94 /* override defaults with value(s) provided on command line */
96 dlb2_init_queue_depth_thresholds(struct dlb2_eventdev *dlb2,
97 int *qid_depth_thresholds)
101 for (q = 0; q < DLB2_MAX_NUM_QUEUES(dlb2->version); q++) {
102 if (qid_depth_thresholds[q] != 0)
103 dlb2->ev_queues[q].depth_threshold =
104 qid_depth_thresholds[q];
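/* Query the resources the kernel driver has provisioned for this device and
 * update both evdev_dlb2_default_info and handle->info.hw_rsrc_max to match.
 */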
109 dlb2_hw_query_resources(struct dlb2_eventdev *dlb2)
111 struct dlb2_hw_dev *handle = &dlb2->qm_instance;
112 struct dlb2_hw_resource_info *dlb2_info = &handle->info;
115 /* Query driver resources provisioned for this device */
117 ret = dlb2_iface_get_num_resources(handle,
118 &dlb2->hw_rsrc_query_results);
120 DLB2_LOG_ERR("ioctl get dlb2 num resources, err=%d\n", ret);
124 /* Complete filling in device resource info returned to evdev app,
125 * overriding any default values.
126 * The capabilities (CAPs) were set at compile time.
129 evdev_dlb2_default_info.max_event_queues =
130 dlb2->hw_rsrc_query_results.num_ldb_queues;
132 evdev_dlb2_default_info.max_event_ports =
133 dlb2->hw_rsrc_query_results.num_ldb_ports;
135 if (dlb2->version == DLB2_HW_V2_5) {
136 evdev_dlb2_default_info.max_num_events =
137 dlb2->hw_rsrc_query_results.num_credits;
139 evdev_dlb2_default_info.max_num_events =
140 dlb2->hw_rsrc_query_results.num_ldb_credits;
142 /* Save off values used when creating the scheduling domain. */
144 handle->info.num_sched_domains =
145 dlb2->hw_rsrc_query_results.num_sched_domains;
147 if (dlb2->version == DLB2_HW_V2_5) {
148 handle->info.hw_rsrc_max.nb_events_limit =
149 dlb2->hw_rsrc_query_results.num_credits;
151 handle->info.hw_rsrc_max.nb_events_limit =
152 dlb2->hw_rsrc_query_results.num_ldb_credits;
154 handle->info.hw_rsrc_max.num_queues =
155 dlb2->hw_rsrc_query_results.num_ldb_queues +
156 dlb2->hw_rsrc_query_results.num_dir_ports;
158 handle->info.hw_rsrc_max.num_ldb_queues =
159 dlb2->hw_rsrc_query_results.num_ldb_queues;
161 handle->info.hw_rsrc_max.num_ldb_ports =
162 dlb2->hw_rsrc_query_results.num_ldb_ports;
164 handle->info.hw_rsrc_max.num_dir_ports =
165 dlb2->hw_rsrc_query_results.num_dir_ports;
167 handle->info.hw_rsrc_max.reorder_window_size =
168 dlb2->hw_rsrc_query_results.num_hist_list_entries;
170 rte_memcpy(dlb2_info, &handle->info.hw_rsrc_max, sizeof(*dlb2_info));
175 #define DLB2_BASE_10 10
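/* Convert a base-10 devarg string to an int, rejecting strings with no digits
 * and values that do not fit in an int.
 */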
178 dlb2_string_to_int(int *result, const char *str)
183 if (str == NULL || result == NULL)
187 ret = strtol(str, &endptr, DLB2_BASE_10);
191 /* long and int may have different widths on some architectures */
192 if (ret < INT_MIN || ret > INT_MAX || endptr == str)
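/* The set_* functions below are rte_kvargs callbacks. Each one parses a single
 * devarg key/value pair (for example, an illustrative devargs fragment such as
 * "max_num_events=4096"), range-checks it, and stores the result through the
 * opaque pointer supplied by the probe code.
 */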
200 set_numa_node(const char *key __rte_unused, const char *value, void *opaque)
202 int *socket_id = opaque;
205 ret = dlb2_string_to_int(socket_id, value);
209 if (*socket_id > RTE_MAX_NUMA_NODES)
215 set_max_num_events(const char *key __rte_unused,
219 int *max_num_events = opaque;
222 if (value == NULL || opaque == NULL) {
223 DLB2_LOG_ERR("NULL pointer\n");
227 ret = dlb2_string_to_int(max_num_events, value);
231 if (*max_num_events < 0 || *max_num_events >
232 DLB2_MAX_NUM_LDB_CREDITS) {
233 DLB2_LOG_ERR("dlb2: max_num_events must be between 0 and %d\n",
234 DLB2_MAX_NUM_LDB_CREDITS);
242 set_num_dir_credits(const char *key __rte_unused,
246 int *num_dir_credits = opaque;
249 if (value == NULL || opaque == NULL) {
250 DLB2_LOG_ERR("NULL pointer\n");
254 ret = dlb2_string_to_int(num_dir_credits, value);
258 if (*num_dir_credits < 0 ||
259 *num_dir_credits > DLB2_MAX_NUM_DIR_CREDITS(DLB2_HW_V2)) {
260 DLB2_LOG_ERR("dlb2: num_dir_credits must be between 0 and %d\n",
261 DLB2_MAX_NUM_DIR_CREDITS(DLB2_HW_V2));
269 set_dev_id(const char *key __rte_unused,
273 int *dev_id = opaque;
276 if (value == NULL || opaque == NULL) {
277 DLB2_LOG_ERR("NULL pointer\n");
281 ret = dlb2_string_to_int(dev_id, value);
289 set_cos(const char *key __rte_unused,
293 enum dlb2_cos *cos_id = opaque;
297 if (value == NULL || opaque == NULL) {
298 DLB2_LOG_ERR("NULL pointer\n");
302 ret = dlb2_string_to_int(&x, value);
306 if (x != DLB2_COS_DEFAULT && (x < DLB2_COS_0 || x > DLB2_COS_3)) {
308 "COS %d out of range, must be DLB2_COS_DEFAULT or 0-3\n",
319 set_poll_interval(const char *key __rte_unused,
323 int *poll_interval = opaque;
326 if (value == NULL || opaque == NULL) {
327 DLB2_LOG_ERR("NULL pointer\n");
331 ret = dlb2_string_to_int(poll_interval, value);
339 set_sw_credit_quanta(const char *key __rte_unused,
343 int *sw_credit_quanta = opaque;
346 if (value == NULL || opaque == NULL) {
347 DLB2_LOG_ERR("NULL pointer\n");
351 ret = dlb2_string_to_int(sw_credit_quanta, value);
359 set_default_depth_thresh(const char *key __rte_unused,
363 int *default_depth_thresh = opaque;
366 if (value == NULL || opaque == NULL) {
367 DLB2_LOG_ERR("NULL pointer\n");
371 ret = dlb2_string_to_int(default_depth_thresh, value);
379 set_vector_opts_disab(const char *key __rte_unused,
383 bool *dlb2_vector_opts_disabled = opaque;
385 if (value == NULL || opaque == NULL) {
386 DLB2_LOG_ERR("NULL pointer\n");
390 if ((*value == 'y') || (*value == 'Y'))
391 *dlb2_vector_opts_disabled = true;
393 *dlb2_vector_opts_disabled = false;
399 set_qid_depth_thresh(const char *key __rte_unused,
403 struct dlb2_qid_depth_thresholds *qid_thresh = opaque;
404 int first, last, thresh, i;
406 if (value == NULL || opaque == NULL) {
407 DLB2_LOG_ERR("NULL pointer\n");
411 /* command line override may take one of the following 3 forms:
412 * qid_depth_thresh=all:<threshold_value> ... all queues
413 * qid_depth_thresh=qidA-qidB:<threshold_value> ... a range of queues
414 * qid_depth_thresh=qid:<threshold_value> ... just one queue
416 if (sscanf(value, "all:%d", &thresh) == 1) {
418 last = DLB2_MAX_NUM_QUEUES(DLB2_HW_V2) - 1;
419 } else if (sscanf(value, "%d-%d:%d", &first, &last, &thresh) == 3) {
420 /* we have everything we need */
421 } else if (sscanf(value, "%d:%d", &first, &thresh) == 2) {
424 DLB2_LOG_ERR("Error parsing qid depth devarg. Should be all:val, qid-qid:val, or qid:val\n");
428 if (first > last || first < 0 ||
429 last >= DLB2_MAX_NUM_QUEUES(DLB2_HW_V2)) {
430 DLB2_LOG_ERR("Error parsing qid depth devarg, invalid qid value\n");
434 if (thresh < 0 || thresh > DLB2_MAX_QUEUE_DEPTH_THRESHOLD) {
435 DLB2_LOG_ERR("Error parsing qid depth devarg, threshold > %d\n",
436 DLB2_MAX_QUEUE_DEPTH_THRESHOLD);
440 for (i = first; i <= last; i++)
441 qid_thresh->val[i] = thresh; /* indexed by qid */
447 set_qid_depth_thresh_v2_5(const char *key __rte_unused,
451 struct dlb2_qid_depth_thresholds *qid_thresh = opaque;
452 int first, last, thresh, i;
454 if (value == NULL || opaque == NULL) {
455 DLB2_LOG_ERR("NULL pointer\n");
459 /* command line override may take one of the following 3 forms:
460 * qid_depth_thresh=all:<threshold_value> ... all queues
461 * qid_depth_thresh=qidA-qidB:<threshold_value> ... a range of queues
462 * qid_depth_thresh=qid:<threshold_value> ... just one queue
464 if (sscanf(value, "all:%d", &thresh) == 1) {
466 last = DLB2_MAX_NUM_QUEUES(DLB2_HW_V2_5) - 1;
467 } else if (sscanf(value, "%d-%d:%d", &first, &last, &thresh) == 3) {
468 /* we have everything we need */
469 } else if (sscanf(value, "%d:%d", &first, &thresh) == 2) {
472 DLB2_LOG_ERR("Error parsing qid depth devarg. Should be all:val, qid-qid:val, or qid:val\n");
476 if (first > last || first < 0 ||
477 last >= DLB2_MAX_NUM_QUEUES(DLB2_HW_V2_5)) {
478 DLB2_LOG_ERR("Error parsing qid depth devarg, invalid qid value\n");
482 if (thresh < 0 || thresh > DLB2_MAX_QUEUE_DEPTH_THRESHOLD) {
483 DLB2_LOG_ERR("Error parsing qid depth devarg, threshold > %d\n",
484 DLB2_MAX_QUEUE_DEPTH_THRESHOLD);
488 for (i = first; i <= last; i++)
489 qid_thresh->val[i] = thresh; /* indexed by qid */
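/* eventdev info_get callback: re-query the resources still available from the
 * driver, add back the resources already owned by this scheduling domain,
 * clamp to the eventdev library limits, and return the result in *dev_info.
 */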
495 dlb2_eventdev_info_get(struct rte_eventdev *dev,
496 struct rte_event_dev_info *dev_info)
498 struct dlb2_eventdev *dlb2 = dlb2_pmd_priv(dev);
501 ret = dlb2_hw_query_resources(dlb2);
503 const struct rte_eventdev_data *data = dev->data;
505 DLB2_LOG_ERR("get resources err=%d, devid=%d\n",
507 /* fn is void, so fall through and return values set up in
512 /* Add num resources currently owned by this domain.
513 * These would become available if the scheduling domain were reset due
514 * to the application calling eventdev_configure again to *reconfigure* the
517 evdev_dlb2_default_info.max_event_ports += dlb2->num_ldb_ports;
518 evdev_dlb2_default_info.max_event_queues += dlb2->num_ldb_queues;
519 if (dlb2->version == DLB2_HW_V2_5) {
520 evdev_dlb2_default_info.max_num_events +=
523 evdev_dlb2_default_info.max_num_events +=
524 dlb2->max_ldb_credits;
526 evdev_dlb2_default_info.max_event_queues =
527 RTE_MIN(evdev_dlb2_default_info.max_event_queues,
528 RTE_EVENT_MAX_QUEUES_PER_DEV);
530 evdev_dlb2_default_info.max_num_events =
531 RTE_MIN(evdev_dlb2_default_info.max_num_events,
532 dlb2->max_num_events_override);
534 *dev_info = evdev_dlb2_default_info;
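/* Translate the generic resource request into a dlb2_create_sched_domain_args
 * structure and create the hardware scheduling domain via
 * dlb2_iface_sched_domain_create().
 */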
538 dlb2_hw_create_sched_domain(struct dlb2_hw_dev *handle,
539 const struct dlb2_hw_rsrcs *resources_asked,
540 uint8_t device_version)
543 struct dlb2_create_sched_domain_args *cfg;
545 if (resources_asked == NULL) {
546 DLB2_LOG_ERR("dlb2: dlb2_create NULL parameter\n");
551 /* Map generic qm resources to dlb2 resources */
552 cfg = &handle->cfg.resources;
554 /* DIR ports and queues */
556 cfg->num_dir_ports = resources_asked->num_dir_ports;
557 if (device_version == DLB2_HW_V2_5)
558 cfg->num_credits = resources_asked->num_credits;
560 cfg->num_dir_credits = resources_asked->num_dir_credits;
564 cfg->num_ldb_queues = resources_asked->num_ldb_queues;
568 cfg->cos_strict = 0; /* Best effort */
569 cfg->num_cos_ldb_ports[0] = 0;
570 cfg->num_cos_ldb_ports[1] = 0;
571 cfg->num_cos_ldb_ports[2] = 0;
572 cfg->num_cos_ldb_ports[3] = 0;
574 switch (handle->cos_id) {
576 cfg->num_ldb_ports = 0; /* no don't care ports */
577 cfg->num_cos_ldb_ports[0] =
578 resources_asked->num_ldb_ports;
581 cfg->num_ldb_ports = 0; /* no don't care ports */
582 cfg->num_cos_ldb_ports[1] = resources_asked->num_ldb_ports;
585 cfg->num_ldb_ports = 0; /* no don't care ports */
586 cfg->num_cos_ldb_ports[2] = resources_asked->num_ldb_ports;
589 cfg->num_ldb_ports = 0; /* no don't care ports */
590 cfg->num_cos_ldb_ports[3] =
591 resources_asked->num_ldb_ports;
593 case DLB2_COS_DEFAULT:
594 /* all ldb ports are don't care ports from a cos perspective */
596 resources_asked->num_ldb_ports;
600 if (device_version == DLB2_HW_V2)
601 cfg->num_ldb_credits = resources_asked->num_ldb_credits;
603 cfg->num_atomic_inflights =
604 DLB2_NUM_ATOMIC_INFLIGHTS_PER_QUEUE *
607 cfg->num_hist_list_entries = resources_asked->num_ldb_ports *
608 DLB2_NUM_HIST_LIST_ENTRIES_PER_LDB_PORT;
610 if (device_version == DLB2_HW_V2_5) {
611 DLB2_LOG_DBG("sched domain create - ldb_qs=%d, ldb_ports=%d, dir_ports=%d, atomic_inflights=%d, hist_list_entries=%d, credits=%d\n",
613 resources_asked->num_ldb_ports,
615 cfg->num_atomic_inflights,
616 cfg->num_hist_list_entries,
619 DLB2_LOG_DBG("sched domain create - ldb_qs=%d, ldb_ports=%d, dir_ports=%d, atomic_inflights=%d, hist_list_entries=%d, ldb_credits=%d, dir_credits=%d\n",
621 resources_asked->num_ldb_ports,
623 cfg->num_atomic_inflights,
624 cfg->num_hist_list_entries,
625 cfg->num_ldb_credits,
626 cfg->num_dir_credits);
629 /* Configure the QM */
631 ret = dlb2_iface_sched_domain_create(handle, cfg);
633 DLB2_LOG_ERR("dlb2: domain create failed, ret = %d, extra status: %s\n",
635 dlb2_error_strings[cfg->response.status]);
640 handle->domain_id = cfg->response.id;
641 handle->cfg.configured = true;
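/* Reset the hardware scheduling domain and release per-port software state.
 * When reconfig is true, queues and ports are marked DLB2_PREV_CONFIGURED so
 * their previous configuration can be reapplied when the device is restarted.
 */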
649 dlb2_hw_reset_sched_domain(const struct rte_eventdev *dev, bool reconfig)
651 struct dlb2_eventdev *dlb2 = dlb2_pmd_priv(dev);
652 enum dlb2_configuration_state config_state;
655 dlb2_iface_domain_reset(dlb2);
657 /* Free all dynamically allocated port memory */
658 for (i = 0; i < dlb2->num_ports; i++)
659 dlb2_free_qe_mem(&dlb2->ev_ports[i].qm_port);
661 /* If reconfiguring, mark the device's queues and ports as "previously
662 * configured." If the user doesn't reconfigure them, the PMD will
663 * reapply their previous configuration when the device is started.
665 config_state = (reconfig) ? DLB2_PREV_CONFIGURED :
668 for (i = 0; i < dlb2->num_ports; i++) {
669 dlb2->ev_ports[i].qm_port.config_state = config_state;
670 /* Reset setup_done so ports can be reconfigured */
671 dlb2->ev_ports[i].setup_done = false;
672 for (j = 0; j < DLB2_MAX_NUM_QIDS_PER_LDB_CQ; j++)
673 dlb2->ev_ports[i].link[j].mapped = false;
676 for (i = 0; i < dlb2->num_queues; i++)
677 dlb2->ev_queues[i].qm_queue.config_state = config_state;
679 for (i = 0; i < DLB2_MAX_NUM_QUEUES(DLB2_HW_V2_5); i++)
680 dlb2->ev_queues[i].setup_done = false;
683 dlb2->num_ldb_ports = 0;
684 dlb2->num_dir_ports = 0;
685 dlb2->num_queues = 0;
686 dlb2->num_ldb_queues = 0;
687 dlb2->num_dir_queues = 0;
688 dlb2->configured = false;
691 /* Note: 1 QM instance per QM device, QM instance/device == event device */
693 dlb2_eventdev_configure(const struct rte_eventdev *dev)
695 struct dlb2_eventdev *dlb2 = dlb2_pmd_priv(dev);
696 struct dlb2_hw_dev *handle = &dlb2->qm_instance;
697 struct dlb2_hw_rsrcs *rsrcs = &handle->info.hw_rsrc_max;
698 const struct rte_eventdev_data *data = dev->data;
699 const struct rte_event_dev_config *config = &data->dev_conf;
702 /* If this eventdev is already configured, we must release the current
703 * scheduling domain before attempting to configure a new one.
705 if (dlb2->configured) {
706 dlb2_hw_reset_sched_domain(dev, true);
707 ret = dlb2_hw_query_resources(dlb2);
709 DLB2_LOG_ERR("get resources err=%d, devid=%d\n",
715 if (config->nb_event_queues > rsrcs->num_queues) {
716 DLB2_LOG_ERR("nb_event_queues parameter (%d) exceeds the QM device's capabilities (%d).\n",
717 config->nb_event_queues,
721 if (config->nb_event_ports > (rsrcs->num_ldb_ports
722 + rsrcs->num_dir_ports)) {
723 DLB2_LOG_ERR("nb_event_ports parameter (%d) exceeds the QM device's capabilities (%d).\n",
724 config->nb_event_ports,
725 (rsrcs->num_ldb_ports + rsrcs->num_dir_ports));
728 if (config->nb_events_limit > rsrcs->nb_events_limit) {
729 DLB2_LOG_ERR("nb_events_limit parameter (%d) exceeds the QM device's capabilities (%d).\n",
730 config->nb_events_limit,
731 rsrcs->nb_events_limit);
735 if (config->event_dev_cfg & RTE_EVENT_DEV_CFG_PER_DEQUEUE_TIMEOUT)
736 dlb2->global_dequeue_wait = false;
740 dlb2->global_dequeue_wait = true;
742 /* note size mismatch of timeout vals in eventdev lib. */
743 timeout32 = config->dequeue_timeout_ns;
745 dlb2->global_dequeue_wait_ticks =
746 timeout32 * (rte_get_timer_hz() / 1E9);
749 /* Does this platform support umonitor/umwait? */
750 if (rte_cpu_get_flag_enabled(RTE_CPUFLAG_WAITPKG))
751 dlb2->umwait_allowed = true;
753 rsrcs->num_dir_ports = config->nb_single_link_event_port_queues;
754 rsrcs->num_ldb_ports = config->nb_event_ports - rsrcs->num_dir_ports;
755 /* 1 dir queue per dir port */
756 rsrcs->num_ldb_queues = config->nb_event_queues - rsrcs->num_dir_ports;
758 if (dlb2->version == DLB2_HW_V2_5) {
759 rsrcs->num_credits = 0;
760 if (rsrcs->num_ldb_queues || rsrcs->num_dir_ports)
761 rsrcs->num_credits = config->nb_events_limit;
763 /* Scale down nb_events_limit by 4 for directed credits,
764 * since there are 4x as many load-balanced credits.
766 rsrcs->num_ldb_credits = 0;
767 rsrcs->num_dir_credits = 0;
769 if (rsrcs->num_ldb_queues)
770 rsrcs->num_ldb_credits = config->nb_events_limit;
771 if (rsrcs->num_dir_ports)
772 rsrcs->num_dir_credits = config->nb_events_limit / 4;
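/* For example, nb_events_limit = 2048 yields 2048 LDB credits and
 * 512 DIR credits here (before any num_dir_credits override).
 */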
773 if (dlb2->num_dir_credits_override != -1)
774 rsrcs->num_dir_credits = dlb2->num_dir_credits_override;
777 if (dlb2_hw_create_sched_domain(handle, rsrcs, dlb2->version) < 0) {
778 DLB2_LOG_ERR("dlb2_hw_create_sched_domain failed\n");
782 dlb2->new_event_limit = config->nb_events_limit;
783 __atomic_store_n(&dlb2->inflights, 0, __ATOMIC_SEQ_CST);
785 /* Save number of ports/queues for this event dev */
786 dlb2->num_ports = config->nb_event_ports;
787 dlb2->num_queues = config->nb_event_queues;
788 dlb2->num_dir_ports = rsrcs->num_dir_ports;
789 dlb2->num_ldb_ports = dlb2->num_ports - dlb2->num_dir_ports;
790 dlb2->num_ldb_queues = dlb2->num_queues - dlb2->num_dir_ports;
791 dlb2->num_dir_queues = dlb2->num_dir_ports;
792 if (dlb2->version == DLB2_HW_V2_5) {
793 dlb2->credit_pool = rsrcs->num_credits;
794 dlb2->max_credits = rsrcs->num_credits;
796 dlb2->ldb_credit_pool = rsrcs->num_ldb_credits;
797 dlb2->max_ldb_credits = rsrcs->num_ldb_credits;
798 dlb2->dir_credit_pool = rsrcs->num_dir_credits;
799 dlb2->max_dir_credits = rsrcs->num_dir_credits;
802 dlb2->configured = true;
808 dlb2_eventdev_port_default_conf_get(struct rte_eventdev *dev,
810 struct rte_event_port_conf *port_conf)
812 RTE_SET_USED(port_id);
813 struct dlb2_eventdev *dlb2 = dlb2_pmd_priv(dev);
815 port_conf->new_event_threshold = dlb2->new_event_limit;
816 port_conf->dequeue_depth = 32;
817 port_conf->enqueue_depth = DLB2_MAX_ENQUEUE_DEPTH;
818 port_conf->event_port_cfg = 0;
822 dlb2_eventdev_queue_default_conf_get(struct rte_eventdev *dev,
824 struct rte_event_queue_conf *queue_conf)
827 RTE_SET_USED(queue_id);
829 queue_conf->nb_atomic_flows = 1024;
830 queue_conf->nb_atomic_order_sequences = 64;
831 queue_conf->event_queue_cfg = 0;
832 queue_conf->priority = 0;
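/* Helpers for the device's sequence number (SN) groups, which back ordered
 * queues: read a group's current allocation, change it, and query how many of
 * its slots are currently occupied.
 */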
836 dlb2_get_sn_allocation(struct dlb2_eventdev *dlb2, int group)
838 struct dlb2_hw_dev *handle = &dlb2->qm_instance;
839 struct dlb2_get_sn_allocation_args cfg;
844 ret = dlb2_iface_get_sn_allocation(handle, &cfg);
846 DLB2_LOG_ERR("dlb2: get_sn_allocation ret=%d (driver status: %s)\n",
847 ret, dlb2_error_strings[cfg.response.status]);
851 return cfg.response.id;
855 dlb2_set_sn_allocation(struct dlb2_eventdev *dlb2, int group, int num)
857 struct dlb2_hw_dev *handle = &dlb2->qm_instance;
858 struct dlb2_set_sn_allocation_args cfg;
864 ret = dlb2_iface_set_sn_allocation(handle, &cfg);
866 DLB2_LOG_ERR("dlb2: set_sn_allocation ret=%d (driver status: %s)\n",
867 ret, dlb2_error_strings[cfg.response.status]);
875 dlb2_get_sn_occupancy(struct dlb2_eventdev *dlb2, int group)
877 struct dlb2_hw_dev *handle = &dlb2->qm_instance;
878 struct dlb2_get_sn_occupancy_args cfg;
883 ret = dlb2_iface_get_sn_occupancy(handle, &cfg);
885 DLB2_LOG_ERR("dlb2: get_sn_occupancy ret=%d (driver status: %s)\n",
886 ret, dlb2_error_strings[cfg.response.status]);
890 return cfg.response.id;
893 /* Query the current sequence number allocations and, if they conflict with the
894 * requested LDB queue configuration, attempt to re-allocate sequence numbers.
895 * This is best-effort; if it fails, the PMD will attempt to configure the
896 * load-balanced queue and return an error.
899 dlb2_program_sn_allocation(struct dlb2_eventdev *dlb2,
900 const struct rte_event_queue_conf *queue_conf)
902 int grp_occupancy[DLB2_NUM_SN_GROUPS];
903 int grp_alloc[DLB2_NUM_SN_GROUPS];
904 int i, sequence_numbers;
906 sequence_numbers = (int)queue_conf->nb_atomic_order_sequences;
908 for (i = 0; i < DLB2_NUM_SN_GROUPS; i++) {
911 grp_alloc[i] = dlb2_get_sn_allocation(dlb2, i);
912 if (grp_alloc[i] < 0)
915 total_slots = DLB2_MAX_LDB_SN_ALLOC / grp_alloc[i];
917 grp_occupancy[i] = dlb2_get_sn_occupancy(dlb2, i);
918 if (grp_occupancy[i] < 0)
921 /* DLB has at least one available slot for the requested
922 * sequence numbers, so no further configuration required.
924 if (grp_alloc[i] == sequence_numbers &&
925 grp_occupancy[i] < total_slots)
929 /* None of the sequence number groups are configured for the requested
930 * sequence numbers, so we have to reconfigure one of them. This is
931 * only possible if a group is not in use.
933 for (i = 0; i < DLB2_NUM_SN_GROUPS; i++) {
934 if (grp_occupancy[i] == 0)
938 if (i == DLB2_NUM_SN_GROUPS) {
939 DLB2_LOG_ERR("[%s()] No groups with %d sequence_numbers are available or have free slots\n",
940 __func__, sequence_numbers);
944 /* Attempt to configure slot i with the requested number of sequence
945 * numbers. Ignore the return value -- if this fails, the error will be
946 * caught during subsequent queue configuration.
948 dlb2_set_sn_allocation(dlb2, i, sequence_numbers);
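/* Create a load-balanced (LDB) hardware queue for the given eventdev queue,
 * deriving the DLB2 parameters (sequence numbers, QID inflights, lock ID
 * compression level) from the rte_event_queue_conf.
 */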
952 dlb2_hw_create_ldb_queue(struct dlb2_eventdev *dlb2,
953 struct dlb2_eventdev_queue *ev_queue,
954 const struct rte_event_queue_conf *evq_conf)
956 struct dlb2_hw_dev *handle = &dlb2->qm_instance;
957 struct dlb2_queue *queue = &ev_queue->qm_queue;
958 struct dlb2_create_ldb_queue_args cfg;
963 if (evq_conf == NULL)
966 if (evq_conf->event_queue_cfg & RTE_EVENT_QUEUE_CFG_ALL_TYPES) {
967 if (evq_conf->nb_atomic_order_sequences != 0)
968 sched_type = RTE_SCHED_TYPE_ORDERED;
970 sched_type = RTE_SCHED_TYPE_PARALLEL;
972 sched_type = evq_conf->schedule_type;
974 cfg.num_atomic_inflights = DLB2_NUM_ATOMIC_INFLIGHTS_PER_QUEUE;
975 cfg.num_sequence_numbers = evq_conf->nb_atomic_order_sequences;
976 cfg.num_qid_inflights = evq_conf->nb_atomic_order_sequences;
978 if (sched_type != RTE_SCHED_TYPE_ORDERED) {
979 cfg.num_sequence_numbers = 0;
980 cfg.num_qid_inflights = 2048;
983 /* The application should set this to the number of hardware flows it
984 * wants, not the total number of flows it will use. E.g. if the app uses
985 * 64 flows and sets compression to 64, in the best case it gets 64 unique
986 * hashed flows in hardware.
988 switch (evq_conf->nb_atomic_flows) {
989 /* Valid DLB2 compression levels */
994 case (1 * 1024): /* 1K */
995 case (2 * 1024): /* 2K */
996 case (4 * 1024): /* 4K */
997 case (64 * 1024): /* 64K */
998 cfg.lock_id_comp_level = evq_conf->nb_atomic_flows;
1001 /* Invalid compression level */
1002 cfg.lock_id_comp_level = 0; /* no compression */
1005 if (ev_queue->depth_threshold == 0) {
1006 cfg.depth_threshold = dlb2->default_depth_thresh;
1007 ev_queue->depth_threshold =
1008 dlb2->default_depth_thresh;
1010 cfg.depth_threshold = ev_queue->depth_threshold;
1012 ret = dlb2_iface_ldb_queue_create(handle, &cfg);
1014 DLB2_LOG_ERR("dlb2: create LB event queue error, ret=%d (driver status: %s)\n",
1015 ret, dlb2_error_strings[cfg.response.status]);
1019 qm_qid = cfg.response.id;
1021 /* Save off queue config for debug, resource lookups, and reconfig */
1022 queue->num_qid_inflights = cfg.num_qid_inflights;
1023 queue->num_atm_inflights = cfg.num_atomic_inflights;
1025 queue->sched_type = sched_type;
1026 queue->config_state = DLB2_CONFIGURED;
1028 DLB2_LOG_DBG("Created LB event queue %d, nb_inflights=%d, nb_seq=%d, qid inflights=%d\n",
1030 cfg.num_atomic_inflights,
1031 cfg.num_sequence_numbers,
1032 cfg.num_qid_inflights);
1038 dlb2_eventdev_ldb_queue_setup(struct rte_eventdev *dev,
1039 struct dlb2_eventdev_queue *ev_queue,
1040 const struct rte_event_queue_conf *queue_conf)
1042 struct dlb2_eventdev *dlb2 = dlb2_pmd_priv(dev);
1045 if (queue_conf->nb_atomic_order_sequences)
1046 dlb2_program_sn_allocation(dlb2, queue_conf);
1048 qm_qid = dlb2_hw_create_ldb_queue(dlb2, ev_queue, queue_conf);
1050 DLB2_LOG_ERR("Failed to create the load-balanced queue\n");
1055 dlb2->qm_ldb_to_ev_queue_id[qm_qid] = ev_queue->id;
1057 ev_queue->qm_queue.id = qm_qid;
1062 static int dlb2_num_dir_queues_setup(struct dlb2_eventdev *dlb2)
1066 for (i = 0; i < dlb2->num_queues; i++) {
1067 if (dlb2->ev_queues[i].setup_done &&
1068 dlb2->ev_queues[i].qm_queue.is_directed)
1076 dlb2_queue_link_teardown(struct dlb2_eventdev *dlb2,
1077 struct dlb2_eventdev_queue *ev_queue)
1079 struct dlb2_eventdev_port *ev_port;
1082 for (i = 0; i < dlb2->num_ports; i++) {
1083 ev_port = &dlb2->ev_ports[i];
1085 for (j = 0; j < DLB2_MAX_NUM_QIDS_PER_LDB_CQ; j++) {
1086 if (!ev_port->link[j].valid ||
1087 ev_port->link[j].queue_id != ev_queue->id)
1090 ev_port->link[j].valid = false;
1091 ev_port->num_links--;
1095 ev_queue->num_links = 0;
1099 dlb2_eventdev_queue_setup(struct rte_eventdev *dev,
1101 const struct rte_event_queue_conf *queue_conf)
1103 struct dlb2_eventdev *dlb2 = dlb2_pmd_priv(dev);
1104 struct dlb2_eventdev_queue *ev_queue;
1107 if (queue_conf == NULL)
1110 if (ev_qid >= dlb2->num_queues)
1113 ev_queue = &dlb2->ev_queues[ev_qid];
1115 ev_queue->qm_queue.is_directed = queue_conf->event_queue_cfg &
1116 RTE_EVENT_QUEUE_CFG_SINGLE_LINK;
1117 ev_queue->id = ev_qid;
1118 ev_queue->conf = *queue_conf;
1120 if (!ev_queue->qm_queue.is_directed) {
1121 ret = dlb2_eventdev_ldb_queue_setup(dev, ev_queue, queue_conf);
1123 /* The directed queue isn't setup until link time, at which
1124 * point we know its directed port ID. Directed queue setup
1125 * will only fail if this queue is already setup or there are
1126 * no directed queues left to configure.
1130 ev_queue->qm_queue.config_state = DLB2_NOT_CONFIGURED;
1132 if (ev_queue->setup_done ||
1133 dlb2_num_dir_queues_setup(dlb2) == dlb2->num_dir_queues)
1137 /* Tear down pre-existing port->queue links */
1138 if (!ret && dlb2->run_state == DLB2_RUN_STATE_STOPPED)
1139 dlb2_queue_link_teardown(dlb2, ev_queue);
1142 ev_queue->setup_done = true;
1148 dlb2_init_consume_qe(struct dlb2_port *qm_port, char *mz_name)
1150 struct dlb2_cq_pop_qe *qe;
1152 qe = rte_zmalloc(mz_name,
1153 DLB2_NUM_QES_PER_CACHE_LINE *
1154 sizeof(struct dlb2_cq_pop_qe),
1155 RTE_CACHE_LINE_SIZE);
1158 DLB2_LOG_ERR("dlb2: no memory for consume_qe\n");
1161 qm_port->consume_qe = qe;
1167 /* Tokens value is 0-based; i.e. '0' returns 1 token, '1' returns 2,
1170 qe->tokens = 0; /* set at run time */
1173 /* Completion IDs are disabled */
1180 dlb2_init_int_arm_qe(struct dlb2_port *qm_port, char *mz_name)
1182 struct dlb2_enqueue_qe *qe;
1184 qe = rte_zmalloc(mz_name,
1185 DLB2_NUM_QES_PER_CACHE_LINE *
1186 sizeof(struct dlb2_enqueue_qe),
1187 RTE_CACHE_LINE_SIZE);
1190 DLB2_LOG_ERR("dlb2: no memory for complete_qe\n");
1193 qm_port->int_arm_qe = qe;
1195 /* V2 - INT ARM is CQ_TOKEN + FRAG */
1202 /* Completion IDs are disabled */
1209 dlb2_init_qe_mem(struct dlb2_port *qm_port, char *mz_name)
1213 sz = DLB2_NUM_QES_PER_CACHE_LINE * sizeof(struct dlb2_enqueue_qe);
1215 qm_port->qe4 = rte_zmalloc(mz_name, sz, RTE_CACHE_LINE_SIZE);
1217 if (qm_port->qe4 == NULL) {
1218 DLB2_LOG_ERR("dlb2: no qe4 memory\n");
1223 ret = dlb2_init_int_arm_qe(qm_port, mz_name);
1225 DLB2_LOG_ERR("dlb2: dlb2_init_int_arm_qe ret=%d\n", ret);
1229 ret = dlb2_init_consume_qe(qm_port, mz_name);
1231 DLB2_LOG_ERR("dlb2: dlb2_init_consume_qe ret=%d\n", ret);
1239 dlb2_free_qe_mem(qm_port);
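/* Forward declarations for the delayed token-pop enqueue variants. These
 * replace the default enqueue entry points when a port uses the DELAYED_POP
 * token pop mode (see dlb2_hw_create_ldb_port()).
 */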
1244 static inline uint16_t
1245 dlb2_event_enqueue_delayed(void *event_port,
1246 const struct rte_event events[]);
1248 static inline uint16_t
1249 dlb2_event_enqueue_burst_delayed(void *event_port,
1250 const struct rte_event events[],
1253 static inline uint16_t
1254 dlb2_event_enqueue_new_burst_delayed(void *event_port,
1255 const struct rte_event events[],
1258 static inline uint16_t
1259 dlb2_event_enqueue_forward_burst_delayed(void *event_port,
1260 const struct rte_event events[],
1263 /* Generate the required bitmask for rotate-style expected QE gen bits.
1264 * This requires a pattern of 1's and zeros, starting with expected as
1265 * 1 bits, so when hardware writes 0's they're "new". This requires the
1266 * ring size to be powers of 2 to wrap correctly.
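 * For example, cq_depth == 8 produces the repeating 64-bit pattern
 * 0xFF00FF00FF00FF00.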
1269 dlb2_hw_cq_bitmask_init(struct dlb2_port *qm_port, uint32_t cq_depth)
1271 uint64_t cq_build_mask = 0;
1275 return; /* need to fall back to scalar code */
1278 * all 1's in the first u64 and all zeros in the second is the correct bit
1279 * pattern to start. Special-casing == 64 is easier than adapting the loop logic.
1281 if (cq_depth == 64) {
1282 qm_port->cq_rolling_mask = 0;
1283 qm_port->cq_rolling_mask_2 = -1;
1287 for (i = 0; i < 64; i += (cq_depth * 2))
1288 cq_build_mask |= ((1ULL << cq_depth) - 1) << (i + cq_depth);
1290 qm_port->cq_rolling_mask = cq_build_mask;
1291 qm_port->cq_rolling_mask_2 = cq_build_mask;
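/* Create a load-balanced hardware port for the given event port: create the
 * CQ through the iface layer, allocate the per-port QE memory, and initialize
 * the software CQ state (depth mask, gen bit, credit caches).
 */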
1295 dlb2_hw_create_ldb_port(struct dlb2_eventdev *dlb2,
1296 struct dlb2_eventdev_port *ev_port,
1297 uint32_t dequeue_depth,
1298 uint32_t enqueue_depth)
1300 struct dlb2_hw_dev *handle = &dlb2->qm_instance;
1301 struct dlb2_create_ldb_port_args cfg = { {0} };
1303 struct dlb2_port *qm_port = NULL;
1304 char mz_name[RTE_MEMZONE_NAMESIZE];
1305 uint32_t qm_port_id;
1306 uint16_t ldb_credit_high_watermark = 0;
1307 uint16_t dir_credit_high_watermark = 0;
1308 uint16_t credit_high_watermark = 0;
1313 if (dequeue_depth < DLB2_MIN_CQ_DEPTH) {
1314 DLB2_LOG_ERR("dlb2: invalid enqueue_depth, must be at least %d\n",
1319 if (enqueue_depth < DLB2_MIN_ENQUEUE_DEPTH) {
1320 DLB2_LOG_ERR("dlb2: invalid enqueue_depth, must be at least %d\n",
1321 DLB2_MIN_ENQUEUE_DEPTH);
1325 rte_spinlock_lock(&handle->resource_lock);
1327 /* We round up to the next power of 2 if necessary */
1328 cfg.cq_depth = rte_align32pow2(dequeue_depth);
1329 cfg.cq_depth_threshold = 1;
1331 cfg.cq_history_list_size = DLB2_NUM_HIST_LIST_ENTRIES_PER_LDB_PORT;
1333 if (handle->cos_id == DLB2_COS_DEFAULT)
1336 cfg.cos_id = handle->cos_id;
1340 /* User controls the LDB high watermark via enqueue depth. The DIR high
1341 * watermark is equal, unless the directed credit pool is too small.
1343 if (dlb2->version == DLB2_HW_V2) {
1344 ldb_credit_high_watermark = enqueue_depth;
1345 /* If there are no directed ports, the kernel driver will
1346 * ignore this port's directed credit settings. Don't use
1347 * enqueue_depth if it would require more directed credits
1348 * than are available.
1350 dir_credit_high_watermark =
1351 RTE_MIN(enqueue_depth,
1352 handle->cfg.num_dir_credits / dlb2->num_ports);
1354 credit_high_watermark = enqueue_depth;
1358 ret = dlb2_iface_ldb_port_create(handle, &cfg, dlb2->poll_mode);
1360 DLB2_LOG_ERR("dlb2: dlb2_ldb_port_create error, ret=%d (driver status: %s)\n",
1361 ret, dlb2_error_strings[cfg.response.status]);
1365 qm_port_id = cfg.response.id;
1367 DLB2_LOG_DBG("dlb2: ev_port %d uses qm LB port %d <<<<<\n",
1368 ev_port->id, qm_port_id);
1370 qm_port = &ev_port->qm_port;
1371 qm_port->ev_port = ev_port; /* back ptr */
1372 qm_port->dlb2 = dlb2; /* back ptr */
1374 * Allocate and init local qe struct(s).
1375 * Note: MOVDIR64B requires the enqueue QE (qe4) to be aligned.
1378 snprintf(mz_name, sizeof(mz_name), "dlb2_ldb_port%d",
1381 ret = dlb2_init_qe_mem(qm_port, mz_name);
1383 DLB2_LOG_ERR("dlb2: init_qe_mem failed, ret=%d\n", ret);
1387 qm_port->id = qm_port_id;
1389 if (dlb2->version == DLB2_HW_V2) {
1390 qm_port->cached_ldb_credits = 0;
1391 qm_port->cached_dir_credits = 0;
1393 qm_port->cached_credits = 0;
1395 /* CQs with depth < 8 use an 8-entry queue, but withhold credits so
1396 * the effective depth is smaller.
1398 qm_port->cq_depth = cfg.cq_depth <= 8 ? 8 : cfg.cq_depth;
1399 qm_port->cq_idx = 0;
1400 qm_port->cq_idx_unmasked = 0;
1402 if (dlb2->poll_mode == DLB2_CQ_POLL_MODE_SPARSE)
1403 qm_port->cq_depth_mask = (qm_port->cq_depth * 4) - 1;
1405 qm_port->cq_depth_mask = qm_port->cq_depth - 1;
1407 qm_port->gen_bit_shift = __builtin_popcount(qm_port->cq_depth_mask);
1408 /* starting value of gen bit - it toggles at wrap time */
1409 qm_port->gen_bit = 1;
1411 dlb2_hw_cq_bitmask_init(qm_port, qm_port->cq_depth);
1413 qm_port->int_armed = false;
1415 /* Save off for later use in info and lookup APIs. */
1416 qm_port->qid_mappings = &dlb2->qm_ldb_to_ev_queue_id[0];
1418 qm_port->dequeue_depth = dequeue_depth;
1419 qm_port->token_pop_thresh = dequeue_depth;
1421 /* The default enqueue functions do not include delayed-pop support for
1422 * performance reasons.
1424 if (qm_port->token_pop_mode == DELAYED_POP) {
1425 dlb2->event_dev->enqueue = dlb2_event_enqueue_delayed;
1426 dlb2->event_dev->enqueue_burst =
1427 dlb2_event_enqueue_burst_delayed;
1428 dlb2->event_dev->enqueue_new_burst =
1429 dlb2_event_enqueue_new_burst_delayed;
1430 dlb2->event_dev->enqueue_forward_burst =
1431 dlb2_event_enqueue_forward_burst_delayed;
1434 qm_port->owed_tokens = 0;
1435 qm_port->issued_releases = 0;
1437 /* Save config message too. */
1438 rte_memcpy(&qm_port->cfg.ldb, &cfg, sizeof(qm_port->cfg.ldb));
1441 qm_port->state = PORT_STARTED; /* enabled at create time */
1442 qm_port->config_state = DLB2_CONFIGURED;
1444 if (dlb2->version == DLB2_HW_V2) {
1445 qm_port->dir_credits = dir_credit_high_watermark;
1446 qm_port->ldb_credits = ldb_credit_high_watermark;
1447 qm_port->credit_pool[DLB2_DIR_QUEUE] = &dlb2->dir_credit_pool;
1448 qm_port->credit_pool[DLB2_LDB_QUEUE] = &dlb2->ldb_credit_pool;
1450 DLB2_LOG_DBG("dlb2: created ldb port %d, depth = %d, ldb credits=%d, dir credits=%d\n",
1453 qm_port->ldb_credits,
1454 qm_port->dir_credits);
1456 qm_port->credits = credit_high_watermark;
1457 qm_port->credit_pool[DLB2_COMBINED_POOL] = &dlb2->credit_pool;
1459 DLB2_LOG_DBG("dlb2: created ldb port %d, depth = %d, credits=%d\n",
1465 qm_port->use_scalar = false;
1467 #if (!defined RTE_ARCH_X86_64)
1468 qm_port->use_scalar = true;
1470 if ((qm_port->cq_depth > 64) ||
1471 (!rte_is_power_of_2(qm_port->cq_depth)) ||
1472 (dlb2->vector_opts_disabled == true))
1473 qm_port->use_scalar = true;
1476 rte_spinlock_unlock(&handle->resource_lock);
1483 dlb2_free_qe_mem(qm_port);
1485 rte_spinlock_unlock(&handle->resource_lock);
1487 DLB2_LOG_ERR("dlb2: create ldb port failed!\n");
1493 dlb2_port_link_teardown(struct dlb2_eventdev *dlb2,
1494 struct dlb2_eventdev_port *ev_port)
1496 struct dlb2_eventdev_queue *ev_queue;
1499 for (i = 0; i < DLB2_MAX_NUM_QIDS_PER_LDB_CQ; i++) {
1500 if (!ev_port->link[i].valid)
1503 ev_queue = &dlb2->ev_queues[ev_port->link[i].queue_id];
1505 ev_port->link[i].valid = false;
1506 ev_port->num_links--;
1507 ev_queue->num_links--;
1512 dlb2_hw_create_dir_port(struct dlb2_eventdev *dlb2,
1513 struct dlb2_eventdev_port *ev_port,
1514 uint32_t dequeue_depth,
1515 uint32_t enqueue_depth)
1517 struct dlb2_hw_dev *handle = &dlb2->qm_instance;
1518 struct dlb2_create_dir_port_args cfg = { {0} };
1520 struct dlb2_port *qm_port = NULL;
1521 char mz_name[RTE_MEMZONE_NAMESIZE];
1522 uint32_t qm_port_id;
1523 uint16_t ldb_credit_high_watermark = 0;
1524 uint16_t dir_credit_high_watermark = 0;
1525 uint16_t credit_high_watermark = 0;
1527 if (dlb2 == NULL || handle == NULL)
1530 if (dequeue_depth < DLB2_MIN_CQ_DEPTH) {
1531 DLB2_LOG_ERR("dlb2: invalid dequeue_depth, must be %d-%d\n",
1532 DLB2_MIN_CQ_DEPTH, DLB2_MAX_INPUT_QUEUE_DEPTH);
1536 if (enqueue_depth < DLB2_MIN_ENQUEUE_DEPTH) {
1537 DLB2_LOG_ERR("dlb2: invalid enqueue_depth, must be at least %d\n",
1538 DLB2_MIN_ENQUEUE_DEPTH);
1542 rte_spinlock_lock(&handle->resource_lock);
1544 /* Directed queues are configured at link time. */
1547 /* We round up to the next power of 2 if necessary */
1548 cfg.cq_depth = rte_align32pow2(dequeue_depth);
1549 cfg.cq_depth_threshold = 1;
1551 /* User controls the LDB high watermark via enqueue depth. The DIR high
1552 * watermark is equal, unless the directed credit pool is too small.
1554 if (dlb2->version == DLB2_HW_V2) {
1555 ldb_credit_high_watermark = enqueue_depth;
1556 /* Don't use enqueue_depth if it would require more directed
1557 * credits than are available.
1559 dir_credit_high_watermark =
1560 RTE_MIN(enqueue_depth,
1561 handle->cfg.num_dir_credits / dlb2->num_ports);
1563 credit_high_watermark = enqueue_depth;
1567 ret = dlb2_iface_dir_port_create(handle, &cfg, dlb2->poll_mode);
1569 DLB2_LOG_ERR("dlb2: dlb2_dir_port_create error, ret=%d (driver status: %s)\n",
1570 ret, dlb2_error_strings[cfg.response.status]);
1574 qm_port_id = cfg.response.id;
1576 DLB2_LOG_DBG("dlb2: ev_port %d uses qm DIR port %d <<<<<\n",
1577 ev_port->id, qm_port_id);
1579 qm_port = &ev_port->qm_port;
1580 qm_port->ev_port = ev_port; /* back ptr */
1581 qm_port->dlb2 = dlb2; /* back ptr */
1584 * Init local qe struct(s).
1585 * Note: MOVDIR64B requires the enqueue QE to be aligned
1588 snprintf(mz_name, sizeof(mz_name), "dlb2_dir_port%d",
1591 ret = dlb2_init_qe_mem(qm_port, mz_name);
1594 DLB2_LOG_ERR("dlb2: init_qe_mem failed, ret=%d\n", ret);
1598 qm_port->id = qm_port_id;
1600 if (dlb2->version == DLB2_HW_V2) {
1601 qm_port->cached_ldb_credits = 0;
1602 qm_port->cached_dir_credits = 0;
1604 qm_port->cached_credits = 0;
1606 /* CQs with depth < 8 use an 8-entry queue, but withhold credits so
1607 * the effective depth is smaller.
1609 qm_port->cq_depth = cfg.cq_depth <= 8 ? 8 : cfg.cq_depth;
1610 qm_port->cq_idx = 0;
1611 qm_port->cq_idx_unmasked = 0;
1613 if (dlb2->poll_mode == DLB2_CQ_POLL_MODE_SPARSE)
1614 qm_port->cq_depth_mask = (cfg.cq_depth * 4) - 1;
1616 qm_port->cq_depth_mask = cfg.cq_depth - 1;
1618 qm_port->gen_bit_shift = __builtin_popcount(qm_port->cq_depth_mask);
1619 /* starting value of gen bit - it toggles at wrap time */
1620 qm_port->gen_bit = 1;
1621 dlb2_hw_cq_bitmask_init(qm_port, qm_port->cq_depth);
1623 qm_port->int_armed = false;
1625 /* Save off for later use in info and lookup APIs. */
1626 qm_port->qid_mappings = &dlb2->qm_dir_to_ev_queue_id[0];
1628 qm_port->dequeue_depth = dequeue_depth;
1630 /* Directed ports are auto-pop, by default. */
1631 qm_port->token_pop_mode = AUTO_POP;
1632 qm_port->owed_tokens = 0;
1633 qm_port->issued_releases = 0;
1635 /* Save config message too. */
1636 rte_memcpy(&qm_port->cfg.dir, &cfg, sizeof(qm_port->cfg.dir));
1639 qm_port->state = PORT_STARTED; /* enabled at create time */
1640 qm_port->config_state = DLB2_CONFIGURED;
1642 if (dlb2->version == DLB2_HW_V2) {
1643 qm_port->dir_credits = dir_credit_high_watermark;
1644 qm_port->ldb_credits = ldb_credit_high_watermark;
1645 qm_port->credit_pool[DLB2_DIR_QUEUE] = &dlb2->dir_credit_pool;
1646 qm_port->credit_pool[DLB2_LDB_QUEUE] = &dlb2->ldb_credit_pool;
1648 DLB2_LOG_DBG("dlb2: created dir port %d, depth = %d cr=%d,%d\n",
1651 dir_credit_high_watermark,
1652 ldb_credit_high_watermark);
1654 qm_port->credits = credit_high_watermark;
1655 qm_port->credit_pool[DLB2_COMBINED_POOL] = &dlb2->credit_pool;
1657 DLB2_LOG_DBG("dlb2: created dir port %d, depth = %d cr=%d\n",
1660 credit_high_watermark);
1663 #if (!defined RTE_ARCH_X86_64)
1664 qm_port->use_scalar = true;
1666 if ((qm_port->cq_depth > 64) ||
1667 (!rte_is_power_of_2(qm_port->cq_depth)) ||
1668 (dlb2->vector_opts_disabled == true))
1669 qm_port->use_scalar = true;
1672 rte_spinlock_unlock(&handle->resource_lock);
1679 dlb2_free_qe_mem(qm_port);
1681 rte_spinlock_unlock(&handle->resource_lock);
1683 DLB2_LOG_ERR("dlb2: create dir port failed!\n");
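/* eventdev port_setup callback: validate the requested depths, create either
 * a load-balanced or a directed hardware port, and save the port config so it
 * can be reapplied on reconfiguration.
 */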
1689 dlb2_eventdev_port_setup(struct rte_eventdev *dev,
1691 const struct rte_event_port_conf *port_conf)
1693 struct dlb2_eventdev *dlb2;
1694 struct dlb2_eventdev_port *ev_port;
1697 if (dev == NULL || port_conf == NULL) {
1698 DLB2_LOG_ERR("Null parameter\n");
1702 dlb2 = dlb2_pmd_priv(dev);
1704 if (ev_port_id >= DLB2_MAX_NUM_PORTS(dlb2->version))
1707 if (port_conf->dequeue_depth >
1708 evdev_dlb2_default_info.max_event_port_dequeue_depth ||
1709 port_conf->enqueue_depth >
1710 evdev_dlb2_default_info.max_event_port_enqueue_depth)
1713 ev_port = &dlb2->ev_ports[ev_port_id];
1715 if (ev_port->setup_done) {
1716 DLB2_LOG_ERR("evport %d is already configured\n", ev_port_id);
1720 ev_port->qm_port.is_directed = port_conf->event_port_cfg &
1721 RTE_EVENT_PORT_CFG_SINGLE_LINK;
1723 if (!ev_port->qm_port.is_directed) {
1724 ret = dlb2_hw_create_ldb_port(dlb2,
1726 port_conf->dequeue_depth,
1727 port_conf->enqueue_depth);
1729 DLB2_LOG_ERR("Failed to create the lB port ve portId=%d\n",
1735 ret = dlb2_hw_create_dir_port(dlb2,
1737 port_conf->dequeue_depth,
1738 port_conf->enqueue_depth);
1740 DLB2_LOG_ERR("Failed to create the DIR port\n");
1745 /* Save off port config for reconfig */
1746 ev_port->conf = *port_conf;
1748 ev_port->id = ev_port_id;
1749 ev_port->enq_configured = true;
1750 ev_port->setup_done = true;
1751 ev_port->inflight_max = port_conf->new_event_threshold;
1752 ev_port->implicit_release = !(port_conf->event_port_cfg &
1753 RTE_EVENT_PORT_CFG_DISABLE_IMPL_REL);
1754 ev_port->outstanding_releases = 0;
1755 ev_port->inflight_credits = 0;
1756 ev_port->credit_update_quanta = dlb2->sw_credit_quanta;
1757 ev_port->dlb2 = dlb2; /* reverse link */
1759 /* Tear down pre-existing port->queue links */
1760 if (dlb2->run_state == DLB2_RUN_STATE_STOPPED)
1761 dlb2_port_link_teardown(dlb2, &dlb2->ev_ports[ev_port_id]);
1763 dev->data->ports[ev_port_id] = &dlb2->ev_ports[ev_port_id];
1769 dlb2_hw_map_ldb_qid_to_port(struct dlb2_hw_dev *handle,
1770 uint32_t qm_port_id,
1774 struct dlb2_map_qid_args cfg;
1781 cfg.port_id = qm_port_id;
1783 cfg.priority = EV_TO_DLB2_PRIO(priority);
1785 ret = dlb2_iface_map_qid(handle, &cfg);
1787 DLB2_LOG_ERR("dlb2: map qid error, ret=%d (driver status: %s)\n",
1788 ret, dlb2_error_strings[cfg.response.status]);
1789 DLB2_LOG_ERR("dlb2: grp=%d, qm_port=%d, qm_qid=%d prio=%d\n",
1790 handle->domain_id, cfg.port_id,
1794 DLB2_LOG_DBG("dlb2: mapped queue %d to qm_port %d\n",
1795 qm_qid, qm_port_id);
1802 dlb2_event_queue_join_ldb(struct dlb2_eventdev *dlb2,
1803 struct dlb2_eventdev_port *ev_port,
1804 struct dlb2_eventdev_queue *ev_queue,
1807 int first_avail = -1;
1810 for (i = 0; i < DLB2_MAX_NUM_QIDS_PER_LDB_CQ; i++) {
1811 if (ev_port->link[i].valid) {
1812 if (ev_port->link[i].queue_id == ev_queue->id &&
1813 ev_port->link[i].priority == priority) {
1814 if (ev_port->link[i].mapped)
1815 return 0; /* already mapped */
1818 } else if (first_avail == -1)
1821 if (first_avail == -1) {
1822 DLB2_LOG_ERR("dlb2: qm_port %d has no available QID slots.\n",
1823 ev_port->qm_port.id);
1827 ret = dlb2_hw_map_ldb_qid_to_port(&dlb2->qm_instance,
1828 ev_port->qm_port.id,
1829 ev_queue->qm_queue.id,
1833 ev_port->link[first_avail].mapped = true;
1839 dlb2_hw_create_dir_queue(struct dlb2_eventdev *dlb2,
1840 struct dlb2_eventdev_queue *ev_queue,
1843 struct dlb2_hw_dev *handle = &dlb2->qm_instance;
1844 struct dlb2_create_dir_queue_args cfg;
1847 /* The directed port is always configured before its queue */
1848 cfg.port_id = qm_port_id;
1850 if (ev_queue->depth_threshold == 0) {
1851 cfg.depth_threshold = dlb2->default_depth_thresh;
1852 ev_queue->depth_threshold =
1853 dlb2->default_depth_thresh;
1855 cfg.depth_threshold = ev_queue->depth_threshold;
1857 ret = dlb2_iface_dir_queue_create(handle, &cfg);
1859 DLB2_LOG_ERR("dlb2: create DIR event queue error, ret=%d (driver status: %s)\n",
1860 ret, dlb2_error_strings[cfg.response.status]);
1864 return cfg.response.id;
1868 dlb2_eventdev_dir_queue_setup(struct dlb2_eventdev *dlb2,
1869 struct dlb2_eventdev_queue *ev_queue,
1870 struct dlb2_eventdev_port *ev_port)
1874 qm_qid = dlb2_hw_create_dir_queue(dlb2, ev_queue, ev_port->qm_port.id);
1877 DLB2_LOG_ERR("Failed to create the DIR queue\n");
1881 dlb2->qm_dir_to_ev_queue_id[qm_qid] = ev_queue->id;
1883 ev_queue->qm_queue.id = qm_qid;
1889 dlb2_do_port_link(struct rte_eventdev *dev,
1890 struct dlb2_eventdev_queue *ev_queue,
1891 struct dlb2_eventdev_port *ev_port,
1894 struct dlb2_eventdev *dlb2 = dlb2_pmd_priv(dev);
1897 /* Don't link until start time. */
1898 if (dlb2->run_state == DLB2_RUN_STATE_STOPPED)
1901 if (ev_queue->qm_queue.is_directed)
1902 err = dlb2_eventdev_dir_queue_setup(dlb2, ev_queue, ev_port);
1904 err = dlb2_event_queue_join_ldb(dlb2, ev_port, ev_queue, prio);
1907 DLB2_LOG_ERR("port link failure for %s ev_q %d, ev_port %d\n",
1908 ev_queue->qm_queue.is_directed ? "DIR" : "LDB",
1909 ev_queue->id, ev_port->id);
1919 dlb2_validate_port_link(struct dlb2_eventdev_port *ev_port,
1924 struct dlb2_eventdev *dlb2 = ev_port->dlb2;
1925 struct dlb2_eventdev_queue *ev_queue;
1926 bool port_is_dir, queue_is_dir;
1928 if (queue_id >= dlb2->num_queues) {
1929 rte_errno = -EINVAL;
1933 ev_queue = &dlb2->ev_queues[queue_id];
1935 if (!ev_queue->setup_done &&
1936 ev_queue->qm_queue.config_state != DLB2_PREV_CONFIGURED) {
1937 rte_errno = -EINVAL;
1941 port_is_dir = ev_port->qm_port.is_directed;
1942 queue_is_dir = ev_queue->qm_queue.is_directed;
1944 if (port_is_dir != queue_is_dir) {
1945 DLB2_LOG_ERR("%s queue %u can't link to %s port %u\n",
1946 queue_is_dir ? "DIR" : "LDB", ev_queue->id,
1947 port_is_dir ? "DIR" : "LDB", ev_port->id);
1949 rte_errno = -EINVAL;
1953 /* Check if there is space for the requested link */
1954 if (!link_exists && index == -1) {
1955 DLB2_LOG_ERR("no space for new link\n");
1956 rte_errno = -ENOSPC;
1960 /* Check if the directed port is already linked */
1961 if (ev_port->qm_port.is_directed && ev_port->num_links > 0 &&
1963 DLB2_LOG_ERR("Can't link DIR port %d to >1 queues\n",
1965 rte_errno = -EINVAL;
1969 /* Check if the directed queue is already linked */
1970 if (ev_queue->qm_queue.is_directed && ev_queue->num_links > 0 &&
1972 DLB2_LOG_ERR("Can't link DIR queue %d to >1 ports\n",
1974 rte_errno = -EINVAL;
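/* eventdev port_link callback. Returns the number of queues successfully
 * linked; on failure it stops early so the caller sees the index of the
 * offending queue, with rte_errno set.
 */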
1982 dlb2_eventdev_port_link(struct rte_eventdev *dev, void *event_port,
1983 const uint8_t queues[], const uint8_t priorities[],
1987 struct dlb2_eventdev_port *ev_port = event_port;
1988 struct dlb2_eventdev *dlb2;
1993 if (ev_port == NULL) {
1994 DLB2_LOG_ERR("dlb2: evport not setup\n");
1995 rte_errno = -EINVAL;
1999 if (!ev_port->setup_done &&
2000 ev_port->qm_port.config_state != DLB2_PREV_CONFIGURED) {
2001 DLB2_LOG_ERR("dlb2: evport not setup\n");
2002 rte_errno = -EINVAL;
2006 /* Note: rte_event_port_link() ensures the PMD won't receive a NULL
2009 if (nb_links == 0) {
2010 DLB2_LOG_DBG("dlb2: nb_links is 0\n");
2011 return 0; /* Ignore and return success */
2014 dlb2 = ev_port->dlb2;
2016 DLB2_LOG_DBG("Linking %u queues to %s port %d\n",
2018 ev_port->qm_port.is_directed ? "DIR" : "LDB",
2021 for (i = 0; i < nb_links; i++) {
2022 struct dlb2_eventdev_queue *ev_queue;
2023 uint8_t queue_id, prio;
2027 queue_id = queues[i];
2028 prio = priorities[i];
2030 /* Check if the link already exists. */
2031 for (j = 0; j < DLB2_MAX_NUM_QIDS_PER_LDB_CQ; j++)
2032 if (ev_port->link[j].valid) {
2033 if (ev_port->link[j].queue_id == queue_id) {
2038 } else if (index == -1) {
2042 /* could not link */
2046 /* Check if already linked at the requested priority */
2047 if (found && ev_port->link[j].priority == prio)
2050 if (dlb2_validate_port_link(ev_port, queue_id, found, index))
2051 break; /* return index of offending queue */
2053 ev_queue = &dlb2->ev_queues[queue_id];
2055 if (dlb2_do_port_link(dev, ev_queue, ev_port, prio))
2056 break; /* return index of offending queue */
2058 ev_queue->num_links++;
2060 ev_port->link[index].queue_id = queue_id;
2061 ev_port->link[index].priority = prio;
2062 ev_port->link[index].valid = true;
2063 /* Entry already exists? If so, then must be prio change */
2065 ev_port->num_links++;
2071 dlb2_hw_unmap_ldb_qid_from_port(struct dlb2_hw_dev *handle,
2072 uint32_t qm_port_id,
2075 struct dlb2_unmap_qid_args cfg;
2081 cfg.port_id = qm_port_id;
2084 ret = dlb2_iface_unmap_qid(handle, &cfg);
2086 DLB2_LOG_ERR("dlb2: unmap qid error, ret=%d (driver status: %s)\n",
2087 ret, dlb2_error_strings[cfg.response.status]);
2093 dlb2_event_queue_detach_ldb(struct dlb2_eventdev *dlb2,
2094 struct dlb2_eventdev_port *ev_port,
2095 struct dlb2_eventdev_queue *ev_queue)
2099 /* Don't unlink until start time. */
2100 if (dlb2->run_state == DLB2_RUN_STATE_STOPPED)
2103 for (i = 0; i < DLB2_MAX_NUM_QIDS_PER_LDB_CQ; i++) {
2104 if (ev_port->link[i].valid &&
2105 ev_port->link[i].queue_id == ev_queue->id)
2109 /* This is expected with the eventdev API, which
2110 * blindly attempts to unmap all queues.
2112 if (i == DLB2_MAX_NUM_QIDS_PER_LDB_CQ) {
2113 DLB2_LOG_DBG("dlb2: ignoring LB QID %d not mapped for qm_port %d.\n",
2114 ev_queue->qm_queue.id,
2115 ev_port->qm_port.id);
2119 ret = dlb2_hw_unmap_ldb_qid_from_port(&dlb2->qm_instance,
2120 ev_port->qm_port.id,
2121 ev_queue->qm_queue.id);
2123 ev_port->link[i].mapped = false;
2129 dlb2_eventdev_port_unlink(struct rte_eventdev *dev, void *event_port,
2130 uint8_t queues[], uint16_t nb_unlinks)
2132 struct dlb2_eventdev_port *ev_port = event_port;
2133 struct dlb2_eventdev *dlb2;
2138 if (!ev_port->setup_done) {
2139 DLB2_LOG_ERR("dlb2: evport %d is not configured\n",
2141 rte_errno = -EINVAL;
2145 if (queues == NULL || nb_unlinks == 0) {
2146 DLB2_LOG_DBG("dlb2: queues is NULL or nb_unlinks is 0\n");
2147 return 0; /* Ignore and return success */
2150 if (ev_port->qm_port.is_directed) {
2151 DLB2_LOG_DBG("dlb2: ignore unlink from dir port %d\n",
2154 return nb_unlinks; /* as if success */
2157 dlb2 = ev_port->dlb2;
2159 for (i = 0; i < nb_unlinks; i++) {
2160 struct dlb2_eventdev_queue *ev_queue;
2163 if (queues[i] >= dlb2->num_queues) {
2164 DLB2_LOG_ERR("dlb2: invalid queue id %d\n", queues[i]);
2165 rte_errno = -EINVAL;
2166 return i; /* return index of offending queue */
2169 ev_queue = &dlb2->ev_queues[queues[i]];
2171 /* Does a link exist? */
2172 for (j = 0; j < DLB2_MAX_NUM_QIDS_PER_LDB_CQ; j++)
2173 if (ev_port->link[j].queue_id == queues[i] &&
2174 ev_port->link[j].valid)
2177 if (j == DLB2_MAX_NUM_QIDS_PER_LDB_CQ)
2180 ret = dlb2_event_queue_detach_ldb(dlb2, ev_port, ev_queue);
2182 DLB2_LOG_ERR("unlink err=%d for port %d queue %d\n",
2183 ret, ev_port->id, queues[i]);
2184 rte_errno = -ENOENT;
2185 return i; /* return index of offending queue */
2188 ev_port->link[j].valid = false;
2189 ev_port->num_links--;
2190 ev_queue->num_links--;
2197 dlb2_eventdev_port_unlinks_in_progress(struct rte_eventdev *dev,
2200 struct dlb2_eventdev_port *ev_port = event_port;
2201 struct dlb2_eventdev *dlb2;
2202 struct dlb2_hw_dev *handle;
2203 struct dlb2_pending_port_unmaps_args cfg;
2208 if (!ev_port->setup_done) {
2209 DLB2_LOG_ERR("dlb2: evport %d is not configured\n",
2211 rte_errno = -EINVAL;
2215 cfg.port_id = ev_port->qm_port.id;
2216 dlb2 = ev_port->dlb2;
2217 handle = &dlb2->qm_instance;
2218 ret = dlb2_iface_pending_port_unmaps(handle, &cfg);
2221 DLB2_LOG_ERR("dlb2: num_unlinks_in_progress ret=%d (driver status: %s)\n",
2222 ret, dlb2_error_strings[cfg.response.status]);
2226 return cfg.response.id;
2230 dlb2_eventdev_reapply_configuration(struct rte_eventdev *dev)
2232 struct dlb2_eventdev *dlb2 = dlb2_pmd_priv(dev);
2235 /* If an event queue or port was previously configured, but hasn't been
2236 * reconfigured, reapply its original configuration.
2238 for (i = 0; i < dlb2->num_queues; i++) {
2239 struct dlb2_eventdev_queue *ev_queue;
2241 ev_queue = &dlb2->ev_queues[i];
2243 if (ev_queue->qm_queue.config_state != DLB2_PREV_CONFIGURED)
2246 ret = dlb2_eventdev_queue_setup(dev, i, &ev_queue->conf);
2248 DLB2_LOG_ERR("dlb2: failed to reconfigure queue %d", i);
2253 for (i = 0; i < dlb2->num_ports; i++) {
2254 struct dlb2_eventdev_port *ev_port = &dlb2->ev_ports[i];
2256 if (ev_port->qm_port.config_state != DLB2_PREV_CONFIGURED)
2259 ret = dlb2_eventdev_port_setup(dev, i, &ev_port->conf);
2261 DLB2_LOG_ERR("dlb2: failed to reconfigure ev_port %d",
2271 dlb2_eventdev_apply_port_links(struct rte_eventdev *dev)
2273 struct dlb2_eventdev *dlb2 = dlb2_pmd_priv(dev);
2276 /* Perform requested port->queue links */
2277 for (i = 0; i < dlb2->num_ports; i++) {
2278 struct dlb2_eventdev_port *ev_port = &dlb2->ev_ports[i];
2281 for (j = 0; j < DLB2_MAX_NUM_QIDS_PER_LDB_CQ; j++) {
2282 struct dlb2_eventdev_queue *ev_queue;
2283 uint8_t prio, queue_id;
2285 if (!ev_port->link[j].valid)
2288 prio = ev_port->link[j].priority;
2289 queue_id = ev_port->link[j].queue_id;
2291 if (dlb2_validate_port_link(ev_port, queue_id, true, j))
2294 ev_queue = &dlb2->ev_queues[queue_id];
2296 if (dlb2_do_port_link(dev, ev_queue, ev_port, prio))
2305 dlb2_eventdev_start(struct rte_eventdev *dev)
2307 struct dlb2_eventdev *dlb2 = dlb2_pmd_priv(dev);
2308 struct dlb2_hw_dev *handle = &dlb2->qm_instance;
2309 struct dlb2_start_domain_args cfg;
2312 rte_spinlock_lock(&dlb2->qm_instance.resource_lock);
2313 if (dlb2->run_state != DLB2_RUN_STATE_STOPPED) {
2314 DLB2_LOG_ERR("bad state %d for dev_start\n",
2315 (int)dlb2->run_state);
2316 rte_spinlock_unlock(&dlb2->qm_instance.resource_lock);
2319 dlb2->run_state = DLB2_RUN_STATE_STARTING;
2320 rte_spinlock_unlock(&dlb2->qm_instance.resource_lock);
2322 /* If the device was configured more than once, some event ports and/or
2323 * queues may need to be reconfigured.
2325 ret = dlb2_eventdev_reapply_configuration(dev);
2329 /* The DLB2 PMD delays port links until the device is started. */
2330 ret = dlb2_eventdev_apply_port_links(dev);
2334 for (i = 0; i < dlb2->num_ports; i++) {
2335 if (!dlb2->ev_ports[i].setup_done) {
2336 DLB2_LOG_ERR("dlb2: port %d not setup", i);
2341 for (i = 0; i < dlb2->num_queues; i++) {
2342 if (dlb2->ev_queues[i].num_links == 0) {
2343 DLB2_LOG_ERR("dlb2: queue %d is not linked", i);
2348 ret = dlb2_iface_sched_domain_start(handle, &cfg);
2350 DLB2_LOG_ERR("dlb2: sched_domain_start ret=%d (driver status: %s)\n",
2351 ret, dlb2_error_strings[cfg.response.status]);
2355 dlb2->run_state = DLB2_RUN_STATE_STARTED;
2356 DLB2_LOG_DBG("dlb2: sched_domain_start completed OK\n");
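/* Map each eventdev operation to the DLB2 HCW command byte, per port type.
 * Directed ports have no scheduler state to update, so FORWARD is issued as
 * NEW and RELEASE becomes a no-op.
 */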
2361 static uint8_t cmd_byte_map[DLB2_NUM_PORT_TYPES][DLB2_NUM_HW_SCHED_TYPES] = {
2363 /* Load-balanced cmd bytes */
2364 [RTE_EVENT_OP_NEW] = DLB2_NEW_CMD_BYTE,
2365 [RTE_EVENT_OP_FORWARD] = DLB2_FWD_CMD_BYTE,
2366 [RTE_EVENT_OP_RELEASE] = DLB2_COMP_CMD_BYTE,
2369 /* Directed cmd bytes */
2370 [RTE_EVENT_OP_NEW] = DLB2_NEW_CMD_BYTE,
2371 [RTE_EVENT_OP_FORWARD] = DLB2_NEW_CMD_BYTE,
2372 [RTE_EVENT_OP_RELEASE] = DLB2_NOOP_CMD_BYTE,
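/* Acquire up to DLB2_SW_CREDIT_BATCH_SZ credits from the shared credit pool
 * with a single compare-and-swap; the caller caches them in the port so the
 * pool is not touched on every enqueue.
 */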
2376 static inline uint32_t
2377 dlb2_port_credits_get(struct dlb2_port *qm_port,
2378 enum dlb2_hw_queue_types type)
2380 uint32_t credits = *qm_port->credit_pool[type];
2381 uint32_t batch_size = DLB2_SW_CREDIT_BATCH_SZ;
2383 if (unlikely(credits < batch_size))
2384 batch_size = credits;
2386 if (likely(credits &&
2387 __atomic_compare_exchange_n(
2388 qm_port->credit_pool[type],
2389 &credits, credits - batch_size, false,
2390 __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST)))
2397 dlb2_replenish_sw_credits(struct dlb2_eventdev *dlb2,
2398 struct dlb2_eventdev_port *ev_port)
2400 uint16_t quanta = ev_port->credit_update_quanta;
2402 if (ev_port->inflight_credits >= quanta * 2) {
2403 /* Replenish credits, saving one quantum for enqueues */
2404 uint16_t val = ev_port->inflight_credits - quanta;
2406 __atomic_fetch_sub(&dlb2->inflights, val, __ATOMIC_SEQ_CST);
2407 ev_port->inflight_credits -= val;
2412 dlb2_check_enqueue_sw_credits(struct dlb2_eventdev *dlb2,
2413 struct dlb2_eventdev_port *ev_port)
2415 uint32_t sw_inflights = __atomic_load_n(&dlb2->inflights,
2419 if (unlikely(ev_port->inflight_max < sw_inflights)) {
2420 DLB2_INC_STAT(ev_port->stats.traffic.tx_nospc_inflight_max, 1);
2421 rte_errno = -ENOSPC;
2425 if (ev_port->inflight_credits < num) {
2426 /* check if event enqueue brings ev_port over max threshold */
2427 uint32_t credit_update_quanta = ev_port->credit_update_quanta;
2429 if (sw_inflights + credit_update_quanta >
2430 dlb2->new_event_limit) {
2432 ev_port->stats.traffic.tx_nospc_new_event_limit,
2434 rte_errno = -ENOSPC;
2438 __atomic_fetch_add(&dlb2->inflights, credit_update_quanta,
2440 ev_port->inflight_credits += (credit_update_quanta);
2442 if (ev_port->inflight_credits < num) {
2444 ev_port->stats.traffic.tx_nospc_inflight_credits,
2446 rte_errno = -ENOSPC;
2455 dlb2_check_enqueue_hw_ldb_credits(struct dlb2_port *qm_port)
2457 if (unlikely(qm_port->cached_ldb_credits == 0)) {
2458 qm_port->cached_ldb_credits =
2459 dlb2_port_credits_get(qm_port,
2461 if (unlikely(qm_port->cached_ldb_credits == 0)) {
2463 qm_port->ev_port->stats.traffic.tx_nospc_ldb_hw_credits,
2465 DLB2_LOG_DBG("ldb credits exhausted\n");
2466 return 1; /* credits exhausted */
2474 dlb2_check_enqueue_hw_dir_credits(struct dlb2_port *qm_port)
2476 if (unlikely(qm_port->cached_dir_credits == 0)) {
2477 qm_port->cached_dir_credits =
2478 dlb2_port_credits_get(qm_port,
2480 if (unlikely(qm_port->cached_dir_credits == 0)) {
2482 qm_port->ev_port->stats.traffic.tx_nospc_dir_hw_credits,
2484 DLB2_LOG_DBG("dir credits exhausted\n");
2485 return 1; /* credits exhausted */
2493 dlb2_check_enqueue_hw_credits(struct dlb2_port *qm_port)
2495 if (unlikely(qm_port->cached_credits == 0)) {
2496 qm_port->cached_credits =
2497 dlb2_port_credits_get(qm_port,
2498 DLB2_COMBINED_POOL);
2499 if (unlikely(qm_port->cached_credits == 0)) {
2501 qm_port->ev_port->stats.traffic.tx_nospc_hw_credits, 1);
2502 DLB2_LOG_DBG("credits exhausted\n");
2503 return 1; /* credits exhausted */
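/* dlb2_pp_write() issues the four 16B QEs to the port's producer port (PP)
 * MMIO address as one 64B MOVDIR64B store.
 */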
2510 static __rte_always_inline void
2511 dlb2_pp_write(struct dlb2_enqueue_qe *qe4,
2512 struct process_local_port_data *port_data)
2514 dlb2_movdir64b(port_data->pp_addr, qe4);
2518 dlb2_consume_qe_immediate(struct dlb2_port *qm_port, int num)
2520 struct process_local_port_data *port_data;
2521 struct dlb2_cq_pop_qe *qe;
2523 RTE_ASSERT(qm_port->config_state == DLB2_CONFIGURED);
2525 qe = qm_port->consume_qe;
2527 qe->tokens = num - 1;
2529 /* No store fence needed since no pointer is being sent, and CQ token
2530 * pops can be safely reordered with other HCWs.
2532 port_data = &dlb2_port[qm_port->id][PORT_TYPE(qm_port)];
2534 dlb2_movntdq_single(port_data->pp_addr, qe);
2536 DLB2_LOG_DBG("dlb2: consume immediate - %d QEs\n", num);
2538 qm_port->owed_tokens = 0;
2544 dlb2_hw_do_enqueue(struct dlb2_port *qm_port,
2546 struct process_local_port_data *port_data)
2548 /* Since MOVDIR64B is weakly-ordered, use an SFENCE to ensure that
2549 * application writes complete before enqueueing the QE.
2554 dlb2_pp_write(qm_port->qe4, port_data);
2558 dlb2_construct_token_pop_qe(struct dlb2_port *qm_port, int idx)
2560 struct dlb2_cq_pop_qe *qe = (void *)qm_port->qe4;
2561 int num = qm_port->owed_tokens;
2563 qe[idx].cmd_byte = DLB2_POP_CMD_BYTE;
2564 qe[idx].tokens = num - 1;
2566 qm_port->owed_tokens = 0;
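/* Convert up to four rte_events into DLB hardware control words (HCWs).
 * A full block of four is built with SSE inserts into two 128b registers;
 * partial blocks fall back to per-field scalar stores.
 */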
2570 dlb2_event_build_hcws(struct dlb2_port *qm_port,
2571 const struct rte_event ev[],
2573 uint8_t *sched_type,
2576 struct dlb2_enqueue_qe *qe;
2577 uint16_t sched_word[4];
2583 sse_qe[0] = _mm_setzero_si128();
2584 sse_qe[1] = _mm_setzero_si128();
2588 /* Construct the metadata portion of two HCWs in one 128b SSE
2589 * register. HCW metadata is constructed in the SSE registers
2591 * sse_qe[0][63:0]: qe[0]'s metadata
2592 * sse_qe[0][127:64]: qe[1]'s metadata
2593 * sse_qe[1][63:0]: qe[2]'s metadata
2594 * sse_qe[1][127:64]: qe[3]'s metadata
2597 /* Convert the event operation into a command byte and store it
2599 * sse_qe[0][63:56] = cmd_byte_map[is_directed][ev[0].op]
2600 * sse_qe[0][127:120] = cmd_byte_map[is_directed][ev[1].op]
2601 * sse_qe[1][63:56] = cmd_byte_map[is_directed][ev[2].op]
2602 * sse_qe[1][127:120] = cmd_byte_map[is_directed][ev[3].op]
2604 #define DLB2_QE_CMD_BYTE 7
2605 sse_qe[0] = _mm_insert_epi8(sse_qe[0],
2606 cmd_byte_map[qm_port->is_directed][ev[0].op],
2608 sse_qe[0] = _mm_insert_epi8(sse_qe[0],
2609 cmd_byte_map[qm_port->is_directed][ev[1].op],
2610 DLB2_QE_CMD_BYTE + 8);
2611 sse_qe[1] = _mm_insert_epi8(sse_qe[1],
2612 cmd_byte_map[qm_port->is_directed][ev[2].op],
2614 sse_qe[1] = _mm_insert_epi8(sse_qe[1],
2615 cmd_byte_map[qm_port->is_directed][ev[3].op],
2616 DLB2_QE_CMD_BYTE + 8);
2618 /* Store priority, scheduling type, and queue ID in the sched
2619 * word array because these values are re-used when the
2620 * destination is a directed queue.
2622 sched_word[0] = EV_TO_DLB2_PRIO(ev[0].priority) << 10 |
2623 sched_type[0] << 8 |
2625 sched_word[1] = EV_TO_DLB2_PRIO(ev[1].priority) << 10 |
2626 sched_type[1] << 8 |
2628 sched_word[2] = EV_TO_DLB2_PRIO(ev[2].priority) << 10 |
2629 sched_type[2] << 8 |
2631 sched_word[3] = EV_TO_DLB2_PRIO(ev[3].priority) << 10 |
2632 sched_type[3] << 8 |
2635 /* Store the event priority, scheduling type, and queue ID in
2637 * sse_qe[0][31:16] = sched_word[0]
2638 * sse_qe[0][95:80] = sched_word[1]
2639 * sse_qe[1][31:16] = sched_word[2]
2640 * sse_qe[1][95:80] = sched_word[3]
2642 #define DLB2_QE_QID_SCHED_WORD 1
2643 sse_qe[0] = _mm_insert_epi16(sse_qe[0],
2645 DLB2_QE_QID_SCHED_WORD);
2646 sse_qe[0] = _mm_insert_epi16(sse_qe[0],
2648 DLB2_QE_QID_SCHED_WORD + 4);
2649 sse_qe[1] = _mm_insert_epi16(sse_qe[1],
2651 DLB2_QE_QID_SCHED_WORD);
2652 sse_qe[1] = _mm_insert_epi16(sse_qe[1],
2654 DLB2_QE_QID_SCHED_WORD + 4);
2656 /* If the destination is a load-balanced queue, store the lock
2657 * ID. If it is a directed queue, DLB places this field in
2658 * bytes 10-11 of the received QE, so we format it accordingly:
2659 * sse_qe[0][47:32] = dir queue ? sched_word[0] : flow_id[0]
2660 * sse_qe[0][111:96] = dir queue ? sched_word[1] : flow_id[1]
2661 * sse_qe[1][47:32] = dir queue ? sched_word[2] : flow_id[2]
2662 * sse_qe[1][111:96] = dir queue ? sched_word[3] : flow_id[3]
2664 #define DLB2_QE_LOCK_ID_WORD 2
2665 sse_qe[0] = _mm_insert_epi16(sse_qe[0],
2666 (sched_type[0] == DLB2_SCHED_DIRECTED) ?
2667 sched_word[0] : ev[0].flow_id,
2668 DLB2_QE_LOCK_ID_WORD);
2669 sse_qe[0] = _mm_insert_epi16(sse_qe[0],
2670 (sched_type[1] == DLB2_SCHED_DIRECTED) ?
2671 sched_word[1] : ev[1].flow_id,
2672 DLB2_QE_LOCK_ID_WORD + 4);
2673 sse_qe[1] = _mm_insert_epi16(sse_qe[1],
2674 (sched_type[2] == DLB2_SCHED_DIRECTED) ?
2675 sched_word[2] : ev[2].flow_id,
2676 DLB2_QE_LOCK_ID_WORD);
2677 sse_qe[1] = _mm_insert_epi16(sse_qe[1],
2678 (sched_type[3] == DLB2_SCHED_DIRECTED) ?
2679 sched_word[3] : ev[3].flow_id,
2680 DLB2_QE_LOCK_ID_WORD + 4);
2682 /* Store the event type and sub event type in the metadata:
2683 * sse_qe[0][15:0] = sub_event_type[0] << 8 | event_type[0]
2684 * sse_qe[0][79:64] = sub_event_type[1] << 8 | event_type[1]
2685 * sse_qe[1][15:0] = sub_event_type[2] << 8 | event_type[2]
2686 * sse_qe[1][79:64] = sub_event_type[3] << 8 | event_type[3]
2688 #define DLB2_QE_EV_TYPE_WORD 0
2689 sse_qe[0] = _mm_insert_epi16(sse_qe[0],
2690 ev[0].sub_event_type << 8 |
2692 DLB2_QE_EV_TYPE_WORD);
2693 sse_qe[0] = _mm_insert_epi16(sse_qe[0],
2694 ev[1].sub_event_type << 8 |
2696 DLB2_QE_EV_TYPE_WORD + 4);
2697 sse_qe[1] = _mm_insert_epi16(sse_qe[1],
2698 ev[2].sub_event_type << 8 |
2700 DLB2_QE_EV_TYPE_WORD);
2701 sse_qe[1] = _mm_insert_epi16(sse_qe[1],
2702 ev[3].sub_event_type << 8 |
2704 DLB2_QE_EV_TYPE_WORD + 4);
2706 /* Store the metadata to memory (use the double-precision
2707 * _mm_storeh_pd because there is no integer function for
2708 * storing the upper 64b):
2709 * qe[0] metadata = sse_qe[0][63:0]
2710 * qe[1] metadata = sse_qe[0][127:64]
2711 * qe[2] metadata = sse_qe[1][63:0]
2712 * qe[3] metadata = sse_qe[1][127:64]
2714 _mm_storel_epi64((__m128i *)&qe[0].u.opaque_data, sse_qe[0]);
2715 _mm_storeh_pd((double *)&qe[1].u.opaque_data,
2716 (__m128d)sse_qe[0]);
2717 _mm_storel_epi64((__m128i *)&qe[2].u.opaque_data, sse_qe[1]);
2718 _mm_storeh_pd((double *)&qe[3].u.opaque_data,
2719 (__m128d)sse_qe[1]);
2721 qe[0].data = ev[0].u64;
2722 qe[1].data = ev[1].u64;
2723 qe[2].data = ev[2].u64;
2724 qe[3].data = ev[3].u64;
2730 for (i = 0; i < num; i++) {
2732 cmd_byte_map[qm_port->is_directed][ev[i].op];
2733 qe[i].sched_type = sched_type[i];
2734 qe[i].data = ev[i].u64;
2735 qe[i].qid = queue_id[i];
2736 qe[i].priority = EV_TO_DLB2_PRIO(ev[i].priority);
2737 qe[i].lock_id = ev[i].flow_id;
2738 if (sched_type[i] == DLB2_SCHED_DIRECTED) {
2739 struct dlb2_msg_info *info =
2740 (struct dlb2_msg_info *)&qe[i].lock_id;
2742 info->qid = queue_id[i];
2743 info->sched_type = DLB2_SCHED_DIRECTED;
2744 info->priority = qe[i].priority;
2746 qe[i].u.event_type.major = ev[i].event_type;
2747 qe[i].u.event_type.sub = ev[i].sub_event_type;
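/* Per-event enqueue preparation: resolve the destination queue, translate the
 * RTE scheduling type into a DLB sched type, and acquire the hardware and
 * software credits the operation requires. A nonzero return (with rte_errno
 * set) tells the caller to stop the burst.
 */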
2756 dlb2_event_enqueue_prep(struct dlb2_eventdev_port *ev_port,
2757 struct dlb2_port *qm_port,
2758 const struct rte_event ev[],
2759 uint8_t *sched_type,
2762 struct dlb2_eventdev *dlb2 = ev_port->dlb2;
2763 struct dlb2_eventdev_queue *ev_queue;
2764 uint16_t *cached_credits = NULL;
2765 struct dlb2_queue *qm_queue;
2767 ev_queue = &dlb2->ev_queues[ev->queue_id];
2768 qm_queue = &ev_queue->qm_queue;
2769 *queue_id = qm_queue->id;
2771 /* Ignore sched_type and hardware credits on release events */
2772 if (ev->op == RTE_EVENT_OP_RELEASE)
2775 if (!qm_queue->is_directed) {
2776 /* Load balanced destination queue */
2778 if (dlb2->version == DLB2_HW_V2) {
2779 if (dlb2_check_enqueue_hw_ldb_credits(qm_port)) {
2780 rte_errno = -ENOSPC;
2783 cached_credits = &qm_port->cached_ldb_credits;
2785 if (dlb2_check_enqueue_hw_credits(qm_port)) {
2786 rte_errno = -ENOSPC;
2789 cached_credits = &qm_port->cached_credits;
2791 switch (ev->sched_type) {
2792 case RTE_SCHED_TYPE_ORDERED:
2793 DLB2_LOG_DBG("dlb2: put_qe: RTE_SCHED_TYPE_ORDERED\n");
2794 if (qm_queue->sched_type != RTE_SCHED_TYPE_ORDERED) {
2795 DLB2_LOG_ERR("dlb2: tried to send ordered event to unordered queue %d\n",
2797 rte_errno = -EINVAL;
2800 *sched_type = DLB2_SCHED_ORDERED;
2802 case RTE_SCHED_TYPE_ATOMIC:
2803 DLB2_LOG_DBG("dlb2: put_qe: RTE_SCHED_TYPE_ATOMIC\n");
2804 *sched_type = DLB2_SCHED_ATOMIC;
2806 case RTE_SCHED_TYPE_PARALLEL:
2807 DLB2_LOG_DBG("dlb2: put_qe: RTE_SCHED_TYPE_PARALLEL\n");
2808 if (qm_queue->sched_type == RTE_SCHED_TYPE_ORDERED)
2809 *sched_type = DLB2_SCHED_ORDERED;
2811 *sched_type = DLB2_SCHED_UNORDERED;
2814 DLB2_LOG_ERR("Unsupported LDB sched type in put_qe\n");
2815 DLB2_INC_STAT(ev_port->stats.tx_invalid, 1);
2816 rte_errno = -EINVAL;
2820 /* Directed destination queue */
2822 if (dlb2->version == DLB2_HW_V2) {
2823 if (dlb2_check_enqueue_hw_dir_credits(qm_port)) {
2824 rte_errno = -ENOSPC;
2827 cached_credits = &qm_port->cached_dir_credits;
2829 if (dlb2_check_enqueue_hw_credits(qm_port)) {
2830 rte_errno = -ENOSPC;
2833 cached_credits = &qm_port->cached_credits;
2835 DLB2_LOG_DBG("dlb2: put_qe: RTE_SCHED_TYPE_DIRECTED\n");
2837 *sched_type = DLB2_SCHED_DIRECTED;
2842 case RTE_EVENT_OP_NEW:
2843 /* Check that a sw credit is available */
2844 if (dlb2_check_enqueue_sw_credits(dlb2, ev_port)) {
2845 rte_errno = -ENOSPC;
2848 ev_port->inflight_credits--;
2849 (*cached_credits)--;
2851 case RTE_EVENT_OP_FORWARD:
2852 /* Check for outstanding_releases underflow. If this occurs,
2853 * the application is not using the EVENT_OPs correctly; for
2854 * example, forwarding or releasing events that were not
2857 RTE_ASSERT(ev_port->outstanding_releases > 0);
2858 ev_port->outstanding_releases--;
2859 qm_port->issued_releases++;
2860 (*cached_credits)--;
2862 case RTE_EVENT_OP_RELEASE:
2863 ev_port->inflight_credits++;
2864 /* Check for outstanding_releases underflow. If this occurs,
2865 * the application is not using the EVENT_OPs correctly; for
2866 * example, forwarding or releasing events that were not
2869 RTE_ASSERT(ev_port->outstanding_releases > 0);
2870 ev_port->outstanding_releases--;
2871 qm_port->issued_releases++;
2873 /* Replenish s/w credits if enough are cached */
2874 dlb2_replenish_sw_credits(dlb2, ev_port);
2878 DLB2_INC_STAT(ev_port->stats.tx_op_cnt[ev->op], 1);
2879 DLB2_INC_STAT(ev_port->stats.traffic.tx_ok, 1);
2881 #ifndef RTE_LIBRTE_PMD_DLB2_QUELL_STATS
2882 if (ev->op != RTE_EVENT_OP_RELEASE) {
2883 DLB2_INC_STAT(ev_port->stats.queue[ev->queue_id].enq_ok, 1);
2884 DLB2_INC_STAT(ev_port->stats.tx_sched_cnt[*sched_type], 1);
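/* Burst enqueue: events are processed in blocks of up to
 * DLB2_NUM_QES_PER_CACHE_LINE (4). Each block is built into HCWs and written
 * to the producer port with a single 64B store; in DELAYED_POP mode a CQ
 * token-pop QE may be inserted in place of an event, ending the block early.
 */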
2891 static inline uint16_t
2892 __dlb2_event_enqueue_burst(void *event_port,
2893 const struct rte_event events[],
2897 struct dlb2_eventdev_port *ev_port = event_port;
2898 struct dlb2_port *qm_port = &ev_port->qm_port;
2899 struct process_local_port_data *port_data;
2902 RTE_ASSERT(ev_port->enq_configured);
2903 RTE_ASSERT(events != NULL);
2907 port_data = &dlb2_port[qm_port->id][PORT_TYPE(qm_port)];
2910 uint8_t sched_types[DLB2_NUM_QES_PER_CACHE_LINE];
2911 uint8_t queue_ids[DLB2_NUM_QES_PER_CACHE_LINE];
2915 memset(qm_port->qe4,
2917 DLB2_NUM_QES_PER_CACHE_LINE *
2918 sizeof(struct dlb2_enqueue_qe));
2920 for (; j < DLB2_NUM_QES_PER_CACHE_LINE && (i + j) < num; j++) {
2921 const struct rte_event *ev = &events[i + j];
2922 int16_t thresh = qm_port->token_pop_thresh;
2925 qm_port->token_pop_mode == DELAYED_POP &&
2926 (ev->op == RTE_EVENT_OP_FORWARD ||
2927 ev->op == RTE_EVENT_OP_RELEASE) &&
2928 qm_port->issued_releases >= thresh - 1) {
2929 /* Insert the token pop QE and break out. This
2930 * may result in a partial HCW, but that is
2931 * simpler than supporting arbitrary QE
2934 dlb2_construct_token_pop_qe(qm_port, j);
2936 /* Reset the releases for the next QE batch */
2937 qm_port->issued_releases -= thresh;
2944 if (dlb2_event_enqueue_prep(ev_port, qm_port, ev,
2953 dlb2_event_build_hcws(qm_port, &events[i], j - pop_offs,
2954 sched_types, queue_ids);
2956 dlb2_hw_do_enqueue(qm_port, i == 0, port_data);
2958 /* Don't include the token pop QE in the enqueue count */
2961 /* Don't interpret j < DLB2_NUM_... as out-of-credits if
2964 if (j < DLB2_NUM_QES_PER_CACHE_LINE && pop_offs == 0)
2972 dlb2_event_enqueue_burst(void *event_port,
2973 const struct rte_event events[],
2976 return __dlb2_event_enqueue_burst(event_port, events, num, false);
2980 dlb2_event_enqueue_burst_delayed(void *event_port,
2981 const struct rte_event events[],
2984 return __dlb2_event_enqueue_burst(event_port, events, num, true);
2987 static inline uint16_t
2988 dlb2_event_enqueue(void *event_port,
2989 const struct rte_event events[])
2991 return __dlb2_event_enqueue_burst(event_port, events, 1, false);
2994 static inline uint16_t
2995 dlb2_event_enqueue_delayed(void *event_port,
2996 const struct rte_event events[])
2998 return __dlb2_event_enqueue_burst(event_port, events, 1, true);
3002 dlb2_event_enqueue_new_burst(void *event_port,
3003 const struct rte_event events[],
3006 return __dlb2_event_enqueue_burst(event_port, events, num, false);
3010 dlb2_event_enqueue_new_burst_delayed(void *event_port,
3011 const struct rte_event events[],
3014 return __dlb2_event_enqueue_burst(event_port, events, num, true);
3018 dlb2_event_enqueue_forward_burst(void *event_port,
3019 const struct rte_event events[],
3022 return __dlb2_event_enqueue_burst(event_port, events, num, false);
3026 dlb2_event_enqueue_forward_burst_delayed(void *event_port,
3027 const struct rte_event events[],
3030 return __dlb2_event_enqueue_burst(event_port, events, num, true);
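/* Release 'n' previously dequeued events (used for implicit release).
 * Load-balanced ports emit batches of completion (COMP) QEs; directed ports
 * only perform the software credit accounting.
 */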
3034 dlb2_event_release(struct dlb2_eventdev *dlb2,
3038 struct process_local_port_data *port_data;
3039 struct dlb2_eventdev_port *ev_port;
3040 struct dlb2_port *qm_port;
3043 if (port_id > dlb2->num_ports) {
3044 DLB2_LOG_ERR("Invalid port id %d in dlb2_event_release\n",
3046 rte_errno = -EINVAL;
3050 ev_port = &dlb2->ev_ports[port_id];
3051 qm_port = &ev_port->qm_port;
3052 port_data = &dlb2_port[qm_port->id][PORT_TYPE(qm_port)];
3056 if (qm_port->is_directed) {
3058 goto sw_credit_update;
3066 _mm_storeu_si128((void *)&qm_port->qe4[0], _mm_setzero_si128());
3067 _mm_storeu_si128((void *)&qm_port->qe4[1], _mm_setzero_si128());
3068 _mm_storeu_si128((void *)&qm_port->qe4[2], _mm_setzero_si128());
3069 _mm_storeu_si128((void *)&qm_port->qe4[3], _mm_setzero_si128());
3072 for (; j < DLB2_NUM_QES_PER_CACHE_LINE && (i + j) < n; j++) {
3073 int16_t thresh = qm_port->token_pop_thresh;
3075 if (qm_port->token_pop_mode == DELAYED_POP &&
3076 qm_port->issued_releases >= thresh - 1) {
3077 /* Insert the token pop QE */
3078 dlb2_construct_token_pop_qe(qm_port, j);
3080 /* Reset the releases for the next QE batch */
3081 qm_port->issued_releases -= thresh;
3088 qm_port->qe4[j].cmd_byte = DLB2_COMP_CMD_BYTE;
3089 qm_port->issued_releases++;
3092 dlb2_hw_do_enqueue(qm_port, i == 0, port_data);
3094 /* Don't include the token pop QE in the release count */
3099 /* each release returns one credit */
3100 if (unlikely(!ev_port->outstanding_releases)) {
3101 DLB2_LOG_ERR("%s: Outstanding releases underflowed.\n",
3105 ev_port->outstanding_releases -= i;
3106 ev_port->inflight_credits += i;
3108 /* Replenish s/w credits if enough releases are performed */
3109 dlb2_replenish_sw_credits(dlb2, ev_port);
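/* Return dequeue-side hardware credits to the port's local cache, flushing a
 * batch back to the shared pool once the cache holds at least twice the
 * batch size.
 */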
3113 dlb2_port_credits_inc(struct dlb2_port *qm_port, int num)
3115 uint32_t batch_size = DLB2_SW_CREDIT_BATCH_SZ;
3117 /* Increment port credits, and return them to the pool if the cache exceeds the threshold */
3118 if (!qm_port->is_directed) {
3119 if (qm_port->dlb2->version == DLB2_HW_V2) {
3120 qm_port->cached_ldb_credits += num;
3121 if (qm_port->cached_ldb_credits >= 2 * batch_size) {
3123 qm_port->credit_pool[DLB2_LDB_QUEUE],
3124 batch_size, __ATOMIC_SEQ_CST);
3125 qm_port->cached_ldb_credits -= batch_size;
3128 qm_port->cached_credits += num;
3129 if (qm_port->cached_credits >= 2 * batch_size) {
3131 qm_port->credit_pool[DLB2_COMBINED_POOL],
3132 batch_size, __ATOMIC_SEQ_CST);
3133 qm_port->cached_credits -= batch_size;
3137 if (qm_port->dlb2->version == DLB2_HW_V2) {
3138 qm_port->cached_dir_credits += num;
3139 if (qm_port->cached_dir_credits >= 2 * batch_size) {
3141 qm_port->credit_pool[DLB2_DIR_QUEUE],
3142 batch_size, __ATOMIC_SEQ_CST);
3143 qm_port->cached_dir_credits -= batch_size;
3146 qm_port->cached_credits += num;
3147 if (qm_port->cached_credits >= 2 * batch_size) {
3149 qm_port->credit_pool[DLB2_COMBINED_POOL],
3150 batch_size, __ATOMIC_SEQ_CST);
3151 qm_port->cached_credits -= batch_size;
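/* Dequeue wait: give up once the timeout expires, arm umonitor/umwait on the
 * CQ gen-bit word when the device allows it, or otherwise busy-poll for up to
 * poll_interval cycles before re-checking the CQ.
 */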
3158 dlb2_dequeue_wait(struct dlb2_eventdev *dlb2,
3159 struct dlb2_eventdev_port *ev_port,
3160 struct dlb2_port *qm_port,
3162 uint64_t start_ticks)
3164 struct process_local_port_data *port_data;
3165 uint64_t elapsed_ticks;
3167 port_data = &dlb2_port[qm_port->id][PORT_TYPE(qm_port)];
3169 elapsed_ticks = rte_get_timer_cycles() - start_ticks;
3171 /* Wait/poll time expired */
3172 if (elapsed_ticks >= timeout) {
3174 } else if (dlb2->umwait_allowed) {
3175 struct rte_power_monitor_cond pmc;
3176 volatile struct dlb2_dequeue_qe *cq_base;
3179 struct dlb2_dequeue_qe qe;
3181 uint64_t expected_value;
3182 volatile uint64_t *monitor_addr;
3184 qe_mask.qe.cq_gen = 1; /* set mask */
3186 cq_base = port_data->cq_base;
3187 monitor_addr = (volatile uint64_t *)(volatile void *)
3188 &cq_base[qm_port->cq_idx];
3189 monitor_addr++; /* cq_gen bit is in second 64bit location */
3191 if (qm_port->gen_bit)
3192 expected_value = qe_mask.raw_qe[1];
3196 pmc.addr = monitor_addr;
3197 pmc.val = expected_value;
3198 pmc.mask = qe_mask.raw_qe[1];
3199 pmc.size = sizeof(uint64_t);
3201 rte_power_monitor(&pmc, timeout + start_ticks);
3203 DLB2_INC_STAT(ev_port->stats.traffic.rx_umonitor_umwait, 1);
3205 uint64_t poll_interval = dlb2->poll_interval;
3206 uint64_t curr_ticks = rte_get_timer_cycles();
3207 uint64_t init_ticks = curr_ticks;
3209 while ((curr_ticks - start_ticks < timeout) &&
3210 (curr_ticks - init_ticks < poll_interval))
3211 curr_ticks = rte_get_timer_cycles();
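/* Scalar QE-to-event conversion path, used when a batch has fewer than four
 * valid QEs or when any QE has its error bit set.
 */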
3217 static __rte_noinline int
3218 dlb2_process_dequeue_qes(struct dlb2_eventdev_port *ev_port,
3219 struct dlb2_port *qm_port,
3220 struct rte_event *events,
3221 struct dlb2_dequeue_qe *qes,
3224 uint8_t *qid_mappings = qm_port->qid_mappings;
3227 for (i = 0, num = 0; i < cnt; i++) {
3228 struct dlb2_dequeue_qe *qe = &qes[i];
3229 int sched_type_map[DLB2_NUM_HW_SCHED_TYPES] = {
3230 [DLB2_SCHED_ATOMIC] = RTE_SCHED_TYPE_ATOMIC,
3231 [DLB2_SCHED_UNORDERED] = RTE_SCHED_TYPE_PARALLEL,
3232 [DLB2_SCHED_ORDERED] = RTE_SCHED_TYPE_ORDERED,
3233 [DLB2_SCHED_DIRECTED] = RTE_SCHED_TYPE_ATOMIC,
3236 /* Fill in event information.
3237 * Note that flow_id must be embedded in the data by
3238 * the app, such as the mbuf RSS hash field if the data
3241 if (unlikely(qe->error)) {
3242 DLB2_LOG_ERR("QE error bit ON\n");
3243 DLB2_INC_STAT(ev_port->stats.traffic.rx_drop, 1);
3244 dlb2_consume_qe_immediate(qm_port, 1);
3245 continue; /* Ignore */
3248 events[num].u64 = qe->data;
3249 events[num].flow_id = qe->flow_id;
3250 events[num].priority = DLB2_TO_EV_PRIO((uint8_t)qe->priority);
3251 events[num].event_type = qe->u.event_type.major;
3252 events[num].sub_event_type = qe->u.event_type.sub;
3253 events[num].sched_type = sched_type_map[qe->sched_type];
3254 events[num].impl_opaque = qe->qid_depth;
3256 /* qid not preserved for directed queues */
3257 if (qm_port->is_directed)
3258 evq_id = ev_port->link[0].queue_id;
3260 evq_id = qid_mappings[qe->qid];
3262 events[num].queue_id = evq_id;
3264 ev_port->stats.queue[evq_id].qid_depth[qe->qid_depth],
3266 DLB2_INC_STAT(ev_port->stats.rx_sched_cnt[qe->sched_type], 1);
3270 DLB2_INC_STAT(ev_port->stats.traffic.rx_ok, num);
3276 dlb2_process_dequeue_four_qes(struct dlb2_eventdev_port *ev_port,
3277 struct dlb2_port *qm_port,
3278 struct rte_event *events,
3279 struct dlb2_dequeue_qe *qes)
3281 int sched_type_map[] = {
3282 [DLB2_SCHED_ATOMIC] = RTE_SCHED_TYPE_ATOMIC,
3283 [DLB2_SCHED_UNORDERED] = RTE_SCHED_TYPE_PARALLEL,
3284 [DLB2_SCHED_ORDERED] = RTE_SCHED_TYPE_ORDERED,
3285 [DLB2_SCHED_DIRECTED] = RTE_SCHED_TYPE_ATOMIC,
3287 const int num_events = DLB2_NUM_QES_PER_CACHE_LINE;
3288 uint8_t *qid_mappings = qm_port->qid_mappings;
3291 /* In the unlikely case that any of the QE error bits are set, process
3292 * them one at a time.
3294 if (unlikely(qes[0].error || qes[1].error ||
3295 qes[2].error || qes[3].error))
3296 return dlb2_process_dequeue_qes(ev_port, qm_port, events,
3299 events[0].u64 = qes[0].data;
3300 events[1].u64 = qes[1].data;
3301 events[2].u64 = qes[2].data;
3302 events[3].u64 = qes[3].data;
3304 /* Construct the metadata portion of two struct rte_events
3305 * in one 128b SSE register. Event metadata is constructed in the SSE
3306 * registers like so:
3307 * sse_evt[0][63:0]: event[0]'s metadata
3308 * sse_evt[0][127:64]: event[1]'s metadata
3309 * sse_evt[1][63:0]: event[2]'s metadata
3310 * sse_evt[1][127:64]: event[3]'s metadata
3312 sse_evt[0] = _mm_setzero_si128();
3313 sse_evt[1] = _mm_setzero_si128();
3315 /* Convert the hardware queue ID to an event queue ID and store it in
3317 * sse_evt[0][47:40] = qid_mappings[qes[0].qid]
3318 * sse_evt[0][111:104] = qid_mappings[qes[1].qid]
3319 * sse_evt[1][47:40] = qid_mappings[qes[2].qid]
3320 * sse_evt[1][111:104] = qid_mappings[qes[3].qid]
3322 #define DLB_EVENT_QUEUE_ID_BYTE 5
3323 sse_evt[0] = _mm_insert_epi8(sse_evt[0],
3324 qid_mappings[qes[0].qid],
3325 DLB_EVENT_QUEUE_ID_BYTE);
3326 sse_evt[0] = _mm_insert_epi8(sse_evt[0],
3327 qid_mappings[qes[1].qid],
3328 DLB_EVENT_QUEUE_ID_BYTE + 8);
3329 sse_evt[1] = _mm_insert_epi8(sse_evt[1],
3330 qid_mappings[qes[2].qid],
3331 DLB_EVENT_QUEUE_ID_BYTE);
3332 sse_evt[1] = _mm_insert_epi8(sse_evt[1],
3333 qid_mappings[qes[3].qid],
3334 DLB_EVENT_QUEUE_ID_BYTE + 8);
3336 /* Convert the hardware priority to an event priority and store it in
3337 * the metadata, while also returning the queue depth status
3338 * value captured by the hardware, storing it in impl_opaque, which can
3339 * be read by the application but not modified
3340 * sse_evt[0][55:48] = DLB2_TO_EV_PRIO(qes[0].priority)
3341 * sse_evt[0][63:56] = qes[0].qid_depth
3342 * sse_evt[0][119:112] = DLB2_TO_EV_PRIO(qes[1].priority)
3343 * sse_evt[0][127:120] = qes[1].qid_depth
3344 * sse_evt[1][55:48] = DLB2_TO_EV_PRIO(qes[2].priority)
3345 * sse_evt[1][63:56] = qes[2].qid_depth
3346 * sse_evt[1][119:112] = DLB2_TO_EV_PRIO(qes[3].priority)
3347 * sse_evt[1][127:120] = qes[3].qid_depth
3349 #define DLB_EVENT_PRIO_IMPL_OPAQUE_WORD 3
3350 #define DLB_BYTE_SHIFT 8
3352 _mm_insert_epi16(sse_evt[0],
3353 DLB2_TO_EV_PRIO((uint8_t)qes[0].priority) |
3354 (qes[0].qid_depth << DLB_BYTE_SHIFT),
3355 DLB_EVENT_PRIO_IMPL_OPAQUE_WORD);
3357 _mm_insert_epi16(sse_evt[0],
3358 DLB2_TO_EV_PRIO((uint8_t)qes[1].priority) |
3359 (qes[1].qid_depth << DLB_BYTE_SHIFT),
3360 DLB_EVENT_PRIO_IMPL_OPAQUE_WORD + 4);
3362 _mm_insert_epi16(sse_evt[1],
3363 DLB2_TO_EV_PRIO((uint8_t)qes[2].priority) |
3364 (qes[2].qid_depth << DLB_BYTE_SHIFT),
3365 DLB_EVENT_PRIO_IMPL_OPAQUE_WORD);
3367 _mm_insert_epi16(sse_evt[1],
3368 DLB2_TO_EV_PRIO((uint8_t)qes[3].priority) |
3369 (qes[3].qid_depth << DLB_BYTE_SHIFT),
3370 DLB_EVENT_PRIO_IMPL_OPAQUE_WORD + 4);
3372 /* Write the event type, sub event type, and flow_id to the event
3374 * sse_evt[0][31:0] = qes[0].flow_id |
3375 * qes[0].u.event_type.major << 28 |
3376 * qes[0].u.event_type.sub << 20;
3377 * sse_evt[0][95:64] = qes[1].flow_id |
3378 * qes[1].u.event_type.major << 28 |
3379 * qes[1].u.event_type.sub << 20;
3380 * sse_evt[1][31:0] = qes[2].flow_id |
3381 * qes[2].u.event_type.major << 28 |
3382 * qes[2].u.event_type.sub << 20;
3383 * sse_evt[1][95:64] = qes[3].flow_id |
3384 * qes[3].u.event_type.major << 28 |
3385 * qes[3].u.event_type.sub << 20;
3387 #define DLB_EVENT_EV_TYPE_DW 0
3388 #define DLB_EVENT_EV_TYPE_SHIFT 28
3389 #define DLB_EVENT_SUB_EV_TYPE_SHIFT 20
3390 sse_evt[0] = _mm_insert_epi32(sse_evt[0],
3392 qes[0].u.event_type.major << DLB_EVENT_EV_TYPE_SHIFT |
3393 qes[0].u.event_type.sub << DLB_EVENT_SUB_EV_TYPE_SHIFT,
3394 DLB_EVENT_EV_TYPE_DW);
3395 sse_evt[0] = _mm_insert_epi32(sse_evt[0],
3397 qes[1].u.event_type.major << DLB_EVENT_EV_TYPE_SHIFT |
3398 qes[1].u.event_type.sub << DLB_EVENT_SUB_EV_TYPE_SHIFT,
3399 DLB_EVENT_EV_TYPE_DW + 2);
3400 sse_evt[1] = _mm_insert_epi32(sse_evt[1],
3402 qes[2].u.event_type.major << DLB_EVENT_EV_TYPE_SHIFT |
3403 qes[2].u.event_type.sub << DLB_EVENT_SUB_EV_TYPE_SHIFT,
3404 DLB_EVENT_EV_TYPE_DW);
3405 sse_evt[1] = _mm_insert_epi32(sse_evt[1],
3407 qes[3].u.event_type.major << DLB_EVENT_EV_TYPE_SHIFT |
3408 qes[3].u.event_type.sub << DLB_EVENT_SUB_EV_TYPE_SHIFT,
3409 DLB_EVENT_EV_TYPE_DW + 2);
3411 /* Write the sched type to the event metadata. 'op' and 'rsvd' are not
3413 * sse_evt[0][39:32] = sched_type_map[qes[0].sched_type] << 6
3414 * sse_evt[0][103:96] = sched_type_map[qes[1].sched_type] << 6
3415 * sse_evt[1][39:32] = sched_type_map[qes[2].sched_type] << 6
3416 * sse_evt[1][103:96] = sched_type_map[qes[3].sched_type] << 6
3418 #define DLB_EVENT_SCHED_TYPE_BYTE 4
3419 #define DLB_EVENT_SCHED_TYPE_SHIFT 6
3420 sse_evt[0] = _mm_insert_epi8(sse_evt[0],
3421 sched_type_map[qes[0].sched_type] << DLB_EVENT_SCHED_TYPE_SHIFT,
3422 DLB_EVENT_SCHED_TYPE_BYTE);
3423 sse_evt[0] = _mm_insert_epi8(sse_evt[0],
3424 sched_type_map[qes[1].sched_type] << DLB_EVENT_SCHED_TYPE_SHIFT,
3425 DLB_EVENT_SCHED_TYPE_BYTE + 8);
3426 sse_evt[1] = _mm_insert_epi8(sse_evt[1],
3427 sched_type_map[qes[2].sched_type] << DLB_EVENT_SCHED_TYPE_SHIFT,
3428 DLB_EVENT_SCHED_TYPE_BYTE);
3429 sse_evt[1] = _mm_insert_epi8(sse_evt[1],
3430 sched_type_map[qes[3].sched_type] << DLB_EVENT_SCHED_TYPE_SHIFT,
3431 DLB_EVENT_SCHED_TYPE_BYTE + 8);
3433 /* Store the metadata to the event (use the double-precision
3434 * _mm_storeh_pd because there is no integer function for storing the
3436 * events[0].event = sse_evt[0][63:0]
3437 * events[1].event = sse_evt[0][127:64]
3438 * events[2].event = sse_evt[1][63:0]
3439 * events[3].event = sse_evt[1][127:64]
3441 _mm_storel_epi64((__m128i *)&events[0].event, sse_evt[0]);
3442 _mm_storeh_pd((double *)&events[1].event, (__m128d) sse_evt[0]);
3443 _mm_storel_epi64((__m128i *)&events[2].event, sse_evt[1]);
3444 _mm_storeh_pd((double *)&events[3].event, (__m128d) sse_evt[1]);
3446 DLB2_INC_STAT(ev_port->stats.rx_sched_cnt[qes[0].sched_type], 1);
3447 DLB2_INC_STAT(ev_port->stats.rx_sched_cnt[qes[1].sched_type], 1);
3448 DLB2_INC_STAT(ev_port->stats.rx_sched_cnt[qes[2].sched_type], 1);
3449 DLB2_INC_STAT(ev_port->stats.rx_sched_cnt[qes[3].sched_type], 1);
3452 ev_port->stats.queue[events[0].queue_id].
3453 qid_depth[qes[0].qid_depth],
3456 ev_port->stats.queue[events[1].queue_id].
3457 qid_depth[qes[1].qid_depth],
3460 ev_port->stats.queue[events[2].queue_id].
3461 qid_depth[qes[2].qid_depth],
3464 ev_port->stats.queue[events[3].queue_id].
3465 qid_depth[qes[3].qid_depth],
3468 DLB2_INC_STAT(ev_port->stats.traffic.rx_ok, num_events);
3473 static __rte_always_inline int
3474 dlb2_recv_qe_sparse(struct dlb2_port *qm_port, struct dlb2_dequeue_qe *qe)
3476 volatile struct dlb2_dequeue_qe *cq_addr;
3477 uint8_t xor_mask[2] = {0x0F, 0x00};
3478 const uint8_t and_mask = 0x0F;
3479 __m128i *qes = (__m128i *)qe;
3480 uint8_t gen_bits, gen_bit;
3484 cq_addr = dlb2_port[qm_port->id][PORT_TYPE(qm_port)].cq_base;
3486 idx = qm_port->cq_idx_unmasked & qm_port->cq_depth_mask;
3487 /* Load the next 4 QEs */
3488 addr[0] = (uintptr_t)&cq_addr[idx];
3489 addr[1] = (uintptr_t)&cq_addr[(idx + 4) & qm_port->cq_depth_mask];
3490 addr[2] = (uintptr_t)&cq_addr[(idx + 8) & qm_port->cq_depth_mask];
3491 addr[3] = (uintptr_t)&cq_addr[(idx + 12) & qm_port->cq_depth_mask];
3493 /* Prefetch next batch of QEs (all CQs occupy minimum 8 cache lines) */
3494 rte_prefetch0(&cq_addr[(idx + 16) & qm_port->cq_depth_mask]);
3495 rte_prefetch0(&cq_addr[(idx + 20) & qm_port->cq_depth_mask]);
3496 rte_prefetch0(&cq_addr[(idx + 24) & qm_port->cq_depth_mask]);
3497 rte_prefetch0(&cq_addr[(idx + 28) & qm_port->cq_depth_mask]);
3499 /* Correct the xor_mask for wrap-around QEs */
3500 gen_bit = qm_port->gen_bit;
3501 xor_mask[gen_bit] ^= !!((idx + 4) > qm_port->cq_depth_mask) << 1;
3502 xor_mask[gen_bit] ^= !!((idx + 8) > qm_port->cq_depth_mask) << 2;
3503 xor_mask[gen_bit] ^= !!((idx + 12) > qm_port->cq_depth_mask) << 3;
3505 /* Read the cache lines backwards to ensure that if QE[N] (N > 0) is
3506 * valid, then QEs[0:N-1] are too.
3508 qes[3] = _mm_load_si128((__m128i *)(void *)addr[3]);
3509 rte_compiler_barrier();
3510 qes[2] = _mm_load_si128((__m128i *)(void *)addr[2]);
3511 rte_compiler_barrier();
3512 qes[1] = _mm_load_si128((__m128i *)(void *)addr[1]);
3513 rte_compiler_barrier();
3514 qes[0] = _mm_load_si128((__m128i *)(void *)addr[0]);
3516 /* Extract and combine the gen bits */
3517 gen_bits = ((_mm_extract_epi8(qes[0], 15) & 0x1) << 0) |
3518 ((_mm_extract_epi8(qes[1], 15) & 0x1) << 1) |
3519 ((_mm_extract_epi8(qes[2], 15) & 0x1) << 2) |
3520 ((_mm_extract_epi8(qes[3], 15) & 0x1) << 3);
3522 /* XOR the combined bits such that a 1 represents a valid QE */
3523 gen_bits ^= xor_mask[gen_bit];
3525 /* Mask off gen bits we don't care about */
3526 gen_bits &= and_mask;
3528 return __builtin_popcount(gen_bits);
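/* Vector conversion path: the four QEs are kept in XMM registers and their
 * metadata (QID, sched type, priority, event/sub-event type, flow_id) is
 * remapped with SSE shuffles and blends before being stored directly into the
 * caller's rte_event array.
 */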
3532 _process_deq_qes_vec_impl(struct dlb2_port *qm_port,
3533 struct rte_event *events,
3539 __m128i v_qe_status,
3540 uint32_t valid_events)
3542 /* Look up the event QIDs, using the hardware QIDs to index the
3543 * port's QID mapping.
3545 * Each v_qe_[0-4] is just a 16-byte load of the whole QE. It is
3546 * passed along in registers as the QE data is required later.
3548 * v_qe_meta is an u32 unpack of all 4x QEs. A.k.a, it contains one
3549 * 32-bit slice of each QE, so makes up a full SSE register. This
3550 * allows parallel processing of 4x QEs in a single register.
3553 __m128i v_qid_done = {0};
3554 int hw_qid0 = _mm_extract_epi8(v_qe_meta, 2);
3555 int hw_qid1 = _mm_extract_epi8(v_qe_meta, 6);
3556 int hw_qid2 = _mm_extract_epi8(v_qe_meta, 10);
3557 int hw_qid3 = _mm_extract_epi8(v_qe_meta, 14);
3559 int ev_qid0 = qm_port->qid_mappings[hw_qid0];
3560 int ev_qid1 = qm_port->qid_mappings[hw_qid1];
3561 int ev_qid2 = qm_port->qid_mappings[hw_qid2];
3562 int ev_qid3 = qm_port->qid_mappings[hw_qid3];
3564 v_qid_done = _mm_insert_epi8(v_qid_done, ev_qid0, 2);
3565 v_qid_done = _mm_insert_epi8(v_qid_done, ev_qid1, 6);
3566 v_qid_done = _mm_insert_epi8(v_qid_done, ev_qid2, 10);
3567 v_qid_done = _mm_insert_epi8(v_qid_done, ev_qid3, 14);
3569 /* Schedule field remapping using byte shuffle
3570 * - Full byte containing sched field handled here (op, rsvd are zero)
3571 * - Note sanitizing the register requires two masking ANDs:
3572 * 1) to strip prio/msg_type from byte for correct shuffle lookup
3573 * 2) to strip any non-sched-field lanes from any results to OR later
3574 * - Final byte result is >> 10 to another byte-lane inside the u32.
3575 * This makes the final combination OR easier to make the rte_event.
3577 __m128i v_sched_done;
3578 __m128i v_sched_bits;
3580 static const uint8_t sched_type_map[16] = {
3581 [DLB2_SCHED_ATOMIC] = RTE_SCHED_TYPE_ATOMIC,
3582 [DLB2_SCHED_UNORDERED] = RTE_SCHED_TYPE_PARALLEL,
3583 [DLB2_SCHED_ORDERED] = RTE_SCHED_TYPE_ORDERED,
3584 [DLB2_SCHED_DIRECTED] = RTE_SCHED_TYPE_ATOMIC,
3586 static const uint8_t sched_and_mask[16] = {
3587 0x00, 0x00, 0x00, 0x03,
3588 0x00, 0x00, 0x00, 0x03,
3589 0x00, 0x00, 0x00, 0x03,
3590 0x00, 0x00, 0x00, 0x03,
3592 const __m128i v_sched_map = _mm_loadu_si128(
3593 (const __m128i *)sched_type_map);
3594 __m128i v_sched_mask = _mm_loadu_si128(
3595 (const __m128i *)&sched_and_mask);
3596 v_sched_bits = _mm_and_si128(v_qe_meta, v_sched_mask);
3597 __m128i v_sched_remapped = _mm_shuffle_epi8(v_sched_map,
3599 __m128i v_preshift = _mm_and_si128(v_sched_remapped,
3601 v_sched_done = _mm_srli_epi32(v_preshift, 10);
3604 /* Priority handling
3605 * - QE provides 3 bits of priority
3606 * - Shift << 3 to move to MSBs for byte-prio in rte_event
3607 * - Mask bits to avoid pollution, leaving only 3 prio MSBs in reg
3609 __m128i v_prio_done;
3611 static const uint8_t prio_mask[16] = {
3612 0x00, 0x00, 0x00, 0x07 << 5,
3613 0x00, 0x00, 0x00, 0x07 << 5,
3614 0x00, 0x00, 0x00, 0x07 << 5,
3615 0x00, 0x00, 0x00, 0x07 << 5,
3617 __m128i v_prio_mask = _mm_loadu_si128(
3618 (const __m128i *)prio_mask);
3619 __m128i v_prio_shifted = _mm_slli_epi32(v_qe_meta, 3);
3620 v_prio_done = _mm_and_si128(v_prio_shifted, v_prio_mask);
3623 /* Event Sub/Type handling:
3624 * we want to keep the lower 12 bits of each QE. Shift up by 20 bits
3625 * to get the sub/ev type data into rte_event location, clearing the
3626 * lower 20 bits in the process.
3628 __m128i v_types_done;
3630 static const uint8_t event_mask[16] = {
3631 0x0f, 0x00, 0x00, 0x00,
3632 0x0f, 0x00, 0x00, 0x00,
3633 0x0f, 0x00, 0x00, 0x00,
3634 0x0f, 0x00, 0x00, 0x00,
3636 static const uint8_t sub_event_mask[16] = {
3637 0xff, 0x00, 0x00, 0x00,
3638 0xff, 0x00, 0x00, 0x00,
3639 0xff, 0x00, 0x00, 0x00,
3640 0xff, 0x00, 0x00, 0x00,
3642 static const uint8_t flow_mask[16] = {
3643 0xff, 0xff, 0x00, 0x00,
3644 0xff, 0xff, 0x00, 0x00,
3645 0xff, 0xff, 0x00, 0x00,
3646 0xff, 0xff, 0x00, 0x00,
3648 __m128i v_event_mask = _mm_loadu_si128(
3649 (const __m128i *)event_mask);
3650 __m128i v_sub_event_mask = _mm_loadu_si128(
3651 (const __m128i *)sub_event_mask);
3652 __m128i v_flow_mask = _mm_loadu_si128(
3653 (const __m128i *)flow_mask);
3654 __m128i v_sub = _mm_srli_epi32(v_qe_meta, 8);
3655 v_sub = _mm_and_si128(v_sub, v_sub_event_mask);
3656 __m128i v_type = _mm_and_si128(v_qe_meta, v_event_mask);
3657 v_type = _mm_slli_epi32(v_type, 8);
3658 v_types_done = _mm_or_si128(v_type, v_sub);
3659 v_types_done = _mm_slli_epi32(v_types_done, 20);
3660 __m128i v_flow = _mm_and_si128(v_qe_status, v_flow_mask);
3661 v_types_done = _mm_or_si128(v_types_done, v_flow);
3664 /* Combine QID, Sched and Prio fields, then Shift >> 8 bits to align
3665 * with the rte_event, allowing unpacks to move/blend with payload.
3667 __m128i v_q_s_p_done;
3669 __m128i v_qid_sched = _mm_or_si128(v_qid_done, v_sched_done);
3670 __m128i v_q_s_prio = _mm_or_si128(v_qid_sched, v_prio_done);
3671 v_q_s_p_done = _mm_srli_epi32(v_q_s_prio, 8);
3674 __m128i v_unpk_ev_23, v_unpk_ev_01, v_ev_2, v_ev_3, v_ev_0, v_ev_1;
3676 /* Unpack evs into u64 metadata, then indiv events */
3677 v_unpk_ev_23 = _mm_unpackhi_epi32(v_types_done, v_q_s_p_done);
3678 v_unpk_ev_01 = _mm_unpacklo_epi32(v_types_done, v_q_s_p_done);
3680 switch (valid_events) {
3682 v_ev_3 = _mm_blend_epi16(v_unpk_ev_23, v_qe_3, 0x0F);
3683 v_ev_3 = _mm_alignr_epi8(v_ev_3, v_ev_3, 8);
3684 _mm_storeu_si128((__m128i *)&events[3], v_ev_3);
3687 v_ev_2 = _mm_unpacklo_epi64(v_unpk_ev_23, v_qe_2);
3688 _mm_storeu_si128((__m128i *)&events[2], v_ev_2);
3691 v_ev_1 = _mm_blend_epi16(v_unpk_ev_01, v_qe_1, 0x0F);
3692 v_ev_1 = _mm_alignr_epi8(v_ev_1, v_ev_1, 8);
3693 _mm_storeu_si128((__m128i *)&events[1], v_ev_1);
3696 v_ev_0 = _mm_unpacklo_epi64(v_unpk_ev_01, v_qe_0);
3697 _mm_storeu_si128((__m128i *)&events[0], v_ev_0);
3701 static __rte_always_inline int
3702 dlb2_recv_qe_sparse_vec(struct dlb2_port *qm_port, void *events,
3703 uint32_t max_events)
3705 /* Using unmasked idx for perf, and masking manually */
3706 uint16_t idx = qm_port->cq_idx_unmasked;
3707 volatile struct dlb2_dequeue_qe *cq_addr;
3709 cq_addr = dlb2_port[qm_port->id][PORT_TYPE(qm_port)].cq_base;
3711 uintptr_t qe_ptr_3 = (uintptr_t)&cq_addr[(idx + 12) &
3712 qm_port->cq_depth_mask];
3713 uintptr_t qe_ptr_2 = (uintptr_t)&cq_addr[(idx + 8) &
3714 qm_port->cq_depth_mask];
3715 uintptr_t qe_ptr_1 = (uintptr_t)&cq_addr[(idx + 4) &
3716 qm_port->cq_depth_mask];
3717 uintptr_t qe_ptr_0 = (uintptr_t)&cq_addr[(idx + 0) &
3718 qm_port->cq_depth_mask];
3720 /* Load QEs from CQ: use compiler barriers to avoid load reordering */
3721 __m128i v_qe_3 = _mm_loadu_si128((const __m128i *)qe_ptr_3);
3722 rte_compiler_barrier();
3723 __m128i v_qe_2 = _mm_loadu_si128((const __m128i *)qe_ptr_2);
3724 rte_compiler_barrier();
3725 __m128i v_qe_1 = _mm_loadu_si128((const __m128i *)qe_ptr_1);
3726 rte_compiler_barrier();
3727 __m128i v_qe_0 = _mm_loadu_si128((const __m128i *)qe_ptr_0);
3729 /* Generate the status shuffle mask:
3730 * - Avoids load in otherwise load-heavy section of code
3731 * - Moves bytes 3,7,11,15 (gen bit bytes) to LSB bytes in XMM
3733 const uint32_t stat_shuf_bytes = (15 << 24) | (11 << 16) | (7 << 8) | 3;
3734 __m128i v_zeros = _mm_setzero_si128();
3735 __m128i v_ffff = _mm_cmpeq_epi8(v_zeros, v_zeros);
3736 __m128i v_stat_shuf_mask = _mm_insert_epi32(v_ffff, stat_shuf_bytes, 0);
3738 /* Extract u32 components required from the QE
3739 * - QE[64 to 95 ] for metadata (qid, sched, prio, event type, ...)
3740 * - QE[96 to 127] for status (cq gen bit, error)
3742 * Note that stage 1 of the unpacking is re-used for both u32 extracts
3744 __m128i v_qe_02 = _mm_unpackhi_epi32(v_qe_0, v_qe_2);
3745 __m128i v_qe_13 = _mm_unpackhi_epi32(v_qe_1, v_qe_3);
3746 __m128i v_qe_status = _mm_unpackhi_epi32(v_qe_02, v_qe_13);
3747 __m128i v_qe_meta = _mm_unpacklo_epi32(v_qe_02, v_qe_13);
3749 /* Status byte (gen_bit, error) handling:
3750 * - Shuffle to lanes 0,1,2,3, clear all others
3751 * - Shift right by 7 for gen bit to MSB, movemask to scalar
3752 * - Shift right by 2 for error bit to MSB, movemask to scalar
3754 __m128i v_qe_shuffled = _mm_shuffle_epi8(v_qe_status, v_stat_shuf_mask);
3755 __m128i v_qes_shift_gen_bit = _mm_slli_epi32(v_qe_shuffled, 7);
3756 int32_t qe_gen_bits = _mm_movemask_epi8(v_qes_shift_gen_bit) & 0xf;
3758 /* Expected vs Reality of QE Gen bits
3759 * - cq_rolling_mask provides expected bits
3760 * - QE loads, unpacks/shuffle and movemask provides reality
3761 * - XOR of the two gives bitmask of new packets
3762 * - POPCNT to get the number of new events
3764 uint64_t rolling = qm_port->cq_rolling_mask & 0xF;
3765 uint64_t qe_xor_bits = (qe_gen_bits ^ rolling);
3766 uint32_t count_new = __builtin_popcount(qe_xor_bits);
3767 count_new = RTE_MIN(count_new, max_events);
3771 /* emulate a 128 bit rotate using 2x 64-bit numbers and bit-shifts */
3773 uint64_t m_rshift = qm_port->cq_rolling_mask >> count_new;
3774 uint64_t m_lshift = qm_port->cq_rolling_mask << (64 - count_new);
3775 uint64_t m2_rshift = qm_port->cq_rolling_mask_2 >> count_new;
3776 uint64_t m2_lshift = qm_port->cq_rolling_mask_2 << (64 - count_new);
3778 /* shifted out of m2 into MSB of m */
3779 qm_port->cq_rolling_mask = (m_rshift | m2_lshift);
3781 /* shifted out of m "looped back" into MSB of m2 */
3782 qm_port->cq_rolling_mask_2 = (m2_rshift | m_lshift);
3784 /* Prefetch the next QEs - these loads should overlap with later work rather than add cycles */
3785 rte_prefetch0(&cq_addr[(idx + 16) & qm_port->cq_depth_mask]);
3786 rte_prefetch0(&cq_addr[(idx + 20) & qm_port->cq_depth_mask]);
3787 rte_prefetch0(&cq_addr[(idx + 24) & qm_port->cq_depth_mask]);
3788 rte_prefetch0(&cq_addr[(idx + 28) & qm_port->cq_depth_mask]);
3790 /* Convert QEs from XMM regs to events and store events directly */
3791 _process_deq_qes_vec_impl(qm_port, events, v_qe_3, v_qe_2, v_qe_1,
3792 v_qe_0, v_qe_meta, v_qe_status, count_new);
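/* Advance the CQ index: the unmasked index keeps a running count, the masked
 * index addresses the ring, and the gen bit flips each time the index
 * advances by one full CQ depth.
 */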
3798 dlb2_inc_cq_idx(struct dlb2_port *qm_port, int cnt)
3800 uint16_t idx = qm_port->cq_idx_unmasked + cnt;
3802 qm_port->cq_idx_unmasked = idx;
3803 qm_port->cq_idx = idx & qm_port->cq_depth_mask;
3804 qm_port->gen_bit = (~(idx >> qm_port->gen_bit_shift)) & 0x1;
3807 static inline int16_t
3808 dlb2_hw_dequeue_sparse(struct dlb2_eventdev *dlb2,
3809 struct dlb2_eventdev_port *ev_port,
3810 struct rte_event *events,
3812 uint64_t dequeue_timeout_ticks)
3814 uint64_t start_ticks = 0ULL;
3815 struct dlb2_port *qm_port;
3820 qm_port = &ev_port->qm_port;
3821 use_scalar = qm_port->use_scalar;
3823 if (!dlb2->global_dequeue_wait)
3824 timeout = dequeue_timeout_ticks;
3826 timeout = dlb2->global_dequeue_wait_ticks;
3828 start_ticks = rte_get_timer_cycles();
3830 use_scalar = use_scalar || (max_num & 0x3);
3832 while (num < max_num) {
3833 struct dlb2_dequeue_qe qes[DLB2_NUM_QES_PER_CACHE_LINE];
3836 num_avail = dlb2_recv_qe_sparse(qm_port, qes);
3837 num_avail = RTE_MIN(num_avail, max_num - num);
3838 dlb2_inc_cq_idx(qm_port, num_avail << 2);
3839 if (num_avail == DLB2_NUM_QES_PER_CACHE_LINE)
3840 num += dlb2_process_dequeue_four_qes(ev_port,
3845 num += dlb2_process_dequeue_qes(ev_port,
3850 } else { /* !use_scalar */
3851 num_avail = dlb2_recv_qe_sparse_vec(qm_port,
3855 dlb2_inc_cq_idx(qm_port, num_avail << 2);
3856 DLB2_INC_STAT(ev_port->stats.traffic.rx_ok, num_avail);
3861 else if (dlb2_dequeue_wait(dlb2, ev_port, qm_port,
3862 timeout, start_ticks))
3867 qm_port->owed_tokens += num;
3870 if (qm_port->token_pop_mode == AUTO_POP)
3871 dlb2_consume_qe_immediate(qm_port, num);
3873 ev_port->outstanding_releases += num;
3875 dlb2_port_credits_inc(qm_port, num);
3881 static __rte_always_inline int
3882 dlb2_recv_qe(struct dlb2_port *qm_port, struct dlb2_dequeue_qe *qe,
3885 uint8_t xor_mask[2][4] = { {0x0F, 0x0E, 0x0C, 0x08},
3886 {0x00, 0x01, 0x03, 0x07} };
3887 uint8_t and_mask[4] = {0x0F, 0x0E, 0x0C, 0x08};
3888 volatile struct dlb2_dequeue_qe *cq_addr;
3889 __m128i *qes = (__m128i *)qe;
3890 uint64_t *cache_line_base;
3893 cq_addr = dlb2_port[qm_port->id][PORT_TYPE(qm_port)].cq_base;
3894 cq_addr = &cq_addr[qm_port->cq_idx];
3896 cache_line_base = (void *)(((uintptr_t)cq_addr) & ~0x3F);
3897 *offset = ((uintptr_t)cq_addr & 0x30) >> 4;
3899 /* Load the next CQ cache line from memory. Pack these reads as tight
3900 * as possible to reduce the chance that DLB invalidates the line while
3901 * the CPU is reading it. Read the cache line backwards to ensure that
3902 * if QE[N] (N > 0) is valid, then QEs[0:N-1] are too.
3904 * (Valid QEs start at &qe[offset])
3906 qes[3] = _mm_load_si128((__m128i *)&cache_line_base[6]);
3907 qes[2] = _mm_load_si128((__m128i *)&cache_line_base[4]);
3908 qes[1] = _mm_load_si128((__m128i *)&cache_line_base[2]);
3909 qes[0] = _mm_load_si128((__m128i *)&cache_line_base[0]);
3911 /* Evict the cache line ASAP */
3912 rte_cldemote(cache_line_base);
3914 /* Extract and combine the gen bits */
3915 gen_bits = ((_mm_extract_epi8(qes[0], 15) & 0x1) << 0) |
3916 ((_mm_extract_epi8(qes[1], 15) & 0x1) << 1) |
3917 ((_mm_extract_epi8(qes[2], 15) & 0x1) << 2) |
3918 ((_mm_extract_epi8(qes[3], 15) & 0x1) << 3);
3920 /* XOR the combined bits such that a 1 represents a valid QE */
3921 gen_bits ^= xor_mask[qm_port->gen_bit][*offset];
3923 /* Mask off gen bits we don't care about */
3924 gen_bits &= and_mask[*offset];
3926 return __builtin_popcount(gen_bits);
3929 static inline int16_t
3930 dlb2_hw_dequeue(struct dlb2_eventdev *dlb2,
3931 struct dlb2_eventdev_port *ev_port,
3932 struct rte_event *events,
3934 uint64_t dequeue_timeout_ticks)
3937 uint64_t start_ticks = 0ULL;
3938 struct dlb2_port *qm_port;
3941 qm_port = &ev_port->qm_port;
3943 /* We have a special implementation for waiting. Wait can be:
3944 * 1) no waiting at all
3946 * 3) wait for interrupt. If wakeup and poll time
3947 * has expired, then return to caller
3948 * 4) umonitor/umwait repeatedly up to poll time
3951 /* If configured for per-dequeue wait, then use the wait value provided
3952 * to this API. Otherwise we must use the global
3953 * value from eventdev config time.
3955 if (!dlb2->global_dequeue_wait)
3956 timeout = dequeue_timeout_ticks;
3958 timeout = dlb2->global_dequeue_wait_ticks;
3960 start_ticks = rte_get_timer_cycles();
3962 while (num < max_num) {
3963 struct dlb2_dequeue_qe qes[DLB2_NUM_QES_PER_CACHE_LINE];
3967 /* Copy up to 4 QEs from the current cache line into qes */
3968 num_avail = dlb2_recv_qe(qm_port, qes, &offset);
3970 /* But don't process more than the user requested */
3971 num_avail = RTE_MIN(num_avail, max_num - num);
3973 dlb2_inc_cq_idx(qm_port, num_avail);
3975 if (num_avail == DLB2_NUM_QES_PER_CACHE_LINE)
3976 num += dlb2_process_dequeue_four_qes(ev_port,
3981 num += dlb2_process_dequeue_qes(ev_port,
3986 else if ((timeout == 0) || (num > 0))
3987 /* Not waiting in any form, or 1+ events received? */
3989 else if (dlb2_dequeue_wait(dlb2, ev_port, qm_port,
3990 timeout, start_ticks))
3994 qm_port->owed_tokens += num;
3997 if (qm_port->token_pop_mode == AUTO_POP)
3998 dlb2_consume_qe_immediate(qm_port, num);
4000 ev_port->outstanding_releases += num;
4002 dlb2_port_credits_inc(qm_port, num);
4009 dlb2_event_dequeue_burst(void *event_port, struct rte_event *ev, uint16_t num,
4012 struct dlb2_eventdev_port *ev_port = event_port;
4013 struct dlb2_port *qm_port = &ev_port->qm_port;
4014 struct dlb2_eventdev *dlb2 = ev_port->dlb2;
4017 RTE_ASSERT(ev_port->setup_done);
4018 RTE_ASSERT(ev != NULL);
4020 if (ev_port->implicit_release && ev_port->outstanding_releases > 0) {
4021 uint16_t out_rels = ev_port->outstanding_releases;
4023 dlb2_event_release(dlb2, ev_port->id, out_rels);
4025 DLB2_INC_STAT(ev_port->stats.tx_implicit_rel, out_rels);
4028 if (qm_port->token_pop_mode == DEFERRED_POP && qm_port->owed_tokens)
4029 dlb2_consume_qe_immediate(qm_port, qm_port->owed_tokens);
4031 cnt = dlb2_hw_dequeue(dlb2, ev_port, ev, num, wait);
4033 DLB2_INC_STAT(ev_port->stats.traffic.total_polls, 1);
4034 DLB2_INC_STAT(ev_port->stats.traffic.zero_polls, ((cnt == 0) ? 1 : 0));
4040 dlb2_event_dequeue(void *event_port, struct rte_event *ev, uint64_t wait)
4042 return dlb2_event_dequeue_burst(event_port, ev, 1, wait);
4046 dlb2_event_dequeue_burst_sparse(void *event_port, struct rte_event *ev,
4047 uint16_t num, uint64_t wait)
4049 struct dlb2_eventdev_port *ev_port = event_port;
4050 struct dlb2_port *qm_port = &ev_port->qm_port;
4051 struct dlb2_eventdev *dlb2 = ev_port->dlb2;
4054 RTE_ASSERT(ev_port->setup_done);
4055 RTE_ASSERT(ev != NULL);
4057 if (ev_port->implicit_release && ev_port->outstanding_releases > 0) {
4058 uint16_t out_rels = ev_port->outstanding_releases;
4060 dlb2_event_release(dlb2, ev_port->id, out_rels);
4062 DLB2_INC_STAT(ev_port->stats.tx_implicit_rel, out_rels);
4065 if (qm_port->token_pop_mode == DEFERRED_POP && qm_port->owed_tokens)
4066 dlb2_consume_qe_immediate(qm_port, qm_port->owed_tokens);
4068 cnt = dlb2_hw_dequeue_sparse(dlb2, ev_port, ev, num, wait);
4070 DLB2_INC_STAT(ev_port->stats.traffic.total_polls, 1);
4071 DLB2_INC_STAT(ev_port->stats.traffic.zero_polls, ((cnt == 0) ? 1 : 0));
4076 dlb2_event_dequeue_sparse(void *event_port, struct rte_event *ev,
4079 return dlb2_event_dequeue_burst_sparse(event_port, ev, 1, wait);
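/* Drain one port at stop time: dequeue every available event, hand it to the
 * application's stop-flush callback if one is registered, release
 * load-balanced events back to the device, then issue any remaining
 * outstanding releases.
 */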
4083 dlb2_flush_port(struct rte_eventdev *dev, int port_id)
4085 struct dlb2_eventdev *dlb2 = dlb2_pmd_priv(dev);
4086 eventdev_stop_flush_t flush;
4087 struct rte_event ev;
4092 flush = dev->dev_ops->dev_stop_flush;
4093 dev_id = dev->data->dev_id;
4094 arg = dev->data->dev_stop_flush_arg;
4096 while (rte_event_dequeue_burst(dev_id, port_id, &ev, 1, 0)) {
4098 flush(dev_id, ev, arg);
4100 if (dlb2->ev_ports[port_id].qm_port.is_directed)
4103 ev.op = RTE_EVENT_OP_RELEASE;
4105 rte_event_enqueue_burst(dev_id, port_id, &ev, 1);
4108 /* Enqueue any additional outstanding releases */
4109 ev.op = RTE_EVENT_OP_RELEASE;
4111 for (i = dlb2->ev_ports[port_id].outstanding_releases; i > 0; i--)
4112 rte_event_enqueue_burst(dev_id, port_id, &ev, 1);
4116 dlb2_get_ldb_queue_depth(struct dlb2_eventdev *dlb2,
4117 struct dlb2_eventdev_queue *queue)
4119 struct dlb2_hw_dev *handle = &dlb2->qm_instance;
4120 struct dlb2_get_ldb_queue_depth_args cfg;
4123 cfg.queue_id = queue->qm_queue.id;
4125 ret = dlb2_iface_get_ldb_queue_depth(handle, &cfg);
4127 DLB2_LOG_ERR("dlb2: get_ldb_queue_depth ret=%d (driver status: %s)\n",
4128 ret, dlb2_error_strings[cfg.response.status]);
4132 return cfg.response.id;
4136 dlb2_get_dir_queue_depth(struct dlb2_eventdev *dlb2,
4137 struct dlb2_eventdev_queue *queue)
4139 struct dlb2_hw_dev *handle = &dlb2->qm_instance;
4140 struct dlb2_get_dir_queue_depth_args cfg;
4143 cfg.queue_id = queue->qm_queue.id;
4145 ret = dlb2_iface_get_dir_queue_depth(handle, &cfg);
4147 DLB2_LOG_ERR("dlb2: get_dir_queue_depth ret=%d (driver status: %s)\n",
4148 ret, dlb2_error_strings[cfg.response.status]);
4152 return cfg.response.id;
4156 dlb2_get_queue_depth(struct dlb2_eventdev *dlb2,
4157 struct dlb2_eventdev_queue *queue)
4159 if (queue->qm_queue.is_directed)
4160 return dlb2_get_dir_queue_depth(dlb2, queue);
4162 return dlb2_get_ldb_queue_depth(dlb2, queue);
4166 dlb2_queue_is_empty(struct dlb2_eventdev *dlb2,
4167 struct dlb2_eventdev_queue *queue)
4169 return dlb2_get_queue_depth(dlb2, queue) == 0;
4173 dlb2_linked_queues_empty(struct dlb2_eventdev *dlb2)
4177 for (i = 0; i < dlb2->num_queues; i++) {
4178 if (dlb2->ev_queues[i].num_links == 0)
4180 if (!dlb2_queue_is_empty(dlb2, &dlb2->ev_queues[i]))
4188 dlb2_queues_empty(struct dlb2_eventdev *dlb2)
4192 for (i = 0; i < dlb2->num_queues; i++) {
4193 if (!dlb2_queue_is_empty(dlb2, &dlb2->ev_queues[i]))
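/* Drain the whole device: flush every port until all linked queues are empty,
 * then temporarily link a load-balanced port to each non-empty unlinked queue
 * and flush through it.
 */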
4201 dlb2_drain(struct rte_eventdev *dev)
4203 struct dlb2_eventdev *dlb2 = dlb2_pmd_priv(dev);
4204 struct dlb2_eventdev_port *ev_port = NULL;
4208 dev_id = dev->data->dev_id;
4210 while (!dlb2_linked_queues_empty(dlb2)) {
4211 /* Flush all the ev_ports, which will drain all their connected
4214 for (i = 0; i < dlb2->num_ports; i++)
4215 dlb2_flush_port(dev, i);
4218 /* The queues are empty, but there may be events left in the ports. */
4219 for (i = 0; i < dlb2->num_ports; i++)
4220 dlb2_flush_port(dev, i);
4222 /* If the domain's queues are empty, we're done. */
4223 if (dlb2_queues_empty(dlb2))
4226 /* Else, there must be at least one unlinked load-balanced queue.
4227 * Select a load-balanced port with which to drain the unlinked
4230 for (i = 0; i < dlb2->num_ports; i++) {
4231 ev_port = &dlb2->ev_ports[i];
4233 if (!ev_port->qm_port.is_directed)
4237 if (i == dlb2->num_ports) {
4238 DLB2_LOG_ERR("internal error: no LDB ev_ports\n");
4243 rte_event_port_unlink(dev_id, ev_port->id, NULL, 0);
4246 DLB2_LOG_ERR("internal error: failed to unlink ev_port %d\n",
4251 for (i = 0; i < dlb2->num_queues; i++) {
4255 if (dlb2_queue_is_empty(dlb2, &dlb2->ev_queues[i]))
4261 /* Link the ev_port to the queue */
4262 ret = rte_event_port_link(dev_id, ev_port->id, &qid, &prio, 1);
4264 DLB2_LOG_ERR("internal error: failed to link ev_port %d to queue %d\n",
4269 /* Flush the queue */
4270 while (!dlb2_queue_is_empty(dlb2, &dlb2->ev_queues[i]))
4271 dlb2_flush_port(dev, ev_port->id);
4273 /* Drain any extant events in the ev_port. */
4274 dlb2_flush_port(dev, ev_port->id);
4276 /* Unlink the ev_port from the queue */
4277 ret = rte_event_port_unlink(dev_id, ev_port->id, &qid, 1);
4279 DLB2_LOG_ERR("internal error: failed to unlink ev_port %d from queue %d\n",
4287 dlb2_eventdev_stop(struct rte_eventdev *dev)
4289 struct dlb2_eventdev *dlb2 = dlb2_pmd_priv(dev);
4291 rte_spinlock_lock(&dlb2->qm_instance.resource_lock);
4293 if (dlb2->run_state == DLB2_RUN_STATE_STOPPED) {
4294 DLB2_LOG_DBG("Internal error: already stopped\n");
4295 rte_spinlock_unlock(&dlb2->qm_instance.resource_lock);
4297 } else if (dlb2->run_state != DLB2_RUN_STATE_STARTED) {
4298 DLB2_LOG_ERR("Internal error: bad state %d for dev_stop\n",
4299 (int)dlb2->run_state);
4300 rte_spinlock_unlock(&dlb2->qm_instance.resource_lock);
4304 dlb2->run_state = DLB2_RUN_STATE_STOPPING;
4306 rte_spinlock_unlock(&dlb2->qm_instance.resource_lock);
4310 dlb2->run_state = DLB2_RUN_STATE_STOPPED;
4314 dlb2_eventdev_close(struct rte_eventdev *dev)
4316 dlb2_hw_reset_sched_domain(dev, false);
4322 dlb2_eventdev_queue_release(struct rte_eventdev *dev, uint8_t id)
4327 /* This function intentionally left blank. */
4331 dlb2_eventdev_port_release(void *port)
4333 struct dlb2_eventdev_port *ev_port = port;
4334 struct dlb2_port *qm_port;
4337 qm_port = &ev_port->qm_port;
4338 if (qm_port->config_state == DLB2_CONFIGURED)
4339 dlb2_free_qe_mem(qm_port);
4344 dlb2_eventdev_timeout_ticks(struct rte_eventdev *dev, uint64_t ns,
4345 uint64_t *timeout_ticks)
4348 uint64_t cycles_per_ns = rte_get_timer_hz() / 1E9;
4350 *timeout_ticks = ns * cycles_per_ns;
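/* Entry point installation for the PMD. The dequeue entry points are chosen
 * per device: sparse CQ poll mode requires its own receive path
 * (dlb2_event_dequeue_burst_sparse and friends).
 */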
4356 dlb2_entry_points_init(struct rte_eventdev *dev)
4358 struct dlb2_eventdev *dlb2;
4360 /* Expose PMD's eventdev interface */
4361 static struct rte_eventdev_ops dlb2_eventdev_entry_ops = {
4362 .dev_infos_get = dlb2_eventdev_info_get,
4363 .dev_configure = dlb2_eventdev_configure,
4364 .dev_start = dlb2_eventdev_start,
4365 .dev_stop = dlb2_eventdev_stop,
4366 .dev_close = dlb2_eventdev_close,
4367 .queue_def_conf = dlb2_eventdev_queue_default_conf_get,
4368 .queue_setup = dlb2_eventdev_queue_setup,
4369 .queue_release = dlb2_eventdev_queue_release,
4370 .port_def_conf = dlb2_eventdev_port_default_conf_get,
4371 .port_setup = dlb2_eventdev_port_setup,
4372 .port_release = dlb2_eventdev_port_release,
4373 .port_link = dlb2_eventdev_port_link,
4374 .port_unlink = dlb2_eventdev_port_unlink,
4375 .port_unlinks_in_progress =
4376 dlb2_eventdev_port_unlinks_in_progress,
4377 .timeout_ticks = dlb2_eventdev_timeout_ticks,
4378 .dump = dlb2_eventdev_dump,
4379 .xstats_get = dlb2_eventdev_xstats_get,
4380 .xstats_get_names = dlb2_eventdev_xstats_get_names,
4381 .xstats_get_by_name = dlb2_eventdev_xstats_get_by_name,
4382 .xstats_reset = dlb2_eventdev_xstats_reset,
4383 .dev_selftest = test_dlb2_eventdev,
4386 /* Hook the ops table and enqueue/dequeue handlers into the eventdev */
4388 dev->dev_ops = &dlb2_eventdev_entry_ops;
4389 dev->enqueue = dlb2_event_enqueue;
4390 dev->enqueue_burst = dlb2_event_enqueue_burst;
4391 dev->enqueue_new_burst = dlb2_event_enqueue_new_burst;
4392 dev->enqueue_forward_burst = dlb2_event_enqueue_forward_burst;
4394 dlb2 = dev->data->dev_private;
4395 if (dlb2->poll_mode == DLB2_CQ_POLL_MODE_SPARSE) {
4396 dev->dequeue = dlb2_event_dequeue_sparse;
4397 dev->dequeue_burst = dlb2_event_dequeue_burst_sparse;
4399 dev->dequeue = dlb2_event_dequeue;
4400 dev->dequeue_burst = dlb2_event_dequeue_burst;
4405 dlb2_primary_eventdev_probe(struct rte_eventdev *dev,
4407 struct dlb2_devargs *dlb2_args)
4409 struct dlb2_eventdev *dlb2;
4412 dlb2 = dev->data->dev_private;
4414 dlb2->event_dev = dev; /* backlink */
4416 evdev_dlb2_default_info.driver_name = name;
4418 dlb2->max_num_events_override = dlb2_args->max_num_events;
4419 dlb2->num_dir_credits_override = dlb2_args->num_dir_credits_override;
4420 dlb2->qm_instance.cos_id = dlb2_args->cos_id;
4421 dlb2->poll_interval = dlb2_args->poll_interval;
4422 dlb2->sw_credit_quanta = dlb2_args->sw_credit_quanta;
4423 dlb2->default_depth_thresh = dlb2_args->default_depth_thresh;
4424 dlb2->vector_opts_disabled = dlb2_args->vector_opts_disabled;
4426 err = dlb2_iface_open(&dlb2->qm_instance, name);
4428 DLB2_LOG_ERR("could not open event hardware device, err=%d\n",
4433 err = dlb2_iface_get_device_version(&dlb2->qm_instance,
4436 DLB2_LOG_ERR("dlb2: failed to get the device version, err=%d\n",
4441 err = dlb2_hw_query_resources(dlb2);
4443 DLB2_LOG_ERR("get resources err=%d for %s\n",
4448 dlb2_iface_hardware_init(&dlb2->qm_instance);
4450 err = dlb2_iface_get_cq_poll_mode(&dlb2->qm_instance, &dlb2->poll_mode);
4452 DLB2_LOG_ERR("dlb2: failed to get the poll mode, err=%d\n",
4457 /* Complete xstats runtime initialization */
4458 err = dlb2_xstats_init(dlb2);
4460 DLB2_LOG_ERR("dlb2: failed to init xstats, err=%d\n", err);
4464 /* Initialize each port's token pop mode */
4465 for (i = 0; i < DLB2_MAX_NUM_PORTS(dlb2->version); i++)
4466 dlb2->ev_ports[i].qm_port.token_pop_mode = AUTO_POP;
4468 rte_spinlock_init(&dlb2->qm_instance.resource_lock);
4470 dlb2_iface_low_level_io_init();
4472 dlb2_entry_points_init(dev);
4474 dlb2_init_queue_depth_thresholds(dlb2,
4475 dlb2_args->qid_depth_thresholds.val);
4481 dlb2_secondary_eventdev_probe(struct rte_eventdev *dev,
4484 struct dlb2_eventdev *dlb2;
4487 dlb2 = dev->data->dev_private;
4489 evdev_dlb2_default_info.driver_name = name;
4491 err = dlb2_iface_open(&dlb2->qm_instance, name);
4493 DLB2_LOG_ERR("could not open event hardware device, err=%d\n",
4498 err = dlb2_hw_query_resources(dlb2);
4500 DLB2_LOG_ERR("get resources err=%d for %s\n",
4505 dlb2_iface_low_level_io_init();
4507 dlb2_entry_points_init(dev);
4513 dlb2_parse_params(const char *params,
4515 struct dlb2_devargs *dlb2_args,
4519 static const char * const args[] = { NUMA_NODE_ARG,
4520 DLB2_MAX_NUM_EVENTS,
4521 DLB2_NUM_DIR_CREDITS,
4523 DLB2_QID_DEPTH_THRESH_ARG,
4525 DLB2_POLL_INTERVAL_ARG,
4526 DLB2_SW_CREDIT_QUANTA_ARG,
4527 DLB2_DEPTH_THRESH_ARG,
4528 DLB2_VECTOR_OPTS_DISAB_ARG,
4531 if (params != NULL && params[0] != '\0') {
4532 struct rte_kvargs *kvlist = rte_kvargs_parse(params, args);
4534 if (kvlist == NULL) {
4536 "Ignoring unsupported parameters when creating device '%s'\n",
4539 int ret = rte_kvargs_process(kvlist, NUMA_NODE_ARG,
4541 &dlb2_args->socket_id);
4543 DLB2_LOG_ERR("%s: Error parsing numa node parameter",
4545 rte_kvargs_free(kvlist);
4549 ret = rte_kvargs_process(kvlist, DLB2_MAX_NUM_EVENTS,
4551 &dlb2_args->max_num_events);
4553 DLB2_LOG_ERR("%s: Error parsing max_num_events parameter",
4555 rte_kvargs_free(kvlist);
4559 if (version == DLB2_HW_V2) {
4560 ret = rte_kvargs_process(kvlist,
4561 DLB2_NUM_DIR_CREDITS,
4562 set_num_dir_credits,
4563 &dlb2_args->num_dir_credits_override);
4565 DLB2_LOG_ERR("%s: Error parsing num_dir_credits parameter",
4567 rte_kvargs_free(kvlist);
4571 ret = rte_kvargs_process(kvlist, DEV_ID_ARG,
4573 &dlb2_args->dev_id);
4575 DLB2_LOG_ERR("%s: Error parsing dev_id parameter",
4577 rte_kvargs_free(kvlist);
4581 if (version == DLB2_HW_V2) {
4582 ret = rte_kvargs_process(
4584 DLB2_QID_DEPTH_THRESH_ARG,
4585 set_qid_depth_thresh,
4586 &dlb2_args->qid_depth_thresholds);
4588 ret = rte_kvargs_process(
4590 DLB2_QID_DEPTH_THRESH_ARG,
4591 set_qid_depth_thresh_v2_5,
4592 &dlb2_args->qid_depth_thresholds);
4595 DLB2_LOG_ERR("%s: Error parsing qid_depth_thresh parameter",
4597 rte_kvargs_free(kvlist);
4601 ret = rte_kvargs_process(kvlist, DLB2_COS_ARG,
4603 &dlb2_args->cos_id);
4605 DLB2_LOG_ERR("%s: Error parsing cos parameter",
4607 rte_kvargs_free(kvlist);
4611 ret = rte_kvargs_process(kvlist, DLB2_POLL_INTERVAL_ARG,
4613 &dlb2_args->poll_interval);
4615 DLB2_LOG_ERR("%s: Error parsing poll interval parameter",
4617 rte_kvargs_free(kvlist);
4621 ret = rte_kvargs_process(kvlist,
4622 DLB2_SW_CREDIT_QUANTA_ARG,
4623 set_sw_credit_quanta,
4624 &dlb2_args->sw_credit_quanta);
4626 DLB2_LOG_ERR("%s: Error parsing sw credit quanta parameter",
4628 rte_kvargs_free(kvlist);
4632 ret = rte_kvargs_process(kvlist, DLB2_DEPTH_THRESH_ARG,
4633 set_default_depth_thresh,
4634 &dlb2_args->default_depth_thresh);
4636 DLB2_LOG_ERR("%s: Error parsing default depth thresh parameter",
4638 rte_kvargs_free(kvlist);
4642 ret = rte_kvargs_process(kvlist,
4643 DLB2_VECTOR_OPTS_DISAB_ARG,
4644 set_vector_opts_disab,
4645 &dlb2_args->vector_opts_disabled);
4647 DLB2_LOG_ERR("%s: Error parsing vector opts disabled parameter",
4649 rte_kvargs_free(kvlist);
4653 rte_kvargs_free(kvlist);
4658 RTE_LOG_REGISTER(eventdev_dlb2_log_level, pmd.event.dlb2, NOTICE);