1 /* SPDX-License-Identifier: BSD-3-Clause
2 * Copyright(c) 2016-2020 Intel Corporation
16 #include <rte_common.h>
17 #include <rte_config.h>
18 #include <rte_cycles.h>
19 #include <rte_debug.h>
21 #include <rte_errno.h>
22 #include <rte_eventdev.h>
23 #include <eventdev_pmd.h>
25 #include <rte_kvargs.h>
27 #include <rte_malloc.h>
29 #include <rte_power_intrinsics.h>
30 #include <rte_prefetch.h>
32 #include <rte_string_fns.h>
34 #include "dlb2_priv.h"
35 #include "dlb2_iface.h"
36 #include "dlb2_inline_fns.h"
39 * Resources exposed to eventdev. Some values are overridden at runtime using
40 * values returned by the DLB kernel driver.
42 #if (RTE_EVENT_MAX_QUEUES_PER_DEV > UINT8_MAX)
43 #error "RTE_EVENT_MAX_QUEUES_PER_DEV cannot fit in member max_event_queues"
45 static struct rte_event_dev_info evdev_dlb2_default_info = {
46 .driver_name = "", /* probe will set */
47 .min_dequeue_timeout_ns = DLB2_MIN_DEQUEUE_TIMEOUT_NS,
48 .max_dequeue_timeout_ns = DLB2_MAX_DEQUEUE_TIMEOUT_NS,
49 #if (RTE_EVENT_MAX_QUEUES_PER_DEV < DLB2_MAX_NUM_LDB_QUEUES)
50 .max_event_queues = RTE_EVENT_MAX_QUEUES_PER_DEV,
52 .max_event_queues = DLB2_MAX_NUM_LDB_QUEUES,
54 .max_event_queue_flows = DLB2_MAX_NUM_FLOWS,
55 .max_event_queue_priority_levels = DLB2_QID_PRIORITIES,
56 .max_event_priority_levels = DLB2_QID_PRIORITIES,
57 .max_event_ports = DLB2_MAX_NUM_LDB_PORTS,
58 .max_event_port_dequeue_depth = DLB2_DEFAULT_CQ_DEPTH,
59 .max_event_port_enqueue_depth = DLB2_MAX_ENQUEUE_DEPTH,
60 .max_event_port_links = DLB2_MAX_NUM_QIDS_PER_LDB_CQ,
61 .max_num_events = DLB2_MAX_NUM_LDB_CREDITS,
62 .max_single_link_event_port_queue_pairs =
63 DLB2_MAX_NUM_DIR_PORTS(DLB2_HW_V2),
64 .event_dev_cap = (RTE_EVENT_DEV_CAP_QUEUE_QOS |
65 RTE_EVENT_DEV_CAP_EVENT_QOS |
66 RTE_EVENT_DEV_CAP_BURST_MODE |
67 RTE_EVENT_DEV_CAP_DISTRIBUTED_SCHED |
68 RTE_EVENT_DEV_CAP_IMPLICIT_RELEASE_DISABLE |
69 RTE_EVENT_DEV_CAP_QUEUE_ALL_TYPES |
70 RTE_EVENT_DEV_CAP_MAINTENANCE_FREE),
73 struct process_local_port_data
74 dlb2_port[DLB2_MAX_NUM_PORTS_ALL][DLB2_NUM_PORT_TYPES];
77 dlb2_free_qe_mem(struct dlb2_port *qm_port)
82 rte_free(qm_port->qe4);
85 rte_free(qm_port->int_arm_qe);
86 qm_port->int_arm_qe = NULL;
88 rte_free(qm_port->consume_qe);
89 qm_port->consume_qe = NULL;
91 rte_memzone_free(dlb2_port[qm_port->id][PORT_TYPE(qm_port)].mz);
92 dlb2_port[qm_port->id][PORT_TYPE(qm_port)].mz = NULL;
95 /* override defaults with value(s) provided on command line */
97 dlb2_init_queue_depth_thresholds(struct dlb2_eventdev *dlb2,
98 int *qid_depth_thresholds)
102 for (q = 0; q < DLB2_MAX_NUM_QUEUES(dlb2->version); q++) {
103 if (qid_depth_thresholds[q] != 0)
104 dlb2->ev_queues[q].depth_threshold =
105 qid_depth_thresholds[q];
110 dlb2_hw_query_resources(struct dlb2_eventdev *dlb2)
112 struct dlb2_hw_dev *handle = &dlb2->qm_instance;
113 struct dlb2_hw_resource_info *dlb2_info = &handle->info;
117 /* Query driver resources provisioned for this device */
119 ret = dlb2_iface_get_num_resources(handle,
120 &dlb2->hw_rsrc_query_results);
122 DLB2_LOG_ERR("ioctl get dlb2 num resources, err=%d\n", ret);
126 /* Complete filling in device resource info returned to evdev app,
127 * overriding any default values.
128 * The capabilities (CAPs) were set at compile time.
131 if (dlb2->max_cq_depth != DLB2_DEFAULT_CQ_DEPTH)
132 num_ldb_ports = DLB2_MAX_HL_ENTRIES / dlb2->max_cq_depth;
134 num_ldb_ports = dlb2->hw_rsrc_query_results.num_ldb_ports;
136 evdev_dlb2_default_info.max_event_queues =
137 dlb2->hw_rsrc_query_results.num_ldb_queues;
139 evdev_dlb2_default_info.max_event_ports = num_ldb_ports;
141 if (dlb2->version == DLB2_HW_V2_5) {
142 evdev_dlb2_default_info.max_num_events =
143 dlb2->hw_rsrc_query_results.num_credits;
145 evdev_dlb2_default_info.max_num_events =
146 dlb2->hw_rsrc_query_results.num_ldb_credits;
148 /* Save off values used when creating the scheduling domain. */
150 handle->info.num_sched_domains =
151 dlb2->hw_rsrc_query_results.num_sched_domains;
153 if (dlb2->version == DLB2_HW_V2_5) {
154 handle->info.hw_rsrc_max.nb_events_limit =
155 dlb2->hw_rsrc_query_results.num_credits;
157 handle->info.hw_rsrc_max.nb_events_limit =
158 dlb2->hw_rsrc_query_results.num_ldb_credits;
160 handle->info.hw_rsrc_max.num_queues =
161 dlb2->hw_rsrc_query_results.num_ldb_queues +
162 dlb2->hw_rsrc_query_results.num_dir_ports;
164 handle->info.hw_rsrc_max.num_ldb_queues =
165 dlb2->hw_rsrc_query_results.num_ldb_queues;
167 handle->info.hw_rsrc_max.num_ldb_ports = num_ldb_ports;
169 handle->info.hw_rsrc_max.num_dir_ports =
170 dlb2->hw_rsrc_query_results.num_dir_ports;
172 handle->info.hw_rsrc_max.reorder_window_size =
173 dlb2->hw_rsrc_query_results.num_hist_list_entries;
175 rte_memcpy(dlb2_info, &handle->info.hw_rsrc_max, sizeof(*dlb2_info));
180 #define DLB2_BASE_10 10
183 dlb2_string_to_int(int *result, const char *str)
188 if (str == NULL || result == NULL)
192 ret = strtol(str, &endptr, DLB2_BASE_10);
196 /* long int and int may have different widths on some architectures */
197 if (ret < INT_MIN || ret > INT_MAX || endptr == str)
205 set_numa_node(const char *key __rte_unused, const char *value, void *opaque)
207 int *socket_id = opaque;
210 ret = dlb2_string_to_int(socket_id, value);
214 if (*socket_id > RTE_MAX_NUMA_NODES)
221 set_max_cq_depth(const char *key __rte_unused,
225 int *max_cq_depth = opaque;
228 if (value == NULL || opaque == NULL) {
229 DLB2_LOG_ERR("NULL pointer\n");
233 ret = dlb2_string_to_int(max_cq_depth, value);
237 if (*max_cq_depth < DLB2_MIN_CQ_DEPTH_OVERRIDE ||
238 *max_cq_depth > DLB2_MAX_CQ_DEPTH_OVERRIDE ||
239 !rte_is_power_of_2(*max_cq_depth)) {
240 DLB2_LOG_ERR("dlb2: max_cq_depth must be between %d and %d and a power of 2\n",
241 DLB2_MIN_CQ_DEPTH_OVERRIDE,
242 DLB2_MAX_CQ_DEPTH_OVERRIDE);
250 set_max_num_events(const char *key __rte_unused,
254 int *max_num_events = opaque;
257 if (value == NULL || opaque == NULL) {
258 DLB2_LOG_ERR("NULL pointer\n");
262 ret = dlb2_string_to_int(max_num_events, value);
266 if (*max_num_events < 0 || *max_num_events >
267 DLB2_MAX_NUM_LDB_CREDITS) {
268 DLB2_LOG_ERR("dlb2: max_num_events must be between 0 and %d\n",
269 DLB2_MAX_NUM_LDB_CREDITS);
277 set_num_dir_credits(const char *key __rte_unused,
281 int *num_dir_credits = opaque;
284 if (value == NULL || opaque == NULL) {
285 DLB2_LOG_ERR("NULL pointer\n");
289 ret = dlb2_string_to_int(num_dir_credits, value);
293 if (*num_dir_credits < 0 ||
294 *num_dir_credits > DLB2_MAX_NUM_DIR_CREDITS(DLB2_HW_V2)) {
295 DLB2_LOG_ERR("dlb2: num_dir_credits must be between 0 and %d\n",
296 DLB2_MAX_NUM_DIR_CREDITS(DLB2_HW_V2));
304 set_dev_id(const char *key __rte_unused,
308 int *dev_id = opaque;
311 if (value == NULL || opaque == NULL) {
312 DLB2_LOG_ERR("NULL pointer\n");
316 ret = dlb2_string_to_int(dev_id, value);
324 set_cos(const char *key __rte_unused,
328 enum dlb2_cos *cos_id = opaque;
332 if (value == NULL || opaque == NULL) {
333 DLB2_LOG_ERR("NULL pointer\n");
337 ret = dlb2_string_to_int(&x, value);
341 if (x != DLB2_COS_DEFAULT && (x < DLB2_COS_0 || x > DLB2_COS_3)) {
343 "COS %d out of range, must be DLB2_COS_DEFAULT or 0-3\n",
354 set_poll_interval(const char *key __rte_unused,
358 int *poll_interval = opaque;
361 if (value == NULL || opaque == NULL) {
362 DLB2_LOG_ERR("NULL pointer\n");
366 ret = dlb2_string_to_int(poll_interval, value);
374 set_sw_credit_quanta(const char *key __rte_unused,
378 int *sw_credit_quanta = opaque;
381 if (value == NULL || opaque == NULL) {
382 DLB2_LOG_ERR("NULL pointer\n");
386 ret = dlb2_string_to_int(sw_credit_quanta, value);
394 set_hw_credit_quanta(const char *key __rte_unused,
398 int *hw_credit_quanta = opaque;
401 if (value == NULL || opaque == NULL) {
402 DLB2_LOG_ERR("NULL pointer\n");
406 ret = dlb2_string_to_int(hw_credit_quanta, value);
414 set_default_depth_thresh(const char *key __rte_unused,
418 int *default_depth_thresh = opaque;
421 if (value == NULL || opaque == NULL) {
422 DLB2_LOG_ERR("NULL pointer\n");
426 ret = dlb2_string_to_int(default_depth_thresh, value);
434 set_vector_opts_enab(const char *key __rte_unused,
438 bool *dlb2_vector_opts_enabled = opaque;
440 if (value == NULL || opaque == NULL) {
441 DLB2_LOG_ERR("NULL pointer\n");
445 if ((*value == 'y') || (*value == 'Y'))
446 *dlb2_vector_opts_enabled = true;
448 *dlb2_vector_opts_enabled = false;
454 set_qid_depth_thresh(const char *key __rte_unused,
458 struct dlb2_qid_depth_thresholds *qid_thresh = opaque;
459 int first, last, thresh, i;
461 if (value == NULL || opaque == NULL) {
462 DLB2_LOG_ERR("NULL pointer\n");
466 /* command line override may take one of the following 3 forms:
467 * qid_depth_thresh=all:<threshold_value> ... all queues
468 * qid_depth_thresh=qidA-qidB:<threshold_value> ... a range of queues
469 * qid_depth_thresh=qid:<threshold_value> ... just one queue
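* For example (hypothetical values), "qid_depth_thresh=0-3:512" sets a
* depth threshold of 512 on event queues 0 through 3, while
* "qid_depth_thresh=all:1024" applies 1024 to every queue.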
471 if (sscanf(value, "all:%d", &thresh) == 1) {
473 last = DLB2_MAX_NUM_QUEUES(DLB2_HW_V2) - 1;
474 } else if (sscanf(value, "%d-%d:%d", &first, &last, &thresh) == 3) {
475 /* we have everything we need */
476 } else if (sscanf(value, "%d:%d", &first, &thresh) == 2) {
479 DLB2_LOG_ERR("Error parsing qid depth devarg. Should be all:val, qid-qid:val, or qid:val\n");
483 if (first > last || first < 0 ||
484 last >= DLB2_MAX_NUM_QUEUES(DLB2_HW_V2)) {
485 DLB2_LOG_ERR("Error parsing qid depth devarg, invalid qid value\n");
489 if (thresh < 0 || thresh > DLB2_MAX_QUEUE_DEPTH_THRESHOLD) {
490 DLB2_LOG_ERR("Error parsing qid depth devarg, threshold > %d\n",
491 DLB2_MAX_QUEUE_DEPTH_THRESHOLD);
495 for (i = first; i <= last; i++)
496 qid_thresh->val[i] = thresh; /* indexed by qid */
502 set_qid_depth_thresh_v2_5(const char *key __rte_unused,
506 struct dlb2_qid_depth_thresholds *qid_thresh = opaque;
507 int first, last, thresh, i;
509 if (value == NULL || opaque == NULL) {
510 DLB2_LOG_ERR("NULL pointer\n");
514 /* command line override may take one of the following 3 forms:
515 * qid_depth_thresh=all:<threshold_value> ... all queues
516 * qid_depth_thresh=qidA-qidB:<threshold_value> ... a range of queues
517 * qid_depth_thresh=qid:<threshold_value> ... just one queue
519 if (sscanf(value, "all:%d", &thresh) == 1) {
521 last = DLB2_MAX_NUM_QUEUES(DLB2_HW_V2_5) - 1;
522 } else if (sscanf(value, "%d-%d:%d", &first, &last, &thresh) == 3) {
523 /* we have everything we need */
524 } else if (sscanf(value, "%d:%d", &first, &thresh) == 2) {
527 DLB2_LOG_ERR("Error parsing qid depth devarg. Should be all:val, qid-qid:val, or qid:val\n");
531 if (first > last || first < 0 ||
532 last >= DLB2_MAX_NUM_QUEUES(DLB2_HW_V2_5)) {
533 DLB2_LOG_ERR("Error parsing qid depth devarg, invalid qid value\n");
537 if (thresh < 0 || thresh > DLB2_MAX_QUEUE_DEPTH_THRESHOLD) {
538 DLB2_LOG_ERR("Error parsing qid depth devarg, threshold > %d\n",
539 DLB2_MAX_QUEUE_DEPTH_THRESHOLD);
543 for (i = first; i <= last; i++)
544 qid_thresh->val[i] = thresh; /* indexed by qid */
550 dlb2_eventdev_info_get(struct rte_eventdev *dev,
551 struct rte_event_dev_info *dev_info)
553 struct dlb2_eventdev *dlb2 = dlb2_pmd_priv(dev);
556 ret = dlb2_hw_query_resources(dlb2);
558 const struct rte_eventdev_data *data = dev->data;
560 DLB2_LOG_ERR("get resources err=%d, devid=%d\n",
562 /* fn is void, so fall through and return values set up in
567 /* Add num resources currently owned by this domain.
568 * These would become available if the scheduling domain were reset due
569 * to the application recalling eventdev_configure to *reconfigure* the
572 evdev_dlb2_default_info.max_event_ports += dlb2->num_ldb_ports;
573 evdev_dlb2_default_info.max_event_queues += dlb2->num_ldb_queues;
574 if (dlb2->version == DLB2_HW_V2_5) {
575 evdev_dlb2_default_info.max_num_events +=
578 evdev_dlb2_default_info.max_num_events +=
579 dlb2->max_ldb_credits;
581 evdev_dlb2_default_info.max_event_queues =
582 RTE_MIN(evdev_dlb2_default_info.max_event_queues,
583 RTE_EVENT_MAX_QUEUES_PER_DEV);
585 evdev_dlb2_default_info.max_num_events =
586 RTE_MIN(evdev_dlb2_default_info.max_num_events,
587 dlb2->max_num_events_override);
589 *dev_info = evdev_dlb2_default_info;
593 dlb2_hw_create_sched_domain(struct dlb2_hw_dev *handle,
594 const struct dlb2_hw_rsrcs *resources_asked,
595 uint8_t device_version)
598 struct dlb2_create_sched_domain_args *cfg;
600 if (resources_asked == NULL) {
601 DLB2_LOG_ERR("dlb2: dlb2_create NULL parameter\n");
606 /* Map generic qm resources to dlb2 resources */
607 cfg = &handle->cfg.resources;
609 /* DIR ports and queues */
611 cfg->num_dir_ports = resources_asked->num_dir_ports;
612 if (device_version == DLB2_HW_V2_5)
613 cfg->num_credits = resources_asked->num_credits;
615 cfg->num_dir_credits = resources_asked->num_dir_credits;
619 cfg->num_ldb_queues = resources_asked->num_ldb_queues;
623 cfg->cos_strict = 0; /* Best effort */
624 cfg->num_cos_ldb_ports[0] = 0;
625 cfg->num_cos_ldb_ports[1] = 0;
626 cfg->num_cos_ldb_ports[2] = 0;
627 cfg->num_cos_ldb_ports[3] = 0;
629 switch (handle->cos_id) {
631 cfg->num_ldb_ports = 0; /* no don't care ports */
632 cfg->num_cos_ldb_ports[0] =
633 resources_asked->num_ldb_ports;
636 cfg->num_ldb_ports = 0; /* no don't care ports */
637 cfg->num_cos_ldb_ports[1] = resources_asked->num_ldb_ports;
640 cfg->num_ldb_ports = 0; /* no don't care ports */
641 cfg->num_cos_ldb_ports[2] = resources_asked->num_ldb_ports;
644 cfg->num_ldb_ports = 0; /* no don't care ports */
645 cfg->num_cos_ldb_ports[3] =
646 resources_asked->num_ldb_ports;
648 case DLB2_COS_DEFAULT:
649 /* all ldb ports are don't care ports from a cos perspective */
651 resources_asked->num_ldb_ports;
655 if (device_version == DLB2_HW_V2)
656 cfg->num_ldb_credits = resources_asked->num_ldb_credits;
658 cfg->num_atomic_inflights =
659 DLB2_NUM_ATOMIC_INFLIGHTS_PER_QUEUE *
662 cfg->num_hist_list_entries = resources_asked->num_ldb_ports *
663 DLB2_NUM_HIST_LIST_ENTRIES_PER_LDB_PORT;
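/* History list entries are provisioned per load-balanced port; the
 * history list tracks events that have been scheduled to a CQ but not
 * yet released back to the device.
 */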
665 if (device_version == DLB2_HW_V2_5) {
666 DLB2_LOG_DBG("sched domain create - ldb_qs=%d, ldb_ports=%d, dir_ports=%d, atomic_inflights=%d, hist_list_entries=%d, credits=%d\n",
668 resources_asked->num_ldb_ports,
670 cfg->num_atomic_inflights,
671 cfg->num_hist_list_entries,
674 DLB2_LOG_DBG("sched domain create - ldb_qs=%d, ldb_ports=%d, dir_ports=%d, atomic_inflights=%d, hist_list_entries=%d, ldb_credits=%d, dir_credits=%d\n",
676 resources_asked->num_ldb_ports,
678 cfg->num_atomic_inflights,
679 cfg->num_hist_list_entries,
680 cfg->num_ldb_credits,
681 cfg->num_dir_credits);
684 /* Configure the QM */
686 ret = dlb2_iface_sched_domain_create(handle, cfg);
688 DLB2_LOG_ERR("dlb2: domain create failed, ret = %d, extra status: %s\n",
690 dlb2_error_strings[cfg->response.status]);
695 handle->domain_id = cfg->response.id;
696 handle->cfg.configured = true;
704 dlb2_hw_reset_sched_domain(const struct rte_eventdev *dev, bool reconfig)
706 struct dlb2_eventdev *dlb2 = dlb2_pmd_priv(dev);
707 enum dlb2_configuration_state config_state;
710 dlb2_iface_domain_reset(dlb2);
712 /* Free all dynamically allocated port memory */
713 for (i = 0; i < dlb2->num_ports; i++)
714 dlb2_free_qe_mem(&dlb2->ev_ports[i].qm_port);
716 /* If reconfiguring, mark the device's queues and ports as "previously
717 * configured." If the user doesn't reconfigure them, the PMD will
718 * reapply their previous configuration when the device is started.
720 config_state = (reconfig) ? DLB2_PREV_CONFIGURED :
723 for (i = 0; i < dlb2->num_ports; i++) {
724 dlb2->ev_ports[i].qm_port.config_state = config_state;
725 /* Reset setup_done so ports can be reconfigured */
726 dlb2->ev_ports[i].setup_done = false;
727 for (j = 0; j < DLB2_MAX_NUM_QIDS_PER_LDB_CQ; j++)
728 dlb2->ev_ports[i].link[j].mapped = false;
731 for (i = 0; i < dlb2->num_queues; i++)
732 dlb2->ev_queues[i].qm_queue.config_state = config_state;
734 for (i = 0; i < DLB2_MAX_NUM_QUEUES(DLB2_HW_V2_5); i++)
735 dlb2->ev_queues[i].setup_done = false;
738 dlb2->num_ldb_ports = 0;
739 dlb2->num_dir_ports = 0;
740 dlb2->num_queues = 0;
741 dlb2->num_ldb_queues = 0;
742 dlb2->num_dir_queues = 0;
743 dlb2->configured = false;
746 /* Note: 1 QM instance per QM device, QM instance/device == event device */
748 dlb2_eventdev_configure(const struct rte_eventdev *dev)
750 struct dlb2_eventdev *dlb2 = dlb2_pmd_priv(dev);
751 struct dlb2_hw_dev *handle = &dlb2->qm_instance;
752 struct dlb2_hw_rsrcs *rsrcs = &handle->info.hw_rsrc_max;
753 const struct rte_eventdev_data *data = dev->data;
754 const struct rte_event_dev_config *config = &data->dev_conf;
757 /* If this eventdev is already configured, we must release the current
758 * scheduling domain before attempting to configure a new one.
760 if (dlb2->configured) {
761 dlb2_hw_reset_sched_domain(dev, true);
762 ret = dlb2_hw_query_resources(dlb2);
764 DLB2_LOG_ERR("get resources err=%d, devid=%d\n",
770 if (config->nb_event_queues > rsrcs->num_queues) {
771 DLB2_LOG_ERR("nb_event_queues parameter (%d) exceeds the QM device's capabilities (%d).\n",
772 config->nb_event_queues,
776 if (config->nb_event_ports > (rsrcs->num_ldb_ports
777 + rsrcs->num_dir_ports)) {
778 DLB2_LOG_ERR("nb_event_ports parameter (%d) exceeds the QM device's capabilities (%d).\n",
779 config->nb_event_ports,
780 (rsrcs->num_ldb_ports + rsrcs->num_dir_ports));
783 if (config->nb_events_limit > rsrcs->nb_events_limit) {
784 DLB2_LOG_ERR("nb_events_limit parameter (%d) exceeds the QM device's capabilities (%d).\n",
785 config->nb_events_limit,
786 rsrcs->nb_events_limit);
790 if (config->event_dev_cfg & RTE_EVENT_DEV_CFG_PER_DEQUEUE_TIMEOUT)
791 dlb2->global_dequeue_wait = false;
795 dlb2->global_dequeue_wait = true;
797 /* note size mismatch of timeout vals in eventdev lib. */
798 timeout32 = config->dequeue_timeout_ns;
800 dlb2->global_dequeue_wait_ticks =
801 timeout32 * (rte_get_timer_hz() / 1E9);
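/* rte_get_timer_hz() returns ticks per second, so dividing by 1E9 gives
 * ticks per nanosecond; the product converts the ns timeout into timer
 * ticks for the dequeue wait loop.
 */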
804 /* Does this platform support umonitor/umwait? */
805 if (rte_cpu_get_flag_enabled(RTE_CPUFLAG_WAITPKG))
806 dlb2->umwait_allowed = true;
808 rsrcs->num_dir_ports = config->nb_single_link_event_port_queues;
809 rsrcs->num_ldb_ports = config->nb_event_ports - rsrcs->num_dir_ports;
810 /* 1 dir queue per dir port */
811 rsrcs->num_ldb_queues = config->nb_event_queues - rsrcs->num_dir_ports;
813 if (dlb2->version == DLB2_HW_V2_5) {
814 rsrcs->num_credits = 0;
815 if (rsrcs->num_ldb_queues || rsrcs->num_dir_ports)
816 rsrcs->num_credits = config->nb_events_limit;
818 /* Scale down nb_events_limit for directed credits, since there are
819 * 4x as many load-balanced credits.
821 rsrcs->num_ldb_credits = 0;
822 rsrcs->num_dir_credits = 0;
824 if (rsrcs->num_ldb_queues)
825 rsrcs->num_ldb_credits = config->nb_events_limit;
826 if (rsrcs->num_dir_ports)
827 rsrcs->num_dir_credits = config->nb_events_limit / 2;
828 if (dlb2->num_dir_credits_override != -1)
829 rsrcs->num_dir_credits = dlb2->num_dir_credits_override;
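/* Example with hypothetical numbers: nb_events_limit = 2048 yields 2048
 * load-balanced credits and 1024 directed credits, unless a directed
 * credit override was supplied on the command line.
 */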
832 if (dlb2_hw_create_sched_domain(handle, rsrcs, dlb2->version) < 0) {
833 DLB2_LOG_ERR("dlb2_hw_create_sched_domain failed\n");
837 dlb2->new_event_limit = config->nb_events_limit;
838 __atomic_store_n(&dlb2->inflights, 0, __ATOMIC_SEQ_CST);
840 /* Save number of ports/queues for this event dev */
841 dlb2->num_ports = config->nb_event_ports;
842 dlb2->num_queues = config->nb_event_queues;
843 dlb2->num_dir_ports = rsrcs->num_dir_ports;
844 dlb2->num_ldb_ports = dlb2->num_ports - dlb2->num_dir_ports;
845 dlb2->num_ldb_queues = dlb2->num_queues - dlb2->num_dir_ports;
846 dlb2->num_dir_queues = dlb2->num_dir_ports;
847 if (dlb2->version == DLB2_HW_V2_5) {
848 dlb2->credit_pool = rsrcs->num_credits;
849 dlb2->max_credits = rsrcs->num_credits;
851 dlb2->ldb_credit_pool = rsrcs->num_ldb_credits;
852 dlb2->max_ldb_credits = rsrcs->num_ldb_credits;
853 dlb2->dir_credit_pool = rsrcs->num_dir_credits;
854 dlb2->max_dir_credits = rsrcs->num_dir_credits;
857 dlb2->configured = true;
863 dlb2_eventdev_port_default_conf_get(struct rte_eventdev *dev,
865 struct rte_event_port_conf *port_conf)
867 RTE_SET_USED(port_id);
868 struct dlb2_eventdev *dlb2 = dlb2_pmd_priv(dev);
870 port_conf->new_event_threshold = dlb2->new_event_limit;
871 port_conf->dequeue_depth = 32;
872 port_conf->enqueue_depth = DLB2_MAX_ENQUEUE_DEPTH;
873 port_conf->event_port_cfg = 0;
877 dlb2_eventdev_queue_default_conf_get(struct rte_eventdev *dev,
879 struct rte_event_queue_conf *queue_conf)
882 RTE_SET_USED(queue_id);
884 queue_conf->nb_atomic_flows = 1024;
885 queue_conf->nb_atomic_order_sequences = 64;
886 queue_conf->event_queue_cfg = 0;
887 queue_conf->priority = 0;
891 dlb2_get_sn_allocation(struct dlb2_eventdev *dlb2, int group)
893 struct dlb2_hw_dev *handle = &dlb2->qm_instance;
894 struct dlb2_get_sn_allocation_args cfg;
899 ret = dlb2_iface_get_sn_allocation(handle, &cfg);
901 DLB2_LOG_ERR("dlb2: get_sn_allocation ret=%d (driver status: %s)\n",
902 ret, dlb2_error_strings[cfg.response.status]);
906 return cfg.response.id;
910 dlb2_set_sn_allocation(struct dlb2_eventdev *dlb2, int group, int num)
912 struct dlb2_hw_dev *handle = &dlb2->qm_instance;
913 struct dlb2_set_sn_allocation_args cfg;
919 ret = dlb2_iface_set_sn_allocation(handle, &cfg);
921 DLB2_LOG_ERR("dlb2: set_sn_allocation ret=%d (driver status: %s)\n",
922 ret, dlb2_error_strings[cfg.response.status]);
930 dlb2_get_sn_occupancy(struct dlb2_eventdev *dlb2, int group)
932 struct dlb2_hw_dev *handle = &dlb2->qm_instance;
933 struct dlb2_get_sn_occupancy_args cfg;
938 ret = dlb2_iface_get_sn_occupancy(handle, &cfg);
940 DLB2_LOG_ERR("dlb2: get_sn_occupancy ret=%d (driver status: %s)\n",
941 ret, dlb2_error_strings[cfg.response.status]);
945 return cfg.response.id;
948 /* Query the current sequence number allocations and, if they conflict with the
949 * requested LDB queue configuration, attempt to re-allocate sequence numbers.
950 * This is best-effort; if it fails, the subsequent load-balanced queue
951 * configuration will fail and return an error.
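* The hardware provides DLB2_NUM_SN_GROUPS sequence number groups; each
* group is programmed with a per-queue SN count and offers
* DLB2_MAX_LDB_SN_ALLOC / <count> queue slots, as computed below.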
954 dlb2_program_sn_allocation(struct dlb2_eventdev *dlb2,
955 const struct rte_event_queue_conf *queue_conf)
957 int grp_occupancy[DLB2_NUM_SN_GROUPS];
958 int grp_alloc[DLB2_NUM_SN_GROUPS];
959 int i, sequence_numbers;
961 sequence_numbers = (int)queue_conf->nb_atomic_order_sequences;
963 for (i = 0; i < DLB2_NUM_SN_GROUPS; i++) {
966 grp_alloc[i] = dlb2_get_sn_allocation(dlb2, i);
967 if (grp_alloc[i] < 0)
970 total_slots = DLB2_MAX_LDB_SN_ALLOC / grp_alloc[i];
972 grp_occupancy[i] = dlb2_get_sn_occupancy(dlb2, i);
973 if (grp_occupancy[i] < 0)
976 /* DLB has at least one available slot for the requested
977 * sequence numbers, so no further configuration required.
979 if (grp_alloc[i] == sequence_numbers &&
980 grp_occupancy[i] < total_slots)
984 /* None of the sequence number groups are configured for the requested
985 * sequence numbers, so we have to reconfigure one of them. This is
986 * only possible if a group is not in use.
988 for (i = 0; i < DLB2_NUM_SN_GROUPS; i++) {
989 if (grp_occupancy[i] == 0)
993 if (i == DLB2_NUM_SN_GROUPS) {
994 DLB2_LOG_ERR("[%s()] No groups with %d sequence_numbers are available or have free slots\n",
995 __func__, sequence_numbers);
999 /* Attempt to configure slot i with the requested number of sequence
1000 * numbers. Ignore the return value -- if this fails, the error will be
1001 * caught during subsequent queue configuration.
1003 dlb2_set_sn_allocation(dlb2, i, sequence_numbers);
1007 dlb2_hw_create_ldb_queue(struct dlb2_eventdev *dlb2,
1008 struct dlb2_eventdev_queue *ev_queue,
1009 const struct rte_event_queue_conf *evq_conf)
1011 struct dlb2_hw_dev *handle = &dlb2->qm_instance;
1012 struct dlb2_queue *queue = &ev_queue->qm_queue;
1013 struct dlb2_create_ldb_queue_args cfg;
1016 int sched_type = -1;
1018 if (evq_conf == NULL)
1021 if (evq_conf->event_queue_cfg & RTE_EVENT_QUEUE_CFG_ALL_TYPES) {
1022 if (evq_conf->nb_atomic_order_sequences != 0)
1023 sched_type = RTE_SCHED_TYPE_ORDERED;
1025 sched_type = RTE_SCHED_TYPE_PARALLEL;
1027 sched_type = evq_conf->schedule_type;
1029 cfg.num_atomic_inflights = DLB2_NUM_ATOMIC_INFLIGHTS_PER_QUEUE;
1030 cfg.num_sequence_numbers = evq_conf->nb_atomic_order_sequences;
1031 cfg.num_qid_inflights = evq_conf->nb_atomic_order_sequences;
1033 if (sched_type != RTE_SCHED_TYPE_ORDERED) {
1034 cfg.num_sequence_numbers = 0;
1035 cfg.num_qid_inflights = 2048;
1038 /* The application should set this to the number of hardware flows it
1039 * wants, not the overall number of flows it will use. E.g. if the app
1040 * uses 64 flows and sets compression to 64, in the best case it gets
1041 * 64 unique hashed flows in hardware.
1043 switch (evq_conf->nb_atomic_flows) {
1044 /* Valid DLB2 compression levels */
1049 case (1 * 1024): /* 1K */
1050 case (2 * 1024): /* 2K */
1051 case (4 * 1024): /* 4K */
1052 case (64 * 1024): /* 64K */
1053 cfg.lock_id_comp_level = evq_conf->nb_atomic_flows;
1056 /* Invalid compression level */
1057 cfg.lock_id_comp_level = 0; /* no compression */
1060 if (ev_queue->depth_threshold == 0) {
1061 cfg.depth_threshold = dlb2->default_depth_thresh;
1062 ev_queue->depth_threshold =
1063 dlb2->default_depth_thresh;
1065 cfg.depth_threshold = ev_queue->depth_threshold;
1067 ret = dlb2_iface_ldb_queue_create(handle, &cfg);
1069 DLB2_LOG_ERR("dlb2: create LB event queue error, ret=%d (driver status: %s)\n",
1070 ret, dlb2_error_strings[cfg.response.status]);
1074 qm_qid = cfg.response.id;
1076 /* Save off queue config for debug, resource lookups, and reconfig */
1077 queue->num_qid_inflights = cfg.num_qid_inflights;
1078 queue->num_atm_inflights = cfg.num_atomic_inflights;
1080 queue->sched_type = sched_type;
1081 queue->config_state = DLB2_CONFIGURED;
1083 DLB2_LOG_DBG("Created LB event queue %d, nb_inflights=%d, nb_seq=%d, qid inflights=%d\n",
1085 cfg.num_atomic_inflights,
1086 cfg.num_sequence_numbers,
1087 cfg.num_qid_inflights);
1093 dlb2_eventdev_ldb_queue_setup(struct rte_eventdev *dev,
1094 struct dlb2_eventdev_queue *ev_queue,
1095 const struct rte_event_queue_conf *queue_conf)
1097 struct dlb2_eventdev *dlb2 = dlb2_pmd_priv(dev);
1100 if (queue_conf->nb_atomic_order_sequences)
1101 dlb2_program_sn_allocation(dlb2, queue_conf);
1103 qm_qid = dlb2_hw_create_ldb_queue(dlb2, ev_queue, queue_conf);
1105 DLB2_LOG_ERR("Failed to create the load-balanced queue\n");
1110 dlb2->qm_ldb_to_ev_queue_id[qm_qid] = ev_queue->id;
1112 ev_queue->qm_queue.id = qm_qid;
1117 static int dlb2_num_dir_queues_setup(struct dlb2_eventdev *dlb2)
1121 for (i = 0; i < dlb2->num_queues; i++) {
1122 if (dlb2->ev_queues[i].setup_done &&
1123 dlb2->ev_queues[i].qm_queue.is_directed)
1131 dlb2_queue_link_teardown(struct dlb2_eventdev *dlb2,
1132 struct dlb2_eventdev_queue *ev_queue)
1134 struct dlb2_eventdev_port *ev_port;
1137 for (i = 0; i < dlb2->num_ports; i++) {
1138 ev_port = &dlb2->ev_ports[i];
1140 for (j = 0; j < DLB2_MAX_NUM_QIDS_PER_LDB_CQ; j++) {
1141 if (!ev_port->link[j].valid ||
1142 ev_port->link[j].queue_id != ev_queue->id)
1145 ev_port->link[j].valid = false;
1146 ev_port->num_links--;
1150 ev_queue->num_links = 0;
1154 dlb2_eventdev_queue_setup(struct rte_eventdev *dev,
1156 const struct rte_event_queue_conf *queue_conf)
1158 struct dlb2_eventdev *dlb2 = dlb2_pmd_priv(dev);
1159 struct dlb2_eventdev_queue *ev_queue;
1162 if (queue_conf == NULL)
1165 if (ev_qid >= dlb2->num_queues)
1168 ev_queue = &dlb2->ev_queues[ev_qid];
1170 ev_queue->qm_queue.is_directed = queue_conf->event_queue_cfg &
1171 RTE_EVENT_QUEUE_CFG_SINGLE_LINK;
1172 ev_queue->id = ev_qid;
1173 ev_queue->conf = *queue_conf;
1175 if (!ev_queue->qm_queue.is_directed) {
1176 ret = dlb2_eventdev_ldb_queue_setup(dev, ev_queue, queue_conf);
1178 /* The directed queue isn't set up until link time, at which
1179 * point we know its directed port ID. Directed queue setup
1180 * will only fail if this queue is already set up or there are
1181 * no directed queues left to configure.
1185 ev_queue->qm_queue.config_state = DLB2_NOT_CONFIGURED;
1187 if (ev_queue->setup_done ||
1188 dlb2_num_dir_queues_setup(dlb2) == dlb2->num_dir_queues)
1192 /* Tear down pre-existing port->queue links */
1193 if (!ret && dlb2->run_state == DLB2_RUN_STATE_STOPPED)
1194 dlb2_queue_link_teardown(dlb2, ev_queue);
1197 ev_queue->setup_done = true;
1203 dlb2_init_consume_qe(struct dlb2_port *qm_port, char *mz_name)
1205 struct dlb2_cq_pop_qe *qe;
1207 qe = rte_zmalloc(mz_name,
1208 DLB2_NUM_QES_PER_CACHE_LINE *
1209 sizeof(struct dlb2_cq_pop_qe),
1210 RTE_CACHE_LINE_SIZE);
1213 DLB2_LOG_ERR("dlb2: no memory for consume_qe\n");
1216 qm_port->consume_qe = qe;
1222 /* Tokens value is 0-based; i.e. '0' returns 1 token, '1' returns 2,
1225 qe->tokens = 0; /* set at run time */
1228 /* Completion IDs are disabled */
1235 dlb2_init_int_arm_qe(struct dlb2_port *qm_port, char *mz_name)
1237 struct dlb2_enqueue_qe *qe;
1239 qe = rte_zmalloc(mz_name,
1240 DLB2_NUM_QES_PER_CACHE_LINE *
1241 sizeof(struct dlb2_enqueue_qe),
1242 RTE_CACHE_LINE_SIZE);
1245 DLB2_LOG_ERR("dlb2: no memory for int_arm_qe\n");
1248 qm_port->int_arm_qe = qe;
1250 /* V2 - INT ARM is CQ_TOKEN + FRAG */
1257 /* Completion IDs are disabled */
1264 dlb2_init_qe_mem(struct dlb2_port *qm_port, char *mz_name)
1268 sz = DLB2_NUM_QES_PER_CACHE_LINE * sizeof(struct dlb2_enqueue_qe);
1270 qm_port->qe4 = rte_zmalloc(mz_name, sz, RTE_CACHE_LINE_SIZE);
1272 if (qm_port->qe4 == NULL) {
1273 DLB2_LOG_ERR("dlb2: no qe4 memory\n");
1278 ret = dlb2_init_int_arm_qe(qm_port, mz_name);
1280 DLB2_LOG_ERR("dlb2: dlb2_init_int_arm_qe ret=%d\n", ret);
1284 ret = dlb2_init_consume_qe(qm_port, mz_name);
1286 DLB2_LOG_ERR("dlb2: dlb2_init_consume_qe ret=%d\n", ret);
1294 dlb2_free_qe_mem(qm_port);
1299 static inline uint16_t
1300 dlb2_event_enqueue_delayed(void *event_port,
1301 const struct rte_event events[]);
1303 static inline uint16_t
1304 dlb2_event_enqueue_burst_delayed(void *event_port,
1305 const struct rte_event events[],
1308 static inline uint16_t
1309 dlb2_event_enqueue_new_burst_delayed(void *event_port,
1310 const struct rte_event events[],
1313 static inline uint16_t
1314 dlb2_event_enqueue_forward_burst_delayed(void *event_port,
1315 const struct rte_event events[],
1318 /* Generate the required bitmask for rotate-style expected QE gen bits.
1319 * The mask alternates runs of 1s and 0s, starting with the expected
1320 * entries as 1 bits, so that when hardware writes 0s they read as
1321 * "new". This requires the ring size to be a power of 2 to wrap correctly.
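* For example, with cq_depth = 16 the loop below produces
* cq_rolling_mask = 0xFFFF0000FFFF0000, i.e. alternating 16-bit runs of
* 1s and 0s across the 64-bit mask.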
1324 dlb2_hw_cq_bitmask_init(struct dlb2_port *qm_port, uint32_t cq_depth)
1326 uint64_t cq_build_mask = 0;
1330 return; /* need to fall back to scalar code */
1333 * all 1's in first u64, all zeros in second is correct bit pattern to
1334 * start. Special casing == 64 easier than adapting complex loop logic.
1336 if (cq_depth == 64) {
1337 qm_port->cq_rolling_mask = 0;
1338 qm_port->cq_rolling_mask_2 = -1;
1342 for (i = 0; i < 64; i += (cq_depth * 2))
1343 cq_build_mask |= ((1ULL << cq_depth) - 1) << (i + cq_depth);
1345 qm_port->cq_rolling_mask = cq_build_mask;
1346 qm_port->cq_rolling_mask_2 = cq_build_mask;
1350 dlb2_hw_create_ldb_port(struct dlb2_eventdev *dlb2,
1351 struct dlb2_eventdev_port *ev_port,
1352 uint32_t dequeue_depth,
1353 uint32_t enqueue_depth)
1355 struct dlb2_hw_dev *handle = &dlb2->qm_instance;
1356 struct dlb2_create_ldb_port_args cfg = { {0} };
1358 struct dlb2_port *qm_port = NULL;
1359 char mz_name[RTE_MEMZONE_NAMESIZE];
1360 uint32_t qm_port_id;
1361 uint16_t ldb_credit_high_watermark = 0;
1362 uint16_t dir_credit_high_watermark = 0;
1363 uint16_t credit_high_watermark = 0;
1368 if (dequeue_depth < DLB2_MIN_CQ_DEPTH) {
1369 DLB2_LOG_ERR("dlb2: invalid dequeue_depth, must be at least %d\n",
1374 if (enqueue_depth < DLB2_MIN_ENQUEUE_DEPTH) {
1375 DLB2_LOG_ERR("dlb2: invalid enqueue_depth, must be at least %d\n",
1376 DLB2_MIN_ENQUEUE_DEPTH);
1380 rte_spinlock_lock(&handle->resource_lock);
1382 /* We round up to the next power of 2 if necessary */
1383 cfg.cq_depth = rte_align32pow2(dequeue_depth);
1384 cfg.cq_depth_threshold = 1;
1386 cfg.cq_history_list_size = DLB2_NUM_HIST_LIST_ENTRIES_PER_LDB_PORT;
1388 if (handle->cos_id == DLB2_COS_DEFAULT)
1391 cfg.cos_id = handle->cos_id;
1395 /* User controls the LDB high watermark via enqueue depth. The DIR high
1396 * watermark is equal, unless the directed credit pool is too small.
1398 if (dlb2->version == DLB2_HW_V2) {
1399 ldb_credit_high_watermark = enqueue_depth;
1400 /* If there are no directed ports, the kernel driver will
1401 * ignore this port's directed credit settings. Don't use
1402 * enqueue_depth if it would require more directed credits
1403 * than are available.
1405 dir_credit_high_watermark =
1406 RTE_MIN(enqueue_depth,
1407 handle->cfg.num_dir_credits / dlb2->num_ports);
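/* e.g. (hypothetical numbers) with enqueue_depth = 128, 4 ports and 256
 * directed credits in the domain, the DIR watermark is capped at
 * 256 / 4 = 64.
 */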
1409 credit_high_watermark = enqueue_depth;
1413 ret = dlb2_iface_ldb_port_create(handle, &cfg, dlb2->poll_mode);
1415 DLB2_LOG_ERR("dlb2: dlb2_ldb_port_create error, ret=%d (driver status: %s)\n",
1416 ret, dlb2_error_strings[cfg.response.status]);
1420 qm_port_id = cfg.response.id;
1422 DLB2_LOG_DBG("dlb2: ev_port %d uses qm LB port %d <<<<<\n",
1423 ev_port->id, qm_port_id);
1425 qm_port = &ev_port->qm_port;
1426 qm_port->ev_port = ev_port; /* back ptr */
1427 qm_port->dlb2 = dlb2; /* back ptr */
1429 * Allocate and init local qe struct(s).
1430 * Note: MOVDIR64 requires the enqueue QE (qe4) to be aligned.
1433 snprintf(mz_name, sizeof(mz_name), "dlb2_ldb_port%d",
1436 ret = dlb2_init_qe_mem(qm_port, mz_name);
1438 DLB2_LOG_ERR("dlb2: init_qe_mem failed, ret=%d\n", ret);
1442 qm_port->id = qm_port_id;
1444 if (dlb2->version == DLB2_HW_V2) {
1445 qm_port->cached_ldb_credits = 0;
1446 qm_port->cached_dir_credits = 0;
1448 qm_port->cached_credits = 0;
1450 /* CQs with depth < 8 use an 8-entry queue, but withhold credits so
1451 * the effective depth is smaller.
1453 qm_port->cq_depth = cfg.cq_depth <= 8 ? 8 : cfg.cq_depth;
1454 qm_port->cq_idx = 0;
1455 qm_port->cq_idx_unmasked = 0;
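/* In sparse CQ mode the hardware writes only one valid QE per cache
 * line (every fourth QE slot), so the CQ index mask must span 4x the
 * logical depth.
 */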
1457 if (dlb2->poll_mode == DLB2_CQ_POLL_MODE_SPARSE)
1458 qm_port->cq_depth_mask = (qm_port->cq_depth * 4) - 1;
1460 qm_port->cq_depth_mask = qm_port->cq_depth - 1;
1462 qm_port->gen_bit_shift = __builtin_popcount(qm_port->cq_depth_mask);
1463 /* starting value of gen bit - it toggles at wrap time */
1464 qm_port->gen_bit = 1;
1466 dlb2_hw_cq_bitmask_init(qm_port, qm_port->cq_depth);
1468 qm_port->int_armed = false;
1470 /* Save off for later use in info and lookup APIs. */
1471 qm_port->qid_mappings = &dlb2->qm_ldb_to_ev_queue_id[0];
1473 qm_port->dequeue_depth = dequeue_depth;
1474 qm_port->token_pop_thresh = dequeue_depth;
1476 /* The default enqueue functions do not include delayed-pop support for
1477 * performance reasons.
1479 if (qm_port->token_pop_mode == DELAYED_POP) {
1480 dlb2->event_dev->enqueue = dlb2_event_enqueue_delayed;
1481 dlb2->event_dev->enqueue_burst =
1482 dlb2_event_enqueue_burst_delayed;
1483 dlb2->event_dev->enqueue_new_burst =
1484 dlb2_event_enqueue_new_burst_delayed;
1485 dlb2->event_dev->enqueue_forward_burst =
1486 dlb2_event_enqueue_forward_burst_delayed;
1489 qm_port->owed_tokens = 0;
1490 qm_port->issued_releases = 0;
1492 /* Save config message too. */
1493 rte_memcpy(&qm_port->cfg.ldb, &cfg, sizeof(qm_port->cfg.ldb));
1496 qm_port->state = PORT_STARTED; /* enabled at create time */
1497 qm_port->config_state = DLB2_CONFIGURED;
1499 if (dlb2->version == DLB2_HW_V2) {
1500 qm_port->dir_credits = dir_credit_high_watermark;
1501 qm_port->ldb_credits = ldb_credit_high_watermark;
1502 qm_port->credit_pool[DLB2_DIR_QUEUE] = &dlb2->dir_credit_pool;
1503 qm_port->credit_pool[DLB2_LDB_QUEUE] = &dlb2->ldb_credit_pool;
1505 DLB2_LOG_DBG("dlb2: created ldb port %d, depth = %d, ldb credits=%d, dir credits=%d\n",
1508 qm_port->ldb_credits,
1509 qm_port->dir_credits);
1511 qm_port->credits = credit_high_watermark;
1512 qm_port->credit_pool[DLB2_COMBINED_POOL] = &dlb2->credit_pool;
1514 DLB2_LOG_DBG("dlb2: created ldb port %d, depth = %d, credits=%d\n",
1520 qm_port->use_scalar = false;
1522 #if (!defined RTE_ARCH_X86_64)
1523 qm_port->use_scalar = true;
1525 if ((qm_port->cq_depth > 64) ||
1526 (!rte_is_power_of_2(qm_port->cq_depth)) ||
1527 (dlb2->vector_opts_enabled == false))
1528 qm_port->use_scalar = true;
1531 rte_spinlock_unlock(&handle->resource_lock);
1538 dlb2_free_qe_mem(qm_port);
1540 rte_spinlock_unlock(&handle->resource_lock);
1542 DLB2_LOG_ERR("dlb2: create ldb port failed!\n");
1548 dlb2_port_link_teardown(struct dlb2_eventdev *dlb2,
1549 struct dlb2_eventdev_port *ev_port)
1551 struct dlb2_eventdev_queue *ev_queue;
1554 for (i = 0; i < DLB2_MAX_NUM_QIDS_PER_LDB_CQ; i++) {
1555 if (!ev_port->link[i].valid)
1558 ev_queue = &dlb2->ev_queues[ev_port->link[i].queue_id];
1560 ev_port->link[i].valid = false;
1561 ev_port->num_links--;
1562 ev_queue->num_links--;
1567 dlb2_hw_create_dir_port(struct dlb2_eventdev *dlb2,
1568 struct dlb2_eventdev_port *ev_port,
1569 uint32_t dequeue_depth,
1570 uint32_t enqueue_depth)
1572 struct dlb2_hw_dev *handle = &dlb2->qm_instance;
1573 struct dlb2_create_dir_port_args cfg = { {0} };
1575 struct dlb2_port *qm_port = NULL;
1576 char mz_name[RTE_MEMZONE_NAMESIZE];
1577 uint32_t qm_port_id;
1578 uint16_t ldb_credit_high_watermark = 0;
1579 uint16_t dir_credit_high_watermark = 0;
1580 uint16_t credit_high_watermark = 0;
1582 if (dlb2 == NULL || handle == NULL)
1585 if (dequeue_depth < DLB2_MIN_CQ_DEPTH) {
1586 DLB2_LOG_ERR("dlb2: invalid dequeue_depth, must be %d-%d\n",
1587 DLB2_MIN_CQ_DEPTH, DLB2_MAX_INPUT_QUEUE_DEPTH);
1591 if (enqueue_depth < DLB2_MIN_ENQUEUE_DEPTH) {
1592 DLB2_LOG_ERR("dlb2: invalid enqueue_depth, must be at least %d\n",
1593 DLB2_MIN_ENQUEUE_DEPTH);
1597 rte_spinlock_lock(&handle->resource_lock);
1599 /* Directed queues are configured at link time. */
1602 /* We round up to the next power of 2 if necessary */
1603 cfg.cq_depth = rte_align32pow2(dequeue_depth);
1604 cfg.cq_depth_threshold = 1;
1606 /* User controls the LDB high watermark via enqueue depth. The DIR high
1607 * watermark is equal, unless the directed credit pool is too small.
1609 if (dlb2->version == DLB2_HW_V2) {
1610 ldb_credit_high_watermark = enqueue_depth;
1611 /* Don't use enqueue_depth if it would require more directed
1612 * credits than are available.
1614 dir_credit_high_watermark =
1615 RTE_MIN(enqueue_depth,
1616 handle->cfg.num_dir_credits / dlb2->num_ports);
1618 credit_high_watermark = enqueue_depth;
1622 ret = dlb2_iface_dir_port_create(handle, &cfg, dlb2->poll_mode);
1624 DLB2_LOG_ERR("dlb2: dlb2_dir_port_create error, ret=%d (driver status: %s)\n",
1625 ret, dlb2_error_strings[cfg.response.status]);
1629 qm_port_id = cfg.response.id;
1631 DLB2_LOG_DBG("dlb2: ev_port %d uses qm DIR port %d <<<<<\n",
1632 ev_port->id, qm_port_id);
1634 qm_port = &ev_port->qm_port;
1635 qm_port->ev_port = ev_port; /* back ptr */
1636 qm_port->dlb2 = dlb2; /* back ptr */
1639 * Init local qe struct(s).
1640 * Note: MOVDIR64 requires the enqueue QE to be aligned
1643 snprintf(mz_name, sizeof(mz_name), "dlb2_dir_port%d",
1646 ret = dlb2_init_qe_mem(qm_port, mz_name);
1649 DLB2_LOG_ERR("dlb2: init_qe_mem failed, ret=%d\n", ret);
1653 qm_port->id = qm_port_id;
1655 if (dlb2->version == DLB2_HW_V2) {
1656 qm_port->cached_ldb_credits = 0;
1657 qm_port->cached_dir_credits = 0;
1659 qm_port->cached_credits = 0;
1661 /* CQs with depth < 8 use an 8-entry queue, but withhold credits so
1662 * the effective depth is smaller.
1664 qm_port->cq_depth = cfg.cq_depth <= 8 ? 8 : cfg.cq_depth;
1665 qm_port->cq_idx = 0;
1666 qm_port->cq_idx_unmasked = 0;
1668 if (dlb2->poll_mode == DLB2_CQ_POLL_MODE_SPARSE)
1669 qm_port->cq_depth_mask = (cfg.cq_depth * 4) - 1;
1671 qm_port->cq_depth_mask = cfg.cq_depth - 1;
1673 qm_port->gen_bit_shift = __builtin_popcount(qm_port->cq_depth_mask);
1674 /* starting value of gen bit - it toggles at wrap time */
1675 qm_port->gen_bit = 1;
1676 dlb2_hw_cq_bitmask_init(qm_port, qm_port->cq_depth);
1678 qm_port->int_armed = false;
1680 /* Save off for later use in info and lookup APIs. */
1681 qm_port->qid_mappings = &dlb2->qm_dir_to_ev_queue_id[0];
1683 qm_port->dequeue_depth = dequeue_depth;
1685 /* Directed ports are auto-pop, by default. */
1686 qm_port->token_pop_mode = AUTO_POP;
1687 qm_port->owed_tokens = 0;
1688 qm_port->issued_releases = 0;
1690 /* Save config message too. */
1691 rte_memcpy(&qm_port->cfg.dir, &cfg, sizeof(qm_port->cfg.dir));
1694 qm_port->state = PORT_STARTED; /* enabled at create time */
1695 qm_port->config_state = DLB2_CONFIGURED;
1697 if (dlb2->version == DLB2_HW_V2) {
1698 qm_port->dir_credits = dir_credit_high_watermark;
1699 qm_port->ldb_credits = ldb_credit_high_watermark;
1700 qm_port->credit_pool[DLB2_DIR_QUEUE] = &dlb2->dir_credit_pool;
1701 qm_port->credit_pool[DLB2_LDB_QUEUE] = &dlb2->ldb_credit_pool;
1703 DLB2_LOG_DBG("dlb2: created dir port %d, depth = %d cr=%d,%d\n",
1706 dir_credit_high_watermark,
1707 ldb_credit_high_watermark);
1709 qm_port->credits = credit_high_watermark;
1710 qm_port->credit_pool[DLB2_COMBINED_POOL] = &dlb2->credit_pool;
1712 DLB2_LOG_DBG("dlb2: created dir port %d, depth = %d cr=%d\n",
1715 credit_high_watermark);
1718 #if (!defined RTE_ARCH_X86_64)
1719 qm_port->use_scalar = true;
1721 if ((qm_port->cq_depth > 64) ||
1722 (!rte_is_power_of_2(qm_port->cq_depth)) ||
1723 (dlb2->vector_opts_enabled == false))
1724 qm_port->use_scalar = true;
1727 rte_spinlock_unlock(&handle->resource_lock);
1734 dlb2_free_qe_mem(qm_port);
1736 rte_spinlock_unlock(&handle->resource_lock);
1738 DLB2_LOG_ERR("dlb2: create dir port failed!\n");
1744 dlb2_eventdev_port_setup(struct rte_eventdev *dev,
1746 const struct rte_event_port_conf *port_conf)
1748 struct dlb2_eventdev *dlb2;
1749 struct dlb2_eventdev_port *ev_port;
1751 uint32_t hw_credit_quanta, sw_credit_quanta;
1753 if (dev == NULL || port_conf == NULL) {
1754 DLB2_LOG_ERR("Null parameter\n");
1758 dlb2 = dlb2_pmd_priv(dev);
1760 if (ev_port_id >= DLB2_MAX_NUM_PORTS(dlb2->version))
1763 if (port_conf->dequeue_depth >
1764 evdev_dlb2_default_info.max_event_port_dequeue_depth ||
1765 port_conf->enqueue_depth >
1766 evdev_dlb2_default_info.max_event_port_enqueue_depth)
1769 ev_port = &dlb2->ev_ports[ev_port_id];
1771 if (ev_port->setup_done) {
1772 DLB2_LOG_ERR("evport %d is already configured\n", ev_port_id);
1776 ev_port->qm_port.is_directed = port_conf->event_port_cfg &
1777 RTE_EVENT_PORT_CFG_SINGLE_LINK;
1779 if (!ev_port->qm_port.is_directed) {
1780 ret = dlb2_hw_create_ldb_port(dlb2,
1782 port_conf->dequeue_depth,
1783 port_conf->enqueue_depth);
1785 DLB2_LOG_ERR("Failed to create the LB port, ev_port_id=%d\n",
1791 ret = dlb2_hw_create_dir_port(dlb2,
1793 port_conf->dequeue_depth,
1794 port_conf->enqueue_depth);
1796 DLB2_LOG_ERR("Failed to create the DIR port\n");
1801 /* Save off port config for reconfig */
1802 ev_port->conf = *port_conf;
1804 ev_port->id = ev_port_id;
1805 ev_port->enq_configured = true;
1806 ev_port->setup_done = true;
1807 ev_port->inflight_max = port_conf->new_event_threshold;
1808 ev_port->implicit_release = !(port_conf->event_port_cfg &
1809 RTE_EVENT_PORT_CFG_DISABLE_IMPL_REL);
1810 ev_port->outstanding_releases = 0;
1811 ev_port->inflight_credits = 0;
1812 ev_port->dlb2 = dlb2; /* reverse link */
1814 /* Default for worker ports */
1815 sw_credit_quanta = dlb2->sw_credit_quanta;
1816 hw_credit_quanta = dlb2->hw_credit_quanta;
1818 if (port_conf->event_port_cfg & RTE_EVENT_PORT_CFG_HINT_PRODUCER) {
1819 /* Producer type ports. Mostly enqueue */
1820 sw_credit_quanta = DLB2_SW_CREDIT_P_QUANTA_DEFAULT;
1821 hw_credit_quanta = DLB2_SW_CREDIT_P_BATCH_SZ;
1823 if (port_conf->event_port_cfg & RTE_EVENT_PORT_CFG_HINT_CONSUMER) {
1824 /* Consumer type ports. Mostly dequeue */
1825 sw_credit_quanta = DLB2_SW_CREDIT_C_QUANTA_DEFAULT;
1826 hw_credit_quanta = DLB2_SW_CREDIT_C_BATCH_SZ;
1828 ev_port->credit_update_quanta = sw_credit_quanta;
1829 ev_port->qm_port.hw_credit_quanta = hw_credit_quanta;
1831 /* Tear down pre-existing port->queue links */
1832 if (dlb2->run_state == DLB2_RUN_STATE_STOPPED)
1833 dlb2_port_link_teardown(dlb2, &dlb2->ev_ports[ev_port_id]);
1835 dev->data->ports[ev_port_id] = &dlb2->ev_ports[ev_port_id];
1841 dlb2_hw_map_ldb_qid_to_port(struct dlb2_hw_dev *handle,
1842 uint32_t qm_port_id,
1846 struct dlb2_map_qid_args cfg;
1853 cfg.port_id = qm_port_id;
1855 cfg.priority = EV_TO_DLB2_PRIO(priority);
1857 ret = dlb2_iface_map_qid(handle, &cfg);
1859 DLB2_LOG_ERR("dlb2: map qid error, ret=%d (driver status: %s)\n",
1860 ret, dlb2_error_strings[cfg.response.status]);
1861 DLB2_LOG_ERR("dlb2: grp=%d, qm_port=%d, qm_qid=%d prio=%d\n",
1862 handle->domain_id, cfg.port_id,
1866 DLB2_LOG_DBG("dlb2: mapped queue %d to qm_port %d\n",
1867 qm_qid, qm_port_id);
1874 dlb2_event_queue_join_ldb(struct dlb2_eventdev *dlb2,
1875 struct dlb2_eventdev_port *ev_port,
1876 struct dlb2_eventdev_queue *ev_queue,
1879 int first_avail = -1;
1882 for (i = 0; i < DLB2_MAX_NUM_QIDS_PER_LDB_CQ; i++) {
1883 if (ev_port->link[i].valid) {
1884 if (ev_port->link[i].queue_id == ev_queue->id &&
1885 ev_port->link[i].priority == priority) {
1886 if (ev_port->link[i].mapped)
1887 return 0; /* already mapped */
1890 } else if (first_avail == -1)
1893 if (first_avail == -1) {
1894 DLB2_LOG_ERR("dlb2: qm_port %d has no available QID slots.\n",
1895 ev_port->qm_port.id);
1899 ret = dlb2_hw_map_ldb_qid_to_port(&dlb2->qm_instance,
1900 ev_port->qm_port.id,
1901 ev_queue->qm_queue.id,
1905 ev_port->link[first_avail].mapped = true;
1911 dlb2_hw_create_dir_queue(struct dlb2_eventdev *dlb2,
1912 struct dlb2_eventdev_queue *ev_queue,
1915 struct dlb2_hw_dev *handle = &dlb2->qm_instance;
1916 struct dlb2_create_dir_queue_args cfg;
1919 /* The directed port is always configured before its queue */
1920 cfg.port_id = qm_port_id;
1922 if (ev_queue->depth_threshold == 0) {
1923 cfg.depth_threshold = dlb2->default_depth_thresh;
1924 ev_queue->depth_threshold =
1925 dlb2->default_depth_thresh;
1927 cfg.depth_threshold = ev_queue->depth_threshold;
1929 ret = dlb2_iface_dir_queue_create(handle, &cfg);
1931 DLB2_LOG_ERR("dlb2: create DIR event queue error, ret=%d (driver status: %s)\n",
1932 ret, dlb2_error_strings[cfg.response.status]);
1936 return cfg.response.id;
1940 dlb2_eventdev_dir_queue_setup(struct dlb2_eventdev *dlb2,
1941 struct dlb2_eventdev_queue *ev_queue,
1942 struct dlb2_eventdev_port *ev_port)
1946 qm_qid = dlb2_hw_create_dir_queue(dlb2, ev_queue, ev_port->qm_port.id);
1949 DLB2_LOG_ERR("Failed to create the DIR queue\n");
1953 dlb2->qm_dir_to_ev_queue_id[qm_qid] = ev_queue->id;
1955 ev_queue->qm_queue.id = qm_qid;
1961 dlb2_do_port_link(struct rte_eventdev *dev,
1962 struct dlb2_eventdev_queue *ev_queue,
1963 struct dlb2_eventdev_port *ev_port,
1966 struct dlb2_eventdev *dlb2 = dlb2_pmd_priv(dev);
1969 /* Don't link until start time. */
1970 if (dlb2->run_state == DLB2_RUN_STATE_STOPPED)
1973 if (ev_queue->qm_queue.is_directed)
1974 err = dlb2_eventdev_dir_queue_setup(dlb2, ev_queue, ev_port);
1976 err = dlb2_event_queue_join_ldb(dlb2, ev_port, ev_queue, prio);
1979 DLB2_LOG_ERR("port link failure for %s ev_q %d, ev_port %d\n",
1980 ev_queue->qm_queue.is_directed ? "DIR" : "LDB",
1981 ev_queue->id, ev_port->id);
1991 dlb2_validate_port_link(struct dlb2_eventdev_port *ev_port,
1996 struct dlb2_eventdev *dlb2 = ev_port->dlb2;
1997 struct dlb2_eventdev_queue *ev_queue;
1998 bool port_is_dir, queue_is_dir;
2000 if (queue_id > dlb2->num_queues) {
2001 rte_errno = -EINVAL;
2005 ev_queue = &dlb2->ev_queues[queue_id];
2007 if (!ev_queue->setup_done &&
2008 ev_queue->qm_queue.config_state != DLB2_PREV_CONFIGURED) {
2009 rte_errno = -EINVAL;
2013 port_is_dir = ev_port->qm_port.is_directed;
2014 queue_is_dir = ev_queue->qm_queue.is_directed;
2016 if (port_is_dir != queue_is_dir) {
2017 DLB2_LOG_ERR("%s queue %u can't link to %s port %u\n",
2018 queue_is_dir ? "DIR" : "LDB", ev_queue->id,
2019 port_is_dir ? "DIR" : "LDB", ev_port->id);
2021 rte_errno = -EINVAL;
2025 /* Check if there is space for the requested link */
2026 if (!link_exists && index == -1) {
2027 DLB2_LOG_ERR("no space for new link\n");
2028 rte_errno = -ENOSPC;
2032 /* Check if the directed port is already linked */
2033 if (ev_port->qm_port.is_directed && ev_port->num_links > 0 &&
2035 DLB2_LOG_ERR("Can't link DIR port %d to >1 queues\n",
2037 rte_errno = -EINVAL;
2041 /* Check if the directed queue is already linked */
2042 if (ev_queue->qm_queue.is_directed && ev_queue->num_links > 0 &&
2044 DLB2_LOG_ERR("Can't link DIR queue %d to >1 ports\n",
2046 rte_errno = -EINVAL;
2054 dlb2_eventdev_port_link(struct rte_eventdev *dev, void *event_port,
2055 const uint8_t queues[], const uint8_t priorities[],
2059 struct dlb2_eventdev_port *ev_port = event_port;
2060 struct dlb2_eventdev *dlb2;
2065 if (ev_port == NULL) {
2066 DLB2_LOG_ERR("dlb2: evport not setup\n");
2067 rte_errno = -EINVAL;
2071 if (!ev_port->setup_done &&
2072 ev_port->qm_port.config_state != DLB2_PREV_CONFIGURED) {
2073 DLB2_LOG_ERR("dlb2: evport not setup\n");
2074 rte_errno = -EINVAL;
2078 /* Note: rte_event_port_link() ensures the PMD won't receive a NULL
2081 if (nb_links == 0) {
2082 DLB2_LOG_DBG("dlb2: nb_links is 0\n");
2083 return 0; /* Ignore and return success */
2086 dlb2 = ev_port->dlb2;
2088 DLB2_LOG_DBG("Linking %u queues to %s port %d\n",
2090 ev_port->qm_port.is_directed ? "DIR" : "LDB",
2093 for (i = 0; i < nb_links; i++) {
2094 struct dlb2_eventdev_queue *ev_queue;
2095 uint8_t queue_id, prio;
2099 queue_id = queues[i];
2100 prio = priorities[i];
2102 /* Check if the link already exists. */
2103 for (j = 0; j < DLB2_MAX_NUM_QIDS_PER_LDB_CQ; j++)
2104 if (ev_port->link[j].valid) {
2105 if (ev_port->link[j].queue_id == queue_id) {
2110 } else if (index == -1) {
2114 /* could not link */
2118 /* Check if already linked at the requested priority */
2119 if (found && ev_port->link[j].priority == prio)
2122 if (dlb2_validate_port_link(ev_port, queue_id, found, index))
2123 break; /* return index of offending queue */
2125 ev_queue = &dlb2->ev_queues[queue_id];
2127 if (dlb2_do_port_link(dev, ev_queue, ev_port, prio))
2128 break; /* return index of offending queue */
2130 ev_queue->num_links++;
2132 ev_port->link[index].queue_id = queue_id;
2133 ev_port->link[index].priority = prio;
2134 ev_port->link[index].valid = true;
2135 /* Entry already exists? If so, then must be prio change */
2137 ev_port->num_links++;
2143 dlb2_hw_unmap_ldb_qid_from_port(struct dlb2_hw_dev *handle,
2144 uint32_t qm_port_id,
2147 struct dlb2_unmap_qid_args cfg;
2153 cfg.port_id = qm_port_id;
2156 ret = dlb2_iface_unmap_qid(handle, &cfg);
2158 DLB2_LOG_ERR("dlb2: unmap qid error, ret=%d (driver status: %s)\n",
2159 ret, dlb2_error_strings[cfg.response.status]);
2165 dlb2_event_queue_detach_ldb(struct dlb2_eventdev *dlb2,
2166 struct dlb2_eventdev_port *ev_port,
2167 struct dlb2_eventdev_queue *ev_queue)
2171 /* Don't unlink until start time. */
2172 if (dlb2->run_state == DLB2_RUN_STATE_STOPPED)
2175 for (i = 0; i < DLB2_MAX_NUM_QIDS_PER_LDB_CQ; i++) {
2176 if (ev_port->link[i].valid &&
2177 ev_port->link[i].queue_id == ev_queue->id)
2181 /* This is expected with the eventdev API, which
2182 * blindly attempts to unmap all queues.
2184 if (i == DLB2_MAX_NUM_QIDS_PER_LDB_CQ) {
2185 DLB2_LOG_DBG("dlb2: ignoring LB QID %d not mapped for qm_port %d.\n",
2186 ev_queue->qm_queue.id,
2187 ev_port->qm_port.id);
2191 ret = dlb2_hw_unmap_ldb_qid_from_port(&dlb2->qm_instance,
2192 ev_port->qm_port.id,
2193 ev_queue->qm_queue.id);
2195 ev_port->link[i].mapped = false;
2201 dlb2_eventdev_port_unlink(struct rte_eventdev *dev, void *event_port,
2202 uint8_t queues[], uint16_t nb_unlinks)
2204 struct dlb2_eventdev_port *ev_port = event_port;
2205 struct dlb2_eventdev *dlb2;
2210 if (!ev_port->setup_done) {
2211 DLB2_LOG_ERR("dlb2: evport %d is not configured\n",
2213 rte_errno = -EINVAL;
2217 if (queues == NULL || nb_unlinks == 0) {
2218 DLB2_LOG_DBG("dlb2: queues is NULL or nb_unlinks is 0\n");
2219 return 0; /* Ignore and return success */
2222 if (ev_port->qm_port.is_directed) {
2223 DLB2_LOG_DBG("dlb2: ignore unlink from dir port %d\n",
2226 return nb_unlinks; /* as if success */
2229 dlb2 = ev_port->dlb2;
2231 for (i = 0; i < nb_unlinks; i++) {
2232 struct dlb2_eventdev_queue *ev_queue;
2235 if (queues[i] >= dlb2->num_queues) {
2236 DLB2_LOG_ERR("dlb2: invalid queue id %d\n", queues[i]);
2237 rte_errno = -EINVAL;
2238 return i; /* return index of offending queue */
2241 ev_queue = &dlb2->ev_queues[queues[i]];
2243 /* Does a link exist? */
2244 for (j = 0; j < DLB2_MAX_NUM_QIDS_PER_LDB_CQ; j++)
2245 if (ev_port->link[j].queue_id == queues[i] &&
2246 ev_port->link[j].valid)
2249 if (j == DLB2_MAX_NUM_QIDS_PER_LDB_CQ)
2252 ret = dlb2_event_queue_detach_ldb(dlb2, ev_port, ev_queue);
2254 DLB2_LOG_ERR("unlink err=%d for port %d queue %d\n",
2255 ret, ev_port->id, queues[i]);
2256 rte_errno = -ENOENT;
2257 return i; /* return index of offending queue */
2260 ev_port->link[j].valid = false;
2261 ev_port->num_links--;
2262 ev_queue->num_links--;
2269 dlb2_eventdev_port_unlinks_in_progress(struct rte_eventdev *dev,
2272 struct dlb2_eventdev_port *ev_port = event_port;
2273 struct dlb2_eventdev *dlb2;
2274 struct dlb2_hw_dev *handle;
2275 struct dlb2_pending_port_unmaps_args cfg;
2280 if (!ev_port->setup_done) {
2281 DLB2_LOG_ERR("dlb2: evport %d is not configured\n",
2283 rte_errno = -EINVAL;
2287 cfg.port_id = ev_port->qm_port.id;
2288 dlb2 = ev_port->dlb2;
2289 handle = &dlb2->qm_instance;
2290 ret = dlb2_iface_pending_port_unmaps(handle, &cfg);
2293 DLB2_LOG_ERR("dlb2: num_unlinks_in_progress ret=%d (driver status: %s)\n",
2294 ret, dlb2_error_strings[cfg.response.status]);
2298 return cfg.response.id;
2302 dlb2_eventdev_reapply_configuration(struct rte_eventdev *dev)
2304 struct dlb2_eventdev *dlb2 = dlb2_pmd_priv(dev);
2307 /* If an event queue or port was previously configured, but hasn't been
2308 * reconfigured, reapply its original configuration.
2310 for (i = 0; i < dlb2->num_queues; i++) {
2311 struct dlb2_eventdev_queue *ev_queue;
2313 ev_queue = &dlb2->ev_queues[i];
2315 if (ev_queue->qm_queue.config_state != DLB2_PREV_CONFIGURED)
2318 ret = dlb2_eventdev_queue_setup(dev, i, &ev_queue->conf);
2320 DLB2_LOG_ERR("dlb2: failed to reconfigure queue %d", i);
2325 for (i = 0; i < dlb2->num_ports; i++) {
2326 struct dlb2_eventdev_port *ev_port = &dlb2->ev_ports[i];
2328 if (ev_port->qm_port.config_state != DLB2_PREV_CONFIGURED)
2331 ret = dlb2_eventdev_port_setup(dev, i, &ev_port->conf);
2333 DLB2_LOG_ERR("dlb2: failed to reconfigure ev_port %d",
2343 dlb2_eventdev_apply_port_links(struct rte_eventdev *dev)
2345 struct dlb2_eventdev *dlb2 = dlb2_pmd_priv(dev);
2348 /* Perform requested port->queue links */
2349 for (i = 0; i < dlb2->num_ports; i++) {
2350 struct dlb2_eventdev_port *ev_port = &dlb2->ev_ports[i];
2353 for (j = 0; j < DLB2_MAX_NUM_QIDS_PER_LDB_CQ; j++) {
2354 struct dlb2_eventdev_queue *ev_queue;
2355 uint8_t prio, queue_id;
2357 if (!ev_port->link[j].valid)
2360 prio = ev_port->link[j].priority;
2361 queue_id = ev_port->link[j].queue_id;
2363 if (dlb2_validate_port_link(ev_port, queue_id, true, j))
2366 ev_queue = &dlb2->ev_queues[queue_id];
2368 if (dlb2_do_port_link(dev, ev_queue, ev_port, prio))
2377 dlb2_eventdev_start(struct rte_eventdev *dev)
2379 struct dlb2_eventdev *dlb2 = dlb2_pmd_priv(dev);
2380 struct dlb2_hw_dev *handle = &dlb2->qm_instance;
2381 struct dlb2_start_domain_args cfg;
2384 rte_spinlock_lock(&dlb2->qm_instance.resource_lock);
2385 if (dlb2->run_state != DLB2_RUN_STATE_STOPPED) {
2386 DLB2_LOG_ERR("bad state %d for dev_start\n",
2387 (int)dlb2->run_state);
2388 rte_spinlock_unlock(&dlb2->qm_instance.resource_lock);
2391 dlb2->run_state = DLB2_RUN_STATE_STARTING;
2392 rte_spinlock_unlock(&dlb2->qm_instance.resource_lock);
2394 /* If the device was configured more than once, some event ports and/or
2395 * queues may need to be reconfigured.
2397 ret = dlb2_eventdev_reapply_configuration(dev);
2401 /* The DLB PMD delays port links until the device is started. */
2402 ret = dlb2_eventdev_apply_port_links(dev);
2406 for (i = 0; i < dlb2->num_ports; i++) {
2407 if (!dlb2->ev_ports[i].setup_done) {
2408 DLB2_LOG_ERR("dlb2: port %d not setup", i);
2413 for (i = 0; i < dlb2->num_queues; i++) {
2414 if (dlb2->ev_queues[i].num_links == 0) {
2415 DLB2_LOG_ERR("dlb2: queue %d is not linked", i);
2420 ret = dlb2_iface_sched_domain_start(handle, &cfg);
2422 DLB2_LOG_ERR("dlb2: sched_domain_start ret=%d (driver status: %s)\n",
2423 ret, dlb2_error_strings[cfg.response.status]);
2427 dlb2->run_state = DLB2_RUN_STATE_STARTED;
2428 DLB2_LOG_DBG("dlb2: sched_domain_start completed OK\n");
2433 static uint8_t cmd_byte_map[DLB2_NUM_PORT_TYPES][DLB2_NUM_HW_SCHED_TYPES] = {
2435 /* Load-balanced cmd bytes */
2436 [RTE_EVENT_OP_NEW] = DLB2_NEW_CMD_BYTE,
2437 [RTE_EVENT_OP_FORWARD] = DLB2_FWD_CMD_BYTE,
2438 [RTE_EVENT_OP_RELEASE] = DLB2_COMP_CMD_BYTE,
2441 /* Directed cmd bytes */
2442 [RTE_EVENT_OP_NEW] = DLB2_NEW_CMD_BYTE,
2443 [RTE_EVENT_OP_FORWARD] = DLB2_NEW_CMD_BYTE,
2444 [RTE_EVENT_OP_RELEASE] = DLB2_NOOP_CMD_BYTE,
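/* The table maps [port type][event op] to the HCW command byte: on
 * directed ports a FORWARD is issued as a NEW enqueue and a RELEASE
 * produces no hardware command at all.
 */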
2448 static inline uint32_t
2449 dlb2_port_credits_get(struct dlb2_port *qm_port,
2450 enum dlb2_hw_queue_types type)
2452 uint32_t credits = *qm_port->credit_pool[type];
2453 /* By default hw_credit_quanta is DLB2_SW_CREDIT_BATCH_SZ */
2454 uint32_t batch_size = qm_port->hw_credit_quanta;
2456 if (unlikely(credits < batch_size))
2457 batch_size = credits;
2459 if (likely(credits &&
2460 __atomic_compare_exchange_n(
2461 qm_port->credit_pool[type],
2462 &credits, credits - batch_size, false,
2463 __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST)))
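/* Illustrative sketch, not part of the driver: the batched credit
 * acquisition above in isolation. A port claims up to one batch of hardware
 * credits from the shared pool with a single compare-and-swap; if the CAS
 * loses a race the caller simply retries on a later enqueue. All names here
 * are hypothetical.
 */
static inline uint32_t
example_claim_credit_batch(uint32_t *pool, uint32_t batch)
{
	uint32_t avail = __atomic_load_n(pool, __ATOMIC_SEQ_CST);

	if (avail < batch)
		batch = avail;

	if (batch != 0 &&
	    __atomic_compare_exchange_n(pool, &avail, avail - batch, false,
					__ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST))
		return batch;	/* this many credits now cached locally */

	return 0;		/* pool empty, or another port won the CAS */
}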
2470 dlb2_replenish_sw_credits(struct dlb2_eventdev *dlb2,
2471 struct dlb2_eventdev_port *ev_port)
2473 uint16_t quanta = ev_port->credit_update_quanta;
2475 if (ev_port->inflight_credits >= quanta * 2) {
2476 /* Replenish credits, saving one quanta for enqueues */
2477 uint16_t val = ev_port->inflight_credits - quanta;
2479 __atomic_fetch_sub(&dlb2->inflights, val, __ATOMIC_SEQ_CST);
2480 ev_port->inflight_credits -= val;
2485 dlb2_check_enqueue_sw_credits(struct dlb2_eventdev *dlb2,
2486 struct dlb2_eventdev_port *ev_port)
2488 uint32_t sw_inflights = __atomic_load_n(&dlb2->inflights,
2492 if (unlikely(ev_port->inflight_max < sw_inflights)) {
2493 DLB2_INC_STAT(ev_port->stats.traffic.tx_nospc_inflight_max, 1);
2494 rte_errno = -ENOSPC;
2498 if (ev_port->inflight_credits < num) {
2499 /* check if event enqueue brings ev_port over max threshold */
2500 uint32_t credit_update_quanta = ev_port->credit_update_quanta;
2502 if (sw_inflights + credit_update_quanta >
2503 dlb2->new_event_limit) {
2505 ev_port->stats.traffic.tx_nospc_new_event_limit,
2507 rte_errno = -ENOSPC;
2511 __atomic_fetch_add(&dlb2->inflights, credit_update_quanta,
2513 ev_port->inflight_credits += (credit_update_quanta);
2515 if (ev_port->inflight_credits < num) {
2517 ev_port->stats.traffic.tx_nospc_inflight_credits,
2519 rte_errno = -ENOSPC;
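/* Illustrative sketch, not part of the driver: the software-credit scheme
 * above reduced to its core. Each port caches credits locally and only
 * touches the device-wide in-flight counter when the cache runs dry, so the
 * atomic add is amortized over a whole quantum of NEW events. All names are
 * hypothetical.
 */
struct example_sw_credits {
	uint32_t cached;	/* credits held by this port */
	uint32_t quantum;	/* refill size */
};

static inline int
example_take_sw_credit(uint32_t *global_inflights, uint32_t new_event_limit,
		       struct example_sw_credits *c)
{
	if (c->cached == 0) {
		uint32_t cur = __atomic_load_n(global_inflights,
					       __ATOMIC_SEQ_CST);

		if (cur + c->quantum > new_event_limit)
			return -1;	/* would exceed the in-flight limit */

		__atomic_fetch_add(global_inflights, c->quantum,
				   __ATOMIC_SEQ_CST);
		c->cached = c->quantum;
	}

	c->cached--;	/* one credit consumed by this NEW event */
	return 0;
}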
2528 dlb2_check_enqueue_hw_ldb_credits(struct dlb2_port *qm_port)
2530 if (unlikely(qm_port->cached_ldb_credits == 0)) {
2531 qm_port->cached_ldb_credits =
2532 dlb2_port_credits_get(qm_port,
2534 if (unlikely(qm_port->cached_ldb_credits == 0)) {
2536 qm_port->ev_port->stats.traffic.tx_nospc_ldb_hw_credits,
2538 DLB2_LOG_DBG("ldb credits exhausted\n");
2539 return 1; /* credits exhausted */
2547 dlb2_check_enqueue_hw_dir_credits(struct dlb2_port *qm_port)
2549 if (unlikely(qm_port->cached_dir_credits == 0)) {
2550 qm_port->cached_dir_credits =
2551 dlb2_port_credits_get(qm_port,
2553 if (unlikely(qm_port->cached_dir_credits == 0)) {
2555 qm_port->ev_port->stats.traffic.tx_nospc_dir_hw_credits,
2557 DLB2_LOG_DBG("dir credits exhausted\n");
2558 return 1; /* credits exhausted */
2566 dlb2_check_enqueue_hw_credits(struct dlb2_port *qm_port)
2568 if (unlikely(qm_port->cached_credits == 0)) {
2569 qm_port->cached_credits =
2570 dlb2_port_credits_get(qm_port,
2571 DLB2_COMBINED_POOL);
2572 if (unlikely(qm_port->cached_credits == 0)) {
2574 qm_port->ev_port->stats.traffic.tx_nospc_hw_credits, 1);
2575 DLB2_LOG_DBG("credits exhausted\n");
2576 return 1; /* credits exhausted */
2583 static __rte_always_inline void
2584 dlb2_pp_write(struct dlb2_enqueue_qe *qe4,
2585 struct process_local_port_data *port_data)
2587 dlb2_movdir64b(port_data->pp_addr, qe4);
2591 dlb2_consume_qe_immediate(struct dlb2_port *qm_port, int num)
2593 struct process_local_port_data *port_data;
2594 struct dlb2_cq_pop_qe *qe;
2596 RTE_ASSERT(qm_port->config_state == DLB2_CONFIGURED);
2598 qe = qm_port->consume_qe;
2600 qe->tokens = num - 1;
2602 /* No store fence needed since no pointer is being sent, and CQ token
2603 * pops can be safely reordered with other HCWs.
2605 port_data = &dlb2_port[qm_port->id][PORT_TYPE(qm_port)];
2607 dlb2_movntdq_single(port_data->pp_addr, qe);
2609 DLB2_LOG_DBG("dlb2: consume immediate - %d QEs\n", num);
2611 qm_port->owed_tokens = 0;
2617 dlb2_hw_do_enqueue(struct dlb2_port *qm_port,
2619 struct process_local_port_data *port_data)
2621 /* Since MOVDIR64B is weakly-ordered, use an SFENCE to ensure that
2622 * application writes complete before enqueueing the QE.
2627 dlb2_pp_write(qm_port->qe4, port_data);
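/* Illustrative sketch, not part of the driver: the ordering rule the comment
 * above describes. Because the producer-port write is weakly ordered
 * (MOVDIR64B), a store fence must separate the application's payload stores
 * from the doorbell-style write, or the device could observe the QE before
 * the data it refers to. The plain store below is only a stand-in for the
 * real 64-byte MMIO write.
 */
static inline void
example_ordered_doorbell(volatile uint64_t *doorbell, uint64_t value)
{
	_mm_sfence();		/* retire prior payload stores first */
	*doorbell = value;	/* then ring the doorbell */
}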
2631 dlb2_construct_token_pop_qe(struct dlb2_port *qm_port, int idx)
2633 struct dlb2_cq_pop_qe *qe = (void *)qm_port->qe4;
2634 int num = qm_port->owed_tokens;
2636 qe[idx].cmd_byte = DLB2_POP_CMD_BYTE;
2637 qe[idx].tokens = num - 1;
2639 qm_port->owed_tokens = 0;
2643 dlb2_event_build_hcws(struct dlb2_port *qm_port,
2644 const struct rte_event ev[],
2646 uint8_t *sched_type,
2649 struct dlb2_enqueue_qe *qe;
2650 uint16_t sched_word[4];
2656 sse_qe[0] = _mm_setzero_si128();
2657 sse_qe[1] = _mm_setzero_si128();
2661 /* Construct the metadata portion of two HCWs in one 128b SSE
2662 * register. HCW metadata is constructed in the SSE registers
2664 * sse_qe[0][63:0]: qe[0]'s metadata
2665 * sse_qe[0][127:64]: qe[1]'s metadata
2666 * sse_qe[1][63:0]: qe[2]'s metadata
2667 * sse_qe[1][127:64]: qe[3]'s metadata
2670 /* Convert the event operation into a command byte and store it
2672 * sse_qe[0][63:56] = cmd_byte_map[is_directed][ev[0].op]
2673 * sse_qe[0][127:120] = cmd_byte_map[is_directed][ev[1].op]
2674 * sse_qe[1][63:56] = cmd_byte_map[is_directed][ev[2].op]
2675 * sse_qe[1][127:120] = cmd_byte_map[is_directed][ev[3].op]
2677 #define DLB2_QE_CMD_BYTE 7
2678 sse_qe[0] = _mm_insert_epi8(sse_qe[0],
2679 cmd_byte_map[qm_port->is_directed][ev[0].op],
2681 sse_qe[0] = _mm_insert_epi8(sse_qe[0],
2682 cmd_byte_map[qm_port->is_directed][ev[1].op],
2683 DLB2_QE_CMD_BYTE + 8);
2684 sse_qe[1] = _mm_insert_epi8(sse_qe[1],
2685 cmd_byte_map[qm_port->is_directed][ev[2].op],
2687 sse_qe[1] = _mm_insert_epi8(sse_qe[1],
2688 cmd_byte_map[qm_port->is_directed][ev[3].op],
2689 DLB2_QE_CMD_BYTE + 8);
2691 /* Store priority, scheduling type, and queue ID in the sched
2692 * word array because these values are re-used when the
2693 * destination is a directed queue.
2695 sched_word[0] = EV_TO_DLB2_PRIO(ev[0].priority) << 10 |
2696 sched_type[0] << 8 | queue_id[0];
2698 sched_word[1] = EV_TO_DLB2_PRIO(ev[1].priority) << 10 |
2699 sched_type[1] << 8 | queue_id[1];
2701 sched_word[2] = EV_TO_DLB2_PRIO(ev[2].priority) << 10 |
2702 sched_type[2] << 8 | queue_id[2];
2704 sched_word[3] = EV_TO_DLB2_PRIO(ev[3].priority) << 10 |
2705 sched_type[3] << 8 | queue_id[3];
2708 /* Store the event priority, scheduling type, and queue ID in
2710 * sse_qe[0][31:16] = sched_word[0]
2711 * sse_qe[0][95:80] = sched_word[1]
2712 * sse_qe[1][31:16] = sched_word[2]
2713 * sse_qe[1][95:80] = sched_word[3]
2715 #define DLB2_QE_QID_SCHED_WORD 1
2716 sse_qe[0] = _mm_insert_epi16(sse_qe[0], sched_word[0],
2718 DLB2_QE_QID_SCHED_WORD);
2719 sse_qe[0] = _mm_insert_epi16(sse_qe[0], sched_word[1],
2721 DLB2_QE_QID_SCHED_WORD + 4);
2722 sse_qe[1] = _mm_insert_epi16(sse_qe[1], sched_word[2],
2724 DLB2_QE_QID_SCHED_WORD);
2725 sse_qe[1] = _mm_insert_epi16(sse_qe[1], sched_word[3],
2727 DLB2_QE_QID_SCHED_WORD + 4);
2729 /* If the destination is a load-balanced queue, store the lock
2730 * ID. If it is a directed queue, DLB places this field in
2731 * bytes 10-11 of the received QE, so we format it accordingly:
2732 * sse_qe[0][47:32] = dir queue ? sched_word[0] : flow_id[0]
2733 * sse_qe[0][111:96] = dir queue ? sched_word[1] : flow_id[1]
2734 * sse_qe[1][47:32] = dir queue ? sched_word[2] : flow_id[2]
2735 * sse_qe[1][111:96] = dir queue ? sched_word[3] : flow_id[3]
2737 #define DLB2_QE_LOCK_ID_WORD 2
2738 sse_qe[0] = _mm_insert_epi16(sse_qe[0],
2739 (sched_type[0] == DLB2_SCHED_DIRECTED) ?
2740 sched_word[0] : ev[0].flow_id,
2741 DLB2_QE_LOCK_ID_WORD);
2742 sse_qe[0] = _mm_insert_epi16(sse_qe[0],
2743 (sched_type[1] == DLB2_SCHED_DIRECTED) ?
2744 sched_word[1] : ev[1].flow_id,
2745 DLB2_QE_LOCK_ID_WORD + 4);
2746 sse_qe[1] = _mm_insert_epi16(sse_qe[1],
2747 (sched_type[2] == DLB2_SCHED_DIRECTED) ?
2748 sched_word[2] : ev[2].flow_id,
2749 DLB2_QE_LOCK_ID_WORD);
2750 sse_qe[1] = _mm_insert_epi16(sse_qe[1],
2751 (sched_type[3] == DLB2_SCHED_DIRECTED) ?
2752 sched_word[3] : ev[3].flow_id,
2753 DLB2_QE_LOCK_ID_WORD + 4);
2755 /* Store the event type and sub event type in the metadata:
2756 * sse_qe[0][15:0] = sub_event_type[0] << 8 | event_type[0]
2757 * sse_qe[0][79:64] = sub_event_type[1] << 8 | event_type[1]
2758 * sse_qe[1][15:0] = sub_event_type[2] << 8 | event_type[2]
2759 * sse_qe[1][79:64] = sub_event_type[3] << 8 | event_type[3]
2761 #define DLB2_QE_EV_TYPE_WORD 0
2762 sse_qe[0] = _mm_insert_epi16(sse_qe[0],
2763 ev[0].sub_event_type << 8 | ev[0].event_type,
2765 DLB2_QE_EV_TYPE_WORD);
2766 sse_qe[0] = _mm_insert_epi16(sse_qe[0],
2767 ev[1].sub_event_type << 8 | ev[1].event_type,
2769 DLB2_QE_EV_TYPE_WORD + 4);
2770 sse_qe[1] = _mm_insert_epi16(sse_qe[1],
2771 ev[2].sub_event_type << 8 | ev[2].event_type,
2773 DLB2_QE_EV_TYPE_WORD);
2774 sse_qe[1] = _mm_insert_epi16(sse_qe[1],
2775 ev[3].sub_event_type << 8 | ev[3].event_type,
2777 DLB2_QE_EV_TYPE_WORD + 4);
2779 /* Store the metadata to memory (use the double-precision
2780 * _mm_storeh_pd because there is no integer function for
2781 * storing the upper 64b):
2782 * qe[0] metadata = sse_qe[0][63:0]
2783 * qe[1] metadata = sse_qe[0][127:64]
2784 * qe[2] metadata = sse_qe[1][63:0]
2785 * qe[3] metadata = sse_qe[1][127:64]
2787 _mm_storel_epi64((__m128i *)&qe[0].u.opaque_data, sse_qe[0]);
2788 _mm_storeh_pd((double *)&qe[1].u.opaque_data,
2789 (__m128d)sse_qe[0]);
2790 _mm_storel_epi64((__m128i *)&qe[2].u.opaque_data, sse_qe[1]);
2791 _mm_storeh_pd((double *)&qe[3].u.opaque_data,
2792 (__m128d)sse_qe[1]);
2794 qe[0].data = ev[0].u64;
2795 qe[1].data = ev[1].u64;
2796 qe[2].data = ev[2].u64;
2797 qe[3].data = ev[3].u64;
2803 for (i = 0; i < num; i++) {
2805 cmd_byte_map[qm_port->is_directed][ev[i].op];
2806 qe[i].sched_type = sched_type[i];
2807 qe[i].data = ev[i].u64;
2808 qe[i].qid = queue_id[i];
2809 qe[i].priority = EV_TO_DLB2_PRIO(ev[i].priority);
2810 qe[i].lock_id = ev[i].flow_id;
2811 if (sched_type[i] == DLB2_SCHED_DIRECTED) {
2812 struct dlb2_msg_info *info =
2813 (struct dlb2_msg_info *)&qe[i].lock_id;
2815 info->qid = queue_id[i];
2816 info->sched_type = DLB2_SCHED_DIRECTED;
2817 info->priority = qe[i].priority;
2819 qe[i].u.event_type.major = ev[i].event_type;
2820 qe[i].u.event_type.sub = ev[i].sub_event_type;
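/* Illustrative sketch, not part of the driver: the lane arithmetic used in
 * the SSE path above. Two 64-bit metadata words are built in one 128-bit
 * register: a 16-bit field for entry 0 goes into word lane N, the same field
 * for entry 1 into lane N + 4, and the two halves are then spilled with a
 * 64-bit integer store plus a high-half double store. Lane 1 and the values
 * here are arbitrary.
 */
static inline void
example_pack_two_meta_words(uint64_t out[2], uint16_t meta0, uint16_t meta1)
{
	__m128i v = _mm_setzero_si128();

	v = _mm_insert_epi16(v, meta0, 1);	/* entry 0: word lane 1 */
	v = _mm_insert_epi16(v, meta1, 1 + 4);	/* entry 1: lane 1 + 4  */

	_mm_storel_epi64((__m128i *)&out[0], v);	/* bits [63:0]   */
	_mm_storeh_pd((double *)&out[1], (__m128d)v);	/* bits [127:64] */
}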
2829 dlb2_event_enqueue_prep(struct dlb2_eventdev_port *ev_port,
2830 struct dlb2_port *qm_port,
2831 const struct rte_event ev[],
2832 uint8_t *sched_type,
2835 struct dlb2_eventdev *dlb2 = ev_port->dlb2;
2836 struct dlb2_eventdev_queue *ev_queue;
2837 uint16_t *cached_credits = NULL;
2838 struct dlb2_queue *qm_queue;
2840 ev_queue = &dlb2->ev_queues[ev->queue_id];
2841 qm_queue = &ev_queue->qm_queue;
2842 *queue_id = qm_queue->id;
2844 /* Ignore sched_type and hardware credits on release events */
2845 if (ev->op == RTE_EVENT_OP_RELEASE)
2848 if (!qm_queue->is_directed) {
2849 /* Load balanced destination queue */
2851 if (dlb2->version == DLB2_HW_V2) {
2852 if (dlb2_check_enqueue_hw_ldb_credits(qm_port)) {
2853 rte_errno = -ENOSPC;
2856 cached_credits = &qm_port->cached_ldb_credits;
2858 if (dlb2_check_enqueue_hw_credits(qm_port)) {
2859 rte_errno = -ENOSPC;
2862 cached_credits = &qm_port->cached_credits;
2864 switch (ev->sched_type) {
2865 case RTE_SCHED_TYPE_ORDERED:
2866 DLB2_LOG_DBG("dlb2: put_qe: RTE_SCHED_TYPE_ORDERED\n");
2867 if (qm_queue->sched_type != RTE_SCHED_TYPE_ORDERED) {
2868 DLB2_LOG_ERR("dlb2: tried to send ordered event to unordered queue %d\n",
2870 rte_errno = -EINVAL;
2873 *sched_type = DLB2_SCHED_ORDERED;
2875 case RTE_SCHED_TYPE_ATOMIC:
2876 DLB2_LOG_DBG("dlb2: put_qe: RTE_SCHED_TYPE_ATOMIC\n");
2877 *sched_type = DLB2_SCHED_ATOMIC;
2879 case RTE_SCHED_TYPE_PARALLEL:
2880 DLB2_LOG_DBG("dlb2: put_qe: RTE_SCHED_TYPE_PARALLEL\n");
2881 if (qm_queue->sched_type == RTE_SCHED_TYPE_ORDERED)
2882 *sched_type = DLB2_SCHED_ORDERED;
2884 *sched_type = DLB2_SCHED_UNORDERED;
2887 DLB2_LOG_ERR("Unsupported LDB sched type in put_qe\n");
2888 DLB2_INC_STAT(ev_port->stats.tx_invalid, 1);
2889 rte_errno = -EINVAL;
2893 /* Directed destination queue */
2895 if (dlb2->version == DLB2_HW_V2) {
2896 if (dlb2_check_enqueue_hw_dir_credits(qm_port)) {
2897 rte_errno = -ENOSPC;
2900 cached_credits = &qm_port->cached_dir_credits;
2902 if (dlb2_check_enqueue_hw_credits(qm_port)) {
2903 rte_errno = -ENOSPC;
2906 cached_credits = &qm_port->cached_credits;
2908 DLB2_LOG_DBG("dlb2: put_qe: RTE_SCHED_TYPE_DIRECTED\n");
2910 *sched_type = DLB2_SCHED_DIRECTED;
2915 case RTE_EVENT_OP_NEW:
2916 /* Check that a sw credit is available */
2917 if (dlb2_check_enqueue_sw_credits(dlb2, ev_port)) {
2918 rte_errno = -ENOSPC;
2921 ev_port->inflight_credits--;
2922 (*cached_credits)--;
2924 case RTE_EVENT_OP_FORWARD:
2925 /* Check for outstanding_releases underflow. If this occurs,
2926 * the application is not using the EVENT_OPs correctly; for
2927 * example, forwarding or releasing events that were not dequeued. */
2930 RTE_ASSERT(ev_port->outstanding_releases > 0);
2931 ev_port->outstanding_releases--;
2932 qm_port->issued_releases++;
2933 (*cached_credits)--;
2935 case RTE_EVENT_OP_RELEASE:
2936 ev_port->inflight_credits++;
2937 /* Check for outstanding_releases underflow. If this occurs,
2938 * the application is not using the EVENT_OPs correctly; for
2939 * example, forwarding or releasing events that were not dequeued. */
2942 RTE_ASSERT(ev_port->outstanding_releases > 0);
2943 ev_port->outstanding_releases--;
2944 qm_port->issued_releases++;
2946 /* Replenish s/w credits if enough are cached */
2947 dlb2_replenish_sw_credits(dlb2, ev_port);
2951 DLB2_INC_STAT(ev_port->stats.tx_op_cnt[ev->op], 1);
2952 DLB2_INC_STAT(ev_port->stats.traffic.tx_ok, 1);
2954 #ifndef RTE_LIBRTE_PMD_DLB2_QUELL_STATS
2955 if (ev->op != RTE_EVENT_OP_RELEASE) {
2956 DLB2_INC_STAT(ev_port->stats.queue[ev->queue_id].enq_ok, 1);
2957 DLB2_INC_STAT(ev_port->stats.tx_sched_cnt[*sched_type], 1);
2964 static inline uint16_t
2965 __dlb2_event_enqueue_burst(void *event_port,
2966 const struct rte_event events[],
2970 struct dlb2_eventdev_port *ev_port = event_port;
2971 struct dlb2_port *qm_port = &ev_port->qm_port;
2972 struct process_local_port_data *port_data;
2975 RTE_ASSERT(ev_port->enq_configured);
2976 RTE_ASSERT(events != NULL);
2980 port_data = &dlb2_port[qm_port->id][PORT_TYPE(qm_port)];
2983 uint8_t sched_types[DLB2_NUM_QES_PER_CACHE_LINE];
2984 uint8_t queue_ids[DLB2_NUM_QES_PER_CACHE_LINE];
2988 memset(qm_port->qe4,
2990 DLB2_NUM_QES_PER_CACHE_LINE *
2991 sizeof(struct dlb2_enqueue_qe));
2993 for (; j < DLB2_NUM_QES_PER_CACHE_LINE && (i + j) < num; j++) {
2994 const struct rte_event *ev = &events[i + j];
2995 int16_t thresh = qm_port->token_pop_thresh;
2998 qm_port->token_pop_mode == DELAYED_POP &&
2999 (ev->op == RTE_EVENT_OP_FORWARD ||
3000 ev->op == RTE_EVENT_OP_RELEASE) &&
3001 qm_port->issued_releases >= thresh - 1) {
3002 /* Insert the token pop QE and break out. This
3003 * may result in a partial HCW, but that is
3004 * simpler than supporting arbitrary QE
3007 dlb2_construct_token_pop_qe(qm_port, j);
3009 /* Reset the releases for the next QE batch */
3010 qm_port->issued_releases -= thresh;
3017 if (dlb2_event_enqueue_prep(ev_port, qm_port, ev,
3026 dlb2_event_build_hcws(qm_port, &events[i], j - pop_offs,
3027 sched_types, queue_ids);
3029 dlb2_hw_do_enqueue(qm_port, i == 0, port_data);
3031 /* Don't include the token pop QE in the enqueue count */
3034 /* Don't interpret j < DLB2_NUM_... as out-of-credits if a token pop QE was inserted (pop_offs != 0). */
3037 if (j < DLB2_NUM_QES_PER_CACHE_LINE && pop_offs == 0)
3045 dlb2_event_enqueue_burst(void *event_port,
3046 const struct rte_event events[],
3049 return __dlb2_event_enqueue_burst(event_port, events, num, false);
3053 dlb2_event_enqueue_burst_delayed(void *event_port,
3054 const struct rte_event events[],
3057 return __dlb2_event_enqueue_burst(event_port, events, num, true);
3060 static inline uint16_t
3061 dlb2_event_enqueue(void *event_port,
3062 const struct rte_event events[])
3064 return __dlb2_event_enqueue_burst(event_port, events, 1, false);
3067 static inline uint16_t
3068 dlb2_event_enqueue_delayed(void *event_port,
3069 const struct rte_event events[])
3071 return __dlb2_event_enqueue_burst(event_port, events, 1, true);
3075 dlb2_event_enqueue_new_burst(void *event_port,
3076 const struct rte_event events[],
3079 return __dlb2_event_enqueue_burst(event_port, events, num, false);
3083 dlb2_event_enqueue_new_burst_delayed(void *event_port,
3084 const struct rte_event events[],
3087 return __dlb2_event_enqueue_burst(event_port, events, num, true);
3091 dlb2_event_enqueue_forward_burst(void *event_port,
3092 const struct rte_event events[],
3095 return __dlb2_event_enqueue_burst(event_port, events, num, false);
3099 dlb2_event_enqueue_forward_burst_delayed(void *event_port,
3100 const struct rte_event events[],
3103 return __dlb2_event_enqueue_burst(event_port, events, num, true);
3107 dlb2_event_release(struct dlb2_eventdev *dlb2,
3111 struct process_local_port_data *port_data;
3112 struct dlb2_eventdev_port *ev_port;
3113 struct dlb2_port *qm_port;
3116 if (port_id > dlb2->num_ports) {
3117 DLB2_LOG_ERR("Invalid port id %d in dlb2-event_release\n",
3119 rte_errno = -EINVAL;
3123 ev_port = &dlb2->ev_ports[port_id];
3124 qm_port = &ev_port->qm_port;
3125 port_data = &dlb2_port[qm_port->id][PORT_TYPE(qm_port)];
3129 if (qm_port->is_directed) {
3131 goto sw_credit_update;
3139 _mm_storeu_si128((void *)&qm_port->qe4[0], _mm_setzero_si128());
3140 _mm_storeu_si128((void *)&qm_port->qe4[1], _mm_setzero_si128());
3141 _mm_storeu_si128((void *)&qm_port->qe4[2], _mm_setzero_si128());
3142 _mm_storeu_si128((void *)&qm_port->qe4[3], _mm_setzero_si128());
3145 for (; j < DLB2_NUM_QES_PER_CACHE_LINE && (i + j) < n; j++) {
3146 int16_t thresh = qm_port->token_pop_thresh;
3148 if (qm_port->token_pop_mode == DELAYED_POP &&
3149 qm_port->issued_releases >= thresh - 1) {
3150 /* Insert the token pop QE */
3151 dlb2_construct_token_pop_qe(qm_port, j);
3153 /* Reset the releases for the next QE batch */
3154 qm_port->issued_releases -= thresh;
3161 qm_port->qe4[j].cmd_byte = DLB2_COMP_CMD_BYTE;
3162 qm_port->issued_releases++;
3165 dlb2_hw_do_enqueue(qm_port, i == 0, port_data);
3167 /* Don't include the token pop QE in the release count */
3172 /* each release returns one credit */
3173 if (unlikely(!ev_port->outstanding_releases)) {
3174 DLB2_LOG_ERR("%s: Outstanding releases underflowed.\n",
3178 ev_port->outstanding_releases -= i;
3179 ev_port->inflight_credits += i;
3181 /* Replenish s/w credits if enough releases are performed */
3182 dlb2_replenish_sw_credits(dlb2, ev_port);
3186 dlb2_port_credits_inc(struct dlb2_port *qm_port, int num)
3188 uint32_t batch_size = qm_port->hw_credit_quanta;
3190 /* increment port credits, and return to pool if exceeds threshold */
3191 if (!qm_port->is_directed) {
3192 if (qm_port->dlb2->version == DLB2_HW_V2) {
3193 qm_port->cached_ldb_credits += num;
3194 if (qm_port->cached_ldb_credits >= 2 * batch_size) {
3196 qm_port->credit_pool[DLB2_LDB_QUEUE],
3197 batch_size, __ATOMIC_SEQ_CST);
3198 qm_port->cached_ldb_credits -= batch_size;
3201 qm_port->cached_credits += num;
3202 if (qm_port->cached_credits >= 2 * batch_size) {
3204 qm_port->credit_pool[DLB2_COMBINED_POOL],
3205 batch_size, __ATOMIC_SEQ_CST);
3206 qm_port->cached_credits -= batch_size;
3210 if (qm_port->dlb2->version == DLB2_HW_V2) {
3211 qm_port->cached_dir_credits += num;
3212 if (qm_port->cached_dir_credits >= 2 * batch_size) {
3214 qm_port->credit_pool[DLB2_DIR_QUEUE],
3215 batch_size, __ATOMIC_SEQ_CST);
3216 qm_port->cached_dir_credits -= batch_size;
3219 qm_port->cached_credits += num;
3220 if (qm_port->cached_credits >= 2 * batch_size) {
3222 qm_port->credit_pool[DLB2_COMBINED_POOL],
3223 batch_size, __ATOMIC_SEQ_CST);
3224 qm_port->cached_credits -= batch_size;
3230 #define CLB_MASK_IDX 0
3231 #define CLB_VAL_IDX 1
3233 dlb2_monitor_callback(const uint64_t val,
3234 const uint64_t opaque[RTE_POWER_MONITOR_OPAQUE_SZ])
3236 /* abort if the value matches */
3237 return (val & opaque[CLB_MASK_IDX]) == opaque[CLB_VAL_IDX] ? -1 : 0;
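/* Illustrative sketch, not part of the driver: how the opaque words above
 * are interpreted. The monitor callback stops the umwait as soon as the
 * masked CQ word equals the expected value, i.e. the generation bit has
 * flipped to "new QE present".
 */
static inline int
example_would_stop_waiting(uint64_t observed, uint64_t mask, uint64_t expected)
{
	return (observed & mask) == expected; /* 1 = a new QE has arrived */
}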
3241 dlb2_dequeue_wait(struct dlb2_eventdev *dlb2,
3242 struct dlb2_eventdev_port *ev_port,
3243 struct dlb2_port *qm_port,
3245 uint64_t start_ticks)
3247 struct process_local_port_data *port_data;
3248 uint64_t elapsed_ticks;
3250 port_data = &dlb2_port[qm_port->id][PORT_TYPE(qm_port)];
3252 elapsed_ticks = rte_get_timer_cycles() - start_ticks;
3254 /* Wait/poll time expired */
3255 if (elapsed_ticks >= timeout) {
3257 } else if (dlb2->umwait_allowed) {
3258 struct rte_power_monitor_cond pmc;
3259 volatile struct dlb2_dequeue_qe *cq_base;
3262 struct dlb2_dequeue_qe qe;
3264 uint64_t expected_value;
3265 volatile uint64_t *monitor_addr;
3267 qe_mask.qe.cq_gen = 1; /* set mask */
3269 cq_base = port_data->cq_base;
3270 monitor_addr = (volatile uint64_t *)(volatile void *)
3271 &cq_base[qm_port->cq_idx];
3272 monitor_addr++; /* cq_gen bit is in second 64bit location */
3274 if (qm_port->gen_bit)
3275 expected_value = qe_mask.raw_qe[1];
3279 pmc.addr = monitor_addr;
3280 /* store expected value and comparison mask in opaque data */
3281 pmc.opaque[CLB_VAL_IDX] = expected_value;
3282 pmc.opaque[CLB_MASK_IDX] = qe_mask.raw_qe[1];
3283 /* set up callback */
3284 pmc.fn = dlb2_monitor_callback;
3285 pmc.size = sizeof(uint64_t);
3287 rte_power_monitor(&pmc, timeout + start_ticks);
3289 DLB2_INC_STAT(ev_port->stats.traffic.rx_umonitor_umwait, 1);
3291 uint64_t poll_interval = dlb2->poll_interval;
3292 uint64_t curr_ticks = rte_get_timer_cycles();
3293 uint64_t init_ticks = curr_ticks;
3295 while ((curr_ticks - start_ticks < timeout) &&
3296 (curr_ticks - init_ticks < poll_interval))
3297 curr_ticks = rte_get_timer_cycles();
3303 static __rte_noinline int
3304 dlb2_process_dequeue_qes(struct dlb2_eventdev_port *ev_port,
3305 struct dlb2_port *qm_port,
3306 struct rte_event *events,
3307 struct dlb2_dequeue_qe *qes,
3310 uint8_t *qid_mappings = qm_port->qid_mappings;
3313 for (i = 0, num = 0; i < cnt; i++) {
3314 struct dlb2_dequeue_qe *qe = &qes[i];
3315 int sched_type_map[DLB2_NUM_HW_SCHED_TYPES] = {
3316 [DLB2_SCHED_ATOMIC] = RTE_SCHED_TYPE_ATOMIC,
3317 [DLB2_SCHED_UNORDERED] = RTE_SCHED_TYPE_PARALLEL,
3318 [DLB2_SCHED_ORDERED] = RTE_SCHED_TYPE_ORDERED,
3319 [DLB2_SCHED_DIRECTED] = RTE_SCHED_TYPE_ATOMIC,
3322 /* Fill in event information.
3323 * Note that flow_id must be embedded in the data by
3324 * the app, such as the mbuf RSS hash field if the data buffer is an mbuf. */
3327 if (unlikely(qe->error)) {
3328 DLB2_LOG_ERR("QE error bit ON\n");
3329 DLB2_INC_STAT(ev_port->stats.traffic.rx_drop, 1);
3330 dlb2_consume_qe_immediate(qm_port, 1);
3331 continue; /* Ignore */
3334 events[num].u64 = qe->data;
3335 events[num].flow_id = qe->flow_id;
3336 events[num].priority = DLB2_TO_EV_PRIO((uint8_t)qe->priority);
3337 events[num].event_type = qe->u.event_type.major;
3338 events[num].sub_event_type = qe->u.event_type.sub;
3339 events[num].sched_type = sched_type_map[qe->sched_type];
3340 events[num].impl_opaque = qe->qid_depth;
3342 /* qid not preserved for directed queues */
3343 if (qm_port->is_directed)
3344 evq_id = ev_port->link[0].queue_id;
3346 evq_id = qid_mappings[qe->qid];
3348 events[num].queue_id = evq_id;
3350 ev_port->stats.queue[evq_id].qid_depth[qe->qid_depth],
3352 DLB2_INC_STAT(ev_port->stats.rx_sched_cnt[qe->sched_type], 1);
3356 DLB2_INC_STAT(ev_port->stats.traffic.rx_ok, num);
3362 dlb2_process_dequeue_four_qes(struct dlb2_eventdev_port *ev_port,
3363 struct dlb2_port *qm_port,
3364 struct rte_event *events,
3365 struct dlb2_dequeue_qe *qes)
3367 int sched_type_map[] = {
3368 [DLB2_SCHED_ATOMIC] = RTE_SCHED_TYPE_ATOMIC,
3369 [DLB2_SCHED_UNORDERED] = RTE_SCHED_TYPE_PARALLEL,
3370 [DLB2_SCHED_ORDERED] = RTE_SCHED_TYPE_ORDERED,
3371 [DLB2_SCHED_DIRECTED] = RTE_SCHED_TYPE_ATOMIC,
3373 const int num_events = DLB2_NUM_QES_PER_CACHE_LINE;
3374 uint8_t *qid_mappings = qm_port->qid_mappings;
3377 /* In the unlikely case that any of the QE error bits are set, process
3378 * them one at a time.
3380 if (unlikely(qes[0].error || qes[1].error ||
3381 qes[2].error || qes[3].error))
3382 return dlb2_process_dequeue_qes(ev_port, qm_port, events,
3385 events[0].u64 = qes[0].data;
3386 events[1].u64 = qes[1].data;
3387 events[2].u64 = qes[2].data;
3388 events[3].u64 = qes[3].data;
3390 /* Construct the metadata portion of two struct rte_events
3391 * in one 128b SSE register. Event metadata is constructed in the SSE
3392 * registers like so:
3393 * sse_evt[0][63:0]: event[0]'s metadata
3394 * sse_evt[0][127:64]: event[1]'s metadata
3395 * sse_evt[1][63:0]: event[2]'s metadata
3396 * sse_evt[1][127:64]: event[3]'s metadata
3398 sse_evt[0] = _mm_setzero_si128();
3399 sse_evt[1] = _mm_setzero_si128();
3401 /* Convert the hardware queue ID to an event queue ID and store it in
3403 * sse_evt[0][47:40] = qid_mappings[qes[0].qid]
3404 * sse_evt[0][111:104] = qid_mappings[qes[1].qid]
3405 * sse_evt[1][47:40] = qid_mappings[qes[2].qid]
3406 * sse_evt[1][111:104] = qid_mappings[qes[3].qid]
3408 #define DLB_EVENT_QUEUE_ID_BYTE 5
3409 sse_evt[0] = _mm_insert_epi8(sse_evt[0],
3410 qid_mappings[qes[0].qid],
3411 DLB_EVENT_QUEUE_ID_BYTE);
3412 sse_evt[0] = _mm_insert_epi8(sse_evt[0],
3413 qid_mappings[qes[1].qid],
3414 DLB_EVENT_QUEUE_ID_BYTE + 8);
3415 sse_evt[1] = _mm_insert_epi8(sse_evt[1],
3416 qid_mappings[qes[2].qid],
3417 DLB_EVENT_QUEUE_ID_BYTE);
3418 sse_evt[1] = _mm_insert_epi8(sse_evt[1],
3419 qid_mappings[qes[3].qid],
3420 DLB_EVENT_QUEUE_ID_BYTE + 8);
3422 /* Convert the hardware priority to an event priority and store it in
3423 * the metadata, while also returning the queue depth status
3424 * value captured by the hardware, storing it in impl_opaque, which can
3425 * be read by the application but not modified
3426 * sse_evt[0][55:48] = DLB2_TO_EV_PRIO(qes[0].priority)
3427 * sse_evt[0][63:56] = qes[0].qid_depth
3428 * sse_evt[0][119:112] = DLB2_TO_EV_PRIO(qes[1].priority)
3429 * sse_evt[0][127:120] = qes[1].qid_depth
3430 * sse_evt[1][55:48] = DLB2_TO_EV_PRIO(qes[2].priority)
3431 * sse_evt[1][63:56] = qes[2].qid_depth
3432 * sse_evt[1][119:112] = DLB2_TO_EV_PRIO(qes[3].priority)
3433 * sse_evt[1][127:120] = qes[3].qid_depth
3435 #define DLB_EVENT_PRIO_IMPL_OPAQUE_WORD 3
3436 #define DLB_BYTE_SHIFT 8
3438 _mm_insert_epi16(sse_evt[0],
3439 DLB2_TO_EV_PRIO((uint8_t)qes[0].priority) |
3440 (qes[0].qid_depth << DLB_BYTE_SHIFT),
3441 DLB_EVENT_PRIO_IMPL_OPAQUE_WORD);
3443 _mm_insert_epi16(sse_evt[0],
3444 DLB2_TO_EV_PRIO((uint8_t)qes[1].priority) |
3445 (qes[1].qid_depth << DLB_BYTE_SHIFT),
3446 DLB_EVENT_PRIO_IMPL_OPAQUE_WORD + 4);
3448 _mm_insert_epi16(sse_evt[1],
3449 DLB2_TO_EV_PRIO((uint8_t)qes[2].priority) |
3450 (qes[2].qid_depth << DLB_BYTE_SHIFT),
3451 DLB_EVENT_PRIO_IMPL_OPAQUE_WORD);
3453 _mm_insert_epi16(sse_evt[1],
3454 DLB2_TO_EV_PRIO((uint8_t)qes[3].priority) |
3455 (qes[3].qid_depth << DLB_BYTE_SHIFT),
3456 DLB_EVENT_PRIO_IMPL_OPAQUE_WORD + 4);
3458 /* Write the event type, sub event type, and flow_id to the event
3460 * sse_evt[0][31:0] = qes[0].flow_id |
3461 * qes[0].u.event_type.major << 28 |
3462 * qes[0].u.event_type.sub << 20;
3463 * sse_evt[0][95:64] = qes[1].flow_id |
3464 * qes[1].u.event_type.major << 28 |
3465 * qes[1].u.event_type.sub << 20;
3466 * sse_evt[1][31:0] = qes[2].flow_id |
3467 * qes[2].u.event_type.major << 28 |
3468 * qes[2].u.event_type.sub << 20;
3469 * sse_evt[1][95:64] = qes[3].flow_id |
3470 * qes[3].u.event_type.major << 28 |
3471 * qes[3].u.event_type.sub << 20;
3473 #define DLB_EVENT_EV_TYPE_DW 0
3474 #define DLB_EVENT_EV_TYPE_SHIFT 28
3475 #define DLB_EVENT_SUB_EV_TYPE_SHIFT 20
3476 sse_evt[0] = _mm_insert_epi32(sse_evt[0],
3478 qes[0].u.event_type.major << DLB_EVENT_EV_TYPE_SHIFT |
3479 qes[0].u.event_type.sub << DLB_EVENT_SUB_EV_TYPE_SHIFT,
3480 DLB_EVENT_EV_TYPE_DW);
3481 sse_evt[0] = _mm_insert_epi32(sse_evt[0],
3483 qes[1].u.event_type.major << DLB_EVENT_EV_TYPE_SHIFT |
3484 qes[1].u.event_type.sub << DLB_EVENT_SUB_EV_TYPE_SHIFT,
3485 DLB_EVENT_EV_TYPE_DW + 2);
3486 sse_evt[1] = _mm_insert_epi32(sse_evt[1],
3488 qes[2].u.event_type.major << DLB_EVENT_EV_TYPE_SHIFT |
3489 qes[2].u.event_type.sub << DLB_EVENT_SUB_EV_TYPE_SHIFT,
3490 DLB_EVENT_EV_TYPE_DW);
3491 sse_evt[1] = _mm_insert_epi32(sse_evt[1],
3493 qes[3].u.event_type.major << DLB_EVENT_EV_TYPE_SHIFT |
3494 qes[3].u.event_type.sub << DLB_EVENT_SUB_EV_TYPE_SHIFT,
3495 DLB_EVENT_EV_TYPE_DW + 2);
3497 /* Write the sched type to the event metadata. 'op' and 'rsvd' are not
3499 * sse_evt[0][39:32] = sched_type_map[qes[0].sched_type] << 6
3500 * sse_evt[0][103:96] = sched_type_map[qes[1].sched_type] << 6
3501 * sse_evt[1][39:32] = sched_type_map[qes[2].sched_type] << 6
3502 * sse_evt[1][103:96] = sched_type_map[qes[3].sched_type] << 6
3504 #define DLB_EVENT_SCHED_TYPE_BYTE 4
3505 #define DLB_EVENT_SCHED_TYPE_SHIFT 6
3506 sse_evt[0] = _mm_insert_epi8(sse_evt[0],
3507 sched_type_map[qes[0].sched_type] << DLB_EVENT_SCHED_TYPE_SHIFT,
3508 DLB_EVENT_SCHED_TYPE_BYTE);
3509 sse_evt[0] = _mm_insert_epi8(sse_evt[0],
3510 sched_type_map[qes[1].sched_type] << DLB_EVENT_SCHED_TYPE_SHIFT,
3511 DLB_EVENT_SCHED_TYPE_BYTE + 8);
3512 sse_evt[1] = _mm_insert_epi8(sse_evt[1],
3513 sched_type_map[qes[2].sched_type] << DLB_EVENT_SCHED_TYPE_SHIFT,
3514 DLB_EVENT_SCHED_TYPE_BYTE);
3515 sse_evt[1] = _mm_insert_epi8(sse_evt[1],
3516 sched_type_map[qes[3].sched_type] << DLB_EVENT_SCHED_TYPE_SHIFT,
3517 DLB_EVENT_SCHED_TYPE_BYTE + 8);
3519 /* Store the metadata to the event (use the double-precision
3520 * _mm_storeh_pd because there is no integer function for storing the
3522 * events[0].event = sse_evt[0][63:0]
3523 * events[1].event = sse_evt[0][127:64]
3524 * events[2].event = sse_evt[1][63:0]
3525 * events[3].event = sse_evt[1][127:64]
3527 _mm_storel_epi64((__m128i *)&events[0].event, sse_evt[0]);
3528 _mm_storeh_pd((double *)&events[1].event, (__m128d) sse_evt[0]);
3529 _mm_storel_epi64((__m128i *)&events[2].event, sse_evt[1]);
3530 _mm_storeh_pd((double *)&events[3].event, (__m128d) sse_evt[1]);
3532 DLB2_INC_STAT(ev_port->stats.rx_sched_cnt[qes[0].sched_type], 1);
3533 DLB2_INC_STAT(ev_port->stats.rx_sched_cnt[qes[1].sched_type], 1);
3534 DLB2_INC_STAT(ev_port->stats.rx_sched_cnt[qes[2].sched_type], 1);
3535 DLB2_INC_STAT(ev_port->stats.rx_sched_cnt[qes[3].sched_type], 1);
3538 ev_port->stats.queue[events[0].queue_id].
3539 qid_depth[qes[0].qid_depth],
3542 ev_port->stats.queue[events[1].queue_id].
3543 qid_depth[qes[1].qid_depth],
3546 ev_port->stats.queue[events[2].queue_id].
3547 qid_depth[qes[2].qid_depth],
3550 ev_port->stats.queue[events[3].queue_id].
3551 qid_depth[qes[3].qid_depth],
3554 DLB2_INC_STAT(ev_port->stats.traffic.rx_ok, num_events);
3559 static __rte_always_inline int
3560 dlb2_recv_qe_sparse(struct dlb2_port *qm_port, struct dlb2_dequeue_qe *qe)
3562 volatile struct dlb2_dequeue_qe *cq_addr;
3563 uint8_t xor_mask[2] = {0x0F, 0x00};
3564 const uint8_t and_mask = 0x0F;
3565 __m128i *qes = (__m128i *)qe;
3566 uint8_t gen_bits, gen_bit;
3570 cq_addr = dlb2_port[qm_port->id][PORT_TYPE(qm_port)].cq_base;
3572 idx = qm_port->cq_idx_unmasked & qm_port->cq_depth_mask;
3573 /* Load the next 4 QEs */
3574 addr[0] = (uintptr_t)&cq_addr[idx];
3575 addr[1] = (uintptr_t)&cq_addr[(idx + 4) & qm_port->cq_depth_mask];
3576 addr[2] = (uintptr_t)&cq_addr[(idx + 8) & qm_port->cq_depth_mask];
3577 addr[3] = (uintptr_t)&cq_addr[(idx + 12) & qm_port->cq_depth_mask];
3579 /* Prefetch next batch of QEs (all CQs occupy minimum 8 cache lines) */
3580 rte_prefetch0(&cq_addr[(idx + 16) & qm_port->cq_depth_mask]);
3581 rte_prefetch0(&cq_addr[(idx + 20) & qm_port->cq_depth_mask]);
3582 rte_prefetch0(&cq_addr[(idx + 24) & qm_port->cq_depth_mask]);
3583 rte_prefetch0(&cq_addr[(idx + 28) & qm_port->cq_depth_mask]);
3585 /* Correct the xor_mask for wrap-around QEs */
3586 gen_bit = qm_port->gen_bit;
3587 xor_mask[gen_bit] ^= !!((idx + 4) > qm_port->cq_depth_mask) << 1;
3588 xor_mask[gen_bit] ^= !!((idx + 8) > qm_port->cq_depth_mask) << 2;
3589 xor_mask[gen_bit] ^= !!((idx + 12) > qm_port->cq_depth_mask) << 3;
3591 /* Read the cache lines backwards to ensure that if QE[N] (N > 0) is
3592 * valid, then QEs[0:N-1] are too.
3594 qes[3] = _mm_load_si128((__m128i *)(void *)addr[3]);
3595 rte_compiler_barrier();
3596 qes[2] = _mm_load_si128((__m128i *)(void *)addr[2]);
3597 rte_compiler_barrier();
3598 qes[1] = _mm_load_si128((__m128i *)(void *)addr[1]);
3599 rte_compiler_barrier();
3600 qes[0] = _mm_load_si128((__m128i *)(void *)addr[0]);
3602 /* Extract and combine the gen bits */
3603 gen_bits = ((_mm_extract_epi8(qes[0], 15) & 0x1) << 0) |
3604 ((_mm_extract_epi8(qes[1], 15) & 0x1) << 1) |
3605 ((_mm_extract_epi8(qes[2], 15) & 0x1) << 2) |
3606 ((_mm_extract_epi8(qes[3], 15) & 0x1) << 3);
3608 /* XOR the combined bits such that a 1 represents a valid QE */
3609 gen_bits ^= xor_mask[gen_bit];
3611 /* Mask off gen bits we don't care about */
3612 gen_bits &= and_mask;
3614 return __builtin_popcount(gen_bits);
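/* Illustrative sketch, not part of the driver: counting valid QEs from their
 * generation bits. Each slot's gen bit flips every time the CQ wraps, so
 * XORing the observed bits with the expected phase leaves 1s exactly in the
 * slots that hold new entries; because the hardware writes QEs in order, the
 * popcount of those 1s is the number of QEs that may be consumed.
 */
static inline int
example_count_valid_qes(uint8_t observed_gen_bits, uint8_t expected_phase)
{
	uint8_t valid = (uint8_t)(observed_gen_bits ^ expected_phase);

	return __builtin_popcount(valid & 0x0F); /* 4 QE slots per line */
}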
3618 _process_deq_qes_vec_impl(struct dlb2_port *qm_port,
3619 struct rte_event *events,
3625 __m128i v_qe_status,
3626 uint32_t valid_events)
3628 /* Look up the event QIDs, using the hardware QIDs to index the
3629 * port's QID mapping.
3631 * Each v_qe_[0-3] is just a 16-byte load of the whole QE. It is
3632 * passed along in registers as the QE data is required later.
3634 * v_qe_meta is a u32 unpack of all 4x QEs, i.e. it contains one
3635 * 32-bit slice of each QE, so it makes up a full SSE register. This
3636 * allows parallel processing of 4x QEs in a single register.
3639 __m128i v_qid_done = {0};
3640 int hw_qid0 = _mm_extract_epi8(v_qe_meta, 2);
3641 int hw_qid1 = _mm_extract_epi8(v_qe_meta, 6);
3642 int hw_qid2 = _mm_extract_epi8(v_qe_meta, 10);
3643 int hw_qid3 = _mm_extract_epi8(v_qe_meta, 14);
3645 int ev_qid0 = qm_port->qid_mappings[hw_qid0];
3646 int ev_qid1 = qm_port->qid_mappings[hw_qid1];
3647 int ev_qid2 = qm_port->qid_mappings[hw_qid2];
3648 int ev_qid3 = qm_port->qid_mappings[hw_qid3];
3650 int hw_sched0 = _mm_extract_epi8(v_qe_meta, 3) & 3ul;
3651 int hw_sched1 = _mm_extract_epi8(v_qe_meta, 7) & 3ul;
3652 int hw_sched2 = _mm_extract_epi8(v_qe_meta, 11) & 3ul;
3653 int hw_sched3 = _mm_extract_epi8(v_qe_meta, 15) & 3ul;
3655 v_qid_done = _mm_insert_epi8(v_qid_done, ev_qid0, 2);
3656 v_qid_done = _mm_insert_epi8(v_qid_done, ev_qid1, 6);
3657 v_qid_done = _mm_insert_epi8(v_qid_done, ev_qid2, 10);
3658 v_qid_done = _mm_insert_epi8(v_qid_done, ev_qid3, 14);
3660 /* Schedule field remapping using byte shuffle
3661 * - Full byte containing sched field handled here (op, rsvd are zero)
3662 * - Note sanitizing the register requires two masking ANDs:
3663 * 1) to strip prio/msg_type from byte for correct shuffle lookup
3664 * 2) to strip any non-sched-field lanes from any results to OR later
3665 * - Final byte result is >> 10 to another byte-lane inside the u32.
3666 * This makes the final combination OR easier to make the rte_event.
3668 __m128i v_sched_done;
3669 __m128i v_sched_bits;
3671 static const uint8_t sched_type_map[16] = {
3672 [DLB2_SCHED_ATOMIC] = RTE_SCHED_TYPE_ATOMIC,
3673 [DLB2_SCHED_UNORDERED] = RTE_SCHED_TYPE_PARALLEL,
3674 [DLB2_SCHED_ORDERED] = RTE_SCHED_TYPE_ORDERED,
3675 [DLB2_SCHED_DIRECTED] = RTE_SCHED_TYPE_ATOMIC,
3677 static const uint8_t sched_and_mask[16] = {
3678 0x00, 0x00, 0x00, 0x03,
3679 0x00, 0x00, 0x00, 0x03,
3680 0x00, 0x00, 0x00, 0x03,
3681 0x00, 0x00, 0x00, 0x03,
3683 const __m128i v_sched_map = _mm_loadu_si128(
3684 (const __m128i *)sched_type_map);
3685 __m128i v_sched_mask = _mm_loadu_si128(
3686 (const __m128i *)&sched_and_mask);
3687 v_sched_bits = _mm_and_si128(v_qe_meta, v_sched_mask);
3688 __m128i v_sched_remapped = _mm_shuffle_epi8(v_sched_map,
3690 __m128i v_preshift = _mm_and_si128(v_sched_remapped,
3692 v_sched_done = _mm_srli_epi32(v_preshift, 10);
3695 /* Priority handling
3696 * - QE provides 3 bits of priority
3697 * - Shift << 3 to move to MSBs for byte-prio in rte_event
3698 * - Mask bits to avoid pollution, leaving only 3 prio MSBs in reg
3700 __m128i v_prio_done;
3702 static const uint8_t prio_mask[16] = {
3703 0x00, 0x00, 0x00, 0x07 << 5,
3704 0x00, 0x00, 0x00, 0x07 << 5,
3705 0x00, 0x00, 0x00, 0x07 << 5,
3706 0x00, 0x00, 0x00, 0x07 << 5,
3708 __m128i v_prio_mask = _mm_loadu_si128(
3709 (const __m128i *)prio_mask);
3710 __m128i v_prio_shifted = _mm_slli_epi32(v_qe_meta, 3);
3711 v_prio_done = _mm_and_si128(v_prio_shifted, v_prio_mask);
3714 /* Event Sub/Type handling:
3715 * we want to keep the lower 12 bits of each QE. Shift up by 20 bits
3716 * to get the sub/ev type data into rte_event location, clearing the
3717 * lower 20 bits in the process.
3719 __m128i v_types_done;
3721 static const uint8_t event_mask[16] = {
3722 0x0f, 0x00, 0x00, 0x00,
3723 0x0f, 0x00, 0x00, 0x00,
3724 0x0f, 0x00, 0x00, 0x00,
3725 0x0f, 0x00, 0x00, 0x00,
3727 static const uint8_t sub_event_mask[16] = {
3728 0xff, 0x00, 0x00, 0x00,
3729 0xff, 0x00, 0x00, 0x00,
3730 0xff, 0x00, 0x00, 0x00,
3731 0xff, 0x00, 0x00, 0x00,
3733 static const uint8_t flow_mask[16] = {
3734 0xff, 0xff, 0x00, 0x00,
3735 0xff, 0xff, 0x00, 0x00,
3736 0xff, 0xff, 0x00, 0x00,
3737 0xff, 0xff, 0x00, 0x00,
3739 __m128i v_event_mask = _mm_loadu_si128(
3740 (const __m128i *)event_mask);
3741 __m128i v_sub_event_mask = _mm_loadu_si128(
3742 (const __m128i *)sub_event_mask);
3743 __m128i v_flow_mask = _mm_loadu_si128(
3744 (const __m128i *)flow_mask);
3745 __m128i v_sub = _mm_srli_epi32(v_qe_meta, 8);
3746 v_sub = _mm_and_si128(v_sub, v_sub_event_mask);
3747 __m128i v_type = _mm_and_si128(v_qe_meta, v_event_mask);
3748 v_type = _mm_slli_epi32(v_type, 8);
3749 v_types_done = _mm_or_si128(v_type, v_sub);
3750 v_types_done = _mm_slli_epi32(v_types_done, 20);
3751 __m128i v_flow = _mm_and_si128(v_qe_status, v_flow_mask);
3752 v_types_done = _mm_or_si128(v_types_done, v_flow);
3755 /* Combine QID, Sched and Prio fields, then Shift >> 8 bits to align
3756 * with the rte_event, allowing unpacks to move/blend with payload.
3758 __m128i v_q_s_p_done;
3760 __m128i v_qid_sched = _mm_or_si128(v_qid_done, v_sched_done);
3761 __m128i v_q_s_prio = _mm_or_si128(v_qid_sched, v_prio_done);
3762 v_q_s_p_done = _mm_srli_epi32(v_q_s_prio, 8);
3765 __m128i v_unpk_ev_23, v_unpk_ev_01, v_ev_2, v_ev_3, v_ev_0, v_ev_1;
3767 /* Unpack evs into u64 metadata, then indiv events */
3768 v_unpk_ev_23 = _mm_unpackhi_epi32(v_types_done, v_q_s_p_done);
3769 v_unpk_ev_01 = _mm_unpacklo_epi32(v_types_done, v_q_s_p_done);
3771 switch (valid_events) {
3773 v_ev_3 = _mm_blend_epi16(v_unpk_ev_23, v_qe_3, 0x0F);
3774 v_ev_3 = _mm_alignr_epi8(v_ev_3, v_ev_3, 8);
3775 _mm_storeu_si128((__m128i *)&events[3], v_ev_3);
3776 DLB2_INC_STAT(qm_port->ev_port->stats.rx_sched_cnt[hw_sched3],
3780 v_ev_2 = _mm_unpacklo_epi64(v_unpk_ev_23, v_qe_2);
3781 _mm_storeu_si128((__m128i *)&events[2], v_ev_2);
3782 DLB2_INC_STAT(qm_port->ev_port->stats.rx_sched_cnt[hw_sched2],
3786 v_ev_1 = _mm_blend_epi16(v_unpk_ev_01, v_qe_1, 0x0F);
3787 v_ev_1 = _mm_alignr_epi8(v_ev_1, v_ev_1, 8);
3788 _mm_storeu_si128((__m128i *)&events[1], v_ev_1);
3789 DLB2_INC_STAT(qm_port->ev_port->stats.rx_sched_cnt[hw_sched1],
3793 v_ev_0 = _mm_unpacklo_epi64(v_unpk_ev_01, v_qe_0);
3794 _mm_storeu_si128((__m128i *)&events[0], v_ev_0);
3795 DLB2_INC_STAT(qm_port->ev_port->stats.rx_sched_cnt[hw_sched0],
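/* Illustrative sketch, not part of the driver: the table-lookup remap used
 * above, on its own. _mm_shuffle_epi8 indexes a 16-entry byte table with the
 * low nibble of every byte lane, so the field is first masked down to its
 * 2 bits, and lanes that never held the field are stripped after the lookup
 * (they all indexed entry 0). The table values here are hypothetical.
 */
static inline __m128i
example_remap_sched_field(__m128i v_qe_meta)
{
	static const uint8_t remap_tbl[16] = { [0] = 1, [1] = 2, [2] = 0, [3] = 1 };
	const __m128i v_tbl = _mm_loadu_si128((const __m128i *)remap_tbl);
	/* the 2-bit field lives in byte 3 of each 32-bit lane */
	const __m128i v_field = _mm_set1_epi32(0x03000000);
	__m128i v_idx = _mm_and_si128(v_qe_meta, v_field);
	__m128i v_mapped = _mm_shuffle_epi8(v_tbl, v_idx);

	/* drop the bogus entry-0 lookups from the non-field byte lanes */
	return _mm_and_si128(v_mapped, v_field);
}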
3800 static __rte_always_inline int
3801 dlb2_recv_qe_sparse_vec(struct dlb2_port *qm_port, void *events,
3802 uint32_t max_events)
3804 /* Using unmasked idx for perf, and masking manually */
3805 uint16_t idx = qm_port->cq_idx_unmasked;
3806 volatile struct dlb2_dequeue_qe *cq_addr;
3808 cq_addr = dlb2_port[qm_port->id][PORT_TYPE(qm_port)].cq_base;
3810 uintptr_t qe_ptr_3 = (uintptr_t)&cq_addr[(idx + 12) &
3811 qm_port->cq_depth_mask];
3812 uintptr_t qe_ptr_2 = (uintptr_t)&cq_addr[(idx + 8) &
3813 qm_port->cq_depth_mask];
3814 uintptr_t qe_ptr_1 = (uintptr_t)&cq_addr[(idx + 4) &
3815 qm_port->cq_depth_mask];
3816 uintptr_t qe_ptr_0 = (uintptr_t)&cq_addr[(idx + 0) &
3817 qm_port->cq_depth_mask];
3819 /* Load QEs from CQ: use compiler barriers to avoid load reordering */
3820 __m128i v_qe_3 = _mm_loadu_si128((const __m128i *)qe_ptr_3);
3821 rte_compiler_barrier();
3822 __m128i v_qe_2 = _mm_loadu_si128((const __m128i *)qe_ptr_2);
3823 rte_compiler_barrier();
3824 __m128i v_qe_1 = _mm_loadu_si128((const __m128i *)qe_ptr_1);
3825 rte_compiler_barrier();
3826 __m128i v_qe_0 = _mm_loadu_si128((const __m128i *)qe_ptr_0);
3828 /* Generate the pkt_shuffle mask;
3829 * - Avoids load in otherwise load-heavy section of code
3830 * - Moves bytes 3,7,11,15 (gen bit bytes) to LSB bytes in XMM
3832 const uint32_t stat_shuf_bytes = (15 << 24) | (11 << 16) | (7 << 8) | 3;
3833 __m128i v_zeros = _mm_setzero_si128();
3834 __m128i v_ffff = _mm_cmpeq_epi8(v_zeros, v_zeros);
3835 __m128i v_stat_shuf_mask = _mm_insert_epi32(v_ffff, stat_shuf_bytes, 0);
3837 /* Extract u32 components required from the QE
3838 * - QE[64 to 95 ] for metadata (qid, sched, prio, event type, ...)
3839 * - QE[96 to 127] for status (cq gen bit, error)
3841 * Note that stage 1 of the unpacking is re-used for both u32 extracts
3843 __m128i v_qe_02 = _mm_unpackhi_epi32(v_qe_0, v_qe_2);
3844 __m128i v_qe_13 = _mm_unpackhi_epi32(v_qe_1, v_qe_3);
3845 __m128i v_qe_status = _mm_unpackhi_epi32(v_qe_02, v_qe_13);
3846 __m128i v_qe_meta = _mm_unpacklo_epi32(v_qe_02, v_qe_13);
3848 /* Status byte (gen_bit, error) handling:
3849 * - Shuffle to lanes 0,1,2,3, clear all others
3850 * - Shift right by 7 for gen bit to MSB, movemask to scalar
3851 * - Shift right by 2 for error bit to MSB, movemask to scalar
3853 __m128i v_qe_shuffled = _mm_shuffle_epi8(v_qe_status, v_stat_shuf_mask);
3854 __m128i v_qes_shift_gen_bit = _mm_slli_epi32(v_qe_shuffled, 7);
3855 int32_t qe_gen_bits = _mm_movemask_epi8(v_qes_shift_gen_bit) & 0xf;
3857 /* Expected vs Reality of QE Gen bits
3858 * - cq_rolling_mask provides expected bits
3859 * - QE loads, unpacks/shuffle and movemask provides reality
3860 * - XOR of the two gives bitmask of new packets
3861 * - POPCNT to get the number of new events
3863 uint64_t rolling = qm_port->cq_rolling_mask & 0xF;
3864 uint64_t qe_xor_bits = (qe_gen_bits ^ rolling);
3865 uint32_t count_new = __builtin_popcount(qe_xor_bits);
3866 count_new = RTE_MIN(count_new, max_events);
3870 /* emulate a 128 bit rotate using 2x 64-bit numbers and bit-shifts */
3872 uint64_t m_rshift = qm_port->cq_rolling_mask >> count_new;
3873 uint64_t m_lshift = qm_port->cq_rolling_mask << (64 - count_new);
3874 uint64_t m2_rshift = qm_port->cq_rolling_mask_2 >> count_new;
3875 uint64_t m2_lshift = qm_port->cq_rolling_mask_2 << (64 - count_new);
3877 /* shifted out of m2 into MSB of m */
3878 qm_port->cq_rolling_mask = (m_rshift | m2_lshift);
3880 /* shifted out of m "looped back" into MSB of m2 */
3881 qm_port->cq_rolling_mask_2 = (m2_rshift | m_lshift);
3883 /* Prefetch the next QEs - should run as IPC instead of cycles */
3884 rte_prefetch0(&cq_addr[(idx + 16) & qm_port->cq_depth_mask]);
3885 rte_prefetch0(&cq_addr[(idx + 20) & qm_port->cq_depth_mask]);
3886 rte_prefetch0(&cq_addr[(idx + 24) & qm_port->cq_depth_mask]);
3887 rte_prefetch0(&cq_addr[(idx + 28) & qm_port->cq_depth_mask]);
3889 /* Convert QEs from XMM regs to events and store events directly */
3890 _process_deq_qes_vec_impl(qm_port, events, v_qe_3, v_qe_2, v_qe_1,
3891 v_qe_0, v_qe_meta, v_qe_status, count_new);
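/* Illustrative sketch, not part of the driver: the 128-bit rotate used for
 * the rolling CQ mask, emulated with two 64-bit halves. Bits shifted out of
 * the low word re-enter at the top of the high word and vice versa, keeping
 * the expected gen-bit phase aligned with the advancing CQ index. Assumes
 * 0 < count < 64, as in the caller above.
 */
static inline void
example_rotate_mask_128(uint64_t *lo, uint64_t *hi, unsigned int count)
{
	uint64_t lo_r = *lo >> count;
	uint64_t lo_l = *lo << (64 - count);
	uint64_t hi_r = *hi >> count;
	uint64_t hi_l = *hi << (64 - count);

	*lo = lo_r | hi_l;	/* high-word bits fill the vacated low MSBs */
	*hi = hi_r | lo_l;	/* low-word bits loop back into the high word */
}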
3897 dlb2_inc_cq_idx(struct dlb2_port *qm_port, int cnt)
3899 uint16_t idx = qm_port->cq_idx_unmasked + cnt;
3901 qm_port->cq_idx_unmasked = idx;
3902 qm_port->cq_idx = idx & qm_port->cq_depth_mask;
3903 qm_port->gen_bit = (~(idx >> qm_port->gen_bit_shift)) & 0x1;
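/* Illustrative sketch, not part of the driver: why the gen bit falls out of
 * the unmasked index. With a power-of-two CQ depth, bit log2(depth) of the
 * monotonically increasing index flips exactly once per ring wrap, so its
 * complement tracks the phase the hardware writes into fresh QEs.
 */
static inline uint8_t
example_expected_gen_bit(uint16_t unmasked_idx, unsigned int log2_cq_depth)
{
	return (uint8_t)((~(unmasked_idx >> log2_cq_depth)) & 0x1);
}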
3906 static inline int16_t
3907 dlb2_hw_dequeue_sparse(struct dlb2_eventdev *dlb2,
3908 struct dlb2_eventdev_port *ev_port,
3909 struct rte_event *events,
3911 uint64_t dequeue_timeout_ticks)
3913 uint64_t start_ticks = 0ULL;
3914 struct dlb2_port *qm_port;
3919 qm_port = &ev_port->qm_port;
3920 use_scalar = qm_port->use_scalar;
3922 if (!dlb2->global_dequeue_wait)
3923 timeout = dequeue_timeout_ticks;
3925 timeout = dlb2->global_dequeue_wait_ticks;
3927 start_ticks = rte_get_timer_cycles();
3929 use_scalar = use_scalar || (max_num & 0x3);
3931 while (num < max_num) {
3932 struct dlb2_dequeue_qe qes[DLB2_NUM_QES_PER_CACHE_LINE];
3937 uint64_t m_rshift, m_lshift, m2_rshift, m2_lshift;
3939 num_avail = dlb2_recv_qe_sparse(qm_port, qes);
3940 num_avail = RTE_MIN(num_avail, max_num - num);
3941 dlb2_inc_cq_idx(qm_port, num_avail << 2);
3942 if (num_avail == DLB2_NUM_QES_PER_CACHE_LINE)
3943 n_iter = dlb2_process_dequeue_four_qes(ev_port,
3948 n_iter = dlb2_process_dequeue_qes(ev_port,
3955 /* update rolling_mask for vector code support */
3956 m_rshift = qm_port->cq_rolling_mask >> n_iter;
3957 m_lshift = qm_port->cq_rolling_mask << (64 - n_iter);
3958 m2_rshift = qm_port->cq_rolling_mask_2 >> n_iter;
3959 m2_lshift = qm_port->cq_rolling_mask_2 <<
3961 qm_port->cq_rolling_mask = (m_rshift | m2_lshift);
3962 qm_port->cq_rolling_mask_2 = (m2_rshift | m_lshift);
3964 } else { /* !use_scalar */
3965 num_avail = dlb2_recv_qe_sparse_vec(qm_port,
3968 dlb2_inc_cq_idx(qm_port, num_avail << 2);
3970 DLB2_INC_STAT(ev_port->stats.traffic.rx_ok, num_avail);
3973 if ((timeout == 0) || (num > 0))
3974 /* Not waiting in any form, or 1+ events received */
3976 else if (dlb2_dequeue_wait(dlb2, ev_port, qm_port,
3977 timeout, start_ticks))
3982 qm_port->owed_tokens += num;
3985 if (qm_port->token_pop_mode == AUTO_POP)
3986 dlb2_consume_qe_immediate(qm_port, num);
3988 ev_port->outstanding_releases += num;
3990 dlb2_port_credits_inc(qm_port, num);
3996 static __rte_always_inline int
3997 dlb2_recv_qe(struct dlb2_port *qm_port, struct dlb2_dequeue_qe *qe,
4000 uint8_t xor_mask[2][4] = { {0x0F, 0x0E, 0x0C, 0x08},
4001 {0x00, 0x01, 0x03, 0x07} };
4002 uint8_t and_mask[4] = {0x0F, 0x0E, 0x0C, 0x08};
4003 volatile struct dlb2_dequeue_qe *cq_addr;
4004 __m128i *qes = (__m128i *)qe;
4005 uint64_t *cache_line_base;
4008 cq_addr = dlb2_port[qm_port->id][PORT_TYPE(qm_port)].cq_base;
4009 cq_addr = &cq_addr[qm_port->cq_idx];
4011 cache_line_base = (void *)(((uintptr_t)cq_addr) & ~0x3F);
4012 *offset = ((uintptr_t)cq_addr & 0x30) >> 4;
4014 /* Load the next CQ cache line from memory. Pack these reads as tight
4015 * as possible to reduce the chance that DLB invalidates the line while
4016 * the CPU is reading it. Read the cache line backwards to ensure that
4017 * if QE[N] (N > 0) is valid, then QEs[0:N-1] are too.
4019 * (Valid QEs start at &qe[offset])
4021 qes[3] = _mm_load_si128((__m128i *)&cache_line_base[6]);
4022 qes[2] = _mm_load_si128((__m128i *)&cache_line_base[4]);
4023 qes[1] = _mm_load_si128((__m128i *)&cache_line_base[2]);
4024 qes[0] = _mm_load_si128((__m128i *)&cache_line_base[0]);
4026 /* Evict the cache line ASAP */
4027 rte_cldemote(cache_line_base);
4029 /* Extract and combine the gen bits */
4030 gen_bits = ((_mm_extract_epi8(qes[0], 15) & 0x1) << 0) |
4031 ((_mm_extract_epi8(qes[1], 15) & 0x1) << 1) |
4032 ((_mm_extract_epi8(qes[2], 15) & 0x1) << 2) |
4033 ((_mm_extract_epi8(qes[3], 15) & 0x1) << 3);
4035 /* XOR the combined bits such that a 1 represents a valid QE */
4036 gen_bits ^= xor_mask[qm_port->gen_bit][*offset];
4038 /* Mask off gen bits we don't care about */
4039 gen_bits &= and_mask[*offset];
4041 return __builtin_popcount(gen_bits);
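/* Illustrative sketch, not part of the driver: the cache-line arithmetic in
 * dlb2_recv_qe. A QE is 16 bytes, so one 64-byte line holds four of them:
 * masking the CQ pointer with ~0x3F yields the line base, and address bits
 * [5:4] give the QE's slot (0-3) within that line.
 */
static inline void
example_split_cq_addr(uintptr_t qe_addr, uintptr_t *line_base, int *slot)
{
	*line_base = qe_addr & ~(uintptr_t)0x3F;
	*slot = (int)((qe_addr & 0x30) >> 4);
}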
4044 static inline int16_t
4045 dlb2_hw_dequeue(struct dlb2_eventdev *dlb2,
4046 struct dlb2_eventdev_port *ev_port,
4047 struct rte_event *events,
4049 uint64_t dequeue_timeout_ticks)
4052 uint64_t start_ticks = 0ULL;
4053 struct dlb2_port *qm_port;
4056 qm_port = &ev_port->qm_port;
4058 /* We have a special implementation for waiting. Wait can be:
4059 * 1) no waiting at all
4061 * 3) wait for interrupt. If wakeup and poll time
4062 * has expired, then return to caller
4063 * 4) umonitor/umwait repeatedly up to poll time
4066 /* If configured for per dequeue wait, then use wait value provided
4067 * to this API. Otherwise we must use the global
4068 * value from eventdev config time.
4070 if (!dlb2->global_dequeue_wait)
4071 timeout = dequeue_timeout_ticks;
4073 timeout = dlb2->global_dequeue_wait_ticks;
4075 start_ticks = rte_get_timer_cycles();
4077 while (num < max_num) {
4078 struct dlb2_dequeue_qe qes[DLB2_NUM_QES_PER_CACHE_LINE];
4082 /* Copy up to 4 QEs from the current cache line into qes */
4083 num_avail = dlb2_recv_qe(qm_port, qes, &offset);
4085 /* But don't process more than the user requested */
4086 num_avail = RTE_MIN(num_avail, max_num - num);
4088 dlb2_inc_cq_idx(qm_port, num_avail);
4090 if (num_avail == DLB2_NUM_QES_PER_CACHE_LINE)
4091 num += dlb2_process_dequeue_four_qes(ev_port,
4096 num += dlb2_process_dequeue_qes(ev_port,
4101 else if ((timeout == 0) || (num > 0))
4102 /* Not waiting in any form, or 1+ events received? */
4104 else if (dlb2_dequeue_wait(dlb2, ev_port, qm_port,
4105 timeout, start_ticks))
4109 qm_port->owed_tokens += num;
4112 if (qm_port->token_pop_mode == AUTO_POP)
4113 dlb2_consume_qe_immediate(qm_port, num);
4115 ev_port->outstanding_releases += num;
4117 dlb2_port_credits_inc(qm_port, num);
4124 dlb2_event_dequeue_burst(void *event_port, struct rte_event *ev, uint16_t num,
4127 struct dlb2_eventdev_port *ev_port = event_port;
4128 struct dlb2_port *qm_port = &ev_port->qm_port;
4129 struct dlb2_eventdev *dlb2 = ev_port->dlb2;
4132 RTE_ASSERT(ev_port->setup_done);
4133 RTE_ASSERT(ev != NULL);
4135 if (ev_port->implicit_release && ev_port->outstanding_releases > 0) {
4136 uint16_t out_rels = ev_port->outstanding_releases;
4138 dlb2_event_release(dlb2, ev_port->id, out_rels);
4140 DLB2_INC_STAT(ev_port->stats.tx_implicit_rel, out_rels);
4143 if (qm_port->token_pop_mode == DEFERRED_POP && qm_port->owed_tokens)
4144 dlb2_consume_qe_immediate(qm_port, qm_port->owed_tokens);
4146 cnt = dlb2_hw_dequeue(dlb2, ev_port, ev, num, wait);
4148 DLB2_INC_STAT(ev_port->stats.traffic.total_polls, 1);
4149 DLB2_INC_STAT(ev_port->stats.traffic.zero_polls, ((cnt == 0) ? 1 : 0));
4155 dlb2_event_dequeue(void *event_port, struct rte_event *ev, uint64_t wait)
4157 return dlb2_event_dequeue_burst(event_port, ev, 1, wait);
4161 dlb2_event_dequeue_burst_sparse(void *event_port, struct rte_event *ev,
4162 uint16_t num, uint64_t wait)
4164 struct dlb2_eventdev_port *ev_port = event_port;
4165 struct dlb2_port *qm_port = &ev_port->qm_port;
4166 struct dlb2_eventdev *dlb2 = ev_port->dlb2;
4169 RTE_ASSERT(ev_port->setup_done);
4170 RTE_ASSERT(ev != NULL);
4172 if (ev_port->implicit_release && ev_port->outstanding_releases > 0) {
4173 uint16_t out_rels = ev_port->outstanding_releases;
4175 dlb2_event_release(dlb2, ev_port->id, out_rels);
4177 DLB2_INC_STAT(ev_port->stats.tx_implicit_rel, out_rels);
4180 if (qm_port->token_pop_mode == DEFERRED_POP && qm_port->owed_tokens)
4181 dlb2_consume_qe_immediate(qm_port, qm_port->owed_tokens);
4183 cnt = dlb2_hw_dequeue_sparse(dlb2, ev_port, ev, num, wait);
4185 DLB2_INC_STAT(ev_port->stats.traffic.total_polls, 1);
4186 DLB2_INC_STAT(ev_port->stats.traffic.zero_polls, ((cnt == 0) ? 1 : 0));
4191 dlb2_event_dequeue_sparse(void *event_port, struct rte_event *ev,
4194 return dlb2_event_dequeue_burst_sparse(event_port, ev, 1, wait);
4198 dlb2_flush_port(struct rte_eventdev *dev, int port_id)
4200 struct dlb2_eventdev *dlb2 = dlb2_pmd_priv(dev);
4201 eventdev_stop_flush_t flush;
4202 struct rte_event ev;
4207 flush = dev->dev_ops->dev_stop_flush;
4208 dev_id = dev->data->dev_id;
4209 arg = dev->data->dev_stop_flush_arg;
4211 while (rte_event_dequeue_burst(dev_id, port_id, &ev, 1, 0)) {
4213 flush(dev_id, ev, arg);
4215 if (dlb2->ev_ports[port_id].qm_port.is_directed)
4218 ev.op = RTE_EVENT_OP_RELEASE;
4220 rte_event_enqueue_burst(dev_id, port_id, &ev, 1);
4223 /* Enqueue any additional outstanding releases */
4224 ev.op = RTE_EVENT_OP_RELEASE;
4226 for (i = dlb2->ev_ports[port_id].outstanding_releases; i > 0; i--)
4227 rte_event_enqueue_burst(dev_id, port_id, &ev, 1);
4231 dlb2_get_ldb_queue_depth(struct dlb2_eventdev *dlb2,
4232 struct dlb2_eventdev_queue *queue)
4234 struct dlb2_hw_dev *handle = &dlb2->qm_instance;
4235 struct dlb2_get_ldb_queue_depth_args cfg;
4238 cfg.queue_id = queue->qm_queue.id;
4240 ret = dlb2_iface_get_ldb_queue_depth(handle, &cfg);
4242 DLB2_LOG_ERR("dlb2: get_ldb_queue_depth ret=%d (driver status: %s)\n",
4243 ret, dlb2_error_strings[cfg.response.status]);
4247 return cfg.response.id;
4251 dlb2_get_dir_queue_depth(struct dlb2_eventdev *dlb2,
4252 struct dlb2_eventdev_queue *queue)
4254 struct dlb2_hw_dev *handle = &dlb2->qm_instance;
4255 struct dlb2_get_dir_queue_depth_args cfg;
4258 cfg.queue_id = queue->qm_queue.id;
4260 ret = dlb2_iface_get_dir_queue_depth(handle, &cfg);
4262 DLB2_LOG_ERR("dlb2: get_dir_queue_depth ret=%d (driver status: %s)\n",
4263 ret, dlb2_error_strings[cfg.response.status]);
4267 return cfg.response.id;
4271 dlb2_get_queue_depth(struct dlb2_eventdev *dlb2,
4272 struct dlb2_eventdev_queue *queue)
4274 if (queue->qm_queue.is_directed)
4275 return dlb2_get_dir_queue_depth(dlb2, queue);
4277 return dlb2_get_ldb_queue_depth(dlb2, queue);
4281 dlb2_queue_is_empty(struct dlb2_eventdev *dlb2,
4282 struct dlb2_eventdev_queue *queue)
4284 return dlb2_get_queue_depth(dlb2, queue) == 0;
4288 dlb2_linked_queues_empty(struct dlb2_eventdev *dlb2)
4292 for (i = 0; i < dlb2->num_queues; i++) {
4293 if (dlb2->ev_queues[i].num_links == 0)
4295 if (!dlb2_queue_is_empty(dlb2, &dlb2->ev_queues[i]))
4303 dlb2_queues_empty(struct dlb2_eventdev *dlb2)
4307 for (i = 0; i < dlb2->num_queues; i++) {
4308 if (!dlb2_queue_is_empty(dlb2, &dlb2->ev_queues[i]))
4316 dlb2_drain(struct rte_eventdev *dev)
4318 struct dlb2_eventdev *dlb2 = dlb2_pmd_priv(dev);
4319 struct dlb2_eventdev_port *ev_port = NULL;
4323 dev_id = dev->data->dev_id;
4325 while (!dlb2_linked_queues_empty(dlb2)) {
4326 /* Flush all the ev_ports, which will drain all their connected queues. */
4329 for (i = 0; i < dlb2->num_ports; i++)
4330 dlb2_flush_port(dev, i);
4333 /* The queues are empty, but there may be events left in the ports. */
4334 for (i = 0; i < dlb2->num_ports; i++)
4335 dlb2_flush_port(dev, i);
4337 /* If the domain's queues are empty, we're done. */
4338 if (dlb2_queues_empty(dlb2))
4341 /* Else, there must be at least one unlinked load-balanced queue.
4342 * Select a load-balanced port with which to drain the unlinked queue(s). */
4345 for (i = 0; i < dlb2->num_ports; i++) {
4346 ev_port = &dlb2->ev_ports[i];
4348 if (!ev_port->qm_port.is_directed)
4352 if (i == dlb2->num_ports) {
4353 DLB2_LOG_ERR("internal error: no LDB ev_ports\n");
4358 rte_event_port_unlink(dev_id, ev_port->id, NULL, 0);
4361 DLB2_LOG_ERR("internal error: failed to unlink ev_port %d\n",
4366 for (i = 0; i < dlb2->num_queues; i++) {
4370 if (dlb2_queue_is_empty(dlb2, &dlb2->ev_queues[i]))
4376 /* Link the ev_port to the queue */
4377 ret = rte_event_port_link(dev_id, ev_port->id, &qid, &prio, 1);
4379 DLB2_LOG_ERR("internal error: failed to link ev_port %d to queue %d\n",
4384 /* Flush the queue */
4385 while (!dlb2_queue_is_empty(dlb2, &dlb2->ev_queues[i]))
4386 dlb2_flush_port(dev, ev_port->id);
4388 /* Drain any extant events in the ev_port. */
4389 dlb2_flush_port(dev, ev_port->id);
4391 /* Unlink the ev_port from the queue */
4392 ret = rte_event_port_unlink(dev_id, ev_port->id, &qid, 1);
4394 DLB2_LOG_ERR("internal error: failed to unlink ev_port %d to queue %d\n",
4402 dlb2_eventdev_stop(struct rte_eventdev *dev)
4404 struct dlb2_eventdev *dlb2 = dlb2_pmd_priv(dev);
4406 rte_spinlock_lock(&dlb2->qm_instance.resource_lock);
4408 if (dlb2->run_state == DLB2_RUN_STATE_STOPPED) {
4409 DLB2_LOG_DBG("Internal error: already stopped\n");
4410 rte_spinlock_unlock(&dlb2->qm_instance.resource_lock);
4412 } else if (dlb2->run_state != DLB2_RUN_STATE_STARTED) {
4413 DLB2_LOG_ERR("Internal error: bad state %d for dev_stop\n",
4414 (int)dlb2->run_state);
4415 rte_spinlock_unlock(&dlb2->qm_instance.resource_lock);
4419 dlb2->run_state = DLB2_RUN_STATE_STOPPING;
4421 rte_spinlock_unlock(&dlb2->qm_instance.resource_lock);
4425 dlb2->run_state = DLB2_RUN_STATE_STOPPED;
4429 dlb2_eventdev_close(struct rte_eventdev *dev)
4431 dlb2_hw_reset_sched_domain(dev, false);
4437 dlb2_eventdev_queue_release(struct rte_eventdev *dev, uint8_t id)
4442 /* This function intentionally left blank. */
4446 dlb2_eventdev_port_release(void *port)
4448 struct dlb2_eventdev_port *ev_port = port;
4449 struct dlb2_port *qm_port;
4452 qm_port = &ev_port->qm_port;
4453 if (qm_port->config_state == DLB2_CONFIGURED)
4454 dlb2_free_qe_mem(qm_port);
4459 dlb2_eventdev_timeout_ticks(struct rte_eventdev *dev, uint64_t ns,
4460 uint64_t *timeout_ticks)
4463 uint64_t cycles_per_ns = rte_get_timer_hz() / 1E9;
4465 *timeout_ticks = ns * cycles_per_ns;
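/* Illustrative sketch, not part of the driver: the conversion above divides
 * the timer frequency by 1E9 first, which truncates to whole GHz (and reads
 * as zero on sub-GHz timers). A sketch that keeps the intermediate precision
 * is shown here; it trades that for possible 64-bit overflow when both 'ns'
 * and the timer frequency are very large.
 */
static inline uint64_t
example_ns_to_ticks(uint64_t ns)
{
	return ns * rte_get_timer_hz() / 1000000000ULL;
}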
static void
dlb2_entry_points_init(struct rte_eventdev *dev)
{
	struct dlb2_eventdev *dlb2;

	/* Expose PMD's eventdev interface */
	static struct eventdev_ops dlb2_eventdev_entry_ops = {
		.dev_infos_get = dlb2_eventdev_info_get,
		.dev_configure = dlb2_eventdev_configure,
		.dev_start = dlb2_eventdev_start,
		.dev_stop = dlb2_eventdev_stop,
		.dev_close = dlb2_eventdev_close,
		.queue_def_conf = dlb2_eventdev_queue_default_conf_get,
		.queue_setup = dlb2_eventdev_queue_setup,
		.queue_release = dlb2_eventdev_queue_release,
		.port_def_conf = dlb2_eventdev_port_default_conf_get,
		.port_setup = dlb2_eventdev_port_setup,
		.port_release = dlb2_eventdev_port_release,
		.port_link = dlb2_eventdev_port_link,
		.port_unlink = dlb2_eventdev_port_unlink,
		.port_unlinks_in_progress =
			dlb2_eventdev_port_unlinks_in_progress,
		.timeout_ticks = dlb2_eventdev_timeout_ticks,
		.dump = dlb2_eventdev_dump,
		.xstats_get = dlb2_eventdev_xstats_get,
		.xstats_get_names = dlb2_eventdev_xstats_get_names,
		.xstats_get_by_name = dlb2_eventdev_xstats_get_by_name,
		.xstats_reset = dlb2_eventdev_xstats_reset,
		.dev_selftest = test_dlb2_eventdev,
	};
	dev->dev_ops = &dlb2_eventdev_entry_ops;
	dev->enqueue = dlb2_event_enqueue;
	dev->enqueue_burst = dlb2_event_enqueue_burst;
	dev->enqueue_new_burst = dlb2_event_enqueue_new_burst;
	dev->enqueue_forward_burst = dlb2_event_enqueue_forward_burst;
	dlb2 = dev->data->dev_private;
	if (dlb2->poll_mode == DLB2_CQ_POLL_MODE_SPARSE) {
		dev->dequeue = dlb2_event_dequeue_sparse;
		dev->dequeue_burst = dlb2_event_dequeue_burst_sparse;
	} else {
		dev->dequeue = dlb2_event_dequeue;
		dev->dequeue_burst = dlb2_event_dequeue_burst;
	}
}
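/*
 * Both probe paths below call dlb2_entry_points_init(), so each process ends
 * up with valid fast-path pointers. Illustrative application-side call (an
 * example, not part of the driver):
 *
 *   struct rte_event evs[32];
 *   uint16_t n = rte_event_dequeue_burst(dev_id, port_id, evs,
 *                                        RTE_DIM(evs), timeout_ticks);
 *
 * which dispatches through the dequeue_burst pointer chosen above (the
 * sparse-CQ variant when the hardware reports sparse poll mode).
 */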
int
dlb2_primary_eventdev_probe(struct rte_eventdev *dev,
			    const char *name,
			    struct dlb2_devargs *dlb2_args)
{
	struct dlb2_eventdev *dlb2;
	int err, i;

	dlb2 = dev->data->dev_private;

	dlb2->event_dev = dev; /* backlink */

	evdev_dlb2_default_info.driver_name = name;

	dlb2->max_num_events_override = dlb2_args->max_num_events;
	dlb2->num_dir_credits_override = dlb2_args->num_dir_credits_override;
	dlb2->qm_instance.cos_id = dlb2_args->cos_id;
	dlb2->poll_interval = dlb2_args->poll_interval;
	dlb2->sw_credit_quanta = dlb2_args->sw_credit_quanta;
	dlb2->hw_credit_quanta = dlb2_args->hw_credit_quanta;
	dlb2->default_depth_thresh = dlb2_args->default_depth_thresh;
	dlb2->vector_opts_enabled = dlb2_args->vector_opts_enabled;
	dlb2->max_cq_depth = dlb2_args->max_cq_depth;

	err = dlb2_iface_open(&dlb2->qm_instance, name);
	if (err < 0) {
		DLB2_LOG_ERR("could not open event hardware device, err=%d\n",
			     err);
		return err;
	}
	err = dlb2_iface_get_device_version(&dlb2->qm_instance,
					    &dlb2->revision);
	if (err < 0) {
		DLB2_LOG_ERR("dlb2: failed to get the device version, err=%d\n",
			     err);
		return err;
	}
	err = dlb2_hw_query_resources(dlb2);
	if (err) {
		DLB2_LOG_ERR("get resources err=%d for %s\n",
			     err, name);
		return err;
	}

	dlb2_iface_hardware_init(&dlb2->qm_instance);
	err = dlb2_iface_get_cq_poll_mode(&dlb2->qm_instance, &dlb2->poll_mode);
	if (err < 0) {
		DLB2_LOG_ERR("dlb2: failed to get the poll mode, err=%d\n",
			     err);
		return err;
	}

	/* Complete xstats runtime initialization */
	err = dlb2_xstats_init(dlb2);
	if (err) {
		DLB2_LOG_ERR("dlb2: failed to init xstats, err=%d\n", err);
		return err;
	}
	/* Initialize each port's token pop mode */
	for (i = 0; i < DLB2_MAX_NUM_PORTS(dlb2->version); i++)
		dlb2->ev_ports[i].qm_port.token_pop_mode = AUTO_POP;

	rte_spinlock_init(&dlb2->qm_instance.resource_lock);

	dlb2_iface_low_level_io_init();

	dlb2_entry_points_init(dev);

	dlb2_init_queue_depth_thresholds(dlb2,
					 dlb2_args->qid_depth_thresholds.val);

	return 0;
}
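/*
 * Probe-order note (summary of the code above): the hardware interface is
 * opened and its resources queried before hardware init, CQ poll-mode
 * discovery, and xstats setup; the fast-path entry points are installed only
 * after poll-mode discovery because the dequeue selection depends on it.
 */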
int
dlb2_secondary_eventdev_probe(struct rte_eventdev *dev,
			      const char *name)
{
	struct dlb2_eventdev *dlb2;
	int err;

	dlb2 = dev->data->dev_private;

	evdev_dlb2_default_info.driver_name = name;

	err = dlb2_iface_open(&dlb2->qm_instance, name);
	if (err < 0) {
		DLB2_LOG_ERR("could not open event hardware device, err=%d\n",
			     err);
		return err;
	}

	err = dlb2_hw_query_resources(dlb2);
	if (err) {
		DLB2_LOG_ERR("get resources err=%d for %s\n",
			     err, name);
		return err;
	}

	dlb2_iface_low_level_io_init();

	dlb2_entry_points_init(dev);

	return 0;
}
int
dlb2_parse_params(const char *params,
		  const char *name,
		  struct dlb2_devargs *dlb2_args,
		  uint8_t version)
{
	int ret = 0;
	static const char * const args[] = { NUMA_NODE_ARG,
					     DLB2_MAX_NUM_EVENTS,
					     DLB2_NUM_DIR_CREDITS,
					     DEV_ID_ARG,
					     DLB2_QID_DEPTH_THRESH_ARG,
					     DLB2_COS_ARG,
					     DLB2_POLL_INTERVAL_ARG,
					     DLB2_SW_CREDIT_QUANTA_ARG,
					     DLB2_HW_CREDIT_QUANTA_ARG,
					     DLB2_DEPTH_THRESH_ARG,
					     DLB2_VECTOR_OPTS_ENAB_ARG,
					     DLB2_MAX_CQ_DEPTH,
					     NULL };
	if (params != NULL && params[0] != '\0') {
		struct rte_kvargs *kvlist = rte_kvargs_parse(params, args);

		if (kvlist == NULL) {
			RTE_LOG(INFO, PMD,
				"Ignoring unsupported parameters when creating device '%s'\n",
				name);
		} else {
			int ret = rte_kvargs_process(kvlist, NUMA_NODE_ARG,
						     set_numa_node,
						     &dlb2_args->socket_id);
			if (ret != 0) {
				DLB2_LOG_ERR("%s: Error parsing numa node parameter",
					     name);
				rte_kvargs_free(kvlist);
				return ret;
			}

			ret = rte_kvargs_process(kvlist, DLB2_MAX_NUM_EVENTS,
						 set_max_num_events,
						 &dlb2_args->max_num_events);
			if (ret != 0) {
				DLB2_LOG_ERR("%s: Error parsing max_num_events parameter",
					     name);
				rte_kvargs_free(kvlist);
				return ret;
			}
			if (version == DLB2_HW_V2) {
				ret = rte_kvargs_process(kvlist,
					DLB2_NUM_DIR_CREDITS,
					set_num_dir_credits,
					&dlb2_args->num_dir_credits_override);
				if (ret != 0) {
					DLB2_LOG_ERR("%s: Error parsing num_dir_credits parameter",
						     name);
					rte_kvargs_free(kvlist);
					return ret;
				}
			}
			ret = rte_kvargs_process(kvlist, DEV_ID_ARG,
						 set_dev_id,
						 &dlb2_args->dev_id);
			if (ret != 0) {
				DLB2_LOG_ERR("%s: Error parsing dev_id parameter",
					     name);
				rte_kvargs_free(kvlist);
				return ret;
			}
			if (version == DLB2_HW_V2) {
				ret = rte_kvargs_process(
					kvlist,
					DLB2_QID_DEPTH_THRESH_ARG,
					set_qid_depth_thresh,
					&dlb2_args->qid_depth_thresholds);
			} else {
				ret = rte_kvargs_process(
					kvlist,
					DLB2_QID_DEPTH_THRESH_ARG,
					set_qid_depth_thresh_v2_5,
					&dlb2_args->qid_depth_thresholds);
			}
			if (ret != 0) {
				DLB2_LOG_ERR("%s: Error parsing qid_depth_thresh parameter",
					     name);
				rte_kvargs_free(kvlist);
				return ret;
			}
			ret = rte_kvargs_process(kvlist, DLB2_COS_ARG,
						 set_cos,
						 &dlb2_args->cos_id);
			if (ret != 0) {
				DLB2_LOG_ERR("%s: Error parsing cos parameter",
					     name);
				rte_kvargs_free(kvlist);
				return ret;
			}
			ret = rte_kvargs_process(kvlist, DLB2_POLL_INTERVAL_ARG,
						 set_poll_interval,
						 &dlb2_args->poll_interval);
			if (ret != 0) {
				DLB2_LOG_ERR("%s: Error parsing poll interval parameter",
					     name);
				rte_kvargs_free(kvlist);
				return ret;
			}
			ret = rte_kvargs_process(kvlist,
						 DLB2_SW_CREDIT_QUANTA_ARG,
						 set_sw_credit_quanta,
						 &dlb2_args->sw_credit_quanta);
			if (ret != 0) {
				DLB2_LOG_ERR("%s: Error parsing sw credit quanta parameter",
					     name);
				rte_kvargs_free(kvlist);
				return ret;
			}
			ret = rte_kvargs_process(kvlist,
						 DLB2_HW_CREDIT_QUANTA_ARG,
						 set_hw_credit_quanta,
						 &dlb2_args->hw_credit_quanta);
			if (ret != 0) {
				DLB2_LOG_ERR("%s: Error parsing hw credit quanta parameter",
					     name);
				rte_kvargs_free(kvlist);
				return ret;
			}
			ret = rte_kvargs_process(kvlist, DLB2_DEPTH_THRESH_ARG,
						 set_default_depth_thresh,
						 &dlb2_args->default_depth_thresh);
			if (ret != 0) {
				DLB2_LOG_ERR("%s: Error parsing set depth thresh parameter",
					     name);
				rte_kvargs_free(kvlist);
				return ret;
			}
			ret = rte_kvargs_process(kvlist,
						 DLB2_VECTOR_OPTS_ENAB_ARG,
						 set_vector_opts_enab,
						 &dlb2_args->vector_opts_enabled);
			if (ret != 0) {
				DLB2_LOG_ERR("%s: Error parsing vector opts enabled",
					     name);
				rte_kvargs_free(kvlist);
				return ret;
			}
			ret = rte_kvargs_process(kvlist,
						 DLB2_MAX_CQ_DEPTH,
						 set_max_cq_depth,
						 &dlb2_args->max_cq_depth);
			if (ret != 0) {
				DLB2_LOG_ERR("%s: Error parsing max cq depth parameter",
					     name);
				rte_kvargs_free(kvlist);
				return ret;
			}

			rte_kvargs_free(kvlist);
		}
	}
	return ret;
}
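/*
 * Illustrative devargs usage (a sketch; the exact key strings are defined in
 * dlb2_priv.h, and the values below are made up for the example):
 *
 *   dpdk-test-eventdev --allow <dlb2 BDF>,max_num_events=2048,num_dir_credits=64
 *
 * Each key=value pair is matched against args[] above and routed to its
 * set_* handler by rte_kvargs_process().
 */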
RTE_LOG_REGISTER_DEFAULT(eventdev_dlb2_log_level, NOTICE);