1 /* SPDX-License-Identifier: BSD-3-Clause
2 * Copyright(c) 2019 Intel Corporation.
9 #include <rte_common.h>
10 #include <rte_lcore.h>
11 #include <rte_cycles.h>
15 #include <rte_bus_pci.h>
16 #include <rte_memzone.h>
17 #include <rte_memcpy.h>
18 #include <rte_rawdev.h>
19 #include <rte_rawdev_pmd.h>
21 #include "ntb_hw_intel.h"
26 static const struct rte_pci_id pci_id_ntb_map[] = {
27 { RTE_PCI_DEVICE(NTB_INTEL_VENDOR_ID, NTB_INTEL_DEV_ID_B2B_SKX) },
28 { .vendor_id = 0, /* sentinel */ },
32 ntb_set_mw(struct rte_rawdev *dev, int mw_idx, uint64_t mw_size)
34 struct ntb_hw *hw = dev->dev_private;
35 char mw_name[RTE_MEMZONE_NAMESIZE];
36 const struct rte_memzone *mz;
39 if (hw->ntb_ops->mw_set_trans == NULL) {
40 NTB_LOG(ERR, "Not supported to set mw.");
44 snprintf(mw_name, sizeof(mw_name), "ntb_%d_mw_%d",
47 mz = rte_memzone_lookup(mw_name);
52 * Hardware requires that mapped memory base address should be
53 * aligned with EMBARSZ and needs continuous memzone.
55 mz = rte_memzone_reserve_aligned(mw_name, mw_size, dev->socket_id,
56 RTE_MEMZONE_IOVA_CONTIG, hw->mw_size[mw_idx]);
58 NTB_LOG(ERR, "Cannot allocate aligned memzone.");
63 ret = (*hw->ntb_ops->mw_set_trans)(dev, mw_idx, mz->iova, mw_size);
65 NTB_LOG(ERR, "Cannot set mw translation.");
73 ntb_link_cleanup(struct rte_rawdev *dev)
75 struct ntb_hw *hw = dev->dev_private;
78 if (hw->ntb_ops->spad_write == NULL ||
79 hw->ntb_ops->mw_set_trans == NULL) {
80 NTB_LOG(ERR, "Not supported to clean up link.");
84 /* Clean spad registers. */
85 for (i = 0; i < hw->spad_cnt; i++) {
86 status = (*hw->ntb_ops->spad_write)(dev, i, 0, 0);
88 NTB_LOG(ERR, "Failed to clean local spad.");
91 /* Clear mw so that peer cannot access local memory.*/
92 for (i = 0; i < hw->mw_cnt; i++) {
93 status = (*hw->ntb_ops->mw_set_trans)(dev, i, 0, 0);
95 NTB_LOG(ERR, "Failed to clean mw.");
100 ntb_dev_intr_handler(void *param)
102 struct rte_rawdev *dev = (struct rte_rawdev *)param;
103 struct ntb_hw *hw = dev->dev_private;
104 uint32_t mw_size_h, mw_size_l;
105 uint64_t db_bits = 0;
108 if (hw->ntb_ops->db_read == NULL ||
109 hw->ntb_ops->db_clear == NULL ||
110 hw->ntb_ops->peer_db_set == NULL) {
111 NTB_LOG(ERR, "Doorbell is not supported.");
115 db_bits = (*hw->ntb_ops->db_read)(dev);
117 NTB_LOG(ERR, "No doorbells");
119 /* Doorbell 0 is for peer device ready. */
121 NTB_LOG(DEBUG, "DB0: Peer device is up.");
122 /* Clear received doorbell. */
123 (*hw->ntb_ops->db_clear)(dev, 1);
126 * Peer dev is already up. All mw settings are already done.
132 if (hw->ntb_ops->spad_read == NULL ||
133 hw->ntb_ops->spad_write == NULL) {
134 NTB_LOG(ERR, "Scratchpad is not supported.");
138 hw->peer_mw_cnt = (*hw->ntb_ops->spad_read)
139 (dev, SPAD_NUM_MWS, 0);
140 hw->peer_mw_size = rte_zmalloc("uint64_t",
141 hw->peer_mw_cnt * sizeof(uint64_t), 0);
142 for (i = 0; i < hw->mw_cnt; i++) {
143 mw_size_h = (*hw->ntb_ops->spad_read)
144 (dev, SPAD_MW0_SZ_H + 2 * i, 0);
145 mw_size_l = (*hw->ntb_ops->spad_read)
146 (dev, SPAD_MW0_SZ_L + 2 * i, 0);
147 hw->peer_mw_size[i] = ((uint64_t)mw_size_h << 32) |
149 NTB_LOG(DEBUG, "Peer %u mw size: 0x%"PRIx64"", i,
150 hw->peer_mw_size[i]);
156 * Handshake with peer. Spad_write only works when both
157 * devices are up. So write spad again when db is received.
158 * And set db again for the later device who may miss
161 for (i = 0; i < hw->mw_cnt; i++) {
162 (*hw->ntb_ops->spad_write)(dev, SPAD_NUM_MWS,
164 mw_size_h = hw->mw_size[i] >> 32;
165 (*hw->ntb_ops->spad_write)(dev, SPAD_MW0_SZ_H + 2 * i,
168 mw_size_l = hw->mw_size[i];
169 (*hw->ntb_ops->spad_write)(dev, SPAD_MW0_SZ_L + 2 * i,
172 (*hw->ntb_ops->peer_db_set)(dev, 0);
174 /* To get the link info. */
175 if (hw->ntb_ops->get_link_status == NULL) {
176 NTB_LOG(ERR, "Not supported to get link status.");
179 (*hw->ntb_ops->get_link_status)(dev);
180 NTB_LOG(INFO, "Link is up. Link speed: %u. Link width: %u",
181 hw->link_speed, hw->link_width);
185 if (db_bits & (1 << 1)) {
186 NTB_LOG(DEBUG, "DB1: Peer device is down.");
187 /* Clear received doorbell. */
188 (*hw->ntb_ops->db_clear)(dev, 2);
190 /* Peer device will be down, So clean local side too. */
191 ntb_link_cleanup(dev);
194 /* Response peer's dev_stop request. */
195 (*hw->ntb_ops->peer_db_set)(dev, 2);
199 if (db_bits & (1 << 2)) {
200 NTB_LOG(DEBUG, "DB2: Peer device agrees dev to be down.");
201 /* Clear received doorbell. */
202 (*hw->ntb_ops->db_clear)(dev, (1 << 2));
209 ntb_queue_conf_get(struct rte_rawdev *dev __rte_unused,
210 uint16_t queue_id __rte_unused,
211 rte_rawdev_obj_t queue_conf __rte_unused)
216 ntb_queue_setup(struct rte_rawdev *dev __rte_unused,
217 uint16_t queue_id __rte_unused,
218 rte_rawdev_obj_t queue_conf __rte_unused)
224 ntb_queue_release(struct rte_rawdev *dev __rte_unused,
225 uint16_t queue_id __rte_unused)
231 ntb_queue_count(struct rte_rawdev *dev)
233 struct ntb_hw *hw = dev->dev_private;
234 return hw->queue_pairs;
238 ntb_enqueue_bufs(struct rte_rawdev *dev,
239 struct rte_rawdev_buf **buffers,
241 rte_rawdev_obj_t context)
244 RTE_SET_USED(buffers);
246 RTE_SET_USED(context);
252 ntb_dequeue_bufs(struct rte_rawdev *dev,
253 struct rte_rawdev_buf **buffers,
255 rte_rawdev_obj_t context)
258 RTE_SET_USED(buffers);
260 RTE_SET_USED(context);
266 ntb_dev_info_get(struct rte_rawdev *dev, rte_rawdev_obj_t dev_info)
268 struct ntb_hw *hw = dev->dev_private;
269 struct ntb_attr *ntb_attrs = dev_info;
271 strncpy(ntb_attrs[NTB_TOPO_ID].name, NTB_TOPO_NAME, NTB_ATTR_NAME_LEN);
273 case NTB_TOPO_B2B_DSD:
274 strncpy(ntb_attrs[NTB_TOPO_ID].value, "B2B DSD",
277 case NTB_TOPO_B2B_USD:
278 strncpy(ntb_attrs[NTB_TOPO_ID].value, "B2B USD",
282 strncpy(ntb_attrs[NTB_TOPO_ID].value, "Unsupported",
286 strncpy(ntb_attrs[NTB_LINK_STATUS_ID].name, NTB_LINK_STATUS_NAME,
288 snprintf(ntb_attrs[NTB_LINK_STATUS_ID].value, NTB_ATTR_VAL_LEN,
289 "%d", hw->link_status);
291 strncpy(ntb_attrs[NTB_SPEED_ID].name, NTB_SPEED_NAME,
293 snprintf(ntb_attrs[NTB_SPEED_ID].value, NTB_ATTR_VAL_LEN,
294 "%d", hw->link_speed);
296 strncpy(ntb_attrs[NTB_WIDTH_ID].name, NTB_WIDTH_NAME,
298 snprintf(ntb_attrs[NTB_WIDTH_ID].value, NTB_ATTR_VAL_LEN,
299 "%d", hw->link_width);
301 strncpy(ntb_attrs[NTB_MW_CNT_ID].name, NTB_MW_CNT_NAME,
303 snprintf(ntb_attrs[NTB_MW_CNT_ID].value, NTB_ATTR_VAL_LEN,
306 strncpy(ntb_attrs[NTB_DB_CNT_ID].name, NTB_DB_CNT_NAME,
308 snprintf(ntb_attrs[NTB_DB_CNT_ID].value, NTB_ATTR_VAL_LEN,
311 strncpy(ntb_attrs[NTB_SPAD_CNT_ID].name, NTB_SPAD_CNT_NAME,
313 snprintf(ntb_attrs[NTB_SPAD_CNT_ID].value, NTB_ATTR_VAL_LEN,
318 ntb_dev_configure(const struct rte_rawdev *dev __rte_unused,
319 rte_rawdev_obj_t config __rte_unused)
325 ntb_dev_start(struct rte_rawdev *dev)
327 struct ntb_hw *hw = dev->dev_private;
330 /* TODO: init queues and start queues. */
332 /* Map memory of bar_size to remote. */
333 hw->mz = rte_zmalloc("struct rte_memzone *",
334 hw->mw_cnt * sizeof(struct rte_memzone *), 0);
335 for (i = 0; i < hw->mw_cnt; i++) {
336 ret = ntb_set_mw(dev, i, hw->mw_size[i]);
338 NTB_LOG(ERR, "Fail to set mw.");
349 ntb_dev_stop(struct rte_rawdev *dev)
351 struct ntb_hw *hw = dev->dev_private;
355 /* TODO: stop rx/tx queues. */
357 if (!hw->peer_dev_up)
360 ntb_link_cleanup(dev);
362 /* Notify the peer that device will be down. */
363 if (hw->ntb_ops->peer_db_set == NULL) {
364 NTB_LOG(ERR, "Peer doorbell setting is not supported.");
367 status = (*hw->ntb_ops->peer_db_set)(dev, 1);
369 NTB_LOG(ERR, "Failed to tell peer device is down.");
374 * Set time out as 1s in case that the peer is stopped accidently
375 * without any notification.
379 /* Wait for cleanup work down before db mask clear. */
380 while (hw->peer_dev_up && time_out) {
386 /* Clear doorbells mask. */
387 if (hw->ntb_ops->db_set_mask == NULL) {
388 NTB_LOG(ERR, "Doorbell mask setting is not supported.");
391 status = (*hw->ntb_ops->db_set_mask)(dev,
392 (((uint64_t)1 << hw->db_cnt) - 1));
394 NTB_LOG(ERR, "Failed to clear doorbells.");
400 ntb_dev_close(struct rte_rawdev *dev)
402 struct ntb_hw *hw = dev->dev_private;
403 struct rte_intr_handle *intr_handle;
409 /* TODO: free queues. */
411 intr_handle = &hw->pci_dev->intr_handle;
412 /* Clean datapath event and vec mapping */
413 rte_intr_efd_disable(intr_handle);
414 if (intr_handle->intr_vec) {
415 rte_free(intr_handle->intr_vec);
416 intr_handle->intr_vec = NULL;
418 /* Disable uio intr before callback unregister */
419 rte_intr_disable(intr_handle);
421 /* Unregister callback func to eal lib */
422 rte_intr_callback_unregister(intr_handle,
423 ntb_dev_intr_handler, dev);
429 ntb_dev_reset(struct rte_rawdev *rawdev __rte_unused)
435 ntb_attr_set(struct rte_rawdev *dev, const char *attr_name,
438 struct ntb_hw *hw = dev->dev_private;
441 if (dev == NULL || attr_name == NULL) {
442 NTB_LOG(ERR, "Invalid arguments for setting attributes");
446 if (!strncmp(attr_name, NTB_SPAD_USER, NTB_SPAD_USER_LEN)) {
447 if (hw->ntb_ops->spad_write == NULL)
449 index = atoi(&attr_name[NTB_SPAD_USER_LEN]);
450 (*hw->ntb_ops->spad_write)(dev, hw->spad_user_list[index],
452 NTB_LOG(INFO, "Set attribute (%s) Value (%" PRIu64 ")",
453 attr_name, attr_value);
457 /* Attribute not found. */
458 NTB_LOG(ERR, "Attribute not found.");
463 ntb_attr_get(struct rte_rawdev *dev, const char *attr_name,
464 uint64_t *attr_value)
466 struct ntb_hw *hw = dev->dev_private;
469 if (dev == NULL || attr_name == NULL || attr_value == NULL) {
470 NTB_LOG(ERR, "Invalid arguments for getting attributes");
474 if (!strncmp(attr_name, NTB_TOPO_NAME, NTB_ATTR_NAME_LEN)) {
475 *attr_value = hw->topo;
476 NTB_LOG(INFO, "Attribute (%s) Value (%" PRIu64 ")",
477 attr_name, *attr_value);
481 if (!strncmp(attr_name, NTB_LINK_STATUS_NAME, NTB_ATTR_NAME_LEN)) {
482 *attr_value = hw->link_status;
483 NTB_LOG(INFO, "Attribute (%s) Value (%" PRIu64 ")",
484 attr_name, *attr_value);
488 if (!strncmp(attr_name, NTB_SPEED_NAME, NTB_ATTR_NAME_LEN)) {
489 *attr_value = hw->link_speed;
490 NTB_LOG(INFO, "Attribute (%s) Value (%" PRIu64 ")",
491 attr_name, *attr_value);
495 if (!strncmp(attr_name, NTB_WIDTH_NAME, NTB_ATTR_NAME_LEN)) {
496 *attr_value = hw->link_width;
497 NTB_LOG(INFO, "Attribute (%s) Value (%" PRIu64 ")",
498 attr_name, *attr_value);
502 if (!strncmp(attr_name, NTB_MW_CNT_NAME, NTB_ATTR_NAME_LEN)) {
503 *attr_value = hw->mw_cnt;
504 NTB_LOG(INFO, "Attribute (%s) Value (%" PRIu64 ")",
505 attr_name, *attr_value);
509 if (!strncmp(attr_name, NTB_DB_CNT_NAME, NTB_ATTR_NAME_LEN)) {
510 *attr_value = hw->db_cnt;
511 NTB_LOG(INFO, "Attribute (%s) Value (%" PRIu64 ")",
512 attr_name, *attr_value);
516 if (!strncmp(attr_name, NTB_SPAD_CNT_NAME, NTB_ATTR_NAME_LEN)) {
517 *attr_value = hw->spad_cnt;
518 NTB_LOG(INFO, "Attribute (%s) Value (%" PRIu64 ")",
519 attr_name, *attr_value);
523 if (!strncmp(attr_name, NTB_SPAD_USER, NTB_SPAD_USER_LEN)) {
524 if (hw->ntb_ops->spad_read == NULL)
526 index = atoi(&attr_name[NTB_SPAD_USER_LEN]);
527 *attr_value = (*hw->ntb_ops->spad_read)(dev,
528 hw->spad_user_list[index], 0);
529 NTB_LOG(INFO, "Attribute (%s) Value (%" PRIu64 ")",
530 attr_name, *attr_value);
534 /* Attribute not found. */
535 NTB_LOG(ERR, "Attribute not found.");
540 ntb_xstats_get(const struct rte_rawdev *dev __rte_unused,
541 const unsigned int ids[] __rte_unused,
542 uint64_t values[] __rte_unused,
543 unsigned int n __rte_unused)
549 ntb_xstats_get_names(const struct rte_rawdev *dev __rte_unused,
550 struct rte_rawdev_xstats_name *xstats_names __rte_unused,
551 unsigned int size __rte_unused)
557 ntb_xstats_get_by_name(const struct rte_rawdev *dev __rte_unused,
558 const char *name __rte_unused,
559 unsigned int *id __rte_unused)
565 ntb_xstats_reset(struct rte_rawdev *dev __rte_unused,
566 const uint32_t ids[] __rte_unused,
567 uint32_t nb_ids __rte_unused)
572 static const struct rte_rawdev_ops ntb_ops = {
573 .dev_info_get = ntb_dev_info_get,
574 .dev_configure = ntb_dev_configure,
575 .dev_start = ntb_dev_start,
576 .dev_stop = ntb_dev_stop,
577 .dev_close = ntb_dev_close,
578 .dev_reset = ntb_dev_reset,
580 .queue_def_conf = ntb_queue_conf_get,
581 .queue_setup = ntb_queue_setup,
582 .queue_release = ntb_queue_release,
583 .queue_count = ntb_queue_count,
585 .enqueue_bufs = ntb_enqueue_bufs,
586 .dequeue_bufs = ntb_dequeue_bufs,
588 .attr_get = ntb_attr_get,
589 .attr_set = ntb_attr_set,
591 .xstats_get = ntb_xstats_get,
592 .xstats_get_names = ntb_xstats_get_names,
593 .xstats_get_by_name = ntb_xstats_get_by_name,
594 .xstats_reset = ntb_xstats_reset,
598 ntb_init_hw(struct rte_rawdev *dev, struct rte_pci_device *pci_dev)
600 struct ntb_hw *hw = dev->dev_private;
601 struct rte_intr_handle *intr_handle;
605 hw->pci_dev = pci_dev;
607 hw->link_status = NTB_LINK_DOWN;
608 hw->link_speed = NTB_SPEED_NONE;
609 hw->link_width = NTB_WIDTH_NONE;
611 switch (pci_dev->id.device_id) {
612 case NTB_INTEL_DEV_ID_B2B_SKX:
613 hw->ntb_ops = &intel_ntb_ops;
616 NTB_LOG(ERR, "Not supported device.");
620 if (hw->ntb_ops->ntb_dev_init == NULL)
622 ret = (*hw->ntb_ops->ntb_dev_init)(dev);
624 NTB_LOG(ERR, "Unable to init ntb dev.");
628 if (hw->ntb_ops->set_link == NULL)
630 ret = (*hw->ntb_ops->set_link)(dev, 1);
635 hw->db_valid_mask = RTE_LEN2MASK(hw->db_cnt, uint64_t);
637 intr_handle = &pci_dev->intr_handle;
638 /* Register callback func to eal lib */
639 rte_intr_callback_register(intr_handle,
640 ntb_dev_intr_handler, dev);
642 ret = rte_intr_efd_enable(intr_handle, hw->db_cnt);
646 /* To clarify, the interrupt for each doorbell is already mapped
647 * by default for intel gen3. They are mapped to msix vec 1-32,
648 * and hardware intr is mapped to 0. Map all to 0 for uio.
650 if (!rte_intr_cap_multiple(intr_handle)) {
651 for (i = 0; i < hw->db_cnt; i++) {
652 if (hw->ntb_ops->vector_bind == NULL)
654 ret = (*hw->ntb_ops->vector_bind)(dev, i, 0);
660 if (hw->ntb_ops->db_set_mask == NULL ||
661 hw->ntb_ops->peer_db_set == NULL) {
662 NTB_LOG(ERR, "Doorbell is not supported.");
666 ret = (*hw->ntb_ops->db_set_mask)(dev, hw->db_mask);
668 NTB_LOG(ERR, "Unable to enable intr for all dbs.");
672 /* enable uio intr after callback register */
673 rte_intr_enable(intr_handle);
675 if (hw->ntb_ops->spad_write == NULL) {
676 NTB_LOG(ERR, "Scratchpad is not supported.");
679 /* Tell peer the mw_cnt of local side. */
680 ret = (*hw->ntb_ops->spad_write)(dev, SPAD_NUM_MWS, 1, hw->mw_cnt);
682 NTB_LOG(ERR, "Failed to tell peer mw count.");
686 /* Tell peer each mw size on local side. */
687 for (i = 0; i < hw->mw_cnt; i++) {
688 NTB_LOG(DEBUG, "Local %u mw size: 0x%"PRIx64"", i,
690 val = hw->mw_size[i] >> 32;
691 ret = (*hw->ntb_ops->spad_write)
692 (dev, SPAD_MW0_SZ_H + 2 * i, 1, val);
694 NTB_LOG(ERR, "Failed to tell peer mw size.");
698 val = hw->mw_size[i];
699 ret = (*hw->ntb_ops->spad_write)
700 (dev, SPAD_MW0_SZ_L + 2 * i, 1, val);
702 NTB_LOG(ERR, "Failed to tell peer mw size.");
707 /* Ring doorbell 0 to tell peer the device is ready. */
708 ret = (*hw->ntb_ops->peer_db_set)(dev, 0);
710 NTB_LOG(ERR, "Failed to tell peer device is probed.");
718 ntb_create(struct rte_pci_device *pci_dev, int socket_id)
720 char name[RTE_RAWDEV_NAME_MAX_LEN];
721 struct rte_rawdev *rawdev = NULL;
724 if (pci_dev == NULL) {
725 NTB_LOG(ERR, "Invalid pci_dev.");
729 memset(name, 0, sizeof(name));
730 snprintf(name, RTE_RAWDEV_NAME_MAX_LEN, "NTB:%x:%02x.%x",
731 pci_dev->addr.bus, pci_dev->addr.devid,
732 pci_dev->addr.function);
734 NTB_LOG(INFO, "Init %s on NUMA node %d", name, socket_id);
736 /* Allocate device structure. */
737 rawdev = rte_rawdev_pmd_allocate(name, sizeof(struct ntb_hw),
739 if (rawdev == NULL) {
740 NTB_LOG(ERR, "Unable to allocate rawdev.");
744 rawdev->dev_ops = &ntb_ops;
745 rawdev->device = &pci_dev->device;
746 rawdev->driver_name = pci_dev->driver->driver.name;
748 ret = ntb_init_hw(rawdev, pci_dev);
750 NTB_LOG(ERR, "Unable to init ntb hw.");
758 rte_rawdev_pmd_release(rawdev);
764 ntb_destroy(struct rte_pci_device *pci_dev)
766 char name[RTE_RAWDEV_NAME_MAX_LEN];
767 struct rte_rawdev *rawdev;
770 if (pci_dev == NULL) {
771 NTB_LOG(ERR, "Invalid pci_dev.");
776 memset(name, 0, sizeof(name));
777 snprintf(name, RTE_RAWDEV_NAME_MAX_LEN, "NTB:%x:%02x.%x",
778 pci_dev->addr.bus, pci_dev->addr.devid,
779 pci_dev->addr.function);
781 NTB_LOG(INFO, "Closing %s on NUMA node %d", name, rte_socket_id());
783 rawdev = rte_rawdev_pmd_get_named_dev(name);
784 if (rawdev == NULL) {
785 NTB_LOG(ERR, "Invalid device name (%s)", name);
790 ret = rte_rawdev_pmd_release(rawdev);
792 NTB_LOG(ERR, "Failed to destroy ntb rawdev.");
798 ntb_probe(struct rte_pci_driver *pci_drv __rte_unused,
799 struct rte_pci_device *pci_dev)
801 return ntb_create(pci_dev, rte_socket_id());
/* PCI remove hook: tear down the rawdev. */
static int
ntb_remove(struct rte_pci_device *pci_dev)
{
	return ntb_destroy(pci_dev);
}
811 static struct rte_pci_driver rte_ntb_pmd = {
812 .id_table = pci_id_ntb_map,
813 .drv_flags = RTE_PCI_DRV_NEED_MAPPING,
815 .remove = ntb_remove,
818 RTE_PMD_REGISTER_PCI(raw_ntb, rte_ntb_pmd);
819 RTE_PMD_REGISTER_PCI_TABLE(raw_ntb, pci_id_ntb_map);
820 RTE_PMD_REGISTER_KMOD_DEP(raw_ntb, "* igb_uio | uio_pci_generic | vfio-pci");
822 RTE_INIT(ntb_init_log)
824 ntb_logtype = rte_log_register("pmd.raw.ntb");
825 if (ntb_logtype >= 0)
826 rte_log_set_level(ntb_logtype, RTE_LOG_DEBUG);