From: Xiaoyun Li Date: Tue, 2 Jul 2019 06:25:19 +0000 (+0800) Subject: raw/ntb: support Intel NTB X-Git-Url: http://git.droids-corp.org/?a=commitdiff_plain;h=034c328eb0254e1577ccdf3274acbe80d137b5a7;p=dpdk.git raw/ntb: support Intel NTB Add in the list of registers for the device. And enable NTB device ops for Intel Skylake platform. Signed-off-by: Xiaoyun Li Acked-by: Jingjing Wu Reviewed-by: Xiaolong Ye --- diff --git a/doc/guides/rawdevs/ntb.rst b/doc/guides/rawdevs/ntb.rst index 9559ea4b03..ff0795b47f 100644 --- a/doc/guides/rawdevs/ntb.rst +++ b/doc/guides/rawdevs/ntb.rst @@ -9,6 +9,22 @@ separate hosts so that they can communicate with each other. Thus, many user cases can benefit from this, such as fault tolerance and visual acceleration. +BIOS setting on Intel Skylake +----------------------------- + +Intel Non-transparent Bridge needs special BIOS setting. Since the PMD only +supports Intel Skylake platform, introduce BIOS setting here. The reference +is https://www.intel.com/content/dam/support/us/en/documents/server-products/Intel_Xeon_Processor_Scalable_Family_BIOS_User_Guide.pdf + +- Set the needed PCIe port as NTB to NTB mode on both hosts. +- Enable NTB bars and set bar size of bar 23 and bar 45 as 12-29 (4K-512M) + on both hosts. Note that bar size on both hosts should be the same. +- Disable split bars for both hosts. +- Set crosslink control override as DSD/USP on one host, USD/DSP on + another host. +- Disable PCIe PII SSC (Spread Spectrum Clocking) for both hosts. This + is a hardware requirement. + Build Options ------------- @@ -16,7 +32,16 @@ Build Options Toggle compilation of the ``ntb`` driver. +Device Setup +------------ + +The Intel NTB devices need to be bound to a DPDK-supported kernel driver +to use, i.e. igb_uio, vfio. The ``dpdk-devbind.py`` script can be used to +show devices status and to bind them to a suitable kernel driver. They will +appear under the category of "Misc (rawdev) devices". 
+ Limitation ---------- - The FIFO hasn't been introduced and will come in 19.11 release. +- This PMD only supports Intel Skylake platform. diff --git a/drivers/raw/ntb/Makefile b/drivers/raw/ntb/Makefile index ec53edd33f..edd49fe754 100644 --- a/drivers/raw/ntb/Makefile +++ b/drivers/raw/ntb/Makefile @@ -23,5 +23,6 @@ LIBABIVER := 1 # all source are stored in SRCS-y # SRCS-$(CONFIG_RTE_LIBRTE_PMD_NTB_RAWDEV) += ntb.c +SRCS-$(CONFIG_RTE_LIBRTE_PMD_NTB_RAWDEV) += ntb_hw_intel.c include $(RTE_SDK)/mk/rte.lib.mk diff --git a/drivers/raw/ntb/meson.build b/drivers/raw/ntb/meson.build index 0fb44452cc..7f39437f87 100644 --- a/drivers/raw/ntb/meson.build +++ b/drivers/raw/ntb/meson.build @@ -3,5 +3,6 @@ deps += ['rawdev', 'mbuf', 'mempool', 'pci', 'bus_pci'] -sources = files('ntb.c') +sources = files('ntb.c', + 'ntb_hw_intel.c') allow_experimental_apis = true diff --git a/drivers/raw/ntb/ntb.c b/drivers/raw/ntb/ntb.c index 2ae3cbea0f..5fde704b72 100644 --- a/drivers/raw/ntb/ntb.c +++ b/drivers/raw/ntb/ntb.c @@ -18,11 +18,13 @@ #include #include +#include "ntb_hw_intel.h" #include "ntb.h" int ntb_logtype; static const struct rte_pci_id pci_id_ntb_map[] = { + { RTE_PCI_DEVICE(NTB_INTEL_VENDOR_ID, NTB_INTEL_DEV_ID_B2B_SKX) }, { .vendor_id = 0, /* sentinel */ }, }; @@ -353,6 +355,9 @@ ntb_init_hw(struct rte_rawdev *dev, struct rte_pci_device *pci_dev) hw->link_width = NTB_WIDTH_NONE; switch (pci_dev->id.device_id) { + case NTB_INTEL_DEV_ID_B2B_SKX: + hw->ntb_ops = &intel_ntb_ops; + break; default: NTB_LOG(ERR, "Not supported device."); return -EINVAL; diff --git a/drivers/raw/ntb/ntb_hw_intel.c b/drivers/raw/ntb/ntb_hw_intel.c new file mode 100644 index 0000000000..21eaa85118 --- /dev/null +++ b/drivers/raw/ntb/ntb_hw_intel.c @@ -0,0 +1,369 @@ +/* SPDX-License-Identifier: BSD-3-Clause + * Copyright(c) 2019 Intel Corporation. 
+ */ +#include +#include +#include + +#include +#include +#include +#include +#include +#include + +#include "ntb.h" +#include "ntb_hw_intel.h" + +enum xeon_ntb_bar { + XEON_NTB_BAR23 = 2, + XEON_NTB_BAR45 = 4, +}; + +static enum xeon_ntb_bar intel_ntb_bar[] = { + XEON_NTB_BAR23, + XEON_NTB_BAR45, +}; + +static int +intel_ntb_dev_init(struct rte_rawdev *dev) +{ + struct ntb_hw *hw = dev->dev_private; + uint8_t reg_val, bar; + int ret, i; + + if (hw == NULL) { + NTB_LOG(ERR, "Invalid device."); + return -EINVAL; + } + + ret = rte_pci_read_config(hw->pci_dev, &reg_val, + sizeof(reg_val), XEON_PPD_OFFSET); + if (ret < 0) { + NTB_LOG(ERR, "Cannot get NTB PPD (PCIe port definition)."); + return -EIO; + } + + /* Check connection topo type. Only support B2B. */ + switch (reg_val & XEON_PPD_CONN_MASK) { + case XEON_PPD_CONN_B2B: + NTB_LOG(INFO, "Topo B2B (back to back) is using."); + break; + case XEON_PPD_CONN_TRANSPARENT: + case XEON_PPD_CONN_RP: + default: + NTB_LOG(ERR, "Not supported conn topo. Please use B2B."); + return -EINVAL; + } + + /* Check device type. */ + if (reg_val & XEON_PPD_DEV_DSD) { + NTB_LOG(INFO, "DSD, Downstream Device."); + hw->topo = NTB_TOPO_B2B_DSD; + } else { + NTB_LOG(INFO, "USD, Upstream device."); + hw->topo = NTB_TOPO_B2B_USD; + } + + /* Check if bar4 is split. Do not support split bar. */ + if (reg_val & XEON_PPD_SPLIT_BAR_MASK) { + NTB_LOG(ERR, "Do not support split bar."); + return -EINVAL; + } + + hw->hw_addr = (char *)hw->pci_dev->mem_resource[0].addr; + + hw->mw_cnt = XEON_MW_COUNT; + hw->db_cnt = XEON_DB_COUNT; + hw->spad_cnt = XEON_SPAD_COUNT; + + hw->mw_size = rte_zmalloc("uint64_t", + hw->mw_cnt * sizeof(uint64_t), 0); + for (i = 0; i < hw->mw_cnt; i++) { + bar = intel_ntb_bar[i]; + hw->mw_size[i] = hw->pci_dev->mem_resource[bar].len; + } + + /* Reserve the last 2 spad registers for users. 
*/ + for (i = 0; i < NTB_SPAD_USER_MAX_NUM; i++) + hw->spad_user_list[i] = hw->spad_cnt; + hw->spad_user_list[0] = hw->spad_cnt - 2; + hw->spad_user_list[1] = hw->spad_cnt - 1; + + return 0; +} + +static void * +intel_ntb_get_peer_mw_addr(struct rte_rawdev *dev, int mw_idx) +{ + struct ntb_hw *hw = dev->dev_private; + uint8_t bar; + + if (hw == NULL) { + NTB_LOG(ERR, "Invalid device."); + return 0; + } + + if (mw_idx < 0 || mw_idx >= hw->mw_cnt) { + NTB_LOG(ERR, "Invalid memory window index (0 - %u).", + hw->mw_cnt - 1); + return 0; + } + + bar = intel_ntb_bar[mw_idx]; + + return hw->pci_dev->mem_resource[bar].addr; +} + +static int +intel_ntb_mw_set_trans(struct rte_rawdev *dev, int mw_idx, + uint64_t addr, uint64_t size) +{ + struct ntb_hw *hw = dev->dev_private; + void *xlat_addr, *limit_addr; + uint64_t xlat_off, limit_off; + uint64_t base, limit; + uint8_t bar; + + if (hw == NULL) { + NTB_LOG(ERR, "Invalid device."); + return -EINVAL; + } + + if (mw_idx < 0 || mw_idx >= hw->mw_cnt) { + NTB_LOG(ERR, "Invalid memory window index (0 - %u).", + hw->mw_cnt - 1); + return -EINVAL; + } + + bar = intel_ntb_bar[mw_idx]; + + xlat_off = XEON_IMBAR1XBASE_OFFSET + mw_idx * XEON_BAR_INTERVAL_OFFSET; + limit_off = XEON_IMBAR1XLMT_OFFSET + mw_idx * XEON_BAR_INTERVAL_OFFSET; + xlat_addr = hw->hw_addr + xlat_off; + limit_addr = hw->hw_addr + limit_off; + + /* Limit reg val should be EMBAR base address plus MW size. */ + base = addr; + limit = hw->pci_dev->mem_resource[bar].phys_addr + size; + rte_write64(base, xlat_addr); + rte_write64(limit, limit_addr); + + /* Setup the external point so that remote can access. 
+ */ + xlat_off = XEON_EMBAR1_OFFSET + 8 * mw_idx; + xlat_addr = hw->hw_addr + xlat_off; + limit_off = XEON_EMBAR1XLMT_OFFSET + mw_idx * XEON_BAR_INTERVAL_OFFSET; + limit_addr = hw->hw_addr + limit_off; + base = rte_read64(xlat_addr); + base &= ~0xf; + limit = base + size; + rte_write64(limit, limit_addr); + + return 0; +} + +static int +intel_ntb_get_link_status(struct rte_rawdev *dev) +{ + struct ntb_hw *hw = dev->dev_private; + uint16_t reg_val; + int ret; + + if (hw == NULL) { + NTB_LOG(ERR, "Invalid device."); + return -EINVAL; + } + + ret = rte_pci_read_config(hw->pci_dev, &reg_val, + sizeof(reg_val), XEON_LINK_STATUS_OFFSET); + if (ret < 0) { + NTB_LOG(ERR, "Unable to get link status."); + return -EIO; + } + + hw->link_status = NTB_LNK_STA_ACTIVE(reg_val); + + if (hw->link_status) { + hw->link_speed = NTB_LNK_STA_SPEED(reg_val); + hw->link_width = NTB_LNK_STA_WIDTH(reg_val); + } else { + hw->link_speed = NTB_SPEED_NONE; + hw->link_width = NTB_WIDTH_NONE; + } + + return 0; +} + +static int +intel_ntb_set_link(struct rte_rawdev *dev, bool up) +{ + struct ntb_hw *hw = dev->dev_private; + uint32_t ntb_ctrl, reg_off; + void *reg_addr; + + reg_off = XEON_NTBCNTL_OFFSET; + reg_addr = hw->hw_addr + reg_off; + ntb_ctrl = rte_read32(reg_addr); + + if (up) { + ntb_ctrl &= ~(NTB_CTL_DISABLE | NTB_CTL_CFG_LOCK); + ntb_ctrl |= NTB_CTL_P2S_BAR2_SNOOP | NTB_CTL_S2P_BAR2_SNOOP; + ntb_ctrl |= NTB_CTL_P2S_BAR4_SNOOP | NTB_CTL_S2P_BAR4_SNOOP; + } else { + ntb_ctrl &= ~(NTB_CTL_P2S_BAR2_SNOOP | NTB_CTL_S2P_BAR2_SNOOP); + ntb_ctrl &= ~(NTB_CTL_P2S_BAR4_SNOOP | NTB_CTL_S2P_BAR4_SNOOP); + ntb_ctrl |= NTB_CTL_DISABLE | NTB_CTL_CFG_LOCK; + } + + rte_write32(ntb_ctrl, reg_addr); + + return 0; +} + +static uint32_t +intel_ntb_spad_read(struct rte_rawdev *dev, int spad, bool peer) +{ + struct ntb_hw *hw = dev->dev_private; + uint32_t spad_v, reg_off; + void *reg_addr; + + if (spad < 0 || spad >= hw->spad_cnt) { + NTB_LOG(ERR, "Invalid spad reg index."); + return 0; + } + + /* When peer is 
true, read peer spad reg */ + reg_off = peer ? XEON_B2B_SPAD_OFFSET : XEON_IM_SPAD_OFFSET; + reg_addr = hw->hw_addr + reg_off + (spad << 2); + spad_v = rte_read32(reg_addr); + + return spad_v; +} + +static int +intel_ntb_spad_write(struct rte_rawdev *dev, int spad, + bool peer, uint32_t spad_v) +{ + struct ntb_hw *hw = dev->dev_private; + uint32_t reg_off; + void *reg_addr; + + if (spad < 0 || spad >= hw->spad_cnt) { + NTB_LOG(ERR, "Invalid spad reg index."); + return -EINVAL; + } + + /* When peer is true, write peer spad reg */ + reg_off = peer ? XEON_B2B_SPAD_OFFSET : XEON_IM_SPAD_OFFSET; + reg_addr = hw->hw_addr + reg_off + (spad << 2); + + rte_write32(spad_v, reg_addr); + + return 0; +} + +static uint64_t +intel_ntb_db_read(struct rte_rawdev *dev) +{ + struct ntb_hw *hw = dev->dev_private; + uint64_t db_off, db_bits; + void *db_addr; + + db_off = XEON_IM_INT_STATUS_OFFSET; + db_addr = hw->hw_addr + db_off; + + db_bits = rte_read64(db_addr); + + return db_bits; +} + +static int +intel_ntb_db_clear(struct rte_rawdev *dev, uint64_t db_bits) +{ + struct ntb_hw *hw = dev->dev_private; + uint64_t db_off; + void *db_addr; + + db_off = XEON_IM_INT_STATUS_OFFSET; + db_addr = hw->hw_addr + db_off; + + rte_write64(db_bits, db_addr); + + return 0; +} + +static int +intel_ntb_db_set_mask(struct rte_rawdev *dev, uint64_t db_mask) +{ + struct ntb_hw *hw = dev->dev_private; + uint64_t db_m_off; + void *db_m_addr; + + db_m_off = XEON_IM_INT_DISABLE_OFFSET; + db_m_addr = hw->hw_addr + db_m_off; + + db_mask |= hw->db_mask; + + rte_write64(db_mask, db_m_addr); + + hw->db_mask = db_mask; + + return 0; +} + +static int +intel_ntb_peer_db_set(struct rte_rawdev *dev, uint8_t db_idx) +{ + struct ntb_hw *hw = dev->dev_private; + uint32_t db_off; + void *db_addr; + + if (((uint64_t)1 << db_idx) & ~hw->db_valid_mask) { + NTB_LOG(ERR, "Invalid doorbell."); + return -EINVAL; + } + + db_off = XEON_IM_DOORBELL_OFFSET + db_idx * 4; + db_addr = hw->hw_addr + db_off; + + rte_write32(1, db_addr); 
+ + return 0; +} + +static int +intel_ntb_vector_bind(struct rte_rawdev *dev, uint8_t intr, uint8_t msix) +{ + struct ntb_hw *hw = dev->dev_private; + uint8_t reg_off; + void *reg_addr; + + if (intr >= hw->db_cnt) { + NTB_LOG(ERR, "Invalid intr source."); + return -EINVAL; + } + + /* Bind intr source to msix vector */ + reg_off = XEON_INTVEC_OFFSET; + reg_addr = hw->hw_addr + reg_off + intr; + + rte_write8(msix, reg_addr); + + return 0; +} + +/* operations for primary side of local ntb */ +const struct ntb_dev_ops intel_ntb_ops = { + .ntb_dev_init = intel_ntb_dev_init, + .get_peer_mw_addr = intel_ntb_get_peer_mw_addr, + .mw_set_trans = intel_ntb_mw_set_trans, + .get_link_status = intel_ntb_get_link_status, + .set_link = intel_ntb_set_link, + .spad_read = intel_ntb_spad_read, + .spad_write = intel_ntb_spad_write, + .db_read = intel_ntb_db_read, + .db_clear = intel_ntb_db_clear, + .db_set_mask = intel_ntb_db_set_mask, + .peer_db_set = intel_ntb_peer_db_set, + .vector_bind = intel_ntb_vector_bind, +}; diff --git a/drivers/raw/ntb/ntb_hw_intel.h b/drivers/raw/ntb/ntb_hw_intel.h new file mode 100644 index 0000000000..4d1e64504c --- /dev/null +++ b/drivers/raw/ntb/ntb_hw_intel.h @@ -0,0 +1,86 @@ +/* SPDX-License-Identifier: BSD-3-Clause + * Copyright(c) 2019 Intel Corporation. 
+ */ + +#ifndef _NTB_HW_INTEL_H_ +#define _NTB_HW_INTEL_H_ + +/* Ntb control and link status */ +#define NTB_CTL_CFG_LOCK 1 +#define NTB_CTL_DISABLE 2 +#define NTB_CTL_S2P_BAR2_SNOOP (1 << 2) +#define NTB_CTL_P2S_BAR2_SNOOP (1 << 4) +#define NTB_CTL_S2P_BAR4_SNOOP (1 << 6) +#define NTB_CTL_P2S_BAR4_SNOOP (1 << 8) +#define NTB_CTL_S2P_BAR5_SNOOP (1 << 12) +#define NTB_CTL_P2S_BAR5_SNOOP (1 << 14) + +#define NTB_LNK_STA_ACTIVE_BIT 0x2000 +#define NTB_LNK_STA_SPEED_MASK 0x000f +#define NTB_LNK_STA_WIDTH_MASK 0x03f0 +#define NTB_LNK_STA_ACTIVE(x) (!!((x) & NTB_LNK_STA_ACTIVE_BIT)) +#define NTB_LNK_STA_SPEED(x) ((x) & NTB_LNK_STA_SPEED_MASK) +#define NTB_LNK_STA_WIDTH(x) (((x) & NTB_LNK_STA_WIDTH_MASK) >> 4) + +/* Intel Skylake Xeon hardware */ +#define XEON_IMBAR1SZ_OFFSET 0x00d0 +#define XEON_IMBAR2SZ_OFFSET 0x00d1 +#define XEON_EMBAR1SZ_OFFSET 0x00d2 +#define XEON_EMBAR2SZ_OFFSET 0x00d3 +#define XEON_DEVCTRL_OFFSET 0x0098 +#define XEON_DEVSTS_OFFSET 0x009a +#define XEON_UNCERRSTS_OFFSET 0x014c +#define XEON_CORERRSTS_OFFSET 0x0158 +#define XEON_LINK_STATUS_OFFSET 0x01a2 + +#define XEON_NTBCNTL_OFFSET 0x0000 +#define XEON_BAR_INTERVAL_OFFSET 0x0010 +#define XEON_IMBAR1XBASE_OFFSET 0x0010 /* SBAR2XLAT */ +#define XEON_IMBAR1XLMT_OFFSET 0x0018 /* SBAR2LMT */ +#define XEON_IMBAR2XBASE_OFFSET 0x0020 /* SBAR4XLAT */ +#define XEON_IMBAR2XLMT_OFFSET 0x0028 /* SBAR4LMT */ +#define XEON_IM_INT_STATUS_OFFSET 0x0040 +#define XEON_IM_INT_DISABLE_OFFSET 0x0048 +#define XEON_IM_SPAD_OFFSET 0x0080 /* SPAD */ +#define XEON_USMEMMISS_OFFSET 0x0070 +#define XEON_INTVEC_OFFSET 0x00d0 +#define XEON_IM_DOORBELL_OFFSET 0x0100 /* SDOORBELL0 */ +#define XEON_B2B_SPAD_OFFSET 0x0180 /* B2B SPAD */ +#define XEON_EMBAR0XBASE_OFFSET 0x4008 /* B2B_XLAT */ +#define XEON_EMBAR1XBASE_OFFSET 0x4010 /* PBAR2XLAT */ +#define XEON_EMBAR1XLMT_OFFSET 0x4018 /* PBAR2LMT */ +#define XEON_EMBAR2XBASE_OFFSET 0x4020 /* PBAR4XLAT */ +#define XEON_EMBAR2XLMT_OFFSET 0x4028 /* PBAR4LMT */ +#define 
XEON_EM_INT_STATUS_OFFSET 0x4040 +#define XEON_EM_INT_DISABLE_OFFSET 0x4048 +#define XEON_EM_SPAD_OFFSET 0x4080 /* remote SPAD */ +#define XEON_EM_DOORBELL_OFFSET 0x4100 /* PDOORBELL0 */ +#define XEON_SPCICMD_OFFSET 0x4504 /* SPCICMD */ +#define XEON_EMBAR0_OFFSET 0x4510 /* SBAR0BASE */ +#define XEON_EMBAR1_OFFSET 0x4518 /* SBAR23BASE */ +#define XEON_EMBAR2_OFFSET 0x4520 /* SBAR45BASE */ + +#define XEON_PPD_OFFSET 0x00d4 +#define XEON_PPD_CONN_MASK 0x03 +#define XEON_PPD_CONN_TRANSPARENT 0x00 +#define XEON_PPD_CONN_B2B 0x01 +#define XEON_PPD_CONN_RP 0x02 +#define XEON_PPD_DEV_MASK 0x10 +#define XEON_PPD_DEV_USD 0x00 +#define XEON_PPD_DEV_DSD 0x10 +#define XEON_PPD_SPLIT_BAR_MASK 0x40 + + +#define XEON_MW_COUNT 2 + +#define XEON_DB_COUNT 32 +#define XEON_DB_LINK 32 +#define XEON_DB_LINK_BIT (1ULL << XEON_DB_LINK) +#define XEON_DB_MSIX_VECTOR_COUNT 33 +#define XEON_DB_MSIX_VECTOR_SHIFT 1 +#define XEON_DB_TOTAL_SHIFT 33 +#define XEON_SPAD_COUNT 16 + +extern const struct ntb_dev_ops intel_ntb_ops; + +#endif /* _NTB_HW_INTEL_H_ */ diff --git a/usertools/dpdk-devbind.py b/usertools/dpdk-devbind.py index 27b54326f2..542ecffcc5 100755 --- a/usertools/dpdk-devbind.py +++ b/usertools/dpdk-devbind.py @@ -44,6 +44,8 @@ intel_ioat_bdw = {'Class': '08', 'Vendor': '8086', 'Device': '6f20,6f21,6f22,6f2 'SVendor': None, 'SDevice': None} intel_ioat_skx = {'Class': '08', 'Vendor': '8086', 'Device': '2021', 'SVendor': None, 'SDevice': None} +intel_ntb_skx = {'Class': '06', 'Vendor': '8086', 'Device': '201c', + 'SVendor': None, 'SDevice': None} network_devices = [network_class, cavium_pkx, avp_vnic, ifpga_class] baseband_devices = [acceleration_class] @@ -51,7 +53,7 @@ crypto_devices = [encryption_class, intel_processor_class] eventdev_devices = [cavium_sso, cavium_tim, octeontx2_sso] mempool_devices = [cavium_fpa, octeontx2_npa] compress_devices = [cavium_zip] -misc_devices = [intel_ioat_bdw, intel_ioat_skx, octeontx2_dma] +misc_devices = [intel_ioat_bdw, intel_ioat_skx, 
intel_ntb_skx, octeontx2_dma] # global dict ethernet devices present. Dictionary indexed by PCI address. # Each device within this is itself a dictionary of device properties