X-Git-Url: http://git.droids-corp.org/?a=blobdiff_plain;f=drivers%2Fnet%2Fnfp%2Fnfp_net.c;h=a46c4dd1cc0c8eb02e7147a89c6a2b54ce186a0a;hb=7f8e73201dae6e605df6a9cdc24d9004b2590424;hp=9ea24e5bda21fd573d1482ce485725ff1250e792;hpb=a36974f43c69f7a714ffec237461854ae924ce29;p=dpdk.git

diff --git a/drivers/net/nfp/nfp_net.c b/drivers/net/nfp/nfp_net.c
index 9ea24e5bda..a46c4dd1cc 100644
--- a/drivers/net/nfp/nfp_net.c
+++ b/drivers/net/nfp/nfp_net.c
@@ -17,8 +17,8 @@
 #include 
 #include 
 #include 
-#include <rte_ethdev_driver.h>
-#include <rte_ethdev_pci.h>
+#include <ethdev_driver.h>
+#include <ethdev_pci.h>
 #include 
 #include 
 #include 
@@ -30,6 +30,8 @@
 #include 
 #include 
 
+#include "eal_firmware.h"
+
 #include "nfpcore/nfp_cpp.h"
 #include "nfpcore/nfp_nffw.h"
 #include "nfpcore/nfp_hwinfo.h"
@@ -38,8 +40,10 @@
 #include "nfpcore/nfp_nsp.h"
 
 #include "nfp_net_pmd.h"
+#include "nfp_rxtx.h"
 #include "nfp_net_logs.h"
 #include "nfp_net_ctrl.h"
+#include "nfp_cpp_bridge.h"
 
 #include 
 #include 
 
@@ -51,226 +55,17 @@
 
 /* Prototypes */
 static int nfp_net_close(struct rte_eth_dev *dev);
-static int nfp_net_configure(struct rte_eth_dev *dev);
-static void nfp_net_dev_interrupt_handler(void *param);
-static void nfp_net_dev_interrupt_delayed_handler(void *param);
-static int nfp_net_dev_mtu_set(struct rte_eth_dev *dev, uint16_t mtu);
-static int nfp_net_infos_get(struct rte_eth_dev *dev,
- struct rte_eth_dev_info *dev_info);
 static int nfp_net_init(struct rte_eth_dev *eth_dev);
-static int nfp_net_link_update(struct rte_eth_dev *dev, int wait_to_complete);
-static int nfp_net_promisc_enable(struct rte_eth_dev *dev);
-static int nfp_net_promisc_disable(struct rte_eth_dev *dev);
-static int nfp_net_rx_fill_freelist(struct nfp_net_rxq *rxq);
-static uint32_t nfp_net_rx_queue_count(struct rte_eth_dev *dev,
- uint16_t queue_idx);
-static uint16_t nfp_net_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts,
- uint16_t nb_pkts);
-static void nfp_net_rx_queue_release(void *rxq);
-static int nfp_net_rx_queue_setup(struct rte_eth_dev *dev, uint16_t queue_idx,
- uint16_t nb_desc, unsigned int socket_id,
- const struct rte_eth_rxconf *rx_conf,
- struct rte_mempool *mp);
-static int nfp_net_tx_free_bufs(struct nfp_net_txq *txq);
-static void nfp_net_tx_queue_release(void *txq);
-static int nfp_net_tx_queue_setup(struct rte_eth_dev *dev, uint16_t queue_idx,
- uint16_t nb_desc, unsigned int socket_id,
- const struct rte_eth_txconf *tx_conf);
-static int nfp_net_start(struct rte_eth_dev *dev);
-static int nfp_net_stats_get(struct rte_eth_dev *dev,
- struct rte_eth_stats *stats);
-static int nfp_net_stats_reset(struct rte_eth_dev *dev);
+static int nfp_pf_init(struct rte_pci_device *pci_dev);
+static int nfp_pf_secondary_init(struct rte_pci_device *pci_dev);
+static int nfp_net_pf_read_mac(struct nfp_pf_dev *pf_dev, int port);
+static int nfp_pci_uninit(struct rte_eth_dev *eth_dev);
+static int nfp_init_phyports(struct nfp_pf_dev *pf_dev);
 static int nfp_net_stop(struct rte_eth_dev *dev);
-static uint16_t nfp_net_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts,
- uint16_t nb_pkts);
-
-static int nfp_net_rss_config_default(struct rte_eth_dev *dev);
-static int nfp_net_rss_hash_update(struct rte_eth_dev *dev,
- struct rte_eth_rss_conf *rss_conf);
-static int nfp_net_rss_reta_write(struct rte_eth_dev *dev,
- struct rte_eth_rss_reta_entry64 *reta_conf,
- uint16_t reta_size);
-static int nfp_net_rss_hash_write(struct rte_eth_dev *dev,
- struct rte_eth_rss_conf *rss_conf);
-static int nfp_set_mac_addr(struct rte_eth_dev *dev,
- struct rte_ether_addr *mac_addr);
-
-/* The offset of the queue controller queues in the 
PCIe Target */ -#define NFP_PCIE_QUEUE(_q) (0x80000 + (NFP_QCP_QUEUE_ADDR_SZ * ((_q) & 0xff))) - -/* Maximum value which can be added to a queue with one transaction */ -#define NFP_QCP_MAX_ADD 0x7f - -#define RTE_MBUF_DMA_ADDR_DEFAULT(mb) \ - (uint64_t)((mb)->buf_iova + RTE_PKTMBUF_HEADROOM) - -/* nfp_qcp_ptr - Read or Write Pointer of a queue */ -enum nfp_qcp_ptr { - NFP_QCP_READ_PTR = 0, - NFP_QCP_WRITE_PTR -}; - -/* - * nfp_qcp_ptr_add - Add the value to the selected pointer of a queue - * @q: Base address for queue structure - * @ptr: Add to the Read or Write pointer - * @val: Value to add to the queue pointer - * - * If @val is greater than @NFP_QCP_MAX_ADD multiple writes are performed. - */ -static inline void -nfp_qcp_ptr_add(uint8_t *q, enum nfp_qcp_ptr ptr, uint32_t val) -{ - uint32_t off; - - if (ptr == NFP_QCP_READ_PTR) - off = NFP_QCP_QUEUE_ADD_RPTR; - else - off = NFP_QCP_QUEUE_ADD_WPTR; - - while (val > NFP_QCP_MAX_ADD) { - nn_writel(rte_cpu_to_le_32(NFP_QCP_MAX_ADD), q + off); - val -= NFP_QCP_MAX_ADD; - } - - nn_writel(rte_cpu_to_le_32(val), q + off); -} - -/* - * nfp_qcp_read - Read the current Read/Write pointer value for a queue - * @q: Base address for queue structure - * @ptr: Read or Write pointer - */ -static inline uint32_t -nfp_qcp_read(uint8_t *q, enum nfp_qcp_ptr ptr) -{ - uint32_t off; - uint32_t val; - - if (ptr == NFP_QCP_READ_PTR) - off = NFP_QCP_QUEUE_STS_LO; - else - off = NFP_QCP_QUEUE_STS_HI; - - val = rte_cpu_to_le_32(nn_readl(q + off)); - - if (ptr == NFP_QCP_READ_PTR) - return val & NFP_QCP_QUEUE_STS_LO_READPTR_mask; - else - return val & NFP_QCP_QUEUE_STS_HI_WRITEPTR_mask; -} - -/* - * Functions to read/write from/to Config BAR - * Performs any endian conversion necessary. - */ -static inline uint8_t -nn_cfg_readb(struct nfp_net_hw *hw, int off) -{ - return nn_readb(hw->ctrl_bar + off); -} - -static inline void -nn_cfg_writeb(struct nfp_net_hw *hw, int off, uint8_t val) -{ - nn_writeb(val, hw->ctrl_bar + off); -} - -static inline uint32_t -nn_cfg_readl(struct nfp_net_hw *hw, int off) -{ - return rte_le_to_cpu_32(nn_readl(hw->ctrl_bar + off)); -} - -static inline void -nn_cfg_writel(struct nfp_net_hw *hw, int off, uint32_t val) -{ - nn_writel(rte_cpu_to_le_32(val), hw->ctrl_bar + off); -} - -static inline uint64_t -nn_cfg_readq(struct nfp_net_hw *hw, int off) -{ - return rte_le_to_cpu_64(nn_readq(hw->ctrl_bar + off)); -} - -static inline void -nn_cfg_writeq(struct nfp_net_hw *hw, int off, uint64_t val) -{ - nn_writeq(rte_cpu_to_le_64(val), hw->ctrl_bar + off); -} - -static void -nfp_net_rx_queue_release_mbufs(struct nfp_net_rxq *rxq) -{ - unsigned i; - - if (rxq->rxbufs == NULL) - return; - - for (i = 0; i < rxq->rx_count; i++) { - if (rxq->rxbufs[i].mbuf) { - rte_pktmbuf_free_seg(rxq->rxbufs[i].mbuf); - rxq->rxbufs[i].mbuf = NULL; - } - } -} - -static void -nfp_net_rx_queue_release(void *rx_queue) -{ - struct nfp_net_rxq *rxq = rx_queue; - - if (rxq) { - nfp_net_rx_queue_release_mbufs(rxq); - rte_free(rxq->rxbufs); - rte_free(rxq); - } -} - -static void -nfp_net_reset_rx_queue(struct nfp_net_rxq *rxq) -{ - nfp_net_rx_queue_release_mbufs(rxq); - rxq->rd_p = 0; - rxq->nb_rx_hold = 0; -} - -static void -nfp_net_tx_queue_release_mbufs(struct nfp_net_txq *txq) -{ - unsigned i; - - if (txq->txbufs == NULL) - return; - - for (i = 0; i < txq->tx_count; i++) { - if (txq->txbufs[i].mbuf) { - rte_pktmbuf_free_seg(txq->txbufs[i].mbuf); - txq->txbufs[i].mbuf = NULL; - } - } -} - -static void -nfp_net_tx_queue_release(void *tx_queue) -{ - struct nfp_net_txq *txq 
= tx_queue; - - if (txq) { - nfp_net_tx_queue_release_mbufs(txq); - rte_free(txq->txbufs); - rte_free(txq); - } -} - -static void -nfp_net_reset_tx_queue(struct nfp_net_txq *txq) -{ - nfp_net_tx_queue_release_mbufs(txq); - txq->wr_p = 0; - txq->rd_p = 0; -} +static int nfp_fw_setup(struct rte_pci_device *dev, + struct nfp_cpp *cpp, + struct nfp_eth_table *nfp_eth_table, + struct nfp_hwinfo *hwinfo); static int __nfp_net_reconfig(struct nfp_net_hw *hw, uint32_t update) @@ -321,7 +116,7 @@ __nfp_net_reconfig(struct nfp_net_hw *hw, uint32_t update) * Write the update word to the BAR and ping the reconfig queue. Then poll * until the firmware has acknowledged the update by zeroing the update word. */ -static int +int nfp_net_reconfig(struct nfp_net_hw *hw, uint32_t ctrl, uint32_t update) { uint32_t err; @@ -357,7 +152,7 @@ nfp_net_reconfig(struct nfp_net_hw *hw, uint32_t ctrl, uint32_t update) * before any other function in the Ethernet API. This function can * also be re-invoked when a device is in the stopped state. */ -static int +int nfp_net_configure(struct rte_eth_dev *dev) { struct rte_eth_conf *dev_conf; @@ -400,7 +195,7 @@ nfp_net_configure(struct rte_eth_dev *dev) return 0; } -static void +void nfp_net_enable_queues(struct rte_eth_dev *dev) { struct nfp_net_hw *hw; @@ -424,7 +219,7 @@ nfp_net_enable_queues(struct rte_eth_dev *dev) nn_cfg_writeq(hw, NFP_NET_CFG_RXRS_ENABLE, enabled_queues); } -static void +void nfp_net_disable_queues(struct rte_eth_dev *dev) { struct nfp_net_hw *hw; @@ -449,26 +244,14 @@ nfp_net_disable_queues(struct rte_eth_dev *dev) hw->ctrl = new_ctrl; } -static int -nfp_net_rx_freelist_setup(struct rte_eth_dev *dev) -{ - int i; - - for (i = 0; i < dev->data->nb_rx_queues; i++) { - if (nfp_net_rx_fill_freelist(dev->data->rx_queues[i]) < 0) - return -1; - } - return 0; -} - -static void +void nfp_net_params_setup(struct nfp_net_hw *hw) { nn_cfg_writel(hw, NFP_NET_CFG_MTU, hw->mtu); nn_cfg_writel(hw, NFP_NET_CFG_FLBUFSZ, hw->flbufsz); } -static void +void nfp_net_cfg_queue_setup(struct nfp_net_hw *hw) { hw->qcp_cfg = hw->tx_bar + NFP_QCP_QUEUE_ADDR_SZ; @@ -476,7 +259,7 @@ nfp_net_cfg_queue_setup(struct nfp_net_hw *hw) #define ETH_ADDR_LEN 6 -static void +void nfp_eth_copy_mac(uint8_t *dst, const uint8_t *src) { int i; @@ -486,16 +269,16 @@ nfp_eth_copy_mac(uint8_t *dst, const uint8_t *src) } static int -nfp_net_pf_read_mac(struct nfp_net_hw *hw, int port) +nfp_net_pf_read_mac(struct nfp_pf_dev *pf_dev, int port) { struct nfp_eth_table *nfp_eth_table; + struct nfp_net_hw *hw = NULL; + + /* Grab a pointer to the correct physical port */ + hw = pf_dev->ports[port]; + + nfp_eth_table = nfp_eth_read_ports(pf_dev->cpp); - nfp_eth_table = nfp_eth_read_ports(hw->cpp); - /* - * hw points to port0 private data. We need hw now pointing to - * right port. 
- */ - hw += port; nfp_eth_copy_mac((uint8_t *)&hw->mac_addr, (uint8_t *)&nfp_eth_table->ports[port].mac_addr); @@ -503,19 +286,7 @@ nfp_net_pf_read_mac(struct nfp_net_hw *hw, int port) return 0; } -static void -nfp_net_vf_read_mac(struct nfp_net_hw *hw) -{ - uint32_t tmp; - - tmp = rte_be_to_cpu_32(nn_cfg_readl(hw, NFP_NET_CFG_MACADDR)); - memcpy(&hw->mac_addr[0], &tmp, 4); - - tmp = rte_be_to_cpu_32(nn_cfg_readl(hw, NFP_NET_CFG_MACADDR + 4)); - memcpy(&hw->mac_addr[4], &tmp, 2); -} - -static void +void nfp_net_write_mac(struct nfp_net_hw *hw, uint8_t *mac) { uint32_t mac0 = *(uint32_t *)mac; @@ -563,7 +334,7 @@ nfp_set_mac_addr(struct rte_eth_dev *dev, struct rte_ether_addr *mac_addr) return 0; } -static int +int nfp_configure_rx_interrupt(struct rte_eth_dev *dev, struct rte_intr_handle *intr_handle) { @@ -607,7 +378,7 @@ nfp_configure_rx_interrupt(struct rte_eth_dev *dev, return 0; } -static uint32_t +uint32_t nfp_check_offloads(struct rte_eth_dev *dev) { struct nfp_net_hw *hw; @@ -674,12 +445,14 @@ nfp_net_start(struct rte_eth_dev *dev) struct rte_intr_handle *intr_handle = &pci_dev->intr_handle; uint32_t new_ctrl, update = 0; struct nfp_net_hw *hw; + struct nfp_pf_dev *pf_dev; struct rte_eth_conf *dev_conf; struct rte_eth_rxmode *rxmode; uint32_t intr_vector; int ret; hw = NFP_NET_DEV_PRIVATE_TO_HW(dev->data->dev_private); + pf_dev = NFP_NET_DEV_PRIVATE_TO_PF(dev->data->dev_private); PMD_INIT_LOG(DEBUG, "Start"); @@ -691,7 +464,7 @@ nfp_net_start(struct rte_eth_dev *dev) /* check and configure queue intr-vector mapping */ if (dev->data->dev_conf.intr_conf.rxq != 0) { - if (hw->pf_multiport_enabled) { + if (pf_dev->multiport) { PMD_INIT_LOG(ERR, "PMD rx interrupt is not supported " "with NFP multiport PF"); return -EINVAL; @@ -755,13 +528,13 @@ nfp_net_start(struct rte_eth_dev *dev) goto error; } - if (hw->is_pf) { + if (hw->is_phyport) { if (rte_eal_process_type() == RTE_PROC_PRIMARY) /* Configure the physical port up */ - nfp_eth_set_configured(hw->cpp, hw->pf_port_idx, 1); + nfp_eth_set_configured(hw->cpp, hw->nfp_idx, 1); else nfp_eth_set_configured(dev->process_private, - hw->pf_port_idx, 1); + hw->nfp_idx, 1); } hw->ctrl = new_ctrl; @@ -811,13 +584,13 @@ nfp_net_stop(struct rte_eth_dev *dev) (struct nfp_net_rxq *)dev->data->rx_queues[i]); } - if (hw->is_pf) { + if (hw->is_phyport) { if (rte_eal_process_type() == RTE_PROC_PRIMARY) /* Configure the physical port down */ - nfp_eth_set_configured(hw->cpp, hw->pf_port_idx, 0); + nfp_eth_set_configured(hw->cpp, hw->nfp_idx, 0); else nfp_eth_set_configured(dev->process_private, - hw->pf_port_idx, 0); + hw->nfp_idx, 0); } return 0; @@ -833,15 +606,15 @@ nfp_net_set_link_up(struct rte_eth_dev *dev) hw = NFP_NET_DEV_PRIVATE_TO_HW(dev->data->dev_private); - if (!hw->is_pf) + if (!hw->is_phyport) return -ENOTSUP; if (rte_eal_process_type() == RTE_PROC_PRIMARY) /* Configure the physical port down */ - return nfp_eth_set_configured(hw->cpp, hw->pf_port_idx, 1); + return nfp_eth_set_configured(hw->cpp, hw->nfp_idx, 1); else return nfp_eth_set_configured(dev->process_private, - hw->pf_port_idx, 1); + hw->nfp_idx, 1); } /* Set the link down. 
*/ @@ -854,15 +627,15 @@ nfp_net_set_link_down(struct rte_eth_dev *dev) hw = NFP_NET_DEV_PRIVATE_TO_HW(dev->data->dev_private); - if (!hw->is_pf) + if (!hw->is_phyport) return -ENOTSUP; if (rte_eal_process_type() == RTE_PROC_PRIMARY) /* Configure the physical port down */ - return nfp_eth_set_configured(hw->cpp, hw->pf_port_idx, 0); + return nfp_eth_set_configured(hw->cpp, hw->nfp_idx, 0); else return nfp_eth_set_configured(dev->process_private, - hw->pf_port_idx, 0); + hw->nfp_idx, 0); } /* Reset and stop device. The device can not be restarted. */ @@ -899,8 +672,34 @@ nfp_net_close(struct rte_eth_dev *dev) (struct nfp_net_rxq *)dev->data->rx_queues[i]); } + /* Only free PF resources after all physical ports have been closed */ + if (pci_dev->id.device_id == PCI_DEVICE_ID_NFP4000_PF_NIC || + pci_dev->id.device_id == PCI_DEVICE_ID_NFP6000_PF_NIC) { + struct nfp_pf_dev *pf_dev; + pf_dev = NFP_NET_DEV_PRIVATE_TO_PF(dev->data->dev_private); + + /* Mark this port as unused and free device priv resources*/ + nn_cfg_writeb(hw, NFP_NET_CFG_LSC, 0xff); + pf_dev->ports[hw->idx] = NULL; + rte_eth_dev_release_port(dev); + + for (i = 0; i < pf_dev->total_phyports; i++) { + /* Check to see if ports are still in use */ + if (pf_dev->ports[i]) + return 0; + } + + /* Now it is safe to free all PF resources */ + PMD_INIT_LOG(INFO, "Freeing PF resources"); + nfp_cpp_area_free(pf_dev->ctrl_area); + nfp_cpp_area_free(pf_dev->hwqueues_area); + free(pf_dev->hwinfo); + free(pf_dev->sym_tbl); + nfp_cpp_free(pf_dev->cpp); + rte_free(pf_dev); + } + rte_intr_disable(&pci_dev->intr_handle); - nn_cfg_writeb(hw, NFP_NET_CFG_LSC, 0xff); /* unregister callback func from eal lib */ rte_intr_callback_unregister(&pci_dev->intr_handle, @@ -915,7 +714,7 @@ nfp_net_close(struct rte_eth_dev *dev) return 0; } -static int +int nfp_net_promisc_enable(struct rte_eth_dev *dev) { uint32_t new_ctrl, update = 0; @@ -952,7 +751,7 @@ nfp_net_promisc_enable(struct rte_eth_dev *dev) return 0; } -static int +int nfp_net_promisc_disable(struct rte_eth_dev *dev) { uint32_t new_ctrl, update = 0; @@ -988,7 +787,7 @@ nfp_net_promisc_disable(struct rte_eth_dev *dev) * Wait to complete is needed as it can take up to 9 seconds to get the Link * status. 
*/ -static int +int nfp_net_link_update(struct rte_eth_dev *dev, __rte_unused int wait_to_complete) { struct nfp_net_hw *hw; @@ -1038,7 +837,7 @@ nfp_net_link_update(struct rte_eth_dev *dev, __rte_unused int wait_to_complete) return ret; } -static int +int nfp_net_stats_get(struct rte_eth_dev *dev, struct rte_eth_stats *stats) { int i; @@ -1133,7 +932,7 @@ nfp_net_stats_get(struct rte_eth_dev *dev, struct rte_eth_stats *stats) return -EINVAL; } -static int +int nfp_net_stats_reset(struct rte_eth_dev *dev) { int i; @@ -1198,7 +997,7 @@ nfp_net_stats_reset(struct rte_eth_dev *dev) return 0; } -static int +int nfp_net_infos_get(struct rte_eth_dev *dev, struct rte_eth_dev_info *dev_info) { struct nfp_net_hw *hw; @@ -1220,9 +1019,6 @@ nfp_net_infos_get(struct rte_eth_dev *dev, struct rte_eth_dev_info *dev_info) DEV_RX_OFFLOAD_UDP_CKSUM | DEV_RX_OFFLOAD_TCP_CKSUM; - dev_info->rx_offload_capa |= DEV_RX_OFFLOAD_JUMBO_FRAME | - DEV_RX_OFFLOAD_RSS_HASH; - if (hw->cap & NFP_NET_CFG_CTRL_TXVLAN) dev_info->tx_offload_capa = DEV_TX_OFFLOAD_VLAN_INSERT; @@ -1271,15 +1067,22 @@ nfp_net_infos_get(struct rte_eth_dev *dev, struct rte_eth_dev_info *dev_info) .nb_mtu_seg_max = NFP_TX_MAX_MTU_SEG, }; - dev_info->flow_type_rss_offloads = ETH_RSS_IPV4 | - ETH_RSS_NONFRAG_IPV4_TCP | - ETH_RSS_NONFRAG_IPV4_UDP | - ETH_RSS_IPV6 | - ETH_RSS_NONFRAG_IPV6_TCP | - ETH_RSS_NONFRAG_IPV6_UDP; + /* All NFP devices support jumbo frames */ + dev_info->rx_offload_capa |= DEV_RX_OFFLOAD_JUMBO_FRAME; + + if (hw->cap & NFP_NET_CFG_CTRL_RSS) { + dev_info->rx_offload_capa |= DEV_RX_OFFLOAD_RSS_HASH; - dev_info->reta_size = NFP_NET_CFG_RSS_ITBL_SZ; - dev_info->hash_key_size = NFP_NET_CFG_RSS_KEY_SZ; + dev_info->flow_type_rss_offloads = ETH_RSS_IPV4 | + ETH_RSS_NONFRAG_IPV4_TCP | + ETH_RSS_NONFRAG_IPV4_UDP | + ETH_RSS_IPV6 | + ETH_RSS_NONFRAG_IPV6_TCP | + ETH_RSS_NONFRAG_IPV6_UDP; + + dev_info->reta_size = NFP_NET_CFG_RSS_ITBL_SZ; + dev_info->hash_key_size = NFP_NET_CFG_RSS_KEY_SZ; + } dev_info->speed_capa = ETH_LINK_SPEED_1G | ETH_LINK_SPEED_10G | ETH_LINK_SPEED_25G | ETH_LINK_SPEED_40G | @@ -1288,7 +1091,7 @@ nfp_net_infos_get(struct rte_eth_dev *dev, struct rte_eth_dev_info *dev_info) return 0; } -static const uint32_t * +const uint32_t * nfp_net_supported_ptypes_get(struct rte_eth_dev *dev) { static const uint32_t ptypes[] = { @@ -1305,45 +1108,7 @@ nfp_net_supported_ptypes_get(struct rte_eth_dev *dev) return NULL; } -static uint32_t -nfp_net_rx_queue_count(struct rte_eth_dev *dev, uint16_t queue_idx) -{ - struct nfp_net_rxq *rxq; - struct nfp_net_rx_desc *rxds; - uint32_t idx; - uint32_t count; - - rxq = (struct nfp_net_rxq *)dev->data->rx_queues[queue_idx]; - - idx = rxq->rd_p; - - count = 0; - - /* - * Other PMDs are just checking the DD bit in intervals of 4 - * descriptors and counting all four if the first has the DD - * bit on. Of course, this is not accurate but can be good for - * performance. But ideally that should be done in descriptors - * chunks belonging to the same cache line - */ - - while (count < rxq->rx_count) { - rxds = &rxq->rxds[idx]; - if ((rxds->rxd.meta_len_dd & PCIE_DESC_RX_DD) == 0) - break; - - count++; - idx++; - - /* Wrapping? 
*/ - if ((idx) == rxq->rx_count) - idx = 0; - } - - return count; -} - -static int +int nfp_rx_queue_intr_enable(struct rte_eth_dev *dev, uint16_t queue_id) { struct rte_pci_device *pci_dev; @@ -1363,7 +1128,7 @@ nfp_rx_queue_intr_enable(struct rte_eth_dev *dev, uint16_t queue_id) return 0; } -static int +int nfp_rx_queue_intr_disable(struct rte_eth_dev *dev, uint16_t queue_id) { struct rte_pci_device *pci_dev; @@ -1432,7 +1197,32 @@ nfp_net_irq_unmask(struct rte_eth_dev *dev) } } +/* + * Interrupt handler which shall be registered for alarm callback for delayed + * handling specific interrupt to wait for the stable nic state. As the NIC + * interrupt state is not stable for nfp after link is just down, it needs + * to wait 4 seconds to get the stable status. + * + * @param handle Pointer to interrupt handle. + * @param param The address of parameter (struct rte_eth_dev *) + * + * @return void + */ static void +nfp_net_dev_interrupt_delayed_handler(void *param) +{ + struct rte_eth_dev *dev = (struct rte_eth_dev *)param; + + nfp_net_link_update(dev, 0); + rte_eth_dev_callback_process(dev, RTE_ETH_EVENT_INTR_LSC, NULL); + + nfp_net_dev_link_status_print(dev); + + /* Unmasking */ + nfp_net_irq_unmask(dev); +} + +void nfp_net_dev_interrupt_handler(void *param) { int64_t timeout; @@ -1464,32 +1254,7 @@ nfp_net_dev_interrupt_handler(void *param) } } -/* - * Interrupt handler which shall be registered for alarm callback for delayed - * handling specific interrupt to wait for the stable nic state. As the NIC - * interrupt state is not stable for nfp after link is just down, it needs - * to wait 4 seconds to get the stable status. - * - * @param handle Pointer to interrupt handle. - * @param param The address of parameter (struct rte_eth_dev *) - * - * @return void - */ -static void -nfp_net_dev_interrupt_delayed_handler(void *param) -{ - struct rte_eth_dev *dev = (struct rte_eth_dev *)param; - - nfp_net_link_update(dev, 0); - rte_eth_dev_callback_process(dev, RTE_ETH_EVENT_INTR_LSC, NULL); - - nfp_net_dev_link_status_print(dev); - - /* Unmasking */ - nfp_net_irq_unmask(dev); -} - -static int +int nfp_net_dev_mtu_set(struct rte_eth_dev *dev, uint16_t mtu) { struct nfp_net_hw *hw; @@ -1524,892 +1289,48 @@ nfp_net_dev_mtu_set(struct rte_eth_dev *dev, uint16_t mtu) return 0; } -static int -nfp_net_rx_queue_setup(struct rte_eth_dev *dev, - uint16_t queue_idx, uint16_t nb_desc, - unsigned int socket_id, - const struct rte_eth_rxconf *rx_conf, - struct rte_mempool *mp) +int +nfp_net_vlan_offload_set(struct rte_eth_dev *dev, int mask) { - const struct rte_memzone *tz; - struct nfp_net_rxq *rxq; + uint32_t new_ctrl, update; struct nfp_net_hw *hw; - uint32_t rx_desc_sz; + int ret; hw = NFP_NET_DEV_PRIVATE_TO_HW(dev->data->dev_private); + new_ctrl = 0; - PMD_INIT_FUNC_TRACE(); - - /* Validating number of descriptors */ - rx_desc_sz = nb_desc * sizeof(struct nfp_net_rx_desc); - if (rx_desc_sz % NFP_ALIGN_RING_DESC != 0 || - nb_desc > NFP_NET_MAX_RX_DESC || - nb_desc < NFP_NET_MIN_RX_DESC) { - PMD_DRV_LOG(ERR, "Wrong nb_desc value"); - return -EINVAL; - } + /* Enable vlan strip if it is not configured yet */ + if ((mask & ETH_VLAN_STRIP_OFFLOAD) && + !(hw->ctrl & NFP_NET_CFG_CTRL_RXVLAN)) + new_ctrl = hw->ctrl | NFP_NET_CFG_CTRL_RXVLAN; - /* - * Free memory prior to re-allocation if needed. 
This is the case after - * calling nfp_net_stop - */ - if (dev->data->rx_queues[queue_idx]) { - nfp_net_rx_queue_release(dev->data->rx_queues[queue_idx]); - dev->data->rx_queues[queue_idx] = NULL; - } + /* Disable vlan strip just if it is configured */ + if (!(mask & ETH_VLAN_STRIP_OFFLOAD) && + (hw->ctrl & NFP_NET_CFG_CTRL_RXVLAN)) + new_ctrl = hw->ctrl & ~NFP_NET_CFG_CTRL_RXVLAN; - /* Allocating rx queue data structure */ - rxq = rte_zmalloc_socket("ethdev RX queue", sizeof(struct nfp_net_rxq), - RTE_CACHE_LINE_SIZE, socket_id); - if (rxq == NULL) - return -ENOMEM; + if (new_ctrl == 0) + return 0; - /* Hw queues mapping based on firmware configuration */ - rxq->qidx = queue_idx; - rxq->fl_qcidx = queue_idx * hw->stride_rx; - rxq->rx_qcidx = rxq->fl_qcidx + (hw->stride_rx - 1); - rxq->qcp_fl = hw->rx_bar + NFP_QCP_QUEUE_OFF(rxq->fl_qcidx); - rxq->qcp_rx = hw->rx_bar + NFP_QCP_QUEUE_OFF(rxq->rx_qcidx); + update = NFP_NET_CFG_UPDATE_GEN; - /* - * Tracking mbuf size for detecting a potential mbuf overflow due to - * RX offset - */ - rxq->mem_pool = mp; - rxq->mbuf_size = rxq->mem_pool->elt_size; - rxq->mbuf_size -= (sizeof(struct rte_mbuf) + RTE_PKTMBUF_HEADROOM); - hw->flbufsz = rxq->mbuf_size; + ret = nfp_net_reconfig(hw, new_ctrl, update); + if (!ret) + hw->ctrl = new_ctrl; - rxq->rx_count = nb_desc; - rxq->port_id = dev->data->port_id; - rxq->rx_free_thresh = rx_conf->rx_free_thresh; - rxq->drop_en = rx_conf->rx_drop_en; + return ret; +} - /* - * Allocate RX ring hardware descriptors. A memzone large enough to - * handle the maximum ring size is allocated in order to allow for - * resizing in later calls to the queue setup function. - */ - tz = rte_eth_dma_zone_reserve(dev, "rx_ring", queue_idx, - sizeof(struct nfp_net_rx_desc) * - NFP_NET_MAX_RX_DESC, NFP_MEMZONE_ALIGN, - socket_id); - - if (tz == NULL) { - PMD_DRV_LOG(ERR, "Error allocating rx dma"); - nfp_net_rx_queue_release(rxq); - return -ENOMEM; - } - - /* Saving physical and virtual addresses for the RX ring */ - rxq->dma = (uint64_t)tz->iova; - rxq->rxds = (struct nfp_net_rx_desc *)tz->addr; - - /* mbuf pointers array for referencing mbufs linked to RX descriptors */ - rxq->rxbufs = rte_zmalloc_socket("rxq->rxbufs", - sizeof(*rxq->rxbufs) * nb_desc, - RTE_CACHE_LINE_SIZE, socket_id); - if (rxq->rxbufs == NULL) { - nfp_net_rx_queue_release(rxq); - return -ENOMEM; - } - - PMD_RX_LOG(DEBUG, "rxbufs=%p hw_ring=%p dma_addr=0x%" PRIx64, - rxq->rxbufs, rxq->rxds, (unsigned long int)rxq->dma); - - nfp_net_reset_rx_queue(rxq); - - dev->data->rx_queues[queue_idx] = rxq; - rxq->hw = hw; - - /* - * Telling the HW about the physical address of the RX ring and number - * of descriptors in log2 format - */ - nn_cfg_writeq(hw, NFP_NET_CFG_RXR_ADDR(queue_idx), rxq->dma); - nn_cfg_writeb(hw, NFP_NET_CFG_RXR_SZ(queue_idx), rte_log2_u32(nb_desc)); - - return 0; -} - -static int -nfp_net_rx_fill_freelist(struct nfp_net_rxq *rxq) -{ - struct nfp_net_rx_buff *rxe = rxq->rxbufs; - uint64_t dma_addr; - unsigned i; - - PMD_RX_LOG(DEBUG, "nfp_net_rx_fill_freelist for %u descriptors", - rxq->rx_count); - - for (i = 0; i < rxq->rx_count; i++) { - struct nfp_net_rx_desc *rxd; - struct rte_mbuf *mbuf = rte_pktmbuf_alloc(rxq->mem_pool); - - if (mbuf == NULL) { - PMD_DRV_LOG(ERR, "RX mbuf alloc failed queue_id=%u", - (unsigned)rxq->qidx); - return -ENOMEM; - } - - dma_addr = rte_cpu_to_le_64(RTE_MBUF_DMA_ADDR_DEFAULT(mbuf)); - - rxd = &rxq->rxds[i]; - rxd->fld.dd = 0; - rxd->fld.dma_addr_hi = (dma_addr >> 32) & 0xff; - rxd->fld.dma_addr_lo = dma_addr & 
0xffffffff; - rxe[i].mbuf = mbuf; - PMD_RX_LOG(DEBUG, "[%d]: %" PRIx64, i, dma_addr); - } - - /* Make sure all writes are flushed before telling the hardware */ - rte_wmb(); - - /* Not advertising the whole ring as the firmware gets confused if so */ - PMD_RX_LOG(DEBUG, "Increment FL write pointer in %u", - rxq->rx_count - 1); - - nfp_qcp_ptr_add(rxq->qcp_fl, NFP_QCP_WRITE_PTR, rxq->rx_count - 1); - - return 0; -} - -static int -nfp_net_tx_queue_setup(struct rte_eth_dev *dev, uint16_t queue_idx, - uint16_t nb_desc, unsigned int socket_id, - const struct rte_eth_txconf *tx_conf) -{ - const struct rte_memzone *tz; - struct nfp_net_txq *txq; - uint16_t tx_free_thresh; - struct nfp_net_hw *hw; - uint32_t tx_desc_sz; - - hw = NFP_NET_DEV_PRIVATE_TO_HW(dev->data->dev_private); - - PMD_INIT_FUNC_TRACE(); - - /* Validating number of descriptors */ - tx_desc_sz = nb_desc * sizeof(struct nfp_net_tx_desc); - if (tx_desc_sz % NFP_ALIGN_RING_DESC != 0 || - nb_desc > NFP_NET_MAX_TX_DESC || - nb_desc < NFP_NET_MIN_TX_DESC) { - PMD_DRV_LOG(ERR, "Wrong nb_desc value"); - return -EINVAL; - } - - tx_free_thresh = (uint16_t)((tx_conf->tx_free_thresh) ? - tx_conf->tx_free_thresh : - DEFAULT_TX_FREE_THRESH); - - if (tx_free_thresh > (nb_desc)) { - PMD_DRV_LOG(ERR, - "tx_free_thresh must be less than the number of TX " - "descriptors. (tx_free_thresh=%u port=%d " - "queue=%d)", (unsigned int)tx_free_thresh, - dev->data->port_id, (int)queue_idx); - return -(EINVAL); - } - - /* - * Free memory prior to re-allocation if needed. This is the case after - * calling nfp_net_stop - */ - if (dev->data->tx_queues[queue_idx]) { - PMD_TX_LOG(DEBUG, "Freeing memory prior to re-allocation %d", - queue_idx); - nfp_net_tx_queue_release(dev->data->tx_queues[queue_idx]); - dev->data->tx_queues[queue_idx] = NULL; - } - - /* Allocating tx queue data structure */ - txq = rte_zmalloc_socket("ethdev TX queue", sizeof(struct nfp_net_txq), - RTE_CACHE_LINE_SIZE, socket_id); - if (txq == NULL) { - PMD_DRV_LOG(ERR, "Error allocating tx dma"); - return -ENOMEM; - } - - /* - * Allocate TX ring hardware descriptors. A memzone large enough to - * handle the maximum ring size is allocated in order to allow for - * resizing in later calls to the queue setup function. 
- */ - tz = rte_eth_dma_zone_reserve(dev, "tx_ring", queue_idx, - sizeof(struct nfp_net_tx_desc) * - NFP_NET_MAX_TX_DESC, NFP_MEMZONE_ALIGN, - socket_id); - if (tz == NULL) { - PMD_DRV_LOG(ERR, "Error allocating tx dma"); - nfp_net_tx_queue_release(txq); - return -ENOMEM; - } - - txq->tx_count = nb_desc; - txq->tx_free_thresh = tx_free_thresh; - txq->tx_pthresh = tx_conf->tx_thresh.pthresh; - txq->tx_hthresh = tx_conf->tx_thresh.hthresh; - txq->tx_wthresh = tx_conf->tx_thresh.wthresh; - - /* queue mapping based on firmware configuration */ - txq->qidx = queue_idx; - txq->tx_qcidx = queue_idx * hw->stride_tx; - txq->qcp_q = hw->tx_bar + NFP_QCP_QUEUE_OFF(txq->tx_qcidx); - - txq->port_id = dev->data->port_id; - - /* Saving physical and virtual addresses for the TX ring */ - txq->dma = (uint64_t)tz->iova; - txq->txds = (struct nfp_net_tx_desc *)tz->addr; - - /* mbuf pointers array for referencing mbufs linked to TX descriptors */ - txq->txbufs = rte_zmalloc_socket("txq->txbufs", - sizeof(*txq->txbufs) * nb_desc, - RTE_CACHE_LINE_SIZE, socket_id); - if (txq->txbufs == NULL) { - nfp_net_tx_queue_release(txq); - return -ENOMEM; - } - PMD_TX_LOG(DEBUG, "txbufs=%p hw_ring=%p dma_addr=0x%" PRIx64, - txq->txbufs, txq->txds, (unsigned long int)txq->dma); - - nfp_net_reset_tx_queue(txq); - - dev->data->tx_queues[queue_idx] = txq; - txq->hw = hw; - - /* - * Telling the HW about the physical address of the TX ring and number - * of descriptors in log2 format - */ - nn_cfg_writeq(hw, NFP_NET_CFG_TXR_ADDR(queue_idx), txq->dma); - nn_cfg_writeb(hw, NFP_NET_CFG_TXR_SZ(queue_idx), rte_log2_u32(nb_desc)); - - return 0; -} - -/* nfp_net_tx_tso - Set TX descriptor for TSO */ -static inline void -nfp_net_tx_tso(struct nfp_net_txq *txq, struct nfp_net_tx_desc *txd, - struct rte_mbuf *mb) -{ - uint64_t ol_flags; - struct nfp_net_hw *hw = txq->hw; - - if (!(hw->cap & NFP_NET_CFG_CTRL_LSO_ANY)) - goto clean_txd; - - ol_flags = mb->ol_flags; - - if (!(ol_flags & PKT_TX_TCP_SEG)) - goto clean_txd; - - txd->l3_offset = mb->l2_len; - txd->l4_offset = mb->l2_len + mb->l3_len; - txd->lso_hdrlen = mb->l2_len + mb->l3_len + mb->l4_len; - txd->mss = rte_cpu_to_le_16(mb->tso_segsz); - txd->flags = PCIE_DESC_TX_LSO; - return; - -clean_txd: - txd->flags = 0; - txd->l3_offset = 0; - txd->l4_offset = 0; - txd->lso_hdrlen = 0; - txd->mss = 0; -} - -/* nfp_net_tx_cksum - Set TX CSUM offload flags in TX descriptor */ -static inline void -nfp_net_tx_cksum(struct nfp_net_txq *txq, struct nfp_net_tx_desc *txd, - struct rte_mbuf *mb) -{ - uint64_t ol_flags; - struct nfp_net_hw *hw = txq->hw; - - if (!(hw->cap & NFP_NET_CFG_CTRL_TXCSUM)) - return; - - ol_flags = mb->ol_flags; - - /* IPv6 does not need checksum */ - if (ol_flags & PKT_TX_IP_CKSUM) - txd->flags |= PCIE_DESC_TX_IP4_CSUM; - - switch (ol_flags & PKT_TX_L4_MASK) { - case PKT_TX_UDP_CKSUM: - txd->flags |= PCIE_DESC_TX_UDP_CSUM; - break; - case PKT_TX_TCP_CKSUM: - txd->flags |= PCIE_DESC_TX_TCP_CSUM; - break; - } - - if (ol_flags & (PKT_TX_IP_CKSUM | PKT_TX_L4_MASK)) - txd->flags |= PCIE_DESC_TX_CSUM; -} - -/* nfp_net_rx_cksum - set mbuf checksum flags based on RX descriptor flags */ -static inline void -nfp_net_rx_cksum(struct nfp_net_rxq *rxq, struct nfp_net_rx_desc *rxd, - struct rte_mbuf *mb) -{ - struct nfp_net_hw *hw = rxq->hw; - - if (!(hw->ctrl & NFP_NET_CFG_CTRL_RXCSUM)) - return; - - /* If IPv4 and IP checksum error, fail */ - if (unlikely((rxd->rxd.flags & PCIE_DESC_RX_IP4_CSUM) && - !(rxd->rxd.flags & PCIE_DESC_RX_IP4_CSUM_OK))) - mb->ol_flags |= PKT_RX_IP_CKSUM_BAD; 
- else - mb->ol_flags |= PKT_RX_IP_CKSUM_GOOD; - - /* If neither UDP nor TCP return */ - if (!(rxd->rxd.flags & PCIE_DESC_RX_TCP_CSUM) && - !(rxd->rxd.flags & PCIE_DESC_RX_UDP_CSUM)) - return; - - if (likely(rxd->rxd.flags & PCIE_DESC_RX_L4_CSUM_OK)) - mb->ol_flags |= PKT_RX_L4_CKSUM_GOOD; - else - mb->ol_flags |= PKT_RX_L4_CKSUM_BAD; -} - -#define NFP_HASH_OFFSET ((uint8_t *)mbuf->buf_addr + mbuf->data_off - 4) -#define NFP_HASH_TYPE_OFFSET ((uint8_t *)mbuf->buf_addr + mbuf->data_off - 8) - -#define NFP_DESC_META_LEN(d) (d->rxd.meta_len_dd & PCIE_DESC_RX_META_LEN_MASK) - -/* - * nfp_net_set_hash - Set mbuf hash data - * - * The RSS hash and hash-type are pre-pended to the packet data. - * Extract and decode it and set the mbuf fields. - */ -static inline void -nfp_net_set_hash(struct nfp_net_rxq *rxq, struct nfp_net_rx_desc *rxd, - struct rte_mbuf *mbuf) -{ - struct nfp_net_hw *hw = rxq->hw; - uint8_t *meta_offset; - uint32_t meta_info; - uint32_t hash = 0; - uint32_t hash_type = 0; - - if (!(hw->ctrl & NFP_NET_CFG_CTRL_RSS)) - return; - - /* this is true for new firmwares */ - if (likely(((hw->cap & NFP_NET_CFG_CTRL_RSS2) || - (NFD_CFG_MAJOR_VERSION_of(hw->ver) == 4)) && - NFP_DESC_META_LEN(rxd))) { - /* - * new metadata api: - * <---- 32 bit -----> - * m field type word - * e data field #2 - * t data field #1 - * a data field #0 - * ==================== - * packet data - * - * Field type word contains up to 8 4bit field types - * A 4bit field type refers to a data field word - * A data field word can have several 4bit field types - */ - meta_offset = rte_pktmbuf_mtod(mbuf, uint8_t *); - meta_offset -= NFP_DESC_META_LEN(rxd); - meta_info = rte_be_to_cpu_32(*(uint32_t *)meta_offset); - meta_offset += 4; - /* NFP PMD just supports metadata for hashing */ - switch (meta_info & NFP_NET_META_FIELD_MASK) { - case NFP_NET_META_HASH: - /* next field type is about the hash type */ - meta_info >>= NFP_NET_META_FIELD_SIZE; - /* hash value is in the data field */ - hash = rte_be_to_cpu_32(*(uint32_t *)meta_offset); - hash_type = meta_info & NFP_NET_META_FIELD_MASK; - break; - default: - /* Unsupported metadata can be a performance issue */ - return; - } - } else { - if (!(rxd->rxd.flags & PCIE_DESC_RX_RSS)) - return; - - hash = rte_be_to_cpu_32(*(uint32_t *)NFP_HASH_OFFSET); - hash_type = rte_be_to_cpu_32(*(uint32_t *)NFP_HASH_TYPE_OFFSET); - } - - mbuf->hash.rss = hash; - mbuf->ol_flags |= PKT_RX_RSS_HASH; - - switch (hash_type) { - case NFP_NET_RSS_IPV4: - mbuf->packet_type |= RTE_PTYPE_INNER_L3_IPV4; - break; - case NFP_NET_RSS_IPV6: - mbuf->packet_type |= RTE_PTYPE_INNER_L3_IPV6; - break; - case NFP_NET_RSS_IPV6_EX: - mbuf->packet_type |= RTE_PTYPE_INNER_L3_IPV6_EXT; - break; - case NFP_NET_RSS_IPV4_TCP: - mbuf->packet_type |= RTE_PTYPE_INNER_L3_IPV6_EXT; - break; - case NFP_NET_RSS_IPV6_TCP: - mbuf->packet_type |= RTE_PTYPE_INNER_L3_IPV6_EXT; - break; - case NFP_NET_RSS_IPV4_UDP: - mbuf->packet_type |= RTE_PTYPE_INNER_L3_IPV6_EXT; - break; - case NFP_NET_RSS_IPV6_UDP: - mbuf->packet_type |= RTE_PTYPE_INNER_L3_IPV6_EXT; - break; - default: - mbuf->packet_type |= RTE_PTYPE_INNER_L4_MASK; - } -} - -static inline void -nfp_net_mbuf_alloc_failed(struct nfp_net_rxq *rxq) -{ - rte_eth_devices[rxq->port_id].data->rx_mbuf_alloc_failed++; -} - -#define NFP_DESC_META_LEN(d) (d->rxd.meta_len_dd & PCIE_DESC_RX_META_LEN_MASK) - -/* - * RX path design: - * - * There are some decisions to take: - * 1) How to check DD RX descriptors bit - * 2) How and when to allocate new mbufs - * - * Current implementation 
checks just one single DD bit each loop. As each - * descriptor is 8 bytes, it is likely a good idea to check descriptors in - * a single cache line instead. Tests with this change have not shown any - * performance improvement but it requires further investigation. For example, - * depending on which descriptor is next, the number of descriptors could be - * less than 8 for just checking those in the same cache line. This implies - * extra work which could be counterproductive by itself. Indeed, last firmware - * changes are just doing this: writing several descriptors with the DD bit - * for saving PCIe bandwidth and DMA operations from the NFP. - * - * Mbuf allocation is done when a new packet is received. Then the descriptor - * is automatically linked with the new mbuf and the old one is given to the - * user. The main drawback with this design is mbuf allocation is heavier than - * using bulk allocations allowed by DPDK with rte_mempool_get_bulk. From the - * cache point of view it does not seem allocating the mbuf early on as we are - * doing now have any benefit at all. Again, tests with this change have not - * shown any improvement. Also, rte_mempool_get_bulk returns all or nothing - * so looking at the implications of this type of allocation should be studied - * deeply - */ - -static uint16_t -nfp_net_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t nb_pkts) -{ - struct nfp_net_rxq *rxq; - struct nfp_net_rx_desc *rxds; - struct nfp_net_rx_buff *rxb; - struct nfp_net_hw *hw; - struct rte_mbuf *mb; - struct rte_mbuf *new_mb; - uint16_t nb_hold; - uint64_t dma_addr; - int avail; - - rxq = rx_queue; - if (unlikely(rxq == NULL)) { - /* - * DPDK just checks the queue is lower than max queues - * enabled. But the queue needs to be configured - */ - RTE_LOG_DP(ERR, PMD, "RX Bad queue\n"); - return -EINVAL; - } - - hw = rxq->hw; - avail = 0; - nb_hold = 0; - - while (avail < nb_pkts) { - rxb = &rxq->rxbufs[rxq->rd_p]; - if (unlikely(rxb == NULL)) { - RTE_LOG_DP(ERR, PMD, "rxb does not exist!\n"); - break; - } - - rxds = &rxq->rxds[rxq->rd_p]; - if ((rxds->rxd.meta_len_dd & PCIE_DESC_RX_DD) == 0) - break; - - /* - * Memory barrier to ensure that we won't do other - * reads before the DD bit. - */ - rte_rmb(); - - /* - * We got a packet. Let's alloc a new mbuf for refilling the - * free descriptor ring as soon as possible - */ - new_mb = rte_pktmbuf_alloc(rxq->mem_pool); - if (unlikely(new_mb == NULL)) { - RTE_LOG_DP(DEBUG, PMD, - "RX mbuf alloc failed port_id=%u queue_id=%u\n", - rxq->port_id, (unsigned int)rxq->qidx); - nfp_net_mbuf_alloc_failed(rxq); - break; - } - - nb_hold++; - - /* - * Grab the mbuf and refill the descriptor with the - * previously allocated mbuf - */ - mb = rxb->mbuf; - rxb->mbuf = new_mb; - - PMD_RX_LOG(DEBUG, "Packet len: %u, mbuf_size: %u", - rxds->rxd.data_len, rxq->mbuf_size); - - /* Size of this segment */ - mb->data_len = rxds->rxd.data_len - NFP_DESC_META_LEN(rxds); - /* Size of the whole packet. We just support 1 segment */ - mb->pkt_len = rxds->rxd.data_len - NFP_DESC_META_LEN(rxds); - - if (unlikely((mb->data_len + hw->rx_offset) > - rxq->mbuf_size)) { - /* - * This should not happen and the user has the - * responsibility of avoiding it. 
But we have - * to give some info about the error - */ - RTE_LOG_DP(ERR, PMD, - "mbuf overflow likely due to the RX offset.\n" - "\t\tYour mbuf size should have extra space for" - " RX offset=%u bytes.\n" - "\t\tCurrently you just have %u bytes available" - " but the received packet is %u bytes long", - hw->rx_offset, - rxq->mbuf_size - hw->rx_offset, - mb->data_len); - return -EINVAL; - } - - /* Filling the received mbuf with packet info */ - if (hw->rx_offset) - mb->data_off = RTE_PKTMBUF_HEADROOM + hw->rx_offset; - else - mb->data_off = RTE_PKTMBUF_HEADROOM + - NFP_DESC_META_LEN(rxds); - - /* No scatter mode supported */ - mb->nb_segs = 1; - mb->next = NULL; - - mb->port = rxq->port_id; - - /* Checking the RSS flag */ - nfp_net_set_hash(rxq, rxds, mb); - - /* Checking the checksum flag */ - nfp_net_rx_cksum(rxq, rxds, mb); - - if ((rxds->rxd.flags & PCIE_DESC_RX_VLAN) && - (hw->ctrl & NFP_NET_CFG_CTRL_RXVLAN)) { - mb->vlan_tci = rte_cpu_to_le_32(rxds->rxd.vlan); - mb->ol_flags |= PKT_RX_VLAN | PKT_RX_VLAN_STRIPPED; - } - - /* Adding the mbuf to the mbuf array passed by the app */ - rx_pkts[avail++] = mb; - - /* Now resetting and updating the descriptor */ - rxds->vals[0] = 0; - rxds->vals[1] = 0; - dma_addr = rte_cpu_to_le_64(RTE_MBUF_DMA_ADDR_DEFAULT(new_mb)); - rxds->fld.dd = 0; - rxds->fld.dma_addr_hi = (dma_addr >> 32) & 0xff; - rxds->fld.dma_addr_lo = dma_addr & 0xffffffff; - - rxq->rd_p++; - if (unlikely(rxq->rd_p == rxq->rx_count)) /* wrapping?*/ - rxq->rd_p = 0; - } - - if (nb_hold == 0) - return nb_hold; - - PMD_RX_LOG(DEBUG, "RX port_id=%u queue_id=%u, %d packets received", - rxq->port_id, (unsigned int)rxq->qidx, nb_hold); - - nb_hold += rxq->nb_rx_hold; - - /* - * FL descriptors needs to be written before incrementing the - * FL queue WR pointer - */ - rte_wmb(); - if (nb_hold > rxq->rx_free_thresh) { - PMD_RX_LOG(DEBUG, "port=%u queue=%u nb_hold=%u avail=%u", - rxq->port_id, (unsigned int)rxq->qidx, - (unsigned)nb_hold, (unsigned)avail); - nfp_qcp_ptr_add(rxq->qcp_fl, NFP_QCP_WRITE_PTR, nb_hold); - nb_hold = 0; - } - rxq->nb_rx_hold = nb_hold; - - return avail; -} - -/* - * nfp_net_tx_free_bufs - Check for descriptors with a complete - * status - * @txq: TX queue to work with - * Returns number of descriptors freed - */ -int -nfp_net_tx_free_bufs(struct nfp_net_txq *txq) -{ - uint32_t qcp_rd_p; - int todo; - - PMD_TX_LOG(DEBUG, "queue %u. 
Check for descriptor with a complete" - " status", txq->qidx); - - /* Work out how many packets have been sent */ - qcp_rd_p = nfp_qcp_read(txq->qcp_q, NFP_QCP_READ_PTR); - - if (qcp_rd_p == txq->rd_p) { - PMD_TX_LOG(DEBUG, "queue %u: It seems harrier is not sending " - "packets (%u, %u)", txq->qidx, - qcp_rd_p, txq->rd_p); - return 0; - } - - if (qcp_rd_p > txq->rd_p) - todo = qcp_rd_p - txq->rd_p; - else - todo = qcp_rd_p + txq->tx_count - txq->rd_p; - - PMD_TX_LOG(DEBUG, "qcp_rd_p %u, txq->rd_p: %u, qcp->rd_p: %u", - qcp_rd_p, txq->rd_p, txq->rd_p); - - if (todo == 0) - return todo; - - txq->rd_p += todo; - if (unlikely(txq->rd_p >= txq->tx_count)) - txq->rd_p -= txq->tx_count; - - return todo; -} - -/* Leaving always free descriptors for avoiding wrapping confusion */ -static inline -uint32_t nfp_free_tx_desc(struct nfp_net_txq *txq) -{ - if (txq->wr_p >= txq->rd_p) - return txq->tx_count - (txq->wr_p - txq->rd_p) - 8; - else - return txq->rd_p - txq->wr_p - 8; -} - -/* - * nfp_net_txq_full - Check if the TX queue free descriptors - * is below tx_free_threshold - * - * @txq: TX queue to check - * - * This function uses the host copy* of read/write pointers - */ -static inline -uint32_t nfp_net_txq_full(struct nfp_net_txq *txq) -{ - return (nfp_free_tx_desc(txq) < txq->tx_free_thresh); -} - -static uint16_t -nfp_net_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t nb_pkts) -{ - struct nfp_net_txq *txq; - struct nfp_net_hw *hw; - struct nfp_net_tx_desc *txds, txd; - struct rte_mbuf *pkt; - uint64_t dma_addr; - int pkt_size, dma_size; - uint16_t free_descs, issued_descs; - struct rte_mbuf **lmbuf; - int i; - - txq = tx_queue; - hw = txq->hw; - txds = &txq->txds[txq->wr_p]; - - PMD_TX_LOG(DEBUG, "working for queue %u at pos %d and %u packets", - txq->qidx, txq->wr_p, nb_pkts); - - if ((nfp_free_tx_desc(txq) < nb_pkts) || (nfp_net_txq_full(txq))) - nfp_net_tx_free_bufs(txq); - - free_descs = (uint16_t)nfp_free_tx_desc(txq); - if (unlikely(free_descs == 0)) - return 0; - - pkt = *tx_pkts; - - i = 0; - issued_descs = 0; - PMD_TX_LOG(DEBUG, "queue: %u. Sending %u packets", - txq->qidx, nb_pkts); - /* Sending packets */ - while ((i < nb_pkts) && free_descs) { - /* Grabbing the mbuf linked to the current descriptor */ - lmbuf = &txq->txbufs[txq->wr_p].mbuf; - /* Warming the cache for releasing the mbuf later on */ - RTE_MBUF_PREFETCH_TO_FREE(*lmbuf); - - pkt = *(tx_pkts + i); - - if (unlikely((pkt->nb_segs > 1) && - !(hw->cap & NFP_NET_CFG_CTRL_GATHER))) { - PMD_INIT_LOG(INFO, "NFP_NET_CFG_CTRL_GATHER not set"); - rte_panic("Multisegment packet unsupported\n"); - } - - /* Checking if we have enough descriptors */ - if (unlikely(pkt->nb_segs > free_descs)) - goto xmit_end; - - /* - * Checksum and VLAN flags just in the first descriptor for a - * multisegment packet, but TSO info needs to be in all of them. - */ - txd.data_len = pkt->pkt_len; - nfp_net_tx_tso(txq, &txd, pkt); - nfp_net_tx_cksum(txq, &txd, pkt); - - if ((pkt->ol_flags & PKT_TX_VLAN_PKT) && - (hw->cap & NFP_NET_CFG_CTRL_TXVLAN)) { - txd.flags |= PCIE_DESC_TX_VLAN; - txd.vlan = pkt->vlan_tci; - } - - /* - * mbuf data_len is the data in one segment and pkt_len data - * in the whole packet. 
When the packet is just one segment, - * then data_len = pkt_len - */ - pkt_size = pkt->pkt_len; - - while (pkt) { - /* Copying TSO, VLAN and cksum info */ - *txds = txd; - - /* Releasing mbuf used by this descriptor previously*/ - if (*lmbuf) - rte_pktmbuf_free_seg(*lmbuf); - - /* - * Linking mbuf with descriptor for being released - * next time descriptor is used - */ - *lmbuf = pkt; - - dma_size = pkt->data_len; - dma_addr = rte_mbuf_data_iova(pkt); - PMD_TX_LOG(DEBUG, "Working with mbuf at dma address:" - "%" PRIx64 "", dma_addr); - - /* Filling descriptors fields */ - txds->dma_len = dma_size; - txds->data_len = txd.data_len; - txds->dma_addr_hi = (dma_addr >> 32) & 0xff; - txds->dma_addr_lo = (dma_addr & 0xffffffff); - ASSERT(free_descs > 0); - free_descs--; - - txq->wr_p++; - if (unlikely(txq->wr_p == txq->tx_count)) /* wrapping?*/ - txq->wr_p = 0; - - pkt_size -= dma_size; - - /* - * Making the EOP, packets with just one segment - * the priority - */ - if (likely(!pkt_size)) - txds->offset_eop = PCIE_DESC_TX_EOP; - else - txds->offset_eop = 0; - - pkt = pkt->next; - /* Referencing next free TX descriptor */ - txds = &txq->txds[txq->wr_p]; - lmbuf = &txq->txbufs[txq->wr_p].mbuf; - issued_descs++; - } - i++; - } - -xmit_end: - /* Increment write pointers. Force memory write before we let HW know */ - rte_wmb(); - nfp_qcp_ptr_add(txq->qcp_q, NFP_QCP_WRITE_PTR, issued_descs); - - return i; -} - -static int -nfp_net_vlan_offload_set(struct rte_eth_dev *dev, int mask) -{ - uint32_t new_ctrl, update; - struct nfp_net_hw *hw; - int ret; - - hw = NFP_NET_DEV_PRIVATE_TO_HW(dev->data->dev_private); - new_ctrl = 0; - - /* Enable vlan strip if it is not configured yet */ - if ((mask & ETH_VLAN_STRIP_OFFLOAD) && - !(hw->ctrl & NFP_NET_CFG_CTRL_RXVLAN)) - new_ctrl = hw->ctrl | NFP_NET_CFG_CTRL_RXVLAN; - - /* Disable vlan strip just if it is configured */ - if (!(mask & ETH_VLAN_STRIP_OFFLOAD) && - (hw->ctrl & NFP_NET_CFG_CTRL_RXVLAN)) - new_ctrl = hw->ctrl & ~NFP_NET_CFG_CTRL_RXVLAN; - - if (new_ctrl == 0) - return 0; - - update = NFP_NET_CFG_UPDATE_GEN; - - ret = nfp_net_reconfig(hw, new_ctrl, update); - if (!ret) - hw->ctrl = new_ctrl; - - return ret; -} - -static int -nfp_net_rss_reta_write(struct rte_eth_dev *dev, - struct rte_eth_rss_reta_entry64 *reta_conf, - uint16_t reta_size) -{ - uint32_t reta, mask; - int i, j; - int idx, shift; - struct nfp_net_hw *hw = - NFP_NET_DEV_PRIVATE_TO_HW(dev->data->dev_private); +static int +nfp_net_rss_reta_write(struct rte_eth_dev *dev, + struct rte_eth_rss_reta_entry64 *reta_conf, + uint16_t reta_size) +{ + uint32_t reta, mask; + int i, j; + int idx, shift; + struct nfp_net_hw *hw = + NFP_NET_DEV_PRIVATE_TO_HW(dev->data->dev_private); if (reta_size != NFP_NET_CFG_RSS_ITBL_SZ) { PMD_DRV_LOG(ERR, "The size of hash lookup table configured " @@ -2451,7 +1372,7 @@ nfp_net_rss_reta_write(struct rte_eth_dev *dev, } /* Update Redirection Table(RETA) of Receive Side Scaling of Ethernet device */ -static int +int nfp_net_reta_update(struct rte_eth_dev *dev, struct rte_eth_rss_reta_entry64 *reta_conf, uint16_t reta_size) @@ -2477,7 +1398,7 @@ nfp_net_reta_update(struct rte_eth_dev *dev, } /* Query Redirection Table(RETA) of Receive Side Scaling of Ethernet device. 
*/ -static int +int nfp_net_reta_query(struct rte_eth_dev *dev, struct rte_eth_rss_reta_entry64 *reta_conf, uint16_t reta_size) @@ -2574,7 +1495,7 @@ nfp_net_rss_hash_write(struct rte_eth_dev *dev, return 0; } -static int +int nfp_net_rss_hash_update(struct rte_eth_dev *dev, struct rte_eth_rss_conf *rss_conf) { @@ -2610,7 +1531,7 @@ nfp_net_rss_hash_update(struct rte_eth_dev *dev, return 0; } -static int +int nfp_net_rss_hash_conf_get(struct rte_eth_dev *dev, struct rte_eth_rss_conf *rss_conf) { @@ -2661,7 +1582,7 @@ nfp_net_rss_hash_conf_get(struct rte_eth_dev *dev, return 0; } -static int +int nfp_net_rss_config_default(struct rte_eth_dev *dev) { struct rte_eth_conf *dev_conf; @@ -2732,43 +1653,13 @@ static const struct eth_dev_ops nfp_net_eth_dev_ops = { .rx_queue_intr_disable = nfp_rx_queue_intr_disable, }; -/* - * All eth_dev created got its private data, but before nfp_net_init, that - * private data is referencing private data for all the PF ports. This is due - * to how the vNIC bars are mapped based on first port, so all ports need info - * about port 0 private data. Inside nfp_net_init the private data pointer is - * changed to the right address for each port once the bars have been mapped. - * - * This functions helps to find out which port and therefore which offset - * inside the private data array to use. - */ -static int -get_pf_port_number(char *name) -{ - char *pf_str = name; - int size = 0; - - while ((*pf_str != '_') && (*pf_str != '\0') && (size++ < 30)) - pf_str++; - - if (size == 30) - /* - * This should not happen at all and it would mean major - * implementation fault. - */ - rte_panic("nfp_net: problem with pf device name\n"); - - /* Expecting _portX with X within [0,7] */ - pf_str += 5; - - return (int)strtol(pf_str, NULL, 10); -} static int nfp_net_init(struct rte_eth_dev *eth_dev) { struct rte_pci_device *pci_dev; - struct nfp_net_hw *hw, *hwport0; + struct nfp_pf_dev *pf_dev; + struct nfp_net_hw *hw; uint64_t tx_bar_off = 0, rx_bar_off = 0; uint32_t start_q; @@ -2780,6 +1671,9 @@ nfp_net_init(struct rte_eth_dev *eth_dev) pci_dev = RTE_ETH_DEV_TO_PCI(eth_dev); + /* Use backpointer here to the PF of this eth_dev */ + pf_dev = NFP_NET_DEV_PRIVATE_TO_PF(eth_dev->data->dev_private); + /* NFP can not handle DMA addresses requiring more than 40 bits */ if (rte_mem_check_dma_mask(40)) { RTE_LOG(ERR, PMD, "device %s can not be used:", @@ -2790,22 +1684,23 @@ nfp_net_init(struct rte_eth_dev *eth_dev) if ((pci_dev->id.device_id == PCI_DEVICE_ID_NFP4000_PF_NIC) || (pci_dev->id.device_id == PCI_DEVICE_ID_NFP6000_PF_NIC)) { - port = get_pf_port_number(eth_dev->data->name); + port = ((struct nfp_net_hw *)eth_dev->data->dev_private)->idx; if (port < 0 || port > 7) { PMD_DRV_LOG(ERR, "Port value is wrong"); return -ENODEV; } - PMD_INIT_LOG(DEBUG, "Working with PF port value %d", port); + /* Use PF array of physical ports to get pointer to + * this specific port + */ + hw = pf_dev->ports[port]; - /* This points to port 0 private data */ - hwport0 = NFP_NET_DEV_PRIVATE_TO_HW(eth_dev->data->dev_private); + PMD_INIT_LOG(DEBUG, "Working with physical port number: %d, " + "NFP internal port number: %d", + port, hw->nfp_idx); - /* This points to the specific port private data */ - hw = &hwport0[port]; } else { hw = NFP_NET_DEV_PRIVATE_TO_HW(eth_dev->data->dev_private); - hwport0 = 0; } eth_dev->dev_ops = &nfp_net_eth_dev_ops; @@ -2836,25 +1731,18 @@ nfp_net_init(struct rte_eth_dev *eth_dev) return -ENODEV; } - if (hw->is_pf && port == 0) { - hw->ctrl_bar = nfp_rtsym_map(hw->sym_tbl, 
"_pf0_net_bar0", - hw->total_ports * 32768, - &hw->ctrl_area); - if (!hw->ctrl_bar) { - printf("nfp_rtsym_map fails for _pf0_net_ctrl_bar"); - return -EIO; + if (hw->is_phyport) { + if (port == 0) { + hw->ctrl_bar = pf_dev->ctrl_bar; + } else { + if (!pf_dev->ctrl_bar) + return -ENODEV; + /* Use port offset in pf ctrl_bar for this + * ports control bar + */ + hw->ctrl_bar = pf_dev->ctrl_bar + + (port * NFP_PF_CSR_SLICE_SIZE); } - - PMD_INIT_LOG(DEBUG, "ctrl bar: %p", hw->ctrl_bar); - } - - if (port > 0) { - if (!hwport0->ctrl_bar) - return -ENODEV; - - /* address based on port0 offset */ - hw->ctrl_bar = hwport0->ctrl_bar + - (port * NFP_PF_CSR_SLICE_SIZE); } PMD_INIT_LOG(DEBUG, "ctrl bar: %p", hw->ctrl_bar); @@ -2881,26 +1769,9 @@ nfp_net_init(struct rte_eth_dev *eth_dev) PMD_INIT_LOG(DEBUG, "tx_bar_off: 0x%" PRIx64 "", tx_bar_off); PMD_INIT_LOG(DEBUG, "rx_bar_off: 0x%" PRIx64 "", rx_bar_off); - if (hw->is_pf && port == 0) { - /* configure access to tx/rx vNIC BARs */ - hwport0->hw_queues = nfp_cpp_map_area(hw->cpp, 0, 0, - NFP_PCIE_QUEUE(0), - NFP_QCP_QUEUE_AREA_SZ, - &hw->hwqueues_area); - - if (!hwport0->hw_queues) { - printf("nfp_rtsym_map fails for net.qc"); - err = -EIO; - goto dev_err_ctrl_map; - } - - PMD_INIT_LOG(DEBUG, "tx/rx bar address: 0x%p", - hwport0->hw_queues); - } - - if (hw->is_pf) { - hw->tx_bar = hwport0->hw_queues + tx_bar_off; - hw->rx_bar = hwport0->hw_queues + rx_bar_off; + if (hw->is_phyport) { + hw->tx_bar = pf_dev->hw_queues + tx_bar_off; + hw->rx_bar = pf_dev->hw_queues + rx_bar_off; eth_dev->data->dev_private = hw; } else { hw->tx_bar = (uint8_t *)pci_dev->mem_resource[2].addr + @@ -2940,549 +1811,84 @@ nfp_net_init(struct rte_eth_dev *eth_dev) hw->cap & NFP_NET_CFG_CTRL_RXCSUM ? "RXCSUM " : "", hw->cap & NFP_NET_CFG_CTRL_TXCSUM ? "TXCSUM " : "", hw->cap & NFP_NET_CFG_CTRL_RXVLAN ? "RXVLAN " : "", - hw->cap & NFP_NET_CFG_CTRL_TXVLAN ? "TXVLAN " : "", - hw->cap & NFP_NET_CFG_CTRL_SCATTER ? "SCATTER " : "", - hw->cap & NFP_NET_CFG_CTRL_GATHER ? "GATHER " : "", - hw->cap & NFP_NET_CFG_CTRL_LIVE_ADDR ? "LIVE_ADDR " : "", - hw->cap & NFP_NET_CFG_CTRL_LSO ? "TSO " : "", - hw->cap & NFP_NET_CFG_CTRL_LSO2 ? "TSOv2 " : "", - hw->cap & NFP_NET_CFG_CTRL_RSS ? "RSS " : "", - hw->cap & NFP_NET_CFG_CTRL_RSS2 ? 
"RSSv2 " : ""); - - hw->ctrl = 0; - - hw->stride_rx = stride; - hw->stride_tx = stride; - - PMD_INIT_LOG(INFO, "max_rx_queues: %u, max_tx_queues: %u", - hw->max_rx_queues, hw->max_tx_queues); - - /* Initializing spinlock for reconfigs */ - rte_spinlock_init(&hw->reconfig_lock); - - /* Allocating memory for mac addr */ - eth_dev->data->mac_addrs = rte_zmalloc("mac_addr", - RTE_ETHER_ADDR_LEN, 0); - if (eth_dev->data->mac_addrs == NULL) { - PMD_INIT_LOG(ERR, "Failed to space for MAC address"); - err = -ENOMEM; - goto dev_err_queues_map; - } - - if (hw->is_pf) { - nfp_net_pf_read_mac(hwport0, port); - nfp_net_write_mac(hw, (uint8_t *)&hw->mac_addr); - } else { - nfp_net_vf_read_mac(hw); - } - - if (!rte_is_valid_assigned_ether_addr( - (struct rte_ether_addr *)&hw->mac_addr)) { - PMD_INIT_LOG(INFO, "Using random mac address for port %d", - port); - /* Using random mac addresses for VFs */ - rte_eth_random_addr(&hw->mac_addr[0]); - nfp_net_write_mac(hw, (uint8_t *)&hw->mac_addr); - } - - /* Copying mac address to DPDK eth_dev struct */ - rte_ether_addr_copy((struct rte_ether_addr *)hw->mac_addr, - ð_dev->data->mac_addrs[0]); - - if (!(hw->cap & NFP_NET_CFG_CTRL_LIVE_ADDR)) - eth_dev->data->dev_flags |= RTE_ETH_DEV_NOLIVE_MAC_ADDR; - - eth_dev->data->dev_flags |= RTE_ETH_DEV_AUTOFILL_QUEUE_XSTATS; - - PMD_INIT_LOG(INFO, "port %d VendorID=0x%x DeviceID=0x%x " - "mac=%02x:%02x:%02x:%02x:%02x:%02x", - eth_dev->data->port_id, pci_dev->id.vendor_id, - pci_dev->id.device_id, - hw->mac_addr[0], hw->mac_addr[1], hw->mac_addr[2], - hw->mac_addr[3], hw->mac_addr[4], hw->mac_addr[5]); - - if (rte_eal_process_type() == RTE_PROC_PRIMARY) { - /* Registering LSC interrupt handler */ - rte_intr_callback_register(&pci_dev->intr_handle, - nfp_net_dev_interrupt_handler, - (void *)eth_dev); - /* Telling the firmware about the LSC interrupt entry */ - nn_cfg_writeb(hw, NFP_NET_CFG_LSC, NFP_NET_IRQ_LSC_IDX); - /* Recording current stats counters values */ - nfp_net_stats_reset(eth_dev); - } - - return 0; - -dev_err_queues_map: - nfp_cpp_area_free(hw->hwqueues_area); -dev_err_ctrl_map: - nfp_cpp_area_free(hw->ctrl_area); - - return err; -} - -#define NFP_CPP_MEMIO_BOUNDARY (1 << 20) - -/* - * Serving a write request to NFP from host programs. The request - * sends the write size and the CPP target. The bridge makes use - * of CPP interface handler configured by the PMD setup. 
- */ -static int -nfp_cpp_bridge_serve_write(int sockfd, struct nfp_cpp *cpp) -{ - struct nfp_cpp_area *area; - off_t offset, nfp_offset; - uint32_t cpp_id, pos, len; - uint32_t tmpbuf[16]; - size_t count, curlen, totlen = 0; - int err = 0; - - PMD_CPP_LOG(DEBUG, "%s: offset size %zu, count_size: %zu\n", __func__, - sizeof(off_t), sizeof(size_t)); - - /* Reading the count param */ - err = recv(sockfd, &count, sizeof(off_t), 0); - if (err != sizeof(off_t)) - return -EINVAL; - - curlen = count; - - /* Reading the offset param */ - err = recv(sockfd, &offset, sizeof(off_t), 0); - if (err != sizeof(off_t)) - return -EINVAL; - - /* Obtain target's CPP ID and offset in target */ - cpp_id = (offset >> 40) << 8; - nfp_offset = offset & ((1ull << 40) - 1); - - PMD_CPP_LOG(DEBUG, "%s: count %zu and offset %jd\n", __func__, count, - offset); - PMD_CPP_LOG(DEBUG, "%s: cpp_id %08x and nfp_offset %jd\n", __func__, - cpp_id, nfp_offset); - - /* Adjust length if not aligned */ - if (((nfp_offset + (off_t)count - 1) & ~(NFP_CPP_MEMIO_BOUNDARY - 1)) != - (nfp_offset & ~(NFP_CPP_MEMIO_BOUNDARY - 1))) { - curlen = NFP_CPP_MEMIO_BOUNDARY - - (nfp_offset & (NFP_CPP_MEMIO_BOUNDARY - 1)); - } - - while (count > 0) { - /* configure a CPP PCIe2CPP BAR for mapping the CPP target */ - area = nfp_cpp_area_alloc_with_name(cpp, cpp_id, "nfp.cdev", - nfp_offset, curlen); - if (!area) { - RTE_LOG(ERR, PMD, "%s: area alloc fail\n", __func__); - return -EIO; - } - - /* mapping the target */ - err = nfp_cpp_area_acquire(area); - if (err < 0) { - RTE_LOG(ERR, PMD, "area acquire failed\n"); - nfp_cpp_area_free(area); - return -EIO; - } - - for (pos = 0; pos < curlen; pos += len) { - len = curlen - pos; - if (len > sizeof(tmpbuf)) - len = sizeof(tmpbuf); - - PMD_CPP_LOG(DEBUG, "%s: Receive %u of %zu\n", __func__, - len, count); - err = recv(sockfd, tmpbuf, len, MSG_WAITALL); - if (err != (int)len) { - RTE_LOG(ERR, PMD, - "%s: error when receiving, %d of %zu\n", - __func__, err, count); - nfp_cpp_area_release(area); - nfp_cpp_area_free(area); - return -EIO; - } - err = nfp_cpp_area_write(area, pos, tmpbuf, len); - if (err < 0) { - RTE_LOG(ERR, PMD, "nfp_cpp_area_write error\n"); - nfp_cpp_area_release(area); - nfp_cpp_area_free(area); - return -EIO; - } - } - - nfp_offset += pos; - totlen += pos; - nfp_cpp_area_release(area); - nfp_cpp_area_free(area); - - count -= pos; - curlen = (count > NFP_CPP_MEMIO_BOUNDARY) ? - NFP_CPP_MEMIO_BOUNDARY : count; - } - - return 0; -} - -/* - * Serving a read request to NFP from host programs. The request - * sends the read size and the CPP target. The bridge makes use - * of CPP interface handler configured by the PMD setup. The read - * data is sent to the requester using the same socket. 
-/*
- * Serving a read request to NFP from host programs. The request
- * sends the read size and the CPP target. The bridge makes use of
- * the CPP interface handler configured by the PMD setup. The read
- * data is sent back to the requester using the same socket.
- */
-static int
-nfp_cpp_bridge_serve_read(int sockfd, struct nfp_cpp *cpp)
-{
-	struct nfp_cpp_area *area;
-	off_t offset, nfp_offset;
-	uint32_t cpp_id, pos, len;
-	uint32_t tmpbuf[16];
-	size_t count, curlen, totlen = 0;
-	int err = 0;
-
-	PMD_CPP_LOG(DEBUG, "%s: offset size %zu, count_size: %zu\n", __func__,
-		sizeof(off_t), sizeof(size_t));
-
-	/* Reading the count param */
-	err = recv(sockfd, &count, sizeof(off_t), 0);
-	if (err != sizeof(off_t))
-		return -EINVAL;
-
-	curlen = count;
-
-	/* Reading the offset param */
-	err = recv(sockfd, &offset, sizeof(off_t), 0);
-	if (err != sizeof(off_t))
-		return -EINVAL;
-
-	/* Obtain target's CPP ID and offset in target */
-	cpp_id = (offset >> 40) << 8;
-	nfp_offset = offset & ((1ull << 40) - 1);
-
-	PMD_CPP_LOG(DEBUG, "%s: count %zu and offset %jd\n", __func__, count,
-			   offset);
-	PMD_CPP_LOG(DEBUG, "%s: cpp_id %08x and nfp_offset %jd\n", __func__,
-			   cpp_id, nfp_offset);
-
-	/* Adjust length if not aligned */
-	if (((nfp_offset + (off_t)count - 1) & ~(NFP_CPP_MEMIO_BOUNDARY - 1)) !=
-	    (nfp_offset & ~(NFP_CPP_MEMIO_BOUNDARY - 1))) {
-		curlen = NFP_CPP_MEMIO_BOUNDARY -
-			(nfp_offset & (NFP_CPP_MEMIO_BOUNDARY - 1));
-	}
-
-	while (count > 0) {
-		area = nfp_cpp_area_alloc_with_name(cpp, cpp_id, "nfp.cdev",
-						    nfp_offset, curlen);
-		if (!area) {
-			RTE_LOG(ERR, PMD, "%s: area alloc failed\n", __func__);
-			return -EIO;
-		}
-
-		err = nfp_cpp_area_acquire(area);
-		if (err < 0) {
-			RTE_LOG(ERR, PMD, "area acquire failed\n");
-			nfp_cpp_area_free(area);
-			return -EIO;
-		}
-
-		for (pos = 0; pos < curlen; pos += len) {
-			len = curlen - pos;
-			if (len > sizeof(tmpbuf))
-				len = sizeof(tmpbuf);
-
-			err = nfp_cpp_area_read(area, pos, tmpbuf, len);
-			if (err < 0) {
-				RTE_LOG(ERR, PMD, "nfp_cpp_area_read error\n");
-				nfp_cpp_area_release(area);
-				nfp_cpp_area_free(area);
-				return -EIO;
-			}
-			PMD_CPP_LOG(DEBUG, "%s: sending %u of %zu\n", __func__,
-					   len, count);
-
-			err = send(sockfd, tmpbuf, len, 0);
-			if (err != (int)len) {
-				RTE_LOG(ERR, PMD,
-					"%s: error when sending: %d of %zu\n",
-					__func__, err, count);
-				nfp_cpp_area_release(area);
-				nfp_cpp_area_free(area);
-				return -EIO;
-			}
-		}
-
-		nfp_offset += pos;
-		totlen += pos;
-		nfp_cpp_area_release(area);
-		nfp_cpp_area_free(area);
-
-		count -= pos;
-		curlen = (count > NFP_CPP_MEMIO_BOUNDARY) ?
-			NFP_CPP_MEMIO_BOUNDARY : count;
-	}
-	return 0;
-}
-
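On the wire, a bridge client connects to /tmp/nfp_cpp, sends a 4-byte op code
(the constants defined just below), then count and offset, each read by the
server as an off_t. A hypothetical client-side read, error handling mostly
elided and LP64 assumed so size_t and off_t are both 8 bytes:

	#include <string.h>
	#include <sys/socket.h>
	#include <sys/types.h>
	#include <sys/un.h>
	#include <unistd.h>

	#define NFP_BRIDGE_OP_READ 20	/* matches the define further below */

	static int nfp_bridge_read(void *buf, size_t count, off_t enc_offset)
	{
		struct sockaddr_un addr = { .sun_family = AF_UNIX };
		int op = NFP_BRIDGE_OP_READ;
		int fd;

		fd = socket(AF_UNIX, SOCK_STREAM, 0);
		if (fd < 0)
			return -1;

		strcpy(addr.sun_path, "/tmp/nfp_cpp");
		if (connect(fd, (struct sockaddr *)&addr, sizeof(addr)) < 0) {
			close(fd);
			return -1;
		}

		send(fd, &op, 4, 0);			/* op code, 4 bytes */
		send(fd, &count, sizeof(off_t), 0);	/* count, read as off_t */
		send(fd, &enc_offset, sizeof(off_t), 0);/* cpp_id + address */
		recv(fd, buf, count, MSG_WAITALL);	/* payload streamed back */
		close(fd);
		return 0;
	}
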
-#define NFP_IOCTL 'n'
-#define NFP_IOCTL_CPP_IDENTIFICATION _IOW(NFP_IOCTL, 0x8f, uint32_t)
-/*
- * Serving an ioctl command from host NFP tools. This usually goes to
- * a kernel char driver, but that driver is not available when the PF
- * is bound to the PMD. Currently just one ioctl command is served and
- * it does not require any CPP access at all.
- */
-static int
-nfp_cpp_bridge_serve_ioctl(int sockfd, struct nfp_cpp *cpp)
-{
-	uint32_t cmd, ident_size, tmp;
-	int err;
-
-	/* Reading the IOCTL command */
-	err = recv(sockfd, &cmd, 4, 0);
-	if (err != 4) {
-		RTE_LOG(ERR, PMD, "%s: read error from socket\n", __func__);
-		return -EIO;
-	}
-
-	/* Only supporting NFP_IOCTL_CPP_IDENTIFICATION */
-	if (cmd != NFP_IOCTL_CPP_IDENTIFICATION) {
-		RTE_LOG(ERR, PMD, "%s: unknown cmd %d\n", __func__, cmd);
-		return -EINVAL;
-	}
-
-	err = recv(sockfd, &ident_size, 4, 0);
-	if (err != 4) {
-		RTE_LOG(ERR, PMD, "%s: read error from socket\n", __func__);
-		return -EIO;
-	}
-
-	tmp = nfp_cpp_model(cpp);
-
-	PMD_CPP_LOG(DEBUG, "%s: sending NFP model %08x\n", __func__, tmp);
-
-	err = send(sockfd, &tmp, 4, 0);
-	if (err != 4) {
-		RTE_LOG(ERR, PMD, "%s: error writing to socket\n", __func__);
-		return -EIO;
-	}
-
-	tmp = cpp->interface;
-
-	PMD_CPP_LOG(DEBUG, "%s: sending NFP interface %08x\n", __func__, tmp);
-
-	err = send(sockfd, &tmp, 4, 0);
-	if (err != 4) {
-		RTE_LOG(ERR, PMD, "%s: error writing to socket\n", __func__);
-		return -EIO;
-	}
-
-	return 0;
-}
-
-#define NFP_BRIDGE_OP_READ	20
-#define NFP_BRIDGE_OP_WRITE	30
-#define NFP_BRIDGE_OP_IOCTL	40
-
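The op codes above frame every exchange on the socket; the function that
follows is the server loop, registered elsewhere in this file as the
rte_service spec "nfp_cpp_service". For context, a plausible application-side
sequence for giving that service a core, sketched with the standard service
API (the helper name is hypothetical):

	#include <errno.h>
	#include <stdint.h>

	#include <rte_service.h>
	#include <rte_service_component.h>

	/* Hypothetical helper: look up the bridge service by its registered
	 * name and give it a dedicated service core. */
	static int nfp_cpp_bridge_enable(uint32_t lcore_id)
	{
		uint32_t id;
		int ret;

		if (rte_service_get_by_name("nfp_cpp_service", &id) != 0)
			return -ENOENT;

		ret = rte_service_lcore_add(lcore_id);
		if (ret != 0 && ret != -EALREADY)
			return ret;

		/* map the service to the core, mark both runnable, start */
		rte_service_map_lcore_set(id, lcore_id, 1);
		rte_service_component_runstate_set(id, 1);
		rte_service_runstate_set(id, 1);
		return rte_service_lcore_start(lcore_id);
	}
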
-/*
- * This is the code to be executed by a service core. The CPP bridge interface
- * is based on a unix socket; the requests a kernel char driver would usually
- * receive (read, write and ioctl) are handled by the CPP bridge instead. NFP
- * host tools can be executed with a wrapper library and LD_LIBRARY_PATH,
- * completely unaware that the CPP bridge is standing in for the NFP kernel
- * char driver for CPP accesses.
- */
-static int32_t
-nfp_cpp_bridge_service_func(void *args)
-{
-	struct sockaddr address;
-	struct nfp_cpp *cpp = args;
-	int sockfd, datafd, op, ret;
-
-	unlink("/tmp/nfp_cpp");
-	sockfd = socket(AF_UNIX, SOCK_STREAM, 0);
-	if (sockfd < 0) {
-		RTE_LOG(ERR, PMD, "%s: socket creation error. Service failed\n",
-			__func__);
-		return -EIO;
-	}
-
-	memset(&address, 0, sizeof(struct sockaddr));
-
-	address.sa_family = AF_UNIX;
-	strcpy(address.sa_data, "/tmp/nfp_cpp");
-
-	ret = bind(sockfd, (const struct sockaddr *)&address,
-		   sizeof(struct sockaddr));
-	if (ret < 0) {
-		RTE_LOG(ERR, PMD, "%s: bind error (%d). Service failed\n",
-			__func__, errno);
-		close(sockfd);
-		return ret;
-	}
-
-	ret = listen(sockfd, 20);
-	if (ret < 0) {
-		RTE_LOG(ERR, PMD, "%s: listen error(%d). Service failed\n",
-			__func__, errno);
-		close(sockfd);
-		return ret;
-	}
-
-	for (;;) {
-		datafd = accept(sockfd, NULL, NULL);
-		if (datafd < 0) {
-			RTE_LOG(ERR, PMD, "%s: accept call error (%d)\n",
-				__func__, errno);
-			RTE_LOG(ERR, PMD, "%s: service failed\n", __func__);
-			close(sockfd);
-			return -EIO;
-		}
-
-		while (1) {
-			ret = recv(datafd, &op, 4, 0);
-			if (ret <= 0) {
-				PMD_CPP_LOG(DEBUG, "%s: socket close\n",
-					    __func__);
-				break;
-			}
-
-			PMD_CPP_LOG(DEBUG, "%s: getting op %u\n", __func__, op);
-
-			if (op == NFP_BRIDGE_OP_READ)
-				nfp_cpp_bridge_serve_read(datafd, cpp);
-
-			if (op == NFP_BRIDGE_OP_WRITE)
-				nfp_cpp_bridge_serve_write(datafd, cpp);
-
-			if (op == NFP_BRIDGE_OP_IOCTL)
-				nfp_cpp_bridge_serve_ioctl(datafd, cpp);
-
-			if (op == 0)
-				break;
-		}
-		close(datafd);
-	}
-	close(sockfd);
-
-	return 0;
-}
-
-static int
-nfp_pf_create_dev(struct rte_pci_device *dev, int port, int ports,
-		  struct nfp_cpp *cpp, struct nfp_hwinfo *hwinfo,
-		  int phys_port, struct nfp_rtsym_table *sym_tbl, void **priv)
-{
-	struct rte_eth_dev *eth_dev;
-	struct nfp_net_hw *hw = NULL;
-	char *port_name;
-	struct rte_service_spec service;
-	int retval;
+		hw->cap & NFP_NET_CFG_CTRL_TXVLAN ? "TXVLAN " : "",
+		hw->cap & NFP_NET_CFG_CTRL_SCATTER ? "SCATTER " : "",
+		hw->cap & NFP_NET_CFG_CTRL_GATHER ? "GATHER " : "",
+		hw->cap & NFP_NET_CFG_CTRL_LIVE_ADDR ? "LIVE_ADDR " : "",
+		hw->cap & NFP_NET_CFG_CTRL_LSO ? "TSO " : "",
+		hw->cap & NFP_NET_CFG_CTRL_LSO2 ? "TSOv2 " : "",
+		hw->cap & NFP_NET_CFG_CTRL_RSS ? "RSS " : "",
+		hw->cap & NFP_NET_CFG_CTRL_RSS2 ? "RSSv2 " : "");
 
-	port_name = rte_zmalloc("nfp_pf_port_name", 100, 0);
-	if (!port_name)
-		return -ENOMEM;
+	hw->ctrl = 0;
 
-	if (ports > 1)
-		snprintf(port_name, 100, "%s_port%d", dev->device.name, port);
-	else
-		strlcat(port_name, dev->device.name, 100);
+	hw->stride_rx = stride;
+	hw->stride_tx = stride;
 
+	PMD_INIT_LOG(INFO, "max_rx_queues: %u, max_tx_queues: %u",
+		     hw->max_rx_queues, hw->max_tx_queues);
 
-	if (rte_eal_process_type() == RTE_PROC_PRIMARY) {
-		eth_dev = rte_eth_dev_allocate(port_name);
-		if (!eth_dev) {
-			rte_free(port_name);
-			return -ENODEV;
-		}
-		if (port == 0) {
-			*priv = rte_zmalloc(port_name,
-					    sizeof(struct nfp_net_adapter) *
-					    ports, RTE_CACHE_LINE_SIZE);
-			if (!*priv) {
-				rte_free(port_name);
-				rte_eth_dev_release_port(eth_dev);
-				return -ENOMEM;
-			}
-		}
-		eth_dev->data->dev_private = *priv;
+	/* Initializing spinlock for reconfigs */
+	rte_spinlock_init(&hw->reconfig_lock);
 
-		/*
-		 * dev_private points to port0's dev_private because we need
-		 * to configure vNIC bars based on port0 at nfp_net_init.
-		 * dev_private is then adjusted per port.
-		 */
-		hw = (struct nfp_net_hw *)(eth_dev->data->dev_private) + port;
-		hw->cpp = cpp;
-		hw->hwinfo = hwinfo;
-		hw->sym_tbl = sym_tbl;
-		hw->pf_port_idx = phys_port;
-		hw->is_pf = 1;
-		if (ports > 1)
-			hw->pf_multiport_enabled = 1;
-
-		hw->total_ports = ports;
-	} else {
-		eth_dev = rte_eth_dev_attach_secondary(port_name);
-		if (!eth_dev) {
-			RTE_LOG(ERR, EAL, "secondary process attach failed, "
-				"ethdev doesn't exist");
-			rte_free(port_name);
-			return -ENODEV;
-		}
-		eth_dev->process_private = cpp;
+	/* Allocating memory for mac addr */
+	eth_dev->data->mac_addrs = rte_zmalloc("mac_addr",
+					       RTE_ETHER_ADDR_LEN, 0);
+	if (eth_dev->data->mac_addrs == NULL) {
+		PMD_INIT_LOG(ERR, "Failed to allocate space for MAC address");
+		err = -ENOMEM;
+		goto dev_err_queues_map;
 	}
 
-	eth_dev->device = &dev->device;
-	rte_eth_copy_pci_info(eth_dev, dev);
-
-	retval = nfp_net_init(eth_dev);
-
-	if (retval) {
-		retval = -ENODEV;
-		goto probe_failed;
-	} else {
-		rte_eth_dev_probing_finish(eth_dev);
+	if (hw->is_phyport) {
+		nfp_net_pf_read_mac(pf_dev, port);
+		nfp_net_write_mac(hw, (uint8_t *)&hw->mac_addr);
 	}
 
-	rte_free(port_name);
+	if (!rte_is_valid_assigned_ether_addr(
+		    (struct rte_ether_addr *)&hw->mac_addr)) {
+		PMD_INIT_LOG(INFO, "Using random mac address for port %d",
+			     port);
+		/* Using random mac addresses for VFs */
+		rte_eth_random_addr(&hw->mac_addr[0]);
+		nfp_net_write_mac(hw, (uint8_t *)&hw->mac_addr);
+	}
 
-	if (port == 0) {
-		/*
-		 * The rte_service needs to be created just once per PMD, and
-		 * the cpp handler needs to be linked to the service. Secondary
-		 * processes will be used for debugging DPDK apps when they
-		 * need the CPP interface for accessing NFP components, and
-		 * the cpp handler for secondary processes is available at
-		 * this point.
-		 */
-		memset(&service, 0, sizeof(struct rte_service_spec));
-		snprintf(service.name, sizeof(service.name), "nfp_cpp_service");
-		service.callback = nfp_cpp_bridge_service_func;
-		service.callback_userdata = (void *)cpp;
+	/* Copying mac address to DPDK eth_dev struct */
+	rte_ether_addr_copy((struct rte_ether_addr *)hw->mac_addr,
+			    &eth_dev->data->mac_addrs[0]);
 
-		hw = (struct nfp_net_hw *)(eth_dev->data->dev_private);
+	if (!(hw->cap & NFP_NET_CFG_CTRL_LIVE_ADDR))
+		eth_dev->data->dev_flags |= RTE_ETH_DEV_NOLIVE_MAC_ADDR;
 
-		if (rte_service_component_register(&service,
-						   &hw->nfp_cpp_service_id))
-			RTE_LOG(ERR, PMD, "NFP CPP bridge service register() failed");
-		else
-			RTE_LOG(DEBUG, PMD, "NFP CPP bridge service registered");
-	}
+	eth_dev->data->dev_flags |= RTE_ETH_DEV_AUTOFILL_QUEUE_XSTATS;
 
-	return retval;
+	PMD_INIT_LOG(INFO, "port %d VendorID=0x%x DeviceID=0x%x "
+		     "mac=%02x:%02x:%02x:%02x:%02x:%02x",
+		     eth_dev->data->port_id, pci_dev->id.vendor_id,
+		     pci_dev->id.device_id,
+		     hw->mac_addr[0], hw->mac_addr[1], hw->mac_addr[2],
+		     hw->mac_addr[3], hw->mac_addr[4], hw->mac_addr[5]);
 
-probe_failed:
-	rte_free(port_name);
-	/* free ports private data if primary process */
 	if (rte_eal_process_type() == RTE_PROC_PRIMARY) {
-		rte_free(eth_dev->data->dev_private);
-		eth_dev->data->dev_private = NULL;
+		/* Registering LSC interrupt handler */
+		rte_intr_callback_register(&pci_dev->intr_handle,
+					   nfp_net_dev_interrupt_handler,
+					   (void *)eth_dev);
+		/* Telling the firmware about the LSC interrupt entry */
+		nn_cfg_writeb(hw, NFP_NET_CFG_LSC, NFP_NET_IRQ_LSC_IDX);
+		/* Recording current stats counter values */
+		nfp_net_stats_reset(eth_dev);
 	}
 
-	rte_eth_dev_release_port(eth_dev);
-	return retval;
+	return 0;
+
+dev_err_queues_map:
+	nfp_cpp_area_free(hw->hwqueues_area);
+dev_err_ctrl_map:
+	nfp_cpp_area_free(hw->ctrl_area);
+
+	return err;
 }
 
 #define DEFAULT_FW_PATH       "/lib/firmware/netronome"
 
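The hunk below drops the hand-rolled open()/fstat()/read() sequence in favour
of rte_firmware_read() from the internal eal_firmware.h header added by this
patch. A minimal usage sketch, assuming only that the helper allocates the
buffer (freed by the caller with free()) and, when built with libarchive,
transparently tries a compressed "<name>.xz" image:

	#include <stdlib.h>

	#include "eal_firmware.h"	/* internal EAL header, as above */

	static int try_load(const char *path)
	{
		void *buf;
		size_t size;

		if (rte_firmware_read(path, &buf, &size) < 0)
			return -1;	/* not found or unreadable */

		/* ... hand buf/size to a consumer, e.g. nfp_nsp_load_fw() ... */
		free(buf);
		return 0;
	}
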
- "Just %" PRIu64 " of %" PRIu64 " bytes read", - (uint64_t)bytes, (uint64_t)fsize); - free(fw_buf); - close(fw_f); - return -EIO; - } +load_fw: + PMD_DRV_LOG(INFO, "Firmware file found at %s with size: %zu", + fw_name, fsize); PMD_DRV_LOG(INFO, "Uploading the firmware ..."); - nfp_nsp_load_fw(nsp, fw_buf, bytes); + nfp_nsp_load_fw(nsp, fw_buf, fsize); PMD_DRV_LOG(INFO, "Done"); free(fw_buf); - close(fw_f); - return 0; } @@ -3618,20 +1990,104 @@ nfp_fw_setup(struct rte_pci_device *dev, struct nfp_cpp *cpp, return err; } -static int nfp_pf_pci_probe(struct rte_pci_driver *pci_drv __rte_unused, - struct rte_pci_device *dev) +static int nfp_init_phyports(struct nfp_pf_dev *pf_dev) +{ + struct nfp_net_hw *hw; + struct rte_eth_dev *eth_dev; + struct nfp_eth_table *nfp_eth_table = NULL; + int ret = 0; + int i; + + nfp_eth_table = nfp_eth_read_ports(pf_dev->cpp); + if (!nfp_eth_table) { + PMD_INIT_LOG(ERR, "Error reading NFP ethernet table"); + ret = -EIO; + goto error; + } + + /* Loop through all physical ports on PF */ + for (i = 0; i < pf_dev->total_phyports; i++) { + const unsigned int numa_node = rte_socket_id(); + char port_name[RTE_ETH_NAME_MAX_LEN]; + + snprintf(port_name, sizeof(port_name), "%s_port%d", + pf_dev->pci_dev->device.name, i); + + /* Allocate a eth_dev for this phyport */ + eth_dev = rte_eth_dev_allocate(port_name); + if (!eth_dev) { + ret = -ENODEV; + goto port_cleanup; + } + + /* Allocate memory for this phyport */ + eth_dev->data->dev_private = + rte_zmalloc_socket(port_name, sizeof(struct nfp_net_hw), + RTE_CACHE_LINE_SIZE, numa_node); + if (!eth_dev->data->dev_private) { + ret = -ENOMEM; + rte_eth_dev_release_port(eth_dev); + goto port_cleanup; + } + + hw = NFP_NET_DEV_PRIVATE_TO_HW(eth_dev->data->dev_private); + + /* Add this device to the PF's array of physical ports */ + pf_dev->ports[i] = hw; + + hw->pf_dev = pf_dev; + hw->cpp = pf_dev->cpp; + hw->eth_dev = eth_dev; + hw->idx = i; + hw->nfp_idx = nfp_eth_table->ports[i].index; + hw->is_phyport = true; + + eth_dev->device = &pf_dev->pci_dev->device; + + /* ctrl/tx/rx BAR mappings and remaining init happens in + * nfp_net_init + */ + ret = nfp_net_init(eth_dev); + + if (ret) { + ret = -ENODEV; + goto port_cleanup; + } + + rte_eth_dev_probing_finish(eth_dev); + + } /* End loop, all ports on this PF */ + ret = 0; + goto eth_table_cleanup; + +port_cleanup: + for (i = 0; i < pf_dev->total_phyports; i++) { + if (pf_dev->ports[i] && pf_dev->ports[i]->eth_dev) { + struct rte_eth_dev *tmp_dev; + tmp_dev = pf_dev->ports[i]->eth_dev; + rte_eth_dev_release_port(tmp_dev); + pf_dev->ports[i] = NULL; + } + } +eth_table_cleanup: + free(nfp_eth_table); +error: + return ret; +} + +static int nfp_pf_init(struct rte_pci_device *pci_dev) { + struct nfp_pf_dev *pf_dev = NULL; struct nfp_cpp *cpp; struct nfp_hwinfo *hwinfo; struct nfp_rtsym_table *sym_tbl; struct nfp_eth_table *nfp_eth_table = NULL; + char name[RTE_ETH_NAME_MAX_LEN]; int total_ports; - void *priv = 0; int ret = -ENODEV; int err; - int i; - if (!dev) + if (!pci_dev) return ret; /* @@ -3641,73 +2097,219 @@ static int nfp_pf_pci_probe(struct rte_pci_driver *pci_drv __rte_unused, * interface. Here we avoid this telling to the CPP init code to * use a lock file if UIO is being used. 
+static int nfp_pf_init(struct rte_pci_device *pci_dev)
 {
+	struct nfp_pf_dev *pf_dev = NULL;
 	struct nfp_cpp *cpp;
 	struct nfp_hwinfo *hwinfo;
 	struct nfp_rtsym_table *sym_tbl;
 	struct nfp_eth_table *nfp_eth_table = NULL;
+	char name[RTE_ETH_NAME_MAX_LEN];
 	int total_ports;
-	void *priv = 0;
 	int ret = -ENODEV;
 	int err;
-	int i;
 
-	if (!dev)
+	if (!pci_dev)
 		return ret;
 
 	/*
@@ -3641,73 +2097,219 @@ static int nfp_pf_pci_probe(struct rte_pci_driver *pci_drv __rte_unused,
 	 * interface. Here we avoid this by telling the CPP init code to
 	 * use a lock file if UIO is being used.
 	 */
-	if (dev->kdrv == RTE_PCI_KDRV_VFIO)
-		cpp = nfp_cpp_from_device_name(dev, 0);
+	if (pci_dev->kdrv == RTE_PCI_KDRV_VFIO)
+		cpp = nfp_cpp_from_device_name(pci_dev, 0);
 	else
-		cpp = nfp_cpp_from_device_name(dev, 1);
+		cpp = nfp_cpp_from_device_name(pci_dev, 1);
 
 	if (!cpp) {
-		PMD_DRV_LOG(ERR, "A CPP handle cannot be obtained");
+		PMD_INIT_LOG(ERR, "A CPP handle cannot be obtained");
 		ret = -EIO;
 		goto error;
 	}
 
 	hwinfo = nfp_hwinfo_read(cpp);
 	if (!hwinfo) {
-		PMD_DRV_LOG(ERR, "Error reading hwinfo table");
-		return -EIO;
+		PMD_INIT_LOG(ERR, "Error reading hwinfo table");
+		ret = -EIO;
+		goto error;
 	}
 
 	nfp_eth_table = nfp_eth_read_ports(cpp);
 	if (!nfp_eth_table) {
-		PMD_DRV_LOG(ERR, "Error reading NFP ethernet table");
-		return -EIO;
+		PMD_INIT_LOG(ERR, "Error reading NFP ethernet table");
+		ret = -EIO;
+		goto hwinfo_cleanup;
 	}
 
-	if (rte_eal_process_type() == RTE_PROC_PRIMARY) {
-		if (nfp_fw_setup(dev, cpp, nfp_eth_table, hwinfo)) {
-			PMD_DRV_LOG(INFO, "Error when uploading firmware");
-			ret = -EIO;
-			goto error;
-		}
+	if (nfp_fw_setup(pci_dev, cpp, nfp_eth_table, hwinfo)) {
+		PMD_INIT_LOG(ERR, "Error when uploading firmware");
+		ret = -EIO;
+		goto eth_table_cleanup;
 	}
 
 	/* Now the symbol table should be there */
 	sym_tbl = nfp_rtsym_table_read(cpp);
 	if (!sym_tbl) {
-		PMD_DRV_LOG(ERR, "Something is wrong with the firmware"
+		PMD_INIT_LOG(ERR, "Something is wrong with the firmware"
 				" symbol table");
 		ret = -EIO;
-		goto error;
+		goto eth_table_cleanup;
 	}
 
 	total_ports = nfp_rtsym_read_le(sym_tbl, "nfd_cfg_pf0_num_ports", &err);
 	if (total_ports != (int)nfp_eth_table->count) {
 		PMD_DRV_LOG(ERR, "Inconsistent number of ports");
 		ret = -EIO;
-		goto error;
+		goto sym_tbl_cleanup;
 	}
-	PMD_INIT_LOG(INFO, "Total pf ports: %d", total_ports);
+
+	PMD_INIT_LOG(INFO, "Total physical ports: %d", total_ports);
 
 	if (total_ports <= 0 || total_ports > 8) {
-		PMD_DRV_LOG(ERR, "nfd_cfg_pf0_num_ports symbol with wrong value");
+		PMD_INIT_LOG(ERR, "nfd_cfg_pf0_num_ports symbol with wrong value");
 		ret = -ENODEV;
-		goto error;
+		goto sym_tbl_cleanup;
+	}
+
+	/* Allocate memory for the PF "device" */
+	snprintf(name, sizeof(name), "nfp_pf%d", 0);
+	pf_dev = rte_zmalloc(name, sizeof(*pf_dev), 0);
+	if (!pf_dev) {
+		ret = -ENOMEM;
+		goto sym_tbl_cleanup;
 	}
 
-	for (i = 0; i < total_ports; i++) {
-		ret = nfp_pf_create_dev(dev, i, total_ports, cpp, hwinfo,
-					nfp_eth_table->ports[i].index,
-					sym_tbl, &priv);
-		if (ret)
-			break;
+	/* Populate the newly created PF device */
+	pf_dev->cpp = cpp;
+	pf_dev->hwinfo = hwinfo;
+	pf_dev->sym_tbl = sym_tbl;
+	pf_dev->total_phyports = total_ports;
+
+	if (total_ports > 1)
+		pf_dev->multiport = true;
+
+	pf_dev->pci_dev = pci_dev;
+
+	/* Map the symbol table */
+	pf_dev->ctrl_bar = nfp_rtsym_map(pf_dev->sym_tbl, "_pf0_net_bar0",
+					 pf_dev->total_phyports * 32768,
+					 &pf_dev->ctrl_area);
+	if (!pf_dev->ctrl_bar) {
+		PMD_INIT_LOG(ERR, "nfp_rtsym_map fails for _pf0_net_bar0");
+		ret = -EIO;
+		goto pf_cleanup;
 	}
 
-error:
+	PMD_INIT_LOG(DEBUG, "ctrl bar: %p", pf_dev->ctrl_bar);
+
+	/* configure access to tx/rx vNIC BARs */
+	pf_dev->hw_queues = nfp_cpp_map_area(pf_dev->cpp, 0, 0,
+					     NFP_PCIE_QUEUE(0),
+					     NFP_QCP_QUEUE_AREA_SZ,
+					     &pf_dev->hwqueues_area);
+	if (!pf_dev->hw_queues) {
+		PMD_INIT_LOG(ERR, "nfp_rtsym_map fails for net.qc");
+		ret = -EIO;
+		goto ctrl_area_cleanup;
+	}
+
+	PMD_INIT_LOG(DEBUG, "tx/rx bar address: 0x%p", pf_dev->hw_queues);
+
+	/* Initialize and prep physical ports now
+	 * This will loop through all physical ports
+	 */
+	ret = nfp_init_phyports(pf_dev);
+	if (ret) {
PMD_INIT_LOG(ERR, "Could not create physical ports"); + goto hwqueues_cleanup; + } + + /* register the CPP bridge service here for primary use */ + nfp_register_cpp_service(pf_dev->cpp); + + return 0; + +hwqueues_cleanup: + nfp_cpp_area_free(pf_dev->hwqueues_area); +ctrl_area_cleanup: + nfp_cpp_area_free(pf_dev->ctrl_area); +pf_cleanup: + rte_free(pf_dev); +sym_tbl_cleanup: + free(sym_tbl); +eth_table_cleanup: free(nfp_eth_table); +hwinfo_cleanup: + free(hwinfo); +error: return ret; } +/* + * When attaching to the NFP4000/6000 PF on a secondary process there + * is no need to initialize the PF again. Only minimal work is required + * here + */ +static int nfp_pf_secondary_init(struct rte_pci_device *pci_dev) +{ + struct nfp_cpp *cpp; + struct nfp_rtsym_table *sym_tbl; + int total_ports; + int i; + int err; + + if (!pci_dev) + return -ENODEV; + + /* + * When device bound to UIO, the device could be used, by mistake, + * by two DPDK apps, and the UIO driver does not avoid it. This + * could lead to a serious problem when configuring the NFP CPP + * interface. Here we avoid this telling to the CPP init code to + * use a lock file if UIO is being used. + */ + if (pci_dev->kdrv == RTE_PCI_KDRV_VFIO) + cpp = nfp_cpp_from_device_name(pci_dev, 0); + else + cpp = nfp_cpp_from_device_name(pci_dev, 1); + + if (!cpp) { + PMD_INIT_LOG(ERR, "A CPP handle can not be obtained"); + return -EIO; + } + + /* + * We don't have access to the PF created in the primary process + * here so we have to read the number of ports from firmware + */ + sym_tbl = nfp_rtsym_table_read(cpp); + if (!sym_tbl) { + PMD_INIT_LOG(ERR, "Something is wrong with the firmware" + " symbol table"); + return -EIO; + } + + total_ports = nfp_rtsym_read_le(sym_tbl, "nfd_cfg_pf0_num_ports", &err); + + for (i = 0; i < total_ports; i++) { + struct rte_eth_dev *eth_dev; + char port_name[RTE_ETH_NAME_MAX_LEN]; + + snprintf(port_name, sizeof(port_name), "%s_port%d", + pci_dev->device.name, i); + + PMD_DRV_LOG(DEBUG, "Secondary attaching to port %s", + port_name); + eth_dev = rte_eth_dev_attach_secondary(port_name); + if (!eth_dev) { + RTE_LOG(ERR, EAL, + "secondary process attach failed, " + "ethdev doesn't exist"); + return -ENODEV; + } + eth_dev->process_private = cpp; + eth_dev->dev_ops = &nfp_net_eth_dev_ops; + eth_dev->rx_queue_count = nfp_net_rx_queue_count; + eth_dev->rx_pkt_burst = &nfp_net_recv_pkts; + eth_dev->tx_pkt_burst = &nfp_net_xmit_pkts; + rte_eth_dev_probing_finish(eth_dev); + } + + /* Register the CPP bridge service for the secondary too */ + nfp_register_cpp_service(cpp); + + return 0; +} + +static int nfp_pf_pci_probe(struct rte_pci_driver *pci_drv __rte_unused, + struct rte_pci_device *dev) +{ + if (rte_eal_process_type() == RTE_PROC_PRIMARY) + return nfp_pf_init(dev); + else + return nfp_pf_secondary_init(dev); +} + static const struct rte_pci_id pci_id_nfp_pf_net_map[] = { { RTE_PCI_DEVICE(PCI_VENDOR_ID_NETRONOME, @@ -3722,55 +2324,32 @@ static const struct rte_pci_id pci_id_nfp_pf_net_map[] = { }, }; -static const struct rte_pci_id pci_id_nfp_vf_net_map[] = { - { - RTE_PCI_DEVICE(PCI_VENDOR_ID_NETRONOME, - PCI_DEVICE_ID_NFP6000_VF_NIC) - }, - { - .vendor_id = 0, - }, -}; - -static int eth_nfp_pci_probe(struct rte_pci_driver *pci_drv __rte_unused, - struct rte_pci_device *pci_dev) +static int nfp_pci_uninit(struct rte_eth_dev *eth_dev) { - return rte_eth_dev_pci_generic_probe(pci_dev, - sizeof(struct nfp_net_adapter), nfp_net_init); -} + struct rte_pci_device *pci_dev; + uint16_t port_id; -static int 
-eth_nfp_pci_remove(struct rte_pci_device *pci_dev)
-{
-	struct rte_eth_dev *eth_dev;
-	struct nfp_net_hw *hw, *hwport0;
-	int port = 0;
+	pci_dev = RTE_ETH_DEV_TO_PCI(eth_dev);
 
-	eth_dev = rte_eth_dev_allocated(pci_dev->device.name);
-	if (eth_dev == NULL)
-		return 0; /* port already released */
-	if ((pci_dev->id.device_id == PCI_DEVICE_ID_NFP4000_PF_NIC) ||
-	    (pci_dev->id.device_id == PCI_DEVICE_ID_NFP6000_PF_NIC)) {
-		port = get_pf_port_number(eth_dev->data->name);
+	if (pci_dev->id.device_id == PCI_DEVICE_ID_NFP4000_PF_NIC ||
+	    pci_dev->id.device_id == PCI_DEVICE_ID_NFP6000_PF_NIC) {
+		/* Free up all physical ports under PF */
+		RTE_ETH_FOREACH_DEV_OF(port_id, &pci_dev->device)
+			rte_eth_dev_close(port_id);
 		/*
-		 * hotplug is not possible with multiport PF although freeing
-		 * data structures can be done for first port.
+		 * Ports can be closed and freed but hotplugging is not
+		 * currently supported
 		 */
-		if (port != 0)
-			return -ENOTSUP;
-		hwport0 = NFP_NET_DEV_PRIVATE_TO_HW(eth_dev->data->dev_private);
-		hw = &hwport0[port];
-		nfp_cpp_area_free(hw->ctrl_area);
-		nfp_cpp_area_free(hw->hwqueues_area);
-		free(hw->hwinfo);
-		free(hw->sym_tbl);
-		nfp_cpp_free(hw->cpp);
-	} else {
-		hw = NFP_NET_DEV_PRIVATE_TO_HW(eth_dev->data->dev_private);
-	}
-	/* hotplug is not possible with multiport PF */
-	if (hw->pf_multiport_enabled)
 		return -ENOTSUP;
-
-	return rte_eth_dev_pci_generic_remove(pci_dev, NULL);
+	}
+
+	/* VF cleanup, just free private port data */
+	return nfp_net_close(eth_dev);
+}
+
+static int eth_nfp_pci_remove(struct rte_pci_device *pci_dev)
+{
+	return rte_eth_dev_pci_generic_remove(pci_dev, nfp_pci_uninit);
 }
 
 static struct rte_pci_driver rte_nfp_net_pf_pmd = {
@@ -3780,21 +2359,11 @@ static struct rte_pci_driver rte_nfp_net_pf_pmd = {
 	.remove = eth_nfp_pci_remove,
 };
 
-static struct rte_pci_driver rte_nfp_net_vf_pmd = {
-	.id_table = pci_id_nfp_vf_net_map,
-	.drv_flags = RTE_PCI_DRV_NEED_MAPPING | RTE_PCI_DRV_INTR_LSC,
-	.probe = eth_nfp_pci_probe,
-	.remove = eth_nfp_pci_remove,
-};
-
 RTE_PMD_REGISTER_PCI(net_nfp_pf, rte_nfp_net_pf_pmd);
-RTE_PMD_REGISTER_PCI(net_nfp_vf, rte_nfp_net_vf_pmd);
 RTE_PMD_REGISTER_PCI_TABLE(net_nfp_pf, pci_id_nfp_pf_net_map);
-RTE_PMD_REGISTER_PCI_TABLE(net_nfp_vf, pci_id_nfp_vf_net_map);
 RTE_PMD_REGISTER_KMOD_DEP(net_nfp_pf, "* igb_uio | uio_pci_generic | vfio");
-RTE_PMD_REGISTER_KMOD_DEP(net_nfp_vf, "* igb_uio | uio_pci_generic | vfio");
-RTE_LOG_REGISTER(nfp_logtype_init, pmd.net.nfp.init, NOTICE);
-RTE_LOG_REGISTER(nfp_logtype_driver, pmd.net.nfp.driver, NOTICE);
+RTE_LOG_REGISTER_SUFFIX(nfp_logtype_init, init, NOTICE);
+RTE_LOG_REGISTER_SUFFIX(nfp_logtype_driver, driver, NOTICE);
 /*
  * Local variables:
  * c-file-style: "Linux"