diff --git a/drivers/net/nfp/nfp_net.c b/drivers/net/nfp/nfp_net.c
index 6950d6f714..a46c4dd1cc 100644
--- a/drivers/net/nfp/nfp_net.c
+++ b/drivers/net/nfp/nfp_net.c
@@ -40,8 +40,10 @@
 #include "nfpcore/nfp_nsp.h"
 
 #include "nfp_net_pmd.h"
+#include "nfp_rxtx.h"
 #include "nfp_net_logs.h"
 #include "nfp_net_ctrl.h"
+#include "nfp_cpp_bridge.h"
 
 #include
 #include
@@ -53,236 +55,18 @@
 
 /* Prototypes */
 static int nfp_net_close(struct rte_eth_dev *dev);
-static int nfp_net_configure(struct rte_eth_dev *dev);
-static void nfp_net_dev_interrupt_handler(void *param);
-static void nfp_net_dev_interrupt_delayed_handler(void *param);
-static int nfp_net_dev_mtu_set(struct rte_eth_dev *dev, uint16_t mtu);
-static int nfp_net_infos_get(struct rte_eth_dev *dev,
-		struct rte_eth_dev_info *dev_info);
 static int nfp_net_init(struct rte_eth_dev *eth_dev);
-static int nfp_pf_init(struct rte_eth_dev *eth_dev);
+static int nfp_pf_init(struct rte_pci_device *pci_dev);
+static int nfp_pf_secondary_init(struct rte_pci_device *pci_dev);
+static int nfp_net_pf_read_mac(struct nfp_pf_dev *pf_dev, int port);
 static int nfp_pci_uninit(struct rte_eth_dev *eth_dev);
 static int nfp_init_phyports(struct nfp_pf_dev *pf_dev);
-static int nfp_net_link_update(struct rte_eth_dev *dev, int wait_to_complete);
-static int nfp_net_promisc_enable(struct rte_eth_dev *dev);
-static int nfp_net_promisc_disable(struct rte_eth_dev *dev);
-static int nfp_net_rx_fill_freelist(struct nfp_net_rxq *rxq);
-static uint32_t nfp_net_rx_queue_count(struct rte_eth_dev *dev,
-		uint16_t queue_idx);
-static uint16_t nfp_net_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts,
-		uint16_t nb_pkts);
-static void nfp_net_rx_queue_release(void *rxq);
-static int nfp_net_rx_queue_setup(struct rte_eth_dev *dev, uint16_t queue_idx,
-		uint16_t nb_desc, unsigned int socket_id,
-		const struct rte_eth_rxconf *rx_conf,
-		struct rte_mempool *mp);
-static int nfp_net_tx_free_bufs(struct nfp_net_txq *txq);
-static void nfp_net_tx_queue_release(void *txq);
-static int nfp_net_tx_queue_setup(struct rte_eth_dev *dev, uint16_t queue_idx,
-		uint16_t nb_desc, unsigned int socket_id,
-		const struct rte_eth_txconf *tx_conf);
-static int nfp_net_start(struct rte_eth_dev *dev);
-static int nfp_net_stats_get(struct rte_eth_dev *dev,
-		struct rte_eth_stats *stats);
-static int nfp_net_stats_reset(struct rte_eth_dev *dev);
 static int nfp_net_stop(struct rte_eth_dev *dev);
-static uint16_t nfp_net_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts,
-		uint16_t nb_pkts);
-
-static int nfp_net_rss_config_default(struct rte_eth_dev *dev);
-static int nfp_net_rss_hash_update(struct rte_eth_dev *dev,
-		struct rte_eth_rss_conf *rss_conf);
-static int nfp_net_rss_reta_write(struct rte_eth_dev *dev,
-		struct rte_eth_rss_reta_entry64 *reta_conf,
-		uint16_t reta_size);
-static int nfp_net_rss_hash_write(struct rte_eth_dev *dev,
-		struct rte_eth_rss_conf *rss_conf);
-static int nfp_set_mac_addr(struct rte_eth_dev *dev,
-		struct rte_ether_addr *mac_addr);
-static int32_t nfp_cpp_bridge_service_func(void *args);
 static int nfp_fw_setup(struct rte_pci_device *dev,
 		struct nfp_cpp *cpp,
 		struct nfp_eth_table *nfp_eth_table,
 		struct nfp_hwinfo *hwinfo);
-
-/* The offset of the queue controller queues in the PCIe Target */
-#define NFP_PCIE_QUEUE(_q) (0x80000 + (NFP_QCP_QUEUE_ADDR_SZ * ((_q) & 0xff))) - -/* Maximum value which can be added to a queue with one transaction */ -#define NFP_QCP_MAX_ADD 0x7f - -#define RTE_MBUF_DMA_ADDR_DEFAULT(mb) \ - (uint64_t)((mb)->buf_iova + RTE_PKTMBUF_HEADROOM) - -/* nfp_qcp_ptr - Read or Write Pointer of a queue */ -enum nfp_qcp_ptr { - NFP_QCP_READ_PTR = 0, - NFP_QCP_WRITE_PTR -}; - -/* - * nfp_qcp_ptr_add - Add the value to the selected pointer of a queue - * @q: Base address for queue structure - * @ptr: Add to the Read or Write pointer - * @val: Value to add to the queue pointer - * - * If @val is greater than @NFP_QCP_MAX_ADD multiple writes are performed. - */ -static inline void -nfp_qcp_ptr_add(uint8_t *q, enum nfp_qcp_ptr ptr, uint32_t val) -{ - uint32_t off; - - if (ptr == NFP_QCP_READ_PTR) - off = NFP_QCP_QUEUE_ADD_RPTR; - else - off = NFP_QCP_QUEUE_ADD_WPTR; - - while (val > NFP_QCP_MAX_ADD) { - nn_writel(rte_cpu_to_le_32(NFP_QCP_MAX_ADD), q + off); - val -= NFP_QCP_MAX_ADD; - } - - nn_writel(rte_cpu_to_le_32(val), q + off); -} - -/* - * nfp_qcp_read - Read the current Read/Write pointer value for a queue - * @q: Base address for queue structure - * @ptr: Read or Write pointer - */ -static inline uint32_t -nfp_qcp_read(uint8_t *q, enum nfp_qcp_ptr ptr) -{ - uint32_t off; - uint32_t val; - - if (ptr == NFP_QCP_READ_PTR) - off = NFP_QCP_QUEUE_STS_LO; - else - off = NFP_QCP_QUEUE_STS_HI; - - val = rte_cpu_to_le_32(nn_readl(q + off)); - - if (ptr == NFP_QCP_READ_PTR) - return val & NFP_QCP_QUEUE_STS_LO_READPTR_mask; - else - return val & NFP_QCP_QUEUE_STS_HI_WRITEPTR_mask; -} - -/* - * Functions to read/write from/to Config BAR - * Performs any endian conversion necessary. - */ -static inline uint8_t -nn_cfg_readb(struct nfp_net_hw *hw, int off) -{ - return nn_readb(hw->ctrl_bar + off); -} - -static inline void -nn_cfg_writeb(struct nfp_net_hw *hw, int off, uint8_t val) -{ - nn_writeb(val, hw->ctrl_bar + off); -} - -static inline uint32_t -nn_cfg_readl(struct nfp_net_hw *hw, int off) -{ - return rte_le_to_cpu_32(nn_readl(hw->ctrl_bar + off)); -} - -static inline void -nn_cfg_writel(struct nfp_net_hw *hw, int off, uint32_t val) -{ - nn_writel(rte_cpu_to_le_32(val), hw->ctrl_bar + off); -} - -static inline uint64_t -nn_cfg_readq(struct nfp_net_hw *hw, int off) -{ - return rte_le_to_cpu_64(nn_readq(hw->ctrl_bar + off)); -} - -static inline void -nn_cfg_writeq(struct nfp_net_hw *hw, int off, uint64_t val) -{ - nn_writeq(rte_cpu_to_le_64(val), hw->ctrl_bar + off); -} - -static void -nfp_net_rx_queue_release_mbufs(struct nfp_net_rxq *rxq) -{ - unsigned i; - - if (rxq->rxbufs == NULL) - return; - - for (i = 0; i < rxq->rx_count; i++) { - if (rxq->rxbufs[i].mbuf) { - rte_pktmbuf_free_seg(rxq->rxbufs[i].mbuf); - rxq->rxbufs[i].mbuf = NULL; - } - } -} - -static void -nfp_net_rx_queue_release(void *rx_queue) -{ - struct nfp_net_rxq *rxq = rx_queue; - - if (rxq) { - nfp_net_rx_queue_release_mbufs(rxq); - rte_free(rxq->rxbufs); - rte_free(rxq); - } -} - -static void -nfp_net_reset_rx_queue(struct nfp_net_rxq *rxq) -{ - nfp_net_rx_queue_release_mbufs(rxq); - rxq->rd_p = 0; - rxq->nb_rx_hold = 0; -} - -static void -nfp_net_tx_queue_release_mbufs(struct nfp_net_txq *txq) -{ - unsigned i; - - if (txq->txbufs == NULL) - return; - - for (i = 0; i < txq->tx_count; i++) { - if (txq->txbufs[i].mbuf) { - rte_pktmbuf_free_seg(txq->txbufs[i].mbuf); - txq->txbufs[i].mbuf = NULL; - } - } -} - -static void -nfp_net_tx_queue_release(void *tx_queue) -{ - struct nfp_net_txq *txq = tx_queue; - - 
if (txq) { - nfp_net_tx_queue_release_mbufs(txq); - rte_free(txq->txbufs); - rte_free(txq); - } -} - -static void -nfp_net_reset_tx_queue(struct nfp_net_txq *txq) -{ - nfp_net_tx_queue_release_mbufs(txq); - txq->wr_p = 0; - txq->rd_p = 0; -} - static int __nfp_net_reconfig(struct nfp_net_hw *hw, uint32_t update) { @@ -332,7 +116,7 @@ __nfp_net_reconfig(struct nfp_net_hw *hw, uint32_t update) * Write the update word to the BAR and ping the reconfig queue. Then poll * until the firmware has acknowledged the update by zeroing the update word. */ -static int +int nfp_net_reconfig(struct nfp_net_hw *hw, uint32_t ctrl, uint32_t update) { uint32_t err; @@ -368,7 +152,7 @@ nfp_net_reconfig(struct nfp_net_hw *hw, uint32_t ctrl, uint32_t update) * before any other function in the Ethernet API. This function can * also be re-invoked when a device is in the stopped state. */ -static int +int nfp_net_configure(struct rte_eth_dev *dev) { struct rte_eth_conf *dev_conf; @@ -411,7 +195,7 @@ nfp_net_configure(struct rte_eth_dev *dev) return 0; } -static void +void nfp_net_enable_queues(struct rte_eth_dev *dev) { struct nfp_net_hw *hw; @@ -435,7 +219,7 @@ nfp_net_enable_queues(struct rte_eth_dev *dev) nn_cfg_writeq(hw, NFP_NET_CFG_RXRS_ENABLE, enabled_queues); } -static void +void nfp_net_disable_queues(struct rte_eth_dev *dev) { struct nfp_net_hw *hw; @@ -460,26 +244,14 @@ nfp_net_disable_queues(struct rte_eth_dev *dev) hw->ctrl = new_ctrl; } -static int -nfp_net_rx_freelist_setup(struct rte_eth_dev *dev) -{ - int i; - - for (i = 0; i < dev->data->nb_rx_queues; i++) { - if (nfp_net_rx_fill_freelist(dev->data->rx_queues[i]) < 0) - return -1; - } - return 0; -} - -static void +void nfp_net_params_setup(struct nfp_net_hw *hw) { nn_cfg_writel(hw, NFP_NET_CFG_MTU, hw->mtu); nn_cfg_writel(hw, NFP_NET_CFG_FLBUFSZ, hw->flbufsz); } -static void +void nfp_net_cfg_queue_setup(struct nfp_net_hw *hw) { hw->qcp_cfg = hw->tx_bar + NFP_QCP_QUEUE_ADDR_SZ; @@ -487,7 +259,7 @@ nfp_net_cfg_queue_setup(struct nfp_net_hw *hw) #define ETH_ADDR_LEN 6 -static void +void nfp_eth_copy_mac(uint8_t *dst, const uint8_t *src) { int i; @@ -514,19 +286,7 @@ nfp_net_pf_read_mac(struct nfp_pf_dev *pf_dev, int port) return 0; } -static void -nfp_net_vf_read_mac(struct nfp_net_hw *hw) -{ - uint32_t tmp; - - tmp = rte_be_to_cpu_32(nn_cfg_readl(hw, NFP_NET_CFG_MACADDR)); - memcpy(&hw->mac_addr[0], &tmp, 4); - - tmp = rte_be_to_cpu_32(nn_cfg_readl(hw, NFP_NET_CFG_MACADDR + 4)); - memcpy(&hw->mac_addr[4], &tmp, 2); -} - -static void +void nfp_net_write_mac(struct nfp_net_hw *hw, uint8_t *mac) { uint32_t mac0 = *(uint32_t *)mac; @@ -574,7 +334,7 @@ nfp_set_mac_addr(struct rte_eth_dev *dev, struct rte_ether_addr *mac_addr) return 0; } -static int +int nfp_configure_rx_interrupt(struct rte_eth_dev *dev, struct rte_intr_handle *intr_handle) { @@ -618,7 +378,7 @@ nfp_configure_rx_interrupt(struct rte_eth_dev *dev, return 0; } -static uint32_t +uint32_t nfp_check_offloads(struct rte_eth_dev *dev) { struct nfp_net_hw *hw; @@ -954,7 +714,7 @@ nfp_net_close(struct rte_eth_dev *dev) return 0; } -static int +int nfp_net_promisc_enable(struct rte_eth_dev *dev) { uint32_t new_ctrl, update = 0; @@ -991,7 +751,7 @@ nfp_net_promisc_enable(struct rte_eth_dev *dev) return 0; } -static int +int nfp_net_promisc_disable(struct rte_eth_dev *dev) { uint32_t new_ctrl, update = 0; @@ -1027,7 +787,7 @@ nfp_net_promisc_disable(struct rte_eth_dev *dev) * Wait to complete is needed as it can take up to 9 seconds to get the Link * status. 
*/ -static int +int nfp_net_link_update(struct rte_eth_dev *dev, __rte_unused int wait_to_complete) { struct nfp_net_hw *hw; @@ -1077,7 +837,7 @@ nfp_net_link_update(struct rte_eth_dev *dev, __rte_unused int wait_to_complete) return ret; } -static int +int nfp_net_stats_get(struct rte_eth_dev *dev, struct rte_eth_stats *stats) { int i; @@ -1172,7 +932,7 @@ nfp_net_stats_get(struct rte_eth_dev *dev, struct rte_eth_stats *stats) return -EINVAL; } -static int +int nfp_net_stats_reset(struct rte_eth_dev *dev) { int i; @@ -1237,7 +997,7 @@ nfp_net_stats_reset(struct rte_eth_dev *dev) return 0; } -static int +int nfp_net_infos_get(struct rte_eth_dev *dev, struct rte_eth_dev_info *dev_info) { struct nfp_net_hw *hw; @@ -1331,7 +1091,7 @@ nfp_net_infos_get(struct rte_eth_dev *dev, struct rte_eth_dev_info *dev_info) return 0; } -static const uint32_t * +const uint32_t * nfp_net_supported_ptypes_get(struct rte_eth_dev *dev) { static const uint32_t ptypes[] = { @@ -1348,45 +1108,7 @@ nfp_net_supported_ptypes_get(struct rte_eth_dev *dev) return NULL; } -static uint32_t -nfp_net_rx_queue_count(struct rte_eth_dev *dev, uint16_t queue_idx) -{ - struct nfp_net_rxq *rxq; - struct nfp_net_rx_desc *rxds; - uint32_t idx; - uint32_t count; - - rxq = (struct nfp_net_rxq *)dev->data->rx_queues[queue_idx]; - - idx = rxq->rd_p; - - count = 0; - - /* - * Other PMDs are just checking the DD bit in intervals of 4 - * descriptors and counting all four if the first has the DD - * bit on. Of course, this is not accurate but can be good for - * performance. But ideally that should be done in descriptors - * chunks belonging to the same cache line - */ - - while (count < rxq->rx_count) { - rxds = &rxq->rxds[idx]; - if ((rxds->rxd.meta_len_dd & PCIE_DESC_RX_DD) == 0) - break; - - count++; - idx++; - - /* Wrapping? */ - if ((idx) == rxq->rx_count) - idx = 0; - } - - return count; -} - -static int +int nfp_rx_queue_intr_enable(struct rte_eth_dev *dev, uint16_t queue_id) { struct rte_pci_device *pci_dev; @@ -1406,7 +1128,7 @@ nfp_rx_queue_intr_enable(struct rte_eth_dev *dev, uint16_t queue_id) return 0; } -static int +int nfp_rx_queue_intr_disable(struct rte_eth_dev *dev, uint16_t queue_id) { struct rte_pci_device *pci_dev; @@ -1475,7 +1197,32 @@ nfp_net_irq_unmask(struct rte_eth_dev *dev) } } +/* + * Interrupt handler which shall be registered for alarm callback for delayed + * handling specific interrupt to wait for the stable nic state. As the NIC + * interrupt state is not stable for nfp after link is just down, it needs + * to wait 4 seconds to get the stable status. + * + * @param handle Pointer to interrupt handle. + * @param param The address of parameter (struct rte_eth_dev *) + * + * @return void + */ static void +nfp_net_dev_interrupt_delayed_handler(void *param) +{ + struct rte_eth_dev *dev = (struct rte_eth_dev *)param; + + nfp_net_link_update(dev, 0); + rte_eth_dev_callback_process(dev, RTE_ETH_EVENT_INTR_LSC, NULL); + + nfp_net_dev_link_status_print(dev); + + /* Unmasking */ + nfp_net_irq_unmask(dev); +} + +void nfp_net_dev_interrupt_handler(void *param) { int64_t timeout; @@ -1507,32 +1254,7 @@ nfp_net_dev_interrupt_handler(void *param) } } -/* - * Interrupt handler which shall be registered for alarm callback for delayed - * handling specific interrupt to wait for the stable nic state. As the NIC - * interrupt state is not stable for nfp after link is just down, it needs - * to wait 4 seconds to get the stable status. - * - * @param handle Pointer to interrupt handle. 
- * @param param The address of parameter (struct rte_eth_dev *) - * - * @return void - */ -static void -nfp_net_dev_interrupt_delayed_handler(void *param) -{ - struct rte_eth_dev *dev = (struct rte_eth_dev *)param; - - nfp_net_link_update(dev, 0); - rte_eth_dev_callback_process(dev, RTE_ETH_EVENT_INTR_LSC, NULL); - - nfp_net_dev_link_status_print(dev); - - /* Unmasking */ - nfp_net_irq_unmask(dev); -} - -static int +int nfp_net_dev_mtu_set(struct rte_eth_dev *dev, uint16_t mtu) { struct nfp_net_hw *hw; @@ -1567,903 +1289,59 @@ nfp_net_dev_mtu_set(struct rte_eth_dev *dev, uint16_t mtu) return 0; } -static int -nfp_net_rx_queue_setup(struct rte_eth_dev *dev, - uint16_t queue_idx, uint16_t nb_desc, - unsigned int socket_id, - const struct rte_eth_rxconf *rx_conf, - struct rte_mempool *mp) +int +nfp_net_vlan_offload_set(struct rte_eth_dev *dev, int mask) { - const struct rte_memzone *tz; - struct nfp_net_rxq *rxq; + uint32_t new_ctrl, update; struct nfp_net_hw *hw; - uint32_t rx_desc_sz; + int ret; hw = NFP_NET_DEV_PRIVATE_TO_HW(dev->data->dev_private); + new_ctrl = 0; - PMD_INIT_FUNC_TRACE(); - - /* Validating number of descriptors */ - rx_desc_sz = nb_desc * sizeof(struct nfp_net_rx_desc); - if (rx_desc_sz % NFP_ALIGN_RING_DESC != 0 || - nb_desc > NFP_NET_MAX_RX_DESC || - nb_desc < NFP_NET_MIN_RX_DESC) { - PMD_DRV_LOG(ERR, "Wrong nb_desc value"); - return -EINVAL; - } - - /* - * Free memory prior to re-allocation if needed. This is the case after - * calling nfp_net_stop - */ - if (dev->data->rx_queues[queue_idx]) { - nfp_net_rx_queue_release(dev->data->rx_queues[queue_idx]); - dev->data->rx_queues[queue_idx] = NULL; - } - - /* Allocating rx queue data structure */ - rxq = rte_zmalloc_socket("ethdev RX queue", sizeof(struct nfp_net_rxq), - RTE_CACHE_LINE_SIZE, socket_id); - if (rxq == NULL) - return -ENOMEM; + /* Enable vlan strip if it is not configured yet */ + if ((mask & ETH_VLAN_STRIP_OFFLOAD) && + !(hw->ctrl & NFP_NET_CFG_CTRL_RXVLAN)) + new_ctrl = hw->ctrl | NFP_NET_CFG_CTRL_RXVLAN; - /* Hw queues mapping based on firmware configuration */ - rxq->qidx = queue_idx; - rxq->fl_qcidx = queue_idx * hw->stride_rx; - rxq->rx_qcidx = rxq->fl_qcidx + (hw->stride_rx - 1); - rxq->qcp_fl = hw->rx_bar + NFP_QCP_QUEUE_OFF(rxq->fl_qcidx); - rxq->qcp_rx = hw->rx_bar + NFP_QCP_QUEUE_OFF(rxq->rx_qcidx); + /* Disable vlan strip just if it is configured */ + if (!(mask & ETH_VLAN_STRIP_OFFLOAD) && + (hw->ctrl & NFP_NET_CFG_CTRL_RXVLAN)) + new_ctrl = hw->ctrl & ~NFP_NET_CFG_CTRL_RXVLAN; - /* - * Tracking mbuf size for detecting a potential mbuf overflow due to - * RX offset - */ - rxq->mem_pool = mp; - rxq->mbuf_size = rxq->mem_pool->elt_size; - rxq->mbuf_size -= (sizeof(struct rte_mbuf) + RTE_PKTMBUF_HEADROOM); - hw->flbufsz = rxq->mbuf_size; + if (new_ctrl == 0) + return 0; - rxq->rx_count = nb_desc; - rxq->port_id = dev->data->port_id; - rxq->rx_free_thresh = rx_conf->rx_free_thresh; - rxq->drop_en = rx_conf->rx_drop_en; + update = NFP_NET_CFG_UPDATE_GEN; - /* - * Allocate RX ring hardware descriptors. A memzone large enough to - * handle the maximum ring size is allocated in order to allow for - * resizing in later calls to the queue setup function. 
- */ - tz = rte_eth_dma_zone_reserve(dev, "rx_ring", queue_idx, - sizeof(struct nfp_net_rx_desc) * - NFP_NET_MAX_RX_DESC, NFP_MEMZONE_ALIGN, - socket_id); + ret = nfp_net_reconfig(hw, new_ctrl, update); + if (!ret) + hw->ctrl = new_ctrl; - if (tz == NULL) { - PMD_DRV_LOG(ERR, "Error allocating rx dma"); - nfp_net_rx_queue_release(rxq); - return -ENOMEM; - } + return ret; +} - /* Saving physical and virtual addresses for the RX ring */ - rxq->dma = (uint64_t)tz->iova; - rxq->rxds = (struct nfp_net_rx_desc *)tz->addr; +static int +nfp_net_rss_reta_write(struct rte_eth_dev *dev, + struct rte_eth_rss_reta_entry64 *reta_conf, + uint16_t reta_size) +{ + uint32_t reta, mask; + int i, j; + int idx, shift; + struct nfp_net_hw *hw = + NFP_NET_DEV_PRIVATE_TO_HW(dev->data->dev_private); - /* mbuf pointers array for referencing mbufs linked to RX descriptors */ - rxq->rxbufs = rte_zmalloc_socket("rxq->rxbufs", - sizeof(*rxq->rxbufs) * nb_desc, - RTE_CACHE_LINE_SIZE, socket_id); - if (rxq->rxbufs == NULL) { - nfp_net_rx_queue_release(rxq); - return -ENOMEM; + if (reta_size != NFP_NET_CFG_RSS_ITBL_SZ) { + PMD_DRV_LOG(ERR, "The size of hash lookup table configured " + "(%d) doesn't match the number hardware can supported " + "(%d)", reta_size, NFP_NET_CFG_RSS_ITBL_SZ); + return -EINVAL; } - PMD_RX_LOG(DEBUG, "rxbufs=%p hw_ring=%p dma_addr=0x%" PRIx64, - rxq->rxbufs, rxq->rxds, (unsigned long int)rxq->dma); - - nfp_net_reset_rx_queue(rxq); - - dev->data->rx_queues[queue_idx] = rxq; - rxq->hw = hw; - /* - * Telling the HW about the physical address of the RX ring and number - * of descriptors in log2 format - */ - nn_cfg_writeq(hw, NFP_NET_CFG_RXR_ADDR(queue_idx), rxq->dma); - nn_cfg_writeb(hw, NFP_NET_CFG_RXR_SZ(queue_idx), rte_log2_u32(nb_desc)); - - return 0; -} - -static int -nfp_net_rx_fill_freelist(struct nfp_net_rxq *rxq) -{ - struct nfp_net_rx_buff *rxe = rxq->rxbufs; - uint64_t dma_addr; - unsigned i; - - PMD_RX_LOG(DEBUG, "nfp_net_rx_fill_freelist for %u descriptors", - rxq->rx_count); - - for (i = 0; i < rxq->rx_count; i++) { - struct nfp_net_rx_desc *rxd; - struct rte_mbuf *mbuf = rte_pktmbuf_alloc(rxq->mem_pool); - - if (mbuf == NULL) { - PMD_DRV_LOG(ERR, "RX mbuf alloc failed queue_id=%u", - (unsigned)rxq->qidx); - return -ENOMEM; - } - - dma_addr = rte_cpu_to_le_64(RTE_MBUF_DMA_ADDR_DEFAULT(mbuf)); - - rxd = &rxq->rxds[i]; - rxd->fld.dd = 0; - rxd->fld.dma_addr_hi = (dma_addr >> 32) & 0xff; - rxd->fld.dma_addr_lo = dma_addr & 0xffffffff; - rxe[i].mbuf = mbuf; - PMD_RX_LOG(DEBUG, "[%d]: %" PRIx64, i, dma_addr); - } - - /* Make sure all writes are flushed before telling the hardware */ - rte_wmb(); - - /* Not advertising the whole ring as the firmware gets confused if so */ - PMD_RX_LOG(DEBUG, "Increment FL write pointer in %u", - rxq->rx_count - 1); - - nfp_qcp_ptr_add(rxq->qcp_fl, NFP_QCP_WRITE_PTR, rxq->rx_count - 1); - - return 0; -} - -static int -nfp_net_tx_queue_setup(struct rte_eth_dev *dev, uint16_t queue_idx, - uint16_t nb_desc, unsigned int socket_id, - const struct rte_eth_txconf *tx_conf) -{ - const struct rte_memzone *tz; - struct nfp_net_txq *txq; - uint16_t tx_free_thresh; - struct nfp_net_hw *hw; - uint32_t tx_desc_sz; - - hw = NFP_NET_DEV_PRIVATE_TO_HW(dev->data->dev_private); - - PMD_INIT_FUNC_TRACE(); - - /* Validating number of descriptors */ - tx_desc_sz = nb_desc * sizeof(struct nfp_net_tx_desc); - if (tx_desc_sz % NFP_ALIGN_RING_DESC != 0 || - nb_desc > NFP_NET_MAX_TX_DESC || - nb_desc < NFP_NET_MIN_TX_DESC) { - PMD_DRV_LOG(ERR, "Wrong nb_desc value"); - return -EINVAL; 
- } - - tx_free_thresh = (uint16_t)((tx_conf->tx_free_thresh) ? - tx_conf->tx_free_thresh : - DEFAULT_TX_FREE_THRESH); - - if (tx_free_thresh > (nb_desc)) { - PMD_DRV_LOG(ERR, - "tx_free_thresh must be less than the number of TX " - "descriptors. (tx_free_thresh=%u port=%d " - "queue=%d)", (unsigned int)tx_free_thresh, - dev->data->port_id, (int)queue_idx); - return -(EINVAL); - } - - /* - * Free memory prior to re-allocation if needed. This is the case after - * calling nfp_net_stop - */ - if (dev->data->tx_queues[queue_idx]) { - PMD_TX_LOG(DEBUG, "Freeing memory prior to re-allocation %d", - queue_idx); - nfp_net_tx_queue_release(dev->data->tx_queues[queue_idx]); - dev->data->tx_queues[queue_idx] = NULL; - } - - /* Allocating tx queue data structure */ - txq = rte_zmalloc_socket("ethdev TX queue", sizeof(struct nfp_net_txq), - RTE_CACHE_LINE_SIZE, socket_id); - if (txq == NULL) { - PMD_DRV_LOG(ERR, "Error allocating tx dma"); - return -ENOMEM; - } - - /* - * Allocate TX ring hardware descriptors. A memzone large enough to - * handle the maximum ring size is allocated in order to allow for - * resizing in later calls to the queue setup function. - */ - tz = rte_eth_dma_zone_reserve(dev, "tx_ring", queue_idx, - sizeof(struct nfp_net_tx_desc) * - NFP_NET_MAX_TX_DESC, NFP_MEMZONE_ALIGN, - socket_id); - if (tz == NULL) { - PMD_DRV_LOG(ERR, "Error allocating tx dma"); - nfp_net_tx_queue_release(txq); - return -ENOMEM; - } - - txq->tx_count = nb_desc; - txq->tx_free_thresh = tx_free_thresh; - txq->tx_pthresh = tx_conf->tx_thresh.pthresh; - txq->tx_hthresh = tx_conf->tx_thresh.hthresh; - txq->tx_wthresh = tx_conf->tx_thresh.wthresh; - - /* queue mapping based on firmware configuration */ - txq->qidx = queue_idx; - txq->tx_qcidx = queue_idx * hw->stride_tx; - txq->qcp_q = hw->tx_bar + NFP_QCP_QUEUE_OFF(txq->tx_qcidx); - - txq->port_id = dev->data->port_id; - - /* Saving physical and virtual addresses for the TX ring */ - txq->dma = (uint64_t)tz->iova; - txq->txds = (struct nfp_net_tx_desc *)tz->addr; - - /* mbuf pointers array for referencing mbufs linked to TX descriptors */ - txq->txbufs = rte_zmalloc_socket("txq->txbufs", - sizeof(*txq->txbufs) * nb_desc, - RTE_CACHE_LINE_SIZE, socket_id); - if (txq->txbufs == NULL) { - nfp_net_tx_queue_release(txq); - return -ENOMEM; - } - PMD_TX_LOG(DEBUG, "txbufs=%p hw_ring=%p dma_addr=0x%" PRIx64, - txq->txbufs, txq->txds, (unsigned long int)txq->dma); - - nfp_net_reset_tx_queue(txq); - - dev->data->tx_queues[queue_idx] = txq; - txq->hw = hw; - - /* - * Telling the HW about the physical address of the TX ring and number - * of descriptors in log2 format - */ - nn_cfg_writeq(hw, NFP_NET_CFG_TXR_ADDR(queue_idx), txq->dma); - nn_cfg_writeb(hw, NFP_NET_CFG_TXR_SZ(queue_idx), rte_log2_u32(nb_desc)); - - return 0; -} - -/* nfp_net_tx_tso - Set TX descriptor for TSO */ -static inline void -nfp_net_tx_tso(struct nfp_net_txq *txq, struct nfp_net_tx_desc *txd, - struct rte_mbuf *mb) -{ - uint64_t ol_flags; - struct nfp_net_hw *hw = txq->hw; - - if (!(hw->cap & NFP_NET_CFG_CTRL_LSO_ANY)) - goto clean_txd; - - ol_flags = mb->ol_flags; - - if (!(ol_flags & PKT_TX_TCP_SEG)) - goto clean_txd; - - txd->l3_offset = mb->l2_len; - txd->l4_offset = mb->l2_len + mb->l3_len; - txd->lso_hdrlen = mb->l2_len + mb->l3_len + mb->l4_len; - txd->mss = rte_cpu_to_le_16(mb->tso_segsz); - txd->flags = PCIE_DESC_TX_LSO; - return; - -clean_txd: - txd->flags = 0; - txd->l3_offset = 0; - txd->l4_offset = 0; - txd->lso_hdrlen = 0; - txd->mss = 0; -} - -/* nfp_net_tx_cksum - Set TX CSUM 
offload flags in TX descriptor */ -static inline void -nfp_net_tx_cksum(struct nfp_net_txq *txq, struct nfp_net_tx_desc *txd, - struct rte_mbuf *mb) -{ - uint64_t ol_flags; - struct nfp_net_hw *hw = txq->hw; - - if (!(hw->cap & NFP_NET_CFG_CTRL_TXCSUM)) - return; - - ol_flags = mb->ol_flags; - - /* IPv6 does not need checksum */ - if (ol_flags & PKT_TX_IP_CKSUM) - txd->flags |= PCIE_DESC_TX_IP4_CSUM; - - switch (ol_flags & PKT_TX_L4_MASK) { - case PKT_TX_UDP_CKSUM: - txd->flags |= PCIE_DESC_TX_UDP_CSUM; - break; - case PKT_TX_TCP_CKSUM: - txd->flags |= PCIE_DESC_TX_TCP_CSUM; - break; - } - - if (ol_flags & (PKT_TX_IP_CKSUM | PKT_TX_L4_MASK)) - txd->flags |= PCIE_DESC_TX_CSUM; -} - -/* nfp_net_rx_cksum - set mbuf checksum flags based on RX descriptor flags */ -static inline void -nfp_net_rx_cksum(struct nfp_net_rxq *rxq, struct nfp_net_rx_desc *rxd, - struct rte_mbuf *mb) -{ - struct nfp_net_hw *hw = rxq->hw; - - if (!(hw->ctrl & NFP_NET_CFG_CTRL_RXCSUM)) - return; - - /* If IPv4 and IP checksum error, fail */ - if (unlikely((rxd->rxd.flags & PCIE_DESC_RX_IP4_CSUM) && - !(rxd->rxd.flags & PCIE_DESC_RX_IP4_CSUM_OK))) - mb->ol_flags |= PKT_RX_IP_CKSUM_BAD; - else - mb->ol_flags |= PKT_RX_IP_CKSUM_GOOD; - - /* If neither UDP nor TCP return */ - if (!(rxd->rxd.flags & PCIE_DESC_RX_TCP_CSUM) && - !(rxd->rxd.flags & PCIE_DESC_RX_UDP_CSUM)) - return; - - if (likely(rxd->rxd.flags & PCIE_DESC_RX_L4_CSUM_OK)) - mb->ol_flags |= PKT_RX_L4_CKSUM_GOOD; - else - mb->ol_flags |= PKT_RX_L4_CKSUM_BAD; -} - -#define NFP_HASH_OFFSET ((uint8_t *)mbuf->buf_addr + mbuf->data_off - 4) -#define NFP_HASH_TYPE_OFFSET ((uint8_t *)mbuf->buf_addr + mbuf->data_off - 8) - -#define NFP_DESC_META_LEN(d) (d->rxd.meta_len_dd & PCIE_DESC_RX_META_LEN_MASK) - -/* - * nfp_net_set_hash - Set mbuf hash data - * - * The RSS hash and hash-type are pre-pended to the packet data. - * Extract and decode it and set the mbuf fields. 
- */ -static inline void -nfp_net_set_hash(struct nfp_net_rxq *rxq, struct nfp_net_rx_desc *rxd, - struct rte_mbuf *mbuf) -{ - struct nfp_net_hw *hw = rxq->hw; - uint8_t *meta_offset; - uint32_t meta_info; - uint32_t hash = 0; - uint32_t hash_type = 0; - - if (!(hw->ctrl & NFP_NET_CFG_CTRL_RSS)) - return; - - /* this is true for new firmwares */ - if (likely(((hw->cap & NFP_NET_CFG_CTRL_RSS2) || - (NFD_CFG_MAJOR_VERSION_of(hw->ver) == 4)) && - NFP_DESC_META_LEN(rxd))) { - /* - * new metadata api: - * <---- 32 bit -----> - * m field type word - * e data field #2 - * t data field #1 - * a data field #0 - * ==================== - * packet data - * - * Field type word contains up to 8 4bit field types - * A 4bit field type refers to a data field word - * A data field word can have several 4bit field types - */ - meta_offset = rte_pktmbuf_mtod(mbuf, uint8_t *); - meta_offset -= NFP_DESC_META_LEN(rxd); - meta_info = rte_be_to_cpu_32(*(uint32_t *)meta_offset); - meta_offset += 4; - /* NFP PMD just supports metadata for hashing */ - switch (meta_info & NFP_NET_META_FIELD_MASK) { - case NFP_NET_META_HASH: - /* next field type is about the hash type */ - meta_info >>= NFP_NET_META_FIELD_SIZE; - /* hash value is in the data field */ - hash = rte_be_to_cpu_32(*(uint32_t *)meta_offset); - hash_type = meta_info & NFP_NET_META_FIELD_MASK; - break; - default: - /* Unsupported metadata can be a performance issue */ - return; - } - } else { - if (!(rxd->rxd.flags & PCIE_DESC_RX_RSS)) - return; - - hash = rte_be_to_cpu_32(*(uint32_t *)NFP_HASH_OFFSET); - hash_type = rte_be_to_cpu_32(*(uint32_t *)NFP_HASH_TYPE_OFFSET); - } - - mbuf->hash.rss = hash; - mbuf->ol_flags |= PKT_RX_RSS_HASH; - - switch (hash_type) { - case NFP_NET_RSS_IPV4: - mbuf->packet_type |= RTE_PTYPE_INNER_L3_IPV4; - break; - case NFP_NET_RSS_IPV6: - mbuf->packet_type |= RTE_PTYPE_INNER_L3_IPV6; - break; - case NFP_NET_RSS_IPV6_EX: - mbuf->packet_type |= RTE_PTYPE_INNER_L3_IPV6_EXT; - break; - case NFP_NET_RSS_IPV4_TCP: - mbuf->packet_type |= RTE_PTYPE_INNER_L3_IPV6_EXT; - break; - case NFP_NET_RSS_IPV6_TCP: - mbuf->packet_type |= RTE_PTYPE_INNER_L3_IPV6_EXT; - break; - case NFP_NET_RSS_IPV4_UDP: - mbuf->packet_type |= RTE_PTYPE_INNER_L3_IPV6_EXT; - break; - case NFP_NET_RSS_IPV6_UDP: - mbuf->packet_type |= RTE_PTYPE_INNER_L3_IPV6_EXT; - break; - default: - mbuf->packet_type |= RTE_PTYPE_INNER_L4_MASK; - } -} - -static inline void -nfp_net_mbuf_alloc_failed(struct nfp_net_rxq *rxq) -{ - rte_eth_devices[rxq->port_id].data->rx_mbuf_alloc_failed++; -} - -#define NFP_DESC_META_LEN(d) (d->rxd.meta_len_dd & PCIE_DESC_RX_META_LEN_MASK) - -/* - * RX path design: - * - * There are some decisions to take: - * 1) How to check DD RX descriptors bit - * 2) How and when to allocate new mbufs - * - * Current implementation checks just one single DD bit each loop. As each - * descriptor is 8 bytes, it is likely a good idea to check descriptors in - * a single cache line instead. Tests with this change have not shown any - * performance improvement but it requires further investigation. For example, - * depending on which descriptor is next, the number of descriptors could be - * less than 8 for just checking those in the same cache line. This implies - * extra work which could be counterproductive by itself. Indeed, last firmware - * changes are just doing this: writing several descriptors with the DD bit - * for saving PCIe bandwidth and DMA operations from the NFP. - * - * Mbuf allocation is done when a new packet is received. 
Then the descriptor - * is automatically linked with the new mbuf and the old one is given to the - * user. The main drawback with this design is mbuf allocation is heavier than - * using bulk allocations allowed by DPDK with rte_mempool_get_bulk. From the - * cache point of view it does not seem allocating the mbuf early on as we are - * doing now have any benefit at all. Again, tests with this change have not - * shown any improvement. Also, rte_mempool_get_bulk returns all or nothing - * so looking at the implications of this type of allocation should be studied - * deeply - */ - -static uint16_t -nfp_net_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t nb_pkts) -{ - struct nfp_net_rxq *rxq; - struct nfp_net_rx_desc *rxds; - struct nfp_net_rx_buff *rxb; - struct nfp_net_hw *hw; - struct rte_mbuf *mb; - struct rte_mbuf *new_mb; - uint16_t nb_hold; - uint64_t dma_addr; - int avail; - - rxq = rx_queue; - if (unlikely(rxq == NULL)) { - /* - * DPDK just checks the queue is lower than max queues - * enabled. But the queue needs to be configured - */ - RTE_LOG_DP(ERR, PMD, "RX Bad queue\n"); - return -EINVAL; - } - - hw = rxq->hw; - avail = 0; - nb_hold = 0; - - while (avail < nb_pkts) { - rxb = &rxq->rxbufs[rxq->rd_p]; - if (unlikely(rxb == NULL)) { - RTE_LOG_DP(ERR, PMD, "rxb does not exist!\n"); - break; - } - - rxds = &rxq->rxds[rxq->rd_p]; - if ((rxds->rxd.meta_len_dd & PCIE_DESC_RX_DD) == 0) - break; - - /* - * Memory barrier to ensure that we won't do other - * reads before the DD bit. - */ - rte_rmb(); - - /* - * We got a packet. Let's alloc a new mbuf for refilling the - * free descriptor ring as soon as possible - */ - new_mb = rte_pktmbuf_alloc(rxq->mem_pool); - if (unlikely(new_mb == NULL)) { - RTE_LOG_DP(DEBUG, PMD, - "RX mbuf alloc failed port_id=%u queue_id=%u\n", - rxq->port_id, (unsigned int)rxq->qidx); - nfp_net_mbuf_alloc_failed(rxq); - break; - } - - nb_hold++; - - /* - * Grab the mbuf and refill the descriptor with the - * previously allocated mbuf - */ - mb = rxb->mbuf; - rxb->mbuf = new_mb; - - PMD_RX_LOG(DEBUG, "Packet len: %u, mbuf_size: %u", - rxds->rxd.data_len, rxq->mbuf_size); - - /* Size of this segment */ - mb->data_len = rxds->rxd.data_len - NFP_DESC_META_LEN(rxds); - /* Size of the whole packet. We just support 1 segment */ - mb->pkt_len = rxds->rxd.data_len - NFP_DESC_META_LEN(rxds); - - if (unlikely((mb->data_len + hw->rx_offset) > - rxq->mbuf_size)) { - /* - * This should not happen and the user has the - * responsibility of avoiding it. 
But we have - * to give some info about the error - */ - RTE_LOG_DP(ERR, PMD, - "mbuf overflow likely due to the RX offset.\n" - "\t\tYour mbuf size should have extra space for" - " RX offset=%u bytes.\n" - "\t\tCurrently you just have %u bytes available" - " but the received packet is %u bytes long", - hw->rx_offset, - rxq->mbuf_size - hw->rx_offset, - mb->data_len); - return -EINVAL; - } - - /* Filling the received mbuf with packet info */ - if (hw->rx_offset) - mb->data_off = RTE_PKTMBUF_HEADROOM + hw->rx_offset; - else - mb->data_off = RTE_PKTMBUF_HEADROOM + - NFP_DESC_META_LEN(rxds); - - /* No scatter mode supported */ - mb->nb_segs = 1; - mb->next = NULL; - - mb->port = rxq->port_id; - - /* Checking the RSS flag */ - nfp_net_set_hash(rxq, rxds, mb); - - /* Checking the checksum flag */ - nfp_net_rx_cksum(rxq, rxds, mb); - - if ((rxds->rxd.flags & PCIE_DESC_RX_VLAN) && - (hw->ctrl & NFP_NET_CFG_CTRL_RXVLAN)) { - mb->vlan_tci = rte_cpu_to_le_32(rxds->rxd.vlan); - mb->ol_flags |= PKT_RX_VLAN | PKT_RX_VLAN_STRIPPED; - } - - /* Adding the mbuf to the mbuf array passed by the app */ - rx_pkts[avail++] = mb; - - /* Now resetting and updating the descriptor */ - rxds->vals[0] = 0; - rxds->vals[1] = 0; - dma_addr = rte_cpu_to_le_64(RTE_MBUF_DMA_ADDR_DEFAULT(new_mb)); - rxds->fld.dd = 0; - rxds->fld.dma_addr_hi = (dma_addr >> 32) & 0xff; - rxds->fld.dma_addr_lo = dma_addr & 0xffffffff; - - rxq->rd_p++; - if (unlikely(rxq->rd_p == rxq->rx_count)) /* wrapping?*/ - rxq->rd_p = 0; - } - - if (nb_hold == 0) - return nb_hold; - - PMD_RX_LOG(DEBUG, "RX port_id=%u queue_id=%u, %d packets received", - rxq->port_id, (unsigned int)rxq->qidx, nb_hold); - - nb_hold += rxq->nb_rx_hold; - - /* - * FL descriptors needs to be written before incrementing the - * FL queue WR pointer - */ - rte_wmb(); - if (nb_hold > rxq->rx_free_thresh) { - PMD_RX_LOG(DEBUG, "port=%u queue=%u nb_hold=%u avail=%u", - rxq->port_id, (unsigned int)rxq->qidx, - (unsigned)nb_hold, (unsigned)avail); - nfp_qcp_ptr_add(rxq->qcp_fl, NFP_QCP_WRITE_PTR, nb_hold); - nb_hold = 0; - } - rxq->nb_rx_hold = nb_hold; - - return avail; -} - -/* - * nfp_net_tx_free_bufs - Check for descriptors with a complete - * status - * @txq: TX queue to work with - * Returns number of descriptors freed - */ -int -nfp_net_tx_free_bufs(struct nfp_net_txq *txq) -{ - uint32_t qcp_rd_p; - int todo; - - PMD_TX_LOG(DEBUG, "queue %u. 
Check for descriptor with a complete" - " status", txq->qidx); - - /* Work out how many packets have been sent */ - qcp_rd_p = nfp_qcp_read(txq->qcp_q, NFP_QCP_READ_PTR); - - if (qcp_rd_p == txq->rd_p) { - PMD_TX_LOG(DEBUG, "queue %u: It seems harrier is not sending " - "packets (%u, %u)", txq->qidx, - qcp_rd_p, txq->rd_p); - return 0; - } - - if (qcp_rd_p > txq->rd_p) - todo = qcp_rd_p - txq->rd_p; - else - todo = qcp_rd_p + txq->tx_count - txq->rd_p; - - PMD_TX_LOG(DEBUG, "qcp_rd_p %u, txq->rd_p: %u, qcp->rd_p: %u", - qcp_rd_p, txq->rd_p, txq->rd_p); - - if (todo == 0) - return todo; - - txq->rd_p += todo; - if (unlikely(txq->rd_p >= txq->tx_count)) - txq->rd_p -= txq->tx_count; - - return todo; -} - -/* Leaving always free descriptors for avoiding wrapping confusion */ -static inline -uint32_t nfp_free_tx_desc(struct nfp_net_txq *txq) -{ - if (txq->wr_p >= txq->rd_p) - return txq->tx_count - (txq->wr_p - txq->rd_p) - 8; - else - return txq->rd_p - txq->wr_p - 8; -} - -/* - * nfp_net_txq_full - Check if the TX queue free descriptors - * is below tx_free_threshold - * - * @txq: TX queue to check - * - * This function uses the host copy* of read/write pointers - */ -static inline -uint32_t nfp_net_txq_full(struct nfp_net_txq *txq) -{ - return (nfp_free_tx_desc(txq) < txq->tx_free_thresh); -} - -static uint16_t -nfp_net_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t nb_pkts) -{ - struct nfp_net_txq *txq; - struct nfp_net_hw *hw; - struct nfp_net_tx_desc *txds, txd; - struct rte_mbuf *pkt; - uint64_t dma_addr; - int pkt_size, dma_size; - uint16_t free_descs, issued_descs; - struct rte_mbuf **lmbuf; - int i; - - txq = tx_queue; - hw = txq->hw; - txds = &txq->txds[txq->wr_p]; - - PMD_TX_LOG(DEBUG, "working for queue %u at pos %d and %u packets", - txq->qidx, txq->wr_p, nb_pkts); - - if ((nfp_free_tx_desc(txq) < nb_pkts) || (nfp_net_txq_full(txq))) - nfp_net_tx_free_bufs(txq); - - free_descs = (uint16_t)nfp_free_tx_desc(txq); - if (unlikely(free_descs == 0)) - return 0; - - pkt = *tx_pkts; - - i = 0; - issued_descs = 0; - PMD_TX_LOG(DEBUG, "queue: %u. Sending %u packets", - txq->qidx, nb_pkts); - /* Sending packets */ - while ((i < nb_pkts) && free_descs) { - /* Grabbing the mbuf linked to the current descriptor */ - lmbuf = &txq->txbufs[txq->wr_p].mbuf; - /* Warming the cache for releasing the mbuf later on */ - RTE_MBUF_PREFETCH_TO_FREE(*lmbuf); - - pkt = *(tx_pkts + i); - - if (unlikely((pkt->nb_segs > 1) && - !(hw->cap & NFP_NET_CFG_CTRL_GATHER))) { - PMD_INIT_LOG(INFO, "NFP_NET_CFG_CTRL_GATHER not set"); - rte_panic("Multisegment packet unsupported\n"); - } - - /* Checking if we have enough descriptors */ - if (unlikely(pkt->nb_segs > free_descs)) - goto xmit_end; - - /* - * Checksum and VLAN flags just in the first descriptor for a - * multisegment packet, but TSO info needs to be in all of them. - */ - txd.data_len = pkt->pkt_len; - nfp_net_tx_tso(txq, &txd, pkt); - nfp_net_tx_cksum(txq, &txd, pkt); - - if ((pkt->ol_flags & PKT_TX_VLAN_PKT) && - (hw->cap & NFP_NET_CFG_CTRL_TXVLAN)) { - txd.flags |= PCIE_DESC_TX_VLAN; - txd.vlan = pkt->vlan_tci; - } - - /* - * mbuf data_len is the data in one segment and pkt_len data - * in the whole packet. 
When the packet is just one segment, - * then data_len = pkt_len - */ - pkt_size = pkt->pkt_len; - - while (pkt) { - /* Copying TSO, VLAN and cksum info */ - *txds = txd; - - /* Releasing mbuf used by this descriptor previously*/ - if (*lmbuf) - rte_pktmbuf_free_seg(*lmbuf); - - /* - * Linking mbuf with descriptor for being released - * next time descriptor is used - */ - *lmbuf = pkt; - - dma_size = pkt->data_len; - dma_addr = rte_mbuf_data_iova(pkt); - PMD_TX_LOG(DEBUG, "Working with mbuf at dma address:" - "%" PRIx64 "", dma_addr); - - /* Filling descriptors fields */ - txds->dma_len = dma_size; - txds->data_len = txd.data_len; - txds->dma_addr_hi = (dma_addr >> 32) & 0xff; - txds->dma_addr_lo = (dma_addr & 0xffffffff); - ASSERT(free_descs > 0); - free_descs--; - - txq->wr_p++; - if (unlikely(txq->wr_p == txq->tx_count)) /* wrapping?*/ - txq->wr_p = 0; - - pkt_size -= dma_size; - - /* - * Making the EOP, packets with just one segment - * the priority - */ - if (likely(!pkt_size)) - txds->offset_eop = PCIE_DESC_TX_EOP; - else - txds->offset_eop = 0; - - pkt = pkt->next; - /* Referencing next free TX descriptor */ - txds = &txq->txds[txq->wr_p]; - lmbuf = &txq->txbufs[txq->wr_p].mbuf; - issued_descs++; - } - i++; - } - -xmit_end: - /* Increment write pointers. Force memory write before we let HW know */ - rte_wmb(); - nfp_qcp_ptr_add(txq->qcp_q, NFP_QCP_WRITE_PTR, issued_descs); - - return i; -} - -static int -nfp_net_vlan_offload_set(struct rte_eth_dev *dev, int mask) -{ - uint32_t new_ctrl, update; - struct nfp_net_hw *hw; - int ret; - - hw = NFP_NET_DEV_PRIVATE_TO_HW(dev->data->dev_private); - new_ctrl = 0; - - /* Enable vlan strip if it is not configured yet */ - if ((mask & ETH_VLAN_STRIP_OFFLOAD) && - !(hw->ctrl & NFP_NET_CFG_CTRL_RXVLAN)) - new_ctrl = hw->ctrl | NFP_NET_CFG_CTRL_RXVLAN; - - /* Disable vlan strip just if it is configured */ - if (!(mask & ETH_VLAN_STRIP_OFFLOAD) && - (hw->ctrl & NFP_NET_CFG_CTRL_RXVLAN)) - new_ctrl = hw->ctrl & ~NFP_NET_CFG_CTRL_RXVLAN; - - if (new_ctrl == 0) - return 0; - - update = NFP_NET_CFG_UPDATE_GEN; - - ret = nfp_net_reconfig(hw, new_ctrl, update); - if (!ret) - hw->ctrl = new_ctrl; - - return ret; -} - -static int -nfp_net_rss_reta_write(struct rte_eth_dev *dev, - struct rte_eth_rss_reta_entry64 *reta_conf, - uint16_t reta_size) -{ - uint32_t reta, mask; - int i, j; - int idx, shift; - struct nfp_net_hw *hw = - NFP_NET_DEV_PRIVATE_TO_HW(dev->data->dev_private); - - if (reta_size != NFP_NET_CFG_RSS_ITBL_SZ) { - PMD_DRV_LOG(ERR, "The size of hash lookup table configured " - "(%d) doesn't match the number hardware can supported " - "(%d)", reta_size, NFP_NET_CFG_RSS_ITBL_SZ); - return -EINVAL; - } - - /* - * Update Redirection Table. There are 128 8bit-entries which can be - * manage as 32 32bit-entries + * Update Redirection Table. There are 128 8bit-entries which can be + * manage as 32 32bit-entries */ for (i = 0; i < reta_size; i += 4) { /* Handling 4 RSS entries per loop */ @@ -2494,7 +1372,7 @@ nfp_net_rss_reta_write(struct rte_eth_dev *dev, } /* Update Redirection Table(RETA) of Receive Side Scaling of Ethernet device */ -static int +int nfp_net_reta_update(struct rte_eth_dev *dev, struct rte_eth_rss_reta_entry64 *reta_conf, uint16_t reta_size) @@ -2520,7 +1398,7 @@ nfp_net_reta_update(struct rte_eth_dev *dev, } /* Query Redirection Table(RETA) of Receive Side Scaling of Ethernet device. 
*/ -static int +int nfp_net_reta_query(struct rte_eth_dev *dev, struct rte_eth_rss_reta_entry64 *reta_conf, uint16_t reta_size) @@ -2617,7 +1495,7 @@ nfp_net_rss_hash_write(struct rte_eth_dev *dev, return 0; } -static int +int nfp_net_rss_hash_update(struct rte_eth_dev *dev, struct rte_eth_rss_conf *rss_conf) { @@ -2653,7 +1531,7 @@ nfp_net_rss_hash_update(struct rte_eth_dev *dev, return 0; } -static int +int nfp_net_rss_hash_conf_get(struct rte_eth_dev *dev, struct rte_eth_rss_conf *rss_conf) { @@ -2704,7 +1582,7 @@ nfp_net_rss_hash_conf_get(struct rte_eth_dev *dev, return 0; } -static int +int nfp_net_rss_config_default(struct rte_eth_dev *dev) { struct rte_eth_conf *dev_conf; @@ -2965,8 +1843,6 @@ nfp_net_init(struct rte_eth_dev *eth_dev) if (hw->is_phyport) { nfp_net_pf_read_mac(pf_dev, port); nfp_net_write_mac(hw, (uint8_t *)&hw->mac_addr); - } else { - nfp_net_vf_read_mac(hw); } if (!rte_is_valid_assigned_ether_addr( @@ -3015,353 +1891,6 @@ dev_err_ctrl_map: return err; } -#define NFP_CPP_MEMIO_BOUNDARY (1 << 20) - -/* - * Serving a write request to NFP from host programs. The request - * sends the write size and the CPP target. The bridge makes use - * of CPP interface handler configured by the PMD setup. - */ -static int -nfp_cpp_bridge_serve_write(int sockfd, struct nfp_cpp *cpp) -{ - struct nfp_cpp_area *area; - off_t offset, nfp_offset; - uint32_t cpp_id, pos, len; - uint32_t tmpbuf[16]; - size_t count, curlen, totlen = 0; - int err = 0; - - PMD_CPP_LOG(DEBUG, "%s: offset size %zu, count_size: %zu\n", __func__, - sizeof(off_t), sizeof(size_t)); - - /* Reading the count param */ - err = recv(sockfd, &count, sizeof(off_t), 0); - if (err != sizeof(off_t)) - return -EINVAL; - - curlen = count; - - /* Reading the offset param */ - err = recv(sockfd, &offset, sizeof(off_t), 0); - if (err != sizeof(off_t)) - return -EINVAL; - - /* Obtain target's CPP ID and offset in target */ - cpp_id = (offset >> 40) << 8; - nfp_offset = offset & ((1ull << 40) - 1); - - PMD_CPP_LOG(DEBUG, "%s: count %zu and offset %jd\n", __func__, count, - offset); - PMD_CPP_LOG(DEBUG, "%s: cpp_id %08x and nfp_offset %jd\n", __func__, - cpp_id, nfp_offset); - - /* Adjust length if not aligned */ - if (((nfp_offset + (off_t)count - 1) & ~(NFP_CPP_MEMIO_BOUNDARY - 1)) != - (nfp_offset & ~(NFP_CPP_MEMIO_BOUNDARY - 1))) { - curlen = NFP_CPP_MEMIO_BOUNDARY - - (nfp_offset & (NFP_CPP_MEMIO_BOUNDARY - 1)); - } - - while (count > 0) { - /* configure a CPP PCIe2CPP BAR for mapping the CPP target */ - area = nfp_cpp_area_alloc_with_name(cpp, cpp_id, "nfp.cdev", - nfp_offset, curlen); - if (!area) { - RTE_LOG(ERR, PMD, "%s: area alloc fail\n", __func__); - return -EIO; - } - - /* mapping the target */ - err = nfp_cpp_area_acquire(area); - if (err < 0) { - RTE_LOG(ERR, PMD, "area acquire failed\n"); - nfp_cpp_area_free(area); - return -EIO; - } - - for (pos = 0; pos < curlen; pos += len) { - len = curlen - pos; - if (len > sizeof(tmpbuf)) - len = sizeof(tmpbuf); - - PMD_CPP_LOG(DEBUG, "%s: Receive %u of %zu\n", __func__, - len, count); - err = recv(sockfd, tmpbuf, len, MSG_WAITALL); - if (err != (int)len) { - RTE_LOG(ERR, PMD, - "%s: error when receiving, %d of %zu\n", - __func__, err, count); - nfp_cpp_area_release(area); - nfp_cpp_area_free(area); - return -EIO; - } - err = nfp_cpp_area_write(area, pos, tmpbuf, len); - if (err < 0) { - RTE_LOG(ERR, PMD, "nfp_cpp_area_write error\n"); - nfp_cpp_area_release(area); - nfp_cpp_area_free(area); - return -EIO; - } - } - - nfp_offset += pos; - totlen += pos; - 
nfp_cpp_area_release(area); - nfp_cpp_area_free(area); - - count -= pos; - curlen = (count > NFP_CPP_MEMIO_BOUNDARY) ? - NFP_CPP_MEMIO_BOUNDARY : count; - } - - return 0; -} - -/* - * Serving a read request to NFP from host programs. The request - * sends the read size and the CPP target. The bridge makes use - * of CPP interface handler configured by the PMD setup. The read - * data is sent to the requester using the same socket. - */ -static int -nfp_cpp_bridge_serve_read(int sockfd, struct nfp_cpp *cpp) -{ - struct nfp_cpp_area *area; - off_t offset, nfp_offset; - uint32_t cpp_id, pos, len; - uint32_t tmpbuf[16]; - size_t count, curlen, totlen = 0; - int err = 0; - - PMD_CPP_LOG(DEBUG, "%s: offset size %zu, count_size: %zu\n", __func__, - sizeof(off_t), sizeof(size_t)); - - /* Reading the count param */ - err = recv(sockfd, &count, sizeof(off_t), 0); - if (err != sizeof(off_t)) - return -EINVAL; - - curlen = count; - - /* Reading the offset param */ - err = recv(sockfd, &offset, sizeof(off_t), 0); - if (err != sizeof(off_t)) - return -EINVAL; - - /* Obtain target's CPP ID and offset in target */ - cpp_id = (offset >> 40) << 8; - nfp_offset = offset & ((1ull << 40) - 1); - - PMD_CPP_LOG(DEBUG, "%s: count %zu and offset %jd\n", __func__, count, - offset); - PMD_CPP_LOG(DEBUG, "%s: cpp_id %08x and nfp_offset %jd\n", __func__, - cpp_id, nfp_offset); - - /* Adjust length if not aligned */ - if (((nfp_offset + (off_t)count - 1) & ~(NFP_CPP_MEMIO_BOUNDARY - 1)) != - (nfp_offset & ~(NFP_CPP_MEMIO_BOUNDARY - 1))) { - curlen = NFP_CPP_MEMIO_BOUNDARY - - (nfp_offset & (NFP_CPP_MEMIO_BOUNDARY - 1)); - } - - while (count > 0) { - area = nfp_cpp_area_alloc_with_name(cpp, cpp_id, "nfp.cdev", - nfp_offset, curlen); - if (!area) { - RTE_LOG(ERR, PMD, "%s: area alloc failed\n", __func__); - return -EIO; - } - - err = nfp_cpp_area_acquire(area); - if (err < 0) { - RTE_LOG(ERR, PMD, "area acquire failed\n"); - nfp_cpp_area_free(area); - return -EIO; - } - - for (pos = 0; pos < curlen; pos += len) { - len = curlen - pos; - if (len > sizeof(tmpbuf)) - len = sizeof(tmpbuf); - - err = nfp_cpp_area_read(area, pos, tmpbuf, len); - if (err < 0) { - RTE_LOG(ERR, PMD, "nfp_cpp_area_read error\n"); - nfp_cpp_area_release(area); - nfp_cpp_area_free(area); - return -EIO; - } - PMD_CPP_LOG(DEBUG, "%s: sending %u of %zu\n", __func__, - len, count); - - err = send(sockfd, tmpbuf, len, 0); - if (err != (int)len) { - RTE_LOG(ERR, PMD, - "%s: error when sending: %d of %zu\n", - __func__, err, count); - nfp_cpp_area_release(area); - nfp_cpp_area_free(area); - return -EIO; - } - } - - nfp_offset += pos; - totlen += pos; - nfp_cpp_area_release(area); - nfp_cpp_area_free(area); - - count -= pos; - curlen = (count > NFP_CPP_MEMIO_BOUNDARY) ? - NFP_CPP_MEMIO_BOUNDARY : count; - } - return 0; -} - -#define NFP_IOCTL 'n' -#define NFP_IOCTL_CPP_IDENTIFICATION _IOW(NFP_IOCTL, 0x8f, uint32_t) -/* - * Serving a ioctl command from host NFP tools. This usually goes to - * a kernel driver char driver but it is not available when the PF is - * bound to the PMD. Currently just one ioctl command is served and it - * does not require any CPP access at all. 
- */ -static int -nfp_cpp_bridge_serve_ioctl(int sockfd, struct nfp_cpp *cpp) -{ - uint32_t cmd, ident_size, tmp; - int err; - - /* Reading now the IOCTL command */ - err = recv(sockfd, &cmd, 4, 0); - if (err != 4) { - RTE_LOG(ERR, PMD, "%s: read error from socket\n", __func__); - return -EIO; - } - - /* Only supporting NFP_IOCTL_CPP_IDENTIFICATION */ - if (cmd != NFP_IOCTL_CPP_IDENTIFICATION) { - RTE_LOG(ERR, PMD, "%s: unknown cmd %d\n", __func__, cmd); - return -EINVAL; - } - - err = recv(sockfd, &ident_size, 4, 0); - if (err != 4) { - RTE_LOG(ERR, PMD, "%s: read error from socket\n", __func__); - return -EIO; - } - - tmp = nfp_cpp_model(cpp); - - PMD_CPP_LOG(DEBUG, "%s: sending NFP model %08x\n", __func__, tmp); - - err = send(sockfd, &tmp, 4, 0); - if (err != 4) { - RTE_LOG(ERR, PMD, "%s: error writing to socket\n", __func__); - return -EIO; - } - - tmp = cpp->interface; - - PMD_CPP_LOG(DEBUG, "%s: sending NFP interface %08x\n", __func__, tmp); - - err = send(sockfd, &tmp, 4, 0); - if (err != 4) { - RTE_LOG(ERR, PMD, "%s: error writing to socket\n", __func__); - return -EIO; - } - - return 0; -} - -#define NFP_BRIDGE_OP_READ 20 -#define NFP_BRIDGE_OP_WRITE 30 -#define NFP_BRIDGE_OP_IOCTL 40 - -/* - * This is the code to be executed by a service core. The CPP bridge interface - * is based on a unix socket and requests usually received by a kernel char - * driver, read, write and ioctl, are handled by the CPP bridge. NFP host tools - * can be executed with a wrapper library and LD_LIBRARY being completely - * unaware of the CPP bridge performing the NFP kernel char driver for CPP - * accesses. - */ -static int32_t -nfp_cpp_bridge_service_func(void *args) -{ - struct sockaddr address; - struct nfp_cpp *cpp = args; - int sockfd, datafd, op, ret; - - unlink("/tmp/nfp_cpp"); - sockfd = socket(AF_UNIX, SOCK_STREAM, 0); - if (sockfd < 0) { - RTE_LOG(ERR, PMD, "%s: socket creation error. Service failed\n", - __func__); - return -EIO; - } - - memset(&address, 0, sizeof(struct sockaddr)); - - address.sa_family = AF_UNIX; - strcpy(address.sa_data, "/tmp/nfp_cpp"); - - ret = bind(sockfd, (const struct sockaddr *)&address, - sizeof(struct sockaddr)); - if (ret < 0) { - RTE_LOG(ERR, PMD, "%s: bind error (%d). Service failed\n", - __func__, errno); - close(sockfd); - return ret; - } - - ret = listen(sockfd, 20); - if (ret < 0) { - RTE_LOG(ERR, PMD, "%s: listen error(%d). 
Service failed\n", - __func__, errno); - close(sockfd); - return ret; - } - - for (;;) { - datafd = accept(sockfd, NULL, NULL); - if (datafd < 0) { - RTE_LOG(ERR, PMD, "%s: accept call error (%d)\n", - __func__, errno); - RTE_LOG(ERR, PMD, "%s: service failed\n", __func__); - close(sockfd); - return -EIO; - } - - while (1) { - ret = recv(datafd, &op, 4, 0); - if (ret <= 0) { - PMD_CPP_LOG(DEBUG, "%s: socket close\n", - __func__); - break; - } - - PMD_CPP_LOG(DEBUG, "%s: getting op %u\n", __func__, op); - - if (op == NFP_BRIDGE_OP_READ) - nfp_cpp_bridge_serve_read(datafd, cpp); - - if (op == NFP_BRIDGE_OP_WRITE) - nfp_cpp_bridge_serve_write(datafd, cpp); - - if (op == NFP_BRIDGE_OP_IOCTL) - nfp_cpp_bridge_serve_ioctl(datafd, cpp); - - if (op == 0) - break; - } - close(datafd); - } - close(sockfd); - - return 0; -} - #define DEFAULT_FW_PATH "/lib/firmware/netronome" static int @@ -3484,34 +2013,14 @@ static int nfp_init_phyports(struct nfp_pf_dev *pf_dev) snprintf(port_name, sizeof(port_name), "%s_port%d", pf_dev->pci_dev->device.name, i); - if (rte_eal_process_type() != RTE_PROC_PRIMARY) { - eth_dev = rte_eth_dev_attach_secondary(port_name); - if (!eth_dev) { - RTE_LOG(ERR, EAL, - "secondary process attach failed, " - "ethdev doesn't exist"); - ret = -ENODEV; - goto error; - } - - eth_dev->process_private = pf_dev->cpp; - goto nfp_net_init; - } - - /* First port has already been initialized */ - if (i == 0) { - eth_dev = pf_dev->eth_dev; - goto skip_dev_alloc; - } - - /* Allocate a eth_dev for remaining ports */ + /* Allocate a eth_dev for this phyport */ eth_dev = rte_eth_dev_allocate(port_name); if (!eth_dev) { ret = -ENODEV; goto port_cleanup; } - /* Allocate memory for remaining ports */ + /* Allocate memory for this phyport */ eth_dev->data->dev_private = rte_zmalloc_socket(port_name, sizeof(struct nfp_net_hw), RTE_CACHE_LINE_SIZE, numa_node); @@ -3521,7 +2030,6 @@ static int nfp_init_phyports(struct nfp_pf_dev *pf_dev) goto port_cleanup; } -skip_dev_alloc: hw = NFP_NET_DEV_PRIVATE_TO_HW(eth_dev->data->dev_private); /* Add this device to the PF's array of physical ports */ @@ -3534,7 +2042,6 @@ skip_dev_alloc: hw->nfp_idx = nfp_eth_table->ports[i].index; hw->is_phyport = true; -nfp_net_init: eth_dev->device = &pf_dev->pci_dev->device; /* ctrl/tx/rx BAR mappings and remaining init happens in @@ -3568,24 +2075,18 @@ error: return ret; } -static int nfp_pf_init(struct rte_eth_dev *eth_dev) +static int nfp_pf_init(struct rte_pci_device *pci_dev) { - struct rte_pci_device *pci_dev; - struct nfp_net_hw *hw = NULL; struct nfp_pf_dev *pf_dev = NULL; struct nfp_cpp *cpp; struct nfp_hwinfo *hwinfo; struct nfp_rtsym_table *sym_tbl; struct nfp_eth_table *nfp_eth_table = NULL; - struct rte_service_spec service; char name[RTE_ETH_NAME_MAX_LEN]; int total_ports; int ret = -ENODEV; int err; - pci_dev = RTE_ETH_DEV_TO_PCI(eth_dev); - hw = NFP_NET_DEV_PRIVATE_TO_HW(eth_dev); - if (!pci_dev) return ret; @@ -3621,12 +2122,10 @@ static int nfp_pf_init(struct rte_eth_dev *eth_dev) goto hwinfo_cleanup; } - if (rte_eal_process_type() == RTE_PROC_PRIMARY) { - if (nfp_fw_setup(pci_dev, cpp, nfp_eth_table, hwinfo)) { - PMD_INIT_LOG(ERR, "Error when uploading firmware"); - ret = -EIO; - goto eth_table_cleanup; - } + if (nfp_fw_setup(pci_dev, cpp, nfp_eth_table, hwinfo)) { + PMD_INIT_LOG(ERR, "Error when uploading firmware"); + ret = -EIO; + goto eth_table_cleanup; } /* Now the symbol table should be there */ @@ -3653,7 +2152,7 @@ static int nfp_pf_init(struct rte_eth_dev *eth_dev) goto sym_tbl_cleanup; } /* 
Allocate memory for the PF "device" */ - snprintf(name, sizeof(name), "nfp_pf%d", eth_dev->data->port_id); + snprintf(name, sizeof(name), "nfp_pf%d", 0); pf_dev = rte_zmalloc(name, sizeof(*pf_dev), 0); if (!pf_dev) { ret = -ENOMEM; @@ -3671,9 +2170,6 @@ static int nfp_pf_init(struct rte_eth_dev *eth_dev) pf_dev->pci_dev = pci_dev; - /* The first eth_dev is part of the PF struct */ - pf_dev->eth_dev = eth_dev; - /* Map the symbol table */ pf_dev->ctrl_bar = nfp_rtsym_map(pf_dev->sym_tbl, "_pf0_net_bar0", pf_dev->total_phyports * 32768, @@ -3708,24 +2204,8 @@ static int nfp_pf_init(struct rte_eth_dev *eth_dev) goto hwqueues_cleanup; } - /* - * The rte_service needs to be created just once per PMD. - * And the cpp handler needs to be linked to the service. - * Secondary processes will be used for debugging DPDK apps - * when requiring to use the CPP interface for accessing NFP - * components. And the cpp handler for secondary processes is - * available at this point. - */ - memset(&service, 0, sizeof(struct rte_service_spec)); - snprintf(service.name, sizeof(service.name), "nfp_cpp_service"); - service.callback = nfp_cpp_bridge_service_func; - service.callback_userdata = (void *)cpp; - - if (rte_service_component_register(&service, - &hw->nfp_cpp_service_id)) - RTE_LOG(ERR, PMD, "NFP CPP bridge service register() failed"); - else - RTE_LOG(DEBUG, PMD, "NFP CPP bridge service registered"); + /* register the CPP bridge service here for primary use */ + nfp_register_cpp_service(pf_dev->cpp); return 0; @@ -3745,11 +2225,89 @@ error: return ret; } +/* + * When attaching to the NFP4000/6000 PF on a secondary process there + * is no need to initialize the PF again. Only minimal work is required + * here + */ +static int nfp_pf_secondary_init(struct rte_pci_device *pci_dev) +{ + struct nfp_cpp *cpp; + struct nfp_rtsym_table *sym_tbl; + int total_ports; + int i; + int err; + + if (!pci_dev) + return -ENODEV; + + /* + * When device bound to UIO, the device could be used, by mistake, + * by two DPDK apps, and the UIO driver does not avoid it. This + * could lead to a serious problem when configuring the NFP CPP + * interface. Here we avoid this telling to the CPP init code to + * use a lock file if UIO is being used. 
+ */ + if (pci_dev->kdrv == RTE_PCI_KDRV_VFIO) + cpp = nfp_cpp_from_device_name(pci_dev, 0); + else + cpp = nfp_cpp_from_device_name(pci_dev, 1); + + if (!cpp) { + PMD_INIT_LOG(ERR, "A CPP handle can not be obtained"); + return -EIO; + } + + /* + * We don't have access to the PF created in the primary process + * here so we have to read the number of ports from firmware + */ + sym_tbl = nfp_rtsym_table_read(cpp); + if (!sym_tbl) { + PMD_INIT_LOG(ERR, "Something is wrong with the firmware" + " symbol table"); + return -EIO; + } + + total_ports = nfp_rtsym_read_le(sym_tbl, "nfd_cfg_pf0_num_ports", &err); + + for (i = 0; i < total_ports; i++) { + struct rte_eth_dev *eth_dev; + char port_name[RTE_ETH_NAME_MAX_LEN]; + + snprintf(port_name, sizeof(port_name), "%s_port%d", + pci_dev->device.name, i); + + PMD_DRV_LOG(DEBUG, "Secondary attaching to port %s", + port_name); + eth_dev = rte_eth_dev_attach_secondary(port_name); + if (!eth_dev) { + RTE_LOG(ERR, EAL, + "secondary process attach failed, " + "ethdev doesn't exist"); + return -ENODEV; + } + eth_dev->process_private = cpp; + eth_dev->dev_ops = &nfp_net_eth_dev_ops; + eth_dev->rx_queue_count = nfp_net_rx_queue_count; + eth_dev->rx_pkt_burst = &nfp_net_recv_pkts; + eth_dev->tx_pkt_burst = &nfp_net_xmit_pkts; + rte_eth_dev_probing_finish(eth_dev); + } + + /* Register the CPP bridge service for the secondary too */ + nfp_register_cpp_service(cpp); + + return 0; +} + static int nfp_pf_pci_probe(struct rte_pci_driver *pci_drv __rte_unused, struct rte_pci_device *dev) { - return rte_eth_dev_pci_generic_probe(dev, - sizeof(struct nfp_net_hw), nfp_pf_init); + if (rte_eal_process_type() == RTE_PROC_PRIMARY) + return nfp_pf_init(dev); + else + return nfp_pf_secondary_init(dev); } static const struct rte_pci_id pci_id_nfp_pf_net_map[] = { @@ -3766,16 +2324,6 @@ static const struct rte_pci_id pci_id_nfp_pf_net_map[] = { }, }; -static const struct rte_pci_id pci_id_nfp_vf_net_map[] = { - { - RTE_PCI_DEVICE(PCI_VENDOR_ID_NETRONOME, - PCI_DEVICE_ID_NFP6000_VF_NIC) - }, - { - .vendor_id = 0, - }, -}; - static int nfp_pci_uninit(struct rte_eth_dev *eth_dev) { struct rte_pci_device *pci_dev; @@ -3799,13 +2347,6 @@ static int nfp_pci_uninit(struct rte_eth_dev *eth_dev) return nfp_net_close(eth_dev); } -static int eth_nfp_pci_probe(struct rte_pci_driver *pci_drv __rte_unused, - struct rte_pci_device *pci_dev) -{ - return rte_eth_dev_pci_generic_probe(pci_dev, - sizeof(struct nfp_net_adapter), nfp_net_init); -} - static int eth_nfp_pci_remove(struct rte_pci_device *pci_dev) { return rte_eth_dev_pci_generic_remove(pci_dev, nfp_pci_uninit); @@ -3818,19 +2359,9 @@ static struct rte_pci_driver rte_nfp_net_pf_pmd = { .remove = eth_nfp_pci_remove, }; -static struct rte_pci_driver rte_nfp_net_vf_pmd = { - .id_table = pci_id_nfp_vf_net_map, - .drv_flags = RTE_PCI_DRV_NEED_MAPPING | RTE_PCI_DRV_INTR_LSC, - .probe = eth_nfp_pci_probe, - .remove = eth_nfp_pci_remove, -}; - RTE_PMD_REGISTER_PCI(net_nfp_pf, rte_nfp_net_pf_pmd); -RTE_PMD_REGISTER_PCI(net_nfp_vf, rte_nfp_net_vf_pmd); RTE_PMD_REGISTER_PCI_TABLE(net_nfp_pf, pci_id_nfp_pf_net_map); -RTE_PMD_REGISTER_PCI_TABLE(net_nfp_vf, pci_id_nfp_vf_net_map); RTE_PMD_REGISTER_KMOD_DEP(net_nfp_pf, "* igb_uio | uio_pci_generic | vfio"); -RTE_PMD_REGISTER_KMOD_DEP(net_nfp_vf, "* igb_uio | uio_pci_generic | vfio"); RTE_LOG_REGISTER_SUFFIX(nfp_logtype_init, init, NOTICE); RTE_LOG_REGISTER_SUFFIX(nfp_logtype_driver, driver, NOTICE); /*
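
Note on the queue controller pointer protocol: nfp_qcp_ptr_add(), removed
above (and, given the new "nfp_rxtx.h" include, presumably relocated to the
new rxtx files), may add at most NFP_QCP_MAX_ADD (0x7f) to a queue pointer
per transaction, so larger increments are split into several writes. A
minimal standalone sketch of that discipline follows; qcp_add() is a
hypothetical stand-in for the nn_writel() MMIO access done on real hardware.

#include <stdint.h>
#include <stdio.h>

#define NFP_QCP_MAX_ADD 0x7f	/* max value added per transaction */

/* Stand-in for nn_writel(rte_cpu_to_le_32(chunk), q + off) */
static void
qcp_add(uint32_t chunk)
{
	printf("qcp write: +0x%02x\n", chunk);
}

/* Split an arbitrary increment into chunks of at most NFP_QCP_MAX_ADD */
static void
qcp_ptr_add(uint32_t val)
{
	while (val > NFP_QCP_MAX_ADD) {
		qcp_add(NFP_QCP_MAX_ADD);
		val -= NFP_QCP_MAX_ADD;
	}
	qcp_add(val);
}

int
main(void)
{
	qcp_ptr_add(300);	/* emits +0x7f, +0x7f, +0x2e */
	return 0;
}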
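
Note on the CPP bridge socket protocol: the service loop in
nfp_cpp_bridge_service_func() (removed above and now registered through
nfp_register_cpp_service()) accepts connections on /tmp/nfp_cpp and expects
a 4-byte op word (NFP_BRIDGE_OP_READ/WRITE/IOCTL) followed by op-specific
fields. Below is a hedged sketch of a host-side client for the ioctl path;
it mirrors the framing of nfp_cpp_bridge_serve_ioctl() shown above, but it
is not shipped with the PMD, and the ident_size value is illustrative.

#include <stdio.h>
#include <stdint.h>
#include <string.h>
#include <unistd.h>
#include <sys/socket.h>
#include <sys/un.h>
#include <sys/ioctl.h>

#define NFP_IOCTL 'n'
#define NFP_IOCTL_CPP_IDENTIFICATION _IOW(NFP_IOCTL, 0x8f, uint32_t)
#define NFP_BRIDGE_OP_IOCTL 40

int
main(void)
{
	struct sockaddr_un addr;
	uint32_t op = NFP_BRIDGE_OP_IOCTL;
	uint32_t cmd = NFP_IOCTL_CPP_IDENTIFICATION;
	uint32_t ident_size = 8;	/* two 4-byte words come back */
	uint32_t model, interface;
	int fd;

	fd = socket(AF_UNIX, SOCK_STREAM, 0);
	if (fd < 0)
		return 1;

	memset(&addr, 0, sizeof(addr));
	addr.sun_family = AF_UNIX;
	strcpy(addr.sun_path, "/tmp/nfp_cpp");
	if (connect(fd, (struct sockaddr *)&addr, sizeof(addr)) < 0) {
		close(fd);
		return 1;
	}

	/* Framing expected by the bridge: op word, then the ioctl fields */
	send(fd, &op, 4, 0);
	send(fd, &cmd, 4, 0);
	send(fd, &ident_size, 4, 0);

	/* The bridge answers with the NFP model and CPP interface words */
	recv(fd, &model, 4, MSG_WAITALL);
	recv(fd, &interface, 4, MSG_WAITALL);
	printf("NFP model %08x, interface %08x\n", model, interface);

	close(fd);
	return 0;
}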