From a5aeb2b9e225ef8efc6e7a96543cce670d05f88a Mon Sep 17 00:00:00 2001 From: Alvin Zhang Date: Wed, 15 Apr 2020 16:48:03 +0800 Subject: [PATCH] net/igc: support Rx and Tx Below ops are added too: mac_addr_add mac_addr_remove mac_addr_set set_mc_addr_list mtu_set promiscuous_enable promiscuous_disable allmulticast_enable allmulticast_disable rx_queue_setup rx_queue_release rx_queue_count rx_descriptor_done rx_descriptor_status tx_descriptor_status tx_queue_setup tx_queue_release tx_done_cleanup rxq_info_get txq_info_get dev_supported_ptypes_get Signed-off-by: Alvin Zhang Reviewed-by: Ferruh Yigit --- doc/guides/nics/features/igc.ini | 15 + drivers/net/igc/Makefile | 1 + drivers/net/igc/igc_ethdev.c | 326 ++++- drivers/net/igc/igc_ethdev.h | 62 + drivers/net/igc/igc_logs.h | 14 + drivers/net/igc/igc_txrx.c | 2109 ++++++++++++++++++++++++++++++ drivers/net/igc/igc_txrx.h | 50 + drivers/net/igc/meson.build | 3 +- 8 files changed, 2534 insertions(+), 46 deletions(-) create mode 100644 drivers/net/igc/igc_txrx.c create mode 100644 drivers/net/igc/igc_txrx.h diff --git a/doc/guides/nics/features/igc.ini b/doc/guides/nics/features/igc.ini index 0fbdf7c390..f910483a12 100644 --- a/doc/guides/nics/features/igc.ini +++ b/doc/guides/nics/features/igc.ini @@ -8,6 +8,21 @@ Link status = Y Link status event = Y FW version = Y LED = Y +Packet type parsing = Y +Rx descriptor status = Y +Tx descriptor status = Y +MTU update = Y +Jumbo frame = Y +Scattered Rx = Y +TSO = Y +Promiscuous mode = Y +Allmulticast mode = Y +Unicast MAC filter = Y +Multicast MAC filter = Y +RSS hash = Y +CRC offload = Y +L3 checksum offload = Y +L4 checksum offload = Y Linux UIO = Y Linux VFIO = Y x86-64 = Y diff --git a/drivers/net/igc/Makefile b/drivers/net/igc/Makefile index 0902811e93..c162c51ed7 100644 --- a/drivers/net/igc/Makefile +++ b/drivers/net/igc/Makefile @@ -33,5 +33,6 @@ SRCS-$(CONFIG_RTE_LIBRTE_IGC_PMD) += igc_osdep.c SRCS-$(CONFIG_RTE_LIBRTE_IGC_PMD) += igc_phy.c SRCS-$(CONFIG_RTE_LIBRTE_IGC_PMD) += igc_logs.c SRCS-$(CONFIG_RTE_LIBRTE_IGC_PMD) += igc_ethdev.c +SRCS-$(CONFIG_RTE_LIBRTE_IGC_PMD) += igc_txrx.c include $(RTE_SDK)/mk/rte.lib.mk diff --git a/drivers/net/igc/igc_ethdev.c b/drivers/net/igc/igc_ethdev.c index 423c817b8d..25998db0cf 100644 --- a/drivers/net/igc/igc_ethdev.c +++ b/drivers/net/igc/igc_ethdev.c @@ -12,7 +12,7 @@ #include #include "igc_logs.h" -#include "igc_ethdev.h" +#include "igc_txrx.h" #define IGC_INTEL_VENDOR_ID 0x8086 @@ -45,6 +45,23 @@ /* MSI-X other interrupt vector */ #define IGC_MSIX_OTHER_INTR_VEC 0 +/* External VLAN Enable bit mask */ +#define IGC_CTRL_EXT_EXT_VLAN (1u << 26) + +static const struct rte_eth_desc_lim rx_desc_lim = { + .nb_max = IGC_MAX_RXD, + .nb_min = IGC_MIN_RXD, + .nb_align = IGC_RXD_ALIGN, +}; + +static const struct rte_eth_desc_lim tx_desc_lim = { + .nb_max = IGC_MAX_TXD, + .nb_min = IGC_MIN_TXD, + .nb_align = IGC_TXD_ALIGN, + .nb_seg_max = IGC_TX_MAX_SEG, + .nb_mtu_seg_max = IGC_TX_MAX_MTU_SEG, +}; + static const struct rte_pci_id pci_id_igc_map[] = { { RTE_PCI_DEVICE(IGC_INTEL_VENDOR_ID, IGC_DEV_ID_I225_LM) }, { RTE_PCI_DEVICE(IGC_INTEL_VENDOR_ID, IGC_DEV_ID_I225_V) }, @@ -69,17 +86,18 @@ static int eth_igc_infos_get(struct rte_eth_dev *dev, struct rte_eth_dev_info *dev_info); static int eth_igc_led_on(struct rte_eth_dev *dev); static int eth_igc_led_off(struct rte_eth_dev *dev); -static void eth_igc_tx_queue_release(void *txq); -static void eth_igc_rx_queue_release(void *rxq); -static int -eth_igc_rx_queue_setup(struct rte_eth_dev *dev, uint16_t rx_queue_id, - uint16_t nb_rx_desc, unsigned int socket_id, - const struct rte_eth_rxconf *rx_conf, - struct rte_mempool *mb_pool); -static int -eth_igc_tx_queue_setup(struct rte_eth_dev *dev, uint16_t queue_idx, - uint16_t nb_desc, unsigned int socket_id, - const struct rte_eth_txconf *tx_conf); +static const uint32_t *eth_igc_supported_ptypes_get(struct rte_eth_dev *dev); +static int eth_igc_rar_set(struct rte_eth_dev *dev, + struct rte_ether_addr *mac_addr, uint32_t index, uint32_t pool); +static void eth_igc_rar_clear(struct rte_eth_dev *dev, uint32_t index); +static int eth_igc_default_mac_addr_set(struct rte_eth_dev *dev, + struct rte_ether_addr *addr); +static int eth_igc_set_mc_addr_list(struct rte_eth_dev *dev, + struct rte_ether_addr *mc_addr_set, + uint32_t nb_mc_addr); +static int eth_igc_allmulticast_enable(struct rte_eth_dev *dev); +static int eth_igc_allmulticast_disable(struct rte_eth_dev *dev); +static int eth_igc_mtu_set(struct rte_eth_dev *dev, uint16_t mtu); static const struct eth_dev_ops eth_igc_ops = { .dev_configure = eth_igc_configure, @@ -92,16 +110,30 @@ static const struct eth_dev_ops eth_igc_ops = { .dev_set_link_down = eth_igc_set_link_down, .promiscuous_enable = eth_igc_promiscuous_enable, .promiscuous_disable = eth_igc_promiscuous_disable, - + .allmulticast_enable = eth_igc_allmulticast_enable, + .allmulticast_disable = eth_igc_allmulticast_disable, .fw_version_get = eth_igc_fw_version_get, .dev_infos_get = eth_igc_infos_get, .dev_led_on = eth_igc_led_on, .dev_led_off = eth_igc_led_off, + .dev_supported_ptypes_get = eth_igc_supported_ptypes_get, + .mtu_set = eth_igc_mtu_set, + .mac_addr_add = eth_igc_rar_set, + .mac_addr_remove = eth_igc_rar_clear, + .mac_addr_set = eth_igc_default_mac_addr_set, + .set_mc_addr_list = eth_igc_set_mc_addr_list, .rx_queue_setup = eth_igc_rx_queue_setup, .rx_queue_release = eth_igc_rx_queue_release, + .rx_queue_count = eth_igc_rx_queue_count, + .rx_descriptor_done = eth_igc_rx_descriptor_done, + .rx_descriptor_status = eth_igc_rx_descriptor_status, + .tx_descriptor_status = eth_igc_tx_descriptor_status, .tx_queue_setup = eth_igc_tx_queue_setup, .tx_queue_release = eth_igc_tx_queue_release, + .tx_done_cleanup = eth_igc_tx_done_cleanup, + .rxq_info_get = eth_igc_rxq_info_get, + .txq_info_get = eth_igc_txq_info_get, }; /* @@ -366,6 +398,32 @@ eth_igc_interrupt_handler(void *param) eth_igc_interrupt_action(dev); } +/* + * rx,tx enable/disable + */ +static void +eth_igc_rxtx_control(struct rte_eth_dev *dev, bool enable) +{ + struct igc_hw *hw = IGC_DEV_PRIVATE_HW(dev); + uint32_t tctl, rctl; + + tctl = IGC_READ_REG(hw, IGC_TCTL); + rctl = IGC_READ_REG(hw, IGC_RCTL); + + if (enable) { + /* enable Tx/Rx */ + tctl |= IGC_TCTL_EN; + rctl |= IGC_RCTL_EN; + } else { + /* disable Tx/Rx */ + tctl &= ~IGC_TCTL_EN; + rctl &= ~IGC_RCTL_EN; + } + IGC_WRITE_REG(hw, IGC_TCTL, tctl); + IGC_WRITE_REG(hw, IGC_RCTL, rctl); + IGC_WRITE_FLUSH(hw); +} + /* * This routine disables all traffic on the adapter by issuing a * global reset on the MAC. @@ -381,6 +439,9 @@ eth_igc_stop(struct rte_eth_dev *dev) adapter->stopped = 1; + /* disable receive and transmit */ + eth_igc_rxtx_control(dev, false); + /* disable all MSI-X interrupts */ IGC_WRITE_REG(hw, IGC_EIMC, 0x1f); IGC_WRITE_FLUSH(hw); @@ -405,6 +466,8 @@ eth_igc_stop(struct rte_eth_dev *dev) /* Power down the phy. Needed to make the link go Down */ eth_igc_set_link_down(dev); + igc_dev_clear_queues(dev); + /* clear the recorded link status */ memset(&link, 0, sizeof(link)); rte_eth_linkstatus_set(dev, &link); @@ -570,8 +633,7 @@ eth_igc_start(struct rte_eth_dev *dev) struct rte_pci_device *pci_dev = RTE_ETH_DEV_TO_PCI(dev); struct rte_intr_handle *intr_handle = &pci_dev->intr_handle; uint32_t *speeds; - int num_speeds; - bool autoneg; + int ret; PMD_INIT_FUNC_TRACE(); @@ -602,6 +664,16 @@ eth_igc_start(struct rte_eth_dev *dev) /* confiugre msix for rx interrupt */ igc_configure_msix_intr(dev); + igc_tx_init(dev); + + /* This can fail when allocating mbufs for descriptor rings */ + ret = igc_rx_init(dev); + if (ret) { + PMD_DRV_LOG(ERR, "Unable to initialize RX hardware"); + igc_dev_clear_queues(dev); + return ret; + } + igc_clear_hw_cntrs_base_generic(hw); /* Setup link speed and duplex */ @@ -610,8 +682,8 @@ eth_igc_start(struct rte_eth_dev *dev) hw->phy.autoneg_advertised = IGC_ALL_SPEED_DUPLEX_2500; hw->mac.autoneg = 1; } else { - num_speeds = 0; - autoneg = (*speeds & ETH_LINK_SPEED_FIXED) == 0; + int num_speeds = 0; + bool autoneg = (*speeds & ETH_LINK_SPEED_FIXED) == 0; /* Reset */ hw->phy.autoneg_advertised = 0; @@ -685,6 +757,7 @@ eth_igc_start(struct rte_eth_dev *dev) /* resume enabled intr since hw reset */ igc_intr_other_enable(dev); + eth_igc_rxtx_control(dev, true); eth_igc_link_update(dev, 0); return 0; @@ -692,6 +765,7 @@ eth_igc_start(struct rte_eth_dev *dev) error_invalid_config: PMD_DRV_LOG(ERR, "Invalid advertised speeds (%u) for port %u", dev->data->dev_conf.link_speeds, dev->data->port_id); + igc_dev_clear_queues(dev); return -EINVAL; } @@ -749,6 +823,27 @@ igc_reset_swfw_lock(struct igc_hw *hw) return IGC_SUCCESS; } +/* + * free all rx/tx queues. + */ +static void +igc_dev_free_queues(struct rte_eth_dev *dev) +{ + uint16_t i; + + for (i = 0; i < dev->data->nb_rx_queues; i++) { + eth_igc_rx_queue_release(dev->data->rx_queues[i]); + dev->data->rx_queues[i] = NULL; + } + dev->data->nb_rx_queues = 0; + + for (i = 0; i < dev->data->nb_tx_queues; i++) { + eth_igc_tx_queue_release(dev->data->tx_queues[i]); + dev->data->tx_queues[i] = NULL; + } + dev->data->nb_tx_queues = 0; +} + static void eth_igc_close(struct rte_eth_dev *dev) { @@ -776,6 +871,7 @@ eth_igc_close(struct rte_eth_dev *dev) igc_phy_hw_reset(hw); igc_hw_control_release(hw); + igc_dev_free_queues(dev); /* Reset any pending lock */ igc_reset_swfw_lock(hw); @@ -959,16 +1055,55 @@ eth_igc_reset(struct rte_eth_dev *dev) static int eth_igc_promiscuous_enable(struct rte_eth_dev *dev) { - PMD_INIT_FUNC_TRACE(); - RTE_SET_USED(dev); + struct igc_hw *hw = IGC_DEV_PRIVATE_HW(dev); + uint32_t rctl; + + rctl = IGC_READ_REG(hw, IGC_RCTL); + rctl |= (IGC_RCTL_UPE | IGC_RCTL_MPE); + IGC_WRITE_REG(hw, IGC_RCTL, rctl); return 0; } static int eth_igc_promiscuous_disable(struct rte_eth_dev *dev) { - PMD_INIT_FUNC_TRACE(); - RTE_SET_USED(dev); + struct igc_hw *hw = IGC_DEV_PRIVATE_HW(dev); + uint32_t rctl; + + rctl = IGC_READ_REG(hw, IGC_RCTL); + rctl &= (~IGC_RCTL_UPE); + if (dev->data->all_multicast == 1) + rctl |= IGC_RCTL_MPE; + else + rctl &= (~IGC_RCTL_MPE); + IGC_WRITE_REG(hw, IGC_RCTL, rctl); + return 0; +} + +static int +eth_igc_allmulticast_enable(struct rte_eth_dev *dev) +{ + struct igc_hw *hw = IGC_DEV_PRIVATE_HW(dev); + uint32_t rctl; + + rctl = IGC_READ_REG(hw, IGC_RCTL); + rctl |= IGC_RCTL_MPE; + IGC_WRITE_REG(hw, IGC_RCTL, rctl); + return 0; +} + +static int +eth_igc_allmulticast_disable(struct rte_eth_dev *dev) +{ + struct igc_hw *hw = IGC_DEV_PRIVATE_HW(dev); + uint32_t rctl; + + if (dev->data->promiscuous == 1) + return 0; /* must remain in all_multicast mode */ + + rctl = IGC_READ_REG(hw, IGC_RCTL); + rctl &= (~IGC_RCTL_MPE); + IGC_WRITE_REG(hw, IGC_RCTL, rctl); return 0; } @@ -1018,10 +1153,40 @@ eth_igc_infos_get(struct rte_eth_dev *dev, struct rte_eth_dev_info *dev_info) dev_info->min_rx_bufsize = 256; /* See BSIZE field of RCTL register. */ dev_info->max_rx_pktlen = MAX_RX_JUMBO_FRAME_SIZE; dev_info->max_mac_addrs = hw->mac.rar_entry_count; + dev_info->rx_offload_capa = IGC_RX_OFFLOAD_ALL; + dev_info->tx_offload_capa = IGC_TX_OFFLOAD_ALL; + dev_info->max_rx_queues = IGC_QUEUE_PAIRS_NUM; dev_info->max_tx_queues = IGC_QUEUE_PAIRS_NUM; dev_info->max_vmdq_pools = 0; + dev_info->hash_key_size = IGC_HKEY_MAX_INDEX * sizeof(uint32_t); + dev_info->reta_size = ETH_RSS_RETA_SIZE_128; + dev_info->flow_type_rss_offloads = IGC_RSS_OFFLOAD_ALL; + + dev_info->default_rxconf = (struct rte_eth_rxconf) { + .rx_thresh = { + .pthresh = IGC_DEFAULT_RX_PTHRESH, + .hthresh = IGC_DEFAULT_RX_HTHRESH, + .wthresh = IGC_DEFAULT_RX_WTHRESH, + }, + .rx_free_thresh = IGC_DEFAULT_RX_FREE_THRESH, + .rx_drop_en = 0, + .offloads = 0, + }; + + dev_info->default_txconf = (struct rte_eth_txconf) { + .tx_thresh = { + .pthresh = IGC_DEFAULT_TX_PTHRESH, + .hthresh = IGC_DEFAULT_TX_HTHRESH, + .wthresh = IGC_DEFAULT_TX_WTHRESH, + }, + .offloads = 0, + }; + + dev_info->rx_desc_lim = rx_desc_lim; + dev_info->tx_desc_lim = tx_desc_lim; + dev_info->speed_capa = ETH_LINK_SPEED_10M_HD | ETH_LINK_SPEED_10M | ETH_LINK_SPEED_100M_HD | ETH_LINK_SPEED_100M | ETH_LINK_SPEED_1G | ETH_LINK_SPEED_2_5G; @@ -1047,44 +1212,115 @@ eth_igc_led_off(struct rte_eth_dev *dev) return igc_led_off(hw) == IGC_SUCCESS ? 0 : -ENOTSUP; } +static const uint32_t * +eth_igc_supported_ptypes_get(__rte_unused struct rte_eth_dev *dev) +{ + static const uint32_t ptypes[] = { + /* refers to rx_desc_pkt_info_to_pkt_type() */ + RTE_PTYPE_L2_ETHER, + RTE_PTYPE_L3_IPV4, + RTE_PTYPE_L3_IPV4_EXT, + RTE_PTYPE_L3_IPV6, + RTE_PTYPE_L3_IPV6_EXT, + RTE_PTYPE_L4_TCP, + RTE_PTYPE_L4_UDP, + RTE_PTYPE_L4_SCTP, + RTE_PTYPE_TUNNEL_IP, + RTE_PTYPE_INNER_L3_IPV6, + RTE_PTYPE_INNER_L3_IPV6_EXT, + RTE_PTYPE_INNER_L4_TCP, + RTE_PTYPE_INNER_L4_UDP, + RTE_PTYPE_UNKNOWN + }; + + return ptypes; +} + static int -eth_igc_rx_queue_setup(struct rte_eth_dev *dev, uint16_t rx_queue_id, - uint16_t nb_rx_desc, unsigned int socket_id, - const struct rte_eth_rxconf *rx_conf, - struct rte_mempool *mb_pool) +eth_igc_mtu_set(struct rte_eth_dev *dev, uint16_t mtu) { - PMD_INIT_FUNC_TRACE(); - RTE_SET_USED(dev); - RTE_SET_USED(rx_queue_id); - RTE_SET_USED(nb_rx_desc); - RTE_SET_USED(socket_id); - RTE_SET_USED(rx_conf); - RTE_SET_USED(mb_pool); + struct igc_hw *hw = IGC_DEV_PRIVATE_HW(dev); + uint32_t frame_size = mtu + IGC_ETH_OVERHEAD; + uint32_t rctl; + + /* if extend vlan has been enabled */ + if (IGC_READ_REG(hw, IGC_CTRL_EXT) & IGC_CTRL_EXT_EXT_VLAN) + frame_size += VLAN_TAG_SIZE; + + /* check that mtu is within the allowed range */ + if (mtu < RTE_ETHER_MIN_MTU || + frame_size > MAX_RX_JUMBO_FRAME_SIZE) + return -EINVAL; + + /* + * refuse mtu that requires the support of scattered packets when + * this feature has not been enabled before. + */ + if (!dev->data->scattered_rx && + frame_size > dev->data->min_rx_buf_size - RTE_PKTMBUF_HEADROOM) + return -EINVAL; + + rctl = IGC_READ_REG(hw, IGC_RCTL); + + /* switch to jumbo mode if needed */ + if (mtu > RTE_ETHER_MTU) { + dev->data->dev_conf.rxmode.offloads |= + DEV_RX_OFFLOAD_JUMBO_FRAME; + rctl |= IGC_RCTL_LPE; + } else { + dev->data->dev_conf.rxmode.offloads &= + ~DEV_RX_OFFLOAD_JUMBO_FRAME; + rctl &= ~IGC_RCTL_LPE; + } + IGC_WRITE_REG(hw, IGC_RCTL, rctl); + + /* update max frame size */ + dev->data->dev_conf.rxmode.max_rx_pkt_len = frame_size; + + IGC_WRITE_REG(hw, IGC_RLPML, + dev->data->dev_conf.rxmode.max_rx_pkt_len); + return 0; } static int -eth_igc_tx_queue_setup(struct rte_eth_dev *dev, uint16_t queue_idx, - uint16_t nb_desc, unsigned int socket_id, - const struct rte_eth_txconf *tx_conf) +eth_igc_rar_set(struct rte_eth_dev *dev, struct rte_ether_addr *mac_addr, + uint32_t index, uint32_t pool) { - PMD_INIT_FUNC_TRACE(); - RTE_SET_USED(dev); - RTE_SET_USED(queue_idx); - RTE_SET_USED(nb_desc); - RTE_SET_USED(socket_id); - RTE_SET_USED(tx_conf); + struct igc_hw *hw = IGC_DEV_PRIVATE_HW(dev); + + igc_rar_set(hw, mac_addr->addr_bytes, index); + RTE_SET_USED(pool); return 0; } -static void eth_igc_tx_queue_release(void *txq) +static void +eth_igc_rar_clear(struct rte_eth_dev *dev, uint32_t index) { - RTE_SET_USED(txq); + uint8_t addr[RTE_ETHER_ADDR_LEN]; + struct igc_hw *hw = IGC_DEV_PRIVATE_HW(dev); + + memset(addr, 0, sizeof(addr)); + igc_rar_set(hw, addr, index); } -static void eth_igc_rx_queue_release(void *rxq) +static int +eth_igc_default_mac_addr_set(struct rte_eth_dev *dev, + struct rte_ether_addr *addr) { - RTE_SET_USED(rxq); + struct igc_hw *hw = IGC_DEV_PRIVATE_HW(dev); + igc_rar_set(hw, addr->addr_bytes, 0); + return 0; +} + +static int +eth_igc_set_mc_addr_list(struct rte_eth_dev *dev, + struct rte_ether_addr *mc_addr_set, + uint32_t nb_mc_addr) +{ + struct igc_hw *hw = IGC_DEV_PRIVATE_HW(dev); + igc_update_mc_addr_list(hw, (u8 *)mc_addr_set, nb_mc_addr); + return 0; } static int diff --git a/drivers/net/igc/igc_ethdev.h b/drivers/net/igc/igc_ethdev.h index c1e32177d0..3910371390 100644 --- a/drivers/net/igc/igc_ethdev.h +++ b/drivers/net/igc/igc_ethdev.h @@ -18,12 +18,74 @@ extern "C" { #define IGC_QUEUE_PAIRS_NUM 4 +#define IGC_HKEY_MAX_INDEX 10 +#define IGC_RSS_RDT_SIZD 128 + +/* + * TDBA/RDBA should be aligned on 16 byte boundary. But TDLEN/RDLEN should be + * multiple of 128 bytes. So we align TDBA/RDBA on 128 byte boundary. + * This will also optimize cache line size effect. + * H/W supports up to cache line size 128. + */ +#define IGC_ALIGN 128 + +#define IGC_TX_DESCRIPTOR_MULTIPLE 8 +#define IGC_RX_DESCRIPTOR_MULTIPLE 8 + +#define IGC_RXD_ALIGN ((uint16_t)(IGC_ALIGN / \ + sizeof(union igc_adv_rx_desc))) +#define IGC_TXD_ALIGN ((uint16_t)(IGC_ALIGN / \ + sizeof(union igc_adv_tx_desc))) +#define IGC_MIN_TXD IGC_TX_DESCRIPTOR_MULTIPLE +#define IGC_MAX_TXD ((uint16_t)(0x80000 / sizeof(union igc_adv_tx_desc))) +#define IGC_MIN_RXD IGC_RX_DESCRIPTOR_MULTIPLE +#define IGC_MAX_RXD ((uint16_t)(0x80000 / sizeof(union igc_adv_rx_desc))) + +#define IGC_TX_MAX_SEG UINT8_MAX +#define IGC_TX_MAX_MTU_SEG UINT8_MAX + +#define IGC_RX_OFFLOAD_ALL ( \ + DEV_RX_OFFLOAD_IPV4_CKSUM | \ + DEV_RX_OFFLOAD_UDP_CKSUM | \ + DEV_RX_OFFLOAD_TCP_CKSUM | \ + DEV_RX_OFFLOAD_SCTP_CKSUM | \ + DEV_RX_OFFLOAD_JUMBO_FRAME | \ + DEV_RX_OFFLOAD_KEEP_CRC | \ + DEV_RX_OFFLOAD_SCATTER) + +#define IGC_TX_OFFLOAD_ALL ( \ + DEV_TX_OFFLOAD_VLAN_INSERT | \ + DEV_TX_OFFLOAD_IPV4_CKSUM | \ + DEV_TX_OFFLOAD_UDP_CKSUM | \ + DEV_TX_OFFLOAD_TCP_CKSUM | \ + DEV_TX_OFFLOAD_SCTP_CKSUM | \ + DEV_TX_OFFLOAD_TCP_TSO | \ + DEV_TX_OFFLOAD_UDP_TSO | \ + DEV_TX_OFFLOAD_MULTI_SEGS) + +#define IGC_RSS_OFFLOAD_ALL ( \ + ETH_RSS_IPV4 | \ + ETH_RSS_NONFRAG_IPV4_TCP | \ + ETH_RSS_NONFRAG_IPV4_UDP | \ + ETH_RSS_IPV6 | \ + ETH_RSS_NONFRAG_IPV6_TCP | \ + ETH_RSS_NONFRAG_IPV6_UDP | \ + ETH_RSS_IPV6_EX | \ + ETH_RSS_IPV6_TCP_EX | \ + ETH_RSS_IPV6_UDP_EX) + /* structure for interrupt relative data */ struct igc_interrupt { uint32_t flags; uint32_t mask; }; +/* Union of RSS redirect table register */ +union igc_rss_reta_reg { + uint32_t dword; + uint8_t bytes[4]; +}; + /* * Structure to store private data for each driver instance (for each port). */ diff --git a/drivers/net/igc/igc_logs.h b/drivers/net/igc/igc_logs.h index 67b169966e..6457c4d180 100644 --- a/drivers/net/igc/igc_logs.h +++ b/drivers/net/igc/igc_logs.h @@ -20,6 +20,20 @@ extern int igc_logtype_driver; #define PMD_INIT_FUNC_TRACE() PMD_INIT_LOG(DEBUG, " >>") +#ifdef RTE_LIBRTE_IGC_DEBUG_RX +#define PMD_RX_LOG(level, fmt, args...) \ + RTE_LOG(level, PMD, "%s(): " fmt "\n", __func__, ## args) +#else +#define PMD_RX_LOG(level, fmt, args...) do { } while (0) +#endif + +#ifdef RTE_LIBRTE_IGC_DEBUG_TX +#define PMD_TX_LOG(level, fmt, args...) \ + RTE_LOG(level, PMD, "%s(): " fmt "\n", __func__, ## args) +#else +#define PMD_TX_LOG(level, fmt, args...) do { } while (0) +#endif + #define PMD_DRV_LOG_RAW(level, fmt, args...) \ rte_log(RTE_LOG_ ## level, igc_logtype_driver, "%s(): " fmt, \ __func__, ## args) diff --git a/drivers/net/igc/igc_txrx.c b/drivers/net/igc/igc_txrx.c new file mode 100644 index 0000000000..3437c4de2d --- /dev/null +++ b/drivers/net/igc/igc_txrx.c @@ -0,0 +1,2109 @@ +/* SPDX-License-Identifier: BSD-3-Clause + * Copyright(c) 2020 Intel Corporation + */ + +#include +#include +#include +#include + +#include "igc_logs.h" +#include "igc_txrx.h" + +#ifdef RTE_PMD_USE_PREFETCH +#define rte_igc_prefetch(p) rte_prefetch0(p) +#else +#define rte_igc_prefetch(p) do {} while (0) +#endif + +#ifdef RTE_PMD_PACKET_PREFETCH +#define rte_packet_prefetch(p) rte_prefetch1(p) +#else +#define rte_packet_prefetch(p) do {} while (0) +#endif + +/* Multicast / Unicast table offset mask. */ +#define IGC_RCTL_MO_MSK (3u << IGC_RCTL_MO_SHIFT) + +/* Loopback mode. */ +#define IGC_RCTL_LBM_SHIFT 6 +#define IGC_RCTL_LBM_MSK (3u << IGC_RCTL_LBM_SHIFT) + +/* Hash select for MTA */ +#define IGC_RCTL_HSEL_SHIFT 8 +#define IGC_RCTL_HSEL_MSK (3u << IGC_RCTL_HSEL_SHIFT) +#define IGC_RCTL_PSP (1u << 21) + +/* Receive buffer size for header buffer */ +#define IGC_SRRCTL_BSIZEHEADER_SHIFT 8 + +/* RX descriptor status and error flags */ +#define IGC_RXD_STAT_L4CS (1u << 5) +#define IGC_RXD_STAT_VEXT (1u << 9) +#define IGC_RXD_STAT_LLINT (1u << 11) +#define IGC_RXD_STAT_SCRC (1u << 12) +#define IGC_RXD_STAT_SMDT_MASK (3u << 13) +#define IGC_RXD_STAT_MC (1u << 19) +#define IGC_RXD_EXT_ERR_L4E (1u << 29) +#define IGC_RXD_EXT_ERR_IPE (1u << 30) +#define IGC_RXD_EXT_ERR_RXE (1u << 31) +#define IGC_RXD_RSS_TYPE_MASK 0xfu +#define IGC_RXD_PCTYPE_MASK (0x7fu << 4) +#define IGC_RXD_ETQF_SHIFT 12 +#define IGC_RXD_ETQF_MSK (0xfu << IGC_RXD_ETQF_SHIFT) +#define IGC_RXD_VPKT (1u << 16) + +/* TXD control bits */ +#define IGC_TXDCTL_PTHRESH_SHIFT 0 +#define IGC_TXDCTL_HTHRESH_SHIFT 8 +#define IGC_TXDCTL_WTHRESH_SHIFT 16 +#define IGC_TXDCTL_PTHRESH_MSK (0x1fu << IGC_TXDCTL_PTHRESH_SHIFT) +#define IGC_TXDCTL_HTHRESH_MSK (0x1fu << IGC_TXDCTL_HTHRESH_SHIFT) +#define IGC_TXDCTL_WTHRESH_MSK (0x1fu << IGC_TXDCTL_WTHRESH_SHIFT) + +/* RXD control bits */ +#define IGC_RXDCTL_PTHRESH_SHIFT 0 +#define IGC_RXDCTL_HTHRESH_SHIFT 8 +#define IGC_RXDCTL_WTHRESH_SHIFT 16 +#define IGC_RXDCTL_PTHRESH_MSK (0x1fu << IGC_RXDCTL_PTHRESH_SHIFT) +#define IGC_RXDCTL_HTHRESH_MSK (0x1fu << IGC_RXDCTL_HTHRESH_SHIFT) +#define IGC_RXDCTL_WTHRESH_MSK (0x1fu << IGC_RXDCTL_WTHRESH_SHIFT) + +#define IGC_TSO_MAX_HDRLEN 512 +#define IGC_TSO_MAX_MSS 9216 + +/* Bit Mask to indicate what bits required for building TX context */ +#define IGC_TX_OFFLOAD_MASK ( \ + PKT_TX_OUTER_IPV4 | \ + PKT_TX_IPV6 | \ + PKT_TX_IPV4 | \ + PKT_TX_VLAN_PKT | \ + PKT_TX_IP_CKSUM | \ + PKT_TX_L4_MASK | \ + PKT_TX_TCP_SEG | \ + PKT_TX_UDP_SEG) + +#define IGC_TX_OFFLOAD_SEG (PKT_TX_TCP_SEG | PKT_TX_UDP_SEG) + +#define IGC_ADVTXD_POPTS_TXSM 0x00000200 /* L4 Checksum offload request */ +#define IGC_ADVTXD_POPTS_IXSM 0x00000100 /* IP Checksum offload request */ + +/* L4 Packet TYPE of Reserved */ +#define IGC_ADVTXD_TUCMD_L4T_RSV 0x00001800 + +#define IGC_TX_OFFLOAD_NOTSUP_MASK (PKT_TX_OFFLOAD_MASK ^ IGC_TX_OFFLOAD_MASK) + +/** + * Structure associated with each descriptor of the RX ring of a RX queue. + */ +struct igc_rx_entry { + struct rte_mbuf *mbuf; /**< mbuf associated with RX descriptor. */ +}; + +/** + * Structure associated with each RX queue. + */ +struct igc_rx_queue { + struct rte_mempool *mb_pool; /**< mbuf pool to populate RX ring. */ + volatile union igc_adv_rx_desc *rx_ring; + /**< RX ring virtual address. */ + uint64_t rx_ring_phys_addr; /**< RX ring DMA address. */ + volatile uint32_t *rdt_reg_addr; /**< RDT register address. */ + volatile uint32_t *rdh_reg_addr; /**< RDH register address. */ + struct igc_rx_entry *sw_ring; /**< address of RX software ring. */ + struct rte_mbuf *pkt_first_seg; /**< First segment of current packet. */ + struct rte_mbuf *pkt_last_seg; /**< Last segment of current packet. */ + uint16_t nb_rx_desc; /**< number of RX descriptors. */ + uint16_t rx_tail; /**< current value of RDT register. */ + uint16_t nb_rx_hold; /**< number of held free RX desc. */ + uint16_t rx_free_thresh; /**< max free RX desc to hold. */ + uint16_t queue_id; /**< RX queue index. */ + uint16_t reg_idx; /**< RX queue register index. */ + uint16_t port_id; /**< Device port identifier. */ + uint8_t pthresh; /**< Prefetch threshold register. */ + uint8_t hthresh; /**< Host threshold register. */ + uint8_t wthresh; /**< Write-back threshold register. */ + uint8_t crc_len; /**< 0 if CRC stripped, 4 otherwise. */ + uint8_t drop_en; /**< If not 0, set SRRCTL.Drop_En. */ + uint32_t flags; /**< RX flags. */ + uint64_t offloads; /**< offloads of DEV_RX_OFFLOAD_* */ +}; + +/** Offload features */ +union igc_tx_offload { + uint64_t data; + struct { + uint64_t l3_len:9; /**< L3 (IP) Header Length. */ + uint64_t l2_len:7; /**< L2 (MAC) Header Length. */ + uint64_t vlan_tci:16; + /**< VLAN Tag Control Identifier(CPU order). */ + uint64_t l4_len:8; /**< L4 (TCP/UDP) Header Length. */ + uint64_t tso_segsz:16; /**< TCP TSO segment size. */ + /* uint64_t unused:8; */ + }; +}; + +/* + * Compare mask for igc_tx_offload.data, + * should be in sync with igc_tx_offload layout. + */ +#define TX_MACIP_LEN_CMP_MASK 0x000000000000FFFFULL /**< L2L3 header mask. */ +#define TX_VLAN_CMP_MASK 0x00000000FFFF0000ULL /**< Vlan mask. */ +#define TX_TCP_LEN_CMP_MASK 0x000000FF00000000ULL /**< TCP header mask. */ +#define TX_TSO_MSS_CMP_MASK 0x00FFFF0000000000ULL /**< TSO segsz mask. */ +/** Mac + IP + TCP + Mss mask. */ +#define TX_TSO_CMP_MASK \ + (TX_MACIP_LEN_CMP_MASK | TX_TCP_LEN_CMP_MASK | TX_TSO_MSS_CMP_MASK) + +/** + * Structure to check if new context need be built + */ +struct igc_advctx_info { + uint64_t flags; /**< ol_flags related to context build. */ + /** tx offload: vlan, tso, l2-l3-l4 lengths. */ + union igc_tx_offload tx_offload; + /** compare mask for tx offload. */ + union igc_tx_offload tx_offload_mask; +}; + +/** + * Hardware context number + */ +enum { + IGC_CTX_0 = 0, /**< CTX0 */ + IGC_CTX_1 = 1, /**< CTX1 */ + IGC_CTX_NUM = 2, /**< CTX_NUM */ +}; + +/** + * Structure associated with each descriptor of the TX ring of a TX queue. + */ +struct igc_tx_entry { + struct rte_mbuf *mbuf; /**< mbuf associated with TX desc, if any. */ + uint16_t next_id; /**< Index of next descriptor in ring. */ + uint16_t last_id; /**< Index of last scattered descriptor. */ +}; + +/** + * Structure associated with each TX queue. + */ +struct igc_tx_queue { + volatile union igc_adv_tx_desc *tx_ring; /**< TX ring address */ + uint64_t tx_ring_phys_addr; /**< TX ring DMA address. */ + struct igc_tx_entry *sw_ring; /**< virtual address of SW ring. */ + volatile uint32_t *tdt_reg_addr; /**< Address of TDT register. */ + uint32_t txd_type; /**< Device-specific TXD type */ + uint16_t nb_tx_desc; /**< number of TX descriptors. */ + uint16_t tx_tail; /**< Current value of TDT register. */ + uint16_t tx_head; + /**< Index of first used TX descriptor. */ + uint16_t queue_id; /**< TX queue index. */ + uint16_t reg_idx; /**< TX queue register index. */ + uint16_t port_id; /**< Device port identifier. */ + uint8_t pthresh; /**< Prefetch threshold register. */ + uint8_t hthresh; /**< Host threshold register. */ + uint8_t wthresh; /**< Write-back threshold register. */ + uint8_t ctx_curr; + + /**< Start context position for transmit queue. */ + struct igc_advctx_info ctx_cache[IGC_CTX_NUM]; + /**< Hardware context history.*/ + uint64_t offloads; /**< offloads of DEV_TX_OFFLOAD_* */ +}; + +static inline uint64_t +rx_desc_statuserr_to_pkt_flags(uint32_t statuserr) +{ + static uint64_t l4_chksum_flags[] = {0, 0, PKT_RX_L4_CKSUM_GOOD, + PKT_RX_L4_CKSUM_BAD}; + + static uint64_t l3_chksum_flags[] = {0, 0, PKT_RX_IP_CKSUM_GOOD, + PKT_RX_IP_CKSUM_BAD}; + uint64_t pkt_flags = 0; + uint32_t tmp; + + if (statuserr & IGC_RXD_STAT_VP) + pkt_flags |= PKT_RX_VLAN_STRIPPED; + + tmp = !!(statuserr & (IGC_RXD_STAT_L4CS | IGC_RXD_STAT_UDPCS)); + tmp = (tmp << 1) | (uint32_t)!!(statuserr & IGC_RXD_EXT_ERR_L4E); + pkt_flags |= l4_chksum_flags[tmp]; + + tmp = !!(statuserr & IGC_RXD_STAT_IPCS); + tmp = (tmp << 1) | (uint32_t)!!(statuserr & IGC_RXD_EXT_ERR_IPE); + pkt_flags |= l3_chksum_flags[tmp]; + + return pkt_flags; +} + +#define IGC_PACKET_TYPE_IPV4 0X01 +#define IGC_PACKET_TYPE_IPV4_TCP 0X11 +#define IGC_PACKET_TYPE_IPV4_UDP 0X21 +#define IGC_PACKET_TYPE_IPV4_SCTP 0X41 +#define IGC_PACKET_TYPE_IPV4_EXT 0X03 +#define IGC_PACKET_TYPE_IPV4_EXT_SCTP 0X43 +#define IGC_PACKET_TYPE_IPV6 0X04 +#define IGC_PACKET_TYPE_IPV6_TCP 0X14 +#define IGC_PACKET_TYPE_IPV6_UDP 0X24 +#define IGC_PACKET_TYPE_IPV6_EXT 0X0C +#define IGC_PACKET_TYPE_IPV6_EXT_TCP 0X1C +#define IGC_PACKET_TYPE_IPV6_EXT_UDP 0X2C +#define IGC_PACKET_TYPE_IPV4_IPV6 0X05 +#define IGC_PACKET_TYPE_IPV4_IPV6_TCP 0X15 +#define IGC_PACKET_TYPE_IPV4_IPV6_UDP 0X25 +#define IGC_PACKET_TYPE_IPV4_IPV6_EXT 0X0D +#define IGC_PACKET_TYPE_IPV4_IPV6_EXT_TCP 0X1D +#define IGC_PACKET_TYPE_IPV4_IPV6_EXT_UDP 0X2D +#define IGC_PACKET_TYPE_MAX 0X80 +#define IGC_PACKET_TYPE_MASK 0X7F +#define IGC_PACKET_TYPE_SHIFT 0X04 + +static inline uint32_t +rx_desc_pkt_info_to_pkt_type(uint32_t pkt_info) +{ + static const uint32_t + ptype_table[IGC_PACKET_TYPE_MAX] __rte_cache_aligned = { + [IGC_PACKET_TYPE_IPV4] = RTE_PTYPE_L2_ETHER | + RTE_PTYPE_L3_IPV4, + [IGC_PACKET_TYPE_IPV4_EXT] = RTE_PTYPE_L2_ETHER | + RTE_PTYPE_L3_IPV4_EXT, + [IGC_PACKET_TYPE_IPV6] = RTE_PTYPE_L2_ETHER | + RTE_PTYPE_L3_IPV6, + [IGC_PACKET_TYPE_IPV4_IPV6] = RTE_PTYPE_L2_ETHER | + RTE_PTYPE_L3_IPV4 | RTE_PTYPE_TUNNEL_IP | + RTE_PTYPE_INNER_L3_IPV6, + [IGC_PACKET_TYPE_IPV6_EXT] = RTE_PTYPE_L2_ETHER | + RTE_PTYPE_L3_IPV6_EXT, + [IGC_PACKET_TYPE_IPV4_IPV6_EXT] = RTE_PTYPE_L2_ETHER | + RTE_PTYPE_L3_IPV4 | RTE_PTYPE_TUNNEL_IP | + RTE_PTYPE_INNER_L3_IPV6_EXT, + [IGC_PACKET_TYPE_IPV4_TCP] = RTE_PTYPE_L2_ETHER | + RTE_PTYPE_L3_IPV4 | RTE_PTYPE_L4_TCP, + [IGC_PACKET_TYPE_IPV6_TCP] = RTE_PTYPE_L2_ETHER | + RTE_PTYPE_L3_IPV6 | RTE_PTYPE_L4_TCP, + [IGC_PACKET_TYPE_IPV4_IPV6_TCP] = RTE_PTYPE_L2_ETHER | + RTE_PTYPE_L3_IPV4 | RTE_PTYPE_TUNNEL_IP | + RTE_PTYPE_INNER_L3_IPV6 | RTE_PTYPE_INNER_L4_TCP, + [IGC_PACKET_TYPE_IPV6_EXT_TCP] = RTE_PTYPE_L2_ETHER | + RTE_PTYPE_L3_IPV6_EXT | RTE_PTYPE_L4_TCP, + [IGC_PACKET_TYPE_IPV4_IPV6_EXT_TCP] = RTE_PTYPE_L2_ETHER | + RTE_PTYPE_L3_IPV4 | RTE_PTYPE_TUNNEL_IP | + RTE_PTYPE_INNER_L3_IPV6_EXT | RTE_PTYPE_INNER_L4_TCP, + [IGC_PACKET_TYPE_IPV4_UDP] = RTE_PTYPE_L2_ETHER | + RTE_PTYPE_L3_IPV4 | RTE_PTYPE_L4_UDP, + [IGC_PACKET_TYPE_IPV6_UDP] = RTE_PTYPE_L2_ETHER | + RTE_PTYPE_L3_IPV6 | RTE_PTYPE_L4_UDP, + [IGC_PACKET_TYPE_IPV4_IPV6_UDP] = RTE_PTYPE_L2_ETHER | + RTE_PTYPE_L3_IPV4 | RTE_PTYPE_TUNNEL_IP | + RTE_PTYPE_INNER_L3_IPV6 | RTE_PTYPE_INNER_L4_UDP, + [IGC_PACKET_TYPE_IPV6_EXT_UDP] = RTE_PTYPE_L2_ETHER | + RTE_PTYPE_L3_IPV6_EXT | RTE_PTYPE_L4_UDP, + [IGC_PACKET_TYPE_IPV4_IPV6_EXT_UDP] = RTE_PTYPE_L2_ETHER | + RTE_PTYPE_L3_IPV4 | RTE_PTYPE_TUNNEL_IP | + RTE_PTYPE_INNER_L3_IPV6_EXT | RTE_PTYPE_INNER_L4_UDP, + [IGC_PACKET_TYPE_IPV4_SCTP] = RTE_PTYPE_L2_ETHER | + RTE_PTYPE_L3_IPV4 | RTE_PTYPE_L4_SCTP, + [IGC_PACKET_TYPE_IPV4_EXT_SCTP] = RTE_PTYPE_L2_ETHER | + RTE_PTYPE_L3_IPV4_EXT | RTE_PTYPE_L4_SCTP, + }; + if (unlikely(pkt_info & IGC_RXDADV_PKTTYPE_ETQF)) + return RTE_PTYPE_UNKNOWN; + + pkt_info = (pkt_info >> IGC_PACKET_TYPE_SHIFT) & IGC_PACKET_TYPE_MASK; + + return ptype_table[pkt_info]; +} + +static inline void +rx_desc_get_pkt_info(struct igc_rx_queue *rxq, struct rte_mbuf *rxm, + union igc_adv_rx_desc *rxd, uint32_t staterr) +{ + uint64_t pkt_flags; + uint32_t hlen_type_rss; + uint16_t pkt_info; + + /* Prefetch data of first segment, if configured to do so. */ + rte_packet_prefetch((char *)rxm->buf_addr + rxm->data_off); + + rxm->port = rxq->port_id; + hlen_type_rss = rte_le_to_cpu_32(rxd->wb.lower.lo_dword.data); + rxm->hash.rss = rte_le_to_cpu_32(rxd->wb.lower.hi_dword.rss); + rxm->vlan_tci = rte_le_to_cpu_16(rxd->wb.upper.vlan); + + pkt_flags = (hlen_type_rss & IGC_RXD_RSS_TYPE_MASK) ? + PKT_RX_RSS_HASH : 0; + + if (hlen_type_rss & IGC_RXD_VPKT) + pkt_flags |= PKT_RX_VLAN; + + pkt_flags |= rx_desc_statuserr_to_pkt_flags(staterr); + + rxm->ol_flags = pkt_flags; + pkt_info = rte_le_to_cpu_16(rxd->wb.lower.lo_dword.hs_rss.pkt_info); + rxm->packet_type = rx_desc_pkt_info_to_pkt_type(pkt_info); +} + +static uint16_t +igc_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t nb_pkts) +{ + struct igc_rx_queue * const rxq = rx_queue; + volatile union igc_adv_rx_desc * const rx_ring = rxq->rx_ring; + struct igc_rx_entry * const sw_ring = rxq->sw_ring; + uint16_t rx_id = rxq->rx_tail; + uint16_t nb_rx = 0; + uint16_t nb_hold = 0; + + while (nb_rx < nb_pkts) { + volatile union igc_adv_rx_desc *rxdp; + struct igc_rx_entry *rxe; + struct rte_mbuf *rxm; + struct rte_mbuf *nmb; + union igc_adv_rx_desc rxd; + uint32_t staterr; + uint16_t data_len; + + /* + * The order of operations here is important as the DD status + * bit must not be read after any other descriptor fields. + * rx_ring and rxdp are pointing to volatile data so the order + * of accesses cannot be reordered by the compiler. If they were + * not volatile, they could be reordered which could lead to + * using invalid descriptor fields when read from rxd. + */ + rxdp = &rx_ring[rx_id]; + staterr = rte_cpu_to_le_32(rxdp->wb.upper.status_error); + if (!(staterr & IGC_RXD_STAT_DD)) + break; + rxd = *rxdp; + + /* + * End of packet. + * + * If the IGC_RXD_STAT_EOP flag is not set, the RX packet is + * likely to be invalid and to be dropped by the various + * validation checks performed by the network stack. + * + * Allocate a new mbuf to replenish the RX ring descriptor. + * If the allocation fails: + * - arrange for that RX descriptor to be the first one + * being parsed the next time the receive function is + * invoked [on the same queue]. + * + * - Stop parsing the RX ring and return immediately. + * + * This policy does not drop the packet received in the RX + * descriptor for which the allocation of a new mbuf failed. + * Thus, it allows that packet to be later retrieved if + * mbuf have been freed in the mean time. + * As a side effect, holding RX descriptors instead of + * systematically giving them back to the NIC may lead to + * RX ring exhaustion situations. + * However, the NIC can gracefully prevent such situations + * to happen by sending specific "back-pressure" flow control + * frames to its peer(s). + */ + PMD_RX_LOG(DEBUG, + "port_id=%u queue_id=%u rx_id=%u staterr=0x%x data_len=%u", + rxq->port_id, rxq->queue_id, rx_id, staterr, + rte_le_to_cpu_16(rxd.wb.upper.length)); + + nmb = rte_mbuf_raw_alloc(rxq->mb_pool); + if (nmb == NULL) { + unsigned int id; + PMD_RX_LOG(DEBUG, + "RX mbuf alloc failed, port_id=%u queue_id=%u", + rxq->port_id, rxq->queue_id); + id = rxq->port_id; + rte_eth_devices[id].data->rx_mbuf_alloc_failed++; + break; + } + + nb_hold++; + rxe = &sw_ring[rx_id]; + rx_id++; + if (rx_id >= rxq->nb_rx_desc) + rx_id = 0; + + /* Prefetch next mbuf while processing current one. */ + rte_igc_prefetch(sw_ring[rx_id].mbuf); + + /* + * When next RX descriptor is on a cache-line boundary, + * prefetch the next 4 RX descriptors and the next 8 pointers + * to mbufs. + */ + if ((rx_id & 0x3) == 0) { + rte_igc_prefetch(&rx_ring[rx_id]); + rte_igc_prefetch(&sw_ring[rx_id]); + } + + /* + * Update RX descriptor with the physical address of the new + * data buffer of the new allocated mbuf. + */ + rxm = rxe->mbuf; + rxe->mbuf = nmb; + rxdp->read.hdr_addr = 0; + rxdp->read.pkt_addr = + rte_cpu_to_le_64(rte_mbuf_data_iova_default(nmb)); + rxm->next = NULL; + + rxm->data_off = RTE_PKTMBUF_HEADROOM; + data_len = rte_le_to_cpu_16(rxd.wb.upper.length) - rxq->crc_len; + rxm->data_len = data_len; + rxm->pkt_len = data_len; + rxm->nb_segs = 1; + + rx_desc_get_pkt_info(rxq, rxm, &rxd, staterr); + + /* + * Store the mbuf address into the next entry of the array + * of returned packets. + */ + rx_pkts[nb_rx++] = rxm; + } + rxq->rx_tail = rx_id; + + /* + * If the number of free RX descriptors is greater than the RX free + * threshold of the queue, advance the Receive Descriptor Tail (RDT) + * register. + * Update the RDT with the value of the last processed RX descriptor + * minus 1, to guarantee that the RDT register is never equal to the + * RDH register, which creates a "full" ring situation from the + * hardware point of view... + */ + nb_hold = nb_hold + rxq->nb_rx_hold; + if (nb_hold > rxq->rx_free_thresh) { + PMD_RX_LOG(DEBUG, + "port_id=%u queue_id=%u rx_tail=%u nb_hold=%u nb_rx=%u", + rxq->port_id, rxq->queue_id, rx_id, nb_hold, nb_rx); + rx_id = (rx_id == 0) ? (rxq->nb_rx_desc - 1) : (rx_id - 1); + IGC_PCI_REG_WRITE(rxq->rdt_reg_addr, rx_id); + nb_hold = 0; + } + rxq->nb_rx_hold = nb_hold; + return nb_rx; +} + +static uint16_t +igc_recv_scattered_pkts(void *rx_queue, struct rte_mbuf **rx_pkts, + uint16_t nb_pkts) +{ + struct igc_rx_queue * const rxq = rx_queue; + volatile union igc_adv_rx_desc * const rx_ring = rxq->rx_ring; + struct igc_rx_entry * const sw_ring = rxq->sw_ring; + struct rte_mbuf *first_seg = rxq->pkt_first_seg; + struct rte_mbuf *last_seg = rxq->pkt_last_seg; + + uint16_t rx_id = rxq->rx_tail; + uint16_t nb_rx = 0; + uint16_t nb_hold = 0; + + while (nb_rx < nb_pkts) { + volatile union igc_adv_rx_desc *rxdp; + struct igc_rx_entry *rxe; + struct rte_mbuf *rxm; + struct rte_mbuf *nmb; + union igc_adv_rx_desc rxd; + uint32_t staterr; + uint16_t data_len; + +next_desc: + /* + * The order of operations here is important as the DD status + * bit must not be read after any other descriptor fields. + * rx_ring and rxdp are pointing to volatile data so the order + * of accesses cannot be reordered by the compiler. If they were + * not volatile, they could be reordered which could lead to + * using invalid descriptor fields when read from rxd. + */ + rxdp = &rx_ring[rx_id]; + staterr = rte_cpu_to_le_32(rxdp->wb.upper.status_error); + if (!(staterr & IGC_RXD_STAT_DD)) + break; + rxd = *rxdp; + + /* + * Descriptor done. + * + * Allocate a new mbuf to replenish the RX ring descriptor. + * If the allocation fails: + * - arrange for that RX descriptor to be the first one + * being parsed the next time the receive function is + * invoked [on the same queue]. + * + * - Stop parsing the RX ring and return immediately. + * + * This policy does not drop the packet received in the RX + * descriptor for which the allocation of a new mbuf failed. + * Thus, it allows that packet to be later retrieved if + * mbuf have been freed in the mean time. + * As a side effect, holding RX descriptors instead of + * systematically giving them back to the NIC may lead to + * RX ring exhaustion situations. + * However, the NIC can gracefully prevent such situations + * to happen by sending specific "back-pressure" flow control + * frames to its peer(s). + */ + PMD_RX_LOG(DEBUG, + "port_id=%u queue_id=%u rx_id=%u staterr=0x%x data_len=%u", + rxq->port_id, rxq->queue_id, rx_id, staterr, + rte_le_to_cpu_16(rxd.wb.upper.length)); + + nmb = rte_mbuf_raw_alloc(rxq->mb_pool); + if (nmb == NULL) { + unsigned int id; + PMD_RX_LOG(DEBUG, + "RX mbuf alloc failed, port_id=%u queue_id=%u", + rxq->port_id, rxq->queue_id); + id = rxq->port_id; + rte_eth_devices[id].data->rx_mbuf_alloc_failed++; + break; + } + + nb_hold++; + rxe = &sw_ring[rx_id]; + rx_id++; + if (rx_id >= rxq->nb_rx_desc) + rx_id = 0; + + /* Prefetch next mbuf while processing current one. */ + rte_igc_prefetch(sw_ring[rx_id].mbuf); + + /* + * When next RX descriptor is on a cache-line boundary, + * prefetch the next 4 RX descriptors and the next 8 pointers + * to mbufs. + */ + if ((rx_id & 0x3) == 0) { + rte_igc_prefetch(&rx_ring[rx_id]); + rte_igc_prefetch(&sw_ring[rx_id]); + } + + /* + * Update RX descriptor with the physical address of the new + * data buffer of the new allocated mbuf. + */ + rxm = rxe->mbuf; + rxe->mbuf = nmb; + rxdp->read.hdr_addr = 0; + rxdp->read.pkt_addr = + rte_cpu_to_le_64(rte_mbuf_data_iova_default(nmb)); + rxm->next = NULL; + + /* + * Set data length & data buffer address of mbuf. + */ + rxm->data_off = RTE_PKTMBUF_HEADROOM; + data_len = rte_le_to_cpu_16(rxd.wb.upper.length); + rxm->data_len = data_len; + + /* + * If this is the first buffer of the received packet, + * set the pointer to the first mbuf of the packet and + * initialize its context. + * Otherwise, update the total length and the number of segments + * of the current scattered packet, and update the pointer to + * the last mbuf of the current packet. + */ + if (first_seg == NULL) { + first_seg = rxm; + first_seg->pkt_len = data_len; + first_seg->nb_segs = 1; + } else { + first_seg->pkt_len += data_len; + first_seg->nb_segs++; + last_seg->next = rxm; + } + + /* + * If this is not the last buffer of the received packet, + * update the pointer to the last mbuf of the current scattered + * packet and continue to parse the RX ring. + */ + if (!(staterr & IGC_RXD_STAT_EOP)) { + last_seg = rxm; + goto next_desc; + } + + /* + * This is the last buffer of the received packet. + * If the CRC is not stripped by the hardware: + * - Subtract the CRC length from the total packet length. + * - If the last buffer only contains the whole CRC or a part + * of it, free the mbuf associated to the last buffer. + * If part of the CRC is also contained in the previous + * mbuf, subtract the length of that CRC part from the + * data length of the previous mbuf. + */ + if (unlikely(rxq->crc_len > 0)) { + first_seg->pkt_len -= RTE_ETHER_CRC_LEN; + if (data_len <= RTE_ETHER_CRC_LEN) { + rte_pktmbuf_free_seg(rxm); + first_seg->nb_segs--; + last_seg->data_len = last_seg->data_len - + (RTE_ETHER_CRC_LEN - data_len); + last_seg->next = NULL; + } else { + rxm->data_len = (uint16_t) + (data_len - RTE_ETHER_CRC_LEN); + } + } + + rx_desc_get_pkt_info(rxq, first_seg, &rxd, staterr); + + /* + * Store the mbuf address into the next entry of the array + * of returned packets. + */ + rx_pkts[nb_rx++] = first_seg; + + /* Setup receipt context for a new packet. */ + first_seg = NULL; + } + rxq->rx_tail = rx_id; + + /* + * Save receive context. + */ + rxq->pkt_first_seg = first_seg; + rxq->pkt_last_seg = last_seg; + + /* + * If the number of free RX descriptors is greater than the RX free + * threshold of the queue, advance the Receive Descriptor Tail (RDT) + * register. + * Update the RDT with the value of the last processed RX descriptor + * minus 1, to guarantee that the RDT register is never equal to the + * RDH register, which creates a "full" ring situation from the + * hardware point of view... + */ + nb_hold = nb_hold + rxq->nb_rx_hold; + if (nb_hold > rxq->rx_free_thresh) { + PMD_RX_LOG(DEBUG, + "port_id=%u queue_id=%u rx_tail=%u nb_hold=%u nb_rx=%u", + rxq->port_id, rxq->queue_id, rx_id, nb_hold, nb_rx); + rx_id = (rx_id == 0) ? (rxq->nb_rx_desc - 1) : (rx_id - 1); + IGC_PCI_REG_WRITE(rxq->rdt_reg_addr, rx_id); + nb_hold = 0; + } + rxq->nb_rx_hold = nb_hold; + return nb_rx; +} + +static void +igc_rx_queue_release_mbufs(struct igc_rx_queue *rxq) +{ + unsigned int i; + + if (rxq->sw_ring != NULL) { + for (i = 0; i < rxq->nb_rx_desc; i++) { + if (rxq->sw_ring[i].mbuf != NULL) { + rte_pktmbuf_free_seg(rxq->sw_ring[i].mbuf); + rxq->sw_ring[i].mbuf = NULL; + } + } + } +} + +static void +igc_rx_queue_release(struct igc_rx_queue *rxq) +{ + igc_rx_queue_release_mbufs(rxq); + rte_free(rxq->sw_ring); + rte_free(rxq); +} + +void eth_igc_rx_queue_release(void *rxq) +{ + if (rxq) + igc_rx_queue_release(rxq); +} + +uint32_t eth_igc_rx_queue_count(struct rte_eth_dev *dev, + uint16_t rx_queue_id) +{ + /** + * Check the DD bit of a rx descriptor of each 4 in a group, + * to avoid checking too frequently and downgrading performance + * too much. + */ +#define IGC_RXQ_SCAN_INTERVAL 4 + + volatile union igc_adv_rx_desc *rxdp; + struct igc_rx_queue *rxq; + uint16_t desc = 0; + + rxq = dev->data->rx_queues[rx_queue_id]; + rxdp = &rxq->rx_ring[rxq->rx_tail]; + + while (desc < rxq->nb_rx_desc - rxq->rx_tail) { + if (unlikely(!(rxdp->wb.upper.status_error & + IGC_RXD_STAT_DD))) + return desc; + desc += IGC_RXQ_SCAN_INTERVAL; + rxdp += IGC_RXQ_SCAN_INTERVAL; + } + rxdp = &rxq->rx_ring[rxq->rx_tail + desc - rxq->nb_rx_desc]; + + while (desc < rxq->nb_rx_desc && + (rxdp->wb.upper.status_error & IGC_RXD_STAT_DD)) { + desc += IGC_RXQ_SCAN_INTERVAL; + rxdp += IGC_RXQ_SCAN_INTERVAL; + } + + return desc; +} + +int eth_igc_rx_descriptor_done(void *rx_queue, uint16_t offset) +{ + volatile union igc_adv_rx_desc *rxdp; + struct igc_rx_queue *rxq = rx_queue; + uint32_t desc; + + if (unlikely(!rxq || offset >= rxq->nb_rx_desc)) + return 0; + + desc = rxq->rx_tail + offset; + if (desc >= rxq->nb_rx_desc) + desc -= rxq->nb_rx_desc; + + rxdp = &rxq->rx_ring[desc]; + return !!(rxdp->wb.upper.status_error & + rte_cpu_to_le_32(IGC_RXD_STAT_DD)); +} + +int eth_igc_rx_descriptor_status(void *rx_queue, uint16_t offset) +{ + struct igc_rx_queue *rxq = rx_queue; + volatile uint32_t *status; + uint32_t desc; + + if (unlikely(!rxq || offset >= rxq->nb_rx_desc)) + return -EINVAL; + + if (offset >= rxq->nb_rx_desc - rxq->nb_rx_hold) + return RTE_ETH_RX_DESC_UNAVAIL; + + desc = rxq->rx_tail + offset; + if (desc >= rxq->nb_rx_desc) + desc -= rxq->nb_rx_desc; + + status = &rxq->rx_ring[desc].wb.upper.status_error; + if (*status & rte_cpu_to_le_32(IGC_RXD_STAT_DD)) + return RTE_ETH_RX_DESC_DONE; + + return RTE_ETH_RX_DESC_AVAIL; +} + +static int +igc_alloc_rx_queue_mbufs(struct igc_rx_queue *rxq) +{ + struct igc_rx_entry *rxe = rxq->sw_ring; + uint64_t dma_addr; + unsigned int i; + + /* Initialize software ring entries. */ + for (i = 0; i < rxq->nb_rx_desc; i++) { + volatile union igc_adv_rx_desc *rxd; + struct rte_mbuf *mbuf = rte_mbuf_raw_alloc(rxq->mb_pool); + + if (mbuf == NULL) { + PMD_DRV_LOG(ERR, "RX mbuf alloc failed, queue_id=%hu", + rxq->queue_id); + return -ENOMEM; + } + dma_addr = rte_cpu_to_le_64(rte_mbuf_data_iova_default(mbuf)); + rxd = &rxq->rx_ring[i]; + rxd->read.hdr_addr = 0; + rxd->read.pkt_addr = dma_addr; + rxe[i].mbuf = mbuf; + } + + return 0; +} + +/* + * RSS random key supplied in section 7.1.2.9.3 of the Intel I225 datasheet. + * Used as the default key. + */ +static uint8_t default_rss_key[40] = { + 0x6D, 0x5A, 0x56, 0xDA, 0x25, 0x5B, 0x0E, 0xC2, + 0x41, 0x67, 0x25, 0x3D, 0x43, 0xA3, 0x8F, 0xB0, + 0xD0, 0xCA, 0x2B, 0xCB, 0xAE, 0x7B, 0x30, 0xB4, + 0x77, 0xCB, 0x2D, 0xA3, 0x80, 0x30, 0xF2, 0x0C, + 0x6A, 0x42, 0xB7, 0x3B, 0xBE, 0xAC, 0x01, 0xFA, +}; + +static void +igc_rss_disable(struct rte_eth_dev *dev) +{ + struct igc_hw *hw = IGC_DEV_PRIVATE_HW(dev); + uint32_t mrqc; + + mrqc = IGC_READ_REG(hw, IGC_MRQC); + mrqc &= ~IGC_MRQC_ENABLE_MASK; + IGC_WRITE_REG(hw, IGC_MRQC, mrqc); +} + +static void +igc_hw_rss_hash_set(struct igc_hw *hw, struct rte_eth_rss_conf *rss_conf) +{ + uint32_t *hash_key = (uint32_t *)rss_conf->rss_key; + uint32_t mrqc; + uint64_t rss_hf; + + if (hash_key != NULL) { + uint8_t i; + + /* Fill in RSS hash key */ + for (i = 0; i < IGC_HKEY_MAX_INDEX; i++) + IGC_WRITE_REG_LE_VALUE(hw, IGC_RSSRK(i), hash_key[i]); + } + + /* Set configured hashing protocols in MRQC register */ + rss_hf = rss_conf->rss_hf; + mrqc = IGC_MRQC_ENABLE_RSS_4Q; /* RSS enabled. */ + if (rss_hf & ETH_RSS_IPV4) + mrqc |= IGC_MRQC_RSS_FIELD_IPV4; + if (rss_hf & ETH_RSS_NONFRAG_IPV4_TCP) + mrqc |= IGC_MRQC_RSS_FIELD_IPV4_TCP; + if (rss_hf & ETH_RSS_IPV6) + mrqc |= IGC_MRQC_RSS_FIELD_IPV6; + if (rss_hf & ETH_RSS_IPV6_EX) + mrqc |= IGC_MRQC_RSS_FIELD_IPV6_EX; + if (rss_hf & ETH_RSS_NONFRAG_IPV6_TCP) + mrqc |= IGC_MRQC_RSS_FIELD_IPV6_TCP; + if (rss_hf & ETH_RSS_IPV6_TCP_EX) + mrqc |= IGC_MRQC_RSS_FIELD_IPV6_TCP_EX; + if (rss_hf & ETH_RSS_NONFRAG_IPV4_UDP) + mrqc |= IGC_MRQC_RSS_FIELD_IPV4_UDP; + if (rss_hf & ETH_RSS_NONFRAG_IPV6_UDP) + mrqc |= IGC_MRQC_RSS_FIELD_IPV6_UDP; + if (rss_hf & ETH_RSS_IPV6_UDP_EX) + mrqc |= IGC_MRQC_RSS_FIELD_IPV6_UDP_EX; + IGC_WRITE_REG(hw, IGC_MRQC, mrqc); +} + +static void +igc_rss_configure(struct rte_eth_dev *dev) +{ + struct rte_eth_rss_conf rss_conf; + struct igc_hw *hw = IGC_DEV_PRIVATE_HW(dev); + uint16_t i; + + /* Fill in redirection table. */ + for (i = 0; i < IGC_RSS_RDT_SIZD; i++) { + union igc_rss_reta_reg reta; + uint16_t q_idx, reta_idx; + + q_idx = (uint8_t)((dev->data->nb_rx_queues > 1) ? + i % dev->data->nb_rx_queues : 0); + reta_idx = i % sizeof(reta); + reta.bytes[reta_idx] = q_idx; + if (reta_idx == sizeof(reta) - 1) + IGC_WRITE_REG_LE_VALUE(hw, + IGC_RETA(i / sizeof(reta)), reta.dword); + } + + /* + * Configure the RSS key and the RSS protocols used to compute + * the RSS hash of input packets. + */ + rss_conf = dev->data->dev_conf.rx_adv_conf.rss_conf; + if (rss_conf.rss_key == NULL) + rss_conf.rss_key = default_rss_key; + igc_hw_rss_hash_set(hw, &rss_conf); +} + +static int +igc_dev_mq_rx_configure(struct rte_eth_dev *dev) +{ + if (RTE_ETH_DEV_SRIOV(dev).active) { + PMD_DRV_LOG(ERR, "SRIOV unsupported!"); + return -EINVAL; + } + + switch (dev->data->dev_conf.rxmode.mq_mode) { + case ETH_MQ_RX_RSS: + igc_rss_configure(dev); + break; + case ETH_MQ_RX_NONE: + /* + * configure RSS register for following, + * then disable the RSS logic + */ + igc_rss_configure(dev); + igc_rss_disable(dev); + break; + default: + PMD_DRV_LOG(ERR, "rx mode(%d) not supported!", + dev->data->dev_conf.rxmode.mq_mode); + return -EINVAL; + } + return 0; +} + +int +igc_rx_init(struct rte_eth_dev *dev) +{ + struct igc_rx_queue *rxq; + struct igc_hw *hw = IGC_DEV_PRIVATE_HW(dev); + uint64_t offloads = dev->data->dev_conf.rxmode.offloads; + uint32_t max_rx_pkt_len = dev->data->dev_conf.rxmode.max_rx_pkt_len; + uint32_t rctl; + uint32_t rxcsum; + uint16_t buf_size; + uint16_t rctl_bsize; + uint16_t i; + int ret; + + dev->rx_pkt_burst = igc_recv_pkts; + + /* + * Make sure receives are disabled while setting + * up the descriptor ring. + */ + rctl = IGC_READ_REG(hw, IGC_RCTL); + IGC_WRITE_REG(hw, IGC_RCTL, rctl & ~IGC_RCTL_EN); + + /* Configure support of jumbo frames, if any. */ + if (offloads & DEV_RX_OFFLOAD_JUMBO_FRAME) { + rctl |= IGC_RCTL_LPE; + + /* + * Set maximum packet length by default, and might be updated + * together with enabling/disabling dual VLAN. + */ + IGC_WRITE_REG(hw, IGC_RLPML, max_rx_pkt_len); + } else { + rctl &= ~IGC_RCTL_LPE; + } + + /* Configure and enable each RX queue. */ + rctl_bsize = 0; + for (i = 0; i < dev->data->nb_rx_queues; i++) { + uint64_t bus_addr; + uint32_t rxdctl; + uint32_t srrctl; + + rxq = dev->data->rx_queues[i]; + rxq->flags = 0; + + /* Allocate buffers for descriptor rings and set up queue */ + ret = igc_alloc_rx_queue_mbufs(rxq); + if (ret) + return ret; + + /* + * Reset crc_len in case it was changed after queue setup by a + * call to configure + */ + rxq->crc_len = (offloads & DEV_RX_OFFLOAD_KEEP_CRC) ? + RTE_ETHER_CRC_LEN : 0; + + bus_addr = rxq->rx_ring_phys_addr; + IGC_WRITE_REG(hw, IGC_RDLEN(rxq->reg_idx), + rxq->nb_rx_desc * + sizeof(union igc_adv_rx_desc)); + IGC_WRITE_REG(hw, IGC_RDBAH(rxq->reg_idx), + (uint32_t)(bus_addr >> 32)); + IGC_WRITE_REG(hw, IGC_RDBAL(rxq->reg_idx), + (uint32_t)bus_addr); + + /* set descriptor configuration */ + srrctl = IGC_SRRCTL_DESCTYPE_ADV_ONEBUF; + + srrctl |= (uint32_t)(RTE_PKTMBUF_HEADROOM / 64) << + IGC_SRRCTL_BSIZEHEADER_SHIFT; + /* + * Configure RX buffer size. + */ + buf_size = (uint16_t)(rte_pktmbuf_data_room_size(rxq->mb_pool) - + RTE_PKTMBUF_HEADROOM); + if (buf_size >= 1024) { + /* + * Configure the BSIZEPACKET field of the SRRCTL + * register of the queue. + * Value is in 1 KB resolution, from 1 KB to 16 KB. + * If this field is equal to 0b, then RCTL.BSIZE + * determines the RX packet buffer size. + */ + + srrctl |= ((buf_size >> IGC_SRRCTL_BSIZEPKT_SHIFT) & + IGC_SRRCTL_BSIZEPKT_MASK); + buf_size = (uint16_t)((srrctl & + IGC_SRRCTL_BSIZEPKT_MASK) << + IGC_SRRCTL_BSIZEPKT_SHIFT); + + /* It adds dual VLAN length for supporting dual VLAN */ + if (max_rx_pkt_len + 2 * VLAN_TAG_SIZE > buf_size) + dev->data->scattered_rx = 1; + } else { + /* + * Use BSIZE field of the device RCTL register. + */ + if (rctl_bsize == 0 || rctl_bsize > buf_size) + rctl_bsize = buf_size; + dev->data->scattered_rx = 1; + } + + /* Set if packets are dropped when no descriptors available */ + if (rxq->drop_en) + srrctl |= IGC_SRRCTL_DROP_EN; + + IGC_WRITE_REG(hw, IGC_SRRCTL(rxq->reg_idx), srrctl); + + /* Enable this RX queue. */ + rxdctl = IGC_RXDCTL_QUEUE_ENABLE; + rxdctl |= ((uint32_t)rxq->pthresh << IGC_RXDCTL_PTHRESH_SHIFT) & + IGC_RXDCTL_PTHRESH_MSK; + rxdctl |= ((uint32_t)rxq->hthresh << IGC_RXDCTL_HTHRESH_SHIFT) & + IGC_RXDCTL_HTHRESH_MSK; + rxdctl |= ((uint32_t)rxq->wthresh << IGC_RXDCTL_WTHRESH_SHIFT) & + IGC_RXDCTL_WTHRESH_MSK; + IGC_WRITE_REG(hw, IGC_RXDCTL(rxq->reg_idx), rxdctl); + } + + if (offloads & DEV_RX_OFFLOAD_SCATTER) + dev->data->scattered_rx = 1; + + if (dev->data->scattered_rx) { + PMD_DRV_LOG(DEBUG, "forcing scatter mode"); + dev->rx_pkt_burst = igc_recv_scattered_pkts; + } + /* + * Setup BSIZE field of RCTL register, if needed. + * Buffer sizes >= 1024 are not [supposed to be] setup in the RCTL + * register, since the code above configures the SRRCTL register of + * the RX queue in such a case. + * All configurable sizes are: + * 16384: rctl |= (IGC_RCTL_SZ_16384 | IGC_RCTL_BSEX); + * 8192: rctl |= (IGC_RCTL_SZ_8192 | IGC_RCTL_BSEX); + * 4096: rctl |= (IGC_RCTL_SZ_4096 | IGC_RCTL_BSEX); + * 2048: rctl |= IGC_RCTL_SZ_2048; + * 1024: rctl |= IGC_RCTL_SZ_1024; + * 512: rctl |= IGC_RCTL_SZ_512; + * 256: rctl |= IGC_RCTL_SZ_256; + */ + if (rctl_bsize > 0) { + if (rctl_bsize >= 512) /* 512 <= buf_size < 1024 - use 512 */ + rctl |= IGC_RCTL_SZ_512; + else /* 256 <= buf_size < 512 - use 256 */ + rctl |= IGC_RCTL_SZ_256; + } + + /* + * Configure RSS if device configured with multiple RX queues. + */ + igc_dev_mq_rx_configure(dev); + + /* Update the rctl since igc_dev_mq_rx_configure may change its value */ + rctl |= IGC_READ_REG(hw, IGC_RCTL); + + /* + * Setup the Checksum Register. + * Receive Full-Packet Checksum Offload is mutually exclusive with RSS. + */ + rxcsum = IGC_READ_REG(hw, IGC_RXCSUM); + rxcsum |= IGC_RXCSUM_PCSD; + + /* Enable both L3/L4 rx checksum offload */ + if (offloads & DEV_RX_OFFLOAD_IPV4_CKSUM) + rxcsum |= IGC_RXCSUM_IPOFL; + else + rxcsum &= ~IGC_RXCSUM_IPOFL; + + if (offloads & + (DEV_RX_OFFLOAD_TCP_CKSUM | DEV_RX_OFFLOAD_UDP_CKSUM)) { + rxcsum |= IGC_RXCSUM_TUOFL; + offloads |= DEV_RX_OFFLOAD_SCTP_CKSUM; + } else { + rxcsum &= ~IGC_RXCSUM_TUOFL; + } + + if (offloads & DEV_RX_OFFLOAD_SCTP_CKSUM) + rxcsum |= IGC_RXCSUM_CRCOFL; + else + rxcsum &= ~IGC_RXCSUM_CRCOFL; + + IGC_WRITE_REG(hw, IGC_RXCSUM, rxcsum); + + /* Setup the Receive Control Register. */ + if (offloads & DEV_RX_OFFLOAD_KEEP_CRC) + rctl &= ~IGC_RCTL_SECRC; /* Do not Strip Ethernet CRC. */ + else + rctl |= IGC_RCTL_SECRC; /* Strip Ethernet CRC. */ + + rctl &= ~IGC_RCTL_MO_MSK; + rctl &= ~IGC_RCTL_LBM_MSK; + rctl |= IGC_RCTL_EN | IGC_RCTL_BAM | IGC_RCTL_LBM_NO | + IGC_RCTL_DPF | + (hw->mac.mc_filter_type << IGC_RCTL_MO_SHIFT); + + rctl &= ~(IGC_RCTL_HSEL_MSK | IGC_RCTL_CFIEN | IGC_RCTL_CFI | + IGC_RCTL_PSP | IGC_RCTL_PMCF); + + /* Make sure VLAN Filters are off. */ + rctl &= ~IGC_RCTL_VFE; + /* Don't store bad packets. */ + rctl &= ~IGC_RCTL_SBP; + + /* Enable Receives. */ + IGC_WRITE_REG(hw, IGC_RCTL, rctl); + + /* + * Setup the HW Rx Head and Tail Descriptor Pointers. + * This needs to be done after enable. + */ + for (i = 0; i < dev->data->nb_rx_queues; i++) { + rxq = dev->data->rx_queues[i]; + IGC_WRITE_REG(hw, IGC_RDH(rxq->reg_idx), 0); + IGC_WRITE_REG(hw, IGC_RDT(rxq->reg_idx), + rxq->nb_rx_desc - 1); + } + + return 0; +} + +static void +igc_reset_rx_queue(struct igc_rx_queue *rxq) +{ + static const union igc_adv_rx_desc zeroed_desc = { {0} }; + unsigned int i; + + /* Zero out HW ring memory */ + for (i = 0; i < rxq->nb_rx_desc; i++) + rxq->rx_ring[i] = zeroed_desc; + + rxq->rx_tail = 0; + rxq->pkt_first_seg = NULL; + rxq->pkt_last_seg = NULL; +} + +int +eth_igc_rx_queue_setup(struct rte_eth_dev *dev, + uint16_t queue_idx, + uint16_t nb_desc, + unsigned int socket_id, + const struct rte_eth_rxconf *rx_conf, + struct rte_mempool *mp) +{ + struct igc_hw *hw = IGC_DEV_PRIVATE_HW(dev); + const struct rte_memzone *rz; + struct igc_rx_queue *rxq; + unsigned int size; + + /* + * Validate number of receive descriptors. + * It must not exceed hardware maximum, and must be multiple + * of IGC_RX_DESCRIPTOR_MULTIPLE. + */ + if (nb_desc % IGC_RX_DESCRIPTOR_MULTIPLE != 0 || + nb_desc > IGC_MAX_RXD || nb_desc < IGC_MIN_RXD) { + PMD_DRV_LOG(ERR, + "RX descriptor must be multiple of %u(cur: %u) and between %u and %u", + IGC_RX_DESCRIPTOR_MULTIPLE, nb_desc, + IGC_MIN_RXD, IGC_MAX_RXD); + return -EINVAL; + } + + /* Free memory prior to re-allocation if needed */ + if (dev->data->rx_queues[queue_idx] != NULL) { + igc_rx_queue_release(dev->data->rx_queues[queue_idx]); + dev->data->rx_queues[queue_idx] = NULL; + } + + /* First allocate the RX queue data structure. */ + rxq = rte_zmalloc("ethdev RX queue", sizeof(struct igc_rx_queue), + RTE_CACHE_LINE_SIZE); + if (rxq == NULL) + return -ENOMEM; + rxq->offloads = rx_conf->offloads; + rxq->mb_pool = mp; + rxq->nb_rx_desc = nb_desc; + rxq->pthresh = rx_conf->rx_thresh.pthresh; + rxq->hthresh = rx_conf->rx_thresh.hthresh; + rxq->wthresh = rx_conf->rx_thresh.wthresh; + rxq->drop_en = rx_conf->rx_drop_en; + rxq->rx_free_thresh = rx_conf->rx_free_thresh; + rxq->queue_id = queue_idx; + rxq->reg_idx = queue_idx; + rxq->port_id = dev->data->port_id; + + /* + * Allocate RX ring hardware descriptors. A memzone large enough to + * handle the maximum ring size is allocated in order to allow for + * resizing in later calls to the queue setup function. + */ + size = sizeof(union igc_adv_rx_desc) * IGC_MAX_RXD; + rz = rte_eth_dma_zone_reserve(dev, "rx_ring", queue_idx, size, + IGC_ALIGN, socket_id); + if (rz == NULL) { + igc_rx_queue_release(rxq); + return -ENOMEM; + } + rxq->rdt_reg_addr = IGC_PCI_REG_ADDR(hw, IGC_RDT(rxq->reg_idx)); + rxq->rdh_reg_addr = IGC_PCI_REG_ADDR(hw, IGC_RDH(rxq->reg_idx)); + rxq->rx_ring_phys_addr = rz->iova; + rxq->rx_ring = (union igc_adv_rx_desc *)rz->addr; + + /* Allocate software ring. */ + rxq->sw_ring = rte_zmalloc("rxq->sw_ring", + sizeof(struct igc_rx_entry) * nb_desc, + RTE_CACHE_LINE_SIZE); + if (rxq->sw_ring == NULL) { + igc_rx_queue_release(rxq); + return -ENOMEM; + } + + PMD_DRV_LOG(DEBUG, "sw_ring=%p hw_ring=%p dma_addr=0x%" PRIx64, + rxq->sw_ring, rxq->rx_ring, rxq->rx_ring_phys_addr); + + dev->data->rx_queues[queue_idx] = rxq; + igc_reset_rx_queue(rxq); + + return 0; +} + +/* prepare packets for transmit */ +static uint16_t +eth_igc_prep_pkts(__rte_unused void *tx_queue, struct rte_mbuf **tx_pkts, + uint16_t nb_pkts) +{ + int i, ret; + struct rte_mbuf *m; + + for (i = 0; i < nb_pkts; i++) { + m = tx_pkts[i]; + + /* Check some limitations for TSO in hardware */ + if (m->ol_flags & IGC_TX_OFFLOAD_SEG) + if (m->tso_segsz > IGC_TSO_MAX_MSS || + m->l2_len + m->l3_len + m->l4_len > + IGC_TSO_MAX_HDRLEN) { + rte_errno = EINVAL; + return i; + } + + if (m->ol_flags & IGC_TX_OFFLOAD_NOTSUP_MASK) { + rte_errno = ENOTSUP; + return i; + } + +#ifdef RTE_LIBRTE_ETHDEV_DEBUG + ret = rte_validate_tx_offload(m); + if (ret != 0) { + rte_errno = -ret; + return i; + } +#endif + ret = rte_net_intel_cksum_prepare(m); + if (ret != 0) { + rte_errno = -ret; + return i; + } + } + + return i; +} + +/* + *There're some limitations in hardware for TCP segmentation offload. We + *should check whether the parameters are valid. + */ +static inline uint64_t +check_tso_para(uint64_t ol_req, union igc_tx_offload ol_para) +{ + if (!(ol_req & IGC_TX_OFFLOAD_SEG)) + return ol_req; + if (ol_para.tso_segsz > IGC_TSO_MAX_MSS || ol_para.l2_len + + ol_para.l3_len + ol_para.l4_len > IGC_TSO_MAX_HDRLEN) { + ol_req &= ~IGC_TX_OFFLOAD_SEG; + ol_req |= PKT_TX_TCP_CKSUM; + } + return ol_req; +} + +/* + * Check which hardware context can be used. Use the existing match + * or create a new context descriptor. + */ +static inline uint32_t +what_advctx_update(struct igc_tx_queue *txq, uint64_t flags, + union igc_tx_offload tx_offload) +{ + uint32_t curr = txq->ctx_curr; + + /* If match with the current context */ + if (likely(txq->ctx_cache[curr].flags == flags && + txq->ctx_cache[curr].tx_offload.data == + (txq->ctx_cache[curr].tx_offload_mask.data & + tx_offload.data))) { + return curr; + } + + /* Total two context, if match with the second context */ + curr ^= 1; + if (likely(txq->ctx_cache[curr].flags == flags && + txq->ctx_cache[curr].tx_offload.data == + (txq->ctx_cache[curr].tx_offload_mask.data & + tx_offload.data))) { + txq->ctx_curr = curr; + return curr; + } + + /* Mismatch, create new one */ + return IGC_CTX_NUM; +} + +/* + * This is a separate function, looking for optimization opportunity here + * Rework required to go with the pre-defined values. + */ +static inline void +igc_set_xmit_ctx(struct igc_tx_queue *txq, + volatile struct igc_adv_tx_context_desc *ctx_txd, + uint64_t ol_flags, union igc_tx_offload tx_offload) +{ + uint32_t type_tucmd_mlhl; + uint32_t mss_l4len_idx; + uint32_t ctx_curr; + uint32_t vlan_macip_lens; + union igc_tx_offload tx_offload_mask; + + /* Use the previous context */ + txq->ctx_curr ^= 1; + ctx_curr = txq->ctx_curr; + + tx_offload_mask.data = 0; + type_tucmd_mlhl = 0; + + /* Specify which HW CTX to upload. */ + mss_l4len_idx = (ctx_curr << IGC_ADVTXD_IDX_SHIFT); + + if (ol_flags & PKT_TX_VLAN_PKT) + tx_offload_mask.vlan_tci = 0xffff; + + /* check if TCP segmentation required for this packet */ + if (ol_flags & IGC_TX_OFFLOAD_SEG) { + /* implies IP cksum in IPv4 */ + if (ol_flags & PKT_TX_IP_CKSUM) + type_tucmd_mlhl = IGC_ADVTXD_TUCMD_IPV4 | + IGC_ADVTXD_DTYP_CTXT | IGC_ADVTXD_DCMD_DEXT; + else + type_tucmd_mlhl = IGC_ADVTXD_TUCMD_IPV6 | + IGC_ADVTXD_DTYP_CTXT | IGC_ADVTXD_DCMD_DEXT; + + if (ol_flags & PKT_TX_TCP_SEG) + type_tucmd_mlhl |= IGC_ADVTXD_TUCMD_L4T_TCP; + else + type_tucmd_mlhl |= IGC_ADVTXD_TUCMD_L4T_UDP; + + tx_offload_mask.data |= TX_TSO_CMP_MASK; + mss_l4len_idx |= (uint32_t)tx_offload.tso_segsz << + IGC_ADVTXD_MSS_SHIFT; + mss_l4len_idx |= (uint32_t)tx_offload.l4_len << + IGC_ADVTXD_L4LEN_SHIFT; + } else { /* no TSO, check if hardware checksum is needed */ + if (ol_flags & (PKT_TX_IP_CKSUM | PKT_TX_L4_MASK)) + tx_offload_mask.data |= TX_MACIP_LEN_CMP_MASK; + + if (ol_flags & PKT_TX_IP_CKSUM) + type_tucmd_mlhl = IGC_ADVTXD_TUCMD_IPV4; + + switch (ol_flags & PKT_TX_L4_MASK) { + case PKT_TX_TCP_CKSUM: + type_tucmd_mlhl |= IGC_ADVTXD_TUCMD_L4T_TCP | + IGC_ADVTXD_DTYP_CTXT | IGC_ADVTXD_DCMD_DEXT; + mss_l4len_idx |= (uint32_t)sizeof(struct rte_tcp_hdr) + << IGC_ADVTXD_L4LEN_SHIFT; + break; + case PKT_TX_UDP_CKSUM: + type_tucmd_mlhl |= IGC_ADVTXD_TUCMD_L4T_UDP | + IGC_ADVTXD_DTYP_CTXT | IGC_ADVTXD_DCMD_DEXT; + mss_l4len_idx |= (uint32_t)sizeof(struct rte_udp_hdr) + << IGC_ADVTXD_L4LEN_SHIFT; + break; + case PKT_TX_SCTP_CKSUM: + type_tucmd_mlhl |= IGC_ADVTXD_TUCMD_L4T_SCTP | + IGC_ADVTXD_DTYP_CTXT | IGC_ADVTXD_DCMD_DEXT; + mss_l4len_idx |= (uint32_t)sizeof(struct rte_sctp_hdr) + << IGC_ADVTXD_L4LEN_SHIFT; + break; + default: + type_tucmd_mlhl |= IGC_ADVTXD_TUCMD_L4T_RSV | + IGC_ADVTXD_DTYP_CTXT | IGC_ADVTXD_DCMD_DEXT; + break; + } + } + + txq->ctx_cache[ctx_curr].flags = ol_flags; + txq->ctx_cache[ctx_curr].tx_offload.data = + tx_offload_mask.data & tx_offload.data; + txq->ctx_cache[ctx_curr].tx_offload_mask = tx_offload_mask; + + ctx_txd->type_tucmd_mlhl = rte_cpu_to_le_32(type_tucmd_mlhl); + vlan_macip_lens = (uint32_t)tx_offload.data; + ctx_txd->vlan_macip_lens = rte_cpu_to_le_32(vlan_macip_lens); + ctx_txd->mss_l4len_idx = rte_cpu_to_le_32(mss_l4len_idx); + ctx_txd->u.launch_time = 0; +} + +static inline uint32_t +tx_desc_vlan_flags_to_cmdtype(uint64_t ol_flags) +{ + uint32_t cmdtype; + static uint32_t vlan_cmd[2] = {0, IGC_ADVTXD_DCMD_VLE}; + static uint32_t tso_cmd[2] = {0, IGC_ADVTXD_DCMD_TSE}; + cmdtype = vlan_cmd[(ol_flags & PKT_TX_VLAN_PKT) != 0]; + cmdtype |= tso_cmd[(ol_flags & IGC_TX_OFFLOAD_SEG) != 0]; + return cmdtype; +} + +static inline uint32_t +tx_desc_cksum_flags_to_olinfo(uint64_t ol_flags) +{ + static const uint32_t l4_olinfo[2] = {0, IGC_ADVTXD_POPTS_TXSM}; + static const uint32_t l3_olinfo[2] = {0, IGC_ADVTXD_POPTS_IXSM}; + uint32_t tmp; + + tmp = l4_olinfo[(ol_flags & PKT_TX_L4_MASK) != PKT_TX_L4_NO_CKSUM]; + tmp |= l3_olinfo[(ol_flags & PKT_TX_IP_CKSUM) != 0]; + tmp |= l4_olinfo[(ol_flags & IGC_TX_OFFLOAD_SEG) != 0]; + return tmp; +} + +static uint16_t +igc_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t nb_pkts) +{ + struct igc_tx_queue * const txq = tx_queue; + struct igc_tx_entry * const sw_ring = txq->sw_ring; + struct igc_tx_entry *txe, *txn; + volatile union igc_adv_tx_desc * const txr = txq->tx_ring; + volatile union igc_adv_tx_desc *txd; + struct rte_mbuf *tx_pkt; + struct rte_mbuf *m_seg; + uint64_t buf_dma_addr; + uint32_t olinfo_status; + uint32_t cmd_type_len; + uint32_t pkt_len; + uint16_t slen; + uint64_t ol_flags; + uint16_t tx_end; + uint16_t tx_id; + uint16_t tx_last; + uint16_t nb_tx; + uint64_t tx_ol_req; + uint32_t new_ctx = 0; + union igc_tx_offload tx_offload = {0}; + + tx_id = txq->tx_tail; + txe = &sw_ring[tx_id]; + + for (nb_tx = 0; nb_tx < nb_pkts; nb_tx++) { + tx_pkt = *tx_pkts++; + pkt_len = tx_pkt->pkt_len; + + RTE_MBUF_PREFETCH_TO_FREE(txe->mbuf); + + /* + * The number of descriptors that must be allocated for a + * packet is the number of segments of that packet, plus 1 + * Context Descriptor for the VLAN Tag Identifier, if any. + * Determine the last TX descriptor to allocate in the TX ring + * for the packet, starting from the current position (tx_id) + * in the ring. + */ + tx_last = (uint16_t)(tx_id + tx_pkt->nb_segs - 1); + + ol_flags = tx_pkt->ol_flags; + tx_ol_req = ol_flags & IGC_TX_OFFLOAD_MASK; + + /* If a Context Descriptor need be built . */ + if (tx_ol_req) { + tx_offload.l2_len = tx_pkt->l2_len; + tx_offload.l3_len = tx_pkt->l3_len; + tx_offload.l4_len = tx_pkt->l4_len; + tx_offload.vlan_tci = tx_pkt->vlan_tci; + tx_offload.tso_segsz = tx_pkt->tso_segsz; + tx_ol_req = check_tso_para(tx_ol_req, tx_offload); + + new_ctx = what_advctx_update(txq, tx_ol_req, + tx_offload); + /* Only allocate context descriptor if required*/ + new_ctx = (new_ctx >= IGC_CTX_NUM); + tx_last = (uint16_t)(tx_last + new_ctx); + } + if (tx_last >= txq->nb_tx_desc) + tx_last = (uint16_t)(tx_last - txq->nb_tx_desc); + + PMD_TX_LOG(DEBUG, + "port_id=%u queue_id=%u pktlen=%u tx_first=%u tx_last=%u", + txq->port_id, txq->queue_id, pkt_len, tx_id, tx_last); + + /* + * Check if there are enough free descriptors in the TX ring + * to transmit the next packet. + * This operation is based on the two following rules: + * + * 1- Only check that the last needed TX descriptor can be + * allocated (by construction, if that descriptor is free, + * all intermediate ones are also free). + * + * For this purpose, the index of the last TX descriptor + * used for a packet (the "last descriptor" of a packet) + * is recorded in the TX entries (the last one included) + * that are associated with all TX descriptors allocated + * for that packet. + * + * 2- Avoid to allocate the last free TX descriptor of the + * ring, in order to never set the TDT register with the + * same value stored in parallel by the NIC in the TDH + * register, which makes the TX engine of the NIC enter + * in a deadlock situation. + * + * By extension, avoid to allocate a free descriptor that + * belongs to the last set of free descriptors allocated + * to the same packet previously transmitted. + */ + + /* + * The "last descriptor" of the previously sent packet, if any, + * which used the last descriptor to allocate. + */ + tx_end = sw_ring[tx_last].last_id; + + /* + * The next descriptor following that "last descriptor" in the + * ring. + */ + tx_end = sw_ring[tx_end].next_id; + + /* + * The "last descriptor" associated with that next descriptor. + */ + tx_end = sw_ring[tx_end].last_id; + + /* + * Check that this descriptor is free. + */ + if (!(txr[tx_end].wb.status & IGC_TXD_STAT_DD)) { + if (nb_tx == 0) + return 0; + goto end_of_tx; + } + + /* + * Set common flags of all TX Data Descriptors. + * + * The following bits must be set in all Data Descriptors: + * - IGC_ADVTXD_DTYP_DATA + * - IGC_ADVTXD_DCMD_DEXT + * + * The following bits must be set in the first Data Descriptor + * and are ignored in the other ones: + * - IGC_ADVTXD_DCMD_IFCS + * - IGC_ADVTXD_MAC_1588 + * - IGC_ADVTXD_DCMD_VLE + * + * The following bits must only be set in the last Data + * Descriptor: + * - IGC_TXD_CMD_EOP + * + * The following bits can be set in any Data Descriptor, but + * are only set in the last Data Descriptor: + * - IGC_TXD_CMD_RS + */ + cmd_type_len = txq->txd_type | + IGC_ADVTXD_DCMD_IFCS | IGC_ADVTXD_DCMD_DEXT; + if (tx_ol_req & IGC_TX_OFFLOAD_SEG) + pkt_len -= (tx_pkt->l2_len + tx_pkt->l3_len + + tx_pkt->l4_len); + olinfo_status = (pkt_len << IGC_ADVTXD_PAYLEN_SHIFT); + + /* + * Timer 0 should be used to for packet timestamping, + * sample the packet timestamp to reg 0 + */ + if (ol_flags & PKT_TX_IEEE1588_TMST) + cmd_type_len |= IGC_ADVTXD_MAC_TSTAMP; + + if (tx_ol_req) { + /* Setup TX Advanced context descriptor if required */ + if (new_ctx) { + volatile struct igc_adv_tx_context_desc * + ctx_txd = (volatile struct + igc_adv_tx_context_desc *)&txr[tx_id]; + + txn = &sw_ring[txe->next_id]; + RTE_MBUF_PREFETCH_TO_FREE(txn->mbuf); + + if (txe->mbuf != NULL) { + rte_pktmbuf_free_seg(txe->mbuf); + txe->mbuf = NULL; + } + + igc_set_xmit_ctx(txq, ctx_txd, tx_ol_req, + tx_offload); + + txe->last_id = tx_last; + tx_id = txe->next_id; + txe = txn; + } + + /* Setup the TX Advanced Data Descriptor */ + cmd_type_len |= + tx_desc_vlan_flags_to_cmdtype(tx_ol_req); + olinfo_status |= + tx_desc_cksum_flags_to_olinfo(tx_ol_req); + olinfo_status |= (uint32_t)txq->ctx_curr << + IGC_ADVTXD_IDX_SHIFT; + } + + m_seg = tx_pkt; + do { + txn = &sw_ring[txe->next_id]; + RTE_MBUF_PREFETCH_TO_FREE(txn->mbuf); + + txd = &txr[tx_id]; + + if (txe->mbuf != NULL) + rte_pktmbuf_free_seg(txe->mbuf); + txe->mbuf = m_seg; + + /* Set up transmit descriptor */ + slen = (uint16_t)m_seg->data_len; + buf_dma_addr = rte_mbuf_data_iova(m_seg); + txd->read.buffer_addr = + rte_cpu_to_le_64(buf_dma_addr); + txd->read.cmd_type_len = + rte_cpu_to_le_32(cmd_type_len | slen); + txd->read.olinfo_status = + rte_cpu_to_le_32(olinfo_status); + txe->last_id = tx_last; + tx_id = txe->next_id; + txe = txn; + m_seg = m_seg->next; + } while (m_seg != NULL); + + /* + * The last packet data descriptor needs End Of Packet (EOP) + * and Report Status (RS). + */ + txd->read.cmd_type_len |= + rte_cpu_to_le_32(IGC_TXD_CMD_EOP | IGC_TXD_CMD_RS); + } +end_of_tx: + rte_wmb(); + + /* + * Set the Transmit Descriptor Tail (TDT). + */ + IGC_PCI_REG_WRITE_RELAXED(txq->tdt_reg_addr, tx_id); + PMD_TX_LOG(DEBUG, "port_id=%u queue_id=%u tx_tail=%u nb_tx=%u", + txq->port_id, txq->queue_id, tx_id, nb_tx); + txq->tx_tail = tx_id; + + return nb_tx; +} + +int eth_igc_tx_descriptor_status(void *tx_queue, uint16_t offset) +{ + struct igc_tx_queue *txq = tx_queue; + volatile uint32_t *status; + uint32_t desc; + + if (unlikely(!txq || offset >= txq->nb_tx_desc)) + return -EINVAL; + + desc = txq->tx_tail + offset; + if (desc >= txq->nb_tx_desc) + desc -= txq->nb_tx_desc; + + status = &txq->tx_ring[desc].wb.status; + if (*status & rte_cpu_to_le_32(IGC_TXD_STAT_DD)) + return RTE_ETH_TX_DESC_DONE; + + return RTE_ETH_TX_DESC_FULL; +} + +static void +igc_tx_queue_release_mbufs(struct igc_tx_queue *txq) +{ + unsigned int i; + + if (txq->sw_ring != NULL) { + for (i = 0; i < txq->nb_tx_desc; i++) { + if (txq->sw_ring[i].mbuf != NULL) { + rte_pktmbuf_free_seg(txq->sw_ring[i].mbuf); + txq->sw_ring[i].mbuf = NULL; + } + } + } +} + +static void +igc_tx_queue_release(struct igc_tx_queue *txq) +{ + igc_tx_queue_release_mbufs(txq); + rte_free(txq->sw_ring); + rte_free(txq); +} + +void eth_igc_tx_queue_release(void *txq) +{ + if (txq) + igc_tx_queue_release(txq); +} + +static void +igc_reset_tx_queue_stat(struct igc_tx_queue *txq) +{ + txq->tx_head = 0; + txq->tx_tail = 0; + txq->ctx_curr = 0; + memset((void *)&txq->ctx_cache, 0, + IGC_CTX_NUM * sizeof(struct igc_advctx_info)); +} + +static void +igc_reset_tx_queue(struct igc_tx_queue *txq) +{ + struct igc_tx_entry *txe = txq->sw_ring; + uint16_t i, prev; + + /* Initialize ring entries */ + prev = (uint16_t)(txq->nb_tx_desc - 1); + for (i = 0; i < txq->nb_tx_desc; i++) { + volatile union igc_adv_tx_desc *txd = &txq->tx_ring[i]; + + txd->wb.status = IGC_TXD_STAT_DD; + txe[i].mbuf = NULL; + txe[i].last_id = i; + txe[prev].next_id = i; + prev = i; + } + + txq->txd_type = IGC_ADVTXD_DTYP_DATA; + igc_reset_tx_queue_stat(txq); +} + +/* + * clear all rx/tx queue + */ +void +igc_dev_clear_queues(struct rte_eth_dev *dev) +{ + uint16_t i; + struct igc_tx_queue *txq; + struct igc_rx_queue *rxq; + + for (i = 0; i < dev->data->nb_tx_queues; i++) { + txq = dev->data->tx_queues[i]; + if (txq != NULL) { + igc_tx_queue_release_mbufs(txq); + igc_reset_tx_queue(txq); + } + } + + for (i = 0; i < dev->data->nb_rx_queues; i++) { + rxq = dev->data->rx_queues[i]; + if (rxq != NULL) { + igc_rx_queue_release_mbufs(rxq); + igc_reset_rx_queue(rxq); + } + } +} + +int eth_igc_tx_queue_setup(struct rte_eth_dev *dev, uint16_t queue_idx, + uint16_t nb_desc, unsigned int socket_id, + const struct rte_eth_txconf *tx_conf) +{ + const struct rte_memzone *tz; + struct igc_tx_queue *txq; + struct igc_hw *hw; + uint32_t size; + + if (nb_desc % IGC_TX_DESCRIPTOR_MULTIPLE != 0 || + nb_desc > IGC_MAX_TXD || nb_desc < IGC_MIN_TXD) { + PMD_DRV_LOG(ERR, + "TX-descriptor must be a multiple of %u and between %u and %u, cur: %u", + IGC_TX_DESCRIPTOR_MULTIPLE, + IGC_MAX_TXD, IGC_MIN_TXD, nb_desc); + return -EINVAL; + } + + hw = IGC_DEV_PRIVATE_HW(dev); + + /* + * The tx_free_thresh and tx_rs_thresh values are not used in the 2.5G + * driver. + */ + if (tx_conf->tx_free_thresh != 0) + PMD_DRV_LOG(INFO, + "The tx_free_thresh parameter is not used for the 2.5G driver"); + if (tx_conf->tx_rs_thresh != 0) + PMD_DRV_LOG(INFO, + "The tx_rs_thresh parameter is not used for the 2.5G driver"); + if (tx_conf->tx_thresh.wthresh == 0) + PMD_DRV_LOG(INFO, + "To improve 2.5G driver performance, consider setting the TX WTHRESH value to 4, 8, or 16."); + + /* Free memory prior to re-allocation if needed */ + if (dev->data->tx_queues[queue_idx] != NULL) { + igc_tx_queue_release(dev->data->tx_queues[queue_idx]); + dev->data->tx_queues[queue_idx] = NULL; + } + + /* First allocate the tx queue data structure */ + txq = rte_zmalloc("ethdev TX queue", sizeof(struct igc_tx_queue), + RTE_CACHE_LINE_SIZE); + if (txq == NULL) + return -ENOMEM; + + /* + * Allocate TX ring hardware descriptors. A memzone large enough to + * handle the maximum ring size is allocated in order to allow for + * resizing in later calls to the queue setup function. + */ + size = sizeof(union igc_adv_tx_desc) * IGC_MAX_TXD; + tz = rte_eth_dma_zone_reserve(dev, "tx_ring", queue_idx, size, + IGC_ALIGN, socket_id); + if (tz == NULL) { + igc_tx_queue_release(txq); + return -ENOMEM; + } + + txq->nb_tx_desc = nb_desc; + txq->pthresh = tx_conf->tx_thresh.pthresh; + txq->hthresh = tx_conf->tx_thresh.hthresh; + txq->wthresh = tx_conf->tx_thresh.wthresh; + + txq->queue_id = queue_idx; + txq->reg_idx = queue_idx; + txq->port_id = dev->data->port_id; + + txq->tdt_reg_addr = IGC_PCI_REG_ADDR(hw, IGC_TDT(txq->reg_idx)); + txq->tx_ring_phys_addr = tz->iova; + + txq->tx_ring = (union igc_adv_tx_desc *)tz->addr; + /* Allocate software ring */ + txq->sw_ring = rte_zmalloc("txq->sw_ring", + sizeof(struct igc_tx_entry) * nb_desc, + RTE_CACHE_LINE_SIZE); + if (txq->sw_ring == NULL) { + igc_tx_queue_release(txq); + return -ENOMEM; + } + PMD_DRV_LOG(DEBUG, "sw_ring=%p hw_ring=%p dma_addr=0x%" PRIx64, + txq->sw_ring, txq->tx_ring, txq->tx_ring_phys_addr); + + igc_reset_tx_queue(txq); + dev->tx_pkt_burst = igc_xmit_pkts; + dev->tx_pkt_prepare = ð_igc_prep_pkts; + dev->data->tx_queues[queue_idx] = txq; + txq->offloads = tx_conf->offloads; + + return 0; +} + +int +eth_igc_tx_done_cleanup(void *txqueue, uint32_t free_cnt) +{ + struct igc_tx_queue *txq = txqueue; + struct igc_tx_entry *sw_ring; + volatile union igc_adv_tx_desc *txr; + uint16_t tx_first; /* First segment analyzed. */ + uint16_t tx_id; /* Current segment being processed. */ + uint16_t tx_last; /* Last segment in the current packet. */ + uint16_t tx_next; /* First segment of the next packet. */ + uint32_t count; + + if (txq == NULL) + return -ENODEV; + + count = 0; + sw_ring = txq->sw_ring; + txr = txq->tx_ring; + + /* + * tx_tail is the last sent packet on the sw_ring. Goto the end + * of that packet (the last segment in the packet chain) and + * then the next segment will be the start of the oldest segment + * in the sw_ring. This is the first packet that will be + * attempted to be freed. + */ + + /* Get last segment in most recently added packet. */ + tx_first = sw_ring[txq->tx_tail].last_id; + + /* Get the next segment, which is the oldest segment in ring. */ + tx_first = sw_ring[tx_first].next_id; + + /* Set the current index to the first. */ + tx_id = tx_first; + + /* + * Loop through each packet. For each packet, verify that an + * mbuf exists and that the last segment is free. If so, free + * it and move on. + */ + while (1) { + tx_last = sw_ring[tx_id].last_id; + + if (sw_ring[tx_last].mbuf) { + if (!(txr[tx_last].wb.status & + rte_cpu_to_le_32(IGC_TXD_STAT_DD))) + break; + + /* Get the start of the next packet. */ + tx_next = sw_ring[tx_last].next_id; + + /* + * Loop through all segments in a + * packet. + */ + do { + rte_pktmbuf_free_seg(sw_ring[tx_id].mbuf); + sw_ring[tx_id].mbuf = NULL; + sw_ring[tx_id].last_id = tx_id; + + /* Move to next segemnt. */ + tx_id = sw_ring[tx_id].next_id; + } while (tx_id != tx_next); + + /* + * Increment the number of packets + * freed. + */ + count++; + if (unlikely(count == free_cnt)) + break; + } else { + /* + * There are multiple reasons to be here: + * 1) All the packets on the ring have been + * freed - tx_id is equal to tx_first + * and some packets have been freed. + * - Done, exit + * 2) Interfaces has not sent a rings worth of + * packets yet, so the segment after tail is + * still empty. Or a previous call to this + * function freed some of the segments but + * not all so there is a hole in the list. + * Hopefully this is a rare case. + * - Walk the list and find the next mbuf. If + * there isn't one, then done. + */ + if (likely(tx_id == tx_first && count != 0)) + break; + + /* + * Walk the list and find the next mbuf, if any. + */ + do { + /* Move to next segemnt. */ + tx_id = sw_ring[tx_id].next_id; + + if (sw_ring[tx_id].mbuf) + break; + + } while (tx_id != tx_first); + + /* + * Determine why previous loop bailed. If there + * is not an mbuf, done. + */ + if (sw_ring[tx_id].mbuf == NULL) + break; + } + } + + return count; +} + +void +igc_tx_init(struct rte_eth_dev *dev) +{ + struct igc_hw *hw = IGC_DEV_PRIVATE_HW(dev); + uint32_t tctl; + uint32_t txdctl; + uint16_t i; + + /* Setup the Base and Length of the Tx Descriptor Rings. */ + for (i = 0; i < dev->data->nb_tx_queues; i++) { + struct igc_tx_queue *txq = dev->data->tx_queues[i]; + uint64_t bus_addr = txq->tx_ring_phys_addr; + + IGC_WRITE_REG(hw, IGC_TDLEN(txq->reg_idx), + txq->nb_tx_desc * + sizeof(union igc_adv_tx_desc)); + IGC_WRITE_REG(hw, IGC_TDBAH(txq->reg_idx), + (uint32_t)(bus_addr >> 32)); + IGC_WRITE_REG(hw, IGC_TDBAL(txq->reg_idx), + (uint32_t)bus_addr); + + /* Setup the HW Tx Head and Tail descriptor pointers. */ + IGC_WRITE_REG(hw, IGC_TDT(txq->reg_idx), 0); + IGC_WRITE_REG(hw, IGC_TDH(txq->reg_idx), 0); + + /* Setup Transmit threshold registers. */ + txdctl = ((uint32_t)txq->pthresh << IGC_TXDCTL_PTHRESH_SHIFT) & + IGC_TXDCTL_PTHRESH_MSK; + txdctl |= ((uint32_t)txq->hthresh << IGC_TXDCTL_HTHRESH_SHIFT) & + IGC_TXDCTL_HTHRESH_MSK; + txdctl |= ((uint32_t)txq->wthresh << IGC_TXDCTL_WTHRESH_SHIFT) & + IGC_TXDCTL_WTHRESH_MSK; + txdctl |= IGC_TXDCTL_QUEUE_ENABLE; + IGC_WRITE_REG(hw, IGC_TXDCTL(txq->reg_idx), txdctl); + } + + igc_config_collision_dist(hw); + + /* Program the Transmit Control Register. */ + tctl = IGC_READ_REG(hw, IGC_TCTL); + tctl &= ~IGC_TCTL_CT; + tctl |= (IGC_TCTL_PSP | IGC_TCTL_RTLC | IGC_TCTL_EN | + ((uint32_t)IGC_COLLISION_THRESHOLD << IGC_CT_SHIFT)); + + /* This write will effectively turn on the transmit unit. */ + IGC_WRITE_REG(hw, IGC_TCTL, tctl); +} + +void +eth_igc_rxq_info_get(struct rte_eth_dev *dev, uint16_t queue_id, + struct rte_eth_rxq_info *qinfo) +{ + struct igc_rx_queue *rxq; + + rxq = dev->data->rx_queues[queue_id]; + + qinfo->mp = rxq->mb_pool; + qinfo->scattered_rx = dev->data->scattered_rx; + qinfo->nb_desc = rxq->nb_rx_desc; + + qinfo->conf.rx_free_thresh = rxq->rx_free_thresh; + qinfo->conf.rx_drop_en = rxq->drop_en; + qinfo->conf.offloads = rxq->offloads; + qinfo->conf.rx_thresh.hthresh = rxq->hthresh; + qinfo->conf.rx_thresh.pthresh = rxq->pthresh; + qinfo->conf.rx_thresh.wthresh = rxq->wthresh; +} + +void +eth_igc_txq_info_get(struct rte_eth_dev *dev, uint16_t queue_id, + struct rte_eth_txq_info *qinfo) +{ + struct igc_tx_queue *txq; + + txq = dev->data->tx_queues[queue_id]; + + qinfo->nb_desc = txq->nb_tx_desc; + + qinfo->conf.tx_thresh.pthresh = txq->pthresh; + qinfo->conf.tx_thresh.hthresh = txq->hthresh; + qinfo->conf.tx_thresh.wthresh = txq->wthresh; + qinfo->conf.offloads = txq->offloads; +} diff --git a/drivers/net/igc/igc_txrx.h b/drivers/net/igc/igc_txrx.h new file mode 100644 index 0000000000..1b66b3ff4e --- /dev/null +++ b/drivers/net/igc/igc_txrx.h @@ -0,0 +1,50 @@ +/* SPDX-License-Identifier: BSD-3-Clause + * Copyright(c) 2020 Intel Corporation + */ + +#ifndef _IGC_TXRX_H_ +#define _IGC_TXRX_H_ + +#include "igc_ethdev.h" + +#ifdef __cplusplus +extern "C" { +#endif + +/* + * RX/TX function prototypes + */ +void eth_igc_tx_queue_release(void *txq); +void eth_igc_rx_queue_release(void *rxq); +void igc_dev_clear_queues(struct rte_eth_dev *dev); +int eth_igc_rx_queue_setup(struct rte_eth_dev *dev, uint16_t rx_queue_id, + uint16_t nb_rx_desc, unsigned int socket_id, + const struct rte_eth_rxconf *rx_conf, + struct rte_mempool *mb_pool); + +uint32_t eth_igc_rx_queue_count(struct rte_eth_dev *dev, + uint16_t rx_queue_id); + +int eth_igc_rx_descriptor_done(void *rx_queue, uint16_t offset); + +int eth_igc_rx_descriptor_status(void *rx_queue, uint16_t offset); + +int eth_igc_tx_descriptor_status(void *tx_queue, uint16_t offset); + +int eth_igc_tx_queue_setup(struct rte_eth_dev *dev, uint16_t queue_idx, + uint16_t nb_desc, unsigned int socket_id, + const struct rte_eth_txconf *tx_conf); +int eth_igc_tx_done_cleanup(void *txqueue, uint32_t free_cnt); + +int igc_rx_init(struct rte_eth_dev *dev); +void igc_tx_init(struct rte_eth_dev *dev); +void eth_igc_rxq_info_get(struct rte_eth_dev *dev, uint16_t queue_id, + struct rte_eth_rxq_info *qinfo); +void eth_igc_txq_info_get(struct rte_eth_dev *dev, uint16_t queue_id, + struct rte_eth_txq_info *qinfo); + +#ifdef __cplusplus +} +#endif + +#endif /* _IGC_TXRX_H_ */ diff --git a/drivers/net/igc/meson.build b/drivers/net/igc/meson.build index aa211d6a56..e402f26106 100644 --- a/drivers/net/igc/meson.build +++ b/drivers/net/igc/meson.build @@ -6,7 +6,8 @@ objs = [base_objs] sources = files( 'igc_logs.c', - 'igc_ethdev.c' + 'igc_ethdev.c', + 'igc_txrx.c' ) includes += include_directories('base') -- 2.20.1