mbuf: add new Rx flags for stripped VLAN
[dpdk.git] / drivers / net / nfp / nfp_net.c
index ff9a8d6..5c9f350 100644 (file)
  * Netronome vNIC DPDK Poll-Mode Driver: Main entry point
  */
 
-#include <stdio.h>
-#include <stdlib.h>
-#include <stdint.h>
-#include <unistd.h>
-#include <string.h>
-#include <sys/mman.h>
-#include <sys/socket.h>
-#include <sys/io.h>
-#include <assert.h>
-#include <time.h>
 #include <math.h>
-#include <inttypes.h>
 
 #include <rte_byteorder.h>
 #include <rte_common.h>
@@ -65,6 +54,7 @@
 #include <rte_version.h>
 #include <rte_string_fns.h>
 #include <rte_alarm.h>
+#include <rte_spinlock.h>
 
 #include "nfp_net_pmd.h"
 #include "nfp_net_logs.h"
@@ -73,6 +63,9 @@
 /* Prototypes */
 static void nfp_net_close(struct rte_eth_dev *dev);
 static int nfp_net_configure(struct rte_eth_dev *dev);
+static void nfp_net_dev_interrupt_handler(struct rte_intr_handle *handle,
+                                         void *param);
+static void nfp_net_dev_interrupt_delayed_handler(void *param);
 static int nfp_net_dev_mtu_set(struct rte_eth_dev *dev, uint16_t mtu);
 static void nfp_net_infos_get(struct rte_eth_dev *dev,
                              struct rte_eth_dev_info *dev_info);
@@ -330,7 +323,7 @@ nfp_net_tx_queue_release_mbufs(struct nfp_net_txq *txq)
 
        for (i = 0; i < txq->tx_count; i++) {
                if (txq->txbufs[i].mbuf) {
-                       rte_pktmbuf_free_seg(txq->txbufs[i].mbuf);
+                       rte_pktmbuf_free(txq->txbufs[i].mbuf);
                        txq->txbufs[i].mbuf = NULL;
                }
        }
@@ -355,6 +348,7 @@ nfp_net_reset_tx_queue(struct nfp_net_txq *txq)
        txq->wr_p = 0;
        txq->rd_p = 0;
        txq->tail = 0;
+       txq->qcp_rd_p = 0;
 }
 
 static int
@@ -414,6 +408,8 @@ nfp_net_reconfig(struct nfp_net_hw *hw, uint32_t ctrl, uint32_t update)
        PMD_DRV_LOG(DEBUG, "nfp_net_reconfig: ctrl=%08x update=%08x\n",
                    ctrl, update);
 
+       rte_spinlock_lock(&hw->reconfig_lock);
+
        nn_cfg_writel(hw, NFP_NET_CFG_CTRL, ctrl);
        nn_cfg_writel(hw, NFP_NET_CFG_UPDATE, update);
 
@@ -421,6 +417,8 @@ nfp_net_reconfig(struct nfp_net_hw *hw, uint32_t ctrl, uint32_t update)
 
        err = __nfp_net_reconfig(hw, update);
 
+       rte_spinlock_unlock(&hw->reconfig_lock);
+
        if (!err)
                return 0;
 
@@ -731,6 +729,7 @@ nfp_net_close(struct rte_eth_dev *dev)
 
        nfp_net_stop(dev);
 
+       rte_intr_disable(&dev->pci_dev->intr_handle);
        nn_cfg_writeb(hw, NFP_NET_CFG_LSC, 0xff);
 
        /*
@@ -823,11 +822,11 @@ nfp_net_link_update(struct rte_eth_dev *dev, __rte_unused int wait_to_complete)
        memset(&link, 0, sizeof(struct rte_eth_link));
 
        if (nn_link_status & NFP_NET_CFG_STS_LINK)
-               link.link_status = 1;
+               link.link_status = ETH_LINK_UP;
 
        link.link_duplex = ETH_LINK_FULL_DUPLEX;
        /* Other cards can limit the tx and rx rate per VF */
-       link.link_speed = ETH_LINK_SPEED_40G;
+       link.link_speed = ETH_SPEED_NUM_40G;
 
        if (old.link_status != link.link_status) {
                nfp_net_dev_atomic_write_link_status(dev, &link);
@@ -908,11 +907,6 @@ nfp_net_stats_get(struct rte_eth_dev *dev, struct rte_eth_stats *stats)
 
        nfp_dev_stats.obytes -= hw->eth_stats_base.obytes;
 
-       nfp_dev_stats.imcasts =
-               nn_cfg_readq(hw, NFP_NET_CFG_STATS_RX_MC_FRAMES);
-
-       nfp_dev_stats.imcasts -= hw->eth_stats_base.imcasts;
-
        /* reading general device stats */
        nfp_dev_stats.ierrors =
                nn_cfg_readq(hw, NFP_NET_CFG_STATS_RX_ERRORS);
@@ -924,12 +918,6 @@ nfp_net_stats_get(struct rte_eth_dev *dev, struct rte_eth_stats *stats)
 
        nfp_dev_stats.oerrors -= hw->eth_stats_base.oerrors;
 
-       /* Multicast frames received */
-       nfp_dev_stats.imcasts =
-               nn_cfg_readq(hw, NFP_NET_CFG_STATS_RX_MC_FRAMES);
-
-       nfp_dev_stats.imcasts -= hw->eth_stats_base.imcasts;
-
        /* RX ring mbuf allocation failures */
        nfp_dev_stats.rx_nombuf = dev->data->rx_mbuf_alloc_failed;
 
@@ -991,9 +979,6 @@ nfp_net_stats_reset(struct rte_eth_dev *dev)
        hw->eth_stats_base.obytes =
                nn_cfg_readq(hw, NFP_NET_CFG_STATS_TX_OCTETS);
 
-       hw->eth_stats_base.imcasts =
-               nn_cfg_readq(hw, NFP_NET_CFG_STATS_RX_MC_FRAMES);
-
        /* reading general device stats */
        hw->eth_stats_base.ierrors =
                nn_cfg_readq(hw, NFP_NET_CFG_STATS_RX_ERRORS);
@@ -1001,10 +986,6 @@ nfp_net_stats_reset(struct rte_eth_dev *dev)
        hw->eth_stats_base.oerrors =
                nn_cfg_readq(hw, NFP_NET_CFG_STATS_TX_ERRORS);
 
-       /* Multicast frames received */
-       hw->eth_stats_base.imcasts =
-               nn_cfg_readq(hw, NFP_NET_CFG_STATS_RX_MC_FRAMES);
-
        /* RX ring mbuf allocation failures */
        dev->data->rx_mbuf_alloc_failed = 0;
 
@@ -1066,9 +1047,26 @@ nfp_net_infos_get(struct rte_eth_dev *dev, struct rte_eth_dev_info *dev_info)
        };
 
        dev_info->reta_size = NFP_NET_CFG_RSS_ITBL_SZ;
-#if RTE_VER_MAJOR == 2 && RTE_VER_MINOR >= 1
        dev_info->hash_key_size = NFP_NET_CFG_RSS_KEY_SZ;
-#endif
+
+       dev_info->speed_capa = ETH_LINK_SPEED_40G | ETH_LINK_SPEED_100G;
+}
+
+static const uint32_t *
+nfp_net_supported_ptypes_get(struct rte_eth_dev *dev)
+{
+       static const uint32_t ptypes[] = {
+               /* refers to nfp_net_set_hash() */
+               RTE_PTYPE_INNER_L3_IPV4,
+               RTE_PTYPE_INNER_L3_IPV6,
+               RTE_PTYPE_INNER_L3_IPV6_EXT,
+               RTE_PTYPE_INNER_L4_MASK,
+               RTE_PTYPE_UNKNOWN
+       };
+
+       if (dev->rx_pkt_burst == nfp_net_recv_pkts)
+               return ptypes;
+       return NULL;
 }
 
 static uint32_t
@@ -1115,6 +1113,114 @@ nfp_net_rx_queue_count(struct rte_eth_dev *dev, uint16_t queue_idx)
        return count;
 }
 
+static void
+nfp_net_dev_link_status_print(struct rte_eth_dev *dev)
+{
+       struct rte_eth_link link;
+
+       memset(&link, 0, sizeof(link));
+       nfp_net_dev_atomic_read_link_status(dev, &link);
+       if (link.link_status)
+               RTE_LOG(INFO, PMD, "Port %d: Link Up - speed %u Mbps - %s\n",
+                       (int)(dev->data->port_id), (unsigned)link.link_speed,
+                       link.link_duplex == ETH_LINK_FULL_DUPLEX
+                       ? "full-duplex" : "half-duplex");
+       else
+               RTE_LOG(INFO, PMD, " Port %d: Link Down\n",
+                       (int)(dev->data->port_id));
+
+       RTE_LOG(INFO, PMD, "PCI Address: %04d:%02d:%02d:%d\n",
+               dev->pci_dev->addr.domain, dev->pci_dev->addr.bus,
+               dev->pci_dev->addr.devid, dev->pci_dev->addr.function);
+}
+
+/* Interrupt configuration and handling */
+
+/*
+ * nfp_net_irq_unmask - Unmask an interrupt
+ *
+ * If MSI-X auto-masking is enabled clear the mask bit, otherwise
+ * clear the ICR for the entry.
+ */
+static void
+nfp_net_irq_unmask(struct rte_eth_dev *dev)
+{
+       struct nfp_net_hw *hw;
+
+       hw = NFP_NET_DEV_PRIVATE_TO_HW(dev->data->dev_private);
+
+       if (hw->ctrl & NFP_NET_CFG_CTRL_MSIXAUTO) {
+               /* If MSI-X auto-masking is used, clear the entry */
+               rte_wmb();
+               rte_intr_enable(&dev->pci_dev->intr_handle);
+       } else {
+               /* Make sure all updates are written before un-masking */
+               rte_wmb();
+               nn_cfg_writeb(hw, NFP_NET_CFG_ICR(NFP_NET_IRQ_LSC_IDX),
+                             NFP_NET_CFG_ICR_UNMASKED);
+       }
+}
+
+static void
+nfp_net_dev_interrupt_handler(__rte_unused struct rte_intr_handle *handle,
+                             void *param)
+{
+       int64_t timeout;
+       struct rte_eth_link link;
+       struct rte_eth_dev *dev = (struct rte_eth_dev *)param;
+
+       PMD_DRV_LOG(DEBUG, "We got a LSC interrupt!!!\n");
+
+       /* get the link status */
+       memset(&link, 0, sizeof(link));
+       nfp_net_dev_atomic_read_link_status(dev, &link);
+
+       nfp_net_link_update(dev, 0);
+
+       /* likely to up */
+       if (!link.link_status) {
+               /* handle it 1 sec later, wait it being stable */
+               timeout = NFP_NET_LINK_UP_CHECK_TIMEOUT;
+               /* likely to down */
+       } else {
+               /* handle it 4 sec later, wait it being stable */
+               timeout = NFP_NET_LINK_DOWN_CHECK_TIMEOUT;
+       }
+
+       if (rte_eal_alarm_set(timeout * 1000,
+                             nfp_net_dev_interrupt_delayed_handler,
+                             (void *)dev) < 0) {
+               RTE_LOG(ERR, PMD, "Error setting alarm");
+               /* Unmasking */
+               nfp_net_irq_unmask(dev);
+       }
+}
+
+/*
+ * Interrupt handler which shall be registered for alarm callback for delayed
+ * handling specific interrupt to wait for the stable nic state. As the NIC
+ * interrupt state is not stable for nfp after link is just down, it needs
+ * to wait 4 seconds to get the stable status.
+ *
+ * @param handle   Pointer to interrupt handle.
+ * @param param    The address of parameter (struct rte_eth_dev *)
+ *
+ * @return  void
+ */
+static void
+nfp_net_dev_interrupt_delayed_handler(void *param)
+{
+       struct rte_eth_dev *dev = (struct rte_eth_dev *)param;
+
+       nfp_net_link_update(dev, 0);
+       _rte_eth_dev_callback_process(dev, RTE_ETH_EVENT_INTR_LSC);
+
+       nfp_net_dev_link_status_print(dev);
+
+       /* Unmasking */
+       nfp_net_irq_unmask(dev);
+}
+
 static int
 nfp_net_dev_mtu_set(struct rte_eth_dev *dev, uint16_t mtu)
 {
@@ -1163,7 +1269,7 @@ nfp_net_rx_queue_setup(struct rte_eth_dev *dev,
            (nb_desc > NFP_NET_MAX_RX_DESC) ||
            (nb_desc < NFP_NET_MIN_RX_DESC)) {
                RTE_LOG(ERR, PMD, "Wrong nb_desc value\n");
-               return (-EINVAL);
+               return -EINVAL;
        }
 
        /*
@@ -1179,7 +1285,7 @@ nfp_net_rx_queue_setup(struct rte_eth_dev *dev,
        rxq = rte_zmalloc_socket("ethdev RX queue", sizeof(struct nfp_net_rxq),
                                 RTE_CACHE_LINE_SIZE, socket_id);
        if (rxq == NULL)
-               return (-ENOMEM);
+               return -ENOMEM;
 
        /* Hw queues mapping based on firmware confifguration */
        rxq->qidx = queue_idx;
@@ -1216,7 +1322,7 @@ nfp_net_rx_queue_setup(struct rte_eth_dev *dev,
        if (tz == NULL) {
                RTE_LOG(ERR, PMD, "Error allocatig rx dma\n");
                nfp_net_rx_queue_release(rxq);
-               return (-ENOMEM);
+               return -ENOMEM;
        }
 
        /* Saving physical and virtual addresses for the RX ring */
@@ -1229,7 +1335,7 @@ nfp_net_rx_queue_setup(struct rte_eth_dev *dev,
                                         RTE_CACHE_LINE_SIZE, socket_id);
        if (rxq->rxbufs == NULL) {
                nfp_net_rx_queue_release(rxq);
-               return (-ENOMEM);
+               return -ENOMEM;
        }
 
        PMD_RX_LOG(DEBUG, "rxbufs=%p hw_ring=%p dma_addr=0x%" PRIx64 "\n",
@@ -1267,7 +1373,7 @@ nfp_net_rx_fill_freelist(struct nfp_net_rxq *rxq)
                if (mbuf == NULL) {
                        RTE_LOG(ERR, PMD, "RX mbuf alloc failed queue_id=%u\n",
                                (unsigned)rxq->qidx);
-                       return (-ENOMEM);
+                       return -ENOMEM;
                }
 
                dma_addr = rte_cpu_to_le_64(RTE_MBUF_DMA_ADDR_DEFAULT(mbuf));
@@ -1345,7 +1451,7 @@ nfp_net_tx_queue_setup(struct rte_eth_dev *dev, uint16_t queue_idx,
                                 RTE_CACHE_LINE_SIZE, socket_id);
        if (txq == NULL) {
                RTE_LOG(ERR, PMD, "Error allocating tx dma\n");
-               return (-ENOMEM);
+               return -ENOMEM;
        }
 
        /*
@@ -1359,7 +1465,7 @@ nfp_net_tx_queue_setup(struct rte_eth_dev *dev, uint16_t queue_idx,
        if (tz == NULL) {
                RTE_LOG(ERR, PMD, "Error allocating tx dma\n");
                nfp_net_tx_queue_release(txq);
-               return (-ENOMEM);
+               return -ENOMEM;
        }
 
        txq->tx_count = nb_desc;
@@ -1387,7 +1493,7 @@ nfp_net_tx_queue_setup(struct rte_eth_dev *dev, uint16_t queue_idx,
                                         RTE_CACHE_LINE_SIZE, socket_id);
        if (txq->txbufs == NULL) {
                nfp_net_tx_queue_release(txq);
-               return (-ENOMEM);
+               return -ENOMEM;
        }
        PMD_TX_LOG(DEBUG, "txbufs=%p hw_ring=%p dma_addr=0x%" PRIx64 "\n",
                   txq->txbufs, txq->txds, (unsigned long int)txq->dma);
@@ -1412,7 +1518,7 @@ static inline void
 nfp_net_tx_cksum(struct nfp_net_txq *txq, struct nfp_net_tx_desc *txd,
                 struct rte_mbuf *mb)
 {
-       uint16_t ol_flags;
+       uint64_t ol_flags;
        struct nfp_net_hw *hw = txq->hw;
 
        if (!(hw->cap & NFP_NET_CFG_CTRL_TXCSUM))
@@ -1433,7 +1539,8 @@ nfp_net_tx_cksum(struct nfp_net_txq *txq, struct nfp_net_tx_desc *txd,
                break;
        }
 
-       txd->flags |= PCIE_DESC_TX_CSUM;
+       if (ol_flags & (PKT_TX_IP_CKSUM | PKT_TX_L4_MASK))
+               txd->flags |= PCIE_DESC_TX_CSUM;
 }
 
 /* nfp_net_rx_cksum - set mbuf checksum flags based on RX descriptor flags */
@@ -1693,7 +1800,7 @@ nfp_net_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t nb_pkts)
                if ((rxds->rxd.flags & PCIE_DESC_RX_VLAN) &&
                    (hw->ctrl & NFP_NET_CFG_CTRL_RXVLAN)) {
                        mb->vlan_tci = rte_cpu_to_le_32(rxds->rxd.vlan);
-                       mb->ol_flags |= PKT_RX_VLAN_PKT;
+                       mb->ol_flags |= PKT_RX_VLAN_PKT | PKT_RX_VLAN_STRIPPED;
                }
 
                /* Adding the mbuff to the mbuff array passed by the app */
@@ -1869,13 +1976,18 @@ nfp_net_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t nb_pkts)
                 */
                pkt_size = pkt->pkt_len;
 
-               while (pkt_size) {
-                       /* Releasing mbuf which was prefetched above */
-                       if (*lmbuf)
-                               rte_pktmbuf_free_seg(*lmbuf);
+               /* Releasing mbuf which was prefetched above */
+               if (*lmbuf)
+                       rte_pktmbuf_free(*lmbuf);
+               /*
+                * Linking mbuf with descriptor for being released
+                * next time descriptor is used
+                */
+               *lmbuf = pkt;
 
+               while (pkt_size) {
                        dma_size = pkt->data_len;
-                       dma_addr = RTE_MBUF_DATA_DMA_ADDR(pkt);
+                       dma_addr = rte_mbuf_data_dma_addr(pkt);
                        PMD_TX_LOG(DEBUG, "Working with mbuf at dma address:"
                                   "%" PRIx64 "\n", dma_addr);
 
@@ -1887,12 +1999,6 @@ nfp_net_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t nb_pkts)
                        ASSERT(free_descs > 0);
                        free_descs--;
 
-                       /*
-                        * Linking mbuf with descriptor for being released
-                        * next time descriptor is used
-                        */
-                       *lmbuf = pkt;
-
                        txq->wr_p++;
                        txq->tail++;
                        if (unlikely(txq->tail == txq->tx_count)) /* wrapping?*/
@@ -2171,7 +2277,7 @@ nfp_net_rss_hash_conf_get(struct rte_eth_dev *dev,
 }
 
 /* Initialise and register driver with DPDK Application */
-static struct eth_dev_ops nfp_net_eth_dev_ops = {
+static const struct eth_dev_ops nfp_net_eth_dev_ops = {
        .dev_configure          = nfp_net_configure,
        .dev_start              = nfp_net_start,
        .dev_stop               = nfp_net_stop,
@@ -2182,6 +2288,7 @@ static struct eth_dev_ops nfp_net_eth_dev_ops = {
        .stats_get              = nfp_net_stats_get,
        .stats_reset            = nfp_net_stats_reset,
        .dev_infos_get          = nfp_net_infos_get,
+       .dev_supported_ptypes_get = nfp_net_supported_ptypes_get,
        .mtu_set                = nfp_net_dev_mtu_set,
        .vlan_offload_set       = nfp_net_vlan_offload_set,
        .reta_update            = nfp_net_reta_update,
@@ -2218,6 +2325,8 @@ nfp_net_init(struct rte_eth_dev *eth_dev)
                return 0;
 
        pci_dev = eth_dev->pci_dev;
+       rte_eth_copy_pci_info(eth_dev, pci_dev);
+
        hw->device_id = pci_dev->id.device_id;
        hw->vendor_id = pci_dev->id.vendor_id;
        hw->subsystem_device_id = pci_dev->id.subsystem_device_id;
@@ -2294,6 +2403,9 @@ nfp_net_init(struct rte_eth_dev *eth_dev)
        PMD_INIT_LOG(INFO, "max_rx_queues: %u, max_tx_queues: %u\n",
                     hw->max_rx_queues, hw->max_tx_queues);
 
+       /* Initializing spinlock for reconfigs */
+       rte_spinlock_init(&hw->reconfig_lock);
+
        /* Allocating memory for mac addr */
        eth_dev->data->mac_addrs = rte_zmalloc("mac_addr", ETHER_ADDR_LEN, 0);
        if (eth_dev->data->mac_addrs == NULL) {
@@ -2315,6 +2427,17 @@ nfp_net_init(struct rte_eth_dev *eth_dev)
                     hw->mac_addr[0], hw->mac_addr[1], hw->mac_addr[2],
                     hw->mac_addr[3], hw->mac_addr[4], hw->mac_addr[5]);
 
+       /* Registering LSC interrupt handler */
+       rte_intr_callback_register(&pci_dev->intr_handle,
+                                  nfp_net_dev_interrupt_handler,
+                                  (void *)eth_dev);
+
+       /* enable uio intr after callback register */
+       rte_intr_enable(&pci_dev->intr_handle);
+
+       /* Telling the firmware about the LSC interrupt entry */
+       nn_cfg_writeb(hw, NFP_NET_CFG_LSC, NFP_NET_IRQ_LSC_IDX);
+
        /* Recording current stats counters values */
        nfp_net_stats_reset(eth_dev);
 
@@ -2343,7 +2466,8 @@ static struct eth_driver rte_nfp_net_pmd = {
        {
                .name = "rte_nfp_net_pmd",
                .id_table = pci_id_nfp_net_map,
-               .drv_flags = RTE_PCI_DRV_NEED_MAPPING | RTE_PCI_DRV_INTR_LSC,
+               .drv_flags = RTE_PCI_DRV_NEED_MAPPING | RTE_PCI_DRV_INTR_LSC |
+                            RTE_PCI_DRV_DETACHABLE,
        },
        .eth_dev_init = nfp_net_init,
        .dev_private_size = sizeof(struct nfp_net_adapter),