net/nfp: support hardware RSS v2
diff --git a/drivers/net/nfp/nfp_net.c b/drivers/net/nfp/nfp_net.c
index 0917b9c..bedd4b6 100644
--- a/drivers/net/nfp/nfp_net.c
+++ b/drivers/net/nfp/nfp_net.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2014, 2015 Netronome Systems, Inc.
+ * Copyright (c) 2014-2018 Netronome Systems, Inc.
  * All rights reserved.
  *
  * Small portions derived from code Copyright(c) 2010-2015 Intel Corporation.
@@ -43,7 +43,7 @@
 #include <rte_common.h>
 #include <rte_log.h>
 #include <rte_debug.h>
-#include <rte_ethdev.h>
+#include <rte_ethdev_driver.h>
 #include <rte_ethdev_pci.h>
 #include <rte_dev.h>
 #include <rte_ether.h>
 #include <rte_alarm.h>
 #include <rte_spinlock.h>
 
-#include "nfp_nfpu.h"
+#include "nfpcore/nfp_cpp.h"
+#include "nfpcore/nfp_nffw.h"
+#include "nfpcore/nfp_hwinfo.h"
+#include "nfpcore/nfp_mip.h"
+#include "nfpcore/nfp_rtsym.h"
+#include "nfpcore/nfp_nsp.h"
+
 #include "nfp_net_pmd.h"
 #include "nfp_net_logs.h"
 #include "nfp_net_ctrl.h"
@@ -95,18 +101,23 @@ static void nfp_net_stop(struct rte_eth_dev *dev);
 static uint16_t nfp_net_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts,
                                  uint16_t nb_pkts);
 
-/*
- * The offset of the queue controller queues in the PCIe Target. These
- * happen to be at the same offset on the NFP6000 and the NFP3200 so
- * we use a single macro here.
- */
-#define NFP_PCIE_QUEUE(_q)     (0x800 * ((_q) & 0xff))
+static int nfp_net_rss_config_default(struct rte_eth_dev *dev);
+static int nfp_net_rss_hash_update(struct rte_eth_dev *dev,
+                                  struct rte_eth_rss_conf *rss_conf);
+static int nfp_net_rss_reta_write(struct rte_eth_dev *dev,
+                   struct rte_eth_rss_reta_entry64 *reta_conf,
+                   uint16_t reta_size);
+static int nfp_net_rss_hash_write(struct rte_eth_dev *dev,
+                       struct rte_eth_rss_conf *rss_conf);
+
+/* The offset of the queue controller queues in the PCIe Target */
+#define NFP_PCIE_QUEUE(_q) (0x80000 + (NFP_QCP_QUEUE_ADDR_SZ * ((_q) & 0xff)))
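An aside, not part of the patch: the queue controller queues are no longer addressed straight off the PCI BAR; for the PF the area is mapped later through nfp_cpp_map_area() starting at NFP_PCIE_QUEUE(0), and each queue then sits at a fixed per-queue stride inside that mapping. A minimal sketch of resolving a queue's doorbell pointer, assuming NFP_QCP_QUEUE_ADDR_SZ from nfp_net_pmd.h is that stride:

    #include <stdint.h>
    #include "nfp_net_pmd.h"    /* NFP_QCP_QUEUE_ADDR_SZ */

    /* Doorbell pointer of queue controller queue 'q', relative to the
     * start of the mapped queue area (hwport0->hw_queues further down). */
    static inline uint8_t *
    qcp_queue_ptr(uint8_t *hw_queues, uint16_t q)
    {
            return hw_queues + (uint32_t)q * NFP_QCP_QUEUE_ADDR_SZ;
    }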
 
 /* Maximum value which can be added to a queue with one transaction */
 #define NFP_QCP_MAX_ADD        0x7f
 
 #define RTE_MBUF_DMA_ADDR_DEFAULT(mb) \
-       (uint64_t)((mb)->buf_physaddr + RTE_PKTMBUF_HEADROOM)
+       (uint64_t)((mb)->buf_iova + RTE_PKTMBUF_HEADROOM)
 
 /* nfp_qcp_ptr - Read or Write Pointer of a queue */
 enum nfp_qcp_ptr {
@@ -204,57 +215,6 @@ nn_cfg_writeq(struct nfp_net_hw *hw, int off, uint64_t val)
        nn_writeq(rte_cpu_to_le_64(val), hw->ctrl_bar + off);
 }
 
-/*
- * Atomically reads link status information from global structure rte_eth_dev.
- *
- * @param dev
- *   - Pointer to the structure rte_eth_dev to read from.
- *   - Pointer to the buffer to be saved with the link status.
- *
- * @return
- *   - On success, zero.
- *   - On failure, negative value.
- */
-static inline int
-nfp_net_dev_atomic_read_link_status(struct rte_eth_dev *dev,
-                                   struct rte_eth_link *link)
-{
-       struct rte_eth_link *dst = link;
-       struct rte_eth_link *src = &dev->data->dev_link;
-
-       if (rte_atomic64_cmpset((uint64_t *)dst, *(uint64_t *)dst,
-                               *(uint64_t *)src) == 0)
-               return -1;
-
-       return 0;
-}
-
-/*
- * Atomically writes the link status information into global
- * structure rte_eth_dev.
- *
- * @param dev
- *   - Pointer to the structure rte_eth_dev to read from.
- *   - Pointer to the buffer to be saved with the link status.
- *
- * @return
- *   - On success, zero.
- *   - On failure, negative value.
- */
-static inline int
-nfp_net_dev_atomic_write_link_status(struct rte_eth_dev *dev,
-                                    struct rte_eth_link *link)
-{
-       struct rte_eth_link *dst = &dev->data->dev_link;
-       struct rte_eth_link *src = link;
-
-       if (rte_atomic64_cmpset((uint64_t *)dst, *(uint64_t *)dst,
-                               *(uint64_t *)src) == 0)
-               return -1;
-
-       return 0;
-}
-
 static void
 nfp_net_rx_queue_release_mbufs(struct nfp_net_rxq *rxq)
 {
@@ -418,8 +378,6 @@ nfp_net_configure(struct rte_eth_dev *dev)
        struct rte_eth_conf *dev_conf;
        struct rte_eth_rxmode *rxmode;
        struct rte_eth_txmode *txmode;
-       uint32_t new_ctrl = 0;
-       uint32_t update = 0;
        struct nfp_net_hw *hw;
 
        hw = NFP_NET_DEV_PRIVATE_TO_HW(dev->data->dev_private);
@@ -445,98 +403,154 @@ nfp_net_configure(struct rte_eth_dev *dev)
        }
 
        /* Checking RX mode */
-       if (rxmode->mq_mode & ETH_MQ_RX_RSS) {
-               if (hw->cap & NFP_NET_CFG_CTRL_RSS) {
-                       update = NFP_NET_CFG_UPDATE_RSS;
-                       new_ctrl = NFP_NET_CFG_CTRL_RSS;
-               } else {
-                       PMD_INIT_LOG(INFO, "RSS not supported");
-                       return -EINVAL;
-               }
+       if (rxmode->mq_mode & ETH_MQ_RX_RSS &&
+           !(hw->cap & NFP_NET_CFG_CTRL_RSS)) {
+               PMD_INIT_LOG(INFO, "RSS not supported");
+               return -EINVAL;
        }
 
-       if (rxmode->split_hdr_size) {
+       /* Checking RX offloads */
+       if (rxmode->offloads & DEV_RX_OFFLOAD_HEADER_SPLIT) {
                PMD_INIT_LOG(INFO, "rxmode does not support split header");
                return -EINVAL;
        }
 
-       if (rxmode->hw_ip_checksum) {
-               if (hw->cap & NFP_NET_CFG_CTRL_RXCSUM) {
-                       new_ctrl |= NFP_NET_CFG_CTRL_RXCSUM;
-               } else {
-                       PMD_INIT_LOG(INFO, "RXCSUM not supported");
-                       return -EINVAL;
-               }
-       }
+       if ((rxmode->offloads & DEV_RX_OFFLOAD_IPV4_CKSUM) &&
+           !(hw->cap & NFP_NET_CFG_CTRL_RXCSUM))
+               PMD_INIT_LOG(INFO, "RXCSUM not supported");
 
-       if (rxmode->hw_vlan_filter) {
+       if (rxmode->offloads & DEV_RX_OFFLOAD_VLAN_FILTER) {
                PMD_INIT_LOG(INFO, "VLAN filter not supported");
                return -EINVAL;
        }
 
-       if (rxmode->hw_vlan_strip) {
-               if (hw->cap & NFP_NET_CFG_CTRL_RXVLAN) {
-                       new_ctrl |= NFP_NET_CFG_CTRL_RXVLAN;
-               } else {
-                       PMD_INIT_LOG(INFO, "hw vlan strip not supported");
-                       return -EINVAL;
-               }
+       if ((rxmode->offloads & DEV_RX_OFFLOAD_VLAN_STRIP) &&
+           !(hw->cap & NFP_NET_CFG_CTRL_RXVLAN)) {
+               PMD_INIT_LOG(INFO, "hw vlan strip not supported");
+               return -EINVAL;
        }
 
-       if (rxmode->hw_vlan_extend) {
+       if (rxmode->offloads & DEV_RX_OFFLOAD_VLAN_EXTEND) {
                PMD_INIT_LOG(INFO, "VLAN extended not supported");
                return -EINVAL;
        }
 
-       if (rxmode->jumbo_frame)
-               /* this is handled in rte_eth_dev_configure */
+       if (rxmode->offloads & DEV_RX_OFFLOAD_TCP_LRO) {
+               PMD_INIT_LOG(INFO, "LRO not supported");
+               return -EINVAL;
+       }
 
-       if (rxmode->hw_strip_crc) {
-               PMD_INIT_LOG(INFO, "strip CRC not supported");
+       if (rxmode->offloads & DEV_RX_OFFLOAD_QINQ_STRIP) {
+               PMD_INIT_LOG(INFO, "QINQ STRIP not supported");
                return -EINVAL;
        }
 
-       if (rxmode->enable_scatter) {
+       if (rxmode->offloads & DEV_RX_OFFLOAD_OUTER_IPV4_CKSUM) {
+               PMD_INIT_LOG(INFO, "Outer IP checksum not supported");
+               return -EINVAL;
+       }
+
+       if (rxmode->offloads & DEV_RX_OFFLOAD_MACSEC_STRIP) {
+               PMD_INIT_LOG(INFO, "MACSEC strip not supported");
+               return -EINVAL;
+       }
+
+       if (!(rxmode->offloads & DEV_RX_OFFLOAD_CRC_STRIP))
+               PMD_INIT_LOG(INFO, "HW always strips CRC. Not configurable");
+
+       if ((rxmode->offloads & DEV_RX_OFFLOAD_SCATTER) &&
+           !(hw->cap & NFP_NET_CFG_CTRL_SCATTER)) {
                PMD_INIT_LOG(INFO, "Scatter not supported");
                return -EINVAL;
        }
 
-       /* If next capabilities are supported, configure them by default */
+       if (rxmode->offloads & DEV_RX_OFFLOAD_TIMESTAMP) {
+               PMD_INIT_LOG(INFO, "timestamp offload not supported");
+               return -EINVAL;
+       }
 
-       /* VLAN insertion */
-       if (hw->cap & NFP_NET_CFG_CTRL_TXVLAN)
-               new_ctrl |= NFP_NET_CFG_CTRL_TXVLAN;
+       if (rxmode->offloads & DEV_RX_OFFLOAD_SECURITY) {
+               PMD_INIT_LOG(INFO, "security offload not supported");
+               return -EINVAL;
+       }
 
-       /* L2 broadcast */
-       if (hw->cap & NFP_NET_CFG_CTRL_L2BC)
-               new_ctrl |= NFP_NET_CFG_CTRL_L2BC;
+       /* checking TX offloads */
+       if ((txmode->offloads & DEV_TX_OFFLOAD_VLAN_INSERT) &&
+           !(hw->cap & NFP_NET_CFG_CTRL_TXVLAN)) {
+               PMD_INIT_LOG(INFO, "vlan insert offload not supported");
+               return -EINVAL;
+       }
 
-       /* L2 multicast */
-       if (hw->cap & NFP_NET_CFG_CTRL_L2MC)
-               new_ctrl |= NFP_NET_CFG_CTRL_L2MC;
+       if ((txmode->offloads & DEV_TX_OFFLOAD_IPV4_CKSUM) &&
+           !(hw->cap & NFP_NET_CFG_CTRL_TXCSUM)) {
+               PMD_INIT_LOG(INFO, "TX checksum offload not supported");
+               return -EINVAL;
+       }
 
-       /* TX checksum offload */
-       if (hw->cap & NFP_NET_CFG_CTRL_TXCSUM)
-               new_ctrl |= NFP_NET_CFG_CTRL_TXCSUM;
+       if (txmode->offloads & DEV_TX_OFFLOAD_SCTP_CKSUM) {
+               PMD_INIT_LOG(INFO, "TX SCTP checksum offload not supported");
+               return -EINVAL;
+       }
 
-       /* LSO offload */
-       if (hw->cap & NFP_NET_CFG_CTRL_LSO)
-               new_ctrl |= NFP_NET_CFG_CTRL_LSO;
+       if ((txmode->offloads & DEV_TX_OFFLOAD_TCP_TSO) &&
+           !(hw->cap & NFP_NET_CFG_CTRL_LSO_ANY)) {
+               PMD_INIT_LOG(INFO, "TSO TCP offload not supported");
+               return -EINVAL;
+       }
 
-       /* RX gather */
-       if (hw->cap & NFP_NET_CFG_CTRL_GATHER)
-               new_ctrl |= NFP_NET_CFG_CTRL_GATHER;
+       if (txmode->offloads & DEV_TX_OFFLOAD_UDP_TSO) {
+               PMD_INIT_LOG(INFO, "TSO UDP offload not supported");
+               return -EINVAL;
+       }
 
-       if (!new_ctrl)
-               return 0;
+       if (txmode->offloads & DEV_TX_OFFLOAD_OUTER_IPV4_CKSUM) {
+               PMD_INIT_LOG(INFO, "TX outer checksum offload not supported");
+               return -EINVAL;
+       }
+
+       if (txmode->offloads & DEV_TX_OFFLOAD_QINQ_INSERT) {
+               PMD_INIT_LOG(INFO, "QINQ insert offload not supported");
+               return -EINVAL;
+       }
 
-       update |= NFP_NET_CFG_UPDATE_GEN;
+       if (txmode->offloads & DEV_TX_OFFLOAD_VXLAN_TNL_TSO ||
+           txmode->offloads & DEV_TX_OFFLOAD_GRE_TNL_TSO ||
+           txmode->offloads & DEV_TX_OFFLOAD_IPIP_TNL_TSO ||
+           txmode->offloads & DEV_TX_OFFLOAD_GENEVE_TNL_TSO) {
+               PMD_INIT_LOG(INFO, "tunneling offload not supported");
+               return -EINVAL;
+       }
 
-       nn_cfg_writel(hw, NFP_NET_CFG_CTRL, new_ctrl);
-       if (nfp_net_reconfig(hw, new_ctrl, update) < 0)
-               return -EIO;
+       if (txmode->offloads & DEV_TX_OFFLOAD_MACSEC_INSERT) {
+               PMD_INIT_LOG(INFO, "TX MACSEC offload not supported");
+               return -EINVAL;
+       }
 
-       hw->ctrl = new_ctrl;
+       if (txmode->offloads & DEV_TX_OFFLOAD_MT_LOCKFREE) {
+               PMD_INIT_LOG(INFO, "multiqueue lockfree not supported");
+               return -EINVAL;
+       }
+
+       if ((txmode->offloads & DEV_TX_OFFLOAD_MULTI_SEGS) &&
+           !(hw->cap & NFP_NET_CFG_CTRL_GATHER)) {
+               PMD_INIT_LOG(INFO, "TX multisegs not supported");
+               return -EINVAL;
+       }
+
+       if (txmode->offloads & DEV_TX_OFFLOAD_MBUF_FAST_FREE) {
+               PMD_INIT_LOG(INFO, "mbuf fast-free not supported");
+               return -EINVAL;
+       }
+
+       if (txmode->offloads & DEV_TX_OFFLOAD_SECURITY) {
+               PMD_INIT_LOG(INFO, "TX security offload not supported");
+               return -EINVAL;
+       }
 
        return 0;
 }
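For reference, not from the patch itself: the rewritten checks above only validate what an application requests through the per-port offloads API (the rxmode/txmode offload bit-masks); the matching NFP_NET_CFG_CTRL_* bits are programmed later, by nfp_check_offloads() at device start. A minimal caller-side sketch of such a request (queue counts and flag choices are purely illustrative):

    #include <rte_ethdev.h>

    static int
    configure_port(uint16_t port_id)
    {
            struct rte_eth_conf conf = {
                    .rxmode = {
                            .mq_mode  = ETH_MQ_RX_RSS,
                            .offloads = DEV_RX_OFFLOAD_IPV4_CKSUM |
                                        DEV_RX_OFFLOAD_UDP_CKSUM |
                                        DEV_RX_OFFLOAD_TCP_CKSUM,
                    },
                    .txmode = {
                            .offloads = DEV_TX_OFFLOAD_IPV4_CKSUM |
                                        DEV_TX_OFFLOAD_UDP_CKSUM |
                                        DEV_TX_OFFLOAD_TCP_CKSUM,
                    },
                    .rx_adv_conf.rss_conf = {
                            .rss_hf = ETH_RSS_IP,
                    },
            };

            /* 4 RX / 4 TX queues picked only for the example */
            return rte_eth_dev_configure(port_id, 4, 4, &conf);
    }

Note that nfp_net_rx_queue_setup()/nfp_net_tx_queue_setup() further down additionally require the per-queue offloads passed at queue setup to match these port-level values.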
@@ -618,47 +632,29 @@ nfp_net_cfg_queue_setup(struct nfp_net_hw *hw)
 #define ETH_ADDR_LEN   6
 
 static void
-nfp_eth_copy_mac_reverse(uint8_t *dst, const uint8_t *src)
+nfp_eth_copy_mac(uint8_t *dst, const uint8_t *src)
 {
        int i;
 
        for (i = 0; i < ETH_ADDR_LEN; i++)
-               dst[ETH_ADDR_LEN - i - 1] = src[i];
+               dst[i] = src[i];
 }
 
 static int
 nfp_net_pf_read_mac(struct nfp_net_hw *hw, int port)
 {
-       union eth_table_entry *entry;
-       int idx, i;
-
-       idx = port;
-       entry = hw->eth_table;
-
-       /* Reading NFP ethernet table obtained before */
-       for (i = 0; i < NSP_ETH_MAX_COUNT; i++) {
-               if (!(entry->port & NSP_ETH_PORT_LANES_MASK)) {
-                       /* port not in use */
-                       entry++;
-                       continue;
-               }
-               if (idx == 0)
-                       break;
-               idx--;
-               entry++;
-       }
-
-       if (i == NSP_ETH_MAX_COUNT)
-               return -EINVAL;
+       struct nfp_eth_table *nfp_eth_table;
 
+       nfp_eth_table = nfp_eth_read_ports(hw->cpp);
+       if (!nfp_eth_table)
+               return -EIO;
        /*
         * hw points to port0 private data. We need hw now pointing to
         * right port.
         */
        hw += port;
-       nfp_eth_copy_mac_reverse((uint8_t *)&hw->mac_addr,
-                                (uint8_t *)&entry->mac_addr);
+       nfp_eth_copy_mac((uint8_t *)&hw->mac_addr,
+                        (uint8_t *)&nfp_eth_table->ports[port].mac_addr);
 
+       free(nfp_eth_table);
        return 0;
 }
 
@@ -732,6 +728,66 @@ nfp_configure_rx_interrupt(struct rte_eth_dev *dev,
        return 0;
 }
 
+static uint32_t
+nfp_check_offloads(struct rte_eth_dev *dev)
+{
+       struct nfp_net_hw *hw;
+       struct rte_eth_conf *dev_conf;
+       struct rte_eth_rxmode *rxmode;
+       struct rte_eth_txmode *txmode;
+       uint32_t ctrl = 0;
+
+       hw = NFP_NET_DEV_PRIVATE_TO_HW(dev->data->dev_private);
+
+       dev_conf = &dev->data->dev_conf;
+       rxmode = &dev_conf->rxmode;
+       txmode = &dev_conf->txmode;
+
+       if (rxmode->offloads & DEV_RX_OFFLOAD_IPV4_CKSUM) {
+               if (hw->cap & NFP_NET_CFG_CTRL_RXCSUM)
+                       ctrl |= NFP_NET_CFG_CTRL_RXCSUM;
+       }
+
+       if (rxmode->offloads & DEV_RX_OFFLOAD_VLAN_STRIP) {
+               if (hw->cap & NFP_NET_CFG_CTRL_RXVLAN)
+                       ctrl |= NFP_NET_CFG_CTRL_RXVLAN;
+       }
+
+       if (rxmode->offloads & DEV_RX_OFFLOAD_JUMBO_FRAME)
+               hw->mtu = rxmode->max_rx_pkt_len;
+
+       if (txmode->offloads & DEV_TX_OFFLOAD_VLAN_INSERT)
+               ctrl |= NFP_NET_CFG_CTRL_TXVLAN;
+
+       /* L2 broadcast */
+       if (hw->cap & NFP_NET_CFG_CTRL_L2BC)
+               ctrl |= NFP_NET_CFG_CTRL_L2BC;
+
+       /* L2 multicast */
+       if (hw->cap & NFP_NET_CFG_CTRL_L2MC)
+               ctrl |= NFP_NET_CFG_CTRL_L2MC;
+
+       /* TX checksum offload */
+       if (txmode->offloads & DEV_TX_OFFLOAD_IPV4_CKSUM ||
+           txmode->offloads & DEV_TX_OFFLOAD_UDP_CKSUM ||
+           txmode->offloads & DEV_TX_OFFLOAD_TCP_CKSUM)
+               ctrl |= NFP_NET_CFG_CTRL_TXCSUM;
+
+       /* LSO offload */
+       if (txmode->offloads & DEV_TX_OFFLOAD_TCP_TSO) {
+               if (hw->cap & NFP_NET_CFG_CTRL_LSO)
+                       ctrl |= NFP_NET_CFG_CTRL_LSO;
+               else
+                       ctrl |= NFP_NET_CFG_CTRL_LSO2;
+       }
+
+       /* RX gather */
+       if (txmode->offloads & DEV_TX_OFFLOAD_MULTI_SEGS)
+               ctrl |= NFP_NET_CFG_CTRL_GATHER;
+
+       return ctrl;
+}
+
 static int
 nfp_net_start(struct rte_eth_dev *dev)
 {
@@ -739,6 +795,8 @@ nfp_net_start(struct rte_eth_dev *dev)
        struct rte_intr_handle *intr_handle = &pci_dev->intr_handle;
        uint32_t new_ctrl, update = 0;
        struct nfp_net_hw *hw;
+       struct rte_eth_conf *dev_conf;
+       struct rte_eth_rxmode *rxmode;
        uint32_t intr_vector;
        int ret;
 
@@ -749,9 +807,6 @@ nfp_net_start(struct rte_eth_dev *dev)
        /* Disabling queues just in case... */
        nfp_net_disable_queues(dev);
 
-       /* Writing configuration parameters in the device */
-       nfp_net_params_setup(hw);
-
        /* Enabling the required queues in the device */
        nfp_net_enable_queues(dev);
 
@@ -786,8 +841,22 @@ nfp_net_start(struct rte_eth_dev *dev)
 
        rte_intr_enable(intr_handle);
 
+       new_ctrl = nfp_check_offloads(dev);
+
+       /* Writing configuration parameters in the device */
+       nfp_net_params_setup(hw);
+
+       dev_conf = &dev->data->dev_conf;
+       rxmode = &dev_conf->rxmode;
+
+       if (rxmode->mq_mode & ETH_MQ_RX_RSS) {
+               nfp_net_rss_config_default(dev);
+               update |= NFP_NET_CFG_UPDATE_RSS;
+               new_ctrl |= NFP_NET_CFG_CTRL_RSS;
+       }
+
        /* Enable device */
-       new_ctrl = hw->ctrl | NFP_NET_CFG_CTRL_ENABLE;
+       new_ctrl |= NFP_NET_CFG_CTRL_ENABLE;
 
        update |= NFP_NET_CFG_UPDATE_GEN | NFP_NET_CFG_UPDATE_RING;
 
@@ -809,7 +878,7 @@ nfp_net_start(struct rte_eth_dev *dev)
 
        if (hw->is_pf)
                /* Configure the physical port up */
-               nfp_nsp_eth_config(hw->nspu_desc, hw->pf_port_idx, 1);
+               nfp_eth_set_configured(hw->cpp, hw->pf_port_idx, 1);
 
        hw->ctrl = new_ctrl;
 
@@ -860,7 +929,7 @@ nfp_net_stop(struct rte_eth_dev *dev)
 
        if (hw->is_pf)
                /* Configure the physical port down */
-               nfp_nsp_eth_config(hw->nspu_desc, hw->pf_port_idx, 0);
+               nfp_eth_set_configured(hw->cpp, hw->pf_port_idx, 0);
 }
 
 /* Reset and stop device. The device can not be restarted. */
@@ -977,8 +1046,9 @@ static int
 nfp_net_link_update(struct rte_eth_dev *dev, __rte_unused int wait_to_complete)
 {
        struct nfp_net_hw *hw;
-       struct rte_eth_link link, old;
+       struct rte_eth_link link;
        uint32_t nn_link_status;
+       int ret;
 
        static const uint32_t ls_to_ethtool[] = {
                [NFP_NET_CFG_STS_LINK_RATE_UNSUPPORTED] = ETH_SPEED_NUM_NONE,
@@ -995,9 +1065,6 @@ nfp_net_link_update(struct rte_eth_dev *dev, __rte_unused int wait_to_complete)
 
        hw = NFP_NET_DEV_PRIVATE_TO_HW(dev->data->dev_private);
 
-       memset(&old, 0, sizeof(old));
-       nfp_net_dev_atomic_read_link_status(dev, &old);
-
        nn_link_status = nn_cfg_readl(hw, NFP_NET_CFG_STS);
 
        memset(&link, 0, sizeof(struct rte_eth_link));
@@ -1015,16 +1082,14 @@ nfp_net_link_update(struct rte_eth_dev *dev, __rte_unused int wait_to_complete)
        else
                link.link_speed = ls_to_ethtool[nn_link_status];
 
-       if (old.link_status != link.link_status) {
-               nfp_net_dev_atomic_write_link_status(dev, &link);
+       ret = rte_eth_linkstatus_set(dev, &link);
+       if (ret == 0) {
                if (link.link_status)
                        PMD_DRV_LOG(INFO, "NIC Link is Up\n");
                else
                        PMD_DRV_LOG(INFO, "NIC Link is Down\n");
-               return 0;
        }
-
-       return -1;
+       return ret;
 }
 
 static int
@@ -1038,6 +1103,8 @@ nfp_net_stats_get(struct rte_eth_dev *dev, struct rte_eth_stats *stats)
 
        /* RTE_ETHDEV_QUEUE_STAT_CNTRS default value is 16 */
 
+       memset(&nfp_dev_stats, 0, sizeof(nfp_dev_stats));
+
        /* reading per RX ring stats */
        for (i = 0; i < dev->data->nb_rx_queues; i++) {
                if (i == RTE_ETHDEV_QUEUE_STAT_CNTRS)
@@ -1190,11 +1257,10 @@ nfp_net_infos_get(struct rte_eth_dev *dev, struct rte_eth_dev_info *dev_info)
 
        hw = NFP_NET_DEV_PRIVATE_TO_HW(dev->data->dev_private);
 
-       dev_info->pci_dev = RTE_ETH_DEV_TO_PCI(dev);
        dev_info->max_rx_queues = (uint16_t)hw->max_rx_queues;
        dev_info->max_tx_queues = (uint16_t)hw->max_tx_queues;
        dev_info->min_rx_bufsize = ETHER_MIN_MTU;
-       dev_info->max_rx_pktlen = hw->mtu;
+       dev_info->max_rx_pktlen = hw->max_mtu;
        /* Next should change when PF support is implemented */
        dev_info->max_mac_addrs = 1;
 
@@ -1206,6 +1272,8 @@ nfp_net_infos_get(struct rte_eth_dev *dev, struct rte_eth_dev_info *dev_info)
                                             DEV_RX_OFFLOAD_UDP_CKSUM |
                                             DEV_RX_OFFLOAD_TCP_CKSUM;
 
+       dev_info->rx_offload_capa |= DEV_RX_OFFLOAD_JUMBO_FRAME;
+
        if (hw->cap & NFP_NET_CFG_CTRL_TXVLAN)
                dev_info->tx_offload_capa = DEV_TX_OFFLOAD_VLAN_INSERT;
 
@@ -1214,6 +1282,12 @@ nfp_net_infos_get(struct rte_eth_dev *dev, struct rte_eth_dev_info *dev_info)
                                             DEV_TX_OFFLOAD_UDP_CKSUM |
                                             DEV_TX_OFFLOAD_TCP_CKSUM;
 
+       if (hw->cap & NFP_NET_CFG_CTRL_LSO_ANY)
+               dev_info->tx_offload_capa |= DEV_TX_OFFLOAD_TCP_TSO;
+
+       if (hw->cap & NFP_NET_CFG_CTRL_GATHER)
+               dev_info->tx_offload_capa |= DEV_TX_OFFLOAD_MULTI_SEGS;
+
        dev_info->default_rxconf = (struct rte_eth_rxconf) {
                .rx_thresh = {
                        .pthresh = DEFAULT_RX_PTHRESH,
@@ -1232,8 +1306,6 @@ nfp_net_infos_get(struct rte_eth_dev *dev, struct rte_eth_dev_info *dev_info)
                },
                .tx_free_thresh = DEFAULT_TX_FREE_THRESH,
                .tx_rs_thresh = DEFAULT_TX_RSBIT_THRESH,
-               .txq_flags = ETH_TXQ_FLAGS_NOMULTSEGS |
-                            ETH_TXQ_FLAGS_NOOFFLOADS,
        };
 
        dev_info->flow_type_rss_offloads = ETH_RSS_NONFRAG_IPV4_TCP |
@@ -1244,12 +1316,9 @@ nfp_net_infos_get(struct rte_eth_dev *dev, struct rte_eth_dev_info *dev_info)
        dev_info->reta_size = NFP_NET_CFG_RSS_ITBL_SZ;
        dev_info->hash_key_size = NFP_NET_CFG_RSS_KEY_SZ;
 
-       dev_info->speed_capa = ETH_SPEED_NUM_1G | ETH_LINK_SPEED_10G |
-                              ETH_SPEED_NUM_25G | ETH_SPEED_NUM_40G |
-                              ETH_SPEED_NUM_50G | ETH_LINK_SPEED_100G;
-
-       if (hw->cap & NFP_NET_CFG_CTRL_LSO)
-               dev_info->tx_offload_capa |= DEV_TX_OFFLOAD_TCP_TSO;
+       dev_info->speed_capa = ETH_LINK_SPEED_1G | ETH_LINK_SPEED_10G |
+                              ETH_LINK_SPEED_25G | ETH_LINK_SPEED_40G |
+                              ETH_LINK_SPEED_50G | ETH_LINK_SPEED_100G;
 }
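A small caller-side sketch, not from this patch: the TSO and multi-segment capabilities advertised above can be queried before an application requests them in txmode.offloads, using the standard ethdev call:

    #include <rte_ethdev.h>

    static int
    port_supports_tso(uint16_t port_id)
    {
            struct rte_eth_dev_info info;

            rte_eth_dev_info_get(port_id, &info);
            /* the same test works for DEV_TX_OFFLOAD_MULTI_SEGS etc. */
            return (info.tx_offload_capa & DEV_TX_OFFLOAD_TCP_TSO) != 0;
    }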
 
 static const uint32_t *
@@ -1352,8 +1421,7 @@ nfp_net_dev_link_status_print(struct rte_eth_dev *dev)
        struct rte_pci_device *pci_dev = RTE_ETH_DEV_TO_PCI(dev);
        struct rte_eth_link link;
 
-       memset(&link, 0, sizeof(link));
-       nfp_net_dev_atomic_read_link_status(dev, &link);
+       rte_eth_linkstatus_get(dev, &link);
        if (link.link_status)
                RTE_LOG(INFO, PMD, "Port %d: Link Up - speed %u Mbps - %s\n",
                        dev->data->port_id, link.link_speed,
@@ -1406,9 +1474,7 @@ nfp_net_dev_interrupt_handler(void *param)
 
        PMD_DRV_LOG(DEBUG, "We got a LSC interrupt!!!\n");
 
-       /* get the link status */
-       memset(&link, 0, sizeof(link));
-       nfp_net_dev_atomic_read_link_status(dev, &link);
+       rte_eth_linkstatus_get(dev, &link);
 
        nfp_net_link_update(dev, 0);
 
@@ -1448,7 +1514,7 @@ nfp_net_dev_interrupt_delayed_handler(void *param)
        struct rte_eth_dev *dev = (struct rte_eth_dev *)param;
 
        nfp_net_link_update(dev, 0);
-       _rte_eth_dev_callback_process(dev, RTE_ETH_EVENT_INTR_LSC, NULL, NULL);
+       _rte_eth_dev_callback_process(dev, RTE_ETH_EVENT_INTR_LSC, NULL);
 
        nfp_net_dev_link_status_print(dev);
 
@@ -1467,6 +1533,13 @@ nfp_net_dev_mtu_set(struct rte_eth_dev *dev, uint16_t mtu)
        if ((mtu < ETHER_MIN_MTU) || ((uint32_t)mtu > hw->max_mtu))
                return -EINVAL;
 
+       /* mtu setting is forbidden if port is started */
+       if (dev->data->dev_started) {
+               PMD_DRV_LOG(ERR, "port %d must be stopped before configuration",
+                           dev->data->port_id);
+               return -EBUSY;
+       }
+
        /* switch to jumbo mode if needed */
        if ((uint32_t)mtu > ETHER_MAX_LEN)
                dev->data->dev_conf.rxmode.jumbo_frame = 1;
@@ -1494,6 +1567,8 @@ nfp_net_rx_queue_setup(struct rte_eth_dev *dev,
        const struct rte_memzone *tz;
        struct nfp_net_rxq *rxq;
        struct nfp_net_hw *hw;
+       struct rte_eth_conf *dev_conf;
+       struct rte_eth_rxmode *rxmode;
 
        hw = NFP_NET_DEV_PRIVATE_TO_HW(dev->data->dev_private);
 
@@ -1507,6 +1582,17 @@ nfp_net_rx_queue_setup(struct rte_eth_dev *dev,
                return -EINVAL;
        }
 
+       dev_conf = &dev->data->dev_conf;
+       rxmode = &dev_conf->rxmode;
+
+       if (rx_conf->offloads != rxmode->offloads) {
+               RTE_LOG(ERR, PMD, "queue %u RX offloads do not match port offloads\n",
+                                 queue_idx);
+               RTE_LOG(ERR, PMD, "\tport: %" PRIx64 "\n", rxmode->offloads);
+               RTE_LOG(ERR, PMD, "\tqueue: %" PRIx64 "\n", rx_conf->offloads);
+               return -EINVAL;
+       }
+
        /*
         * Free memory prior to re-allocation if needed. This is the case after
         * calling nfp_net_stop
@@ -1562,7 +1648,7 @@ nfp_net_rx_queue_setup(struct rte_eth_dev *dev,
        }
 
        /* Saving physical and virtual addresses for the RX ring */
-       rxq->dma = (uint64_t)tz->phys_addr;
+       rxq->dma = (uint64_t)tz->iova;
        rxq->rxds = (struct nfp_net_rx_desc *)tz->addr;
 
        /* mbuf pointers array for referencing mbufs linked to RX descriptors */
@@ -1643,6 +1729,8 @@ nfp_net_tx_queue_setup(struct rte_eth_dev *dev, uint16_t queue_idx,
        struct nfp_net_txq *txq;
        uint16_t tx_free_thresh;
        struct nfp_net_hw *hw;
+       struct rte_eth_conf *dev_conf;
+       struct rte_eth_txmode *txmode;
 
        hw = NFP_NET_DEV_PRIVATE_TO_HW(dev->data->dev_private);
 
@@ -1656,6 +1744,15 @@ nfp_net_tx_queue_setup(struct rte_eth_dev *dev, uint16_t queue_idx,
                return -EINVAL;
        }
 
+       dev_conf = &dev->data->dev_conf;
+       txmode = &dev_conf->txmode;
+
+       if (tx_conf->offloads != txmode->offloads) {
+               RTE_LOG(ERR, PMD, "queue %u TX offloads do not match port offloads\n",
+                                 queue_idx);
+               return -EINVAL;
+       }
+
        tx_free_thresh = (uint16_t)((tx_conf->tx_free_thresh) ?
                                    tx_conf->tx_free_thresh :
                                    DEFAULT_TX_FREE_THRESH);
@@ -1715,10 +1812,9 @@ nfp_net_tx_queue_setup(struct rte_eth_dev *dev, uint16_t queue_idx,
        txq->qcp_q = hw->tx_bar + NFP_QCP_QUEUE_OFF(txq->tx_qcidx);
 
        txq->port_id = dev->data->port_id;
-       txq->txq_flags = tx_conf->txq_flags;
 
        /* Saving physical and virtual addresses for the TX ring */
-       txq->dma = (uint64_t)tz->phys_addr;
+       txq->dma = (uint64_t)tz->iova;
        txq->txds = (struct nfp_net_tx_desc *)tz->addr;
 
        /* mbuf pointers array for referencing mbufs linked to TX descriptors */
@@ -1755,7 +1851,7 @@ nfp_net_tx_tso(struct nfp_net_txq *txq, struct nfp_net_tx_desc *txd,
        uint64_t ol_flags;
        struct nfp_net_hw *hw = txq->hw;
 
-       if (!(hw->cap & NFP_NET_CFG_CTRL_LSO))
+       if (!(hw->cap & NFP_NET_CFG_CTRL_LSO_ANY))
                goto clean_txd;
 
        ol_flags = mb->ol_flags;
@@ -1763,15 +1859,19 @@ nfp_net_tx_tso(struct nfp_net_txq *txq, struct nfp_net_tx_desc *txd,
        if (!(ol_flags & PKT_TX_TCP_SEG))
                goto clean_txd;
 
-       txd->l4_offset = mb->l2_len + mb->l3_len + mb->l4_len;
-       txd->lso = rte_cpu_to_le_16(mb->tso_segsz);
+       txd->l3_offset = mb->l2_len;
+       txd->l4_offset = mb->l2_len + mb->l3_len;
+       txd->lso_hdrlen = mb->l2_len + mb->l3_len + mb->l4_len;
+       txd->mss = rte_cpu_to_le_16(mb->tso_segsz);
        txd->flags = PCIE_DESC_TX_LSO;
        return;
 
 clean_txd:
        txd->flags = 0;
+       txd->l3_offset = 0;
        txd->l4_offset = 0;
-       txd->lso = 0;
+       txd->lso_hdrlen = 0;
+       txd->mss = 0;
 }
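Not part of the patch, but for context: the descriptor now takes the L3/L4 offsets and the full header length separately, all derived from the mbuf length fields, while the MSS moves into its own field. A minimal sketch of how a sender fills an mbuf so those fields get populated (header sizes shown are for untagged IPv4/TCP without options and are illustrative only):

    #include <rte_mbuf.h>

    static void
    prepare_tso_mbuf(struct rte_mbuf *m, uint16_t mss)
    {
            m->l2_len = 14;     /* Ethernet header */
            m->l3_len = 20;     /* IPv4 header, no options */
            m->l4_len = 20;     /* TCP header, no options */
            m->tso_segsz = mss;
            m->ol_flags |= PKT_TX_TCP_SEG | PKT_TX_IPV4 | PKT_TX_IP_CKSUM;
    }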
 
 /* nfp_net_tx_cksum - Set TX CSUM offload flags in TX descriptor */
@@ -1857,14 +1957,10 @@ nfp_net_set_hash(struct nfp_net_rxq *rxq, struct nfp_net_rx_desc *rxd,
        if (!(hw->ctrl & NFP_NET_CFG_CTRL_RSS))
                return;
 
-       if (NFD_CFG_MAJOR_VERSION_of(hw->ver) <= 3) {
-               if (!(rxd->rxd.flags & PCIE_DESC_RX_RSS))
-                       return;
-
-               hash = rte_be_to_cpu_32(*(uint32_t *)NFP_HASH_OFFSET);
-               hash_type = rte_be_to_cpu_32(*(uint32_t *)NFP_HASH_TYPE_OFFSET);
-
-       } else if (NFP_DESC_META_LEN(rxd)) {
+       /* this is true for new firmwares */
+       if (likely(((hw->cap & NFP_NET_CFG_CTRL_RSS2) ||
+           (NFD_CFG_MAJOR_VERSION_of(hw->ver) == 4)) &&
+            NFP_DESC_META_LEN(rxd))) {
                /*
                 * new metadata api:
                 * <----  32 bit  ----->
@@ -1897,7 +1993,11 @@ nfp_net_set_hash(struct nfp_net_rxq *rxq, struct nfp_net_rx_desc *rxd,
                        return;
                }
        } else {
-               return;
+               if (!(rxd->rxd.flags & PCIE_DESC_RX_RSS))
+                       return;
+
+               hash = rte_be_to_cpu_32(*(uint32_t *)NFP_HASH_OFFSET);
+               hash_type = rte_be_to_cpu_32(*(uint32_t *)NFP_HASH_TYPE_OFFSET);
        }
 
        mbuf->hash.rss = hash;
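An annotation, not from the patch: both branches end up in the same place. On RSSv2-capable firmware the hash and its type come from the metadata prepended to the packet; on older firmware they come from the fixed NFP_HASH_OFFSET/NFP_HASH_TYPE_OFFSET locations. Either way an application consumes the result through the standard mbuf fields:

    #include <rte_mbuf.h>

    static uint32_t
    flow_hash_of(const struct rte_mbuf *m)
    {
            if (m->ol_flags & PKT_RX_RSS_HASH)
                    return m->hash.rss;
            return 0;   /* no RSS hash reported for this packet */
    }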
@@ -1988,16 +2088,16 @@ nfp_net_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t nb_pkts)
                        break;
                }
 
+               rxds = &rxq->rxds[rxq->rd_p];
+               if ((rxds->rxd.meta_len_dd & PCIE_DESC_RX_DD) == 0)
+                       break;
+
                /*
                 * Memory barrier to ensure that we won't do other
                 * reads before the DD bit.
                 */
                rte_rmb();
 
-               rxds = &rxq->rxds[rxq->rd_p];
-               if ((rxds->rxd.meta_len_dd & PCIE_DESC_RX_DD) == 0)
-                       break;
-
                /*
                 * We got a packet. Let's alloc a new mbuff for refilling the
                 * free descriptor ring as soon as possible
@@ -2058,6 +2158,8 @@ nfp_net_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t nb_pkts)
                mb->nb_segs = 1;
                mb->next = NULL;
 
+               mb->port = rxq->port_id;
+
                /* Checking the RSS flag */
                nfp_net_set_hash(rxq, rxds, mb);
 
@@ -2067,7 +2169,7 @@ nfp_net_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t nb_pkts)
                if ((rxds->rxd.flags & PCIE_DESC_RX_VLAN) &&
                    (hw->ctrl & NFP_NET_CFG_CTRL_RXVLAN)) {
                        mb->vlan_tci = rte_cpu_to_le_32(rxds->rxd.vlan);
-                       mb->ol_flags |= PKT_RX_VLAN_PKT | PKT_RX_VLAN_STRIPPED;
+                       mb->ol_flags |= PKT_RX_VLAN | PKT_RX_VLAN_STRIPPED;
                }
 
                /* Adding the mbuff to the mbuff array passed by the app */
@@ -2266,7 +2368,7 @@ nfp_net_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t nb_pkts)
                        *lmbuf = pkt;
 
                        dma_size = pkt->data_len;
-                       dma_addr = rte_mbuf_data_dma_addr(pkt);
+                       dma_addr = rte_mbuf_data_iova(pkt);
                        PMD_TX_LOG(DEBUG, "Working with mbuf at dma address:"
                                   "%" PRIx64 "\n", dma_addr);
 
@@ -2306,11 +2408,12 @@ xmit_end:
        return i;
 }
 
-static void
+static int
 nfp_net_vlan_offload_set(struct rte_eth_dev *dev, int mask)
 {
        uint32_t new_ctrl, update;
        struct nfp_net_hw *hw;
+       int ret;
 
        hw = NFP_NET_DEV_PRIVATE_TO_HW(dev->data->dev_private);
        new_ctrl = 0;
@@ -2331,32 +2434,28 @@ nfp_net_vlan_offload_set(struct rte_eth_dev *dev, int mask)
                new_ctrl = hw->ctrl & ~NFP_NET_CFG_CTRL_RXVLAN;
 
        if (new_ctrl == 0)
-               return;
+               return 0;
 
        update = NFP_NET_CFG_UPDATE_GEN;
 
-       if (nfp_net_reconfig(hw, new_ctrl, update) < 0)
-               return;
+       ret = nfp_net_reconfig(hw, new_ctrl, update);
+       if (!ret)
+               hw->ctrl = new_ctrl;
 
-       hw->ctrl = new_ctrl;
+       return ret;
 }
 
-/* Update Redirection Table(RETA) of Receive Side Scaling of Ethernet device */
 static int
-nfp_net_reta_update(struct rte_eth_dev *dev,
+nfp_net_rss_reta_write(struct rte_eth_dev *dev,
                    struct rte_eth_rss_reta_entry64 *reta_conf,
                    uint16_t reta_size)
 {
        uint32_t reta, mask;
        int i, j;
        int idx, shift;
-       uint32_t update;
        struct nfp_net_hw *hw =
                NFP_NET_DEV_PRIVATE_TO_HW(dev->data->dev_private);
 
-       if (!(hw->ctrl & NFP_NET_CFG_CTRL_RSS))
-               return -EINVAL;
-
        if (reta_size != NFP_NET_CFG_RSS_ITBL_SZ) {
                RTE_LOG(ERR, PMD, "The size of hash lookup table configured "
                        "(%d) doesn't match the number hardware can supported "
@@ -2393,6 +2492,26 @@ nfp_net_reta_update(struct rte_eth_dev *dev,
                nn_cfg_writel(hw, NFP_NET_CFG_RSS_ITBL + (idx * 64) + shift,
                              reta);
        }
+       return 0;
+}
+
+/* Update Redirection Table(RETA) of Receive Side Scaling of Ethernet device */
+static int
+nfp_net_reta_update(struct rte_eth_dev *dev,
+                   struct rte_eth_rss_reta_entry64 *reta_conf,
+                   uint16_t reta_size)
+{
+       struct nfp_net_hw *hw =
+               NFP_NET_DEV_PRIVATE_TO_HW(dev->data->dev_private);
+       uint32_t update;
+       int ret;
+
+       if (!(hw->ctrl & NFP_NET_CFG_CTRL_RSS))
+               return -EINVAL;
+
+       ret = nfp_net_rss_reta_write(dev, reta_conf, reta_size);
+       if (ret != 0)
+               return ret;
 
        update = NFP_NET_CFG_UPDATE_RSS;
 
@@ -2451,33 +2570,24 @@ nfp_net_reta_query(struct rte_eth_dev *dev,
 }
 
 static int
-nfp_net_rss_hash_update(struct rte_eth_dev *dev,
+nfp_net_rss_hash_write(struct rte_eth_dev *dev,
                        struct rte_eth_rss_conf *rss_conf)
 {
-       uint32_t update;
+       struct nfp_net_hw *hw;
+       uint64_t rss_hf;
        uint32_t cfg_rss_ctrl = 0;
        uint8_t key;
-       uint64_t rss_hf;
        int i;
-       struct nfp_net_hw *hw;
 
        hw = NFP_NET_DEV_PRIVATE_TO_HW(dev->data->dev_private);
 
-       rss_hf = rss_conf->rss_hf;
-
-       /* Checking if RSS is enabled */
-       if (!(hw->ctrl & NFP_NET_CFG_CTRL_RSS)) {
-               if (rss_hf != 0) { /* Enable RSS? */
-                       RTE_LOG(ERR, PMD, "RSS unsupported\n");
-                       return -EINVAL;
-               }
-               return 0; /* Nothing to do */
+       /* Writing the key byte by byte */
+       for (i = 0; i < rss_conf->rss_key_len; i++) {
+               memcpy(&key, &rss_conf->rss_key[i], 1);
+               nn_cfg_writeb(hw, NFP_NET_CFG_RSS_KEY + i, key);
        }
 
-       if (rss_conf->rss_key_len > NFP_NET_CFG_RSS_KEY_SZ) {
-               RTE_LOG(ERR, PMD, "hash key too long\n");
-               return -EINVAL;
-       }
+       rss_hf = rss_conf->rss_hf;
 
        if (rss_hf & ETH_RSS_IPV4)
                cfg_rss_ctrl |= NFP_NET_CFG_RSS_IPV4 |
@@ -2495,15 +2605,40 @@ nfp_net_rss_hash_update(struct rte_eth_dev *dev,
        /* configuring where to apply the RSS hash */
        nn_cfg_writel(hw, NFP_NET_CFG_RSS_CTRL, cfg_rss_ctrl);
 
-       /* Writing the key byte a byte */
-       for (i = 0; i < rss_conf->rss_key_len; i++) {
-               memcpy(&key, &rss_conf->rss_key[i], 1);
-               nn_cfg_writeb(hw, NFP_NET_CFG_RSS_KEY + i, key);
-       }
-
        /* Writing the key size */
        nn_cfg_writeb(hw, NFP_NET_CFG_RSS_KEY_SZ, rss_conf->rss_key_len);
 
+       return 0;
+}
+
+static int
+nfp_net_rss_hash_update(struct rte_eth_dev *dev,
+                       struct rte_eth_rss_conf *rss_conf)
+{
+       uint32_t update;
+       uint64_t rss_hf;
+       struct nfp_net_hw *hw;
+
+       hw = NFP_NET_DEV_PRIVATE_TO_HW(dev->data->dev_private);
+
+       rss_hf = rss_conf->rss_hf;
+
+       /* Checking if RSS is enabled */
+       if (!(hw->ctrl & NFP_NET_CFG_CTRL_RSS)) {
+               if (rss_hf != 0) { /* Enable RSS? */
+                       RTE_LOG(ERR, PMD, "RSS unsupported\n");
+                       return -EINVAL;
+               }
+               return 0; /* Nothing to do */
+       }
+
+       if (rss_conf->rss_key_len > NFP_NET_CFG_RSS_KEY_SZ) {
+               RTE_LOG(ERR, PMD, "hash key too long\n");
+               return -EINVAL;
+       }
+
+       nfp_net_rss_hash_write(dev, rss_conf);
+
        update = NFP_NET_CFG_UPDATE_RSS;
 
        if (nfp_net_reconfig(hw, hw->ctrl, update) < 0)
@@ -2560,6 +2695,47 @@ nfp_net_rss_hash_conf_get(struct rte_eth_dev *dev,
        return 0;
 }
 
+static int
+nfp_net_rss_config_default(struct rte_eth_dev *dev)
+{
+       struct rte_eth_conf *dev_conf;
+       struct rte_eth_rss_conf rss_conf;
+       struct rte_eth_rss_reta_entry64 nfp_reta_conf[2];
+       uint16_t rx_queues = dev->data->nb_rx_queues;
+       uint16_t queue;
+       int i, j, ret;
+
+       RTE_LOG(INFO, PMD, "setting default RSS conf for %u queues\n",
+               rx_queues);
+
+       nfp_reta_conf[0].mask = ~0x0;
+       nfp_reta_conf[1].mask = ~0x0;
+
+       queue = 0;
+       for (i = 0; i < 0x40; i += 8) {
+               for (j = i; j < (i + 8); j++) {
+                       nfp_reta_conf[0].reta[j] = queue;
+                       nfp_reta_conf[1].reta[j] = queue++;
+                       queue %= rx_queues;
+               }
+       }
+       ret = nfp_net_rss_reta_write(dev, nfp_reta_conf, 0x80);
+       if (ret != 0)
+               return ret;
+
+       dev_conf = &dev->data->dev_conf;
+       if (!dev_conf) {
+               RTE_LOG(ERR, PMD, "wrong rss conf\n");
+               return -EINVAL;
+       }
+       rss_conf = dev_conf->rx_adv_conf.rss_conf;
+
+       ret = nfp_net_rss_hash_write(dev, &rss_conf);
+
+       return ret;
+}
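For reference (not part of the patch): the loop above spreads the 128 indirection-table entries round-robin over the configured RX queues, so with 4 queues the table reads 0,1,2,3,0,1,2,3,... An application can later override it with the generic API; a minimal sketch using the two 64-entry groups this device exposes (reta_size is NFP_NET_CFG_RSS_ITBL_SZ, i.e. 128):

    #include <string.h>
    #include <rte_ethdev.h>

    static int
    steer_all_flows_to_queue0(uint16_t port_id)
    {
            struct rte_eth_rss_reta_entry64 reta[2];

            memset(reta, 0, sizeof(reta));  /* every entry -> queue 0 */
            reta[0].mask = ~0x0ULL;         /* update all 64 entries of group 0 */
            reta[1].mask = ~0x0ULL;         /* ...and all of group 1 */

            return rte_eth_dev_rss_reta_update(port_id, reta, 128);
    }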
+
+
 /* Initialise and register driver with DPDK Application */
 static const struct eth_dev_ops nfp_net_eth_dev_ops = {
        .dev_configure          = nfp_net_configure,
@@ -2629,10 +2805,8 @@ nfp_net_init(struct rte_eth_dev *eth_dev)
        uint64_t tx_bar_off = 0, rx_bar_off = 0;
        uint32_t start_q;
        int stride = 4;
-
-       nspu_desc_t *nspu_desc = NULL;
-       uint64_t bar_offset;
        int port = 0;
+       int err;
 
        PMD_INIT_FUNC_TRACE();
 
@@ -2653,7 +2827,6 @@ nfp_net_init(struct rte_eth_dev *eth_dev)
 
                /* This points to the specific port private data */
                hw = &hwport0[port];
-               hw->pf_port_idx = port;
        } else {
                hw = NFP_NET_DEV_PRIVATE_TO_HW(eth_dev->data->dev_private);
                hwport0 = 0;
@@ -2668,9 +2841,6 @@ nfp_net_init(struct rte_eth_dev *eth_dev)
                return 0;
 
        rte_eth_copy_pci_info(eth_dev, pci_dev);
-       /* hotplug is not possible with multiport PF */
-       if (!hw->pf_multiport_enabled)
-               eth_dev->data->dev_flags |= RTE_ETH_DEV_DETACHABLE;
 
        hw->device_id = pci_dev->id.device_id;
        hw->vendor_id = pci_dev->id.vendor_id;
@@ -2690,19 +2860,14 @@ nfp_net_init(struct rte_eth_dev *eth_dev)
        }
 
        if (hw->is_pf && port == 0) {
-               nspu_desc = hw->nspu_desc;
-
-               if (nfp_nsp_map_ctrl_bar(nspu_desc, &bar_offset) != 0) {
-                       /*
-                        * A firmware should be there after PF probe so this
-                        * should not happen.
-                        */
-                       RTE_LOG(ERR, PMD, "PF BAR symbol resolution failed\n");
-                       return -ENODEV;
+               hw->ctrl_bar = nfp_rtsym_map(hw->sym_tbl, "_pf0_net_bar0",
+                                            hw->total_ports * 32768,
+                                            &hw->ctrl_area);
+               if (!hw->ctrl_bar) {
+                       printf("nfp_rtsym_map fails for _pf0_net_bar0\n");
+                       return -EIO;
                }
 
-               /* vNIC PF control BAR is a subset of PF PCI device BAR */
-               hw->ctrl_bar += bar_offset;
                PMD_INIT_LOG(DEBUG, "ctrl bar: %p\n", hw->ctrl_bar);
        }
 
@@ -2726,13 +2891,14 @@ nfp_net_init(struct rte_eth_dev *eth_dev)
        case PCI_DEVICE_ID_NFP6000_PF_NIC:
        case PCI_DEVICE_ID_NFP6000_VF_NIC:
                start_q = nn_cfg_readl(hw, NFP_NET_CFG_START_TXQ);
-               tx_bar_off = NFP_PCIE_QUEUE(start_q);
+               tx_bar_off = start_q * NFP_QCP_QUEUE_ADDR_SZ;
                start_q = nn_cfg_readl(hw, NFP_NET_CFG_START_RXQ);
-               rx_bar_off = NFP_PCIE_QUEUE(start_q);
+               rx_bar_off = start_q * NFP_QCP_QUEUE_ADDR_SZ;
                break;
        default:
                RTE_LOG(ERR, PMD, "nfp_net: no device ID matching\n");
-               return -ENODEV;
+               err = -ENODEV;
+               goto dev_err_ctrl_map;
        }
 
        PMD_INIT_LOG(DEBUG, "tx_bar_off: 0x%" PRIx64 "\n", tx_bar_off);
@@ -2740,17 +2906,19 @@ nfp_net_init(struct rte_eth_dev *eth_dev)
 
        if (hw->is_pf && port == 0) {
                /* configure access to tx/rx vNIC BARs */
-               nfp_nsp_map_queues_bar(nspu_desc, &bar_offset);
-               PMD_INIT_LOG(DEBUG, "tx/rx bar_offset: %" PRIx64 "\n",
-                                   bar_offset);
-               hwport0->hw_queues = (uint8_t *)pci_dev->mem_resource[0].addr;
-
-               /* vNIC PF tx/rx BARs are a subset of PF PCI device */
-               hwport0->hw_queues += bar_offset;
+               hwport0->hw_queues = nfp_cpp_map_area(hw->cpp, 0, 0,
+                                                     NFP_PCIE_QUEUE(0),
+                                                     NFP_QCP_QUEUE_AREA_SZ,
+                                                     &hw->hwqueues_area);
+
+               if (!hwport0->hw_queues) {
+                       printf("nfp_cpp_map_area fails for net.qc\n");
+                       err = -EIO;
+                       goto dev_err_ctrl_map;
+               }
 
-               /* Lets seize the chance to read eth table from hw */
-               if (nfp_nsp_eth_read_table(nspu_desc, &hw->eth_table))
-                       return -ENODEV;
+               PMD_INIT_LOG(DEBUG, "tx/rx bar address: 0x%p\n",
+                                   hwport0->hw_queues);
        }
 
        if (hw->is_pf) {
@@ -2773,16 +2941,22 @@ nfp_net_init(struct rte_eth_dev *eth_dev)
        hw->ver = nn_cfg_readl(hw, NFP_NET_CFG_VERSION);
        hw->cap = nn_cfg_readl(hw, NFP_NET_CFG_CAP);
        hw->max_mtu = nn_cfg_readl(hw, NFP_NET_CFG_MAX_MTU);
-       hw->mtu = hw->max_mtu;
+       hw->mtu = ETHER_MTU;
+
+       /* VLAN insertion is incompatible with LSOv2 */
+       if (hw->cap & NFP_NET_CFG_CTRL_LSO2)
+               hw->cap &= ~NFP_NET_CFG_CTRL_TXVLAN;
 
        if (NFD_CFG_MAJOR_VERSION_of(hw->ver) < 2)
                hw->rx_offset = NFP_NET_RX_OFFSET;
        else
                hw->rx_offset = nn_cfg_readl(hw, NFP_NET_CFG_RX_OFFSET_ADDR);
 
-       PMD_INIT_LOG(INFO, "VER: %#x, Maximum supported MTU: %d",
-                    hw->ver, hw->max_mtu);
-       PMD_INIT_LOG(INFO, "CAP: %#x, %s%s%s%s%s%s%s%s%s%s%s", hw->cap,
+       PMD_INIT_LOG(INFO, "VER: %u.%u, Maximum supported MTU: %d",
+                          NFD_CFG_MAJOR_VERSION_of(hw->ver),
+                          NFD_CFG_MINOR_VERSION_of(hw->ver), hw->max_mtu);
+
+       PMD_INIT_LOG(INFO, "CAP: %#x, %s%s%s%s%s%s%s%s%s%s%s%s%s", hw->cap,
                     hw->cap & NFP_NET_CFG_CTRL_PROMISC ? "PROMISC " : "",
                     hw->cap & NFP_NET_CFG_CTRL_L2BC    ? "L2BCFILT " : "",
                     hw->cap & NFP_NET_CFG_CTRL_L2MC    ? "L2MCFILT " : "",
@@ -2793,7 +2967,9 @@ nfp_net_init(struct rte_eth_dev *eth_dev)
                     hw->cap & NFP_NET_CFG_CTRL_SCATTER ? "SCATTER " : "",
                     hw->cap & NFP_NET_CFG_CTRL_GATHER  ? "GATHER "  : "",
                     hw->cap & NFP_NET_CFG_CTRL_LSO     ? "TSO "     : "",
-                    hw->cap & NFP_NET_CFG_CTRL_RSS     ? "RSS "     : "");
+                    hw->cap & NFP_NET_CFG_CTRL_LSO2    ? "TSOv2 "   : "",
+                    hw->cap & NFP_NET_CFG_CTRL_RSS     ? "RSS "     : "",
+                    hw->cap & NFP_NET_CFG_CTRL_RSS2    ? "RSSv2 "   : "");
 
        hw->ctrl = 0;
 
@@ -2810,7 +2986,8 @@ nfp_net_init(struct rte_eth_dev *eth_dev)
        eth_dev->data->mac_addrs = rte_zmalloc("mac_addr", ETHER_ADDR_LEN, 0);
        if (eth_dev->data->mac_addrs == NULL) {
                PMD_INIT_LOG(ERR, "Failed to space for MAC address");
-               return -ENOMEM;
+               err = -ENOMEM;
+               goto dev_err_queues_map;
        }
 
        if (hw->is_pf) {
@@ -2821,6 +2998,8 @@ nfp_net_init(struct rte_eth_dev *eth_dev)
        }
 
        if (!is_valid_assigned_ether_addr((struct ether_addr *)&hw->mac_addr)) {
+               PMD_INIT_LOG(INFO, "Using random mac address for port %d\n",
+                                  port);
                /* Using random mac addresses for VFs */
                eth_random_addr(&hw->mac_addr[0]);
                nfp_net_write_mac(hw, (uint8_t *)&hw->mac_addr);
@@ -2849,11 +3028,19 @@ nfp_net_init(struct rte_eth_dev *eth_dev)
        nfp_net_stats_reset(eth_dev);
 
        return 0;
+
dev_err_queues_map:
+       nfp_cpp_area_free(hw->hwqueues_area);
dev_err_ctrl_map:
+       nfp_cpp_area_free(hw->ctrl_area);
+
+       return err;
 }
 
 static int
 nfp_pf_create_dev(struct rte_pci_device *dev, int port, int ports,
-                 nfpu_desc_t *nfpu_desc, void **priv)
+                 struct nfp_cpp *cpp, struct nfp_hwinfo *hwinfo,
+                 int phys_port, struct nfp_rtsym_table *sym_tbl, void **priv)
 {
        struct rte_eth_dev *eth_dev;
        struct nfp_net_hw *hw;
@@ -2891,12 +3078,16 @@ nfp_pf_create_dev(struct rte_pci_device *dev, int port, int ports,
         * Then dev_private is adjusted per port.
         */
        hw = (struct nfp_net_hw *)(eth_dev->data->dev_private) + port;
-       hw->nspu_desc = nfpu_desc->nspu;
-       hw->nfpu_desc = nfpu_desc;
+       hw->cpp = cpp;
+       hw->hwinfo = hwinfo;
+       hw->sym_tbl = sym_tbl;
+       hw->pf_port_idx = phys_port;
        hw->is_pf = 1;
        if (ports > 1)
                hw->pf_multiport_enabled = 1;
 
+       hw->total_ports = ports;
+
        eth_dev->device = &dev->device;
        rte_eth_copy_pci_info(eth_dev, dev);
 
@@ -2910,55 +3101,191 @@ nfp_pf_create_dev(struct rte_pci_device *dev, int port, int ports,
        return ret;
 }
 
+#define DEFAULT_FW_PATH       "/lib/firmware/netronome"
+
+static int
+nfp_fw_upload(struct rte_pci_device *dev, struct nfp_nsp *nsp, char *card)
+{
+       struct nfp_cpp *cpp = nsp->cpp;
+       int fw_f;
+       char *fw_buf;
+       char fw_name[100];
+       char serial[100];
+       struct stat file_stat;
+       off_t fsize, bytes;
+
+       /* Looking for firmware file in order of priority */
+
+       /* First try to find a firmware image specific for this device */
+       sprintf(serial, "serial-%02x-%02x-%02x-%02x-%02x-%02x-%02x-%02x",
+               cpp->serial[0], cpp->serial[1], cpp->serial[2], cpp->serial[3],
+               cpp->serial[4], cpp->serial[5], cpp->interface >> 8,
+               cpp->interface & 0xff);
+
+       sprintf(fw_name, "%s/%s.nffw", DEFAULT_FW_PATH, serial);
+
+       RTE_LOG(DEBUG, PMD, "Trying with fw file: %s\n", fw_name);
+       fw_f = open(fw_name, O_RDONLY);
+       if (fw_f >= 0)
+               goto read_fw;
+
+       /* Then try the PCI name */
+       sprintf(fw_name, "%s/pci-%s.nffw", DEFAULT_FW_PATH, dev->device.name);
+
+       RTE_LOG(DEBUG, PMD, "Trying with fw file: %s\n", fw_name);
+       fw_f = open(fw_name, O_RDONLY);
+       if (fw_f >= 0)
+               goto read_fw;
+
+       /* Finally try the card type and media */
+       sprintf(fw_name, "%s/%s", DEFAULT_FW_PATH, card);
+       RTE_LOG(DEBUG, PMD, "Trying with fw file: %s\n", fw_name);
+       fw_f = open(fw_name, O_RDONLY);
+       if (fw_f < 0) {
+               RTE_LOG(INFO, PMD, "Firmware file %s not found.", fw_name);
+               return -ENOENT;
+       }
+
+read_fw:
+       if (fstat(fw_f, &file_stat) < 0) {
+               RTE_LOG(INFO, PMD, "Firmware file %s size is unknown", fw_name);
+               close(fw_f);
+               return -ENOENT;
+       }
+
+       fsize = file_stat.st_size;
+       RTE_LOG(INFO, PMD, "Firmware file found at %s with size: %" PRIu64 "\n",
+                           fw_name, (uint64_t)fsize);
+
+       fw_buf = malloc((size_t)fsize);
+       if (!fw_buf) {
+               RTE_LOG(INFO, PMD, "malloc failed for fw buffer");
+               close(fw_f);
+               return -ENOMEM;
+       }
+       memset(fw_buf, 0, fsize);
+
+       bytes = read(fw_f, fw_buf, fsize);
+       if (bytes != fsize) {
+               RTE_LOG(INFO, PMD, "Reading fw to buffer failed.\n"
+                                  "Just %" PRIu64 " of %" PRIu64 " bytes read",
+                                  (uint64_t)bytes, (uint64_t)fsize);
+               free(fw_buf);
+               close(fw_f);
+               return -EIO;
+       }
+
+       RTE_LOG(INFO, PMD, "Uploading the firmware ...");
+       nfp_nsp_load_fw(nsp, fw_buf, bytes);
+       RTE_LOG(INFO, PMD, "Done");
+
+       free(fw_buf);
+       close(fw_f);
+
+       return 0;
+}
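An aside, not from the patch: the lookup above goes from most to least specific. With made-up serial, PCI address and card values, the three candidate files would look like:

    /lib/firmware/netronome/serial-00-15-4d-12-34-56-10-ff.nffw   (device serial + interface)
    /lib/firmware/netronome/pci-0000:04:00.0.nffw                 (PCI device name)
    /lib/firmware/netronome/nic_AMDA0099-0001_2x25.nffw           (card model, port count and
                                                                   speed, built by nfp_fw_setup())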
+
+static int
+nfp_fw_setup(struct rte_pci_device *dev, struct nfp_cpp *cpp,
+            struct nfp_eth_table *nfp_eth_table, struct nfp_hwinfo *hwinfo)
+{
+       struct nfp_nsp *nsp;
+       const char *nfp_fw_model;
+       char card_desc[100];
+       int err = 0;
+
+       nfp_fw_model = nfp_hwinfo_lookup(hwinfo, "assembly.partno");
+
+       if (nfp_fw_model) {
+               RTE_LOG(INFO, PMD, "firmware model found: %s\n", nfp_fw_model);
+       } else {
+               RTE_LOG(ERR, PMD, "firmware model NOT found\n");
+               return -EIO;
+       }
+
+       if (nfp_eth_table->count == 0 || nfp_eth_table->count > 8) {
+               RTE_LOG(ERR, PMD, "NFP ethernet table reports wrong ports: %u\n",
+                      nfp_eth_table->count);
+               return -EIO;
+       }
+
+       RTE_LOG(INFO, PMD, "NFP ethernet port table reports %u ports\n",
+                          nfp_eth_table->count);
+
+       RTE_LOG(INFO, PMD, "Port speed: %u\n", nfp_eth_table->ports[0].speed);
+
+       sprintf(card_desc, "nic_%s_%dx%d.nffw", nfp_fw_model,
+               nfp_eth_table->count, nfp_eth_table->ports[0].speed / 1000);
+
+       nsp = nfp_nsp_open(cpp);
+       if (!nsp) {
+               RTE_LOG(ERR, PMD, "NFP error when obtaining NSP handle\n");
+               return -EIO;
+       }
+
+       nfp_nsp_device_soft_reset(nsp);
+       err = nfp_fw_upload(dev, nsp, card_desc);
+
+       nfp_nsp_close(nsp);
+       return err;
+}
+
 static int nfp_pf_pci_probe(struct rte_pci_driver *pci_drv __rte_unused,
                            struct rte_pci_device *dev)
 {
-       nfpu_desc_t *nfpu_desc;
-       nspu_desc_t *nspu_desc;
-       uint64_t offset_symbol;
-       uint8_t *bar_offset;
-       int major, minor;
+       struct nfp_cpp *cpp;
+       struct nfp_hwinfo *hwinfo;
+       struct nfp_rtsym_table *sym_tbl;
+       struct nfp_eth_table *nfp_eth_table = NULL;
        int total_ports;
        void *priv = 0;
        int ret = -ENODEV;
+       int err;
        int i;
 
        if (!dev)
                return ret;
 
-       nfpu_desc = rte_malloc("nfp nfpu", sizeof(nfpu_desc_t), 0);
-       if (!nfpu_desc)
-               return -ENOMEM;
-
-       if (nfpu_open(dev, nfpu_desc, 0) < 0) {
-               RTE_LOG(ERR, PMD,
-                       "nfpu_open failed\n");
-               goto nfpu_error;
+       cpp = nfp_cpp_from_device_name(dev->device.name);
+       if (!cpp) {
+               RTE_LOG(ERR, PMD, "A CPP handle can not be obtained");
+               ret = -EIO;
+               goto error;
        }
 
-       nspu_desc = nfpu_desc->nspu;
+       hwinfo = nfp_hwinfo_read(cpp);
+       if (!hwinfo) {
+               RTE_LOG(ERR, PMD, "Error reading hwinfo table");
+               return -EIO;
+       }
 
+       nfp_eth_table = nfp_eth_read_ports(cpp);
+       if (!nfp_eth_table) {
+               RTE_LOG(ERR, PMD, "Error reading NFP ethernet table\n");
+               return -EIO;
+       }
 
-       /* Check NSP ABI version */
-       if (nfp_nsp_get_abi_version(nspu_desc, &major, &minor) < 0) {
-               RTE_LOG(INFO, PMD, "NFP NSP not present\n");
+       if (nfp_fw_setup(dev, cpp, nfp_eth_table, hwinfo)) {
+               RTE_LOG(INFO, PMD, "Error when uploading firmware\n");
+               ret = -EIO;
                goto error;
        }
-       PMD_INIT_LOG(INFO, "nspu ABI version: %d.%d\n", major, minor);
 
-       if ((major == 0) && (minor < 20)) {
-               RTE_LOG(INFO, PMD, "NFP NSP ABI version too old. Required 0.20 or higher\n");
+       /* Now the symbol table should be there */
+       sym_tbl = nfp_rtsym_table_read(cpp);
+       if (!sym_tbl) {
+               RTE_LOG(ERR, PMD, "Something is wrong with the firmware"
+                               " symbol table");
+               ret = -EIO;
                goto error;
        }
 
-       ret = nfp_nsp_fw_setup(nspu_desc, "nfd_cfg_pf0_num_ports",
-                              &offset_symbol);
-       if (ret)
+       total_ports = nfp_rtsym_read_le(sym_tbl, "nfd_cfg_pf0_num_ports", &err);
+       if (total_ports != (int)nfp_eth_table->count) {
+               RTE_LOG(ERR, PMD, "Inconsistent number of ports\n");
+               ret = -EIO;
                goto error;
-
-       bar_offset = (uint8_t *)dev->mem_resource[0].addr;
-       bar_offset += offset_symbol;
-       total_ports = (uint32_t)*bar_offset;
+       }
        PMD_INIT_LOG(INFO, "Total pf ports: %d\n", total_ports);
 
        if (total_ports <= 0 || total_ports > 8) {
@@ -2968,21 +3295,21 @@ static int nfp_pf_pci_probe(struct rte_pci_driver *pci_drv __rte_unused,
        }
 
        for (i = 0; i < total_ports; i++) {
-               ret = nfp_pf_create_dev(dev, i, total_ports, nfpu_desc, &priv);
+               ret = nfp_pf_create_dev(dev, i, total_ports, cpp, hwinfo,
+                                       nfp_eth_table->ports[i].index,
+                                       sym_tbl, &priv);
                if (ret)
-                       goto error;
+                       break;
        }
 
-       return 0;
-
 error:
-       nfpu_close(nfpu_desc);
-nfpu_error:
-       rte_free(nfpu_desc);
-
+       free(nfp_eth_table);
        return ret;
 }
 
+int nfp_logtype_init;
+int nfp_logtype_driver;
+
 static const struct rte_pci_id pci_id_nfp_pf_net_map[] = {
        {
                RTE_PCI_DEVICE(PCI_VENDOR_ID_NETRONOME,
@@ -3016,6 +3343,33 @@ static int eth_nfp_pci_probe(struct rte_pci_driver *pci_drv __rte_unused,
 
 static int eth_nfp_pci_remove(struct rte_pci_device *pci_dev)
 {
+       struct rte_eth_dev *eth_dev;
+       struct nfp_net_hw *hw, *hwport0;
+       int port = 0;
+
+       eth_dev = rte_eth_dev_allocated(pci_dev->device.name);
+       if ((pci_dev->id.device_id == PCI_DEVICE_ID_NFP4000_PF_NIC) ||
+           (pci_dev->id.device_id == PCI_DEVICE_ID_NFP6000_PF_NIC)) {
+               port = get_pf_port_number(eth_dev->data->name);
+               /*
+                * hotplug is not possible with multiport PF although freeing
+                * data structures can be done for first port.
+                */
+               if (port != 0)
+                       return -ENOTSUP;
+               hwport0 = NFP_NET_DEV_PRIVATE_TO_HW(eth_dev->data->dev_private);
+               hw = &hwport0[port];
+               nfp_cpp_area_free(hw->ctrl_area);
+               nfp_cpp_area_free(hw->hwqueues_area);
+               free(hw->hwinfo);
+               free(hw->sym_tbl);
+               nfp_cpp_free(hw->cpp);
+       } else {
+               hw = NFP_NET_DEV_PRIVATE_TO_HW(eth_dev->data->dev_private);
+       }
+       /* hotplug is not possible with multiport PF */
+       if (hw->pf_multiport_enabled)
+               return -ENOTSUP;
        return rte_eth_dev_pci_generic_remove(pci_dev, NULL);
 }
 
@@ -3040,6 +3394,17 @@ RTE_PMD_REGISTER_PCI_TABLE(net_nfp_vf, pci_id_nfp_vf_net_map);
 RTE_PMD_REGISTER_KMOD_DEP(net_nfp_pf, "* igb_uio | uio_pci_generic | vfio");
 RTE_PMD_REGISTER_KMOD_DEP(net_nfp_vf, "* igb_uio | uio_pci_generic | vfio");
 
+RTE_INIT(nfp_init_log);
+static void
+nfp_init_log(void)
+{
+       nfp_logtype_init = rte_log_register("pmd.net.nfp.init");
+       if (nfp_logtype_init >= 0)
+               rte_log_set_level(nfp_logtype_init, RTE_LOG_NOTICE);
+       nfp_logtype_driver = rte_log_register("pmd.net.nfp.driver");
+       if (nfp_logtype_driver >= 0)
+               rte_log_set_level(nfp_logtype_driver, RTE_LOG_NOTICE);
+}
 /*
  * Local variables:
  * c-file-style: "Linux"