net: add rte prefix to ether structures
[dpdk.git] / drivers / net / nfp / nfp_net.c
index 80dc273..95c2be1 100644 (file)
@@ -54,6 +54,7 @@
 #include <rte_string_fns.h>
 #include <rte_alarm.h>
 #include <rte_spinlock.h>
+#include <rte_service_component.h>
 
 #include "nfpcore/nfp_cpp.h"
 #include "nfpcore/nfp_nffw.h"
 #include "nfp_net_logs.h"
 #include "nfp_net_ctrl.h"
 
+#include <sys/types.h>
+#include <sys/socket.h>
+#include <sys/un.h>
+#include <unistd.h>
+#include <stdio.h>
+#include <sys/ioctl.h>
+#include <errno.h>
+
 /* Prototypes */
 static void nfp_net_close(struct rte_eth_dev *dev);
 static int nfp_net_configure(struct rte_eth_dev *dev);
@@ -110,7 +119,7 @@ static int nfp_net_rss_reta_write(struct rte_eth_dev *dev,
 static int nfp_net_rss_hash_write(struct rte_eth_dev *dev,
                        struct rte_eth_rss_conf *rss_conf);
 static int nfp_set_mac_addr(struct rte_eth_dev *dev,
-                            struct ether_addr *mac_addr);
+                            struct rte_ether_addr *mac_addr);
 
 /* The offset of the queue controller queues in the PCIe Target */
 #define NFP_PCIE_QUEUE(_q) (0x80000 + (NFP_QCP_QUEUE_ADDR_SZ * ((_q) & 0xff)))
@@ -411,10 +420,6 @@ nfp_net_configure(struct rte_eth_dev *dev)
                return -EINVAL;
        }
 
-       /* Checking RX offloads */
-       if (!(rxmode->offloads & DEV_RX_OFFLOAD_CRC_STRIP))
-               PMD_INIT_LOG(INFO, "HW does strip CRC. No configurable!");
-
        return 0;
 }
 
@@ -548,7 +553,7 @@ nfp_net_write_mac(struct nfp_net_hw *hw, uint8_t *mac)
 }
 
 int
-nfp_set_mac_addr(struct rte_eth_dev *dev, struct ether_addr *mac_addr)
+nfp_set_mac_addr(struct rte_eth_dev *dev, struct rte_ether_addr *mac_addr)
 {
        struct nfp_net_hw *hw;
        uint32_t update, ctrl;
@@ -570,7 +575,10 @@ nfp_set_mac_addr(struct rte_eth_dev *dev, struct ether_addr *mac_addr)
 
        /* Signal the NIC about the change */
        update = NFP_NET_CFG_UPDATE_MACADDR;
-       ctrl = hw->ctrl | NFP_NET_CFG_CTRL_LIVE_ADDR;
+       ctrl = hw->ctrl;
+       if ((hw->ctrl & NFP_NET_CFG_CTRL_ENABLE) &&
+           (hw->cap & NFP_NET_CFG_CTRL_LIVE_ADDR))
+               ctrl |= NFP_NET_CFG_CTRL_LIVE_ADDR;
        if (nfp_net_reconfig(hw, ctrl, update) < 0) {
                PMD_INIT_LOG(INFO, "MAC address update failed");
                return -EIO;
@@ -762,7 +770,7 @@ nfp_net_start(struct rte_eth_dev *dev)
                return -EIO;
 
        /*
-        * Allocating rte mbuffs for configured rx queues.
+        * Allocating rte mbufs for configured rx queues.
         * This requires queues being enabled before
         */
        if (nfp_net_rx_freelist_setup(dev) < 0) {
@@ -770,9 +778,14 @@ nfp_net_start(struct rte_eth_dev *dev)
                goto error;
        }
 
-       if (hw->is_pf)
-               /* Configure the physical port up */
-               nfp_eth_set_configured(hw->cpp, hw->pf_port_idx, 1);
+       if (hw->is_pf) {
+               if (rte_eal_process_type() == RTE_PROC_PRIMARY)
+                       /* Configure the physical port up */
+                       nfp_eth_set_configured(hw->cpp, hw->pf_port_idx, 1);
+               else
+                       nfp_eth_set_configured(dev->process_private,
+                                              hw->pf_port_idx, 1);
+       }
 
        hw->ctrl = new_ctrl;
 
@@ -821,9 +834,56 @@ nfp_net_stop(struct rte_eth_dev *dev)
                        (struct nfp_net_rxq *)dev->data->rx_queues[i]);
        }
 
-       if (hw->is_pf)
+       if (hw->is_pf) {
+               if (rte_eal_process_type() == RTE_PROC_PRIMARY)
+                       /* Configure the physical port down */
+                       nfp_eth_set_configured(hw->cpp, hw->pf_port_idx, 0);
+               else
+                       nfp_eth_set_configured(dev->process_private,
+                                              hw->pf_port_idx, 0);
+       }
+}
+
+/* Set the link up. PF only: any other device gets -ENOTSUP. */
+static int
+nfp_net_set_link_up(struct rte_eth_dev *dev)
+{
+       struct nfp_net_hw *hw;
+
+       PMD_DRV_LOG(DEBUG, "Set link up");
+
+       hw = NFP_NET_DEV_PRIVATE_TO_HW(dev->data->dev_private);
+
+       if (!hw->is_pf)
+               return -ENOTSUP;
+
+       if (rte_eal_process_type() == RTE_PROC_PRIMARY)
+               /* Configure the physical port up */
+               return nfp_eth_set_configured(hw->cpp, hw->pf_port_idx, 1);
+       else
+               return nfp_eth_set_configured(dev->process_private,
+                                             hw->pf_port_idx, 1);
+}
+
+/* Set the link down. PF only: any other device gets -ENOTSUP. */
+static int
+nfp_net_set_link_down(struct rte_eth_dev *dev)
+{
+       struct nfp_net_hw *hw;
+
+       PMD_DRV_LOG(DEBUG, "Set link down");
+
+       hw = NFP_NET_DEV_PRIVATE_TO_HW(dev->data->dev_private);
+
+       if (!hw->is_pf)
+               return -ENOTSUP;
+
+       if (rte_eal_process_type() == RTE_PROC_PRIMARY)
                /* Configure the physical port down */
-               nfp_eth_set_configured(hw->cpp, hw->pf_port_idx, 0);
+               return nfp_eth_set_configured(hw->cpp, hw->pf_port_idx, 0);
+       else
+               return nfp_eth_set_configured(dev->process_private,
+                                             hw->pf_port_idx, 0);
 }
 
 /* Reset and stop device. The device can not be restarted. */
@@ -1202,8 +1262,10 @@ nfp_net_infos_get(struct rte_eth_dev *dev, struct rte_eth_dev_info *dev_info)
                .tx_rs_thresh = DEFAULT_TX_RSBIT_THRESH,
        };
 
-       dev_info->flow_type_rss_offloads = ETH_RSS_NONFRAG_IPV4_TCP |
+       dev_info->flow_type_rss_offloads = ETH_RSS_IPV4 |
+                                          ETH_RSS_NONFRAG_IPV4_TCP |
                                           ETH_RSS_NONFRAG_IPV4_UDP |
+                                          ETH_RSS_IPV6 |
                                           ETH_RSS_NONFRAG_IPV6_TCP |
                                           ETH_RSS_NONFRAG_IPV6_UDP;
 
@@ -1436,9 +1498,9 @@ nfp_net_dev_mtu_set(struct rte_eth_dev *dev, uint16_t mtu)
 
        /* switch to jumbo mode if needed */
        if ((uint32_t)mtu > ETHER_MAX_LEN)
-               dev->data->dev_conf.rxmode.jumbo_frame = 1;
+               dev->data->dev_conf.rxmode.offloads |= DEV_RX_OFFLOAD_JUMBO_FRAME;
        else
-               dev->data->dev_conf.rxmode.jumbo_frame = 0;
+               dev->data->dev_conf.rxmode.offloads &= ~DEV_RX_OFFLOAD_JUMBO_FRAME;
 
        /* update max frame size */
        dev->data->dev_conf.rxmode.max_rx_pkt_len = (uint32_t)mtu;
@@ -1489,7 +1551,7 @@ nfp_net_rx_queue_setup(struct rte_eth_dev *dev,
        if (rxq == NULL)
                return -ENOMEM;
 
-       /* Hw queues mapping based on firmware confifguration */
+       /* Hw queues mapping based on firmware configuration */
        rxq->qidx = queue_idx;
        rxq->fl_qcidx = queue_idx * hw->stride_rx;
        rxq->rx_qcidx = rxq->fl_qcidx + (hw->stride_rx - 1);
@@ -1508,8 +1570,6 @@ nfp_net_rx_queue_setup(struct rte_eth_dev *dev,
        rxq->rx_count = nb_desc;
        rxq->port_id = dev->data->port_id;
        rxq->rx_free_thresh = rx_conf->rx_free_thresh;
-       rxq->crc_len = (uint8_t) ((dev->data->dev_conf.rxmode.hw_strip_crc) ? 0
-                                 : ETHER_CRC_LEN);
        rxq->drop_en = rx_conf->rx_drop_en;
 
        /*
@@ -1523,7 +1583,7 @@ nfp_net_rx_queue_setup(struct rte_eth_dev *dev,
                                   socket_id);
 
        if (tz == NULL) {
-               PMD_DRV_LOG(ERR, "Error allocatig rx dma");
+               PMD_DRV_LOG(ERR, "Error allocating rx dma");
                nfp_net_rx_queue_release(rxq);
                return -ENOMEM;
        }
@@ -1785,21 +1845,20 @@ nfp_net_rx_cksum(struct nfp_net_rxq *rxq, struct nfp_net_rx_desc *rxd,
                return;
 
        /* If IPv4 and IP checksum error, fail */
-       if ((rxd->rxd.flags & PCIE_DESC_RX_IP4_CSUM) &&
-           !(rxd->rxd.flags & PCIE_DESC_RX_IP4_CSUM_OK))
+       if (unlikely((rxd->rxd.flags & PCIE_DESC_RX_IP4_CSUM) &&
+           !(rxd->rxd.flags & PCIE_DESC_RX_IP4_CSUM_OK)))
                mb->ol_flags |= PKT_RX_IP_CKSUM_BAD;
+       else
+               mb->ol_flags |= PKT_RX_IP_CKSUM_GOOD;
 
        /* If neither UDP nor TCP return */
        if (!(rxd->rxd.flags & PCIE_DESC_RX_TCP_CSUM) &&
            !(rxd->rxd.flags & PCIE_DESC_RX_UDP_CSUM))
                return;
 
-       if ((rxd->rxd.flags & PCIE_DESC_RX_TCP_CSUM) &&
-           !(rxd->rxd.flags & PCIE_DESC_RX_TCP_CSUM_OK))
-               mb->ol_flags |= PKT_RX_L4_CKSUM_BAD;
-
-       if ((rxd->rxd.flags & PCIE_DESC_RX_UDP_CSUM) &&
-           !(rxd->rxd.flags & PCIE_DESC_RX_UDP_CSUM_OK))
+       if (likely(rxd->rxd.flags & PCIE_DESC_RX_L4_CSUM_OK))
+               mb->ol_flags |= PKT_RX_L4_CKSUM_GOOD;
+       else
                mb->ol_flags |= PKT_RX_L4_CKSUM_BAD;
 }
 
@@ -1883,6 +1942,18 @@ nfp_net_set_hash(struct nfp_net_rxq *rxq, struct nfp_net_rx_desc *rxd,
        case NFP_NET_RSS_IPV6_EX:
                mbuf->packet_type |= RTE_PTYPE_INNER_L3_IPV6_EXT;
                break;
+       case NFP_NET_RSS_IPV4_TCP:
+               mbuf->packet_type |= RTE_PTYPE_INNER_L3_IPV6_EXT;
+               break;
+       case NFP_NET_RSS_IPV6_TCP:
+               mbuf->packet_type |= RTE_PTYPE_INNER_L3_IPV6_EXT;
+               break;
+       case NFP_NET_RSS_IPV4_UDP:
+               mbuf->packet_type |= RTE_PTYPE_INNER_L3_IPV6_EXT;
+               break;
+       case NFP_NET_RSS_IPV6_UDP:
+               mbuf->packet_type |= RTE_PTYPE_INNER_L3_IPV6_EXT;
+               break;
        default:
                mbuf->packet_type |= RTE_PTYPE_INNER_L4_MASK;
        }
@@ -1899,7 +1970,7 @@ nfp_net_mbuf_alloc_failed(struct nfp_net_rxq *rxq)
 /*
  * RX path design:
  *
- * There are some decissions to take:
+ * There are some decisions to take:
  * 1) How to check DD RX descriptors bit
  * 2) How and when to allocate new mbufs
  *
@@ -1969,7 +2040,7 @@ nfp_net_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t nb_pkts)
                rte_rmb();
 
                /*
-                * We got a packet. Let's alloc a new mbuff for refilling the
+                * We got a packet. Let's alloc a new mbuf for refilling the
                 * free descriptor ring as soon as possible
                 */
                new_mb = rte_pktmbuf_alloc(rxq->mem_pool);
@@ -1984,8 +2055,8 @@ nfp_net_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t nb_pkts)
                nb_hold++;
 
                /*
-                * Grab the mbuff and refill the descriptor with the
-                * previously allocated mbuff
+                * Grab the mbuf and refill the descriptor with the
+                * previously allocated mbuf
                 */
                mb = rxb->mbuf;
                rxb->mbuf = new_mb;
@@ -2017,7 +2088,7 @@ nfp_net_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t nb_pkts)
                        return -EINVAL;
                }
 
-               /* Filling the received mbuff with packet info */
+               /* Filling the received mbuf with packet info */
                if (hw->rx_offset)
                        mb->data_off = RTE_PKTMBUF_HEADROOM + hw->rx_offset;
                else
@@ -2042,7 +2113,7 @@ nfp_net_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t nb_pkts)
                        mb->ol_flags |= PKT_RX_VLAN | PKT_RX_VLAN_STRIPPED;
                }
 
-               /* Adding the mbuff to the mbuff array passed by the app */
+               /* Adding the mbuf to the mbuf array passed by the app */
                rx_pkts[avail++] = mb;
 
                /* Now resetting and updating the descriptor */
@@ -2255,11 +2326,15 @@ nfp_net_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t nb_pkts)
                                txq->wr_p = 0;
 
                        pkt_size -= dma_size;
-                       if (!pkt_size)
-                               /* End of packet */
-                               txds->offset_eop |= PCIE_DESC_TX_EOP;
+
+                       /*
+                        * Flagging the EOP on the last segment; packets with
+                        * just one segment are treated as the likely case
+                        */
+                       if (likely(!pkt_size))
+                               txds->offset_eop = PCIE_DESC_TX_EOP;
                        else
-                               txds->offset_eop &= PCIE_DESC_TX_OFFSET_MASK;
+                               txds->offset_eop = 0;
 
                        pkt = pkt->next;
                        /* Referencing next free TX descriptor */
@@ -2432,7 +2507,7 @@ nfp_net_reta_query(struct rte_eth_dev *dev,
                for (j = 0; j < 4; j++) {
                        if (!(mask & (0x1 << j)))
                                continue;
-                       reta_conf->reta[shift + j] =
+                       reta_conf[idx].reta[shift + j] =
                                (uint8_t)((reta >> (8 * j)) & 0xF);
                }
        }
@@ -2460,14 +2535,22 @@ nfp_net_rss_hash_write(struct rte_eth_dev *dev,
        rss_hf = rss_conf->rss_hf;
 
        if (rss_hf & ETH_RSS_IPV4)
-               cfg_rss_ctrl |= NFP_NET_CFG_RSS_IPV4 |
-                               NFP_NET_CFG_RSS_IPV4_TCP |
-                               NFP_NET_CFG_RSS_IPV4_UDP;
+               cfg_rss_ctrl |= NFP_NET_CFG_RSS_IPV4;
+
+       if (rss_hf & ETH_RSS_NONFRAG_IPV4_TCP)
+               cfg_rss_ctrl |= NFP_NET_CFG_RSS_IPV4_TCP;
+
+       if (rss_hf & ETH_RSS_NONFRAG_IPV4_UDP)
+               cfg_rss_ctrl |= NFP_NET_CFG_RSS_IPV4_UDP;
 
        if (rss_hf & ETH_RSS_IPV6)
-               cfg_rss_ctrl |= NFP_NET_CFG_RSS_IPV6 |
-                               NFP_NET_CFG_RSS_IPV6_TCP |
-                               NFP_NET_CFG_RSS_IPV6_UDP;
+               cfg_rss_ctrl |= NFP_NET_CFG_RSS_IPV6;
+
+       if (rss_hf & ETH_RSS_NONFRAG_IPV6_TCP)
+               cfg_rss_ctrl |= NFP_NET_CFG_RSS_IPV6_TCP;
+
+       if (rss_hf & ETH_RSS_NONFRAG_IPV6_UDP)
+               cfg_rss_ctrl |= NFP_NET_CFG_RSS_IPV6_UDP;
 
        cfg_rss_ctrl |= NFP_NET_CFG_RSS_MASK;
        cfg_rss_ctrl |= NFP_NET_CFG_RSS_TOEPLITZ;
@@ -2611,6 +2694,8 @@ static const struct eth_dev_ops nfp_net_eth_dev_ops = {
        .dev_configure          = nfp_net_configure,
        .dev_start              = nfp_net_start,
        .dev_stop               = nfp_net_stop,
+       .dev_set_link_up        = nfp_net_set_link_up,
+       .dev_set_link_down      = nfp_net_set_link_down,
        .dev_close              = nfp_net_close,
        .promiscuous_enable     = nfp_net_promisc_enable,
        .promiscuous_disable    = nfp_net_promisc_disable,
@@ -2683,6 +2768,14 @@ nfp_net_init(struct rte_eth_dev *eth_dev)
 
        pci_dev = RTE_ETH_DEV_TO_PCI(eth_dev);
 
+       /* NFP can not handle DMA addresses requiring more than 40 bits */
+       if (rte_mem_check_dma_mask(40)) {
+               RTE_LOG(ERR, PMD, "device %s can not be used:",
+                                  pci_dev->device.name);
+               RTE_LOG(ERR, PMD, "\trestricted dma mask to 40 bits!\n");
+               return -ENODEV;
+       };
+
        if ((pci_dev->id.device_id == PCI_DEVICE_ID_NFP4000_PF_NIC) ||
            (pci_dev->id.device_id == PCI_DEVICE_ID_NFP6000_PF_NIC)) {
                port = get_pf_port_number(eth_dev->data->name);
@@ -2762,9 +2855,9 @@ nfp_net_init(struct rte_eth_dev *eth_dev)
        case PCI_DEVICE_ID_NFP6000_PF_NIC:
        case PCI_DEVICE_ID_NFP6000_VF_NIC:
                start_q = nn_cfg_readl(hw, NFP_NET_CFG_START_TXQ);
-               tx_bar_off = start_q * NFP_QCP_QUEUE_ADDR_SZ;
+               tx_bar_off = (uint64_t)start_q * NFP_QCP_QUEUE_ADDR_SZ;
                start_q = nn_cfg_readl(hw, NFP_NET_CFG_START_RXQ);
-               rx_bar_off = start_q * NFP_QCP_QUEUE_ADDR_SZ;
+               rx_bar_off = (uint64_t)start_q * NFP_QCP_QUEUE_ADDR_SZ;
                break;
        default:
                PMD_DRV_LOG(ERR, "nfp_net: no device ID matching");
@@ -2869,7 +2962,8 @@ nfp_net_init(struct rte_eth_dev *eth_dev)
                nfp_net_vf_read_mac(hw);
        }
 
-       if (!is_valid_assigned_ether_addr((struct ether_addr *)&hw->mac_addr)) {
+       if (!is_valid_assigned_ether_addr(
+                   (struct rte_ether_addr *)&hw->mac_addr)) {
                PMD_INIT_LOG(INFO, "Using random mac address for port %d",
                                   port);
                /* Using random mac addresses for VFs */
@@ -2878,9 +2972,12 @@ nfp_net_init(struct rte_eth_dev *eth_dev)
        }
 
        /* Copying mac address to DPDK eth_dev struct */
-       ether_addr_copy((struct ether_addr *)hw->mac_addr,
+       ether_addr_copy((struct rte_ether_addr *)hw->mac_addr,
                        &eth_dev->data->mac_addrs[0]);
 
+       if (!(hw->cap & NFP_NET_CFG_CTRL_LIVE_ADDR))
+               eth_dev->data->dev_flags |= RTE_ETH_DEV_NOLIVE_MAC_ADDR;
+
        PMD_INIT_LOG(INFO, "port %d VendorID=0x%x DeviceID=0x%x "
                     "mac=%02x:%02x:%02x:%02x:%02x:%02x",
                     eth_dev->data->port_id, pci_dev->id.vendor_id,
@@ -2888,16 +2985,16 @@ nfp_net_init(struct rte_eth_dev *eth_dev)
                     hw->mac_addr[0], hw->mac_addr[1], hw->mac_addr[2],
                     hw->mac_addr[3], hw->mac_addr[4], hw->mac_addr[5]);
 
-       /* Registering LSC interrupt handler */
-       rte_intr_callback_register(&pci_dev->intr_handle,
-                                  nfp_net_dev_interrupt_handler,
-                                  (void *)eth_dev);
-
-       /* Telling the firmware about the LSC interrupt entry */
-       nn_cfg_writeb(hw, NFP_NET_CFG_LSC, NFP_NET_IRQ_LSC_IDX);
-
-       /* Recording current stats counters values */
-       nfp_net_stats_reset(eth_dev);
+       if (rte_eal_process_type() == RTE_PROC_PRIMARY) {
+               /* Registering LSC interrupt handler */
+               rte_intr_callback_register(&pci_dev->intr_handle,
+                                          nfp_net_dev_interrupt_handler,
+                                          (void *)eth_dev);
+               /* Telling the firmware about the LSC interrupt entry */
+               nn_cfg_writeb(hw, NFP_NET_CFG_LSC, NFP_NET_IRQ_LSC_IDX);
+               /* Recording current stats counters values */
+               nfp_net_stats_reset(eth_dev);
+       }
 
        return 0;
 
@@ -2909,70 +3006,466 @@ dev_err_ctrl_map:
        return err;
 }
 
+#define NFP_CPP_MEMIO_BOUNDARY         (1 << 20)
+
+/*
+ * Serve a write request from a host program: read count and offset from
+ * sockfd, then copy count bytes from the socket into the CPP target through
+ * cpp, one area per NFP_CPP_MEMIO_BOUNDARY window. 0 on success, else < 0.
+ */
+static int
+nfp_cpp_bridge_serve_write(int sockfd, struct nfp_cpp *cpp)
+{
+       struct nfp_cpp_area *area;
+       off_t offset, nfp_offset;
+       uint32_t cpp_id, pos, len;
+       uint32_t tmpbuf[16];
+       size_t count, curlen, totlen = 0;
+       int err = 0;
+
+       PMD_CPP_LOG(DEBUG, "%s: offset size %lu, count_size: %lu\n", __func__,
+               sizeof(off_t), sizeof(size_t));
+
+       /* Reading the count param. NOTE(review): size_t read as sizeof(off_t) */
+       err = recv(sockfd, &count, sizeof(off_t), 0);
+       if (err != sizeof(off_t))
+               return -EINVAL;
+
+       curlen = count;
+
+       /* Reading the offset param (encodes both CPP ID and target offset) */
+       err = recv(sockfd, &offset, sizeof(off_t), 0);
+       if (err != sizeof(off_t))
+               return -EINVAL;
+
+       /* CPP ID: bits above 40 shifted left by 8; offset: the low 40 bits */
+       cpp_id = (offset >> 40) << 8;
+       nfp_offset = offset & ((1ull << 40) - 1);
+
+       PMD_CPP_LOG(DEBUG, "%s: count %lu and offset %ld\n", __func__, count,
+               offset);
+       PMD_CPP_LOG(DEBUG, "%s: cpp_id %08x and nfp_offset %ld\n", __func__,
+               cpp_id, nfp_offset);
+
+       /* Clip the first chunk if the request crosses a MEMIO boundary */
+       if (((nfp_offset + (off_t)count - 1) & ~(NFP_CPP_MEMIO_BOUNDARY - 1)) !=
+           (nfp_offset & ~(NFP_CPP_MEMIO_BOUNDARY - 1))) {
+               curlen = NFP_CPP_MEMIO_BOUNDARY -
+                       (nfp_offset & (NFP_CPP_MEMIO_BOUNDARY - 1));
+       }
+
+       while (count > 0) {
+               /* configure a CPP PCIe2CPP BAR for mapping the CPP target */
+               area = nfp_cpp_area_alloc_with_name(cpp, cpp_id, "nfp.cdev",
+                                                   nfp_offset, curlen);
+               if (!area) {
+                       RTE_LOG(ERR, PMD, "%s: area alloc fail\n", __func__);
+                       return -EIO;
+               }
+
+               /* mapping the target */
+               err = nfp_cpp_area_acquire(area);
+               if (err < 0) {
+                       RTE_LOG(ERR, PMD, "area acquire failed\n");
+                       nfp_cpp_area_free(area);
+                       return -EIO;
+               }
+
+               for (pos = 0; pos < curlen; pos += len) {
+                       len = curlen - pos;
+                       if (len > sizeof(tmpbuf))
+                               len = sizeof(tmpbuf);
+
+                       PMD_CPP_LOG(DEBUG, "%s: Receive %u of %lu\n", __func__,
+                                          len, count);
+                       err = recv(sockfd, tmpbuf, len, MSG_WAITALL);
+                       if (err != (int)len) {
+                               RTE_LOG(ERR, PMD,
+                                       "%s: error when receiving, %d of %lu\n",
+                                       __func__, err, count);
+                               nfp_cpp_area_release(area);
+                               nfp_cpp_area_free(area);
+                               return -EIO;
+                       }
+                       err = nfp_cpp_area_write(area, pos, tmpbuf, len);
+                       if (err < 0) {
+                               RTE_LOG(ERR, PMD, "nfp_cpp_area_write error\n");
+                               nfp_cpp_area_release(area);
+                               nfp_cpp_area_free(area);
+                               return -EIO;
+                       }
+               }
+
+               nfp_offset += pos;
+               totlen += pos;
+               nfp_cpp_area_release(area);
+               nfp_cpp_area_free(area);
+
+               count -= pos;
+               curlen = (count > NFP_CPP_MEMIO_BOUNDARY) ?
+                        NFP_CPP_MEMIO_BOUNDARY : count;
+       }
+
+       return 0;
+}
+
+/*
+ * Serve a read request from a host program: read count and offset from
+ * sockfd, then read count bytes from the CPP target through cpp, one area
+ * per NFP_CPP_MEMIO_BOUNDARY window, sending each piece back on the same
+ * socket in chunks of at most sizeof(tmpbuf). 0 on success, else < 0.
+ */
+static int
+nfp_cpp_bridge_serve_read(int sockfd, struct nfp_cpp *cpp)
+{
+       struct nfp_cpp_area *area;
+       off_t offset, nfp_offset;
+       uint32_t cpp_id, pos, len;
+       uint32_t tmpbuf[16];
+       size_t count, curlen, totlen = 0;
+       int err = 0;
+
+       PMD_CPP_LOG(DEBUG, "%s: offset size %lu, count_size: %lu\n", __func__,
+               sizeof(off_t), sizeof(size_t));
+
+       /* Reading the count param. NOTE(review): size_t read as sizeof(off_t) */
+       err = recv(sockfd, &count, sizeof(off_t), 0);
+       if (err != sizeof(off_t))
+               return -EINVAL;
+
+       curlen = count;
+
+       /* Reading the offset param (encodes both CPP ID and target offset) */
+       err = recv(sockfd, &offset, sizeof(off_t), 0);
+       if (err != sizeof(off_t))
+               return -EINVAL;
+
+       /* CPP ID: bits above 40 shifted left by 8; offset: the low 40 bits */
+       cpp_id = (offset >> 40) << 8;
+       nfp_offset = offset & ((1ull << 40) - 1);
+
+       PMD_CPP_LOG(DEBUG, "%s: count %lu and offset %ld\n", __func__, count,
+                          offset);
+       PMD_CPP_LOG(DEBUG, "%s: cpp_id %08x and nfp_offset %ld\n", __func__,
+                          cpp_id, nfp_offset);
+
+       /* Clip the first chunk if the request crosses a MEMIO boundary */
+       if (((nfp_offset + (off_t)count - 1) & ~(NFP_CPP_MEMIO_BOUNDARY - 1)) !=
+           (nfp_offset & ~(NFP_CPP_MEMIO_BOUNDARY - 1))) {
+               curlen = NFP_CPP_MEMIO_BOUNDARY -
+                       (nfp_offset & (NFP_CPP_MEMIO_BOUNDARY - 1));
+       }
+
+       while (count > 0) {
+               area = nfp_cpp_area_alloc_with_name(cpp, cpp_id, "nfp.cdev",
+                                                   nfp_offset, curlen);
+               if (!area) {
+                       RTE_LOG(ERR, PMD, "%s: area alloc failed\n", __func__);
+                       return -EIO;
+               }
+
+               err = nfp_cpp_area_acquire(area);
+               if (err < 0) {
+                       RTE_LOG(ERR, PMD, "area acquire failed\n");
+                       nfp_cpp_area_free(area);
+                       return -EIO;
+               }
+
+               for (pos = 0; pos < curlen; pos += len) {
+                       len = curlen - pos;
+                       if (len > sizeof(tmpbuf))
+                               len = sizeof(tmpbuf);
+
+                       err = nfp_cpp_area_read(area, pos, tmpbuf, len);
+                       if (err < 0) {
+                               RTE_LOG(ERR, PMD, "nfp_cpp_area_read error\n");
+                               nfp_cpp_area_release(area);
+                               nfp_cpp_area_free(area);
+                               return -EIO;
+                       }
+                       PMD_CPP_LOG(DEBUG, "%s: sending %u of %lu\n", __func__,
+                                          len, count);
+
+                       err = send(sockfd, tmpbuf, len, 0);
+                       if (err != (int)len) {
+                               RTE_LOG(ERR, PMD,
+                                       "%s: error when sending: %d of %lu\n",
+                                       __func__, err, count);
+                               nfp_cpp_area_release(area);
+                               nfp_cpp_area_free(area);
+                               return -EIO;
+                       }
+               }
+
+               nfp_offset += pos;
+               totlen += pos;
+               nfp_cpp_area_release(area);
+               nfp_cpp_area_free(area);
+
+               count -= pos;
+               curlen = (count > NFP_CPP_MEMIO_BOUNDARY) ?
+                       NFP_CPP_MEMIO_BOUNDARY : count;
+       }
+       return 0;
+}
+
+#define NFP_IOCTL 'n'
+#define NFP_IOCTL_CPP_IDENTIFICATION _IOW(NFP_IOCTL, 0x8f, uint32_t)
+/*
+ * Serve an ioctl command from host NFP tools. This usually goes to a
+ * kernel char driver, but that is not available when the PF is bound to
+ * the PMD. Only NFP_IOCTL_CPP_IDENTIFICATION is served, needing no CPP
+ * access: the NFP model and the cpp->interface value are sent back.
+ */
+static int
+nfp_cpp_bridge_serve_ioctl(int sockfd, struct nfp_cpp *cpp)
+{
+       uint32_t cmd, ident_size, tmp;
+       int err;
+
+       /* Reading the 4-byte IOCTL command */
+       err = recv(sockfd, &cmd, 4, 0);
+       if (err != 4) {
+               RTE_LOG(ERR, PMD, "%s: read error from socket\n", __func__);
+               return -EIO;
+       }
+
+       /* Only supporting NFP_IOCTL_CPP_IDENTIFICATION */
+       if (cmd != NFP_IOCTL_CPP_IDENTIFICATION) {
+               RTE_LOG(ERR, PMD, "%s: unknown cmd %d\n", __func__, cmd);
+               return -EINVAL;
+       }
+
+       err = recv(sockfd, &ident_size, 4, 0);
+       if (err != 4) {
+               RTE_LOG(ERR, PMD, "%s: read error from socket\n", __func__);
+               return -EIO;
+       }
+
+       tmp = nfp_cpp_model(cpp);
+
+       PMD_CPP_LOG(DEBUG, "%s: sending NFP model %08x\n", __func__, tmp);
+
+       err = send(sockfd, &tmp, 4, 0);
+       if (err != 4) {
+               RTE_LOG(ERR, PMD, "%s: error writing to socket\n", __func__);
+               return -EIO;
+       }
+
+       tmp = cpp->interface;
+
+       PMD_CPP_LOG(DEBUG, "%s: sending NFP interface %08x\n", __func__, tmp);
+
+       err = send(sockfd, &tmp, 4, 0);
+       if (err != 4) {
+               RTE_LOG(ERR, PMD, "%s: error writing to socket\n", __func__);
+               return -EIO;
+       }
+
+       return 0;
+}
+
+#define NFP_BRIDGE_OP_READ     20
+#define NFP_BRIDGE_OP_WRITE    30
+#define NFP_BRIDGE_OP_IOCTL    40
+
+/*
+ * This is the code to be executed by a service core. The CPP bridge listens
+ * on the unix socket /tmp/nfp_cpp and handles the requests a kernel char
+ * driver would usually receive: read, write and ioctl. NFP host tools can
+ * run on top of a wrapper library (LD_LIBRARY) and stay completely unaware
+ * that the CPP bridge, not the NFP kernel char driver, performs the CPP
+ * accesses. Only returns on a socket, bind, listen or accept failure.
+ */
+static int32_t
+nfp_cpp_bridge_service_func(void *args)
+{
+       struct sockaddr address;
+       struct nfp_cpp *cpp = args;
+       int sockfd, datafd, op, ret;
+
+       unlink("/tmp/nfp_cpp");
+       sockfd = socket(AF_UNIX, SOCK_STREAM, 0);
+       if (sockfd < 0) {
+               RTE_LOG(ERR, PMD, "%s: socket creation error. Service failed\n",
+                       __func__);
+               return -EIO;
+       }
+
+       memset(&address, 0, sizeof(struct sockaddr));
+
+       address.sa_family = AF_UNIX;
+       strcpy(address.sa_data, "/tmp/nfp_cpp");
+
+       ret = bind(sockfd, (const struct sockaddr *)&address,
+                  sizeof(struct sockaddr));
+       if (ret < 0) {
+               RTE_LOG(ERR, PMD, "%s: bind error (%d). Service failed\n",
+                                 __func__, errno);
+               close(sockfd);
+               return ret;
+       }
+
+       ret = listen(sockfd, 20);
+       if (ret < 0) {
+               RTE_LOG(ERR, PMD, "%s: listen error(%d). Service failed\n",
+                                 __func__, errno);
+               close(sockfd);
+               return ret;
+       }
+
+       for (;;) {
+               datafd = accept(sockfd, NULL, NULL);
+               if (datafd < 0) {
+                       RTE_LOG(ERR, PMD, "%s: accept call error (%d)\n",
+                                         __func__, errno);
+                       RTE_LOG(ERR, PMD, "%s: service failed\n", __func__);
+                       close(sockfd);
+                       return -EIO;
+               }
+
+               while (1) {
+                       ret = recv(datafd, &op, 4, 0);
+                       if (ret <= 0) {
+                               PMD_CPP_LOG(DEBUG, "%s: socket close\n",
+                                                  __func__);
+                               break;
+                       }
+
+                       PMD_CPP_LOG(DEBUG, "%s: getting op %u\n", __func__, op);
+
+                       if (op == NFP_BRIDGE_OP_READ)
+                               nfp_cpp_bridge_serve_read(datafd, cpp);
+
+                       if (op == NFP_BRIDGE_OP_WRITE)
+                               nfp_cpp_bridge_serve_write(datafd, cpp);
+
+                       if (op == NFP_BRIDGE_OP_IOCTL)
+                               nfp_cpp_bridge_serve_ioctl(datafd, cpp);
+
+                       if (op == 0)
+                               break;
+               }
+               close(datafd);
+       }
+       close(sockfd);
+
+       return 0;
+}
+
+/*
+ * Create (primary process) or attach to (secondary process) the ethdev for
+ * one PF port, then initialise it with nfp_net_init().
+ *
+ * The ethdev is named "<pci device name>_port<port>" when the PF exposes more
+ * than one port, and just "<pci device name>" otherwise.
+ *
+ * In the primary process the call for port 0 allocates one array holding
+ * 'ports' nfp_net_adapter entries and stores it in *priv; every call then
+ * uses *priv as the base of the per-port private data (callers are expected
+ * to pass the same priv for all ports). In a secondary process the cpp
+ * handle is kept in eth_dev->process_private instead.
+ *
+ * For port 0 the "nfp_cpp_service" service is also registered, with cpp as
+ * its callback data. A registration failure is only logged.
+ *
+ * Returns 0 on success, -ENOMEM if an allocation fails, or -ENODEV if the
+ * ethdev cannot be allocated/attached or nfp_net_init() fails.
+ */
 static int
 nfp_pf_create_dev(struct rte_pci_device *dev, int port, int ports,
                  struct nfp_cpp *cpp, struct nfp_hwinfo *hwinfo,
                  int phys_port, struct nfp_rtsym_table *sym_tbl, void **priv)
 {
        struct rte_eth_dev *eth_dev;
-       struct nfp_net_hw *hw;
+       struct nfp_net_hw *hw = NULL;
        char *port_name;
-       int ret;
+       struct rte_service_spec service;
+       int retval;
 
        port_name = rte_zmalloc("nfp_pf_port_name", 100, 0);
        if (!port_name)
                return -ENOMEM;
 
        if (ports > 1)
-               sprintf(port_name, "%s_port%d", dev->device.name, port);
+               snprintf(port_name, 100, "%s_port%d", dev->device.name, port);
        else
-               sprintf(port_name, "%s", dev->device.name);
+               snprintf(port_name, 100, "%s", dev->device.name);
 
-       eth_dev = rte_eth_dev_allocate(port_name);
-       if (!eth_dev)
-               return -ENOMEM;
 
-       if (port == 0) {
-               *priv = rte_zmalloc(port_name,
-                                   sizeof(struct nfp_net_adapter) * ports,
-                                   RTE_CACHE_LINE_SIZE);
-               if (!*priv) {
-                       rte_eth_dev_release_port(eth_dev);
-                       return -ENOMEM;
+       if (rte_eal_process_type() == RTE_PROC_PRIMARY) {
+               eth_dev = rte_eth_dev_allocate(port_name);
+               if (!eth_dev) {
+                       rte_free(port_name);
+                       return -ENODEV;
                }
-       }
-
-       eth_dev->data->dev_private = *priv;
-
-       /*
-        * dev_private pointing to port0 dev_private because we need
-        * to configure vNIC bars based on port0 at nfp_net_init.
-        * Then dev_private is adjusted per port.
-        */
-       hw = (struct nfp_net_hw *)(eth_dev->data->dev_private) + port;
-       hw->cpp = cpp;
-       hw->hwinfo = hwinfo;
-       hw->sym_tbl = sym_tbl;
-       hw->pf_port_idx = phys_port;
-       hw->is_pf = 1;
-       if (ports > 1)
-               hw->pf_multiport_enabled = 1;
+               if (port == 0) {
+                       *priv = rte_zmalloc(port_name,
+                                           sizeof(struct nfp_net_adapter) *
+                                           ports, RTE_CACHE_LINE_SIZE);
+                       if (!*priv) {
+                               rte_free(port_name);
+                               rte_eth_dev_release_port(eth_dev);
+                               return -ENOMEM;
+                       }
+               }
+               eth_dev->data->dev_private = *priv;
 
-       hw->total_ports = ports;
+               /*
+                * dev_private pointing to port0 dev_private because we need
+                * to configure vNIC bars based on port0 at nfp_net_init.
+                * Then dev_private is adjusted per port.
+                */
+               hw = (struct nfp_net_hw *)(eth_dev->data->dev_private) + port;
+               hw->cpp = cpp;
+               hw->hwinfo = hwinfo;
+               hw->sym_tbl = sym_tbl;
+               hw->pf_port_idx = phys_port;
+               hw->is_pf = 1;
+               if (ports > 1)
+                       hw->pf_multiport_enabled = 1;
+
+               hw->total_ports = ports;
+       } else {
+               eth_dev = rte_eth_dev_attach_secondary(port_name);
+               if (!eth_dev) {
+                       RTE_LOG(ERR, EAL, "secondary process attach failed, "
+                               "ethdev doesn't exist\n");
+                       rte_free(port_name);
+                       return -ENODEV;
+               }
+               eth_dev->process_private = cpp;
+       }
 
        eth_dev->device = &dev->device;
        rte_eth_copy_pci_info(eth_dev, dev);
 
-       ret = nfp_net_init(eth_dev);
+       retval = nfp_net_init(eth_dev);
 
-       if (ret)
-               rte_eth_dev_release_port(eth_dev);
-       else
+       if (retval) {
+               retval = -ENODEV;
+               goto probe_failed;
+       } else {
                rte_eth_dev_probing_finish(eth_dev);
+       }
 
        rte_free(port_name);
 
-       return ret;
+       if (port == 0) {
+               /*
+                * The rte_service needs to be created just once per PMD,
+                * and the cpp handle needs to be linked to the service.
+                * Secondary processes will be used for debugging DPDK apps
+                * that need the CPP interface for accessing NFP components.
+                * The cpp handle for secondary processes is available at
+                * this point.
+                */
+               memset(&service, 0, sizeof(struct rte_service_spec));
+               snprintf(service.name, sizeof(service.name), "nfp_cpp_service");
+               service.callback = nfp_cpp_bridge_service_func;
+               service.callback_userdata = (void *)cpp;
+
+               hw = (struct nfp_net_hw *)(eth_dev->data->dev_private);
+
+               if (rte_service_component_register(&service,
+                                                  &hw->nfp_cpp_service_id))
+                       RTE_LOG(ERR, PMD, "NFP CPP bridge service register() failed\n");
+               else
+                       RTE_LOG(DEBUG, PMD, "NFP CPP bridge service registered\n");
+       }
+
+       return retval;
+
+probe_failed:
+       rte_free(port_name);
+       /*
+        * Free ports private data if primary process.
+        * NOTE(review): dev_private may still point at the array shared by
+        * all ports; confirm freeing it here is safe when port > 0.
+        */
+       if (rte_eal_process_type() == RTE_PROC_PRIMARY)
+               rte_free(eth_dev->data->dev_private);
+
+       rte_eth_dev_release_port(eth_dev);
+
+       return retval;
 }
 
 #define DEFAULT_FW_PATH       "/lib/firmware/netronome"
@@ -2991,28 +3484,31 @@ nfp_fw_upload(struct rte_pci_device *dev, struct nfp_nsp *nsp, char *card)
        /* Looking for firmware file in order of priority */
 
        /* First try to find a firmware image specific for this device */
-       sprintf(serial, "serial-%02x-%02x-%02x-%02x-%02x-%02x-%02x-%02x",
+       snprintf(serial, sizeof(serial),
+                       "serial-%02x-%02x-%02x-%02x-%02x-%02x-%02x-%02x",
                cpp->serial[0], cpp->serial[1], cpp->serial[2], cpp->serial[3],
                cpp->serial[4], cpp->serial[5], cpp->interface >> 8,
                cpp->interface & 0xff);
 
-       sprintf(fw_name, "%s/%s.nffw", DEFAULT_FW_PATH, serial);
+       snprintf(fw_name, sizeof(fw_name), "%s/%s.nffw", DEFAULT_FW_PATH,
+                       serial);
 
        PMD_DRV_LOG(DEBUG, "Trying with fw file: %s", fw_name);
        fw_f = open(fw_name, O_RDONLY);
-       if (fw_f > 0)
+       if (fw_f >= 0)
                goto read_fw;
 
        /* Then try the PCI name */
-       sprintf(fw_name, "%s/pci-%s.nffw", DEFAULT_FW_PATH, dev->device.name);
+       snprintf(fw_name, sizeof(fw_name), "%s/pci-%s.nffw", DEFAULT_FW_PATH,
+                       dev->device.name);
 
        PMD_DRV_LOG(DEBUG, "Trying with fw file: %s", fw_name);
        fw_f = open(fw_name, O_RDONLY);
-       if (fw_f > 0)
+       if (fw_f >= 0)
                goto read_fw;
 
        /* Finally try the card type and media */
-       sprintf(fw_name, "%s/%s", DEFAULT_FW_PATH, card);
+       snprintf(fw_name, sizeof(fw_name), "%s/%s", DEFAULT_FW_PATH, card);
        PMD_DRV_LOG(DEBUG, "Trying with fw file: %s", fw_name);
        fw_f = open(fw_name, O_RDONLY);
        if (fw_f < 0) {
@@ -3088,8 +3584,9 @@ nfp_fw_setup(struct rte_pci_device *dev, struct nfp_cpp *cpp,
 
        PMD_DRV_LOG(INFO, "Port speed: %u", nfp_eth_table->ports[0].speed);
 
-       sprintf(card_desc, "nic_%s_%dx%d.nffw", nfp_fw_model,
-               nfp_eth_table->count, nfp_eth_table->ports[0].speed / 1000);
+       snprintf(card_desc, sizeof(card_desc), "nic_%s_%dx%d.nffw",
+                       nfp_fw_model, nfp_eth_table->count,
+                       nfp_eth_table->ports[0].speed / 1000);
 
        nsp = nfp_nsp_open(cpp);
        if (!nsp) {
@@ -3120,7 +3617,18 @@ static int nfp_pf_pci_probe(struct rte_pci_driver *pci_drv __rte_unused,
        if (!dev)
                return ret;
 
-       cpp = nfp_cpp_from_device_name(dev->device.name);
+       /*
+        * When the device is bound to UIO it could be used, by mistake,
+        * by two DPDK apps, and the UIO driver does not prevent it. This
+        * could lead to a serious problem when configuring the NFP CPP
+        * interface. We avoid this by telling the CPP init code to use
+        * a lock file if UIO is being used.
+        */
+       if (dev->kdrv == RTE_KDRV_VFIO)
+               cpp = nfp_cpp_from_device_name(dev, 0);
+       else
+               cpp = nfp_cpp_from_device_name(dev, 1);
+
        if (!cpp) {
                PMD_DRV_LOG(ERR, "A CPP handle can not be obtained");
                ret = -EIO;
@@ -3139,10 +3647,12 @@ static int nfp_pf_pci_probe(struct rte_pci_driver *pci_drv __rte_unused,
                return -EIO;
        }
 
-       if (nfp_fw_setup(dev, cpp, nfp_eth_table, hwinfo)) {
-               PMD_DRV_LOG(INFO, "Error when uploading firmware");
-               ret = -EIO;
-               goto error;
+       if (rte_eal_process_type() == RTE_PROC_PRIMARY) {
+               if (nfp_fw_setup(dev, cpp, nfp_eth_table, hwinfo)) {
+                       PMD_DRV_LOG(INFO, "Error when uploading firmware");
+                       ret = -EIO;
+                       goto error;
+               }
        }
 
        /* Now the symbol table should be there */
@@ -3249,14 +3759,16 @@ static int eth_nfp_pci_remove(struct rte_pci_device *pci_dev)
 
 static struct rte_pci_driver rte_nfp_net_pf_pmd = {
        .id_table = pci_id_nfp_pf_net_map,
-       .drv_flags = RTE_PCI_DRV_NEED_MAPPING | RTE_PCI_DRV_INTR_LSC,
+       .drv_flags = RTE_PCI_DRV_NEED_MAPPING | RTE_PCI_DRV_INTR_LSC |
+                    RTE_PCI_DRV_IOVA_AS_VA, /* presumably enables IOVA-as-VA; verify in rte_bus_pci.h */
        .probe = nfp_pf_pci_probe,
        .remove = eth_nfp_pci_remove,
 };
 
 static struct rte_pci_driver rte_nfp_net_vf_pmd = {
        .id_table = pci_id_nfp_vf_net_map,
-       .drv_flags = RTE_PCI_DRV_NEED_MAPPING | RTE_PCI_DRV_INTR_LSC,
+       .drv_flags = RTE_PCI_DRV_NEED_MAPPING | RTE_PCI_DRV_INTR_LSC |
+                    RTE_PCI_DRV_IOVA_AS_VA, /* presumably enables IOVA-as-VA; verify in rte_bus_pci.h */
        .probe = eth_nfp_pci_probe,
        .remove = eth_nfp_pci_remove,
 };
@@ -3268,9 +3780,7 @@ RTE_PMD_REGISTER_PCI_TABLE(net_nfp_vf, pci_id_nfp_vf_net_map);
 RTE_PMD_REGISTER_KMOD_DEP(net_nfp_pf, "* igb_uio | uio_pci_generic | vfio");
 RTE_PMD_REGISTER_KMOD_DEP(net_nfp_vf, "* igb_uio | uio_pci_generic | vfio");
 
-RTE_INIT(nfp_init_log);
-static void
-nfp_init_log(void)
+RTE_INIT(nfp_init_log)
 {
        nfp_logtype_init = rte_log_register("pmd.net.nfp.init");
        if (nfp_logtype_init >= 0)