net/nfp: fix device start/stop for VFs
[dpdk.git] / drivers / net / nfp / nfp_net.c
index d3b8ec0..05a44a2 100644 (file)
@@ -411,10 +411,6 @@ nfp_net_configure(struct rte_eth_dev *dev)
                return -EINVAL;
        }
 
-       /* Checking RX offloads */
-       if (!(rxmode->offloads & DEV_RX_OFFLOAD_CRC_STRIP))
-               PMD_INIT_LOG(INFO, "HW does strip CRC. No configurable!");
-
        return 0;
 }
 
@@ -527,7 +523,7 @@ nfp_net_vf_read_mac(struct nfp_net_hw *hw)
        uint32_t tmp;
 
        tmp = rte_be_to_cpu_32(nn_cfg_readl(hw, NFP_NET_CFG_MACADDR));
-       memcpy(&hw->mac_addr[0], &tmp, sizeof(struct ether_addr));
+       memcpy(&hw->mac_addr[0], &tmp, 4);
 
        tmp = rte_be_to_cpu_32(nn_cfg_readl(hw, NFP_NET_CFG_MACADDR + 4));
        memcpy(&hw->mac_addr[4], &tmp, 2);
@@ -770,9 +766,14 @@ nfp_net_start(struct rte_eth_dev *dev)
                goto error;
        }
 
-       if (hw->is_pf)
-               /* Configure the physical port up */
-               nfp_eth_set_configured(hw->cpp, hw->pf_port_idx, 1);
+       if (hw->is_pf) {
+               if (rte_eal_process_type() == RTE_PROC_PRIMARY)
+                       /* Configure the physical port up */
+                       nfp_eth_set_configured(hw->cpp, hw->pf_port_idx, 1);
+               else
+                       nfp_eth_set_configured(dev->process_private,
+                                              hw->pf_port_idx, 1);
+       }
 
        hw->ctrl = new_ctrl;
 
@@ -821,9 +822,14 @@ nfp_net_stop(struct rte_eth_dev *dev)
                        (struct nfp_net_rxq *)dev->data->rx_queues[i]);
        }
 
-       if (hw->is_pf)
-               /* Configure the physical port down */
-               nfp_eth_set_configured(hw->cpp, hw->pf_port_idx, 0);
+       if (hw->is_pf) {
+               if (rte_eal_process_type() == RTE_PROC_PRIMARY)
+                       /* Configure the physical port down */
+                       nfp_eth_set_configured(hw->cpp, hw->pf_port_idx, 0);
+               else
+                       nfp_eth_set_configured(dev->process_private,
+                                              hw->pf_port_idx, 0);
+       }
 }
 
 /* Reset and stop device. The device can not be restarted. */
@@ -1202,8 +1208,10 @@ nfp_net_infos_get(struct rte_eth_dev *dev, struct rte_eth_dev_info *dev_info)
                .tx_rs_thresh = DEFAULT_TX_RSBIT_THRESH,
        };
 
-       dev_info->flow_type_rss_offloads = ETH_RSS_NONFRAG_IPV4_TCP |
+       dev_info->flow_type_rss_offloads = ETH_RSS_IPV4 |
+                                          ETH_RSS_NONFRAG_IPV4_TCP |
                                           ETH_RSS_NONFRAG_IPV4_UDP |
+                                          ETH_RSS_IPV6 |
                                           ETH_RSS_NONFRAG_IPV6_TCP |
                                           ETH_RSS_NONFRAG_IPV6_UDP;
 
@@ -1436,9 +1444,9 @@ nfp_net_dev_mtu_set(struct rte_eth_dev *dev, uint16_t mtu)
 
        /* switch to jumbo mode if needed */
        if ((uint32_t)mtu > ETHER_MAX_LEN)
-               dev->data->dev_conf.rxmode.jumbo_frame = 1;
+               dev->data->dev_conf.rxmode.offloads |= DEV_RX_OFFLOAD_JUMBO_FRAME;
        else
-               dev->data->dev_conf.rxmode.jumbo_frame = 0;
+               dev->data->dev_conf.rxmode.offloads &= ~DEV_RX_OFFLOAD_JUMBO_FRAME;
 
        /* update max frame size */
        dev->data->dev_conf.rxmode.max_rx_pkt_len = (uint32_t)mtu;
@@ -1508,8 +1516,6 @@ nfp_net_rx_queue_setup(struct rte_eth_dev *dev,
        rxq->rx_count = nb_desc;
        rxq->port_id = dev->data->port_id;
        rxq->rx_free_thresh = rx_conf->rx_free_thresh;
-       rxq->crc_len = (uint8_t) ((dev->data->dev_conf.rxmode.hw_strip_crc) ? 0
-                                 : ETHER_CRC_LEN);
        rxq->drop_en = rx_conf->rx_drop_en;
 
        /*
@@ -1785,21 +1791,20 @@ nfp_net_rx_cksum(struct nfp_net_rxq *rxq, struct nfp_net_rx_desc *rxd,
                return;
 
        /* If IPv4 and IP checksum error, fail */
-       if ((rxd->rxd.flags & PCIE_DESC_RX_IP4_CSUM) &&
-           !(rxd->rxd.flags & PCIE_DESC_RX_IP4_CSUM_OK))
+       if (unlikely((rxd->rxd.flags & PCIE_DESC_RX_IP4_CSUM) &&
+           !(rxd->rxd.flags & PCIE_DESC_RX_IP4_CSUM_OK)))
                mb->ol_flags |= PKT_RX_IP_CKSUM_BAD;
+       else
+               mb->ol_flags |= PKT_RX_IP_CKSUM_GOOD;
 
        /* If neither UDP nor TCP return */
        if (!(rxd->rxd.flags & PCIE_DESC_RX_TCP_CSUM) &&
            !(rxd->rxd.flags & PCIE_DESC_RX_UDP_CSUM))
                return;
 
-       if ((rxd->rxd.flags & PCIE_DESC_RX_TCP_CSUM) &&
-           !(rxd->rxd.flags & PCIE_DESC_RX_TCP_CSUM_OK))
-               mb->ol_flags |= PKT_RX_L4_CKSUM_BAD;
-
-       if ((rxd->rxd.flags & PCIE_DESC_RX_UDP_CSUM) &&
-           !(rxd->rxd.flags & PCIE_DESC_RX_UDP_CSUM_OK))
+       if (likely(rxd->rxd.flags & PCIE_DESC_RX_L4_CSUM_OK))
+               mb->ol_flags |= PKT_RX_L4_CKSUM_GOOD;
+       else
                mb->ol_flags |= PKT_RX_L4_CKSUM_BAD;
 }
 
@@ -1883,6 +1888,18 @@ nfp_net_set_hash(struct nfp_net_rxq *rxq, struct nfp_net_rx_desc *rxd,
        case NFP_NET_RSS_IPV6_EX:
                mbuf->packet_type |= RTE_PTYPE_INNER_L3_IPV6_EXT;
                break;
+       case NFP_NET_RSS_IPV4_TCP:
+               mbuf->packet_type |= RTE_PTYPE_INNER_L3_IPV6_EXT;
+               break;
+       case NFP_NET_RSS_IPV6_TCP:
+               mbuf->packet_type |= RTE_PTYPE_INNER_L3_IPV6_EXT;
+               break;
+       case NFP_NET_RSS_IPV4_UDP:
+               mbuf->packet_type |= RTE_PTYPE_INNER_L3_IPV6_EXT;
+               break;
+       case NFP_NET_RSS_IPV6_UDP:
+               mbuf->packet_type |= RTE_PTYPE_INNER_L3_IPV6_EXT;
+               break;
        default:
                mbuf->packet_type |= RTE_PTYPE_INNER_L4_MASK;
        }
@@ -2255,11 +2272,15 @@ nfp_net_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t nb_pkts)
                                txq->wr_p = 0;
 
                        pkt_size -= dma_size;
-                       if (!pkt_size)
-                               /* End of packet */
-                               txds->offset_eop |= PCIE_DESC_TX_EOP;
+
+                       /*
+                        * Treat EOP as the likely case, since packets
+                        * with just one segment are the priority.
+                        */
+                       if (likely(!pkt_size))
+                               txds->offset_eop = PCIE_DESC_TX_EOP;
                        else
-                               txds->offset_eop &= PCIE_DESC_TX_OFFSET_MASK;
+                               txds->offset_eop = 0;
 
                        pkt = pkt->next;
                        /* Referencing next free TX descriptor */
@@ -2460,14 +2481,22 @@ nfp_net_rss_hash_write(struct rte_eth_dev *dev,
        rss_hf = rss_conf->rss_hf;
 
        if (rss_hf & ETH_RSS_IPV4)
-               cfg_rss_ctrl |= NFP_NET_CFG_RSS_IPV4 |
-                               NFP_NET_CFG_RSS_IPV4_TCP |
-                               NFP_NET_CFG_RSS_IPV4_UDP;
+               cfg_rss_ctrl |= NFP_NET_CFG_RSS_IPV4;
+
+       if (rss_hf & ETH_RSS_NONFRAG_IPV4_TCP)
+               cfg_rss_ctrl |= NFP_NET_CFG_RSS_IPV4_TCP;
+
+       if (rss_hf & ETH_RSS_NONFRAG_IPV4_UDP)
+               cfg_rss_ctrl |= NFP_NET_CFG_RSS_IPV4_UDP;
 
        if (rss_hf & ETH_RSS_IPV6)
-               cfg_rss_ctrl |= NFP_NET_CFG_RSS_IPV6 |
-                               NFP_NET_CFG_RSS_IPV6_TCP |
-                               NFP_NET_CFG_RSS_IPV6_UDP;
+               cfg_rss_ctrl |= NFP_NET_CFG_RSS_IPV6;
+
+       if (rss_hf & ETH_RSS_NONFRAG_IPV6_TCP)
+               cfg_rss_ctrl |= NFP_NET_CFG_RSS_IPV6_TCP;
+
+       if (rss_hf & ETH_RSS_NONFRAG_IPV6_UDP)
+               cfg_rss_ctrl |= NFP_NET_CFG_RSS_IPV6_UDP;
 
        cfg_rss_ctrl |= NFP_NET_CFG_RSS_MASK;
        cfg_rss_ctrl |= NFP_NET_CFG_RSS_TOEPLITZ;
@@ -2683,6 +2712,14 @@ nfp_net_init(struct rte_eth_dev *eth_dev)
 
        pci_dev = RTE_ETH_DEV_TO_PCI(eth_dev);
 
+       /* NFP can not handle DMA addresses requiring more than 40 bits */
+       if (rte_mem_check_dma_mask(40)) {
+               RTE_LOG(ERR, PMD, "device %s can not be used:",
+                                  pci_dev->device.name);
+               RTE_LOG(ERR, PMD, "\trestricted dma mask to 40 bits!\n");
+               return -ENODEV;
+       };
+
        if ((pci_dev->id.device_id == PCI_DEVICE_ID_NFP4000_PF_NIC) ||
            (pci_dev->id.device_id == PCI_DEVICE_ID_NFP6000_PF_NIC)) {
                port = get_pf_port_number(eth_dev->data->name);
@@ -2881,6 +2918,9 @@ nfp_net_init(struct rte_eth_dev *eth_dev)
        ether_addr_copy((struct ether_addr *)hw->mac_addr,
                        &eth_dev->data->mac_addrs[0]);
 
+       if (!(hw->cap & NFP_NET_CFG_CTRL_LIVE_ADDR))
+               eth_dev->data->dev_flags |= RTE_ETH_DEV_NOLIVE_MAC_ADDR;
+
        PMD_INIT_LOG(INFO, "port %d VendorID=0x%x DeviceID=0x%x "
                     "mac=%02x:%02x:%02x:%02x:%02x:%02x",
                     eth_dev->data->port_id, pci_dev->id.vendor_id,
@@ -2888,16 +2928,16 @@ nfp_net_init(struct rte_eth_dev *eth_dev)
                     hw->mac_addr[0], hw->mac_addr[1], hw->mac_addr[2],
                     hw->mac_addr[3], hw->mac_addr[4], hw->mac_addr[5]);
 
-       /* Registering LSC interrupt handler */
-       rte_intr_callback_register(&pci_dev->intr_handle,
-                                  nfp_net_dev_interrupt_handler,
-                                  (void *)eth_dev);
-
-       /* Telling the firmware about the LSC interrupt entry */
-       nn_cfg_writeb(hw, NFP_NET_CFG_LSC, NFP_NET_IRQ_LSC_IDX);
-
-       /* Recording current stats counters values */
-       nfp_net_stats_reset(eth_dev);
+       if (rte_eal_process_type() == RTE_PROC_PRIMARY) {
+               /* Registering LSC interrupt handler */
+               rte_intr_callback_register(&pci_dev->intr_handle,
+                                          nfp_net_dev_interrupt_handler,
+                                          (void *)eth_dev);
+               /* Telling the firmware about the LSC interrupt entry */
+               nn_cfg_writeb(hw, NFP_NET_CFG_LSC, NFP_NET_IRQ_LSC_IDX);
+               /* Recording current stats counters values */
+               nfp_net_stats_reset(eth_dev);
+       }
 
        return 0;
 
@@ -2917,7 +2957,7 @@ nfp_pf_create_dev(struct rte_pci_device *dev, int port, int ports,
        struct rte_eth_dev *eth_dev;
        struct nfp_net_hw *hw;
        char *port_name;
-       int ret;
+       int retval;
 
        port_name = rte_zmalloc("nfp_pf_port_name", 100, 0);
        if (!port_name)
@@ -2928,49 +2968,76 @@ nfp_pf_create_dev(struct rte_pci_device *dev, int port, int ports,
        else
                sprintf(port_name, "%s", dev->device.name);
 
-       eth_dev = rte_eth_dev_allocate(port_name);
-       if (!eth_dev)
-               return -ENOMEM;
 
-       if (port == 0) {
-               *priv = rte_zmalloc(port_name,
-                                   sizeof(struct nfp_net_adapter) * ports,
-                                   RTE_CACHE_LINE_SIZE);
-               if (!*priv) {
-                       rte_eth_dev_release_port(eth_dev);
-                       return -ENOMEM;
+       if (rte_eal_process_type() == RTE_PROC_PRIMARY) {
+               eth_dev = rte_eth_dev_allocate(port_name);
+               if (!eth_dev) {
+                       rte_free(port_name);
+                       return -ENODEV;
                }
-       }
-
-       eth_dev->data->dev_private = *priv;
-
-       /*
-        * dev_private pointing to port0 dev_private because we need
-        * to configure vNIC bars based on port0 at nfp_net_init.
-        * Then dev_private is adjusted per port.
-        */
-       hw = (struct nfp_net_hw *)(eth_dev->data->dev_private) + port;
-       hw->cpp = cpp;
-       hw->hwinfo = hwinfo;
-       hw->sym_tbl = sym_tbl;
-       hw->pf_port_idx = phys_port;
-       hw->is_pf = 1;
-       if (ports > 1)
-               hw->pf_multiport_enabled = 1;
+               if (port == 0) {
+                       *priv = rte_zmalloc(port_name,
+                                           sizeof(struct nfp_net_adapter) *
+                                           ports, RTE_CACHE_LINE_SIZE);
+                       if (!*priv) {
+                               rte_free(port_name);
+                               rte_eth_dev_release_port(eth_dev);
+                               return -ENOMEM;
+                       }
+               }
+               eth_dev->data->dev_private = *priv;
 
-       hw->total_ports = ports;
+               /*
+                * dev_private pointing to port0 dev_private because we need
+                * to configure vNIC bars based on port0 at nfp_net_init.
+                * Then dev_private is adjusted per port.
+                */
+               hw = (struct nfp_net_hw *)(eth_dev->data->dev_private) + port;
+               hw->cpp = cpp;
+               hw->hwinfo = hwinfo;
+               hw->sym_tbl = sym_tbl;
+               hw->pf_port_idx = phys_port;
+               hw->is_pf = 1;
+               if (ports > 1)
+                       hw->pf_multiport_enabled = 1;
+
+               hw->total_ports = ports;
+       } else {
+               eth_dev = rte_eth_dev_attach_secondary(port_name);
+               if (!eth_dev) {
+                       RTE_LOG(ERR, EAL, "secondary process attach failed, "
+                               "ethdev doesn't exist");
+                       rte_free(port_name);
+                       return -ENODEV;
+               }
+               eth_dev->process_private = cpp;
+       }
 
        eth_dev->device = &dev->device;
        rte_eth_copy_pci_info(eth_dev, dev);
 
-       ret = nfp_net_init(eth_dev);
+       retval = nfp_net_init(eth_dev);
 
-       if (ret)
-               rte_eth_dev_release_port(eth_dev);
+       if (retval) {
+               retval = -ENODEV;
+               goto probe_failed;
+       } else {
+               rte_eth_dev_probing_finish(eth_dev);
+       }
 
        rte_free(port_name);
 
-       return ret;
+       return retval;
+
+probe_failed:
+       rte_free(port_name);
+       /* free ports private data if primary process */
+       if (rte_eal_process_type() == RTE_PROC_PRIMARY)
+               rte_free(eth_dev->data->dev_private);
+
+       rte_eth_dev_release_port(eth_dev);
+
+       return retval;
 }
 
 #define DEFAULT_FW_PATH       "/lib/firmware/netronome"
@@ -2981,8 +3048,8 @@ nfp_fw_upload(struct rte_pci_device *dev, struct nfp_nsp *nsp, char *card)
        struct nfp_cpp *cpp = nsp->cpp;
        int fw_f;
        char *fw_buf;
-       char fw_name[100];
-       char serial[100];
+       char fw_name[125];
+       char serial[40];
        struct stat file_stat;
        off_t fsize, bytes;
 
@@ -3118,7 +3185,18 @@ static int nfp_pf_pci_probe(struct rte_pci_driver *pci_drv __rte_unused,
        if (!dev)
                return ret;
 
-       cpp = nfp_cpp_from_device_name(dev->device.name);
+       /*
+        * When the device is bound to UIO, it could be used, by mistake,
+        * by two DPDK apps, and the UIO driver does not prevent that. This
+        * could lead to a serious problem when configuring the NFP CPP
+        * interface. We avoid this by telling the CPP init code to use a
+        * lock file if UIO is being used.
+        */
+       if (dev->kdrv == RTE_KDRV_VFIO)
+               cpp = nfp_cpp_from_device_name(dev, 0);
+       else
+               cpp = nfp_cpp_from_device_name(dev, 1);
+
        if (!cpp) {
                PMD_DRV_LOG(ERR, "A CPP handle can not be obtained");
                ret = -EIO;
@@ -3137,10 +3215,12 @@ static int nfp_pf_pci_probe(struct rte_pci_driver *pci_drv __rte_unused,
                return -EIO;
        }
 
-       if (nfp_fw_setup(dev, cpp, nfp_eth_table, hwinfo)) {
-               PMD_DRV_LOG(INFO, "Error when uploading firmware");
-               ret = -EIO;
-               goto error;
+       if (rte_eal_process_type() == RTE_PROC_PRIMARY) {
+               if (nfp_fw_setup(dev, cpp, nfp_eth_table, hwinfo)) {
+                       PMD_DRV_LOG(INFO, "Error when uploading firmware");
+                       ret = -EIO;
+                       goto error;
+               }
        }
 
        /* Now the symbol table should be there */
@@ -3247,14 +3327,16 @@ static int eth_nfp_pci_remove(struct rte_pci_device *pci_dev)
 
 static struct rte_pci_driver rte_nfp_net_pf_pmd = {
        .id_table = pci_id_nfp_pf_net_map,
-       .drv_flags = RTE_PCI_DRV_NEED_MAPPING | RTE_PCI_DRV_INTR_LSC,
+       .drv_flags = RTE_PCI_DRV_NEED_MAPPING | RTE_PCI_DRV_INTR_LSC |
+                    RTE_PCI_DRV_IOVA_AS_VA,
        .probe = nfp_pf_pci_probe,
        .remove = eth_nfp_pci_remove,
 };
 
 static struct rte_pci_driver rte_nfp_net_vf_pmd = {
        .id_table = pci_id_nfp_vf_net_map,
-       .drv_flags = RTE_PCI_DRV_NEED_MAPPING | RTE_PCI_DRV_INTR_LSC,
+       .drv_flags = RTE_PCI_DRV_NEED_MAPPING | RTE_PCI_DRV_INTR_LSC |
+                    RTE_PCI_DRV_IOVA_AS_VA,
        .probe = eth_nfp_pci_probe,
        .remove = eth_nfp_pci_remove,
 };
@@ -3266,9 +3348,7 @@ RTE_PMD_REGISTER_PCI_TABLE(net_nfp_vf, pci_id_nfp_vf_net_map);
 RTE_PMD_REGISTER_KMOD_DEP(net_nfp_pf, "* igb_uio | uio_pci_generic | vfio");
 RTE_PMD_REGISTER_KMOD_DEP(net_nfp_vf, "* igb_uio | uio_pci_generic | vfio");
 
-RTE_INIT(nfp_init_log);
-static void
-nfp_init_log(void)
+RTE_INIT(nfp_init_log)
 {
        nfp_logtype_init = rte_log_register("pmd.net.nfp.init");
        if (nfp_logtype_init >= 0)