/* SPDX-License-Identifier: BSD-3-Clause
 * Copyright (c) 2014-2018 Netronome Systems, Inc.
 * All rights reserved.
 *
 * Small portions derived from code Copyright(c) 2010-2015 Intel Corporation.
 */

/*
 * vim:shiftwidth=8:noexpandtab
 *
 * @file dpdk/pmd/nfp_net.c
 *
 * Netronome vNIC DPDK Poll-Mode Driver: Main entry point
 */

#include <rte_byteorder.h>
#include <rte_common.h>
#include <rte_log.h>
#include <rte_debug.h>
#include <ethdev_driver.h>
#include <ethdev_pci.h>
#include <rte_dev.h>
#include <rte_ether.h>
#include <rte_malloc.h>
#include <rte_memzone.h>
#include <rte_mempool.h>
#include <rte_version.h>
#include <rte_string_fns.h>
#include <rte_alarm.h>
#include <rte_spinlock.h>
#include <rte_service_component.h>

#include "eal_firmware.h"

#include "nfpcore/nfp_cpp.h"
#include "nfpcore/nfp_nffw.h"
#include "nfpcore/nfp_hwinfo.h"
#include "nfpcore/nfp_mip.h"
#include "nfpcore/nfp_rtsym.h"
#include "nfpcore/nfp_nsp.h"

#include "nfp_net_pmd.h"
#include "nfp_rxtx.h"
#include "nfp_net_logs.h"
#include "nfp_net_ctrl.h"

#include <sys/types.h>
#include <sys/socket.h>
#include <sys/un.h>
#include <unistd.h>
#include <stdio.h>
#include <sys/ioctl.h>
#include <errno.h>

/* Prototypes */
static int nfp_net_close(struct rte_eth_dev *dev);
static int nfp_net_configure(struct rte_eth_dev *dev);
static void nfp_net_dev_interrupt_handler(void *param);
static void nfp_net_dev_interrupt_delayed_handler(void *param);
static int nfp_net_dev_mtu_set(struct rte_eth_dev *dev, uint16_t mtu);
static int nfp_net_infos_get(struct rte_eth_dev *dev,
                             struct rte_eth_dev_info *dev_info);
static int nfp_net_init(struct rte_eth_dev *eth_dev);
static int nfp_pf_init(struct rte_pci_device *pci_dev);
static int nfp_pf_secondary_init(struct rte_pci_device *pci_dev);
static int nfp_pci_uninit(struct rte_eth_dev *eth_dev);
static int nfp_init_phyports(struct nfp_pf_dev *pf_dev);
static int nfp_net_link_update(struct rte_eth_dev *dev, int wait_to_complete);
static int nfp_net_promisc_enable(struct rte_eth_dev *dev);
static int nfp_net_promisc_disable(struct rte_eth_dev *dev);
static int nfp_net_start(struct rte_eth_dev *dev);
static int nfp_net_stats_get(struct rte_eth_dev *dev,
                             struct rte_eth_stats *stats);
static int nfp_net_stats_reset(struct rte_eth_dev *dev);
static int nfp_net_stop(struct rte_eth_dev *dev);
static int nfp_net_rss_config_default(struct rte_eth_dev *dev);
static int nfp_net_rss_hash_update(struct rte_eth_dev *dev,
                                   struct rte_eth_rss_conf *rss_conf);
static int nfp_net_rss_reta_write(struct rte_eth_dev *dev,
                                  struct rte_eth_rss_reta_entry64 *reta_conf,
                                  uint16_t reta_size);
static int nfp_net_rss_hash_write(struct rte_eth_dev *dev,
                                  struct rte_eth_rss_conf *rss_conf);
static int nfp_set_mac_addr(struct rte_eth_dev *dev,
                            struct rte_ether_addr *mac_addr);
static int32_t nfp_cpp_bridge_service_func(void *args);
static void nfp_register_cpp_service(struct nfp_cpp *cpp);
static int nfp_fw_setup(struct rte_pci_device *dev,
                        struct nfp_cpp *cpp,
                        struct nfp_eth_table *nfp_eth_table,
                        struct nfp_hwinfo *hwinfo);

static int
__nfp_net_reconfig(struct nfp_net_hw *hw, uint32_t update)
{
        int cnt;
        uint32_t new;
        struct timespec wait;

        PMD_DRV_LOG(DEBUG, "Writing to the configuration queue (%p)...",
                    hw->qcp_cfg);

        if (hw->qcp_cfg == NULL)
                rte_panic("Bad configuration queue pointer\n");

        nfp_qcp_ptr_add(hw->qcp_cfg, NFP_QCP_WRITE_PTR, 1);

        wait.tv_sec = 0;
        wait.tv_nsec = 1000000;

        PMD_DRV_LOG(DEBUG, "Polling for update ack...");

        /* Poll update field, waiting for NFP to ack the config */
        for (cnt = 0; ; cnt++) {
                new = nn_cfg_readl(hw, NFP_NET_CFG_UPDATE);
                if (new == 0)
                        break;
                if (new & NFP_NET_CFG_UPDATE_ERR) {
                        PMD_INIT_LOG(ERR, "Reconfig error: 0x%08x", new);
                        return -1;
                }
                if (cnt >= NFP_NET_POLL_TIMEOUT) {
                        PMD_INIT_LOG(ERR, "Reconfig timeout for 0x%08x after"
                                          " %dms", update, cnt);
                        rte_panic("Exiting\n");
                }
                nanosleep(&wait, 0); /* wait for 1 ms */
        }
        PMD_DRV_LOG(DEBUG, "Ack DONE");
        return 0;
}

/*
 * Reconfigure the NIC
 * @hw:      device to reconfigure
 * @ctrl:    The value for the ctrl field in the BAR config
 * @update:  The value for the update field in the BAR config
 *
 * Write the update word to the BAR and ping the reconfig queue. Then poll
 * until the firmware has acknowledged the update by zeroing the update word.
 */
static int
nfp_net_reconfig(struct nfp_net_hw *hw, uint32_t ctrl, uint32_t update)
{
        uint32_t err;

        PMD_DRV_LOG(DEBUG, "nfp_net_reconfig: ctrl=%08x update=%08x",
                    ctrl, update);

        rte_spinlock_lock(&hw->reconfig_lock);

        nn_cfg_writel(hw, NFP_NET_CFG_CTRL, ctrl);
        nn_cfg_writel(hw, NFP_NET_CFG_UPDATE, update);

        rte_wmb();

        err = __nfp_net_reconfig(hw, update);

        rte_spinlock_unlock(&hw->reconfig_lock);

        if (!err)
                return 0;

        /*
         * A reconfig error returned here is one the caller can handle;
         * unrecoverable errors trigger rte_panic inside __nfp_net_reconfig.
         */
        PMD_INIT_LOG(ERR, "Error nfp_net reconfig for ctrl: %x update: %x",
                     ctrl, update);
        return -EIO;
}
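
/*
 * Illustrative usage sketch (not part of the driver): callers typically
 * pair a new ctrl word with the matching update flags and commit hw->ctrl
 * only once the firmware has acked, e.g.:
 *
 *      new_ctrl = hw->ctrl | NFP_NET_CFG_CTRL_ENABLE;
 *      update = NFP_NET_CFG_UPDATE_GEN | NFP_NET_CFG_UPDATE_RING;
 *      if (nfp_net_reconfig(hw, new_ctrl, update) < 0)
 *              return -EIO;
 *      hw->ctrl = new_ctrl;
 */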

/*
 * Configure an Ethernet device. This function must be invoked first
 * before any other function in the Ethernet API. This function can
 * also be re-invoked when a device is in the stopped state.
 */
static int
nfp_net_configure(struct rte_eth_dev *dev)
{
        struct rte_eth_conf *dev_conf;
        struct rte_eth_rxmode *rxmode;
        struct rte_eth_txmode *txmode;
        struct nfp_net_hw *hw;

        hw = NFP_NET_DEV_PRIVATE_TO_HW(dev->data->dev_private);

        /*
         * A DPDK app sends how many queues to use and how those queues
         * need to be configured. The DPDK core makes sure no more queues
         * than those advertised by the driver are requested. This function
         * is called after that internal check.
         */

        PMD_INIT_LOG(DEBUG, "Configure");

        dev_conf = &dev->data->dev_conf;
        rxmode = &dev_conf->rxmode;
        txmode = &dev_conf->txmode;

        if (rxmode->mq_mode & ETH_MQ_RX_RSS_FLAG)
                rxmode->offloads |= DEV_RX_OFFLOAD_RSS_HASH;

        /* Checking TX mode */
        if (txmode->mq_mode) {
                PMD_INIT_LOG(INFO, "TX mq_mode DCB and VMDq not supported");
                return -EINVAL;
        }

        /* Checking RX mode */
        if (rxmode->mq_mode & ETH_MQ_RX_RSS &&
            !(hw->cap & NFP_NET_CFG_CTRL_RSS)) {
                PMD_INIT_LOG(INFO, "RSS not supported");
                return -EINVAL;
        }

        return 0;
}

static void
nfp_net_enable_queues(struct rte_eth_dev *dev)
{
        struct nfp_net_hw *hw;
        uint64_t enabled_queues = 0;
        int i;

        hw = NFP_NET_DEV_PRIVATE_TO_HW(dev->data->dev_private);

        /* Enabling the required TX queues in the device */
        for (i = 0; i < dev->data->nb_tx_queues; i++)
                enabled_queues |= (1ULL << i);

        nn_cfg_writeq(hw, NFP_NET_CFG_TXRS_ENABLE, enabled_queues);

        enabled_queues = 0;

        /* Enabling the required RX queues in the device */
        for (i = 0; i < dev->data->nb_rx_queues; i++)
                enabled_queues |= (1ULL << i);

        nn_cfg_writeq(hw, NFP_NET_CFG_RXRS_ENABLE, enabled_queues);
}
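
/*
 * Worked example (illustration only): with nb_tx_queues = 4 the loop in
 * nfp_net_enable_queues() builds
 *
 *      enabled_queues = 0x0F;  (queues 0-3 enabled)
 *
 * which is written as a contiguous enable mask to NFP_NET_CFG_TXRS_ENABLE.
 */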

static void
nfp_net_disable_queues(struct rte_eth_dev *dev)
{
        struct nfp_net_hw *hw;
        uint32_t new_ctrl, update = 0;

        hw = NFP_NET_DEV_PRIVATE_TO_HW(dev->data->dev_private);

        nn_cfg_writeq(hw, NFP_NET_CFG_TXRS_ENABLE, 0);
        nn_cfg_writeq(hw, NFP_NET_CFG_RXRS_ENABLE, 0);

        new_ctrl = hw->ctrl & ~NFP_NET_CFG_CTRL_ENABLE;
        update = NFP_NET_CFG_UPDATE_GEN | NFP_NET_CFG_UPDATE_RING |
                 NFP_NET_CFG_UPDATE_MSIX;

        if (hw->cap & NFP_NET_CFG_CTRL_RINGCFG)
                new_ctrl &= ~NFP_NET_CFG_CTRL_RINGCFG;

        /* If reconfig fails, leave the hw state unchanged */
        if (nfp_net_reconfig(hw, new_ctrl, update) < 0)
                return;

        hw->ctrl = new_ctrl;
}

static void
nfp_net_params_setup(struct nfp_net_hw *hw)
{
        nn_cfg_writel(hw, NFP_NET_CFG_MTU, hw->mtu);
        nn_cfg_writel(hw, NFP_NET_CFG_FLBUFSZ, hw->flbufsz);
}

static void
nfp_net_cfg_queue_setup(struct nfp_net_hw *hw)
{
        hw->qcp_cfg = hw->tx_bar + NFP_QCP_QUEUE_ADDR_SZ;
}

#define ETH_ADDR_LEN    6

static void
nfp_eth_copy_mac(uint8_t *dst, const uint8_t *src)
{
        int i;

        for (i = 0; i < ETH_ADDR_LEN; i++)
                dst[i] = src[i];
}

static int
nfp_net_pf_read_mac(struct nfp_pf_dev *pf_dev, int port)
{
        struct nfp_eth_table *nfp_eth_table;
        struct nfp_net_hw *hw = NULL;

        /* Grab a pointer to the correct physical port */
        hw = pf_dev->ports[port];

        nfp_eth_table = nfp_eth_read_ports(pf_dev->cpp);
        if (nfp_eth_table == NULL)
                return -EIO;

        nfp_eth_copy_mac((uint8_t *)&hw->mac_addr,
                         (uint8_t *)&nfp_eth_table->ports[port].mac_addr);

        free(nfp_eth_table);
        return 0;
}

static void
nfp_net_vf_read_mac(struct nfp_net_hw *hw)
{
        uint32_t tmp;

        tmp = rte_be_to_cpu_32(nn_cfg_readl(hw, NFP_NET_CFG_MACADDR));
        memcpy(&hw->mac_addr[0], &tmp, 4);

        tmp = rte_be_to_cpu_32(nn_cfg_readl(hw, NFP_NET_CFG_MACADDR + 4));
        memcpy(&hw->mac_addr[4], &tmp, 2);
}

static void
nfp_net_write_mac(struct nfp_net_hw *hw, uint8_t *mac)
{
        uint32_t mac0 = *(uint32_t *)mac;
        uint16_t mac1;

        nn_writel(rte_cpu_to_be_32(mac0), hw->ctrl_bar + NFP_NET_CFG_MACADDR);

        mac += 4;
        mac1 = *(uint16_t *)mac;
        nn_writew(rte_cpu_to_be_16(mac1),
                  hw->ctrl_bar + NFP_NET_CFG_MACADDR + 6);
}
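
/*
 * Layout note (illustration): the MAC lives in two config words. Bytes 0-3
 * go as a big-endian 32-bit word at MACADDR + 0, bytes 4-5 as a big-endian
 * 16-bit word at MACADDR + 6. nfp_net_vf_read_mac() reads the containing
 * 32-bit word at MACADDR + 4 and keeps only the two low bytes after the
 * byte swap, which is why the read at +4 pairs with the write at +6 above.
 */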

static int
nfp_set_mac_addr(struct rte_eth_dev *dev, struct rte_ether_addr *mac_addr)
{
        struct nfp_net_hw *hw;
        uint32_t update, ctrl;

        hw = NFP_NET_DEV_PRIVATE_TO_HW(dev->data->dev_private);
        if ((hw->ctrl & NFP_NET_CFG_CTRL_ENABLE) &&
            !(hw->cap & NFP_NET_CFG_CTRL_LIVE_ADDR)) {
                PMD_INIT_LOG(INFO, "MAC address cannot be changed while"
                                   " the port is enabled");
                return -EBUSY;
        }

        /* Writing new MAC to the specific port BAR address */
        nfp_net_write_mac(hw, (uint8_t *)mac_addr);

        /* Signal the NIC about the change */
        update = NFP_NET_CFG_UPDATE_MACADDR;
        ctrl = hw->ctrl;
        if ((hw->ctrl & NFP_NET_CFG_CTRL_ENABLE) &&
            (hw->cap & NFP_NET_CFG_CTRL_LIVE_ADDR))
                ctrl |= NFP_NET_CFG_CTRL_LIVE_ADDR;
        if (nfp_net_reconfig(hw, ctrl, update) < 0) {
                PMD_INIT_LOG(INFO, "MAC address update failed");
                return -EIO;
        }
        return 0;
}

static int
nfp_configure_rx_interrupt(struct rte_eth_dev *dev,
                           struct rte_intr_handle *intr_handle)
{
        struct nfp_net_hw *hw;
        int i;

        if (!intr_handle->intr_vec) {
                intr_handle->intr_vec =
                        rte_zmalloc("intr_vec",
                                    dev->data->nb_rx_queues * sizeof(int), 0);
                if (!intr_handle->intr_vec) {
                        PMD_INIT_LOG(ERR, "Failed to allocate %d rx_queues"
                                     " intr_vec", dev->data->nb_rx_queues);
                        return -ENOMEM;
                }
        }

        hw = NFP_NET_DEV_PRIVATE_TO_HW(dev->data->dev_private);

        if (intr_handle->type == RTE_INTR_HANDLE_UIO) {
                PMD_INIT_LOG(INFO, "VF: enabling RX interrupt with UIO");
                /* UIO just supports one queue and no LSC */
                nn_cfg_writeb(hw, NFP_NET_CFG_RXR_VEC(0), 0);
                intr_handle->intr_vec[0] = 0;
        } else {
                PMD_INIT_LOG(INFO, "VF: enabling RX interrupt with VFIO");
                for (i = 0; i < dev->data->nb_rx_queues; i++) {
                        /*
                         * The first msix vector is reserved for non
                         * efd interrupts
                         */
                        nn_cfg_writeb(hw, NFP_NET_CFG_RXR_VEC(i), i + 1);
                        intr_handle->intr_vec[i] = i + 1;
                        PMD_INIT_LOG(DEBUG, "intr_vec[%d]= %d", i,
                                            intr_handle->intr_vec[i]);
                }
        }

        /* Avoiding TX interrupts */
        hw->ctrl |= NFP_NET_CFG_CTRL_MSIX_TX_OFF;
        return 0;
}

static uint32_t
nfp_check_offloads(struct rte_eth_dev *dev)
{
        struct nfp_net_hw *hw;
        struct rte_eth_conf *dev_conf;
        struct rte_eth_rxmode *rxmode;
        struct rte_eth_txmode *txmode;
        uint32_t ctrl = 0;

        hw = NFP_NET_DEV_PRIVATE_TO_HW(dev->data->dev_private);

        dev_conf = &dev->data->dev_conf;
        rxmode = &dev_conf->rxmode;
        txmode = &dev_conf->txmode;

        if (rxmode->offloads & DEV_RX_OFFLOAD_IPV4_CKSUM) {
                if (hw->cap & NFP_NET_CFG_CTRL_RXCSUM)
                        ctrl |= NFP_NET_CFG_CTRL_RXCSUM;
        }

        if (rxmode->offloads & DEV_RX_OFFLOAD_VLAN_STRIP) {
                if (hw->cap & NFP_NET_CFG_CTRL_RXVLAN)
                        ctrl |= NFP_NET_CFG_CTRL_RXVLAN;
        }

        if (rxmode->offloads & DEV_RX_OFFLOAD_JUMBO_FRAME)
                hw->mtu = rxmode->max_rx_pkt_len;

        if (txmode->offloads & DEV_TX_OFFLOAD_VLAN_INSERT)
                ctrl |= NFP_NET_CFG_CTRL_TXVLAN;

        /* L2 broadcast */
        if (hw->cap & NFP_NET_CFG_CTRL_L2BC)
                ctrl |= NFP_NET_CFG_CTRL_L2BC;

        /* L2 multicast */
        if (hw->cap & NFP_NET_CFG_CTRL_L2MC)
                ctrl |= NFP_NET_CFG_CTRL_L2MC;

        /* TX checksum offload */
        if (txmode->offloads & DEV_TX_OFFLOAD_IPV4_CKSUM ||
            txmode->offloads & DEV_TX_OFFLOAD_UDP_CKSUM ||
            txmode->offloads & DEV_TX_OFFLOAD_TCP_CKSUM)
                ctrl |= NFP_NET_CFG_CTRL_TXCSUM;

        /* LSO offload */
        if (txmode->offloads & DEV_TX_OFFLOAD_TCP_TSO) {
                if (hw->cap & NFP_NET_CFG_CTRL_LSO)
                        ctrl |= NFP_NET_CFG_CTRL_LSO;
                else
                        ctrl |= NFP_NET_CFG_CTRL_LSO2;
        }

        /* TX gather */
        if (txmode->offloads & DEV_TX_OFFLOAD_MULTI_SEGS)
                ctrl |= NFP_NET_CFG_CTRL_GATHER;

        return ctrl;
}

static int
nfp_net_start(struct rte_eth_dev *dev)
{
        struct rte_pci_device *pci_dev = RTE_ETH_DEV_TO_PCI(dev);
        struct rte_intr_handle *intr_handle = &pci_dev->intr_handle;
        uint32_t new_ctrl, update = 0;
        struct nfp_net_hw *hw;
        struct nfp_pf_dev *pf_dev;
        struct rte_eth_conf *dev_conf;
        struct rte_eth_rxmode *rxmode;
        uint32_t intr_vector;
        int ret;

        hw = NFP_NET_DEV_PRIVATE_TO_HW(dev->data->dev_private);
        pf_dev = NFP_NET_DEV_PRIVATE_TO_PF(dev->data->dev_private);

        PMD_INIT_LOG(DEBUG, "Start");

        /* Disabling queues just in case... */
        nfp_net_disable_queues(dev);

        /* Enabling the required queues in the device */
        nfp_net_enable_queues(dev);

        /* check and configure queue intr-vector mapping */
        if (dev->data->dev_conf.intr_conf.rxq != 0) {
                if (pf_dev->multiport) {
                        PMD_INIT_LOG(ERR, "PMD rx interrupt is not supported "
                                          "with NFP multiport PF");
                        return -EINVAL;
                }
                if (intr_handle->type == RTE_INTR_HANDLE_UIO) {
                        /*
                         * Better not to share LSC with RX interrupts.
                         * Unregistering LSC interrupt handler
                         */
                        rte_intr_callback_unregister(&pci_dev->intr_handle,
                                nfp_net_dev_interrupt_handler, (void *)dev);

                        if (dev->data->nb_rx_queues > 1) {
                                PMD_INIT_LOG(ERR, "PMD rx interrupt only "
                                             "supports 1 queue with UIO");
                                return -EIO;
                        }
                }
                intr_vector = dev->data->nb_rx_queues;
                if (rte_intr_efd_enable(intr_handle, intr_vector))
                        return -1;

                nfp_configure_rx_interrupt(dev, intr_handle);
                update = NFP_NET_CFG_UPDATE_MSIX;
        }

        rte_intr_enable(intr_handle);

        new_ctrl = nfp_check_offloads(dev);

        /* Writing configuration parameters in the device */
        nfp_net_params_setup(hw);

        dev_conf = &dev->data->dev_conf;
        rxmode = &dev_conf->rxmode;

        if (rxmode->mq_mode & ETH_MQ_RX_RSS) {
                nfp_net_rss_config_default(dev);
                update |= NFP_NET_CFG_UPDATE_RSS;
                new_ctrl |= NFP_NET_CFG_CTRL_RSS;
        }

        /* Enable device */
        new_ctrl |= NFP_NET_CFG_CTRL_ENABLE;

        update |= NFP_NET_CFG_UPDATE_GEN | NFP_NET_CFG_UPDATE_RING;

        if (hw->cap & NFP_NET_CFG_CTRL_RINGCFG)
                new_ctrl |= NFP_NET_CFG_CTRL_RINGCFG;

        nn_cfg_writel(hw, NFP_NET_CFG_CTRL, new_ctrl);
        if (nfp_net_reconfig(hw, new_ctrl, update) < 0)
                return -EIO;

        /*
         * Allocating rte mbufs for the configured rx queues.
         * This requires the queues to be enabled first.
         */
        if (nfp_net_rx_freelist_setup(dev) < 0) {
                ret = -ENOMEM;
                goto error;
        }

        if (hw->is_phyport) {
                if (rte_eal_process_type() == RTE_PROC_PRIMARY)
                        /* Configure the physical port up */
                        nfp_eth_set_configured(hw->cpp, hw->nfp_idx, 1);
                else
                        nfp_eth_set_configured(dev->process_private,
                                               hw->nfp_idx, 1);
        }

        hw->ctrl = new_ctrl;

        return 0;

error:
        /*
         * An error returned by this function should make the app exit,
         * after which the system releases all allocated memory, including
         * memory coming from hugepages.
         *
         * The device could still be enabled at this point with some queues
         * ready for getting packets: this happens when the call to
         * nfp_net_rx_freelist_setup() succeeds for some queues but fails
         * for subsequent ones.
         *
         * The app should exit anyway, but it is better to tell the
         * device first.
         */
        nfp_net_disable_queues(dev);

        return ret;
}

/* Stop device: disable rx and tx functions to allow for reconfiguring. */
static int
nfp_net_stop(struct rte_eth_dev *dev)
{
        int i;
        struct nfp_net_hw *hw;

        PMD_INIT_LOG(DEBUG, "Stop");

        hw = NFP_NET_DEV_PRIVATE_TO_HW(dev->data->dev_private);

        nfp_net_disable_queues(dev);

        /* Clear queues */
        for (i = 0; i < dev->data->nb_tx_queues; i++) {
                nfp_net_reset_tx_queue(
                        (struct nfp_net_txq *)dev->data->tx_queues[i]);
        }

        for (i = 0; i < dev->data->nb_rx_queues; i++) {
                nfp_net_reset_rx_queue(
                        (struct nfp_net_rxq *)dev->data->rx_queues[i]);
        }

        if (hw->is_phyport) {
                if (rte_eal_process_type() == RTE_PROC_PRIMARY)
                        /* Configure the physical port down */
                        nfp_eth_set_configured(hw->cpp, hw->nfp_idx, 0);
                else
                        nfp_eth_set_configured(dev->process_private,
                                               hw->nfp_idx, 0);
        }

        return 0;
}

/* Set the link up. */
static int
nfp_net_set_link_up(struct rte_eth_dev *dev)
{
        struct nfp_net_hw *hw;

        PMD_DRV_LOG(DEBUG, "Set link up");

        hw = NFP_NET_DEV_PRIVATE_TO_HW(dev->data->dev_private);

        if (!hw->is_phyport)
                return -ENOTSUP;

        if (rte_eal_process_type() == RTE_PROC_PRIMARY)
                /* Configure the physical port up */
                return nfp_eth_set_configured(hw->cpp, hw->nfp_idx, 1);
        else
                return nfp_eth_set_configured(dev->process_private,
                                              hw->nfp_idx, 1);
}

/* Set the link down. */
static int
nfp_net_set_link_down(struct rte_eth_dev *dev)
{
        struct nfp_net_hw *hw;

        PMD_DRV_LOG(DEBUG, "Set link down");

        hw = NFP_NET_DEV_PRIVATE_TO_HW(dev->data->dev_private);

        if (!hw->is_phyport)
                return -ENOTSUP;

        if (rte_eal_process_type() == RTE_PROC_PRIMARY)
                /* Configure the physical port down */
                return nfp_eth_set_configured(hw->cpp, hw->nfp_idx, 0);
        else
                return nfp_eth_set_configured(dev->process_private,
                                              hw->nfp_idx, 0);
}

/* Reset and stop device. The device can not be restarted. */
static int
nfp_net_close(struct rte_eth_dev *dev)
{
        struct nfp_net_hw *hw;
        struct rte_pci_device *pci_dev;
        int i;

        if (rte_eal_process_type() != RTE_PROC_PRIMARY)
                return 0;

        PMD_INIT_LOG(DEBUG, "Close");

        hw = NFP_NET_DEV_PRIVATE_TO_HW(dev->data->dev_private);
        pci_dev = RTE_ETH_DEV_TO_PCI(dev);

        /*
         * We assume that the DPDK application is stopping all the
         * threads/queues before calling the device close function.
         */

        nfp_net_disable_queues(dev);

        /* Clear queues */
        for (i = 0; i < dev->data->nb_tx_queues; i++) {
                nfp_net_reset_tx_queue(
                        (struct nfp_net_txq *)dev->data->tx_queues[i]);
        }

        for (i = 0; i < dev->data->nb_rx_queues; i++) {
                nfp_net_reset_rx_queue(
                        (struct nfp_net_rxq *)dev->data->rx_queues[i]);
        }

        /* Only free PF resources after all physical ports have been closed */
        if (pci_dev->id.device_id == PCI_DEVICE_ID_NFP4000_PF_NIC ||
            pci_dev->id.device_id == PCI_DEVICE_ID_NFP6000_PF_NIC) {
                struct nfp_pf_dev *pf_dev;
                pf_dev = NFP_NET_DEV_PRIVATE_TO_PF(dev->data->dev_private);

                /* Mark this port as unused and free device priv resources */
                nn_cfg_writeb(hw, NFP_NET_CFG_LSC, 0xff);
                pf_dev->ports[hw->idx] = NULL;
                rte_eth_dev_release_port(dev);

                for (i = 0; i < pf_dev->total_phyports; i++) {
                        /* Check to see if ports are still in use */
                        if (pf_dev->ports[i])
                                return 0;
                }

                /* Now it is safe to free all PF resources */
                PMD_INIT_LOG(INFO, "Freeing PF resources");
                nfp_cpp_area_free(pf_dev->ctrl_area);
                nfp_cpp_area_free(pf_dev->hwqueues_area);
                free(pf_dev->hwinfo);
                free(pf_dev->sym_tbl);
                nfp_cpp_free(pf_dev->cpp);
                rte_free(pf_dev);
        }

        rte_intr_disable(&pci_dev->intr_handle);

        /* unregister callback func from eal lib */
        rte_intr_callback_unregister(&pci_dev->intr_handle,
                                     nfp_net_dev_interrupt_handler,
                                     (void *)dev);

        /*
         * The ixgbe PMD disables the PCIe master on close;
         * the i40e PMD does not.
         */

        return 0;
}

static int
nfp_net_promisc_enable(struct rte_eth_dev *dev)
{
        uint32_t new_ctrl, update = 0;
        struct nfp_net_hw *hw;
        int ret;

        PMD_DRV_LOG(DEBUG, "Promiscuous mode enable");

        hw = NFP_NET_DEV_PRIVATE_TO_HW(dev->data->dev_private);

        if (!(hw->cap & NFP_NET_CFG_CTRL_PROMISC)) {
                PMD_INIT_LOG(INFO, "Promiscuous mode not supported");
                return -ENOTSUP;
        }

        if (hw->ctrl & NFP_NET_CFG_CTRL_PROMISC) {
                PMD_DRV_LOG(INFO, "Promiscuous mode already enabled");
                return 0;
        }

        new_ctrl = hw->ctrl | NFP_NET_CFG_CTRL_PROMISC;
        update = NFP_NET_CFG_UPDATE_GEN;

        /*
         * DPDK marks promiscuous mode as enabled right after this call,
         * assuming it cannot fail ...
         */
        ret = nfp_net_reconfig(hw, new_ctrl, update);
        if (ret < 0)
                return ret;

        hw->ctrl = new_ctrl;

        return 0;
}

static int
nfp_net_promisc_disable(struct rte_eth_dev *dev)
{
        uint32_t new_ctrl, update = 0;
        struct nfp_net_hw *hw;
        int ret;

        hw = NFP_NET_DEV_PRIVATE_TO_HW(dev->data->dev_private);

        if ((hw->ctrl & NFP_NET_CFG_CTRL_PROMISC) == 0) {
                PMD_DRV_LOG(INFO, "Promiscuous mode already disabled");
                return 0;
        }

        new_ctrl = hw->ctrl & ~NFP_NET_CFG_CTRL_PROMISC;
        update = NFP_NET_CFG_UPDATE_GEN;

        /*
         * DPDK marks promiscuous mode as disabled right before this call,
         * assuming it cannot fail ...
         */
        ret = nfp_net_reconfig(hw, new_ctrl, update);
        if (ret < 0)
                return ret;

        hw->ctrl = new_ctrl;

        return 0;
}

/*
 * Return 0 if the link status changed, -1 if it did not.
 *
 * Wait to complete is needed as it can take up to 9 seconds to get the Link
 * status.
 */
static int
nfp_net_link_update(struct rte_eth_dev *dev, __rte_unused int wait_to_complete)
{
        struct nfp_net_hw *hw;
        struct rte_eth_link link;
        uint32_t nn_link_status;
        int ret;

        static const uint32_t ls_to_ethtool[] = {
                [NFP_NET_CFG_STS_LINK_RATE_UNSUPPORTED] = ETH_SPEED_NUM_NONE,
                [NFP_NET_CFG_STS_LINK_RATE_UNKNOWN]     = ETH_SPEED_NUM_NONE,
                [NFP_NET_CFG_STS_LINK_RATE_1G]          = ETH_SPEED_NUM_1G,
                [NFP_NET_CFG_STS_LINK_RATE_10G]         = ETH_SPEED_NUM_10G,
                [NFP_NET_CFG_STS_LINK_RATE_25G]         = ETH_SPEED_NUM_25G,
                [NFP_NET_CFG_STS_LINK_RATE_40G]         = ETH_SPEED_NUM_40G,
                [NFP_NET_CFG_STS_LINK_RATE_50G]         = ETH_SPEED_NUM_50G,
                [NFP_NET_CFG_STS_LINK_RATE_100G]        = ETH_SPEED_NUM_100G,
        };

        PMD_DRV_LOG(DEBUG, "Link update");

        hw = NFP_NET_DEV_PRIVATE_TO_HW(dev->data->dev_private);

        nn_link_status = nn_cfg_readl(hw, NFP_NET_CFG_STS);

        memset(&link, 0, sizeof(struct rte_eth_link));

        if (nn_link_status & NFP_NET_CFG_STS_LINK)
                link.link_status = ETH_LINK_UP;

        link.link_duplex = ETH_LINK_FULL_DUPLEX;

        nn_link_status = (nn_link_status >> NFP_NET_CFG_STS_LINK_RATE_SHIFT) &
                         NFP_NET_CFG_STS_LINK_RATE_MASK;

        if (nn_link_status >= RTE_DIM(ls_to_ethtool))
                link.link_speed = ETH_SPEED_NUM_NONE;
        else
                link.link_speed = ls_to_ethtool[nn_link_status];

        ret = rte_eth_linkstatus_set(dev, &link);
        if (ret == 0) {
                if (link.link_status)
                        PMD_DRV_LOG(INFO, "NIC Link is Up");
                else
                        PMD_DRV_LOG(INFO, "NIC Link is Down");
        }
        return ret;
}
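
/*
 * Worked example (illustration; assumes the LINK bit is bit 0 and the rate
 * field uses NFP_NET_CFG_STS_LINK_RATE_SHIFT/_MASK from nfp_net_ctrl.h,
 * i.e. shift 1 and mask 0xF): an STS readout of 0x9 means link up (bit 0
 * set) with rate field (0x9 >> 1) & 0xF = 4, which ls_to_ethtool[] above
 * maps to ETH_SPEED_NUM_25G.
 */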

static int
nfp_net_stats_get(struct rte_eth_dev *dev, struct rte_eth_stats *stats)
{
        int i;
        struct nfp_net_hw *hw;
        struct rte_eth_stats nfp_dev_stats;

        hw = NFP_NET_DEV_PRIVATE_TO_HW(dev->data->dev_private);

        /* RTE_ETHDEV_QUEUE_STAT_CNTRS default value is 16 */

        memset(&nfp_dev_stats, 0, sizeof(nfp_dev_stats));

        /* reading per RX ring stats */
        for (i = 0; i < dev->data->nb_rx_queues; i++) {
                if (i == RTE_ETHDEV_QUEUE_STAT_CNTRS)
                        break;

                nfp_dev_stats.q_ipackets[i] =
                        nn_cfg_readq(hw, NFP_NET_CFG_RXR_STATS(i));

                nfp_dev_stats.q_ipackets[i] -=
                        hw->eth_stats_base.q_ipackets[i];

                nfp_dev_stats.q_ibytes[i] =
                        nn_cfg_readq(hw, NFP_NET_CFG_RXR_STATS(i) + 0x8);

                nfp_dev_stats.q_ibytes[i] -=
                        hw->eth_stats_base.q_ibytes[i];
        }

        /* reading per TX ring stats */
        for (i = 0; i < dev->data->nb_tx_queues; i++) {
                if (i == RTE_ETHDEV_QUEUE_STAT_CNTRS)
                        break;

                nfp_dev_stats.q_opackets[i] =
                        nn_cfg_readq(hw, NFP_NET_CFG_TXR_STATS(i));

                nfp_dev_stats.q_opackets[i] -=
                        hw->eth_stats_base.q_opackets[i];

                nfp_dev_stats.q_obytes[i] =
                        nn_cfg_readq(hw, NFP_NET_CFG_TXR_STATS(i) + 0x8);

                nfp_dev_stats.q_obytes[i] -=
                        hw->eth_stats_base.q_obytes[i];
        }

        nfp_dev_stats.ipackets =
                nn_cfg_readq(hw, NFP_NET_CFG_STATS_RX_FRAMES);

        nfp_dev_stats.ipackets -= hw->eth_stats_base.ipackets;

        nfp_dev_stats.ibytes =
                nn_cfg_readq(hw, NFP_NET_CFG_STATS_RX_OCTETS);

        nfp_dev_stats.ibytes -= hw->eth_stats_base.ibytes;

        nfp_dev_stats.opackets =
                nn_cfg_readq(hw, NFP_NET_CFG_STATS_TX_FRAMES);

        nfp_dev_stats.opackets -= hw->eth_stats_base.opackets;

        nfp_dev_stats.obytes =
                nn_cfg_readq(hw, NFP_NET_CFG_STATS_TX_OCTETS);

        nfp_dev_stats.obytes -= hw->eth_stats_base.obytes;

        /* reading general device stats */
        nfp_dev_stats.ierrors =
                nn_cfg_readq(hw, NFP_NET_CFG_STATS_RX_ERRORS);

        nfp_dev_stats.ierrors -= hw->eth_stats_base.ierrors;

        nfp_dev_stats.oerrors =
                nn_cfg_readq(hw, NFP_NET_CFG_STATS_TX_ERRORS);

        nfp_dev_stats.oerrors -= hw->eth_stats_base.oerrors;

        /* RX ring mbuf allocation failures */
        nfp_dev_stats.rx_nombuf = dev->data->rx_mbuf_alloc_failed;

        nfp_dev_stats.imissed =
                nn_cfg_readq(hw, NFP_NET_CFG_STATS_RX_DISCARDS);

        nfp_dev_stats.imissed -= hw->eth_stats_base.imissed;

        if (stats) {
                memcpy(stats, &nfp_dev_stats, sizeof(*stats));
                return 0;
        }
        return -EINVAL;
}

static int
nfp_net_stats_reset(struct rte_eth_dev *dev)
{
        int i;
        struct nfp_net_hw *hw;

        hw = NFP_NET_DEV_PRIVATE_TO_HW(dev->data->dev_private);

        /*
         * hw->eth_stats_base records the per-counter starting point.
         * Let's update it now.
         */

        /* reading per RX ring stats */
        for (i = 0; i < dev->data->nb_rx_queues; i++) {
                if (i == RTE_ETHDEV_QUEUE_STAT_CNTRS)
                        break;

                hw->eth_stats_base.q_ipackets[i] =
                        nn_cfg_readq(hw, NFP_NET_CFG_RXR_STATS(i));

                hw->eth_stats_base.q_ibytes[i] =
                        nn_cfg_readq(hw, NFP_NET_CFG_RXR_STATS(i) + 0x8);
        }

        /* reading per TX ring stats */
        for (i = 0; i < dev->data->nb_tx_queues; i++) {
                if (i == RTE_ETHDEV_QUEUE_STAT_CNTRS)
                        break;

                hw->eth_stats_base.q_opackets[i] =
                        nn_cfg_readq(hw, NFP_NET_CFG_TXR_STATS(i));

                hw->eth_stats_base.q_obytes[i] =
                        nn_cfg_readq(hw, NFP_NET_CFG_TXR_STATS(i) + 0x8);
        }

        hw->eth_stats_base.ipackets =
                nn_cfg_readq(hw, NFP_NET_CFG_STATS_RX_FRAMES);

        hw->eth_stats_base.ibytes =
                nn_cfg_readq(hw, NFP_NET_CFG_STATS_RX_OCTETS);

        hw->eth_stats_base.opackets =
                nn_cfg_readq(hw, NFP_NET_CFG_STATS_TX_FRAMES);

        hw->eth_stats_base.obytes =
                nn_cfg_readq(hw, NFP_NET_CFG_STATS_TX_OCTETS);

        /* reading general device stats */
        hw->eth_stats_base.ierrors =
                nn_cfg_readq(hw, NFP_NET_CFG_STATS_RX_ERRORS);

        hw->eth_stats_base.oerrors =
                nn_cfg_readq(hw, NFP_NET_CFG_STATS_TX_ERRORS);

        /* RX ring mbuf allocation failures */
        dev->data->rx_mbuf_alloc_failed = 0;

        hw->eth_stats_base.imissed =
                nn_cfg_readq(hw, NFP_NET_CFG_STATS_RX_DISCARDS);

        return 0;
}

static int
nfp_net_infos_get(struct rte_eth_dev *dev, struct rte_eth_dev_info *dev_info)
{
        struct nfp_net_hw *hw;

        hw = NFP_NET_DEV_PRIVATE_TO_HW(dev->data->dev_private);

        dev_info->max_rx_queues = (uint16_t)hw->max_rx_queues;
        dev_info->max_tx_queues = (uint16_t)hw->max_tx_queues;
        dev_info->min_rx_bufsize = RTE_ETHER_MIN_MTU;
        dev_info->max_rx_pktlen = hw->max_mtu;
        /* Next should change when PF support is implemented */
        dev_info->max_mac_addrs = 1;

        if (hw->cap & NFP_NET_CFG_CTRL_RXVLAN)
                dev_info->rx_offload_capa = DEV_RX_OFFLOAD_VLAN_STRIP;

        if (hw->cap & NFP_NET_CFG_CTRL_RXCSUM)
                dev_info->rx_offload_capa |= DEV_RX_OFFLOAD_IPV4_CKSUM |
                                             DEV_RX_OFFLOAD_UDP_CKSUM |
                                             DEV_RX_OFFLOAD_TCP_CKSUM;

        if (hw->cap & NFP_NET_CFG_CTRL_TXVLAN)
                dev_info->tx_offload_capa = DEV_TX_OFFLOAD_VLAN_INSERT;

        if (hw->cap & NFP_NET_CFG_CTRL_TXCSUM)
                dev_info->tx_offload_capa |= DEV_TX_OFFLOAD_IPV4_CKSUM |
                                             DEV_TX_OFFLOAD_UDP_CKSUM |
                                             DEV_TX_OFFLOAD_TCP_CKSUM;

        if (hw->cap & NFP_NET_CFG_CTRL_LSO_ANY)
                dev_info->tx_offload_capa |= DEV_TX_OFFLOAD_TCP_TSO;

        if (hw->cap & NFP_NET_CFG_CTRL_GATHER)
                dev_info->tx_offload_capa |= DEV_TX_OFFLOAD_MULTI_SEGS;

        dev_info->default_rxconf = (struct rte_eth_rxconf) {
                .rx_thresh = {
                        .pthresh = DEFAULT_RX_PTHRESH,
                        .hthresh = DEFAULT_RX_HTHRESH,
                        .wthresh = DEFAULT_RX_WTHRESH,
                },
                .rx_free_thresh = DEFAULT_RX_FREE_THRESH,
                .rx_drop_en = 0,
        };

        dev_info->default_txconf = (struct rte_eth_txconf) {
                .tx_thresh = {
                        .pthresh = DEFAULT_TX_PTHRESH,
                        .hthresh = DEFAULT_TX_HTHRESH,
                        .wthresh = DEFAULT_TX_WTHRESH,
                },
                .tx_free_thresh = DEFAULT_TX_FREE_THRESH,
                .tx_rs_thresh = DEFAULT_TX_RSBIT_THRESH,
        };

        dev_info->rx_desc_lim = (struct rte_eth_desc_lim) {
                .nb_max = NFP_NET_MAX_RX_DESC,
                .nb_min = NFP_NET_MIN_RX_DESC,
                .nb_align = NFP_ALIGN_RING_DESC,
        };

        dev_info->tx_desc_lim = (struct rte_eth_desc_lim) {
                .nb_max = NFP_NET_MAX_TX_DESC,
                .nb_min = NFP_NET_MIN_TX_DESC,
                .nb_align = NFP_ALIGN_RING_DESC,
                .nb_seg_max = NFP_TX_MAX_SEG,
                .nb_mtu_seg_max = NFP_TX_MAX_MTU_SEG,
        };

        /* All NFP devices support jumbo frames */
        dev_info->rx_offload_capa |= DEV_RX_OFFLOAD_JUMBO_FRAME;

        if (hw->cap & NFP_NET_CFG_CTRL_RSS) {
                dev_info->rx_offload_capa |= DEV_RX_OFFLOAD_RSS_HASH;

                dev_info->flow_type_rss_offloads = ETH_RSS_IPV4 |
                                                   ETH_RSS_NONFRAG_IPV4_TCP |
                                                   ETH_RSS_NONFRAG_IPV4_UDP |
                                                   ETH_RSS_IPV6 |
                                                   ETH_RSS_NONFRAG_IPV6_TCP |
                                                   ETH_RSS_NONFRAG_IPV6_UDP;

                dev_info->reta_size = NFP_NET_CFG_RSS_ITBL_SZ;
                dev_info->hash_key_size = NFP_NET_CFG_RSS_KEY_SZ;
        }

        dev_info->speed_capa = ETH_LINK_SPEED_1G | ETH_LINK_SPEED_10G |
                               ETH_LINK_SPEED_25G | ETH_LINK_SPEED_40G |
                               ETH_LINK_SPEED_50G | ETH_LINK_SPEED_100G;

        return 0;
}

static const uint32_t *
nfp_net_supported_ptypes_get(struct rte_eth_dev *dev)
{
        static const uint32_t ptypes[] = {
                /* refers to nfp_net_set_hash() */
                RTE_PTYPE_INNER_L3_IPV4,
                RTE_PTYPE_INNER_L3_IPV6,
                RTE_PTYPE_INNER_L3_IPV6_EXT,
                RTE_PTYPE_INNER_L4_MASK,
                RTE_PTYPE_UNKNOWN
        };

        if (dev->rx_pkt_burst == nfp_net_recv_pkts)
                return ptypes;
        return NULL;
}

static int
nfp_rx_queue_intr_enable(struct rte_eth_dev *dev, uint16_t queue_id)
{
        struct rte_pci_device *pci_dev;
        struct nfp_net_hw *hw;
        int base = 0;

        hw = NFP_NET_DEV_PRIVATE_TO_HW(dev->data->dev_private);
        pci_dev = RTE_ETH_DEV_TO_PCI(dev);

        if (pci_dev->intr_handle.type != RTE_INTR_HANDLE_UIO)
                base = 1;

        /* Make sure all updates are written before un-masking */
        rte_wmb();
        nn_cfg_writeb(hw, NFP_NET_CFG_ICR(base + queue_id),
                      NFP_NET_CFG_ICR_UNMASKED);
        return 0;
}

static int
nfp_rx_queue_intr_disable(struct rte_eth_dev *dev, uint16_t queue_id)
{
        struct rte_pci_device *pci_dev;
        struct nfp_net_hw *hw;
        int base = 0;

        hw = NFP_NET_DEV_PRIVATE_TO_HW(dev->data->dev_private);
        pci_dev = RTE_ETH_DEV_TO_PCI(dev);

        if (pci_dev->intr_handle.type != RTE_INTR_HANDLE_UIO)
                base = 1;

        /* Make sure all updates are written before masking */
        rte_wmb();
        nn_cfg_writeb(hw, NFP_NET_CFG_ICR(base + queue_id), 0x1);
        return 0;
}
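
/*
 * Note (illustration): "base" mirrors the vector layout programmed by
 * nfp_configure_rx_interrupt() above. With VFIO, entry 0 carries LSC and
 * RX queue q uses ICR entry q + 1; with UIO there is a single vector, so
 * queue 0 maps to ICR entry 0.
 */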

static void
nfp_net_dev_link_status_print(struct rte_eth_dev *dev)
{
        struct rte_pci_device *pci_dev = RTE_ETH_DEV_TO_PCI(dev);
        struct rte_eth_link link;

        rte_eth_linkstatus_get(dev, &link);
        if (link.link_status)
                PMD_DRV_LOG(INFO, "Port %d: Link Up - speed %u Mbps - %s",
                            dev->data->port_id, link.link_speed,
                            link.link_duplex == ETH_LINK_FULL_DUPLEX
                            ? "full-duplex" : "half-duplex");
        else
                PMD_DRV_LOG(INFO, "Port %d: Link Down",
                            dev->data->port_id);

        PMD_DRV_LOG(INFO, "PCI Address: " PCI_PRI_FMT,
                    pci_dev->addr.domain, pci_dev->addr.bus,
                    pci_dev->addr.devid, pci_dev->addr.function);
}

/* Interrupt configuration and handling */

/*
 * nfp_net_irq_unmask - Unmask an interrupt
 *
 * If MSI-X auto-masking is enabled clear the mask bit, otherwise
 * clear the ICR for the entry.
 */
static void
nfp_net_irq_unmask(struct rte_eth_dev *dev)
{
        struct nfp_net_hw *hw;
        struct rte_pci_device *pci_dev;

        hw = NFP_NET_DEV_PRIVATE_TO_HW(dev->data->dev_private);
        pci_dev = RTE_ETH_DEV_TO_PCI(dev);

        if (hw->ctrl & NFP_NET_CFG_CTRL_MSIXAUTO) {
                /* If MSI-X auto-masking is used, clear the entry */
                rte_wmb();
                rte_intr_ack(&pci_dev->intr_handle);
        } else {
                /* Make sure all updates are written before un-masking */
                rte_wmb();
                nn_cfg_writeb(hw, NFP_NET_CFG_ICR(NFP_NET_IRQ_LSC_IDX),
                              NFP_NET_CFG_ICR_UNMASKED);
        }
}

static void
nfp_net_dev_interrupt_handler(void *param)
{
        int64_t timeout;
        struct rte_eth_link link;
        struct rte_eth_dev *dev = (struct rte_eth_dev *)param;

        PMD_DRV_LOG(DEBUG, "We got a LSC interrupt!!!");

        rte_eth_linkstatus_get(dev, &link);

        nfp_net_link_update(dev, 0);

        if (!link.link_status) {
                /* Link was down, likely coming up: recheck after 1 sec to
                 * let it stabilize.
                 */
                timeout = NFP_NET_LINK_UP_CHECK_TIMEOUT;
        } else {
                /* Link was up, likely going down: recheck after 4 sec to
                 * let it stabilize.
                 */
                timeout = NFP_NET_LINK_DOWN_CHECK_TIMEOUT;
        }

        if (rte_eal_alarm_set(timeout * 1000,
                              nfp_net_dev_interrupt_delayed_handler,
                              (void *)dev) < 0) {
                PMD_INIT_LOG(ERR, "Error setting alarm");
                /* Unmasking */
                nfp_net_irq_unmask(dev);
        }
}

/*
 * Interrupt handler which shall be registered as an alarm callback for
 * delayed handling of a specific interrupt, waiting for a stable NIC state.
 * As the NIC interrupt state is not stable for nfp right after the link
 * goes down, it needs to wait 4 seconds to get a stable status.
 *
 * @param handle   Pointer to interrupt handle.
 * @param param    The address of parameter (struct rte_eth_dev *)
 *
 * @return  void
 */
static void
nfp_net_dev_interrupt_delayed_handler(void *param)
{
        struct rte_eth_dev *dev = (struct rte_eth_dev *)param;

        nfp_net_link_update(dev, 0);
        rte_eth_dev_callback_process(dev, RTE_ETH_EVENT_INTR_LSC, NULL);

        nfp_net_dev_link_status_print(dev);

        /* Unmasking */
        nfp_net_irq_unmask(dev);
}

static int
nfp_net_dev_mtu_set(struct rte_eth_dev *dev, uint16_t mtu)
{
        struct nfp_net_hw *hw;

        hw = NFP_NET_DEV_PRIVATE_TO_HW(dev->data->dev_private);

        /* check that mtu is within the allowed range */
        if (mtu < RTE_ETHER_MIN_MTU || (uint32_t)mtu > hw->max_mtu)
                return -EINVAL;

        /* mtu setting is forbidden if port is started */
        if (dev->data->dev_started) {
                PMD_DRV_LOG(ERR, "port %d must be stopped before configuration",
                            dev->data->port_id);
                return -EBUSY;
        }

        /* switch to jumbo mode if needed */
        if ((uint32_t)mtu > RTE_ETHER_MTU)
                dev->data->dev_conf.rxmode.offloads |= DEV_RX_OFFLOAD_JUMBO_FRAME;
        else
                dev->data->dev_conf.rxmode.offloads &= ~DEV_RX_OFFLOAD_JUMBO_FRAME;

        /* update max frame size */
        dev->data->dev_conf.rxmode.max_rx_pkt_len = (uint32_t)mtu;

        /* writing to configuration space */
        nn_cfg_writel(hw, NFP_NET_CFG_MTU, (uint32_t)mtu);

        hw->mtu = mtu;

        return 0;
}

static int
nfp_net_vlan_offload_set(struct rte_eth_dev *dev, int mask)
{
        uint32_t new_ctrl, update;
        struct nfp_net_hw *hw;
        int ret;

        hw = NFP_NET_DEV_PRIVATE_TO_HW(dev->data->dev_private);
        new_ctrl = 0;

        /* Enable vlan strip if it is not configured yet */
        if ((mask & ETH_VLAN_STRIP_OFFLOAD) &&
            !(hw->ctrl & NFP_NET_CFG_CTRL_RXVLAN))
                new_ctrl = hw->ctrl | NFP_NET_CFG_CTRL_RXVLAN;

        /* Disable vlan strip only if it is configured */
        if (!(mask & ETH_VLAN_STRIP_OFFLOAD) &&
            (hw->ctrl & NFP_NET_CFG_CTRL_RXVLAN))
                new_ctrl = hw->ctrl & ~NFP_NET_CFG_CTRL_RXVLAN;

        if (new_ctrl == 0)
                return 0;

        update = NFP_NET_CFG_UPDATE_GEN;

        ret = nfp_net_reconfig(hw, new_ctrl, update);
        if (!ret)
                hw->ctrl = new_ctrl;

        return ret;
}

static int
nfp_net_rss_reta_write(struct rte_eth_dev *dev,
                       struct rte_eth_rss_reta_entry64 *reta_conf,
                       uint16_t reta_size)
{
        uint32_t reta, mask;
        int i, j;
        int idx, shift;
        struct nfp_net_hw *hw =
                NFP_NET_DEV_PRIVATE_TO_HW(dev->data->dev_private);

        if (reta_size != NFP_NET_CFG_RSS_ITBL_SZ) {
                PMD_DRV_LOG(ERR, "The size of hash lookup table configured "
                        "(%d) doesn't match the number hardware can support "
                        "(%d)", reta_size, NFP_NET_CFG_RSS_ITBL_SZ);
                return -EINVAL;
        }

        /*
         * Update Redirection Table. There are 128 8bit-entries which can
         * be managed as 32 32bit-entries.
         */
        for (i = 0; i < reta_size; i += 4) {
                /* Handling 4 RSS entries per loop */
                idx = i / RTE_RETA_GROUP_SIZE;
                shift = i % RTE_RETA_GROUP_SIZE;
                mask = (uint8_t)((reta_conf[idx].mask >> shift) & 0xF);

                if (!mask)
                        continue;

                reta = 0;
                /* If all 4 entries were set, no need to read the RETA register */
                if (mask != 0xF)
                        reta = nn_cfg_readl(hw, NFP_NET_CFG_RSS_ITBL + i);

                for (j = 0; j < 4; j++) {
                        if (!(mask & (0x1 << j)))
                                continue;
                        if (mask != 0xF)
                                /* Clearing the entry bits */
                                reta &= ~(0xFF << (8 * j));
                        reta |= reta_conf[idx].reta[shift + j] << (8 * j);
                }
                nn_cfg_writel(hw, NFP_NET_CFG_RSS_ITBL + (idx * 64) + shift,
                              reta);
        }
        return 0;
}
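
/*
 * Worked example (illustration only): with RTE_RETA_GROUP_SIZE = 64, the
 * 128 RETA entries span reta_conf[0] and reta_conf[1]. For i = 68 the loop
 * above computes idx = 1 and shift = 4, so entries 68-71 are packed one
 * byte each into the 32-bit word at NFP_NET_CFG_RSS_ITBL + 64 + 4, with
 * entry 68 in bits 7:0 and entry 71 in bits 31:24.
 */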

/* Update Redirection Table(RETA) of Receive Side Scaling of Ethernet device */
static int
nfp_net_reta_update(struct rte_eth_dev *dev,
                    struct rte_eth_rss_reta_entry64 *reta_conf,
                    uint16_t reta_size)
{
        struct nfp_net_hw *hw =
                NFP_NET_DEV_PRIVATE_TO_HW(dev->data->dev_private);
        uint32_t update;
        int ret;

        if (!(hw->ctrl & NFP_NET_CFG_CTRL_RSS))
                return -EINVAL;

        ret = nfp_net_rss_reta_write(dev, reta_conf, reta_size);
        if (ret != 0)
                return ret;

        update = NFP_NET_CFG_UPDATE_RSS;

        if (nfp_net_reconfig(hw, hw->ctrl, update) < 0)
                return -EIO;

        return 0;
}

/* Query Redirection Table(RETA) of Receive Side Scaling of Ethernet device. */
static int
nfp_net_reta_query(struct rte_eth_dev *dev,
                   struct rte_eth_rss_reta_entry64 *reta_conf,
                   uint16_t reta_size)
{
        uint8_t i, j, mask;
        int idx, shift;
        uint32_t reta;
        struct nfp_net_hw *hw;

        hw = NFP_NET_DEV_PRIVATE_TO_HW(dev->data->dev_private);

        if (!(hw->ctrl & NFP_NET_CFG_CTRL_RSS))
                return -EINVAL;

        if (reta_size != NFP_NET_CFG_RSS_ITBL_SZ) {
                PMD_DRV_LOG(ERR, "The size of hash lookup table configured "
                        "(%d) doesn't match the number hardware can support "
                        "(%d)", reta_size, NFP_NET_CFG_RSS_ITBL_SZ);
                return -EINVAL;
        }

        /*
         * Reading Redirection Table. There are 128 8bit-entries which can
         * be managed as 32 32bit-entries.
         */
        for (i = 0; i < reta_size; i += 4) {
                /* Handling 4 RSS entries per loop */
                idx = i / RTE_RETA_GROUP_SIZE;
                shift = i % RTE_RETA_GROUP_SIZE;
                mask = (uint8_t)((reta_conf[idx].mask >> shift) & 0xF);

                if (!mask)
                        continue;

                reta = nn_cfg_readl(hw, NFP_NET_CFG_RSS_ITBL + (idx * 64) +
                                    shift);
                for (j = 0; j < 4; j++) {
                        if (!(mask & (0x1 << j)))
                                continue;
                        reta_conf[idx].reta[shift + j] =
                                (uint8_t)((reta >> (8 * j)) & 0xFF);
                }
        }
        return 0;
}

static int
nfp_net_rss_hash_write(struct rte_eth_dev *dev,
                       struct rte_eth_rss_conf *rss_conf)
{
        struct nfp_net_hw *hw;
        uint64_t rss_hf;
        uint32_t cfg_rss_ctrl = 0;
        uint8_t key;
        int i;

        hw = NFP_NET_DEV_PRIVATE_TO_HW(dev->data->dev_private);

        /* Writing the key byte by byte */
        for (i = 0; i < rss_conf->rss_key_len; i++) {
                memcpy(&key, &rss_conf->rss_key[i], 1);
                nn_cfg_writeb(hw, NFP_NET_CFG_RSS_KEY + i, key);
        }

        rss_hf = rss_conf->rss_hf;

        if (rss_hf & ETH_RSS_IPV4)
                cfg_rss_ctrl |= NFP_NET_CFG_RSS_IPV4;

        if (rss_hf & ETH_RSS_NONFRAG_IPV4_TCP)
                cfg_rss_ctrl |= NFP_NET_CFG_RSS_IPV4_TCP;

        if (rss_hf & ETH_RSS_NONFRAG_IPV4_UDP)
                cfg_rss_ctrl |= NFP_NET_CFG_RSS_IPV4_UDP;

        if (rss_hf & ETH_RSS_IPV6)
                cfg_rss_ctrl |= NFP_NET_CFG_RSS_IPV6;

        if (rss_hf & ETH_RSS_NONFRAG_IPV6_TCP)
                cfg_rss_ctrl |= NFP_NET_CFG_RSS_IPV6_TCP;

        if (rss_hf & ETH_RSS_NONFRAG_IPV6_UDP)
                cfg_rss_ctrl |= NFP_NET_CFG_RSS_IPV6_UDP;

        cfg_rss_ctrl |= NFP_NET_CFG_RSS_MASK;
        cfg_rss_ctrl |= NFP_NET_CFG_RSS_TOEPLITZ;

        /* configuring where to apply the RSS hash */
        nn_cfg_writel(hw, NFP_NET_CFG_RSS_CTRL, cfg_rss_ctrl);

        /* Writing the key size */
        nn_cfg_writeb(hw, NFP_NET_CFG_RSS_KEY_SZ, rss_conf->rss_key_len);

        return 0;
}

static int
nfp_net_rss_hash_update(struct rte_eth_dev *dev,
                        struct rte_eth_rss_conf *rss_conf)
{
        uint32_t update;
        uint64_t rss_hf;
        struct nfp_net_hw *hw;

        hw = NFP_NET_DEV_PRIVATE_TO_HW(dev->data->dev_private);

        rss_hf = rss_conf->rss_hf;

        /* Checking if RSS is enabled */
        if (!(hw->ctrl & NFP_NET_CFG_CTRL_RSS)) {
                if (rss_hf != 0) { /* Enable RSS? */
                        PMD_DRV_LOG(ERR, "RSS unsupported");
                        return -EINVAL;
                }
                return 0; /* Nothing to do */
        }

        if (rss_conf->rss_key_len > NFP_NET_CFG_RSS_KEY_SZ) {
                PMD_DRV_LOG(ERR, "hash key too long");
                return -EINVAL;
        }

        nfp_net_rss_hash_write(dev, rss_conf);

        update = NFP_NET_CFG_UPDATE_RSS;

        if (nfp_net_reconfig(hw, hw->ctrl, update) < 0)
                return -EIO;

        return 0;
}
1568
1569 static int
1570 nfp_net_rss_hash_conf_get(struct rte_eth_dev *dev,
1571                           struct rte_eth_rss_conf *rss_conf)
1572 {
1573         uint64_t rss_hf;
1574         uint32_t cfg_rss_ctrl;
1575         uint8_t key;
1576         int i;
1577         struct nfp_net_hw *hw;
1578
1579         hw = NFP_NET_DEV_PRIVATE_TO_HW(dev->data->dev_private);
1580
1581         if (!(hw->ctrl & NFP_NET_CFG_CTRL_RSS))
1582                 return -EINVAL;
1583
1584         rss_hf = rss_conf->rss_hf;
1585         cfg_rss_ctrl = nn_cfg_readl(hw, NFP_NET_CFG_RSS_CTRL);
1586
1587         if (cfg_rss_ctrl & NFP_NET_CFG_RSS_IPV4)
1588                 rss_hf |= ETH_RSS_IPV4;
1589
1590         if (cfg_rss_ctrl & NFP_NET_CFG_RSS_IPV4_TCP)
1591                 rss_hf |= ETH_RSS_NONFRAG_IPV4_TCP;
1592
1593         if (cfg_rss_ctrl & NFP_NET_CFG_RSS_IPV6_TCP)
1594                 rss_hf |= ETH_RSS_NONFRAG_IPV6_TCP;
1595
1596         if (cfg_rss_ctrl & NFP_NET_CFG_RSS_IPV4_UDP)
1597                 rss_hf |= ETH_RSS_NONFRAG_IPV4_UDP;
1598
1599         if (cfg_rss_ctrl & NFP_NET_CFG_RSS_IPV6_UDP)
1600                 rss_hf |= ETH_RSS_NONFRAG_IPV6_UDP;
1601
1602         if (cfg_rss_ctrl & NFP_NET_CFG_RSS_IPV6)
1603                 rss_hf |= ETH_RSS_IPV6;
1604
1605         /* Propagate current RSS hash functions to caller */
1606         rss_conf->rss_hf = rss_hf;
1607
1608         /* Reading the key size */
1609         rss_conf->rss_key_len = nn_cfg_readl(hw, NFP_NET_CFG_RSS_KEY_SZ);
1610
1611         /* Reading the key byte by byte */
1612         for (i = 0; i < rss_conf->rss_key_len; i++) {
1613                 key = nn_cfg_readb(hw, NFP_NET_CFG_RSS_KEY + i);
1614                 memcpy(&rss_conf->rss_key[i], &key, 1);
1615         }
1616
1617         return 0;
1618 }
1619
1620 static int
1621 nfp_net_rss_config_default(struct rte_eth_dev *dev)
1622 {
1623         struct rte_eth_conf *dev_conf;
1624         struct rte_eth_rss_conf rss_conf;
1625         struct rte_eth_rss_reta_entry64 nfp_reta_conf[2];
1626         uint16_t rx_queues = dev->data->nb_rx_queues;
1627         uint16_t queue;
1628         int i, j, ret;
1629
1630         PMD_DRV_LOG(INFO, "setting default RSS conf for %u queues",
1631                 rx_queues);
1632
1633         nfp_reta_conf[0].mask = ~0x0;
1634         nfp_reta_conf[1].mask = ~0x0;
1635
1636         queue = 0;
1637         for (i = 0; i < 0x40; i += 8) {
1638                 for (j = i; j < (i + 8); j++) {
1639                         nfp_reta_conf[0].reta[j] = queue;
1640                         nfp_reta_conf[1].reta[j] = queue++;
1641                         queue %= rx_queues;
1642                 }
1643         }
1644         ret = nfp_net_rss_reta_write(dev, nfp_reta_conf, 0x80);
1645         if (ret != 0)
1646                 return ret;
1647
1648         dev_conf = &dev->data->dev_conf;
1649         if (!dev_conf) {
1650                 PMD_DRV_LOG(INFO, "wrong rss conf");
1651                 return -EINVAL;
1652         }
1653         rss_conf = dev_conf->rx_adv_conf.rss_conf;
1654
1655         ret = nfp_net_rss_hash_write(dev, &rss_conf);
1656
1657         return ret;
1658 }
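
/*
 * Worked example of the default spread built above: with nb_rx_queues
 * equal to 4, the 128-entry redirection table becomes 0, 1, 2, 3, 0, 1,
 * 2, 3, ... so incoming flows are round-robined across every configured
 * Rx queue.
 */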
1659
1660
1661 /* Initialise and register the driver with the DPDK application */
1662 static const struct eth_dev_ops nfp_net_eth_dev_ops = {
1663         .dev_configure          = nfp_net_configure,
1664         .dev_start              = nfp_net_start,
1665         .dev_stop               = nfp_net_stop,
1666         .dev_set_link_up        = nfp_net_set_link_up,
1667         .dev_set_link_down      = nfp_net_set_link_down,
1668         .dev_close              = nfp_net_close,
1669         .promiscuous_enable     = nfp_net_promisc_enable,
1670         .promiscuous_disable    = nfp_net_promisc_disable,
1671         .link_update            = nfp_net_link_update,
1672         .stats_get              = nfp_net_stats_get,
1673         .stats_reset            = nfp_net_stats_reset,
1674         .dev_infos_get          = nfp_net_infos_get,
1675         .dev_supported_ptypes_get = nfp_net_supported_ptypes_get,
1676         .mtu_set                = nfp_net_dev_mtu_set,
1677         .mac_addr_set           = nfp_set_mac_addr,
1678         .vlan_offload_set       = nfp_net_vlan_offload_set,
1679         .reta_update            = nfp_net_reta_update,
1680         .reta_query             = nfp_net_reta_query,
1681         .rss_hash_update        = nfp_net_rss_hash_update,
1682         .rss_hash_conf_get      = nfp_net_rss_hash_conf_get,
1683         .rx_queue_setup         = nfp_net_rx_queue_setup,
1684         .rx_queue_release       = nfp_net_rx_queue_release,
1685         .tx_queue_setup         = nfp_net_tx_queue_setup,
1686         .tx_queue_release       = nfp_net_tx_queue_release,
1687         .rx_queue_intr_enable   = nfp_rx_queue_intr_enable,
1688         .rx_queue_intr_disable  = nfp_rx_queue_intr_disable,
1689 };
1690
1691
1692 static int
1693 nfp_net_init(struct rte_eth_dev *eth_dev)
1694 {
1695         struct rte_pci_device *pci_dev;
1696         struct nfp_pf_dev *pf_dev;
1697         struct nfp_net_hw *hw;
1698
1699         uint64_t tx_bar_off = 0, rx_bar_off = 0;
1700         uint32_t start_q;
1701         int stride = 4;
1702         int port = 0;
1703         int err;
1704
1705         PMD_INIT_FUNC_TRACE();
1706
1707         pci_dev = RTE_ETH_DEV_TO_PCI(eth_dev);
1708
1709         /* Use backpointer here to the PF of this eth_dev */
1710         pf_dev = NFP_NET_DEV_PRIVATE_TO_PF(eth_dev->data->dev_private);
1711
1712         /* NFP cannot handle DMA addresses requiring more than 40 bits */
1713         if (rte_mem_check_dma_mask(40)) {
1714                 RTE_LOG(ERR, PMD, "device %s can not be used: restricted dma "
1715                         "mask to 40 bits!\n", pci_dev->device.name);
1716                 return -ENODEV;
1717         }
1719
1720         if ((pci_dev->id.device_id == PCI_DEVICE_ID_NFP4000_PF_NIC) ||
1721             (pci_dev->id.device_id == PCI_DEVICE_ID_NFP6000_PF_NIC)) {
1722                 port = ((struct nfp_net_hw *)eth_dev->data->dev_private)->idx;
1723                 if (port < 0 || port > 7) {
1724                         PMD_DRV_LOG(ERR, "Port value is wrong");
1725                         return -ENODEV;
1726                 }
1727
1728                 /* Use PF array of physical ports to get pointer to
1729                  * this specific port
1730                  */
1731                 hw = pf_dev->ports[port];
1732
1733                 PMD_INIT_LOG(DEBUG, "Working with physical port number: %d, "
1734                                     "NFP internal port number: %d",
1735                                     port, hw->nfp_idx);
1736
1737         } else {
1738                 hw = NFP_NET_DEV_PRIVATE_TO_HW(eth_dev->data->dev_private);
1739         }
1740
1741         eth_dev->dev_ops = &nfp_net_eth_dev_ops;
1742         eth_dev->rx_queue_count = nfp_net_rx_queue_count;
1743         eth_dev->rx_pkt_burst = &nfp_net_recv_pkts;
1744         eth_dev->tx_pkt_burst = &nfp_net_xmit_pkts;
1745
1746         /* For secondary processes, the primary has done all the work */
1747         if (rte_eal_process_type() != RTE_PROC_PRIMARY)
1748                 return 0;
1749
1750         rte_eth_copy_pci_info(eth_dev, pci_dev);
1751
1752         hw->device_id = pci_dev->id.device_id;
1753         hw->vendor_id = pci_dev->id.vendor_id;
1754         hw->subsystem_device_id = pci_dev->id.subsystem_device_id;
1755         hw->subsystem_vendor_id = pci_dev->id.subsystem_vendor_id;
1756
1757         PMD_INIT_LOG(DEBUG, "nfp_net: device (%u:%u) %u:%u:%u:%u",
1758                      pci_dev->id.vendor_id, pci_dev->id.device_id,
1759                      pci_dev->addr.domain, pci_dev->addr.bus,
1760                      pci_dev->addr.devid, pci_dev->addr.function);
1761
1762         hw->ctrl_bar = (uint8_t *)pci_dev->mem_resource[0].addr;
1763         if (hw->ctrl_bar == NULL) {
1764                 PMD_DRV_LOG(ERR,
1765                         "hw->ctrl_bar is NULL. BAR0 not configured");
1766                 return -ENODEV;
1767         }
1768
1769         if (hw->is_phyport) {
1770                 if (port == 0) {
1771                         hw->ctrl_bar = pf_dev->ctrl_bar;
1772                 } else {
1773                         if (!pf_dev->ctrl_bar)
1774                                 return -ENODEV;
1775                         /* Use the port offset in the PF ctrl_bar for
1776                          * this port's control bar
1777                          */
1778                         hw->ctrl_bar = pf_dev->ctrl_bar +
1779                                        (port * NFP_PF_CSR_SLICE_SIZE);
1780                 }
1781         }
1782
1783         PMD_INIT_LOG(DEBUG, "ctrl bar: %p", hw->ctrl_bar);
1784
1785         hw->max_rx_queues = nn_cfg_readl(hw, NFP_NET_CFG_MAX_RXRINGS);
1786         hw->max_tx_queues = nn_cfg_readl(hw, NFP_NET_CFG_MAX_TXRINGS);
1787
1788         /* Work out where in the BAR the queues start. */
1789         switch (pci_dev->id.device_id) {
1790         case PCI_DEVICE_ID_NFP4000_PF_NIC:
1791         case PCI_DEVICE_ID_NFP6000_PF_NIC:
1792         case PCI_DEVICE_ID_NFP6000_VF_NIC:
1793                 start_q = nn_cfg_readl(hw, NFP_NET_CFG_START_TXQ);
1794                 tx_bar_off = (uint64_t)start_q * NFP_QCP_QUEUE_ADDR_SZ;
1795                 start_q = nn_cfg_readl(hw, NFP_NET_CFG_START_RXQ);
1796                 rx_bar_off = (uint64_t)start_q * NFP_QCP_QUEUE_ADDR_SZ;
1797                 break;
1798         default:
1799                 PMD_DRV_LOG(ERR, "nfp_net: no matching device ID");
1800                 err = -ENODEV;
1801                 goto dev_err_ctrl_map;
1802         }
1803
1804         PMD_INIT_LOG(DEBUG, "tx_bar_off: 0x%" PRIx64 "", tx_bar_off);
1805         PMD_INIT_LOG(DEBUG, "rx_bar_off: 0x%" PRIx64 "", rx_bar_off);
1806
1807         if (hw->is_phyport) {
1808                 hw->tx_bar = pf_dev->hw_queues + tx_bar_off;
1809                 hw->rx_bar = pf_dev->hw_queues + rx_bar_off;
1810                 eth_dev->data->dev_private = hw;
1811         } else {
1812                 hw->tx_bar = (uint8_t *)pci_dev->mem_resource[2].addr +
1813                              tx_bar_off;
1814                 hw->rx_bar = (uint8_t *)pci_dev->mem_resource[2].addr +
1815                              rx_bar_off;
1816         }
1817
1818         PMD_INIT_LOG(DEBUG, "ctrl_bar: %p, tx_bar: %p, rx_bar: %p",
1819                      hw->ctrl_bar, hw->tx_bar, hw->rx_bar);
1820
1821         nfp_net_cfg_queue_setup(hw);
1822
1823         /* Get some of the read-only fields from the config BAR */
1824         hw->ver = nn_cfg_readl(hw, NFP_NET_CFG_VERSION);
1825         hw->cap = nn_cfg_readl(hw, NFP_NET_CFG_CAP);
1826         hw->max_mtu = nn_cfg_readl(hw, NFP_NET_CFG_MAX_MTU);
1827         hw->mtu = RTE_ETHER_MTU;
1828
1829         /* VLAN insertion is incompatible with LSOv2 */
1830         if (hw->cap & NFP_NET_CFG_CTRL_LSO2)
1831                 hw->cap &= ~NFP_NET_CFG_CTRL_TXVLAN;
1832
1833         if (NFD_CFG_MAJOR_VERSION_of(hw->ver) < 2)
1834                 hw->rx_offset = NFP_NET_RX_OFFSET;
1835         else
1836                 hw->rx_offset = nn_cfg_readl(hw, NFP_NET_CFG_RX_OFFSET_ADDR);
1837
1838         PMD_INIT_LOG(INFO, "VER: %u.%u, Maximum supported MTU: %d",
1839                            NFD_CFG_MAJOR_VERSION_of(hw->ver),
1840                            NFD_CFG_MINOR_VERSION_of(hw->ver), hw->max_mtu);
1841
1842         PMD_INIT_LOG(INFO, "CAP: %#x, %s%s%s%s%s%s%s%s%s%s%s%s%s%s", hw->cap,
1843                      hw->cap & NFP_NET_CFG_CTRL_PROMISC ? "PROMISC " : "",
1844                      hw->cap & NFP_NET_CFG_CTRL_L2BC    ? "L2BCFILT " : "",
1845                      hw->cap & NFP_NET_CFG_CTRL_L2MC    ? "L2MCFILT " : "",
1846                      hw->cap & NFP_NET_CFG_CTRL_RXCSUM  ? "RXCSUM "  : "",
1847                      hw->cap & NFP_NET_CFG_CTRL_TXCSUM  ? "TXCSUM "  : "",
1848                      hw->cap & NFP_NET_CFG_CTRL_RXVLAN  ? "RXVLAN "  : "",
1849                      hw->cap & NFP_NET_CFG_CTRL_TXVLAN  ? "TXVLAN "  : "",
1850                      hw->cap & NFP_NET_CFG_CTRL_SCATTER ? "SCATTER " : "",
1851                      hw->cap & NFP_NET_CFG_CTRL_GATHER  ? "GATHER "  : "",
1852                      hw->cap & NFP_NET_CFG_CTRL_LIVE_ADDR ? "LIVE_ADDR "  : "",
1853                      hw->cap & NFP_NET_CFG_CTRL_LSO     ? "TSO "     : "",
1854                      hw->cap & NFP_NET_CFG_CTRL_LSO2     ? "TSOv2 "     : "",
1855                      hw->cap & NFP_NET_CFG_CTRL_RSS     ? "RSS "     : "",
1856                      hw->cap & NFP_NET_CFG_CTRL_RSS2     ? "RSSv2 "     : "");
1857
1858         hw->ctrl = 0;
1859
1860         hw->stride_rx = stride;
1861         hw->stride_tx = stride;
1862
1863         PMD_INIT_LOG(INFO, "max_rx_queues: %u, max_tx_queues: %u",
1864                      hw->max_rx_queues, hw->max_tx_queues);
1865
1866         /* Initializing spinlock for reconfigs */
1867         rte_spinlock_init(&hw->reconfig_lock);
1868
1869         /* Allocating memory for mac addr */
1870         eth_dev->data->mac_addrs = rte_zmalloc("mac_addr",
1871                                                RTE_ETHER_ADDR_LEN, 0);
1872         if (eth_dev->data->mac_addrs == NULL) {
1873                 PMD_INIT_LOG(ERR, "Failed to allocate space for MAC address");
1874                 err = -ENOMEM;
1875                 goto dev_err_queues_map;
1876         }
1877
1878         if (hw->is_phyport) {
1879                 nfp_net_pf_read_mac(pf_dev, port);
1880                 nfp_net_write_mac(hw, (uint8_t *)&hw->mac_addr);
1881         } else {
1882                 nfp_net_vf_read_mac(hw);
1883         }
1884
1885         if (!rte_is_valid_assigned_ether_addr(
1886                     (struct rte_ether_addr *)&hw->mac_addr)) {
1887                 PMD_INIT_LOG(INFO, "Using random mac address for port %d",
1888                                    port);
1889                 /* Using a random mac address when none is assigned */
1890                 rte_eth_random_addr(&hw->mac_addr[0]);
1891                 nfp_net_write_mac(hw, (uint8_t *)&hw->mac_addr);
1892         }
1893
1894         /* Copying mac address to DPDK eth_dev struct */
1895         rte_ether_addr_copy((struct rte_ether_addr *)hw->mac_addr,
1896                         &eth_dev->data->mac_addrs[0]);
1897
1898         if (!(hw->cap & NFP_NET_CFG_CTRL_LIVE_ADDR))
1899                 eth_dev->data->dev_flags |= RTE_ETH_DEV_NOLIVE_MAC_ADDR;
1900
1901         eth_dev->data->dev_flags |= RTE_ETH_DEV_AUTOFILL_QUEUE_XSTATS;
1902
1903         PMD_INIT_LOG(INFO, "port %d VendorID=0x%x DeviceID=0x%x "
1904                      "mac=%02x:%02x:%02x:%02x:%02x:%02x",
1905                      eth_dev->data->port_id, pci_dev->id.vendor_id,
1906                      pci_dev->id.device_id,
1907                      hw->mac_addr[0], hw->mac_addr[1], hw->mac_addr[2],
1908                      hw->mac_addr[3], hw->mac_addr[4], hw->mac_addr[5]);
1909
1910         if (rte_eal_process_type() == RTE_PROC_PRIMARY) {
1911                 /* Registering LSC interrupt handler */
1912                 rte_intr_callback_register(&pci_dev->intr_handle,
1913                                            nfp_net_dev_interrupt_handler,
1914                                            (void *)eth_dev);
1915                 /* Telling the firmware about the LSC interrupt entry */
1916                 nn_cfg_writeb(hw, NFP_NET_CFG_LSC, NFP_NET_IRQ_LSC_IDX);
1917                 /* Recording current stats counters values */
1918                 nfp_net_stats_reset(eth_dev);
1919         }
1920
1921         return 0;
1922
1923 dev_err_queues_map:
1924         nfp_cpp_area_free(hw->hwqueues_area);
1925 dev_err_ctrl_map:
1926         nfp_cpp_area_free(hw->ctrl_area);
1927
1928         return err;
1929 }
1930
1931 #define NFP_CPP_MEMIO_BOUNDARY          (1 << 20)
1932
1933 /*
1934  * Serving a write request to NFP from host programs. The request
1935  * sends the write size and the CPP target. The bridge makes use
1936  * of the CPP interface handler configured at PMD setup time.
1937  */
1938 static int
1939 nfp_cpp_bridge_serve_write(int sockfd, struct nfp_cpp *cpp)
1940 {
1941         struct nfp_cpp_area *area;
1942         off_t offset, nfp_offset;
1943         uint32_t cpp_id, pos, len;
1944         uint32_t tmpbuf[16];
1945         size_t count, curlen, totlen = 0;
1946         int err = 0;
1947
1948         PMD_CPP_LOG(DEBUG, "%s: offset size %zu, count_size: %zu\n", __func__,
1949                 sizeof(off_t), sizeof(size_t));
1950
1951         /* Reading the count param */
1952         err = recv(sockfd, &count, sizeof(off_t), 0);
1953         if (err != sizeof(off_t))
1954                 return -EINVAL;
1955
1956         curlen = count;
1957
1958         /* Reading the offset param */
1959         err = recv(sockfd, &offset, sizeof(off_t), 0);
1960         if (err != sizeof(off_t))
1961                 return -EINVAL;
1962
1963         /* Obtain target's CPP ID and offset in target */
1964         cpp_id = (offset >> 40) << 8;
1965         nfp_offset = offset & ((1ull << 40) - 1);
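        /*
         * Worked example of the encoding above: the upper 24 bits of the
         * 64-bit offset carry the CPP target/action/token fields and the
         * low 40 bits the offset within that target, so an offset of
         * (0x07ULL << 40) | 0x1000 yields cpp_id 0x0700 and nfp_offset
         * 0x1000.
         */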
1966
1967         PMD_CPP_LOG(DEBUG, "%s: count %zu and offset %jd\n", __func__, count,
1968                 offset);
1969         PMD_CPP_LOG(DEBUG, "%s: cpp_id %08x and nfp_offset %jd\n", __func__,
1970                 cpp_id, nfp_offset);
1971
1972         /* Trim the first chunk if it crosses a memory mapping boundary */
1973         if (((nfp_offset + (off_t)count - 1) & ~(NFP_CPP_MEMIO_BOUNDARY - 1)) !=
1974             (nfp_offset & ~(NFP_CPP_MEMIO_BOUNDARY - 1))) {
1975                 curlen = NFP_CPP_MEMIO_BOUNDARY -
1976                         (nfp_offset & (NFP_CPP_MEMIO_BOUNDARY - 1));
1977         }
1978
1979         while (count > 0) {
1980                 /* configure a CPP PCIe2CPP BAR for mapping the CPP target */
1981                 area = nfp_cpp_area_alloc_with_name(cpp, cpp_id, "nfp.cdev",
1982                                                     nfp_offset, curlen);
1983                 if (!area) {
1984                         RTE_LOG(ERR, PMD, "%s: area alloc failed\n", __func__);
1985                         return -EIO;
1986                 }
1987
1988                 /* mapping the target */
1989                 err = nfp_cpp_area_acquire(area);
1990                 if (err < 0) {
1991                         RTE_LOG(ERR, PMD, "area acquire failed\n");
1992                         nfp_cpp_area_free(area);
1993                         return -EIO;
1994                 }
1995
1996                 for (pos = 0; pos < curlen; pos += len) {
1997                         len = curlen - pos;
1998                         if (len > sizeof(tmpbuf))
1999                                 len = sizeof(tmpbuf);
2000
2001                         PMD_CPP_LOG(DEBUG, "%s: Receive %u of %zu\n", __func__,
2002                                            len, count);
2003                         err = recv(sockfd, tmpbuf, len, MSG_WAITALL);
2004                         if (err != (int)len) {
2005                                 RTE_LOG(ERR, PMD,
2006                                         "%s: error when receiving, %d of %zu\n",
2007                                         __func__, err, count);
2008                                 nfp_cpp_area_release(area);
2009                                 nfp_cpp_area_free(area);
2010                                 return -EIO;
2011                         }
2012                         err = nfp_cpp_area_write(area, pos, tmpbuf, len);
2013                         if (err < 0) {
2014                                 RTE_LOG(ERR, PMD, "nfp_cpp_area_write error\n");
2015                                 nfp_cpp_area_release(area);
2016                                 nfp_cpp_area_free(area);
2017                                 return -EIO;
2018                         }
2019                 }
2020
2021                 nfp_offset += pos;
2022                 totlen += pos;
2023                 nfp_cpp_area_release(area);
2024                 nfp_cpp_area_free(area);
2025
2026                 count -= pos;
2027                 curlen = (count > NFP_CPP_MEMIO_BOUNDARY) ?
2028                          NFP_CPP_MEMIO_BOUNDARY : count;
2029         }
2030
2031         return 0;
2032 }
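
/*
 * Worked example of the splitting above: with NFP_CPP_MEMIO_BOUNDARY at
 * 1 MB, a 3 MB write starting 512 KB below a boundary is served as four
 * chunks of 512 KB, 1 MB, 1 MB and 512 KB, each with its own CPP area
 * mapping.
 */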
2033
2034 /*
2035  * Serving a read request to NFP from host programs. The request
2036  * sends the read size and the CPP target. The bridge makes use
2037  * of the CPP interface handler configured at PMD setup time. The
2038  * read data is sent back to the requester over the same socket.
2039  */
2040 static int
2041 nfp_cpp_bridge_serve_read(int sockfd, struct nfp_cpp *cpp)
2042 {
2043         struct nfp_cpp_area *area;
2044         off_t offset, nfp_offset;
2045         uint32_t cpp_id, pos, len;
2046         uint32_t tmpbuf[16];
2047         size_t count, curlen, totlen = 0;
2048         int err = 0;
2049
2050         PMD_CPP_LOG(DEBUG, "%s: offset size %zu, count_size: %zu\n", __func__,
2051                 sizeof(off_t), sizeof(size_t));
2052
2053         /* Reading the count param */
2054         err = recv(sockfd, &count, sizeof(off_t), 0);
2055         if (err != sizeof(off_t))
2056                 return -EINVAL;
2057
2058         curlen = count;
2059
2060         /* Reading the offset param */
2061         err = recv(sockfd, &offset, sizeof(off_t), 0);
2062         if (err != sizeof(off_t))
2063                 return -EINVAL;
2064
2065         /* Obtain target's CPP ID and offset in target */
2066         cpp_id = (offset >> 40) << 8;
2067         nfp_offset = offset & ((1ull << 40) - 1);
2068
2069         PMD_CPP_LOG(DEBUG, "%s: count %zu and offset %jd\n", __func__, count,
2070                            offset);
2071         PMD_CPP_LOG(DEBUG, "%s: cpp_id %08x and nfp_offset %jd\n", __func__,
2072                            cpp_id, nfp_offset);
2073
2074         /* Trim the first chunk if it crosses a memory mapping boundary */
2075         if (((nfp_offset + (off_t)count - 1) & ~(NFP_CPP_MEMIO_BOUNDARY - 1)) !=
2076             (nfp_offset & ~(NFP_CPP_MEMIO_BOUNDARY - 1))) {
2077                 curlen = NFP_CPP_MEMIO_BOUNDARY -
2078                         (nfp_offset & (NFP_CPP_MEMIO_BOUNDARY - 1));
2079         }
2080
2081         while (count > 0) {
2082                 area = nfp_cpp_area_alloc_with_name(cpp, cpp_id, "nfp.cdev",
2083                                                     nfp_offset, curlen);
2084                 if (!area) {
2085                         RTE_LOG(ERR, PMD, "%s: area alloc failed\n", __func__);
2086                         return -EIO;
2087                 }
2088
2089                 err = nfp_cpp_area_acquire(area);
2090                 if (err < 0) {
2091                         RTE_LOG(ERR, PMD, "area acquire failed\n");
2092                         nfp_cpp_area_free(area);
2093                         return -EIO;
2094                 }
2095
2096                 for (pos = 0; pos < curlen; pos += len) {
2097                         len = curlen - pos;
2098                         if (len > sizeof(tmpbuf))
2099                                 len = sizeof(tmpbuf);
2100
2101                         err = nfp_cpp_area_read(area, pos, tmpbuf, len);
2102                         if (err < 0) {
2103                                 RTE_LOG(ERR, PMD, "nfp_cpp_area_read error\n");
2104                                 nfp_cpp_area_release(area);
2105                                 nfp_cpp_area_free(area);
2106                                 return -EIO;
2107                         }
2108                         PMD_CPP_LOG(DEBUG, "%s: sending %u of %zu\n", __func__,
2109                                            len, count);
2110
2111                         err = send(sockfd, tmpbuf, len, 0);
2112                         if (err != (int)len) {
2113                                 RTE_LOG(ERR, PMD,
2114                                         "%s: error when sending: %d of %zu\n",
2115                                         __func__, err, count);
2116                                 nfp_cpp_area_release(area);
2117                                 nfp_cpp_area_free(area);
2118                                 return -EIO;
2119                         }
2120                 }
2121
2122                 nfp_offset += pos;
2123                 totlen += pos;
2124                 nfp_cpp_area_release(area);
2125                 nfp_cpp_area_free(area);
2126
2127                 count -= pos;
2128                 curlen = (count > NFP_CPP_MEMIO_BOUNDARY) ?
2129                         NFP_CPP_MEMIO_BOUNDARY : count;
2130         }
2131         return 0;
2132 }
2133
2134 #define NFP_IOCTL 'n'
2135 #define NFP_IOCTL_CPP_IDENTIFICATION _IOW(NFP_IOCTL, 0x8f, uint32_t)
2136 /*
2137  * Serving an ioctl command from host NFP tools. This is usually handled
2138  * by a kernel char device driver, which is not available when the PF is
2139  * bound to the PMD. Currently just one ioctl command is served and it
2140  * does not require any CPP access at all.
2141  */
2142 static int
2143 nfp_cpp_bridge_serve_ioctl(int sockfd, struct nfp_cpp *cpp)
2144 {
2145         uint32_t cmd, ident_size, tmp;
2146         int err;
2147
2148         /* Reading the IOCTL command */
2149         err = recv(sockfd, &cmd, 4, 0);
2150         if (err != 4) {
2151                 RTE_LOG(ERR, PMD, "%s: read error from socket\n", __func__);
2152                 return -EIO;
2153         }
2154
2155         /* Only supporting NFP_IOCTL_CPP_IDENTIFICATION */
2156         if (cmd != NFP_IOCTL_CPP_IDENTIFICATION) {
2157                 RTE_LOG(ERR, PMD, "%s: unknown cmd %d\n", __func__, cmd);
2158                 return -EINVAL;
2159         }
2160
2161         err = recv(sockfd, &ident_size, 4, 0);
2162         if (err != 4) {
2163                 RTE_LOG(ERR, PMD, "%s: read error from socket\n", __func__);
2164                 return -EIO;
2165         }
2166
2167         tmp = nfp_cpp_model(cpp);
2168
2169         PMD_CPP_LOG(DEBUG, "%s: sending NFP model %08x\n", __func__, tmp);
2170
2171         err = send(sockfd, &tmp, 4, 0);
2172         if (err != 4) {
2173                 RTE_LOG(ERR, PMD, "%s: error writing to socket\n", __func__);
2174                 return -EIO;
2175         }
2176
2177         tmp = cpp->interface;
2178
2179         PMD_CPP_LOG(DEBUG, "%s: sending NFP interface %08x\n", __func__, tmp);
2180
2181         err = send(sockfd, &tmp, 4, 0);
2182         if (err != 4) {
2183                 RTE_LOG(ERR, PMD, "%s: error writing to socket\n", __func__);
2184                 return -EIO;
2185         }
2186
2187         return 0;
2188 }
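
/*
 * Wire format served above, for reference: the client sends the 4-byte
 * ioctl command followed by a 4-byte identification size, and receives
 * the 4-byte NFP model followed by the 4-byte CPP interface id.
 */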
2189
2190 #define NFP_BRIDGE_OP_READ      20
2191 #define NFP_BRIDGE_OP_WRITE     30
2192 #define NFP_BRIDGE_OP_IOCTL     40
2193
2194 /*
2195  * This is the code to be executed by a service core. The CPP bridge
2196  * interface is based on a unix socket, and the requests usually served
2197  * by a kernel char device driver (read, write and ioctl) are handled by
2198  * the CPP bridge instead. NFP host tools can thus run unmodified through
2199  * a wrapper library and LD_LIBRARY_PATH, unaware that the CPP bridge
2200  * rather than the NFP kernel char driver performs the CPP accesses.
2201  */
2202 static int32_t
2203 nfp_cpp_bridge_service_func(void *args)
2204 {
2205         struct sockaddr address;
2206         struct nfp_cpp *cpp = args;
2207         int sockfd, datafd, op, ret;
2208
2209         unlink("/tmp/nfp_cpp");
2210         sockfd = socket(AF_UNIX, SOCK_STREAM, 0);
2211         if (sockfd < 0) {
2212                 RTE_LOG(ERR, PMD, "%s: socket creation error. Service failed\n",
2213                         __func__);
2214                 return -EIO;
2215         }
2216
2217         memset(&address, 0, sizeof(struct sockaddr));
2218
2219         address.sa_family = AF_UNIX;
2220         strcpy(address.sa_data, "/tmp/nfp_cpp");
2221
2222         ret = bind(sockfd, (const struct sockaddr *)&address,
2223                    sizeof(struct sockaddr));
2224         if (ret < 0) {
2225                 RTE_LOG(ERR, PMD, "%s: bind error (%d). Service failed\n",
2226                                   __func__, errno);
2227                 close(sockfd);
2228                 return ret;
2229         }
2230
2231         ret = listen(sockfd, 20);
2232         if (ret < 0) {
2233                 RTE_LOG(ERR, PMD, "%s: listen error(%d). Service failed\n",
2234                                   __func__, errno);
2235                 close(sockfd);
2236                 return ret;
2237         }
2238
2239         for (;;) {
2240                 datafd = accept(sockfd, NULL, NULL);
2241                 if (datafd < 0) {
2242                         RTE_LOG(ERR, PMD, "%s: accept call error (%d)\n",
2243                                           __func__, errno);
2244                         RTE_LOG(ERR, PMD, "%s: service failed\n", __func__);
2245                         close(sockfd);
2246                         return -EIO;
2247                 }
2248
2249                 while (1) {
2250                         ret = recv(datafd, &op, 4, 0);
2251                         if (ret <= 0) {
2252                                 PMD_CPP_LOG(DEBUG, "%s: socket close\n",
2253                                                    __func__);
2254                                 break;
2255                         }
2256
2257                         PMD_CPP_LOG(DEBUG, "%s: getting op %u\n", __func__, op);
2258
2259                         if (op == NFP_BRIDGE_OP_READ)
2260                                 nfp_cpp_bridge_serve_read(datafd, cpp);
2261
2262                         if (op == NFP_BRIDGE_OP_WRITE)
2263                                 nfp_cpp_bridge_serve_write(datafd, cpp);
2264
2265                         if (op == NFP_BRIDGE_OP_IOCTL)
2266                                 nfp_cpp_bridge_serve_ioctl(datafd, cpp);
2267
2268                         if (op == 0)
2269                                 break;
2270                 }
2271                 close(datafd);
2272         }
2273         close(sockfd);
2274
2275         return 0;
2276 }
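
/*
 * Hedged host-side sketch, not part of the PMD and with a hypothetical
 * name: a minimal client for the bridge above. It connects to
 * /tmp/nfp_cpp and follows the wire order the service expects for a
 * read: the 4-byte opcode, the transfer size and the encoded CPP
 * offset, then receives the data on the same socket. Like the service
 * code itself, it assumes an LP64 host where size_t and off_t are both
 * 8 bytes.
 */
static int __rte_unused
nfp_example_bridge_read(off_t offset, void *buf, size_t len)
{
        struct sockaddr address;
        int fd, op = NFP_BRIDGE_OP_READ;

        fd = socket(AF_UNIX, SOCK_STREAM, 0);
        if (fd < 0)
                return -errno;

        memset(&address, 0, sizeof(address));
        address.sa_family = AF_UNIX;
        strcpy(address.sa_data, "/tmp/nfp_cpp");

        if (connect(fd, &address, sizeof(address)) < 0) {
                close(fd);
                return -errno;
        }

        if (send(fd, &op, 4, 0) != 4 ||
            send(fd, &len, sizeof(off_t), 0) != (ssize_t)sizeof(off_t) ||
            send(fd, &offset, sizeof(off_t), 0) != (ssize_t)sizeof(off_t) ||
            recv(fd, buf, len, MSG_WAITALL) != (ssize_t)len) {
                close(fd);
                return -EIO;
        }

        close(fd);
        return 0;
}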
2277
2278 #define DEFAULT_FW_PATH       "/lib/firmware/netronome"
2279
2280 static int
2281 nfp_fw_upload(struct rte_pci_device *dev, struct nfp_nsp *nsp, char *card)
2282 {
2283         struct nfp_cpp *cpp = nsp->cpp;
2284         void *fw_buf;
2285         char fw_name[125];
2286         char serial[40];
2287         size_t fsize;
2288
2289         /* Looking for firmware file in order of priority */
2290
2291         /* First try to find a firmware image specific for this device */
2292         snprintf(serial, sizeof(serial),
2293                         "serial-%02x-%02x-%02x-%02x-%02x-%02x-%02x-%02x",
2294                 cpp->serial[0], cpp->serial[1], cpp->serial[2], cpp->serial[3],
2295                 cpp->serial[4], cpp->serial[5], cpp->interface >> 8,
2296                 cpp->interface & 0xff);
2297
2298         snprintf(fw_name, sizeof(fw_name), "%s/%s.nffw", DEFAULT_FW_PATH,
2299                         serial);
2300         PMD_DRV_LOG(DEBUG, "Trying with fw file: %s", fw_name);
2301         if (rte_firmware_read(fw_name, &fw_buf, &fsize) == 0)
2302                 goto load_fw;
2303
2304         /* Then try the PCI name */
2305         snprintf(fw_name, sizeof(fw_name), "%s/pci-%s.nffw", DEFAULT_FW_PATH,
2306                         dev->device.name);
2307         PMD_DRV_LOG(DEBUG, "Trying with fw file: %s", fw_name);
2308         if (rte_firmware_read(fw_name, &fw_buf, &fsize) == 0)
2309                 goto load_fw;
2310
2311         /* Finally try the card type and media */
2312         snprintf(fw_name, sizeof(fw_name), "%s/%s", DEFAULT_FW_PATH, card);
2313         PMD_DRV_LOG(DEBUG, "Trying with fw file: %s", fw_name);
2314         if (rte_firmware_read(fw_name, &fw_buf, &fsize) < 0) {
2315                 PMD_DRV_LOG(INFO, "Firmware file %s not found.", fw_name);
2316                 return -ENOENT;
2317         }
2318
2319 load_fw:
2320         PMD_DRV_LOG(INFO, "Firmware file found at %s with size: %zu",
2321                 fw_name, fsize);
2322
2323         PMD_DRV_LOG(INFO, "Uploading the firmware ...");
2324         nfp_nsp_load_fw(nsp, fw_buf, fsize);
2325         PMD_DRV_LOG(INFO, "Done");
2326
2327         free(fw_buf);
2328         return 0;
2329 }
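
/*
 * Example of the lookup order above for a hypothetical 2x10G card at PCI
 * address 0000:04:00.0 (serial illustrative):
 *   /lib/firmware/netronome/serial-00-15-4d-13-51-0c-10-ff.nffw
 *   /lib/firmware/netronome/pci-0000:04:00.0.nffw
 *   /lib/firmware/netronome/nic_AMDA0096-0001_2x10.nffw
 */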
2330
2331 static int
2332 nfp_fw_setup(struct rte_pci_device *dev, struct nfp_cpp *cpp,
2333              struct nfp_eth_table *nfp_eth_table, struct nfp_hwinfo *hwinfo)
2334 {
2335         struct nfp_nsp *nsp;
2336         const char *nfp_fw_model;
2337         char card_desc[100];
2338         int err = 0;
2339
2340         nfp_fw_model = nfp_hwinfo_lookup(hwinfo, "assembly.partno");
2341
2342         if (nfp_fw_model) {
2343                 PMD_DRV_LOG(INFO, "firmware model found: %s", nfp_fw_model);
2344         } else {
2345                 PMD_DRV_LOG(ERR, "firmware model NOT found");
2346                 return -EIO;
2347         }
2348
2349         if (nfp_eth_table->count == 0 || nfp_eth_table->count > 8) {
2350                 PMD_DRV_LOG(ERR, "NFP ethernet table reports an invalid port count: %u",
2351                        nfp_eth_table->count);
2352                 return -EIO;
2353         }
2354
2355         PMD_DRV_LOG(INFO, "NFP ethernet port table reports %u ports",
2356                            nfp_eth_table->count);
2357
2358         PMD_DRV_LOG(INFO, "Port speed: %u", nfp_eth_table->ports[0].speed);
2359
2360         snprintf(card_desc, sizeof(card_desc), "nic_%s_%dx%d.nffw",
2361                         nfp_fw_model, nfp_eth_table->count,
2362                         nfp_eth_table->ports[0].speed / 1000);
2363
2364         nsp = nfp_nsp_open(cpp);
2365         if (!nsp) {
2366                 PMD_DRV_LOG(ERR, "NFP error when obtaining NSP handle");
2367                 return -EIO;
2368         }
2369
2370         nfp_nsp_device_soft_reset(nsp);
2371         err = nfp_fw_upload(dev, nsp, card_desc);
2372
2373         nfp_nsp_close(nsp);
2374         return err;
2375 }
2376
2377 static int nfp_init_phyports(struct nfp_pf_dev *pf_dev)
2378 {
2379         struct nfp_net_hw *hw;
2380         struct rte_eth_dev *eth_dev;
2381         struct nfp_eth_table *nfp_eth_table = NULL;
2382         int ret = 0;
2383         int i;
2384
2385         nfp_eth_table = nfp_eth_read_ports(pf_dev->cpp);
2386         if (!nfp_eth_table) {
2387                 PMD_INIT_LOG(ERR, "Error reading NFP ethernet table");
2388                 ret = -EIO;
2389                 goto error;
2390         }
2391
2392         /* Loop through all physical ports on PF */
2393         for (i = 0; i < pf_dev->total_phyports; i++) {
2394                 const unsigned int numa_node = rte_socket_id();
2395                 char port_name[RTE_ETH_NAME_MAX_LEN];
2396
2397                 snprintf(port_name, sizeof(port_name), "%s_port%d",
2398                          pf_dev->pci_dev->device.name, i);
2399
2400                 /* Allocate an eth_dev for this phyport */
2401                 eth_dev = rte_eth_dev_allocate(port_name);
2402                 if (!eth_dev) {
2403                         ret = -ENODEV;
2404                         goto port_cleanup;
2405                 }
2406
2407                 /* Allocate memory for this phyport */
2408                 eth_dev->data->dev_private =
2409                         rte_zmalloc_socket(port_name, sizeof(struct nfp_net_hw),
2410                                            RTE_CACHE_LINE_SIZE, numa_node);
2411                 if (!eth_dev->data->dev_private) {
2412                         ret = -ENOMEM;
2413                         rte_eth_dev_release_port(eth_dev);
2414                         goto port_cleanup;
2415                 }
2416
2417                 hw = NFP_NET_DEV_PRIVATE_TO_HW(eth_dev->data->dev_private);
2418
2419                 /* Add this device to the PF's array of physical ports */
2420                 pf_dev->ports[i] = hw;
2421
2422                 hw->pf_dev = pf_dev;
2423                 hw->cpp = pf_dev->cpp;
2424                 hw->eth_dev = eth_dev;
2425                 hw->idx = i;
2426                 hw->nfp_idx = nfp_eth_table->ports[i].index;
2427                 hw->is_phyport = true;
2428
2429                 eth_dev->device = &pf_dev->pci_dev->device;
2430
2431                 /* ctrl/tx/rx BAR mappings and the remaining init happen in
2432                  * nfp_net_init
2433                  */
2434                 ret = nfp_net_init(eth_dev);
2435
2436                 if (ret) {
2437                         ret = -ENODEV;
2438                         goto port_cleanup;
2439                 }
2440
2441                 rte_eth_dev_probing_finish(eth_dev);
2442
2443         } /* End loop, all ports on this PF */
2444         ret = 0;
2445         goto eth_table_cleanup;
2446
2447 port_cleanup:
2448         for (i = 0; i < pf_dev->total_phyports; i++) {
2449                 if (pf_dev->ports[i] && pf_dev->ports[i]->eth_dev) {
2450                         struct rte_eth_dev *tmp_dev;
2451                         tmp_dev = pf_dev->ports[i]->eth_dev;
2452                         rte_eth_dev_release_port(tmp_dev);
2453                         pf_dev->ports[i] = NULL;
2454                 }
2455         }
2456 eth_table_cleanup:
2457         free(nfp_eth_table);
2458 error:
2459         return ret;
2460 }
2461
2462 static void nfp_register_cpp_service(struct nfp_cpp *cpp)
2463 {
2464         uint32_t *cpp_service_id = NULL;
2465         struct rte_service_spec service;
2466
2467         memset(&service, 0, sizeof(struct rte_service_spec));
2468         snprintf(service.name, sizeof(service.name), "nfp_cpp_service");
2469         service.callback = nfp_cpp_bridge_service_func;
2470         service.callback_userdata = (void *)cpp;
2471
2472         if (rte_service_component_register(&service,
2473                                            cpp_service_id))
2474                 RTE_LOG(WARNING, PMD, "NFP CPP bridge service register() failed\n");
2475         else
2476                 RTE_LOG(DEBUG, PMD, "NFP CPP bridge service registered\n");
2477 }
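
/*
 * Hedged usage sketch with a hypothetical name: the registration above
 * only makes the service known, so an application still has to map it to
 * a service lcore and set both run states before the bridge starts being
 * polled. The service id can be recovered with rte_service_get_by_name()
 * since the PMD registers with a NULL id pointer.
 */
static int __rte_unused
nfp_example_enable_cpp_service(uint32_t service_id, uint32_t lcore_id)
{
        int ret;

        ret = rte_service_lcore_add(lcore_id);
        if (ret != 0 && ret != -EALREADY)
                return ret;

        /* Allow the component to run, then map it to the service lcore */
        ret = rte_service_component_runstate_set(service_id, 1);
        if (ret != 0)
                return ret;

        ret = rte_service_map_lcore_set(service_id, lcore_id, 1);
        if (ret != 0)
                return ret;

        ret = rte_service_runstate_set(service_id, 1);
        if (ret != 0)
                return ret;

        ret = rte_service_lcore_start(lcore_id);
        return ret == -EALREADY ? 0 : ret;
}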
2478
2479 static int nfp_pf_init(struct rte_pci_device *pci_dev)
2480 {
2481         struct nfp_pf_dev *pf_dev = NULL;
2482         struct nfp_cpp *cpp;
2483         struct nfp_hwinfo *hwinfo;
2484         struct nfp_rtsym_table *sym_tbl;
2485         struct nfp_eth_table *nfp_eth_table = NULL;
2486         char name[RTE_ETH_NAME_MAX_LEN];
2487         int total_ports;
2488         int ret = -ENODEV;
2489         int err;
2490
2491         if (!pci_dev)
2492                 return ret;
2493
2494         /*
2495          * When the device is bound to UIO it could, by mistake, be used by
2496          * two DPDK apps at once, and the UIO driver does not prevent it.
2497          * This could lead to serious problems when configuring the NFP CPP
2498          * interface. We avoid this by telling the CPP init code to use a
2499          * lock file if UIO is being used.
2500          */
2501         if (pci_dev->kdrv == RTE_PCI_KDRV_VFIO)
2502                 cpp = nfp_cpp_from_device_name(pci_dev, 0);
2503         else
2504                 cpp = nfp_cpp_from_device_name(pci_dev, 1);
2505
2506         if (!cpp) {
2507                 PMD_INIT_LOG(ERR, "A CPP handle can not be obtained");
2508                 ret = -EIO;
2509                 goto error;
2510         }
2511
2512         hwinfo = nfp_hwinfo_read(cpp);
2513         if (!hwinfo) {
2514                 PMD_INIT_LOG(ERR, "Error reading hwinfo table");
2515                 ret = -EIO;
2516                 goto error;
2517         }
2518
2519         nfp_eth_table = nfp_eth_read_ports(cpp);
2520         if (!nfp_eth_table) {
2521                 PMD_INIT_LOG(ERR, "Error reading NFP ethernet table");
2522                 ret = -EIO;
2523                 goto hwinfo_cleanup;
2524         }
2525
2526         if (nfp_fw_setup(pci_dev, cpp, nfp_eth_table, hwinfo)) {
2527                 PMD_INIT_LOG(ERR, "Error when uploading firmware");
2528                 ret = -EIO;
2529                 goto eth_table_cleanup;
2530         }
2531
2532         /* Now the symbol table should be there */
2533         sym_tbl = nfp_rtsym_table_read(cpp);
2534         if (!sym_tbl) {
2535                 PMD_INIT_LOG(ERR, "Something is wrong with the firmware"
2536                                 " symbol table");
2537                 ret = -EIO;
2538                 goto eth_table_cleanup;
2539         }
2540
2541         total_ports = nfp_rtsym_read_le(sym_tbl, "nfd_cfg_pf0_num_ports", &err);
2542         if (total_ports != (int)nfp_eth_table->count) {
2543                 PMD_DRV_LOG(ERR, "Inconsistent number of ports");
2544                 ret = -EIO;
2545                 goto sym_tbl_cleanup;
2546         }
2547
2548         PMD_INIT_LOG(INFO, "Total physical ports: %d", total_ports);
2549
2550         if (total_ports <= 0 || total_ports > 8) {
2551                 PMD_INIT_LOG(ERR, "nfd_cfg_pf0_num_ports symbol with wrong value");
2552                 ret = -ENODEV;
2553                 goto sym_tbl_cleanup;
2554         }
2555         /* Allocate memory for the PF "device" */
2556         snprintf(name, sizeof(name), "nfp_pf%d", 0);
2557         pf_dev = rte_zmalloc(name, sizeof(*pf_dev), 0);
2558         if (!pf_dev) {
2559                 ret = -ENOMEM;
2560                 goto sym_tbl_cleanup;
2561         }
2562
2563         /* Populate the newly created PF device */
2564         pf_dev->cpp = cpp;
2565         pf_dev->hwinfo = hwinfo;
2566         pf_dev->sym_tbl = sym_tbl;
2567         pf_dev->total_phyports = total_ports;
2568
2569         if (total_ports > 1)
2570                 pf_dev->multiport = true;
2571
2572         pf_dev->pci_dev = pci_dev;
2573
2574         /* Map the symbol table */
2575         pf_dev->ctrl_bar = nfp_rtsym_map(pf_dev->sym_tbl, "_pf0_net_bar0",
2576                                      pf_dev->total_phyports * NFP_PF_CSR_SLICE_SIZE,
2577                                      &pf_dev->ctrl_area);
2578         if (!pf_dev->ctrl_bar) {
2579                 PMD_INIT_LOG(ERR, "nfp_rtsym_map fails for _pf0_net_bar0");
2580                 ret = -EIO;
2581                 goto pf_cleanup;
2582         }
2583
2584         PMD_INIT_LOG(DEBUG, "ctrl bar: %p", pf_dev->ctrl_bar);
2585
2586         /* configure access to tx/rx vNIC BARs */
2587         pf_dev->hw_queues = nfp_cpp_map_area(pf_dev->cpp, 0, 0,
2588                                               NFP_PCIE_QUEUE(0),
2589                                               NFP_QCP_QUEUE_AREA_SZ,
2590                                               &pf_dev->hwqueues_area);
2591         if (!pf_dev->hw_queues) {
2592                 PMD_INIT_LOG(ERR, "nfp_cpp_map_area fails for net.qc");
2593                 ret = -EIO;
2594                 goto ctrl_area_cleanup;
2595         }
2596
2597         PMD_INIT_LOG(DEBUG, "tx/rx bar address: 0x%p", pf_dev->hw_queues);
2598
2599         /* Initialize and prep physical ports now
2600          * This will loop through all physical ports
2601          */
2602         ret = nfp_init_phyports(pf_dev);
2603         if (ret) {
2604                 PMD_INIT_LOG(ERR, "Could not create physical ports");
2605                 goto hwqueues_cleanup;
2606         }
2607
2608         /* register the CPP bridge service here for primary use */
2609         nfp_register_cpp_service(pf_dev->cpp);
2610
2611         return 0;
2612
2613 hwqueues_cleanup:
2614         nfp_cpp_area_free(pf_dev->hwqueues_area);
2615 ctrl_area_cleanup:
2616         nfp_cpp_area_free(pf_dev->ctrl_area);
2617 pf_cleanup:
2618         rte_free(pf_dev);
2619 sym_tbl_cleanup:
2620         free(sym_tbl);
2621 eth_table_cleanup:
2622         free(nfp_eth_table);
2623 hwinfo_cleanup:
2624         free(hwinfo);
2625 error:
2626         return ret;
2627 }
2628
2629 /*
2630  * When attaching to the NFP4000/6000 PF on a secondary process there
2631  * is no need to initialize the PF again. Only minimal work is required
2632  * here
2633  */
2634 static int nfp_pf_secondary_init(struct rte_pci_device *pci_dev)
2635 {
2636         struct nfp_cpp *cpp;
2637         struct nfp_rtsym_table *sym_tbl;
2638         int total_ports;
2639         int i;
2640         int err;
2641
2642         if (!pci_dev)
2643                 return -ENODEV;
2644
2645         /*
2646          * When the device is bound to UIO it could, by mistake, be used by
2647          * two DPDK apps at once, and the UIO driver does not prevent it.
2648          * This could lead to serious problems when configuring the NFP CPP
2649          * interface. We avoid this by telling the CPP init code to use a
2650          * lock file if UIO is being used.
2651          */
2652         if (pci_dev->kdrv == RTE_PCI_KDRV_VFIO)
2653                 cpp = nfp_cpp_from_device_name(pci_dev, 0);
2654         else
2655                 cpp = nfp_cpp_from_device_name(pci_dev, 1);
2656
2657         if (!cpp) {
2658                 PMD_INIT_LOG(ERR, "A CPP handle can not be obtained");
2659                 return -EIO;
2660         }
2661
2662         /*
2663          * We don't have access to the PF created in the primary process
2664          * here so we have to read the number of ports from firmware
2665          */
2666         sym_tbl = nfp_rtsym_table_read(cpp);
2667         if (!sym_tbl) {
2668                 PMD_INIT_LOG(ERR, "Something is wrong with the firmware"
2669                                 " symbol table");
2670                 return -EIO;
2671         }
2672
2673         total_ports = nfp_rtsym_read_le(sym_tbl, "nfd_cfg_pf0_num_ports", &err);
2674         if (err != 0 || total_ports <= 0 || total_ports > 8) {
2675                 PMD_INIT_LOG(ERR, "nfd_cfg_pf0_num_ports symbol with wrong value");
2676                 return -ENODEV;
2677         }
2678
2675         for (i = 0; i < total_ports; i++) {
2676                 struct rte_eth_dev *eth_dev;
2677                 char port_name[RTE_ETH_NAME_MAX_LEN];
2678
2679                 snprintf(port_name, sizeof(port_name), "%s_port%d",
2680                          pci_dev->device.name, i);
2681
2682                 PMD_DRV_LOG(DEBUG, "Secondary attaching to port %s",
2683                     port_name);
2684                 eth_dev = rte_eth_dev_attach_secondary(port_name);
2685                 if (!eth_dev) {
2686                         RTE_LOG(ERR, PMD,
2687                                 "secondary process attach failed, "
2688                                 "ethdev doesn't exist\n");
2689                         return -ENODEV;
2690                 }
2691                 eth_dev->process_private = cpp;
2692                 eth_dev->dev_ops = &nfp_net_eth_dev_ops;
2693                 eth_dev->rx_queue_count = nfp_net_rx_queue_count;
2694                 eth_dev->rx_pkt_burst = &nfp_net_recv_pkts;
2695                 eth_dev->tx_pkt_burst = &nfp_net_xmit_pkts;
2696                 rte_eth_dev_probing_finish(eth_dev);
2697         }
2698
2699         /* Register the CPP bridge service for the secondary too */
2700         nfp_register_cpp_service(cpp);
2701
2702         return 0;
2703 }
2704
2705 static int nfp_pf_pci_probe(struct rte_pci_driver *pci_drv __rte_unused,
2706                             struct rte_pci_device *dev)
2707 {
2708         if (rte_eal_process_type() == RTE_PROC_PRIMARY)
2709                 return nfp_pf_init(dev);
2710         else
2711                 return nfp_pf_secondary_init(dev);
2712 }
2713
2714 static const struct rte_pci_id pci_id_nfp_pf_net_map[] = {
2715         {
2716                 RTE_PCI_DEVICE(PCI_VENDOR_ID_NETRONOME,
2717                                PCI_DEVICE_ID_NFP4000_PF_NIC)
2718         },
2719         {
2720                 RTE_PCI_DEVICE(PCI_VENDOR_ID_NETRONOME,
2721                                PCI_DEVICE_ID_NFP6000_PF_NIC)
2722         },
2723         {
2724                 .vendor_id = 0,
2725         },
2726 };
2727
2728 static const struct rte_pci_id pci_id_nfp_vf_net_map[] = {
2729         {
2730                 RTE_PCI_DEVICE(PCI_VENDOR_ID_NETRONOME,
2731                                PCI_DEVICE_ID_NFP6000_VF_NIC)
2732         },
2733         {
2734                 .vendor_id = 0,
2735         },
2736 };
2737
2738 static int nfp_pci_uninit(struct rte_eth_dev *eth_dev)
2739 {
2740         struct rte_pci_device *pci_dev;
2741         uint16_t port_id;
2742
2743         pci_dev = RTE_ETH_DEV_TO_PCI(eth_dev);
2744
2745         if (pci_dev->id.device_id == PCI_DEVICE_ID_NFP4000_PF_NIC ||
2746             pci_dev->id.device_id == PCI_DEVICE_ID_NFP6000_PF_NIC) {
2747                 /* Free up all physical ports under PF */
2748                 RTE_ETH_FOREACH_DEV_OF(port_id, &pci_dev->device)
2749                         rte_eth_dev_close(port_id);
2750                 /*
2751                  * Ports can be closed and freed but hotplugging is not
2752                  * currently supported
2753                  */
2754                 return -ENOTSUP;
2755         }
2756
2757         /* VF cleanup, just free private port data */
2758         return nfp_net_close(eth_dev);
2759 }
2760
2761 static int eth_nfp_pci_probe(struct rte_pci_driver *pci_drv __rte_unused,
2762         struct rte_pci_device *pci_dev)
2763 {
2764         return rte_eth_dev_pci_generic_probe(pci_dev,
2765                 sizeof(struct nfp_net_adapter), nfp_net_init);
2766 }
2767
2768 static int eth_nfp_pci_remove(struct rte_pci_device *pci_dev)
2769 {
2770         return rte_eth_dev_pci_generic_remove(pci_dev, nfp_pci_uninit);
2771 }
2772
2773 static struct rte_pci_driver rte_nfp_net_pf_pmd = {
2774         .id_table = pci_id_nfp_pf_net_map,
2775         .drv_flags = RTE_PCI_DRV_NEED_MAPPING | RTE_PCI_DRV_INTR_LSC,
2776         .probe = nfp_pf_pci_probe,
2777         .remove = eth_nfp_pci_remove,
2778 };
2779
2780 static struct rte_pci_driver rte_nfp_net_vf_pmd = {
2781         .id_table = pci_id_nfp_vf_net_map,
2782         .drv_flags = RTE_PCI_DRV_NEED_MAPPING | RTE_PCI_DRV_INTR_LSC,
2783         .probe = eth_nfp_pci_probe,
2784         .remove = eth_nfp_pci_remove,
2785 };
2786
2787 RTE_PMD_REGISTER_PCI(net_nfp_pf, rte_nfp_net_pf_pmd);
2788 RTE_PMD_REGISTER_PCI(net_nfp_vf, rte_nfp_net_vf_pmd);
2789 RTE_PMD_REGISTER_PCI_TABLE(net_nfp_pf, pci_id_nfp_pf_net_map);
2790 RTE_PMD_REGISTER_PCI_TABLE(net_nfp_vf, pci_id_nfp_vf_net_map);
2791 RTE_PMD_REGISTER_KMOD_DEP(net_nfp_pf, "* igb_uio | uio_pci_generic | vfio");
2792 RTE_PMD_REGISTER_KMOD_DEP(net_nfp_vf, "* igb_uio | uio_pci_generic | vfio");
2793 RTE_LOG_REGISTER_SUFFIX(nfp_logtype_init, init, NOTICE);
2794 RTE_LOG_REGISTER_SUFFIX(nfp_logtype_driver, driver, NOTICE);
2795 /*
2796  * Local variables:
2797  * c-file-style: "Linux"
2798  * indent-tabs-mode: t
2799  * End:
2800  */