net/nfp: configure L2 broadcast and multicast
[dpdk.git] / drivers/net/nfp/nfp_net.c
1 /*
2  * Copyright (c) 2014, 2015 Netronome Systems, Inc.
3  * All rights reserved.
4  *
5  * Small portions derived from code Copyright(c) 2010-2015 Intel Corporation.
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions are met:
9  *
10  * 1. Redistributions of source code must retain the above copyright notice,
11  *  this list of conditions and the following disclaimer.
12  *
13  * 2. Redistributions in binary form must reproduce the above copyright
14  *  notice, this list of conditions and the following disclaimer in the
15  *  documentation and/or other materials provided with the distribution
16  *
17  * 3. Neither the name of the copyright holder nor the names of its
18  *  contributors may be used to endorse or promote products derived from this
19  *  software without specific prior written permission.
20  *
21  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
22  * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
23  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
24  * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
25  * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
26  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
27  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
28  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
29  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
30  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
31  * POSSIBILITY OF SUCH DAMAGE.
32  */
33
34 /*
35  * vim:shiftwidth=8:noexpandtab
36  *
37  * @file dpdk/pmd/nfp_net.c
38  *
39  * Netronome vNIC DPDK Poll-Mode Driver: Main entry point
40  */
41
42 #include <rte_byteorder.h>
43 #include <rte_common.h>
44 #include <rte_log.h>
45 #include <rte_debug.h>
46 #include <rte_ethdev.h>
47 #include <rte_ethdev_pci.h>
48 #include <rte_dev.h>
49 #include <rte_ether.h>
50 #include <rte_malloc.h>
51 #include <rte_memzone.h>
52 #include <rte_mempool.h>
53 #include <rte_version.h>
54 #include <rte_string_fns.h>
55 #include <rte_alarm.h>
56 #include <rte_spinlock.h>
57
58 #include "nfp_nfpu.h"
59 #include "nfp_net_pmd.h"
60 #include "nfp_net_logs.h"
61 #include "nfp_net_ctrl.h"
62
63 /* Prototypes */
64 static void nfp_net_close(struct rte_eth_dev *dev);
65 static int nfp_net_configure(struct rte_eth_dev *dev);
66 static void nfp_net_dev_interrupt_handler(void *param);
67 static void nfp_net_dev_interrupt_delayed_handler(void *param);
68 static int nfp_net_dev_mtu_set(struct rte_eth_dev *dev, uint16_t mtu);
69 static void nfp_net_infos_get(struct rte_eth_dev *dev,
70                               struct rte_eth_dev_info *dev_info);
71 static int nfp_net_init(struct rte_eth_dev *eth_dev);
72 static int nfp_net_link_update(struct rte_eth_dev *dev, int wait_to_complete);
73 static void nfp_net_promisc_enable(struct rte_eth_dev *dev);
74 static void nfp_net_promisc_disable(struct rte_eth_dev *dev);
75 static int nfp_net_rx_fill_freelist(struct nfp_net_rxq *rxq);
76 static uint32_t nfp_net_rx_queue_count(struct rte_eth_dev *dev,
77                                        uint16_t queue_idx);
78 static uint16_t nfp_net_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts,
79                                   uint16_t nb_pkts);
80 static void nfp_net_rx_queue_release(void *rxq);
81 static int nfp_net_rx_queue_setup(struct rte_eth_dev *dev, uint16_t queue_idx,
82                                   uint16_t nb_desc, unsigned int socket_id,
83                                   const struct rte_eth_rxconf *rx_conf,
84                                   struct rte_mempool *mp);
85 static int nfp_net_tx_free_bufs(struct nfp_net_txq *txq);
86 static void nfp_net_tx_queue_release(void *txq);
87 static int nfp_net_tx_queue_setup(struct rte_eth_dev *dev, uint16_t queue_idx,
88                                   uint16_t nb_desc, unsigned int socket_id,
89                                   const struct rte_eth_txconf *tx_conf);
90 static int nfp_net_start(struct rte_eth_dev *dev);
91 static void nfp_net_stats_get(struct rte_eth_dev *dev,
92                               struct rte_eth_stats *stats);
93 static void nfp_net_stats_reset(struct rte_eth_dev *dev);
94 static void nfp_net_stop(struct rte_eth_dev *dev);
95 static uint16_t nfp_net_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts,
96                                   uint16_t nb_pkts);
97
98 /*
99  * The offset of the queue controller queues in the PCIe Target. These
100  * happen to be at the same offset on the NFP6000 and the NFP3200 so
101  * we use a single macro here.
102  */
103 #define NFP_PCIE_QUEUE(_q)      (0x800 * ((_q) & 0xff))
104
105 /* Maximum value which can be added to a queue with one transaction */
106 #define NFP_QCP_MAX_ADD 0x7f
107
108 #define RTE_MBUF_DMA_ADDR_DEFAULT(mb) \
109         (uint64_t)((mb)->buf_physaddr + RTE_PKTMBUF_HEADROOM)
110
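/*
 * Example (illustrative only): the default DMA address handed to the NFP
 * is the physical address of the mbuf data buffer plus the standard
 * headroom, so for a freshly allocated mbuf (mp being some mempool):
 *
 *	struct rte_mbuf *mbuf = rte_pktmbuf_alloc(mp);
 *	uint64_t dma = RTE_MBUF_DMA_ADDR_DEFAULT(mbuf);
 *	// dma == mbuf->buf_physaddr + RTE_PKTMBUF_HEADROOM
 */
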
111 /* nfp_qcp_ptr - Read or Write Pointer of a queue */
112 enum nfp_qcp_ptr {
113         NFP_QCP_READ_PTR = 0,
114         NFP_QCP_WRITE_PTR
115 };
116
117 /*
118  * nfp_qcp_ptr_add - Add the value to the selected pointer of a queue
119  * @q: Base address for queue structure
120  * @ptr: Add to the Read or Write pointer
121  * @val: Value to add to the queue pointer
122  *
123  * If @val is greater than @NFP_QCP_MAX_ADD multiple writes are performed.
124  */
125 static inline void
126 nfp_qcp_ptr_add(uint8_t *q, enum nfp_qcp_ptr ptr, uint32_t val)
127 {
128         uint32_t off;
129
130         if (ptr == NFP_QCP_READ_PTR)
131                 off = NFP_QCP_QUEUE_ADD_RPTR;
132         else
133                 off = NFP_QCP_QUEUE_ADD_WPTR;
134
135         while (val > NFP_QCP_MAX_ADD) {
136                 nn_writel(rte_cpu_to_le_32(NFP_QCP_MAX_ADD), q + off);
137                 val -= NFP_QCP_MAX_ADD;
138         }
139
140         nn_writel(rte_cpu_to_le_32(val), q + off);
141 }
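
/*
 * Example (illustrative only): because a single QCP transaction can only
 * add up to NFP_QCP_MAX_ADD (0x7f), larger values are split into several
 * writes. E.g. refilling 200 freelist descriptors:
 *
 *	nfp_qcp_ptr_add(rxq->qcp_fl, NFP_QCP_WRITE_PTR, 200);
 *	// performs two writes: 0x7f (127) first, then the remaining 73
 */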
142
143 /*
144  * nfp_qcp_read - Read the current Read/Write pointer value for a queue
145  * @q:  Base address for queue structure
146  * @ptr: Read or Write pointer
147  */
148 static inline uint32_t
149 nfp_qcp_read(uint8_t *q, enum nfp_qcp_ptr ptr)
150 {
151         uint32_t off;
152         uint32_t val;
153
154         if (ptr == NFP_QCP_READ_PTR)
155                 off = NFP_QCP_QUEUE_STS_LO;
156         else
157                 off = NFP_QCP_QUEUE_STS_HI;
158
159         val = rte_cpu_to_le_32(nn_readl(q + off));
160
161         if (ptr == NFP_QCP_READ_PTR)
162                 return val & NFP_QCP_QUEUE_STS_LO_READPTR_mask;
163         else
164                 return val & NFP_QCP_QUEUE_STS_HI_WRITEPTR_mask;
165 }
166
167 /*
168  * Functions to read/write from/to Config BAR
169  * Performs any endian conversion necessary.
170  */
171 static inline uint8_t
172 nn_cfg_readb(struct nfp_net_hw *hw, int off)
173 {
174         return nn_readb(hw->ctrl_bar + off);
175 }
176
177 static inline void
178 nn_cfg_writeb(struct nfp_net_hw *hw, int off, uint8_t val)
179 {
180         nn_writeb(val, hw->ctrl_bar + off);
181 }
182
183 static inline uint32_t
184 nn_cfg_readl(struct nfp_net_hw *hw, int off)
185 {
186         return rte_le_to_cpu_32(nn_readl(hw->ctrl_bar + off));
187 }
188
189 static inline void
190 nn_cfg_writel(struct nfp_net_hw *hw, int off, uint32_t val)
191 {
192         nn_writel(rte_cpu_to_le_32(val), hw->ctrl_bar + off);
193 }
194
195 static inline uint64_t
196 nn_cfg_readq(struct nfp_net_hw *hw, int off)
197 {
198         return rte_le_to_cpu_64(nn_readq(hw->ctrl_bar + off));
199 }
200
201 static inline void
202 nn_cfg_writeq(struct nfp_net_hw *hw, int off, uint64_t val)
203 {
204         nn_writeq(rte_cpu_to_le_64(val), hw->ctrl_bar + off);
205 }
206
207 /*
208  * Atomically reads link status information from global structure rte_eth_dev.
209  *
210  * @param dev   Pointer to the structure rte_eth_dev to read from.
211  * @param link  Pointer to the rte_eth_link buffer where the link
212  *              status will be saved.
213  *
214  * @return
215  *   - On success, zero.
216  *   - On failure, negative value.
217  */
218 static inline int
219 nfp_net_dev_atomic_read_link_status(struct rte_eth_dev *dev,
220                                     struct rte_eth_link *link)
221 {
222         struct rte_eth_link *dst = link;
223         struct rte_eth_link *src = &dev->data->dev_link;
224
225         if (rte_atomic64_cmpset((uint64_t *)dst, *(uint64_t *)dst,
226                                 *(uint64_t *)src) == 0)
227                 return -1;
228
229         return 0;
230 }
231
232 /*
233  * Atomically writes the link status information into global
234  * structure rte_eth_dev.
235  *
236  * @param dev   Pointer to the structure rte_eth_dev to write to.
237  * @param link  Pointer to the rte_eth_link buffer holding the link
238  *              status to be written.
239  *
240  * @return
241  *   - On success, zero.
242  *   - On failure, negative value.
243  */
244 static inline int
245 nfp_net_dev_atomic_write_link_status(struct rte_eth_dev *dev,
246                                      struct rte_eth_link *link)
247 {
248         struct rte_eth_link *dst = &dev->data->dev_link;
249         struct rte_eth_link *src = link;
250
251         if (rte_atomic64_cmpset((uint64_t *)dst, *(uint64_t *)dst,
252                                 *(uint64_t *)src) == 0)
253                 return -1;
254
255         return 0;
256 }
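
/*
 * Example (illustrative only): both helpers above rely on struct
 * rte_eth_link fitting into 64 bits, so the whole link status is exchanged
 * with a single atomic compare-and-set. A typical caller looks like:
 *
 *	struct rte_eth_link link;
 *
 *	if (nfp_net_dev_atomic_read_link_status(dev, &link) == 0 &&
 *	    link.link_status == ETH_LINK_UP)
 *		PMD_DRV_LOG(INFO, "link up at %u Mbps\n", link.link_speed);
 */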
257
258 static void
259 nfp_net_rx_queue_release_mbufs(struct nfp_net_rxq *rxq)
260 {
261         unsigned i;
262
263         if (rxq->rxbufs == NULL)
264                 return;
265
266         for (i = 0; i < rxq->rx_count; i++) {
267                 if (rxq->rxbufs[i].mbuf) {
268                         rte_pktmbuf_free_seg(rxq->rxbufs[i].mbuf);
269                         rxq->rxbufs[i].mbuf = NULL;
270                 }
271         }
272 }
273
274 static void
275 nfp_net_rx_queue_release(void *rx_queue)
276 {
277         struct nfp_net_rxq *rxq = rx_queue;
278
279         if (rxq) {
280                 nfp_net_rx_queue_release_mbufs(rxq);
281                 rte_free(rxq->rxbufs);
282                 rte_free(rxq);
283         }
284 }
285
286 static void
287 nfp_net_reset_rx_queue(struct nfp_net_rxq *rxq)
288 {
289         nfp_net_rx_queue_release_mbufs(rxq);
290         rxq->rd_p = 0;
291         rxq->nb_rx_hold = 0;
292 }
293
294 static void
295 nfp_net_tx_queue_release_mbufs(struct nfp_net_txq *txq)
296 {
297         unsigned i;
298
299         if (txq->txbufs == NULL)
300                 return;
301
302         for (i = 0; i < txq->tx_count; i++) {
303                 if (txq->txbufs[i].mbuf) {
304                         rte_pktmbuf_free(txq->txbufs[i].mbuf);
305                         txq->txbufs[i].mbuf = NULL;
306                 }
307         }
308 }
309
310 static void
311 nfp_net_tx_queue_release(void *tx_queue)
312 {
313         struct nfp_net_txq *txq = tx_queue;
314
315         if (txq) {
316                 nfp_net_tx_queue_release_mbufs(txq);
317                 rte_free(txq->txbufs);
318                 rte_free(txq);
319         }
320 }
321
322 static void
323 nfp_net_reset_tx_queue(struct nfp_net_txq *txq)
324 {
325         nfp_net_tx_queue_release_mbufs(txq);
326         txq->wr_p = 0;
327         txq->rd_p = 0;
328 }
329
330 static int
331 __nfp_net_reconfig(struct nfp_net_hw *hw, uint32_t update)
332 {
333         int cnt;
334         uint32_t new;
335         struct timespec wait;
336
337         PMD_DRV_LOG(DEBUG, "Writing to the configuration queue (%p)...\n",
338                     hw->qcp_cfg);
339
340         if (hw->qcp_cfg == NULL)
341                 rte_panic("Bad configuration queue pointer\n");
342
343         nfp_qcp_ptr_add(hw->qcp_cfg, NFP_QCP_WRITE_PTR, 1);
344
345         wait.tv_sec = 0;
346         wait.tv_nsec = 1000000;
347
348         PMD_DRV_LOG(DEBUG, "Polling for update ack...\n");
349
350         /* Poll update field, waiting for NFP to ack the config */
351         for (cnt = 0; ; cnt++) {
352                 new = nn_cfg_readl(hw, NFP_NET_CFG_UPDATE);
353                 if (new == 0)
354                         break;
355                 if (new & NFP_NET_CFG_UPDATE_ERR) {
356                         PMD_INIT_LOG(ERR, "Reconfig error: 0x%08x", new);
357                         return -1;
358                 }
359                 if (cnt >= NFP_NET_POLL_TIMEOUT) {
360                         PMD_INIT_LOG(ERR, "Reconfig timeout for 0x%08x after"
361                                           " %dms", update, cnt);
362                         rte_panic("Exiting\n");
363                 }
364                 nanosleep(&wait, 0); /* wait for 1ms */
365         }
366         PMD_DRV_LOG(DEBUG, "Ack DONE\n");
367         return 0;
368 }
369
370 /*
371  * Reconfigure the NIC
372  * @nn:    device to reconfigure
373  * @ctrl:    The value for the ctrl field in the BAR config
374  * @update:  The value for the update field in the BAR config
375  *
376  * Write the update word to the BAR and ping the reconfig queue. Then poll
377  * until the firmware has acknowledged the update by zeroing the update word.
378  */
379 static int
380 nfp_net_reconfig(struct nfp_net_hw *hw, uint32_t ctrl, uint32_t update)
381 {
382         uint32_t err;
383
384         PMD_DRV_LOG(DEBUG, "nfp_net_reconfig: ctrl=%08x update=%08x\n",
385                     ctrl, update);
386
387         rte_spinlock_lock(&hw->reconfig_lock);
388
389         nn_cfg_writel(hw, NFP_NET_CFG_CTRL, ctrl);
390         nn_cfg_writel(hw, NFP_NET_CFG_UPDATE, update);
391
392         rte_wmb();
393
394         err = __nfp_net_reconfig(hw, update);
395
396         rte_spinlock_unlock(&hw->reconfig_lock);
397
398         if (!err)
399                 return 0;
400
401         /*
402          * Reconfig errors returned here are ones the caller can handle;
403          * otherwise rte_panic is called inside __nfp_net_reconfig.
404          */
405         PMD_INIT_LOG(ERR, "Error nfp_net reconfig for ctrl: %x update: %x",
406                      ctrl, update);
407         return -EIO;
408 }
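
/*
 * Example (illustrative only): a typical reconfig caller ORs the wanted
 * capability into the cached control word and names the affected area in
 * the update word, e.g. to turn promiscuous mode on:
 *
 *	new_ctrl = hw->ctrl | NFP_NET_CFG_CTRL_PROMISC;
 *	if (nfp_net_reconfig(hw, new_ctrl, NFP_NET_CFG_UPDATE_GEN) == 0)
 *		hw->ctrl = new_ctrl;
 */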
409
410 /*
411  * Configure an Ethernet device. This function must be invoked first
412  * before any other function in the Ethernet API. This function can
413  * also be re-invoked when a device is in the stopped state.
414  */
415 static int
416 nfp_net_configure(struct rte_eth_dev *dev)
417 {
418         struct rte_eth_conf *dev_conf;
419         struct rte_eth_rxmode *rxmode;
420         struct rte_eth_txmode *txmode;
421         uint32_t new_ctrl = 0;
422         uint32_t update = 0;
423         struct nfp_net_hw *hw;
424
425         hw = NFP_NET_DEV_PRIVATE_TO_HW(dev->data->dev_private);
426
427         /*
428          * A DPDK app sends info about how many queues to use and how
429          * those queues need to be configured. This is used by the
430          * DPDK core and it makes sure no more queues than those
431          * advertised by the driver are requested. This function is
432          * called after that internal process
433          */
434
435         PMD_INIT_LOG(DEBUG, "Configure");
436
437         dev_conf = &dev->data->dev_conf;
438         rxmode = &dev_conf->rxmode;
439         txmode = &dev_conf->txmode;
440
441         /* Checking TX mode */
442         if (txmode->mq_mode) {
443                 PMD_INIT_LOG(INFO, "TX mq_mode DCB and VMDq not supported");
444                 return -EINVAL;
445         }
446
447         /* Checking RX mode */
448         if (rxmode->mq_mode & ETH_MQ_RX_RSS) {
449                 if (hw->cap & NFP_NET_CFG_CTRL_RSS) {
450                         update = NFP_NET_CFG_UPDATE_RSS;
451                         new_ctrl = NFP_NET_CFG_CTRL_RSS;
452                 } else {
453                         PMD_INIT_LOG(INFO, "RSS not supported");
454                         return -EINVAL;
455                 }
456         }
457
458         if (rxmode->split_hdr_size) {
459                 PMD_INIT_LOG(INFO, "rxmode does not support split header");
460                 return -EINVAL;
461         }
462
463         if (rxmode->hw_ip_checksum) {
464                 if (hw->cap & NFP_NET_CFG_CTRL_RXCSUM) {
465                         new_ctrl |= NFP_NET_CFG_CTRL_RXCSUM;
466                 } else {
467                         PMD_INIT_LOG(INFO, "RXCSUM not supported");
468                         return -EINVAL;
469                 }
470         }
471
472         if (rxmode->hw_vlan_filter) {
473                 PMD_INIT_LOG(INFO, "VLAN filter not supported");
474                 return -EINVAL;
475         }
476
477         if (rxmode->hw_vlan_strip) {
478                 if (hw->cap & NFP_NET_CFG_CTRL_RXVLAN) {
479                         new_ctrl |= NFP_NET_CFG_CTRL_RXVLAN;
480                 } else {
481                         PMD_INIT_LOG(INFO, "hw vlan strip not supported");
482                         return -EINVAL;
483                 }
484         }
485
486         if (rxmode->hw_vlan_extend) {
487                 PMD_INIT_LOG(INFO, "VLAN extended not supported");
488                 return -EINVAL;
489         }
490
491         /* rxmode->jumbo_frame is handled in rte_eth_dev_configure */
493
494         if (rxmode->hw_strip_crc) {
495                 PMD_INIT_LOG(INFO, "strip CRC not supported");
496                 return -EINVAL;
497         }
498
499         if (rxmode->enable_scatter) {
500                 PMD_INIT_LOG(INFO, "Scatter not supported");
501                 return -EINVAL;
502         }
503
504         /* If next capabilities are supported, configure them by default */
505
506         /* VLAN insertion */
507         if (hw->cap & NFP_NET_CFG_CTRL_TXVLAN)
508                 new_ctrl |= NFP_NET_CFG_CTRL_TXVLAN;
509
510         /* L2 broadcast */
511         if (hw->cap & NFP_NET_CFG_CTRL_L2BC)
512                 new_ctrl |= NFP_NET_CFG_CTRL_L2BC;
513
514         /* L2 multicast */
515         if (hw->cap & NFP_NET_CFG_CTRL_L2MC)
516                 new_ctrl |= NFP_NET_CFG_CTRL_L2MC;
517
518         if (!new_ctrl)
519                 return 0;
520
521         update |= NFP_NET_CFG_UPDATE_GEN;
522
523         nn_cfg_writel(hw, NFP_NET_CFG_CTRL, new_ctrl);
524         if (nfp_net_reconfig(hw, new_ctrl, update) < 0)
525                 return -EIO;
526
527         hw->ctrl = new_ctrl;
528
529         return 0;
530 }
531
532 static void
533 nfp_net_enable_queues(struct rte_eth_dev *dev)
534 {
535         struct nfp_net_hw *hw;
536         uint64_t enabled_queues = 0;
537         int i;
538
539         hw = NFP_NET_DEV_PRIVATE_TO_HW(dev->data->dev_private);
540
541         /* Enabling the required TX queues in the device */
542         for (i = 0; i < dev->data->nb_tx_queues; i++)
543                 enabled_queues |= (1 << i);
544
545         nn_cfg_writeq(hw, NFP_NET_CFG_TXRS_ENABLE, enabled_queues);
546
547         enabled_queues = 0;
548
549         /* Enabling the required RX queues in the device */
550         for (i = 0; i < dev->data->nb_rx_queues; i++)
551                 enabled_queues |= (1 << i);
552
553         nn_cfg_writeq(hw, NFP_NET_CFG_RXRS_ENABLE, enabled_queues);
554 }
555
556 static void
557 nfp_net_disable_queues(struct rte_eth_dev *dev)
558 {
559         struct nfp_net_hw *hw;
560         uint32_t new_ctrl, update = 0;
561
562         hw = NFP_NET_DEV_PRIVATE_TO_HW(dev->data->dev_private);
563
564         nn_cfg_writeq(hw, NFP_NET_CFG_TXRS_ENABLE, 0);
565         nn_cfg_writeq(hw, NFP_NET_CFG_RXRS_ENABLE, 0);
566
567         new_ctrl = hw->ctrl & ~NFP_NET_CFG_CTRL_ENABLE;
568         update = NFP_NET_CFG_UPDATE_GEN | NFP_NET_CFG_UPDATE_RING |
569                  NFP_NET_CFG_UPDATE_MSIX;
570
571         if (hw->cap & NFP_NET_CFG_CTRL_RINGCFG)
572                 new_ctrl &= ~NFP_NET_CFG_CTRL_RINGCFG;
573
574         /* If reconfig fails, avoid changing the hw state */
575         if (nfp_net_reconfig(hw, new_ctrl, update) < 0)
576                 return;
577
578         hw->ctrl = new_ctrl;
579 }
580
581 static int
582 nfp_net_rx_freelist_setup(struct rte_eth_dev *dev)
583 {
584         int i;
585
586         for (i = 0; i < dev->data->nb_rx_queues; i++) {
587                 if (nfp_net_rx_fill_freelist(dev->data->rx_queues[i]) < 0)
588                         return -1;
589         }
590         return 0;
591 }
592
593 static void
594 nfp_net_params_setup(struct nfp_net_hw *hw)
595 {
596         nn_cfg_writel(hw, NFP_NET_CFG_MTU, hw->mtu);
597         nn_cfg_writel(hw, NFP_NET_CFG_FLBUFSZ, hw->flbufsz);
598 }
599
600 static void
601 nfp_net_cfg_queue_setup(struct nfp_net_hw *hw)
602 {
603         hw->qcp_cfg = hw->tx_bar + NFP_QCP_QUEUE_ADDR_SZ;
604 }
605
606 #define ETH_ADDR_LEN    6
607
608 static void
609 nfp_eth_copy_mac_reverse(uint8_t *dst, const uint8_t *src)
610 {
611         int i;
612
613         for (i = 0; i < ETH_ADDR_LEN; i++)
614                 dst[ETH_ADDR_LEN - i - 1] = src[i];
615 }
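
/*
 * Example (illustrative only): the helper above reverses the byte order,
 * so a MAC read out of the NSP eth table as 66:55:44:33:22:11 is stored
 * as 11:22:33:44:55:66:
 *
 *	uint8_t src[ETH_ADDR_LEN] = { 0x66, 0x55, 0x44, 0x33, 0x22, 0x11 };
 *	uint8_t dst[ETH_ADDR_LEN];
 *
 *	nfp_eth_copy_mac_reverse(dst, src);	// dst = 11:22:33:44:55:66
 */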
616
617 static int
618 nfp_net_pf_read_mac(struct nfp_net_hw *hw, int port)
619 {
620         union eth_table_entry *entry;
621         int idx, i;
622
623         idx = port;
624         entry = hw->eth_table;
625
626         /* Reading NFP ethernet table obtained before */
627         for (i = 0; i < NSP_ETH_MAX_COUNT; i++) {
628                 if (!(entry->port & NSP_ETH_PORT_LANES_MASK)) {
629                         /* port not in use */
630                         entry++;
631                         continue;
632                 }
633                 if (idx == 0)
634                         break;
635                 idx--;
636                 entry++;
637         }
638
639         if (i == NSP_ETH_MAX_COUNT)
640                 return -EINVAL;
641
642         /*
643          * hw points to port0 private data. We need hw to now point to
644          * the right port.
645          */
646         hw += port;
647         nfp_eth_copy_mac_reverse((uint8_t *)&hw->mac_addr,
648                                  (uint8_t *)&entry->mac_addr);
649
650         return 0;
651 }
652
653 static void
654 nfp_net_vf_read_mac(struct nfp_net_hw *hw)
655 {
656         uint32_t tmp;
657
658         tmp = rte_be_to_cpu_32(nn_cfg_readl(hw, NFP_NET_CFG_MACADDR));
659         memcpy(&hw->mac_addr[0], &tmp, 4);
660
661         tmp = rte_be_to_cpu_32(nn_cfg_readl(hw, NFP_NET_CFG_MACADDR + 4));
662         memcpy(&hw->mac_addr[4], &tmp, 2);
663 }
664
665 static void
666 nfp_net_write_mac(struct nfp_net_hw *hw, uint8_t *mac)
667 {
668         uint32_t mac0 = *(uint32_t *)mac;
669         uint16_t mac1;
670
671         nn_writel(rte_cpu_to_be_32(mac0), hw->ctrl_bar + NFP_NET_CFG_MACADDR);
672
673         mac += 4;
674         mac1 = *(uint16_t *)mac;
675         nn_writew(rte_cpu_to_be_16(mac1),
676                   hw->ctrl_bar + NFP_NET_CFG_MACADDR + 6);
677 }
678
679 static int
680 nfp_configure_rx_interrupt(struct rte_eth_dev *dev,
681                            struct rte_intr_handle *intr_handle)
682 {
683         struct nfp_net_hw *hw;
684         int i;
685
686         if (!intr_handle->intr_vec) {
687                 intr_handle->intr_vec =
688                         rte_zmalloc("intr_vec",
689                                     dev->data->nb_rx_queues * sizeof(int), 0);
690                 if (!intr_handle->intr_vec) {
691                         PMD_INIT_LOG(ERR, "Failed to allocate %d rx_queues"
692                                      " intr_vec", dev->data->nb_rx_queues);
693                         return -ENOMEM;
694                 }
695         }
696
697         hw = NFP_NET_DEV_PRIVATE_TO_HW(dev->data->dev_private);
698
699         if (intr_handle->type == RTE_INTR_HANDLE_UIO) {
700                 PMD_INIT_LOG(INFO, "VF: enabling RX interrupt with UIO");
701                 /* UIO just supports one queue and no LSC */
702                 nn_cfg_writeb(hw, NFP_NET_CFG_RXR_VEC(0), 0);
703                 intr_handle->intr_vec[0] = 0;
704         } else {
705                 PMD_INIT_LOG(INFO, "VF: enabling RX interrupt with VFIO");
706                 for (i = 0; i < dev->data->nb_rx_queues; i++) {
707                         /*
708                          * The first MSI-X vector is reserved for
709                          * non-EFD interrupts.
710                          */
711                         nn_cfg_writeb(hw, NFP_NET_CFG_RXR_VEC(i), i + 1);
712                         intr_handle->intr_vec[i] = i + 1;
713                         PMD_INIT_LOG(DEBUG, "intr_vec[%d]= %d\n", i,
714                                             intr_handle->intr_vec[i]);
715                 }
716         }
717
718         /* Avoiding TX interrupts */
719         hw->ctrl |= NFP_NET_CFG_CTRL_MSIX_TX_OFF;
720         return 0;
721 }
722
723 static int
724 nfp_net_start(struct rte_eth_dev *dev)
725 {
726         struct rte_pci_device *pci_dev = RTE_ETH_DEV_TO_PCI(dev);
727         struct rte_intr_handle *intr_handle = &pci_dev->intr_handle;
728         uint32_t new_ctrl, update = 0;
729         struct nfp_net_hw *hw;
730         uint32_t intr_vector;
731         int ret;
732
733         hw = NFP_NET_DEV_PRIVATE_TO_HW(dev->data->dev_private);
734
735         PMD_INIT_LOG(DEBUG, "Start");
736
737         /* Disabling queues just in case... */
738         nfp_net_disable_queues(dev);
739
740         /* Writing configuration parameters in the device */
741         nfp_net_params_setup(hw);
742
743         /* Enabling the required queues in the device */
744         nfp_net_enable_queues(dev);
745
746         /* check and configure queue intr-vector mapping */
747         if (dev->data->dev_conf.intr_conf.rxq != 0) {
748                 if (hw->pf_multiport_enabled) {
749                         PMD_INIT_LOG(ERR, "PMD rx interrupt is not supported "
750                                           "with NFP multiport PF");
751                         return -EINVAL;
752                 }
753                 if (intr_handle->type == RTE_INTR_HANDLE_UIO) {
754                         /*
755                          * Better not to share LSC with RX interrupts.
756                          * Unregistering LSC interrupt handler
757                          */
758                         rte_intr_callback_unregister(&pci_dev->intr_handle,
759                                 nfp_net_dev_interrupt_handler, (void *)dev);
760
761                         if (dev->data->nb_rx_queues > 1) {
762                                 PMD_INIT_LOG(ERR, "PMD rx interrupt only "
763                                              "supports 1 queue with UIO");
764                                 return -EIO;
765                         }
766                 }
767                 intr_vector = dev->data->nb_rx_queues;
768                 if (rte_intr_efd_enable(intr_handle, intr_vector))
769                         return -1;
770
771                 nfp_configure_rx_interrupt(dev, intr_handle);
772                 update = NFP_NET_CFG_UPDATE_MSIX;
773         }
774
775         rte_intr_enable(intr_handle);
776
777         /* Enable device */
778         new_ctrl = hw->ctrl | NFP_NET_CFG_CTRL_ENABLE;
779
780         update |= NFP_NET_CFG_UPDATE_GEN | NFP_NET_CFG_UPDATE_RING;
781
782         if (hw->cap & NFP_NET_CFG_CTRL_RINGCFG)
783                 new_ctrl |= NFP_NET_CFG_CTRL_RINGCFG;
784
785         nn_cfg_writel(hw, NFP_NET_CFG_CTRL, new_ctrl);
786         if (nfp_net_reconfig(hw, new_ctrl, update) < 0)
787                 return -EIO;
788
789         /*
790          * Allocating rte mbuffs for configured rx queues.
791          * This requires queues being enabled before
792          */
793         if (nfp_net_rx_freelist_setup(dev) < 0) {
794                 ret = -ENOMEM;
795                 goto error;
796         }
797
798         if (hw->is_pf)
799                 /* Configure the physical port up */
800                 nfp_nsp_eth_config(hw->nspu_desc, hw->pf_port_idx, 1);
801
802         hw->ctrl = new_ctrl;
803
804         return 0;
805
806 error:
807         /*
808          * An error returned by this function should cause the app to
809          * exit, at which point the system releases all the allocated
810          * memory, including memory coming from hugepages.
811          *
812          * The device could be enabled at this point with some queues
813          * ready for getting packets. This is true if the call to
814          * nfp_net_rx_freelist_setup() succeeds for some queues but
815          * fails for subsequent queues.
816          *
817          * This should make the app exit, but it is better if we tell
818          * the device first.
819          */
820         nfp_net_disable_queues(dev);
821
822         return ret;
823 }
824
825 /* Stop device: disable rx and tx functions to allow for reconfiguring. */
826 static void
827 nfp_net_stop(struct rte_eth_dev *dev)
828 {
829         int i;
830         struct nfp_net_hw *hw;
831
832         PMD_INIT_LOG(DEBUG, "Stop");
833
834         hw = NFP_NET_DEV_PRIVATE_TO_HW(dev->data->dev_private);
835
836         nfp_net_disable_queues(dev);
837
838         /* Clear queues */
839         for (i = 0; i < dev->data->nb_tx_queues; i++) {
840                 nfp_net_reset_tx_queue(
841                         (struct nfp_net_txq *)dev->data->tx_queues[i]);
842         }
843
844         for (i = 0; i < dev->data->nb_rx_queues; i++) {
845                 nfp_net_reset_rx_queue(
846                         (struct nfp_net_rxq *)dev->data->rx_queues[i]);
847         }
848
849         if (hw->is_pf)
850                 /* Configure the physical port down */
851                 nfp_nsp_eth_config(hw->nspu_desc, hw->pf_port_idx, 0);
852 }
853
854 /* Reset and stop device. The device can not be restarted. */
855 static void
856 nfp_net_close(struct rte_eth_dev *dev)
857 {
858         struct nfp_net_hw *hw;
859         struct rte_pci_device *pci_dev;
860         int i;
861
862         PMD_INIT_LOG(DEBUG, "Close");
863
864         hw = NFP_NET_DEV_PRIVATE_TO_HW(dev->data->dev_private);
865         pci_dev = RTE_ETH_DEV_TO_PCI(dev);
866
867         /*
868          * We assume that the DPDK application is stopping all the
869          * threads/queues before calling the device close function.
870          */
871
872         nfp_net_disable_queues(dev);
873
874         /* Clear queues */
875         for (i = 0; i < dev->data->nb_tx_queues; i++) {
876                 nfp_net_reset_tx_queue(
877                         (struct nfp_net_txq *)dev->data->tx_queues[i]);
878         }
879
880         for (i = 0; i < dev->data->nb_rx_queues; i++) {
881                 nfp_net_reset_rx_queue(
882                         (struct nfp_net_rxq *)dev->data->rx_queues[i]);
883         }
884
885         rte_intr_disable(&pci_dev->intr_handle);
886         nn_cfg_writeb(hw, NFP_NET_CFG_LSC, 0xff);
887
888         /* unregister callback func from eal lib */
889         rte_intr_callback_unregister(&pci_dev->intr_handle,
890                                      nfp_net_dev_interrupt_handler,
891                                      (void *)dev);
892
893         /*
894          * The ixgbe PMD driver disables the pcie master on the
895          * device. The i40e does not...
896          */
897 }
898
899 static void
900 nfp_net_promisc_enable(struct rte_eth_dev *dev)
901 {
902         uint32_t new_ctrl, update = 0;
903         struct nfp_net_hw *hw;
904
905         PMD_DRV_LOG(DEBUG, "Promiscuous mode enable\n");
906
907         hw = NFP_NET_DEV_PRIVATE_TO_HW(dev->data->dev_private);
908
909         if (!(hw->cap & NFP_NET_CFG_CTRL_PROMISC)) {
910                 PMD_INIT_LOG(INFO, "Promiscuous mode not supported");
911                 return;
912         }
913
914         if (hw->ctrl & NFP_NET_CFG_CTRL_PROMISC) {
915                 PMD_DRV_LOG(INFO, "Promiscuous mode already enabled\n");
916                 return;
917         }
918
919         new_ctrl = hw->ctrl | NFP_NET_CFG_CTRL_PROMISC;
920         update = NFP_NET_CFG_UPDATE_GEN;
921
922         /*
923          * DPDK sets promiscuous mode on just after this call assuming
924          * it can not fail ...
925          */
926         if (nfp_net_reconfig(hw, new_ctrl, update) < 0)
927                 return;
928
929         hw->ctrl = new_ctrl;
930 }
931
932 static void
933 nfp_net_promisc_disable(struct rte_eth_dev *dev)
934 {
935         uint32_t new_ctrl, update = 0;
936         struct nfp_net_hw *hw;
937
938         hw = NFP_NET_DEV_PRIVATE_TO_HW(dev->data->dev_private);
939
940         if ((hw->ctrl & NFP_NET_CFG_CTRL_PROMISC) == 0) {
941                 PMD_DRV_LOG(INFO, "Promiscuous mode already disabled\n");
942                 return;
943         }
944
945         new_ctrl = hw->ctrl & ~NFP_NET_CFG_CTRL_PROMISC;
946         update = NFP_NET_CFG_UPDATE_GEN;
947
948         /*
949          * DPDK sets promiscuous mode off just before this call
950          * assuming it can not fail ...
951          */
952         if (nfp_net_reconfig(hw, new_ctrl, update) < 0)
953                 return;
954
955         hw->ctrl = new_ctrl;
956 }
957
958 /*
959  * return 0 means link status changed, -1 means not changed
960  *
961  * Wait to complete is needed as it can take up to 9 seconds to get the Link
962  * status.
963  */
964 static int
965 nfp_net_link_update(struct rte_eth_dev *dev, __rte_unused int wait_to_complete)
966 {
967         struct nfp_net_hw *hw;
968         struct rte_eth_link link, old;
969         uint32_t nn_link_status;
970
971         static const uint32_t ls_to_ethtool[] = {
972                 [NFP_NET_CFG_STS_LINK_RATE_UNSUPPORTED] = ETH_SPEED_NUM_NONE,
973                 [NFP_NET_CFG_STS_LINK_RATE_UNKNOWN]     = ETH_SPEED_NUM_NONE,
974                 [NFP_NET_CFG_STS_LINK_RATE_1G]          = ETH_SPEED_NUM_1G,
975                 [NFP_NET_CFG_STS_LINK_RATE_10G]         = ETH_SPEED_NUM_10G,
976                 [NFP_NET_CFG_STS_LINK_RATE_25G]         = ETH_SPEED_NUM_25G,
977                 [NFP_NET_CFG_STS_LINK_RATE_40G]         = ETH_SPEED_NUM_40G,
978                 [NFP_NET_CFG_STS_LINK_RATE_50G]         = ETH_SPEED_NUM_50G,
979                 [NFP_NET_CFG_STS_LINK_RATE_100G]        = ETH_SPEED_NUM_100G,
980         };
981
982         PMD_DRV_LOG(DEBUG, "Link update\n");
983
984         hw = NFP_NET_DEV_PRIVATE_TO_HW(dev->data->dev_private);
985
986         memset(&old, 0, sizeof(old));
987         nfp_net_dev_atomic_read_link_status(dev, &old);
988
989         nn_link_status = nn_cfg_readl(hw, NFP_NET_CFG_STS);
990
991         memset(&link, 0, sizeof(struct rte_eth_link));
992
993         if (nn_link_status & NFP_NET_CFG_STS_LINK)
994                 link.link_status = ETH_LINK_UP;
995
996         link.link_duplex = ETH_LINK_FULL_DUPLEX;
997
998         nn_link_status = (nn_link_status >> NFP_NET_CFG_STS_LINK_RATE_SHIFT) &
999                          NFP_NET_CFG_STS_LINK_RATE_MASK;
1000
1001         if (nn_link_status >= RTE_DIM(ls_to_ethtool))
1002                 link.link_speed = ETH_SPEED_NUM_NONE;
1003         else
1004                 link.link_speed = ls_to_ethtool[nn_link_status];
1005
1006         if (old.link_status != link.link_status) {
1007                 nfp_net_dev_atomic_write_link_status(dev, &link);
1008                 if (link.link_status)
1009                         PMD_DRV_LOG(INFO, "NIC Link is Up\n");
1010                 else
1011                         PMD_DRV_LOG(INFO, "NIC Link is Down\n");
1012                 return 0;
1013         }
1014
1015         return -1;
1016 }
1017
1018 static void
1019 nfp_net_stats_get(struct rte_eth_dev *dev, struct rte_eth_stats *stats)
1020 {
1021         int i;
1022         struct nfp_net_hw *hw;
1023         struct rte_eth_stats nfp_dev_stats;
1024
1025         hw = NFP_NET_DEV_PRIVATE_TO_HW(dev->data->dev_private);
1026
1027         /* RTE_ETHDEV_QUEUE_STAT_CNTRS default value is 16 */
1028
1029         /* reading per RX ring stats */
1030         for (i = 0; i < dev->data->nb_rx_queues; i++) {
1031                 if (i == RTE_ETHDEV_QUEUE_STAT_CNTRS)
1032                         break;
1033
1034                 nfp_dev_stats.q_ipackets[i] =
1035                         nn_cfg_readq(hw, NFP_NET_CFG_RXR_STATS(i));
1036
1037                 nfp_dev_stats.q_ipackets[i] -=
1038                         hw->eth_stats_base.q_ipackets[i];
1039
1040                 nfp_dev_stats.q_ibytes[i] =
1041                         nn_cfg_readq(hw, NFP_NET_CFG_RXR_STATS(i) + 0x8);
1042
1043                 nfp_dev_stats.q_ibytes[i] -=
1044                         hw->eth_stats_base.q_ibytes[i];
1045         }
1046
1047         /* reading per TX ring stats */
1048         for (i = 0; i < dev->data->nb_tx_queues; i++) {
1049                 if (i == RTE_ETHDEV_QUEUE_STAT_CNTRS)
1050                         break;
1051
1052                 nfp_dev_stats.q_opackets[i] =
1053                         nn_cfg_readq(hw, NFP_NET_CFG_TXR_STATS(i));
1054
1055                 nfp_dev_stats.q_opackets[i] -=
1056                         hw->eth_stats_base.q_opackets[i];
1057
1058                 nfp_dev_stats.q_obytes[i] =
1059                         nn_cfg_readq(hw, NFP_NET_CFG_TXR_STATS(i) + 0x8);
1060
1061                 nfp_dev_stats.q_obytes[i] -=
1062                         hw->eth_stats_base.q_obytes[i];
1063         }
1064
1065         nfp_dev_stats.ipackets =
1066                 nn_cfg_readq(hw, NFP_NET_CFG_STATS_RX_FRAMES);
1067
1068         nfp_dev_stats.ipackets -= hw->eth_stats_base.ipackets;
1069
1070         nfp_dev_stats.ibytes =
1071                 nn_cfg_readq(hw, NFP_NET_CFG_STATS_RX_OCTETS);
1072
1073         nfp_dev_stats.ibytes -= hw->eth_stats_base.ibytes;
1074
1075         nfp_dev_stats.opackets =
1076                 nn_cfg_readq(hw, NFP_NET_CFG_STATS_TX_FRAMES);
1077
1078         nfp_dev_stats.opackets -= hw->eth_stats_base.opackets;
1079
1080         nfp_dev_stats.obytes =
1081                 nn_cfg_readq(hw, NFP_NET_CFG_STATS_TX_OCTETS);
1082
1083         nfp_dev_stats.obytes -= hw->eth_stats_base.obytes;
1084
1085         /* reading general device stats */
1086         nfp_dev_stats.ierrors =
1087                 nn_cfg_readq(hw, NFP_NET_CFG_STATS_RX_ERRORS);
1088
1089         nfp_dev_stats.ierrors -= hw->eth_stats_base.ierrors;
1090
1091         nfp_dev_stats.oerrors =
1092                 nn_cfg_readq(hw, NFP_NET_CFG_STATS_TX_ERRORS);
1093
1094         nfp_dev_stats.oerrors -= hw->eth_stats_base.oerrors;
1095
1096         /* RX ring mbuf allocation failures */
1097         nfp_dev_stats.rx_nombuf = dev->data->rx_mbuf_alloc_failed;
1098
1099         nfp_dev_stats.imissed =
1100                 nn_cfg_readq(hw, NFP_NET_CFG_STATS_RX_DISCARDS);
1101
1102         nfp_dev_stats.imissed -= hw->eth_stats_base.imissed;
1103
1104         if (stats)
1105                 memcpy(stats, &nfp_dev_stats, sizeof(*stats));
1106 }
1107
1108 static void
1109 nfp_net_stats_reset(struct rte_eth_dev *dev)
1110 {
1111         int i;
1112         struct nfp_net_hw *hw;
1113
1114         hw = NFP_NET_DEV_PRIVATE_TO_HW(dev->data->dev_private);
1115
1116         /*
1117          * hw->eth_stats_base records the per-counter starting point.
1118          * Let's update it now.
1119          */
1120
1121         /* reading per RX ring stats */
1122         for (i = 0; i < dev->data->nb_rx_queues; i++) {
1123                 if (i == RTE_ETHDEV_QUEUE_STAT_CNTRS)
1124                         break;
1125
1126                 hw->eth_stats_base.q_ipackets[i] =
1127                         nn_cfg_readq(hw, NFP_NET_CFG_RXR_STATS(i));
1128
1129                 hw->eth_stats_base.q_ibytes[i] =
1130                         nn_cfg_readq(hw, NFP_NET_CFG_RXR_STATS(i) + 0x8);
1131         }
1132
1133         /* reading per TX ring stats */
1134         for (i = 0; i < dev->data->nb_tx_queues; i++) {
1135                 if (i == RTE_ETHDEV_QUEUE_STAT_CNTRS)
1136                         break;
1137
1138                 hw->eth_stats_base.q_opackets[i] =
1139                         nn_cfg_readq(hw, NFP_NET_CFG_TXR_STATS(i));
1140
1141                 hw->eth_stats_base.q_obytes[i] =
1142                         nn_cfg_readq(hw, NFP_NET_CFG_TXR_STATS(i) + 0x8);
1143         }
1144
1145         hw->eth_stats_base.ipackets =
1146                 nn_cfg_readq(hw, NFP_NET_CFG_STATS_RX_FRAMES);
1147
1148         hw->eth_stats_base.ibytes =
1149                 nn_cfg_readq(hw, NFP_NET_CFG_STATS_RX_OCTETS);
1150
1151         hw->eth_stats_base.opackets =
1152                 nn_cfg_readq(hw, NFP_NET_CFG_STATS_TX_FRAMES);
1153
1154         hw->eth_stats_base.obytes =
1155                 nn_cfg_readq(hw, NFP_NET_CFG_STATS_TX_OCTETS);
1156
1157         /* reading general device stats */
1158         hw->eth_stats_base.ierrors =
1159                 nn_cfg_readq(hw, NFP_NET_CFG_STATS_RX_ERRORS);
1160
1161         hw->eth_stats_base.oerrors =
1162                 nn_cfg_readq(hw, NFP_NET_CFG_STATS_TX_ERRORS);
1163
1164         /* RX ring mbuf allocation failures */
1165         dev->data->rx_mbuf_alloc_failed = 0;
1166
1167         hw->eth_stats_base.imissed =
1168                 nn_cfg_readq(hw, NFP_NET_CFG_STATS_RX_DISCARDS);
1169 }
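
/*
 * Example (illustrative only): the hardware counters are not cleared on a
 * stats reset; hw->eth_stats_base just keeps a snapshot and
 * nfp_net_stats_get() reports deltas against it, e.g. for ipackets:
 *
 *	stats->ipackets = nn_cfg_readq(hw, NFP_NET_CFG_STATS_RX_FRAMES) -
 *			  hw->eth_stats_base.ipackets;
 */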
1170
1171 static void
1172 nfp_net_infos_get(struct rte_eth_dev *dev, struct rte_eth_dev_info *dev_info)
1173 {
1174         struct nfp_net_hw *hw;
1175
1176         hw = NFP_NET_DEV_PRIVATE_TO_HW(dev->data->dev_private);
1177
1178         dev_info->pci_dev = RTE_ETH_DEV_TO_PCI(dev);
1179         dev_info->max_rx_queues = (uint16_t)hw->max_rx_queues;
1180         dev_info->max_tx_queues = (uint16_t)hw->max_tx_queues;
1181         dev_info->min_rx_bufsize = ETHER_MIN_MTU;
1182         dev_info->max_rx_pktlen = hw->mtu;
1183         /* Next should change when PF support is implemented */
1184         dev_info->max_mac_addrs = 1;
1185
1186         if (hw->cap & NFP_NET_CFG_CTRL_RXVLAN)
1187                 dev_info->rx_offload_capa = DEV_RX_OFFLOAD_VLAN_STRIP;
1188
1189         if (hw->cap & NFP_NET_CFG_CTRL_RXCSUM)
1190                 dev_info->rx_offload_capa |= DEV_RX_OFFLOAD_IPV4_CKSUM |
1191                                              DEV_RX_OFFLOAD_UDP_CKSUM |
1192                                              DEV_RX_OFFLOAD_TCP_CKSUM;
1193
1194         if (hw->cap & NFP_NET_CFG_CTRL_TXVLAN)
1195                 dev_info->tx_offload_capa = DEV_TX_OFFLOAD_VLAN_INSERT;
1196
1197         if (hw->cap & NFP_NET_CFG_CTRL_TXCSUM)
1198                 dev_info->tx_offload_capa |= DEV_TX_OFFLOAD_IPV4_CKSUM |
1199                                              DEV_TX_OFFLOAD_UDP_CKSUM |
1200                                              DEV_TX_OFFLOAD_TCP_CKSUM;
1201
1202         dev_info->default_rxconf = (struct rte_eth_rxconf) {
1203                 .rx_thresh = {
1204                         .pthresh = DEFAULT_RX_PTHRESH,
1205                         .hthresh = DEFAULT_RX_HTHRESH,
1206                         .wthresh = DEFAULT_RX_WTHRESH,
1207                 },
1208                 .rx_free_thresh = DEFAULT_RX_FREE_THRESH,
1209                 .rx_drop_en = 0,
1210         };
1211
1212         dev_info->default_txconf = (struct rte_eth_txconf) {
1213                 .tx_thresh = {
1214                         .pthresh = DEFAULT_TX_PTHRESH,
1215                         .hthresh = DEFAULT_TX_HTHRESH,
1216                         .wthresh = DEFAULT_TX_WTHRESH,
1217                 },
1218                 .tx_free_thresh = DEFAULT_TX_FREE_THRESH,
1219                 .tx_rs_thresh = DEFAULT_TX_RSBIT_THRESH,
1220                 .txq_flags = ETH_TXQ_FLAGS_NOMULTSEGS |
1221                              ETH_TXQ_FLAGS_NOOFFLOADS,
1222         };
1223
1224         dev_info->flow_type_rss_offloads = ETH_RSS_NONFRAG_IPV4_TCP |
1225                                            ETH_RSS_NONFRAG_IPV4_UDP |
1226                                            ETH_RSS_NONFRAG_IPV6_TCP |
1227                                            ETH_RSS_NONFRAG_IPV6_UDP;
1228
1229         dev_info->reta_size = NFP_NET_CFG_RSS_ITBL_SZ;
1230         dev_info->hash_key_size = NFP_NET_CFG_RSS_KEY_SZ;
1231
1232         dev_info->speed_capa = ETH_LINK_SPEED_1G | ETH_LINK_SPEED_10G |
1233                                ETH_LINK_SPEED_25G | ETH_LINK_SPEED_40G |
1234                                ETH_LINK_SPEED_50G | ETH_LINK_SPEED_100G;
1235
1236         if (hw->cap & NFP_NET_CFG_CTRL_LSO)
1237                 dev_info->tx_offload_capa |= DEV_TX_OFFLOAD_TCP_TSO;
1238 }
1239
1240 static const uint32_t *
1241 nfp_net_supported_ptypes_get(struct rte_eth_dev *dev)
1242 {
1243         static const uint32_t ptypes[] = {
1244                 /* refers to nfp_net_set_hash() */
1245                 RTE_PTYPE_INNER_L3_IPV4,
1246                 RTE_PTYPE_INNER_L3_IPV6,
1247                 RTE_PTYPE_INNER_L3_IPV6_EXT,
1248                 RTE_PTYPE_INNER_L4_MASK,
1249                 RTE_PTYPE_UNKNOWN
1250         };
1251
1252         if (dev->rx_pkt_burst == nfp_net_recv_pkts)
1253                 return ptypes;
1254         return NULL;
1255 }
1256
1257 static uint32_t
1258 nfp_net_rx_queue_count(struct rte_eth_dev *dev, uint16_t queue_idx)
1259 {
1260         struct nfp_net_rxq *rxq;
1261         struct nfp_net_rx_desc *rxds;
1262         uint32_t idx;
1263         uint32_t count;
1264
1265         rxq = (struct nfp_net_rxq *)dev->data->rx_queues[queue_idx];
1266
1267         idx = rxq->rd_p;
1268
1269         count = 0;
1270
1271         /*
1272          * Other PMDs are just checking the DD bit in intervals of 4
1273          * descriptors and counting all four if the first has the DD
1274          * bit on. Of course, this is not accurate but can be good for
1275          * performance. Ideally that should be done in descriptor
1276          * chunks belonging to the same cache line.
1277          */
1278
1279         while (count < rxq->rx_count) {
1280                 rxds = &rxq->rxds[idx];
1281                 if ((rxds->rxd.meta_len_dd & PCIE_DESC_RX_DD) == 0)
1282                         break;
1283
1284                 count++;
1285                 idx++;
1286
1287                 /* Wrapping? */
1288                 if ((idx) == rxq->rx_count)
1289                         idx = 0;
1290         }
1291
1292         return count;
1293 }
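
/*
 * Sketch (illustrative only) of the chunked approach mentioned above:
 * count four descriptors at a time whenever the first one of a chunk has
 * its DD bit set. Faster, but less accurate than the exact loop used here:
 *
 *	while (count < rxq->rx_count &&
 *	       (rxq->rxds[idx].rxd.meta_len_dd & PCIE_DESC_RX_DD)) {
 *		count += 4;
 *		idx = (idx + 4) % rxq->rx_count;
 *	}
 */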
1294
1295 static int
1296 nfp_rx_queue_intr_enable(struct rte_eth_dev *dev, uint16_t queue_id)
1297 {
1298         struct rte_pci_device *pci_dev;
1299         struct nfp_net_hw *hw;
1300         int base = 0;
1301
1302         hw = NFP_NET_DEV_PRIVATE_TO_HW(dev->data->dev_private);
1303         pci_dev = RTE_ETH_DEV_TO_PCI(dev);
1304
1305         if (pci_dev->intr_handle.type != RTE_INTR_HANDLE_UIO)
1306                 base = 1;
1307
1308         /* Make sure all updates are written before un-masking */
1309         rte_wmb();
1310         nn_cfg_writeb(hw, NFP_NET_CFG_ICR(base + queue_id),
1311                       NFP_NET_CFG_ICR_UNMASKED);
1312         return 0;
1313 }
1314
1315 static int
1316 nfp_rx_queue_intr_disable(struct rte_eth_dev *dev, uint16_t queue_id)
1317 {
1318         struct rte_pci_device *pci_dev;
1319         struct nfp_net_hw *hw;
1320         int base = 0;
1321
1322         hw = NFP_NET_DEV_PRIVATE_TO_HW(dev->data->dev_private);
1323         pci_dev = RTE_ETH_DEV_TO_PCI(dev);
1324
1325         if (pci_dev->intr_handle.type != RTE_INTR_HANDLE_UIO)
1326                 base = 1;
1327
1328         /* Make sure all updates are written before un-masking */
1329         rte_wmb();
1330         nn_cfg_writeb(hw, NFP_NET_CFG_ICR(base + queue_id), 0x1);
1331         return 0;
1332 }
1333
1334 static void
1335 nfp_net_dev_link_status_print(struct rte_eth_dev *dev)
1336 {
1337         struct rte_pci_device *pci_dev = RTE_ETH_DEV_TO_PCI(dev);
1338         struct rte_eth_link link;
1339
1340         memset(&link, 0, sizeof(link));
1341         nfp_net_dev_atomic_read_link_status(dev, &link);
1342         if (link.link_status)
1343                 RTE_LOG(INFO, PMD, "Port %d: Link Up - speed %u Mbps - %s\n",
1344                         (int)(dev->data->port_id), (unsigned)link.link_speed,
1345                         link.link_duplex == ETH_LINK_FULL_DUPLEX
1346                         ? "full-duplex" : "half-duplex");
1347         else
1348                 RTE_LOG(INFO, PMD, " Port %d: Link Down\n",
1349                         (int)(dev->data->port_id));
1350
1351         RTE_LOG(INFO, PMD, "PCI Address: %04d:%02d:%02d:%d\n",
1352                 pci_dev->addr.domain, pci_dev->addr.bus,
1353                 pci_dev->addr.devid, pci_dev->addr.function);
1354 }
1355
1356 /* Interrupt configuration and handling */
1357
1358 /*
1359  * nfp_net_irq_unmask - Unmask an interrupt
1360  *
1361  * If MSI-X auto-masking is enabled clear the mask bit, otherwise
1362  * clear the ICR for the entry.
1363  */
1364 static void
1365 nfp_net_irq_unmask(struct rte_eth_dev *dev)
1366 {
1367         struct nfp_net_hw *hw;
1368         struct rte_pci_device *pci_dev;
1369
1370         hw = NFP_NET_DEV_PRIVATE_TO_HW(dev->data->dev_private);
1371         pci_dev = RTE_ETH_DEV_TO_PCI(dev);
1372
1373         if (hw->ctrl & NFP_NET_CFG_CTRL_MSIXAUTO) {
1374                 /* If MSI-X auto-masking is used, clear the entry */
1375                 rte_wmb();
1376                 rte_intr_enable(&pci_dev->intr_handle);
1377         } else {
1378                 /* Make sure all updates are written before un-masking */
1379                 rte_wmb();
1380                 nn_cfg_writeb(hw, NFP_NET_CFG_ICR(NFP_NET_IRQ_LSC_IDX),
1381                               NFP_NET_CFG_ICR_UNMASKED);
1382         }
1383 }
1384
1385 static void
1386 nfp_net_dev_interrupt_handler(void *param)
1387 {
1388         int64_t timeout;
1389         struct rte_eth_link link;
1390         struct rte_eth_dev *dev = (struct rte_eth_dev *)param;
1391
1392         PMD_DRV_LOG(DEBUG, "We got a LSC interrupt!!!\n");
1393
1394         /* get the link status */
1395         memset(&link, 0, sizeof(link));
1396         nfp_net_dev_atomic_read_link_status(dev, &link);
1397
1398         nfp_net_link_update(dev, 0);
1399
1400         /* link is likely coming up */
1401         if (!link.link_status) {
1402                 /* handle it 1 sec later, wait for it to become stable */
1403                 timeout = NFP_NET_LINK_UP_CHECK_TIMEOUT;
1404                 /* link is likely going down */
1405         } else {
1406                 /* handle it 4 sec later, wait for it to become stable */
1407                 timeout = NFP_NET_LINK_DOWN_CHECK_TIMEOUT;
1408         }
1409
1410         if (rte_eal_alarm_set(timeout * 1000,
1411                               nfp_net_dev_interrupt_delayed_handler,
1412                               (void *)dev) < 0) {
1413                 RTE_LOG(ERR, PMD, "Error setting alarm\n");
1414                 /* Unmasking */
1415                 nfp_net_irq_unmask(dev);
1416         }
1417 }
1418
1419 /*
1420  * Interrupt handler to be registered as an alarm callback for delayed
1421  * handling of a specific interrupt, waiting for a stable NIC state. As
1422  * the NFP interrupt state is not stable right after the link goes down,
1423  * it needs to wait 4 seconds to get a stable status.
1424  *
1425  * @param handle   Pointer to interrupt handle.
1426  * @param param    The address of parameter (struct rte_eth_dev *)
1427  *
1428  * @return  void
1429  */
1430 static void
1431 nfp_net_dev_interrupt_delayed_handler(void *param)
1432 {
1433         struct rte_eth_dev *dev = (struct rte_eth_dev *)param;
1434
1435         nfp_net_link_update(dev, 0);
1436         _rte_eth_dev_callback_process(dev, RTE_ETH_EVENT_INTR_LSC, NULL, NULL);
1437
1438         nfp_net_dev_link_status_print(dev);
1439
1440         /* Unmasking */
1441         nfp_net_irq_unmask(dev);
1442 }
1443
1444 static int
1445 nfp_net_dev_mtu_set(struct rte_eth_dev *dev, uint16_t mtu)
1446 {
1447         struct nfp_net_hw *hw;
1448
1449         hw = NFP_NET_DEV_PRIVATE_TO_HW(dev->data->dev_private);
1450
1451         /* check that mtu is within the allowed range */
1452         if ((mtu < ETHER_MIN_MTU) || ((uint32_t)mtu > hw->max_mtu))
1453                 return -EINVAL;
1454
1455         /* switch to jumbo mode if needed */
1456         if ((uint32_t)mtu > ETHER_MAX_LEN)
1457                 dev->data->dev_conf.rxmode.jumbo_frame = 1;
1458         else
1459                 dev->data->dev_conf.rxmode.jumbo_frame = 0;
1460
1461         /* update max frame size */
1462         dev->data->dev_conf.rxmode.max_rx_pkt_len = (uint32_t)mtu;
1463
1464         /* writing to configuration space */
1465         nn_cfg_writel(hw, NFP_NET_CFG_MTU, (uint32_t)mtu);
1466
1467         hw->mtu = mtu;
1468
1469         return 0;
1470 }
1471
1472 static int
1473 nfp_net_rx_queue_setup(struct rte_eth_dev *dev,
1474                        uint16_t queue_idx, uint16_t nb_desc,
1475                        unsigned int socket_id,
1476                        const struct rte_eth_rxconf *rx_conf,
1477                        struct rte_mempool *mp)
1478 {
1479         const struct rte_memzone *tz;
1480         struct nfp_net_rxq *rxq;
1481         struct nfp_net_hw *hw;
1482
1483         hw = NFP_NET_DEV_PRIVATE_TO_HW(dev->data->dev_private);
1484
1485         PMD_INIT_FUNC_TRACE();
1486
1487         /* Validating number of descriptors */
1488         if (((nb_desc * sizeof(struct nfp_net_rx_desc)) % 128) != 0 ||
1489             (nb_desc > NFP_NET_MAX_RX_DESC) ||
1490             (nb_desc < NFP_NET_MIN_RX_DESC)) {
1491                 RTE_LOG(ERR, PMD, "Wrong nb_desc value\n");
1492                 return -EINVAL;
1493         }
1494
1495         /*
1496          * Free memory prior to re-allocation if needed. This is the case after
1497          * calling nfp_net_stop
1498          */
1499         if (dev->data->rx_queues[queue_idx]) {
1500                 nfp_net_rx_queue_release(dev->data->rx_queues[queue_idx]);
1501                 dev->data->rx_queues[queue_idx] = NULL;
1502         }
1503
1504         /* Allocating rx queue data structure */
1505         rxq = rte_zmalloc_socket("ethdev RX queue", sizeof(struct nfp_net_rxq),
1506                                  RTE_CACHE_LINE_SIZE, socket_id);
1507         if (rxq == NULL)
1508                 return -ENOMEM;
1509
1510         /* Hw queues mapping based on firmware configuration */
1511         rxq->qidx = queue_idx;
1512         rxq->fl_qcidx = queue_idx * hw->stride_rx;
1513         rxq->rx_qcidx = rxq->fl_qcidx + (hw->stride_rx - 1);
1514         rxq->qcp_fl = hw->rx_bar + NFP_QCP_QUEUE_OFF(rxq->fl_qcidx);
1515         rxq->qcp_rx = hw->rx_bar + NFP_QCP_QUEUE_OFF(rxq->rx_qcidx);
1516
1517         /*
1518          * Tracking mbuf size for detecting a potential mbuf overflow due to
1519          * RX offset
1520          */
1521         rxq->mem_pool = mp;
1522         rxq->mbuf_size = rxq->mem_pool->elt_size;
1523         rxq->mbuf_size -= (sizeof(struct rte_mbuf) + RTE_PKTMBUF_HEADROOM);
1524         hw->flbufsz = rxq->mbuf_size;
1525
1526         rxq->rx_count = nb_desc;
1527         rxq->port_id = dev->data->port_id;
1528         rxq->rx_free_thresh = rx_conf->rx_free_thresh;
1529         rxq->crc_len = (uint8_t) ((dev->data->dev_conf.rxmode.hw_strip_crc) ? 0
1530                                   : ETHER_CRC_LEN);
1531         rxq->drop_en = rx_conf->rx_drop_en;
1532
1533         /*
1534          * Allocate RX ring hardware descriptors. A memzone large enough to
1535          * handle the maximum ring size is allocated in order to allow for
1536          * resizing in later calls to the queue setup function.
1537          */
1538         tz = rte_eth_dma_zone_reserve(dev, "rx_ring", queue_idx,
1539                                    sizeof(struct nfp_net_rx_desc) *
1540                                    NFP_NET_MAX_RX_DESC, NFP_MEMZONE_ALIGN,
1541                                    socket_id);
1542
1543         if (tz == NULL) {
1544                 RTE_LOG(ERR, PMD, "Error allocating rx dma\n");
1545                 nfp_net_rx_queue_release(rxq);
1546                 return -ENOMEM;
1547         }
1548
1549         /* Saving physical and virtual addresses for the RX ring */
1550         rxq->dma = (uint64_t)tz->phys_addr;
1551         rxq->rxds = (struct nfp_net_rx_desc *)tz->addr;
1552
1553         /* mbuf pointers array for referencing mbufs linked to RX descriptors */
1554         rxq->rxbufs = rte_zmalloc_socket("rxq->rxbufs",
1555                                          sizeof(*rxq->rxbufs) * nb_desc,
1556                                          RTE_CACHE_LINE_SIZE, socket_id);
1557         if (rxq->rxbufs == NULL) {
1558                 nfp_net_rx_queue_release(rxq);
1559                 return -ENOMEM;
1560         }
1561
1562         PMD_RX_LOG(DEBUG, "rxbufs=%p hw_ring=%p dma_addr=0x%" PRIx64 "\n",
1563                    rxq->rxbufs, rxq->rxds, (unsigned long int)rxq->dma);
1564
1565         nfp_net_reset_rx_queue(rxq);
1566
1567         dev->data->rx_queues[queue_idx] = rxq;
1568         rxq->hw = hw;
1569
1570         /*
1571          * Telling the HW about the physical address of the RX ring and number
1572          * of descriptors in log2 format
1573          */
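        /*
         * For example (illustrative): nb_desc = 4096 is written to the
         * firmware as rte_log2_u32(4096) = 12.
         */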
1574         nn_cfg_writeq(hw, NFP_NET_CFG_RXR_ADDR(queue_idx), rxq->dma);
1575         nn_cfg_writeb(hw, NFP_NET_CFG_RXR_SZ(queue_idx), rte_log2_u32(nb_desc));
1576
1577         return 0;
1578 }
1579
1580 static int
1581 nfp_net_rx_fill_freelist(struct nfp_net_rxq *rxq)
1582 {
1583         struct nfp_net_rx_buff *rxe = rxq->rxbufs;
1584         uint64_t dma_addr;
1585         unsigned i;
1586
1587         PMD_RX_LOG(DEBUG, "nfp_net_rx_fill_freelist for %u descriptors\n",
1588                    rxq->rx_count);
1589
1590         for (i = 0; i < rxq->rx_count; i++) {
1591                 struct nfp_net_rx_desc *rxd;
1592                 struct rte_mbuf *mbuf = rte_pktmbuf_alloc(rxq->mem_pool);
1593
1594                 if (mbuf == NULL) {
1595                         RTE_LOG(ERR, PMD, "RX mbuf alloc failed queue_id=%u\n",
1596                                 (unsigned)rxq->qidx);
1597                         return -ENOMEM;
1598                 }
1599
1600                 dma_addr = rte_cpu_to_le_64(RTE_MBUF_DMA_ADDR_DEFAULT(mbuf));
1601
1602                 rxd = &rxq->rxds[i];
1603                 rxd->fld.dd = 0;
1604                 rxd->fld.dma_addr_hi = (dma_addr >> 32) & 0xff;
1605                 rxd->fld.dma_addr_lo = dma_addr & 0xffffffff;
1606                 rxe[i].mbuf = mbuf;
1607                 PMD_RX_LOG(DEBUG, "[%d]: %" PRIx64 "\n", i, dma_addr);
1608         }
1609
1610         /* Make sure all writes are flushed before telling the hardware */
1611         rte_wmb();
1612
1613         /* Not advertising the whole ring as the firmware gets confused if so */
1614         PMD_RX_LOG(DEBUG, "Increment FL write pointer in %u\n",
1615                    rxq->rx_count - 1);
1616
1617         nfp_qcp_ptr_add(rxq->qcp_fl, NFP_QCP_WRITE_PTR, rxq->rx_count - 1);
1618
1619         return 0;
1620 }
1621
1622 static int
1623 nfp_net_tx_queue_setup(struct rte_eth_dev *dev, uint16_t queue_idx,
1624                        uint16_t nb_desc, unsigned int socket_id,
1625                        const struct rte_eth_txconf *tx_conf)
1626 {
1627         const struct rte_memzone *tz;
1628         struct nfp_net_txq *txq;
1629         uint16_t tx_free_thresh;
1630         struct nfp_net_hw *hw;
1631
1632         hw = NFP_NET_DEV_PRIVATE_TO_HW(dev->data->dev_private);
1633
1634         PMD_INIT_FUNC_TRACE();
1635
1636         /* Validating number of descriptors */
1637         if (((nb_desc * sizeof(struct nfp_net_tx_desc)) % 128) != 0 ||
1638             (nb_desc > NFP_NET_MAX_TX_DESC) ||
1639             (nb_desc < NFP_NET_MIN_TX_DESC)) {
1640                 RTE_LOG(ERR, PMD, "Wrong nb_desc value\n");
1641                 return -EINVAL;
1642         }
1643
1644         tx_free_thresh = (uint16_t)((tx_conf->tx_free_thresh) ?
1645                                     tx_conf->tx_free_thresh :
1646                                     DEFAULT_TX_FREE_THRESH);
1647
1648         if (tx_free_thresh > (nb_desc)) {
1649                 RTE_LOG(ERR, PMD,
1650                         "tx_free_thresh must be less than the number of TX "
1651                         "descriptors. (tx_free_thresh=%u port=%d "
1652                         "queue=%d)\n", (unsigned int)tx_free_thresh,
1653                         (int)dev->data->port_id, (int)queue_idx);
1654                 return -(EINVAL);
1655         }
1656
1657         /*
1658          * Free memory prior to re-allocation if needed. This is the case after
1659          * calling nfp_net_stop
1660          */
1661         if (dev->data->tx_queues[queue_idx]) {
1662                 PMD_TX_LOG(DEBUG, "Freeing memory prior to re-allocation %d\n",
1663                            queue_idx);
1664                 nfp_net_tx_queue_release(dev->data->tx_queues[queue_idx]);
1665                 dev->data->tx_queues[queue_idx] = NULL;
1666         }
1667
1668         /* Allocating tx queue data structure */
1669         txq = rte_zmalloc_socket("ethdev TX queue", sizeof(struct nfp_net_txq),
1670                                  RTE_CACHE_LINE_SIZE, socket_id);
1671         if (txq == NULL) {
1672                 RTE_LOG(ERR, PMD, "Error allocating tx queue\n");
1673                 return -ENOMEM;
1674         }
1675
1676         /*
1677          * Allocate TX ring hardware descriptors. A memzone large enough to
1678          * handle the maximum ring size is allocated in order to allow for
1679          * resizing in later calls to the queue setup function.
1680          */
1681         tz = rte_eth_dma_zone_reserve(dev, "tx_ring", queue_idx,
1682                                    sizeof(struct nfp_net_tx_desc) *
1683                                    NFP_NET_MAX_TX_DESC, NFP_MEMZONE_ALIGN,
1684                                    socket_id);
1685         if (tz == NULL) {
1686                 RTE_LOG(ERR, PMD, "Error allocating tx dma\n");
1687                 nfp_net_tx_queue_release(txq);
1688                 return -ENOMEM;
1689         }
1690
1691         txq->tx_count = nb_desc;
1692         txq->tx_free_thresh = tx_free_thresh;
1693         txq->tx_pthresh = tx_conf->tx_thresh.pthresh;
1694         txq->tx_hthresh = tx_conf->tx_thresh.hthresh;
1695         txq->tx_wthresh = tx_conf->tx_thresh.wthresh;
1696
1697         /* queue mapping based on firmware configuration */
1698         txq->qidx = queue_idx;
1699         txq->tx_qcidx = queue_idx * hw->stride_tx;
1700         txq->qcp_q = hw->tx_bar + NFP_QCP_QUEUE_OFF(txq->tx_qcidx);
1701
1702         txq->port_id = dev->data->port_id;
1703         txq->txq_flags = tx_conf->txq_flags;
1704
1705         /* Saving physical and virtual addresses for the TX ring */
1706         txq->dma = (uint64_t)tz->phys_addr;
1707         txq->txds = (struct nfp_net_tx_desc *)tz->addr;
1708
1709         /* mbuf pointers array for referencing mbufs linked to TX descriptors */
1710         txq->txbufs = rte_zmalloc_socket("txq->txbufs",
1711                                          sizeof(*txq->txbufs) * nb_desc,
1712                                          RTE_CACHE_LINE_SIZE, socket_id);
1713         if (txq->txbufs == NULL) {
1714                 nfp_net_tx_queue_release(txq);
1715                 return -ENOMEM;
1716         }
1717         PMD_TX_LOG(DEBUG, "txbufs=%p hw_ring=%p dma_addr=0x%" PRIx64 "\n",
1718                    txq->txbufs, txq->txds, (unsigned long int)txq->dma);
1719
1720         nfp_net_reset_tx_queue(txq);
1721
1722         dev->data->tx_queues[queue_idx] = txq;
1723         txq->hw = hw;
1724
1725         /*
1726          * Telling the HW about the physical address of the TX ring and number
1727          * of descriptors in log2 format
1728          */
1729         nn_cfg_writeq(hw, NFP_NET_CFG_TXR_ADDR(queue_idx), txq->dma);
1730         nn_cfg_writeb(hw, NFP_NET_CFG_TXR_SZ(queue_idx), rte_log2_u32(nb_desc));
1731
1732         return 0;
1733 }
1734
1735 /* nfp_net_tx_tso - Set TX descriptor for TSO */
1736 static inline void
1737 nfp_net_tx_tso(struct nfp_net_txq *txq, struct nfp_net_tx_desc *txd,
1738                struct rte_mbuf *mb)
1739 {
1740         uint64_t ol_flags;
1741         struct nfp_net_hw *hw = txq->hw;
1742
1743         if (!(hw->cap & NFP_NET_CFG_CTRL_LSO))
1744                 goto clean_txd;
1745
1746         ol_flags = mb->ol_flags;
1747
1748         if (!(ol_flags & PKT_TX_TCP_SEG))
1749                 goto clean_txd;
1750
1751         txd->l4_offset = mb->l2_len + mb->l3_len + mb->l4_len;
1752         txd->lso = rte_cpu_to_le_16(mb->tso_segsz);
1753         txd->flags = PCIE_DESC_TX_LSO;
1754         return;
1755
1756 clean_txd:
1757         txd->flags = 0;
1758         txd->l4_offset = 0;
1759         txd->lso = 0;
1760 }
1761
1762 /* nfp_net_tx_cksum - Set TX CSUM offload flags in TX descriptor */
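/*
 * For example (illustrative): a TCP/IPv4 packet with PKT_TX_IP_CKSUM and
 * PKT_TX_TCP_CKSUM set in ol_flags ends up with PCIE_DESC_TX_IP4_CSUM,
 * PCIE_DESC_TX_TCP_CSUM and PCIE_DESC_TX_CSUM set in the descriptor.
 */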
1763 static inline void
1764 nfp_net_tx_cksum(struct nfp_net_txq *txq, struct nfp_net_tx_desc *txd,
1765                  struct rte_mbuf *mb)
1766 {
1767         uint64_t ol_flags;
1768         struct nfp_net_hw *hw = txq->hw;
1769
1770         if (!(hw->cap & NFP_NET_CFG_CTRL_TXCSUM))
1771                 return;
1772
1773         ol_flags = mb->ol_flags;
1774
1775         /* Set IPv4 header checksum offload; IPv6 has no header checksum */
1776         if (ol_flags & PKT_TX_IP_CKSUM)
1777                 txd->flags |= PCIE_DESC_TX_IP4_CSUM;
1778
1779         switch (ol_flags & PKT_TX_L4_MASK) {
1780         case PKT_TX_UDP_CKSUM:
1781                 txd->flags |= PCIE_DESC_TX_UDP_CSUM;
1782                 break;
1783         case PKT_TX_TCP_CKSUM:
1784                 txd->flags |= PCIE_DESC_TX_TCP_CSUM;
1785                 break;
1786         }
1787
1788         if (ol_flags & (PKT_TX_IP_CKSUM | PKT_TX_L4_MASK))
1789                 txd->flags |= PCIE_DESC_TX_CSUM;
1790 }
1791
1792 /* nfp_net_rx_cksum - set mbuf checksum flags based on RX descriptor flags */
1793 static inline void
1794 nfp_net_rx_cksum(struct nfp_net_rxq *rxq, struct nfp_net_rx_desc *rxd,
1795                  struct rte_mbuf *mb)
1796 {
1797         struct nfp_net_hw *hw = rxq->hw;
1798
1799         if (!(hw->ctrl & NFP_NET_CFG_CTRL_RXCSUM))
1800                 return;
1801
1802         /* If IPv4 and IP checksum error, fail */
1803         if ((rxd->rxd.flags & PCIE_DESC_RX_IP4_CSUM) &&
1804             !(rxd->rxd.flags & PCIE_DESC_RX_IP4_CSUM_OK))
1805                 mb->ol_flags |= PKT_RX_IP_CKSUM_BAD;
1806
1807         /* If neither UDP nor TCP, there is nothing more to do */
1808         if (!(rxd->rxd.flags & PCIE_DESC_RX_TCP_CSUM) &&
1809             !(rxd->rxd.flags & PCIE_DESC_RX_UDP_CSUM))
1810                 return;
1811
1812         if ((rxd->rxd.flags & PCIE_DESC_RX_TCP_CSUM) &&
1813             !(rxd->rxd.flags & PCIE_DESC_RX_TCP_CSUM_OK))
1814                 mb->ol_flags |= PKT_RX_L4_CKSUM_BAD;
1815
1816         if ((rxd->rxd.flags & PCIE_DESC_RX_UDP_CSUM) &&
1817             !(rxd->rxd.flags & PCIE_DESC_RX_UDP_CSUM_OK))
1818                 mb->ol_flags |= PKT_RX_L4_CKSUM_BAD;
1819 }
1820
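/*
 * With the legacy metadata layout (NFD version <= 3) the 32-bit RSS hash sits
 * in the 4 bytes immediately before the packet data and the hash type in the
 * 4 bytes before that; these two offsets point at those words.
 */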
1821 #define NFP_HASH_OFFSET      ((uint8_t *)mbuf->buf_addr + mbuf->data_off - 4)
1822 #define NFP_HASH_TYPE_OFFSET ((uint8_t *)mbuf->buf_addr + mbuf->data_off - 8)
1823
1824 #define NFP_DESC_META_LEN(d) (d->rxd.meta_len_dd & PCIE_DESC_RX_META_LEN_MASK)
1825
1826 /*
1827  * nfp_net_set_hash - Set mbuf hash data
1828  *
1829  * The RSS hash and hash-type are pre-pended to the packet data.
1830  * Extract and decode it and set the mbuf fields.
1831  */
1832 static inline void
1833 nfp_net_set_hash(struct nfp_net_rxq *rxq, struct nfp_net_rx_desc *rxd,
1834                  struct rte_mbuf *mbuf)
1835 {
1836         struct nfp_net_hw *hw = rxq->hw;
1837         uint8_t *meta_offset;
1838         uint32_t meta_info;
1839         uint32_t hash = 0;
1840         uint32_t hash_type = 0;
1841
1842         if (!(hw->ctrl & NFP_NET_CFG_CTRL_RSS))
1843                 return;
1844
1845         if (NFD_CFG_MAJOR_VERSION_of(hw->ver) <= 3) {
1846                 if (!(rxd->rxd.flags & PCIE_DESC_RX_RSS))
1847                         return;
1848
1849                 hash = rte_be_to_cpu_32(*(uint32_t *)NFP_HASH_OFFSET);
1850                 hash_type = rte_be_to_cpu_32(*(uint32_t *)NFP_HASH_TYPE_OFFSET);
1851
1852         } else if (NFP_DESC_META_LEN(rxd)) {
1853                 /*
1854                  * new metadata api:
1855                  * <----  32 bit  ----->
1856                  * m    field type word
1857                  * e     data field #2
1858                  * t     data field #1
1859                  * a     data field #0
1860                  * ====================
1861                  *    packet data
1862                  *
1863                  * The field type word contains up to eight 4-bit field types.
1864                  * Each 4-bit field type refers to a data field word, and a
1865                  * data field word can be shared by several 4-bit field types.
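                 *
                 * For example (illustrative): when the lowest 4 bits of the
                 * type word are NFP_NET_META_HASH, the next 4 bits carry the
                 * hash type and the data field word immediately after the
                 * type word carries the hash value, which is how the code
                 * below consumes it.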
1866                  */
1867                 meta_offset = rte_pktmbuf_mtod(mbuf, uint8_t *);
1868                 meta_offset -= NFP_DESC_META_LEN(rxd);
1869                 meta_info = rte_be_to_cpu_32(*(uint32_t *)meta_offset);
1870                 meta_offset += 4;
1871                 /* NFP PMD just supports metadata for hashing */
1872                 switch (meta_info & NFP_NET_META_FIELD_MASK) {
1873                 case NFP_NET_META_HASH:
1874                         /* next field type is about the hash type */
1875                         meta_info >>= NFP_NET_META_FIELD_SIZE;
1876                         /* hash value is in the data field */
1877                         hash = rte_be_to_cpu_32(*(uint32_t *)meta_offset);
1878                         hash_type = meta_info & NFP_NET_META_FIELD_MASK;
1879                         break;
1880                 default:
1881                         /* Unsupported metadata can be a performance issue */
1882                         return;
1883                 }
1884         } else {
1885                 return;
1886         }
1887
1888         mbuf->hash.rss = hash;
1889         mbuf->ol_flags |= PKT_RX_RSS_HASH;
1890
1891         switch (hash_type) {
1892         case NFP_NET_RSS_IPV4:
1893                 mbuf->packet_type |= RTE_PTYPE_INNER_L3_IPV4;
1894                 break;
1895         case NFP_NET_RSS_IPV6:
1896                 mbuf->packet_type |= RTE_PTYPE_INNER_L3_IPV6;
1897                 break;
1898         case NFP_NET_RSS_IPV6_EX:
1899                 mbuf->packet_type |= RTE_PTYPE_INNER_L3_IPV6_EXT;
1900                 break;
1901         default:
1902                 mbuf->packet_type |= RTE_PTYPE_INNER_L4_MASK;
1903         }
1904 }
1905
1906 static inline void
1907 nfp_net_mbuf_alloc_failed(struct nfp_net_rxq *rxq)
1908 {
1909         rte_eth_devices[rxq->port_id].data->rx_mbuf_alloc_failed++;
1910 }
1911
1914 /*
1915  * RX path design:
1916  *
1917  * There are two main design decisions:
1918  * 1) How to check the DD bit of the RX descriptors
1919  * 2) How and when to allocate new mbufs
1920  *
1921  * The current implementation checks a single DD bit per loop iteration. As
1922  * each descriptor is 8 bytes, it could make sense to check all descriptors
1923  * sharing a cache line instead. Tests with that change have not shown any
1924  * performance improvement, but it deserves further investigation. For
1925  * example, depending on which descriptor comes next, fewer than 8
1926  * descriptors may share the current cache line, which implies extra
1927  * bookkeeping that could be counterproductive by itself. Recent firmware
1928  * changes already go in this direction: writing several descriptors with
1929  * the DD bit set to save PCIe bandwidth and DMA operations from the NFP.
1930  *
1931  * An mbuf is allocated when a new packet is received. The descriptor is
1932  * then linked to the new mbuf and the old one is handed to the user. The
1933  * main drawback of this design is that per-packet allocation is heavier
1934  * than the bulk allocation DPDK offers with rte_mempool_get_bulk. From a
1935  * cache point of view, allocating the mbuf early, as done now, does not
1936  * seem to bring any benefit; again, tests with that change have not shown
1937  * any improvement. Also, rte_mempool_get_bulk is all-or-nothing, so the
1938  * implications of that allocation scheme still need to be studied in
1939  * depth.
1940  */
1941
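/*
 * Illustrative sketch only, not wired into the RX path below: the
 * bulk-allocation alternative discussed above. It assumes
 * rte_pktmbuf_alloc_bulk() from rte_mbuf.h is available in the DPDK version
 * in use; the helper name is hypothetical. Being all-or-nothing, a failure
 * here would mean falling back to the per-packet rte_pktmbuf_alloc() calls
 * the driver actually uses.
 */
static int __rte_unused
nfp_net_rx_alloc_bulk_sketch(struct nfp_net_rxq *rxq,
                             struct rte_mbuf **cache, uint16_t burst)
{
        /* Either the whole burst is allocated or nothing is */
        if (rte_pktmbuf_alloc_bulk(rxq->mem_pool, cache, burst) != 0) {
                nfp_net_mbuf_alloc_failed(rxq);
                return -ENOMEM;
        }

        return 0;
}
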
1942 static uint16_t
1943 nfp_net_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t nb_pkts)
1944 {
1945         struct nfp_net_rxq *rxq;
1946         struct nfp_net_rx_desc *rxds;
1947         struct nfp_net_rx_buff *rxb;
1948         struct nfp_net_hw *hw;
1949         struct rte_mbuf *mb;
1950         struct rte_mbuf *new_mb;
1951         uint16_t nb_hold;
1952         uint64_t dma_addr;
1953         int avail;
1954
1955         rxq = rx_queue;
1956         if (unlikely(rxq == NULL)) {
1957                 /*
1958                  * DPDK just checks that the queue index is below the number
1959                  * of enabled queues, but the queue still needs to be configured
1960                  */
1961                 RTE_LOG_DP(ERR, PMD, "RX Bad queue\n");
1962                 return 0;
1963         }
1964
1965         hw = rxq->hw;
1966         avail = 0;
1967         nb_hold = 0;
1968
1969         while (avail < nb_pkts) {
1970                 rxb = &rxq->rxbufs[rxq->rd_p];
1971                 if (unlikely(rxb == NULL)) {
1972                         RTE_LOG_DP(ERR, PMD, "rxb does not exist!\n");
1973                         break;
1974                 }
1975
1976                 /*
1977                  * Memory barrier to ensure that we won't do other
1978                  * reads before the DD bit.
1979                  */
1980                 rte_rmb();
1981
1982                 rxds = &rxq->rxds[rxq->rd_p];
1983                 if ((rxds->rxd.meta_len_dd & PCIE_DESC_RX_DD) == 0)
1984                         break;
1985
1986                 /*
1987                  * We got a packet. Let's allocate a new mbuf for refilling the
1988                  * free descriptor ring as soon as possible
1989                  */
1990                 new_mb = rte_pktmbuf_alloc(rxq->mem_pool);
1991                 if (unlikely(new_mb == NULL)) {
1992                         RTE_LOG_DP(DEBUG, PMD, "RX mbuf alloc failed port_id=%u "
1993                                 "queue_id=%u\n", (unsigned)rxq->port_id,
1994                                 (unsigned)rxq->qidx);
1995                         nfp_net_mbuf_alloc_failed(rxq);
1996                         break;
1997                 }
1998
1999                 nb_hold++;
2000
2001                 /*
2002                  * Grab the mbuf and refill the descriptor with the
2003                  * previously allocated mbuf
2004                  */
2005                 mb = rxb->mbuf;
2006                 rxb->mbuf = new_mb;
2007
2008                 PMD_RX_LOG(DEBUG, "Packet len: %u, mbuf_size: %u\n",
2009                            rxds->rxd.data_len, rxq->mbuf_size);
2010
2011                 /* Size of this segment */
2012                 mb->data_len = rxds->rxd.data_len - NFP_DESC_META_LEN(rxds);
2013                 /* Size of the whole packet. We just support 1 segment */
2014                 mb->pkt_len = rxds->rxd.data_len - NFP_DESC_META_LEN(rxds);
2015
2016                 if (unlikely((mb->data_len + hw->rx_offset) >
2017                              rxq->mbuf_size)) {
2018                         /*
2019                          * This should not happen and the user has the
2020                          * responsibility of avoiding it. But we have
2021                          * to give some info about the error
2022                          */
2023                         RTE_LOG_DP(ERR, PMD,
2024                                 "mbuf overflow likely due to the RX offset.\n"
2025                                 "\t\tYour mbuf size should have extra space for"
2026                                 " RX offset=%u bytes.\n"
2027                                 "\t\tCurrently you just have %u bytes available"
2028                                 " but the received packet is %u bytes long",
2029                                 hw->rx_offset,
2030                                 rxq->mbuf_size - hw->rx_offset,
2031                                 mb->data_len);
2032                         return avail;
2033                 }
2034
2035                 /* Filling the received mbuf with packet info */
2036                 if (hw->rx_offset)
2037                         mb->data_off = RTE_PKTMBUF_HEADROOM + hw->rx_offset;
2038                 else
2039                         mb->data_off = RTE_PKTMBUF_HEADROOM +
2040                                        NFP_DESC_META_LEN(rxds);
2041
2042                 /* No scatter mode supported */
2043                 mb->nb_segs = 1;
2044                 mb->next = NULL;
2045
2046                 /* Checking the RSS flag */
2047                 nfp_net_set_hash(rxq, rxds, mb);
2048
2049                 /* Checking the checksum flag */
2050                 nfp_net_rx_cksum(rxq, rxds, mb);
2051
2052                 if ((rxds->rxd.flags & PCIE_DESC_RX_VLAN) &&
2053                     (hw->ctrl & NFP_NET_CFG_CTRL_RXVLAN)) {
2054                         mb->vlan_tci = rte_cpu_to_le_32(rxds->rxd.vlan);
2055                         mb->ol_flags |= PKT_RX_VLAN_PKT | PKT_RX_VLAN_STRIPPED;
2056                 }
2057
2058                 /* Adding the mbuf to the mbuf array passed by the app */
2059                 rx_pkts[avail++] = mb;
2060
2061                 /* Now resetting and updating the descriptor */
2062                 rxds->vals[0] = 0;
2063                 rxds->vals[1] = 0;
2064                 dma_addr = rte_cpu_to_le_64(RTE_MBUF_DMA_ADDR_DEFAULT(new_mb));
2065                 rxds->fld.dd = 0;
2066                 rxds->fld.dma_addr_hi = (dma_addr >> 32) & 0xff;
2067                 rxds->fld.dma_addr_lo = dma_addr & 0xffffffff;
2068
2069                 rxq->rd_p++;
2070                 if (unlikely(rxq->rd_p == rxq->rx_count)) /* wrapping? */
2071                         rxq->rd_p = 0;
2072         }
2073
2074         if (nb_hold == 0)
2075                 return nb_hold;
2076
2077         PMD_RX_LOG(DEBUG, "RX port_id=%u queue_id=%u, %d packets received\n",
2078                    (unsigned)rxq->port_id, (unsigned)rxq->qidx, nb_hold);
2079
2080         nb_hold += rxq->nb_rx_hold;
2081
2082         /*
2083          * FL descriptors need to be written before incrementing the
2084          * FL queue WR pointer
2085          */
2086         rte_wmb();
2087         if (nb_hold > rxq->rx_free_thresh) {
2088                 PMD_RX_LOG(DEBUG, "port=%u queue=%u nb_hold=%u avail=%u\n",
2089                            (unsigned)rxq->port_id, (unsigned)rxq->qidx,
2090                            (unsigned)nb_hold, (unsigned)avail);
2091                 nfp_qcp_ptr_add(rxq->qcp_fl, NFP_QCP_WRITE_PTR, nb_hold);
2092                 nb_hold = 0;
2093         }
2094         rxq->nb_rx_hold = nb_hold;
2095
2096         return avail;
2097 }
2098
2099 /*
2100  * nfp_net_tx_free_bufs - Check for descriptors with a complete
2101  * status
2102  * @txq: TX queue to work with
2103  * Returns number of descriptors freed
2104  */
2105 int
2106 nfp_net_tx_free_bufs(struct nfp_net_txq *txq)
2107 {
2108         uint32_t qcp_rd_p;
2109         int todo;
2110
2111         PMD_TX_LOG(DEBUG, "queue %u. Check for descriptor with a complete"
2112                    " status\n", txq->qidx);
2113
2114         /* Work out how many packets have been sent */
2115         qcp_rd_p = nfp_qcp_read(txq->qcp_q, NFP_QCP_READ_PTR);
2116
2117         if (qcp_rd_p == txq->rd_p) {
2118                 PMD_TX_LOG(DEBUG, "queue %u: It seems harrier is not sending "
2119                            "packets (%u, %u)\n", txq->qidx,
2120                            qcp_rd_p, txq->rd_p);
2121                 return 0;
2122         }
2123
2124         if (qcp_rd_p > txq->rd_p)
2125                 todo = qcp_rd_p - txq->rd_p;
2126         else
2127                 todo = qcp_rd_p + txq->tx_count - txq->rd_p;
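        /*
         * For example (illustrative values): with tx_count = 1024,
         * rd_p = 1000 and qcp_rd_p = 10, the queue pointer wrapped and
         * todo = 10 + 1024 - 1000 = 34 descriptors have completed.
         */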
2128
2129         PMD_TX_LOG(DEBUG, "qcp_rd_p %u, txq->rd_p: %u\n",
2130                    qcp_rd_p, txq->rd_p);
2131
2132         if (todo == 0)
2133                 return todo;
2134
2135         txq->rd_p += todo;
2136         if (unlikely(txq->rd_p >= txq->tx_count))
2137                 txq->rd_p -= txq->tx_count;
2138
2139         return todo;
2140 }
2141
2142 /* Always leave some descriptors free to avoid wrap-around confusion */
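/*
 * For example (illustrative values): with tx_count = 1024, wr_p = 430 and
 * rd_p = 420, nfp_free_tx_desc() reports 1024 - (430 - 420) - 8 = 1006 free
 * descriptors; eight descriptors are always kept back.
 */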
2143 static inline
2144 uint32_t nfp_free_tx_desc(struct nfp_net_txq *txq)
2145 {
2146         if (txq->wr_p >= txq->rd_p)
2147                 return txq->tx_count - (txq->wr_p - txq->rd_p) - 8;
2148         else
2149                 return txq->rd_p - txq->wr_p - 8;
2150 }
2151
2152 /*
2153  * nfp_net_txq_full - Check if the number of free TX descriptors
2154  * is below tx_free_thresh
2155  *
2156  * @txq: TX queue to check
2157  *
2158  * This function uses the host copy of the read/write pointers
2159  */
2160 static inline
2161 uint32_t nfp_net_txq_full(struct nfp_net_txq *txq)
2162 {
2163         return (nfp_free_tx_desc(txq) < txq->tx_free_thresh);
2164 }
2165
2166 static uint16_t
2167 nfp_net_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t nb_pkts)
2168 {
2169         struct nfp_net_txq *txq;
2170         struct nfp_net_hw *hw;
2171         struct nfp_net_tx_desc *txds, txd;
2172         struct rte_mbuf *pkt;
2173         uint64_t dma_addr;
2174         int pkt_size, dma_size;
2175         uint16_t free_descs, issued_descs;
2176         struct rte_mbuf **lmbuf;
2177         int i;
2178
2179         txq = tx_queue;
2180         hw = txq->hw;
2181         txds = &txq->txds[txq->wr_p];
2182
2183         PMD_TX_LOG(DEBUG, "working for queue %u at pos %d and %u packets\n",
2184                    txq->qidx, txq->wr_p, nb_pkts);
2185
2186         if ((nfp_free_tx_desc(txq) < nb_pkts) || (nfp_net_txq_full(txq)))
2187                 nfp_net_tx_free_bufs(txq);
2188
2189         free_descs = (uint16_t)nfp_free_tx_desc(txq);
2190         if (unlikely(free_descs == 0))
2191                 return 0;
2192
2193         pkt = *tx_pkts;
2194
2195         i = 0;
2196         issued_descs = 0;
2197         PMD_TX_LOG(DEBUG, "queue: %u. Sending %u packets\n",
2198                    txq->qidx, nb_pkts);
2199         /* Sending packets */
2200         while ((i < nb_pkts) && free_descs) {
2201                 /* Grabbing the mbuf linked to the current descriptor */
2202                 lmbuf = &txq->txbufs[txq->wr_p].mbuf;
2203                 /* Warming the cache for releasing the mbuf later on */
2204                 RTE_MBUF_PREFETCH_TO_FREE(*lmbuf);
2205
2206                 pkt = *(tx_pkts + i);
2207
2208                 if (unlikely((pkt->nb_segs > 1) &&
2209                              !(hw->cap & NFP_NET_CFG_CTRL_GATHER))) {
2210                         PMD_INIT_LOG(INFO, "NFP_NET_CFG_CTRL_GATHER not set");
2211                         rte_panic("Multisegment packet unsupported\n");
2212                 }
2213
2214                 /* Checking if we have enough descriptors */
2215                 if (unlikely(pkt->nb_segs > free_descs))
2216                         goto xmit_end;
2217
2218                 /*
2219                  * Checksum and VLAN flags are set only in the first descriptor
2220                  * of a multisegment packet, but TSO info needs to be in all of them.
2221                  */
2222                 txd.data_len = pkt->pkt_len;
2223                 nfp_net_tx_tso(txq, &txd, pkt);
2224                 nfp_net_tx_cksum(txq, &txd, pkt);
2225
2226                 if ((pkt->ol_flags & PKT_TX_VLAN_PKT) &&
2227                     (hw->cap & NFP_NET_CFG_CTRL_TXVLAN)) {
2228                         txd.flags |= PCIE_DESC_TX_VLAN;
2229                         txd.vlan = pkt->vlan_tci;
2230                 }
2231
2232                 /*
2233                  * mbuf data_len is the length of one segment and pkt_len the
2234                  * length of the whole packet. When the packet has just one
2235                  * segment, data_len == pkt_len
2236                  */
2237                 pkt_size = pkt->pkt_len;
2238
2239                 while (pkt) {
2240                         /* Copying TSO, VLAN and cksum info */
2241                         *txds = txd;
2242
2243                         /* Releasing mbuf used by this descriptor previously */
2244                         if (*lmbuf)
2245                                 rte_pktmbuf_free_seg(*lmbuf);
2246
2247                         /*
2248                          * Linking the mbuf with the descriptor so it is released
2249                          * the next time this descriptor is used
2250                          */
2251                         *lmbuf = pkt;
2252
2253                         dma_size = pkt->data_len;
2254                         dma_addr = rte_mbuf_data_dma_addr(pkt);
2255                         PMD_TX_LOG(DEBUG, "Working with mbuf at dma address:"
2256                                    "%" PRIx64 "\n", dma_addr);
2257
2258                         /* Filling descriptors fields */
2259                         txds->dma_len = dma_size;
2260                         txds->data_len = txd.data_len;
2261                         txds->dma_addr_hi = (dma_addr >> 32) & 0xff;
2262                         txds->dma_addr_lo = (dma_addr & 0xffffffff);
2263                         ASSERT(free_descs > 0);
2264                         free_descs--;
2265
2266                         txq->wr_p++;
2267                         if (unlikely(txq->wr_p == txq->tx_count)) /* wrapping? */
2268                                 txq->wr_p = 0;
2269
2270                         pkt_size -= dma_size;
2271                         if (!pkt_size)
2272                                 /* End of packet */
2273                                 txds->offset_eop |= PCIE_DESC_TX_EOP;
2274                         else
2275                                 txds->offset_eop &= PCIE_DESC_TX_OFFSET_MASK;
2276
2277                         pkt = pkt->next;
2278                         /* Referencing next free TX descriptor */
2279                         txds = &txq->txds[txq->wr_p];
2280                         lmbuf = &txq->txbufs[txq->wr_p].mbuf;
2281                         issued_descs++;
2282                 }
2283                 i++;
2284         }
2285
2286 xmit_end:
2287         /* Increment write pointers. Force memory write before we let HW know */
2288         rte_wmb();
2289         nfp_qcp_ptr_add(txq->qcp_q, NFP_QCP_WRITE_PTR, issued_descs);
2290
2291         return i;
2292 }
2293
2294 static void
2295 nfp_net_vlan_offload_set(struct rte_eth_dev *dev, int mask)
2296 {
2297         uint32_t new_ctrl, update;
2298         struct nfp_net_hw *hw;
2299
2300         hw = NFP_NET_DEV_PRIVATE_TO_HW(dev->data->dev_private);
2301         new_ctrl = 0;
2302
2303         if ((mask & ETH_VLAN_FILTER_OFFLOAD) ||
2304             (mask & ETH_VLAN_EXTEND_OFFLOAD))
2305                 RTE_LOG(INFO, PMD, "No support for ETH_VLAN_FILTER_OFFLOAD or"
2306                         " ETH_VLAN_EXTEND_OFFLOAD\n");
2307
2308         /* Enable vlan strip if it is not configured yet */
2309         if ((mask & ETH_VLAN_STRIP_OFFLOAD) &&
2310             !(hw->ctrl & NFP_NET_CFG_CTRL_RXVLAN))
2311                 new_ctrl = hw->ctrl | NFP_NET_CFG_CTRL_RXVLAN;
2312
2313         /* Disable vlan strip only if it is currently configured */
2314         if (!(mask & ETH_VLAN_STRIP_OFFLOAD) &&
2315             (hw->ctrl & NFP_NET_CFG_CTRL_RXVLAN))
2316                 new_ctrl = hw->ctrl & ~NFP_NET_CFG_CTRL_RXVLAN;
2317
2318         if (new_ctrl == 0)
2319                 return;
2320
2321         update = NFP_NET_CFG_UPDATE_GEN;
2322
2323         if (nfp_net_reconfig(hw, new_ctrl, update) < 0)
2324                 return;
2325
2326         hw->ctrl = new_ctrl;
2327 }
2328
2329 /* Update Redirection Table(RETA) of Receive Side Scaling of Ethernet device */
2330 static int
2331 nfp_net_reta_update(struct rte_eth_dev *dev,
2332                     struct rte_eth_rss_reta_entry64 *reta_conf,
2333                     uint16_t reta_size)
2334 {
2335         uint32_t reta, mask;
2336         int i, j;
2337         int idx, shift;
2338         uint32_t update;
2339         struct nfp_net_hw *hw =
2340                 NFP_NET_DEV_PRIVATE_TO_HW(dev->data->dev_private);
2341
2342         if (!(hw->ctrl & NFP_NET_CFG_CTRL_RSS))
2343                 return -EINVAL;
2344
2345         if (reta_size != NFP_NET_CFG_RSS_ITBL_SZ) {
2346                 RTE_LOG(ERR, PMD, "The size of hash lookup table configured "
2347                         "(%d) doesn't match the number the hardware can support "
2348                         "(%d)\n", reta_size, NFP_NET_CFG_RSS_ITBL_SZ);
2349                 return -EINVAL;
2350         }
2351
2352         /*
2353          * Update Redirection Table. There are 128 8-bit entries which can be
2354          * managed as 32 32-bit entries
2355          */
2356         for (i = 0; i < reta_size; i += 4) {
2357                 /* Handling 4 RSS entries per loop */
2358                 idx = i / RTE_RETA_GROUP_SIZE;
2359                 shift = i % RTE_RETA_GROUP_SIZE;
2360                 mask = (uint8_t)((reta_conf[idx].mask >> shift) & 0xF);
2361
2362                 if (!mask)
2363                         continue;
2364
2365                 reta = 0;
2366                 /* If all 4 entries were set, no need to read the RETA register */
2367                 if (mask != 0xF)
2368                         reta = nn_cfg_readl(hw, NFP_NET_CFG_RSS_ITBL + i);
2369
2370                 for (j = 0; j < 4; j++) {
2371                         if (!(mask & (0x1 << j)))
2372                                 continue;
2373                         if (mask != 0xF)
2374                                 /* Clearing the entry bits */
2375                                 reta &= ~(0xFF << (8 * j));
2376                         reta |= reta_conf[idx].reta[shift + j] << (8 * j);
2377                 }
2378                 nn_cfg_writel(hw, NFP_NET_CFG_RSS_ITBL + (idx * 64) + shift,
2379                               reta);
2380         }
2381
2382         update = NFP_NET_CFG_UPDATE_RSS;
2383
2384         if (nfp_net_reconfig(hw, hw->ctrl, update) < 0)
2385                 return -EIO;
2386
2387         return 0;
2388 }
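
/*
 * Illustrative sketch only: how an application could build the reta_conf
 * array consumed above, spreading the redirection table entries
 * (NFP_NET_CFG_RSS_ITBL_SZ of them) round-robin over nb_queues RX queues.
 * The helper name is hypothetical; the actual update still has to go through
 * the ethdev reta_update callback.
 */
static void __rte_unused
nfp_net_reta_fill_sketch(struct rte_eth_rss_reta_entry64 *reta_conf,
                         uint16_t nb_queues)
{
        int i, idx, shift;

        if (nb_queues == 0)
                return;

        memset(reta_conf, 0, (NFP_NET_CFG_RSS_ITBL_SZ / RTE_RETA_GROUP_SIZE) *
               sizeof(*reta_conf));

        for (i = 0; i < NFP_NET_CFG_RSS_ITBL_SZ; i++) {
                idx = i / RTE_RETA_GROUP_SIZE;
                shift = i % RTE_RETA_GROUP_SIZE;
                reta_conf[idx].mask |= 1ULL << shift;
                reta_conf[idx].reta[shift] = i % nb_queues;
        }
}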
2389
2390 /* Query Redirection Table (RETA) of Receive Side Scaling of Ethernet device. */
2391 static int
2392 nfp_net_reta_query(struct rte_eth_dev *dev,
2393                    struct rte_eth_rss_reta_entry64 *reta_conf,
2394                    uint16_t reta_size)
2395 {
2396         uint8_t i, j, mask;
2397         int idx, shift;
2398         uint32_t reta;
2399         struct nfp_net_hw *hw;
2400
2401         hw = NFP_NET_DEV_PRIVATE_TO_HW(dev->data->dev_private);
2402
2403         if (!(hw->ctrl & NFP_NET_CFG_CTRL_RSS))
2404                 return -EINVAL;
2405
2406         if (reta_size != NFP_NET_CFG_RSS_ITBL_SZ) {
2407                 RTE_LOG(ERR, PMD, "The size of hash lookup table configured "
2408                         "(%d) doesn't match the number the hardware can support "
2409                         "(%d)\n", reta_size, NFP_NET_CFG_RSS_ITBL_SZ);
2410                 return -EINVAL;
2411         }
2412
2413         /*
2414          * Reading Redirection Table. There are 128 8-bit entries which can be
2415          * managed as 32 32-bit entries
2416          */
2417         for (i = 0; i < reta_size; i += 4) {
2418                 /* Handling 4 RSS entries per loop */
2419                 idx = i / RTE_RETA_GROUP_SIZE;
2420                 shift = i % RTE_RETA_GROUP_SIZE;
2421                 mask = (uint8_t)((reta_conf[idx].mask >> shift) & 0xF);
2422
2423                 if (!mask)
2424                         continue;
2425
2426                 reta = nn_cfg_readl(hw, NFP_NET_CFG_RSS_ITBL + (idx * 64) +
2427                                     shift);
2428                 for (j = 0; j < 4; j++) {
2429                         if (!(mask & (0x1 << j)))
2430                                 continue;
2431                         reta_conf[idx].reta[shift + j] =
2432                                 (uint8_t)((reta >> (8 * j)) & 0xFF);
2433                 }
2434         }
2435         return 0;
2436 }
2437
2438 static int
2439 nfp_net_rss_hash_update(struct rte_eth_dev *dev,
2440                         struct rte_eth_rss_conf *rss_conf)
2441 {
2442         uint32_t update;
2443         uint32_t cfg_rss_ctrl = 0;
2444         uint8_t key;
2445         uint64_t rss_hf;
2446         int i;
2447         struct nfp_net_hw *hw;
2448
2449         hw = NFP_NET_DEV_PRIVATE_TO_HW(dev->data->dev_private);
2450
2451         rss_hf = rss_conf->rss_hf;
2452
2453         /* Checking if RSS is enabled */
2454         if (!(hw->ctrl & NFP_NET_CFG_CTRL_RSS)) {
2455                 if (rss_hf != 0) { /* Enable RSS? */
2456                         RTE_LOG(ERR, PMD, "RSS unsupported\n");
2457                         return -EINVAL;
2458                 }
2459                 return 0; /* Nothing to do */
2460         }
2461
2462         if (rss_conf->rss_key_len > NFP_NET_CFG_RSS_KEY_SZ) {
2463                 RTE_LOG(ERR, PMD, "hash key too long\n");
2464                 return -EINVAL;
2465         }
2466
2467         if (rss_hf & ETH_RSS_IPV4)
2468                 cfg_rss_ctrl |= NFP_NET_CFG_RSS_IPV4 |
2469                                 NFP_NET_CFG_RSS_IPV4_TCP |
2470                                 NFP_NET_CFG_RSS_IPV4_UDP;
2471
2472         if (rss_hf & ETH_RSS_IPV6)
2473                 cfg_rss_ctrl |= NFP_NET_CFG_RSS_IPV6 |
2474                                 NFP_NET_CFG_RSS_IPV6_TCP |
2475                                 NFP_NET_CFG_RSS_IPV6_UDP;
2476
2477         cfg_rss_ctrl |= NFP_NET_CFG_RSS_MASK;
2478         cfg_rss_ctrl |= NFP_NET_CFG_RSS_TOEPLITZ;
2479
2480         /* configuring where to apply the RSS hash */
2481         nn_cfg_writel(hw, NFP_NET_CFG_RSS_CTRL, cfg_rss_ctrl);
2482
2483         /* Writing the key byte by byte */
2484         for (i = 0; i < rss_conf->rss_key_len; i++) {
2485                 memcpy(&key, &rss_conf->rss_key[i], 1);
2486                 nn_cfg_writeb(hw, NFP_NET_CFG_RSS_KEY + i, key);
2487         }
2488
2489         /* Writing the key size */
2490         nn_cfg_writeb(hw, NFP_NET_CFG_RSS_KEY_SZ, rss_conf->rss_key_len);
2491
2492         update = NFP_NET_CFG_UPDATE_RSS;
2493
2494         if (nfp_net_reconfig(hw, hw->ctrl, update) < 0)
2495                 return -EIO;
2496
2497         return 0;
2498 }
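
/*
 * Illustrative sketch only: a minimal rss_conf an application might pass to
 * the rss_hash_update callback above. The key pointer, its length and the
 * helper name are hypothetical; the key length is clamped to the
 * NFP_NET_CFG_RSS_KEY_SZ bytes the firmware accepts.
 */
static void __rte_unused
nfp_net_rss_conf_fill_sketch(struct rte_eth_rss_conf *rss_conf,
                             uint8_t *key, uint8_t key_len)
{
        rss_conf->rss_key = key;
        rss_conf->rss_key_len = RTE_MIN(key_len,
                                        (uint8_t)NFP_NET_CFG_RSS_KEY_SZ);
        rss_conf->rss_hf = ETH_RSS_IPV4 | ETH_RSS_IPV6;
}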
2499
2500 static int
2501 nfp_net_rss_hash_conf_get(struct rte_eth_dev *dev,
2502                           struct rte_eth_rss_conf *rss_conf)
2503 {
2504         uint64_t rss_hf;
2505         uint32_t cfg_rss_ctrl;
2506         uint8_t key;
2507         int i;
2508         struct nfp_net_hw *hw;
2509
2510         hw = NFP_NET_DEV_PRIVATE_TO_HW(dev->data->dev_private);
2511
2512         if (!(hw->ctrl & NFP_NET_CFG_CTRL_RSS))
2513                 return -EINVAL;
2514
2515         rss_hf = rss_conf->rss_hf;
2516         cfg_rss_ctrl = nn_cfg_readl(hw, NFP_NET_CFG_RSS_CTRL);
2517
2518         if (cfg_rss_ctrl & NFP_NET_CFG_RSS_IPV4)
2519                 rss_hf |= ETH_RSS_NONFRAG_IPV4_TCP | ETH_RSS_NONFRAG_IPV4_UDP;
2520
2521         if (cfg_rss_ctrl & NFP_NET_CFG_RSS_IPV4_TCP)
2522                 rss_hf |= ETH_RSS_NONFRAG_IPV4_TCP;
2523
2524         if (cfg_rss_ctrl & NFP_NET_CFG_RSS_IPV6_TCP)
2525                 rss_hf |= ETH_RSS_NONFRAG_IPV6_TCP;
2526
2527         if (cfg_rss_ctrl & NFP_NET_CFG_RSS_IPV4_UDP)
2528                 rss_hf |= ETH_RSS_NONFRAG_IPV4_UDP;
2529
2530         if (cfg_rss_ctrl & NFP_NET_CFG_RSS_IPV6_UDP)
2531                 rss_hf |= ETH_RSS_NONFRAG_IPV6_UDP;
2532
2533         if (cfg_rss_ctrl & NFP_NET_CFG_RSS_IPV6)
2534                 rss_hf |= ETH_RSS_NONFRAG_IPV6_TCP | ETH_RSS_NONFRAG_IPV6_UDP;
2535
2536         /* Reading the key size */
2537         rss_conf->rss_key_len = nn_cfg_readl(hw, NFP_NET_CFG_RSS_KEY_SZ);
2538
2539         /* Reading the key byte by byte */
2540         for (i = 0; i < rss_conf->rss_key_len; i++) {
2541                 key = nn_cfg_readb(hw, NFP_NET_CFG_RSS_KEY + i);
2542                 memcpy(&rss_conf->rss_key[i], &key, 1);
2543         }
2544
2545         return 0;
2546 }
2547
2548 /* Initialise and register driver with DPDK Application */
2549 static const struct eth_dev_ops nfp_net_eth_dev_ops = {
2550         .dev_configure          = nfp_net_configure,
2551         .dev_start              = nfp_net_start,
2552         .dev_stop               = nfp_net_stop,
2553         .dev_close              = nfp_net_close,
2554         .promiscuous_enable     = nfp_net_promisc_enable,
2555         .promiscuous_disable    = nfp_net_promisc_disable,
2556         .link_update            = nfp_net_link_update,
2557         .stats_get              = nfp_net_stats_get,
2558         .stats_reset            = nfp_net_stats_reset,
2559         .dev_infos_get          = nfp_net_infos_get,
2560         .dev_supported_ptypes_get = nfp_net_supported_ptypes_get,
2561         .mtu_set                = nfp_net_dev_mtu_set,
2562         .vlan_offload_set       = nfp_net_vlan_offload_set,
2563         .reta_update            = nfp_net_reta_update,
2564         .reta_query             = nfp_net_reta_query,
2565         .rss_hash_update        = nfp_net_rss_hash_update,
2566         .rss_hash_conf_get      = nfp_net_rss_hash_conf_get,
2567         .rx_queue_setup         = nfp_net_rx_queue_setup,
2568         .rx_queue_release       = nfp_net_rx_queue_release,
2569         .rx_queue_count         = nfp_net_rx_queue_count,
2570         .tx_queue_setup         = nfp_net_tx_queue_setup,
2571         .tx_queue_release       = nfp_net_tx_queue_release,
2572         .rx_queue_intr_enable   = nfp_rx_queue_intr_enable,
2573         .rx_queue_intr_disable  = nfp_rx_queue_intr_disable,
2574 };
2575
2576 /*
2577  * Every eth_dev created gets its own private data, but before nfp_net_init
2578  * that private data references the private data shared by all PF ports. This
2579  * is because the vNIC BARs are mapped based on the first port, so all ports
2580  * need access to the port 0 private data. Inside nfp_net_init the pointer is
2581  * changed to the right per-port address once the BARs have been mapped.
2582  *
2583  * This function helps to find out which port, and therefore which offset
2584  * inside the private data array, to use.
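 *
 * For example (illustrative name): "0000:04:00.0_port3" yields port 3.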
2585  */
2586 static int
2587 get_pf_port_number(char *name)
2588 {
2589         char *pf_str = name;
2590         int size = 0;
2591
2592         while ((*pf_str != '_') && (*pf_str != '\0') && (size++ < 30))
2593                 pf_str++;
2594
2595         if (size == 30)
2596                 /*
2597                  * This should not happen at all and it would mean a major
2598                  * implementation fault.
2599                  */
2600                 rte_panic("nfp_net: problem with pf device name\n");
2601
2602         /* Expecting _portX with X within [0,7] */
2603         pf_str += 5;
2604
2605         return (int)strtol(pf_str, NULL, 10);
2606 }
2607
2608 static int
2609 nfp_net_init(struct rte_eth_dev *eth_dev)
2610 {
2611         struct rte_pci_device *pci_dev;
2612         struct nfp_net_hw *hw, *hwport0;
2613
2614         uint64_t tx_bar_off = 0, rx_bar_off = 0;
2615         uint32_t start_q;
2616         int stride = 4;
2617
2618         nspu_desc_t *nspu_desc = NULL;
2619         uint64_t bar_offset;
2620         int port = 0;
2621
2622         PMD_INIT_FUNC_TRACE();
2623
2624         pci_dev = RTE_ETH_DEV_TO_PCI(eth_dev);
2625
2626         if ((pci_dev->id.device_id == PCI_DEVICE_ID_NFP4000_PF_NIC) ||
2627             (pci_dev->id.device_id == PCI_DEVICE_ID_NFP6000_PF_NIC)) {
2628                 port = get_pf_port_number(eth_dev->data->name);
2629                 if (port < 0 || port > 7) {
2630                         RTE_LOG(ERR, PMD, "Port value is wrong\n");
2631                         return -ENODEV;
2632                 }
2633
2634                 PMD_INIT_LOG(DEBUG, "Working with PF port value %d\n", port);
2635
2636                 /* This points to port 0 private data */
2637                 hwport0 = NFP_NET_DEV_PRIVATE_TO_HW(eth_dev->data->dev_private);
2638
2639                 /* This points to the specific port private data */
2640                 hw = &hwport0[port];
2641                 hw->pf_port_idx = port;
2642         } else {
2643                 hw = NFP_NET_DEV_PRIVATE_TO_HW(eth_dev->data->dev_private);
2644                 hwport0 = 0;
2645         }
2646
2647         eth_dev->dev_ops = &nfp_net_eth_dev_ops;
2648         eth_dev->rx_pkt_burst = &nfp_net_recv_pkts;
2649         eth_dev->tx_pkt_burst = &nfp_net_xmit_pkts;
2650
2651         /* For secondary processes, the primary has done all the work */
2652         if (rte_eal_process_type() != RTE_PROC_PRIMARY)
2653                 return 0;
2654
2655         rte_eth_copy_pci_info(eth_dev, pci_dev);
2656         /* hotplug is not possible with multiport PF */
2657         if (!hw->pf_multiport_enabled)
2658                 eth_dev->data->dev_flags |= RTE_ETH_DEV_DETACHABLE;
2659
2660         hw->device_id = pci_dev->id.device_id;
2661         hw->vendor_id = pci_dev->id.vendor_id;
2662         hw->subsystem_device_id = pci_dev->id.subsystem_device_id;
2663         hw->subsystem_vendor_id = pci_dev->id.subsystem_vendor_id;
2664
2665         PMD_INIT_LOG(DEBUG, "nfp_net: device (%u:%u) %u:%u:%u:%u",
2666                      pci_dev->id.vendor_id, pci_dev->id.device_id,
2667                      pci_dev->addr.domain, pci_dev->addr.bus,
2668                      pci_dev->addr.devid, pci_dev->addr.function);
2669
2670         hw->ctrl_bar = (uint8_t *)pci_dev->mem_resource[0].addr;
2671         if (hw->ctrl_bar == NULL) {
2672                 RTE_LOG(ERR, PMD,
2673                         "hw->ctrl_bar is NULL. BAR0 not configured\n");
2674                 return -ENODEV;
2675         }
2676
2677         if (hw->is_pf && port == 0) {
2678                 nspu_desc = hw->nspu_desc;
2679
2680                 if (nfp_nsp_map_ctrl_bar(nspu_desc, &bar_offset) != 0) {
2681                         /*
2682                          * The firmware should be there after the PF probe, so this
2683                          * should not happen.
2684                          */
2685                         RTE_LOG(ERR, PMD, "PF BAR symbol resolution failed\n");
2686                         return -ENODEV;
2687                 }
2688
2689                 /* vNIC PF control BAR is a subset of PF PCI device BAR */
2690                 hw->ctrl_bar += bar_offset;
2691                 PMD_INIT_LOG(DEBUG, "ctrl bar: %p\n", hw->ctrl_bar);
2692         }
2693
2694         if (port > 0) {
2695                 if (!hwport0->ctrl_bar)
2696                         return -ENODEV;
2697
2698                 /* address based on port0 offset */
2699                 hw->ctrl_bar = hwport0->ctrl_bar +
2700                                (port * NFP_PF_CSR_SLICE_SIZE);
2701         }
2702
2703         PMD_INIT_LOG(DEBUG, "ctrl bar: %p\n", hw->ctrl_bar);
2704
2705         hw->max_rx_queues = nn_cfg_readl(hw, NFP_NET_CFG_MAX_RXRINGS);
2706         hw->max_tx_queues = nn_cfg_readl(hw, NFP_NET_CFG_MAX_TXRINGS);
2707
2708         /* Work out where in the BAR the queues start. */
2709         switch (pci_dev->id.device_id) {
2710         case PCI_DEVICE_ID_NFP4000_PF_NIC:
2711         case PCI_DEVICE_ID_NFP6000_PF_NIC:
2712         case PCI_DEVICE_ID_NFP6000_VF_NIC:
2713                 start_q = nn_cfg_readl(hw, NFP_NET_CFG_START_TXQ);
2714                 tx_bar_off = NFP_PCIE_QUEUE(start_q);
2715                 start_q = nn_cfg_readl(hw, NFP_NET_CFG_START_RXQ);
2716                 rx_bar_off = NFP_PCIE_QUEUE(start_q);
2717                 break;
2718         default:
2719                 RTE_LOG(ERR, PMD, "nfp_net: no device ID matching\n");
2720                 return -ENODEV;
2721         }
2722
2723         PMD_INIT_LOG(DEBUG, "tx_bar_off: 0x%" PRIx64 "\n", tx_bar_off);
2724         PMD_INIT_LOG(DEBUG, "rx_bar_off: 0x%" PRIx64 "\n", rx_bar_off);
2725
2726         if (hw->is_pf && port == 0) {
2727                 /* configure access to tx/rx vNIC BARs */
2728                 nfp_nsp_map_queues_bar(nspu_desc, &bar_offset);
2729                 PMD_INIT_LOG(DEBUG, "tx/rx bar_offset: %" PRIx64 "\n",
2730                                     bar_offset);
2731                 hwport0->hw_queues = (uint8_t *)pci_dev->mem_resource[0].addr;
2732
2733                 /* vNIC PF tx/rx BARs are a subset of PF PCI device */
2734                 hwport0->hw_queues += bar_offset;
2735
2736                 /* Let's seize the chance to read the eth table from hw */
2737                 if (nfp_nsp_eth_read_table(nspu_desc, &hw->eth_table))
2738                         return -ENODEV;
2739         }
2740
2741         if (hw->is_pf) {
2742                 hw->tx_bar = hwport0->hw_queues + tx_bar_off;
2743                 hw->rx_bar = hwport0->hw_queues + rx_bar_off;
2744                 eth_dev->data->dev_private = hw;
2745         } else {
2746                 hw->tx_bar = (uint8_t *)pci_dev->mem_resource[2].addr +
2747                              tx_bar_off;
2748                 hw->rx_bar = (uint8_t *)pci_dev->mem_resource[2].addr +
2749                              rx_bar_off;
2750         }
2751
2752         PMD_INIT_LOG(DEBUG, "ctrl_bar: %p, tx_bar: %p, rx_bar: %p",
2753                      hw->ctrl_bar, hw->tx_bar, hw->rx_bar);
2754
2755         nfp_net_cfg_queue_setup(hw);
2756
2757         /* Get some of the read-only fields from the config BAR */
2758         hw->ver = nn_cfg_readl(hw, NFP_NET_CFG_VERSION);
2759         hw->cap = nn_cfg_readl(hw, NFP_NET_CFG_CAP);
2760         hw->max_mtu = nn_cfg_readl(hw, NFP_NET_CFG_MAX_MTU);
2761         hw->mtu = hw->max_mtu;
2762
2763         if (NFD_CFG_MAJOR_VERSION_of(hw->ver) < 2)
2764                 hw->rx_offset = NFP_NET_RX_OFFSET;
2765         else
2766                 hw->rx_offset = nn_cfg_readl(hw, NFP_NET_CFG_RX_OFFSET_ADDR);
2767
2768         PMD_INIT_LOG(INFO, "VER: %#x, Maximum supported MTU: %d",
2769                      hw->ver, hw->max_mtu);
2770         PMD_INIT_LOG(INFO, "CAP: %#x, %s%s%s%s%s%s%s%s%s%s%s", hw->cap,
2771                      hw->cap & NFP_NET_CFG_CTRL_PROMISC ? "PROMISC " : "",
2772                      hw->cap & NFP_NET_CFG_CTRL_L2BC    ? "L2BCFILT " : "",
2773                      hw->cap & NFP_NET_CFG_CTRL_L2MC    ? "L2MCFILT " : "",
2774                      hw->cap & NFP_NET_CFG_CTRL_RXCSUM  ? "RXCSUM "  : "",
2775                      hw->cap & NFP_NET_CFG_CTRL_TXCSUM  ? "TXCSUM "  : "",
2776                      hw->cap & NFP_NET_CFG_CTRL_RXVLAN  ? "RXVLAN "  : "",
2777                      hw->cap & NFP_NET_CFG_CTRL_TXVLAN  ? "TXVLAN "  : "",
2778                      hw->cap & NFP_NET_CFG_CTRL_SCATTER ? "SCATTER " : "",
2779                      hw->cap & NFP_NET_CFG_CTRL_GATHER  ? "GATHER "  : "",
2780                      hw->cap & NFP_NET_CFG_CTRL_LSO     ? "TSO "     : "",
2781                      hw->cap & NFP_NET_CFG_CTRL_RSS     ? "RSS "     : "");
2782
2783         hw->ctrl = 0;
2784
2785         hw->stride_rx = stride;
2786         hw->stride_tx = stride;
2787
2788         PMD_INIT_LOG(INFO, "max_rx_queues: %u, max_tx_queues: %u",
2789                      hw->max_rx_queues, hw->max_tx_queues);
2790
2791         /* Initializing spinlock for reconfigs */
2792         rte_spinlock_init(&hw->reconfig_lock);
2793
2794         /* Allocating memory for mac addr */
2795         eth_dev->data->mac_addrs = rte_zmalloc("mac_addr", ETHER_ADDR_LEN, 0);
2796         if (eth_dev->data->mac_addrs == NULL) {
2797                 PMD_INIT_LOG(ERR, "Failed to allocate space for MAC address");
2798                 return -ENOMEM;
2799         }
2800
2801         if (hw->is_pf)
2802                 nfp_net_pf_read_mac(hwport0, port);
2803         else
2804                 nfp_net_vf_read_mac(hw);
2805
2806         if (!is_valid_assigned_ether_addr((struct ether_addr *)&hw->mac_addr)) {
2807                 /* Using random mac addresses for VFs */
2808                 eth_random_addr(&hw->mac_addr[0]);
2809                 nfp_net_write_mac(hw, (uint8_t *)&hw->mac_addr);
2810         }
2811
2812         /* Copying mac address to DPDK eth_dev struct */
2813         ether_addr_copy((struct ether_addr *)hw->mac_addr,
2814                         &eth_dev->data->mac_addrs[0]);
2815
2816         PMD_INIT_LOG(INFO, "port %d VendorID=0x%x DeviceID=0x%x "
2817                      "mac=%02x:%02x:%02x:%02x:%02x:%02x",
2818                      eth_dev->data->port_id, pci_dev->id.vendor_id,
2819                      pci_dev->id.device_id,
2820                      hw->mac_addr[0], hw->mac_addr[1], hw->mac_addr[2],
2821                      hw->mac_addr[3], hw->mac_addr[4], hw->mac_addr[5]);
2822
2823         /* Registering LSC interrupt handler */
2824         rte_intr_callback_register(&pci_dev->intr_handle,
2825                                    nfp_net_dev_interrupt_handler,
2826                                    (void *)eth_dev);
2827
2828         /* Telling the firmware about the LSC interrupt entry */
2829         nn_cfg_writeb(hw, NFP_NET_CFG_LSC, NFP_NET_IRQ_LSC_IDX);
2830
2831         /* Recording current stats counters values */
2832         nfp_net_stats_reset(eth_dev);
2833
2834         return 0;
2835 }
2836
2837 static int
2838 nfp_pf_create_dev(struct rte_pci_device *dev, int port, int ports,
2839                   nfpu_desc_t *nfpu_desc, void **priv)
2840 {
2841         struct rte_eth_dev *eth_dev;
2842         struct nfp_net_hw *hw;
2843         char *port_name;
2844         int ret;
2845
2846         port_name = rte_zmalloc("nfp_pf_port_name", 100, 0);
2847         if (!port_name)
2848                 return -ENOMEM;
2849
2850         if (ports > 1)
2851                 sprintf(port_name, "%s_port%d", dev->device.name, port);
2852         else
2853                 sprintf(port_name, "%s", dev->device.name);
2854
2855         eth_dev = rte_eth_dev_allocate(port_name);
2856         if (!eth_dev)
2857                 return -ENOMEM;
2858
2859         if (port == 0) {
2860                 *priv = rte_zmalloc(port_name,
2861                                     sizeof(struct nfp_net_adapter) * ports,
2862                                     RTE_CACHE_LINE_SIZE);
2863                 if (!*priv) {
2864                         rte_eth_dev_release_port(eth_dev);
2865                         return -ENOMEM;
2866                 }
2867         }
2868
2869         eth_dev->data->dev_private = *priv;
2870
2871         /*
2872          * dev_private pointing to port0 dev_private because we need
2873          * to configure vNIC bars based on port0 at nfp_net_init.
2874          * Then dev_private is adjusted per port.
2875          */
2876         hw = (struct nfp_net_hw *)(eth_dev->data->dev_private) + port;
2877         hw->nspu_desc = nfpu_desc->nspu;
2878         hw->nfpu_desc = nfpu_desc;
2879         hw->is_pf = 1;
2880         if (ports > 1)
2881                 hw->pf_multiport_enabled = 1;
2882
2883         eth_dev->device = &dev->device;
2884         rte_eth_copy_pci_info(eth_dev, dev);
2885
2886         ret = nfp_net_init(eth_dev);
2887
2888         if (ret)
2889                 rte_eth_dev_release_port(eth_dev);
2890
2891         rte_free(port_name);
2892
2893         return ret;
2894 }
2895
2896 static int nfp_pf_pci_probe(struct rte_pci_driver *pci_drv __rte_unused,
2897                             struct rte_pci_device *dev)
2898 {
2899         nfpu_desc_t *nfpu_desc;
2900         nspu_desc_t *nspu_desc;
2901         uint64_t offset_symbol;
2902         uint8_t *bar_offset;
2903         int major, minor;
2904         int total_ports;
2905         void *priv = 0;
2906         int ret = -ENODEV;
2907         int i;
2908
2909         if (!dev)
2910                 return ret;
2911
2912         nfpu_desc = rte_malloc("nfp nfpu", sizeof(nfpu_desc_t), 0);
2913         if (!nfpu_desc)
2914                 return -ENOMEM;
2915
2916         if (nfpu_open(dev, nfpu_desc, 0) < 0) {
2917                 RTE_LOG(ERR, PMD,
2918                         "nfpu_open failed\n");
2919                 goto nfpu_error;
2920         }
2921
2922         nspu_desc = nfpu_desc->nspu;
2923
2924
2925         /* Check NSP ABI version */
2926         if (nfp_nsp_get_abi_version(nspu_desc, &major, &minor) < 0) {
2927                 RTE_LOG(INFO, PMD, "NFP NSP not present\n");
2928                 goto error;
2929         }
2930         PMD_INIT_LOG(INFO, "nspu ABI version: %d.%d\n", major, minor);
2931
2932         if ((major == 0) && (minor < 20)) {
2933                 RTE_LOG(INFO, PMD, "NFP NSP ABI version too old. 0.20 or higher required\n");
2934                 goto error;
2935         }
2936
2937         ret = nfp_nsp_fw_setup(nspu_desc, "nfd_cfg_pf0_num_ports",
2938                                &offset_symbol);
2939         if (ret)
2940                 goto error;
2941
2942         bar_offset = (uint8_t *)dev->mem_resource[0].addr;
2943         bar_offset += offset_symbol;
2944         total_ports = (uint32_t)*bar_offset;
2945         PMD_INIT_LOG(INFO, "Total pf ports: %d\n", total_ports);
2946
2947         if (total_ports <= 0 || total_ports > 8) {
2948                 RTE_LOG(ERR, PMD, "nfd_cfg_pf0_num_ports symbol with wrong value\n");
2949                 ret = -ENODEV;
2950                 goto error;
2951         }
2952
2953         for (i = 0; i < total_ports; i++) {
2954                 ret = nfp_pf_create_dev(dev, i, total_ports, nfpu_desc, &priv);
2955                 if (ret)
2956                         goto error;
2957         }
2958
2959         return 0;
2960
2961 error:
2962         nfpu_close(nfpu_desc);
2963 nfpu_error:
2964         rte_free(nfpu_desc);
2965
2966         return ret;
2967 }
2968
2969 static const struct rte_pci_id pci_id_nfp_pf_net_map[] = {
2970         {
2971                 RTE_PCI_DEVICE(PCI_VENDOR_ID_NETRONOME,
2972                                PCI_DEVICE_ID_NFP4000_PF_NIC)
2973         },
2974         {
2975                 RTE_PCI_DEVICE(PCI_VENDOR_ID_NETRONOME,
2976                                PCI_DEVICE_ID_NFP6000_PF_NIC)
2977         },
2978         {
2979                 .vendor_id = 0,
2980         },
2981 };
2982
2983 static const struct rte_pci_id pci_id_nfp_vf_net_map[] = {
2984         {
2985                 RTE_PCI_DEVICE(PCI_VENDOR_ID_NETRONOME,
2986                                PCI_DEVICE_ID_NFP6000_VF_NIC)
2987         },
2988         {
2989                 .vendor_id = 0,
2990         },
2991 };
2992
2993 static int eth_nfp_pci_probe(struct rte_pci_driver *pci_drv __rte_unused,
2994         struct rte_pci_device *pci_dev)
2995 {
2996         return rte_eth_dev_pci_generic_probe(pci_dev,
2997                 sizeof(struct nfp_net_adapter), nfp_net_init);
2998 }
2999
3000 static int eth_nfp_pci_remove(struct rte_pci_device *pci_dev)
3001 {
3002         return rte_eth_dev_pci_generic_remove(pci_dev, NULL);
3003 }
3004
3005 static struct rte_pci_driver rte_nfp_net_pf_pmd = {
3006         .id_table = pci_id_nfp_pf_net_map,
3007         .drv_flags = RTE_PCI_DRV_NEED_MAPPING | RTE_PCI_DRV_INTR_LSC,
3008         .probe = nfp_pf_pci_probe,
3009         .remove = eth_nfp_pci_remove,
3010 };
3011
3012 static struct rte_pci_driver rte_nfp_net_vf_pmd = {
3013         .id_table = pci_id_nfp_vf_net_map,
3014         .drv_flags = RTE_PCI_DRV_NEED_MAPPING | RTE_PCI_DRV_INTR_LSC,
3015         .probe = eth_nfp_pci_probe,
3016         .remove = eth_nfp_pci_remove,
3017 };
3018
3019 RTE_PMD_REGISTER_PCI(net_nfp_pf, rte_nfp_net_pf_pmd);
3020 RTE_PMD_REGISTER_PCI(net_nfp_vf, rte_nfp_net_vf_pmd);
3021 RTE_PMD_REGISTER_PCI_TABLE(net_nfp_pf, pci_id_nfp_pf_net_map);
3022 RTE_PMD_REGISTER_PCI_TABLE(net_nfp_vf, pci_id_nfp_vf_net_map);
3023 RTE_PMD_REGISTER_KMOD_DEP(net_nfp_pf, "* igb_uio | uio_pci_generic | vfio");
3024 RTE_PMD_REGISTER_KMOD_DEP(net_nfp_vf, "* igb_uio | uio_pci_generic | vfio");
3025
3026 /*
3027  * Local variables:
3028  * c-file-style: "Linux"
3029  * indent-tabs-mode: t
3030  * End:
3031  */