net/mrvl: support RSS hashing
[dpdk.git] / drivers / net / mrvl / mrvl_ethdev.c
1 /*-
2  *   BSD LICENSE
3  *
4  *   Copyright(c) 2017 Semihalf. All rights reserved.
5  *
6  *   Redistribution and use in source and binary forms, with or without
7  *   modification, are permitted provided that the following conditions
8  *   are met:
9  *
10  *     * Redistributions of source code must retain the above copyright
11  *       notice, this list of conditions and the following disclaimer.
12  *     * Redistributions in binary form must reproduce the above copyright
13  *       notice, this list of conditions and the following disclaimer in
14  *       the documentation and/or other materials provided with the
15  *       distribution.
16  *     * Neither the name of Semihalf nor the names of its
17  *       contributors may be used to endorse or promote products derived
18  *       from this software without specific prior written permission.
19  *
20  *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
21  *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
22  *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
23  *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
24  *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
25  *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
26  *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
27  *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
28  *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
29  *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
30  *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
31  */
32
33 #include <rte_ethdev.h>
34 #include <rte_kvargs.h>
35 #include <rte_log.h>
36 #include <rte_malloc.h>
37 #include <rte_vdev.h>
38
39 /* Unluckily, container_of is defined by both DPDK and MUSDK,
40  * we'll declare only one version.
41  *
42  * Note that it is not used in this PMD anyway.
43  */
44 #ifdef container_of
45 #undef container_of
46 #endif
47
48 #include <drivers/mv_pp2.h>
49 #include <drivers/mv_pp2_bpool.h>
50 #include <drivers/mv_pp2_hif.h>
51
52 #include <fcntl.h>
53 #include <linux/ethtool.h>
54 #include <linux/sockios.h>
55 #include <net/if.h>
56 #include <net/if_arp.h>
57 #include <sys/ioctl.h>
58 #include <sys/socket.h>
59 #include <sys/stat.h>
60 #include <sys/types.h>
61
62 #include "mrvl_ethdev.h"
63 #include "mrvl_qos.h"
64
/* bitmask with reserved hifs (used by the kernel driver) */
#define MRVL_MUSDK_HIFS_RESERVED 0x0F
/* bitmask with reserved bpools (used by the kernel driver) */
#define MRVL_MUSDK_BPOOLS_RESERVED 0x07
/* bitmask with reserved kernel RSS tables */
#define MRVL_MUSDK_RSS_RESERVED 0x01
/* maximum number of available hifs */
#define MRVL_MUSDK_HIFS_MAX 9

/* prefetch shift */
#define MRVL_MUSDK_PREFETCH_SHIFT 2

/* TCAM has 25 entries reserved for uc/mc filter entries */
#define MRVL_MAC_ADDRS_MAX 25
/* length of the "ppio-X:Y" match string passed to MUSDK */
#define MRVL_MATCH_LEN 16
/* effective packet offset includes the Marvell header */
#define MRVL_PKT_EFFEC_OFFS (MRVL_PKT_OFFS + MV_MH_SIZE)
/* Maximum allowable packet size */
#define MRVL_PKT_SIZE_MAX (10240 - MV_MH_SIZE)

/* vdev argument names accepted on the EAL command line */
#define MRVL_IFACE_NAME_ARG "iface"
#define MRVL_CFG_ARG "cfg"

/* default rx burst size, also used for bpool refill accounting */
#define MRVL_BURST_SIZE 64

#define MRVL_ARP_LENGTH 28

/* sentinel: high part of mbuf cookie address not yet captured */
#define MRVL_COOKIE_ADDR_INVALID ~0ULL

/* hw cookies carry only the low bits of an mbuf virtual address;
 * the (assumed constant) high bits are kept in cookie_addr_high
 */
#define MRVL_COOKIE_HIGH_ADDR_SHIFT     (sizeof(pp2_cookie_t) * 8)
#define MRVL_COOKIE_HIGH_ADDR_MASK      (~0ULL << MRVL_COOKIE_HIGH_ADDR_SHIFT)
95
/* NULL-terminated list of supported vdev arguments */
static const char * const valid_args[] = {
	MRVL_IFACE_NAME_ARG,
	MRVL_CFG_ARG,
	NULL
};

/* bitmap of hifs already taken (kernel-reserved ones pre-set) */
static int used_hifs = MRVL_MUSDK_HIFS_RESERVED;
/* one hif per lcore, lazily allocated */
static struct pp2_hif *hifs[RTE_MAX_LCORE];
/* per packet-processor bitmap of bpools already taken */
static int used_bpools[PP2_NUM_PKT_PROC] = {
	MRVL_MUSDK_BPOOLS_RESERVED,
	MRVL_MUSDK_BPOOLS_RESERVED
};

/* maps DPDK port id to the bpool owned by that port */
struct pp2_bpool *mrvl_port_to_bpool_lookup[RTE_MAX_ETHPORTS];
/* per-lcore count of buffers put into each bpool */
int mrvl_port_bpool_size[PP2_NUM_PKT_PROC][PP2_BPOOL_NUM_POOLS][RTE_MAX_LCORE];
/* high bits shared by all mbuf virtual addresses (see cookie macros above) */
uint64_t cookie_addr_high = MRVL_COOKIE_ADDR_INVALID;
112
113 /*
114  * To use buffer harvesting based on loopback port shadow queue structure
115  * was introduced for buffers information bookkeeping.
116  *
117  * Before sending the packet, related buffer information (pp2_buff_inf) is
118  * stored in shadow queue. After packet is transmitted no longer used
119  * packet buffer is released back to it's original hardware pool,
120  * on condition it originated from interface.
121  * In case it  was generated by application itself i.e: mbuf->port field is
122  * 0xff then its released to software mempool.
123  */
struct mrvl_shadow_txq {
	int head;           /* write index - used when sending buffers */
	int tail;           /* read index - used when releasing buffers */
	u16 size;           /* queue occupied size */
	u16 num_to_release; /* number of buffers sent, that can be released */
	struct buff_release_entry ent[MRVL_PP2_TX_SHADOWQ_SIZE]; /* q entries */
};
131
/* per-port receive queue bookkeeping */
struct mrvl_rxq {
	struct mrvl_priv *priv; /* owning port's private data */
	struct rte_mempool *mp; /* mempool used for rx buffer allocation */
	int queue_id;           /* queue index within the port */
	int port_id;            /* DPDK port id */
};
138
/* per-port transmit queue bookkeeping */
struct mrvl_txq {
	struct mrvl_priv *priv; /* owning port's private data */
	int queue_id;           /* queue index within the port */
	int port_id;            /* DPDK port id */
};
144
145 /*
146  * Every tx queue should have dedicated shadow tx queue.
147  *
148  * Ports assigned by DPDK might not start at zero or be continuous so
149  * as a workaround define shadow queues for each possible port so that
150  * we eventually fit somewhere.
151  */
/* shadow queues indexed by [port][lcore]; see comment above */
struct mrvl_shadow_txq shadow_txqs[RTE_MAX_ETHPORTS][RTE_MAX_LCORE];

/** Number of ports configured. */
int mrvl_ports_nb;
/* inclusive range of lcores this PMD may run on */
static int mrvl_lcore_first;
static int mrvl_lcore_last;
158
159 static inline int
160 mrvl_get_bpool_size(int pp2_id, int pool_id)
161 {
162         int i;
163         int size = 0;
164
165         for (i = mrvl_lcore_first; i <= mrvl_lcore_last; i++)
166                 size += mrvl_port_bpool_size[pp2_id][pool_id][i];
167
168         return size;
169 }
170
/**
 * Reserve the bit just above the highest bit currently set in a bitmap.
 *
 * @param bitmap
 *   Pointer to the bitmap; the reserved bit is set on success.
 * @param max
 *   Number of usable bits in the bitmap.
 *
 * @return
 *   Index of the reserved bit on success, -1 if no bit is available.
 */
static inline int
mrvl_reserve_bit(int *bitmap, int max)
{
	int n;

	/*
	 * __builtin_clz(0) is undefined behavior - an empty bitmap
	 * simply reserves bit 0.
	 */
	n = *bitmap ? (int)sizeof(*bitmap) * 8 - __builtin_clz(*bitmap) : 0;
	if (n >= max)
		return -1;

	*bitmap |= 1 << n;

	return n;
}
183
184 /**
185  * Configure rss based on dpdk rss configuration.
186  *
187  * @param priv
188  *   Pointer to private structure.
189  * @param rss_conf
190  *   Pointer to RSS configuration.
191  *
192  * @return
193  *   0 on success, negative error value otherwise.
194  */
195 static int
196 mrvl_configure_rss(struct mrvl_priv *priv, struct rte_eth_rss_conf *rss_conf)
197 {
198         if (rss_conf->rss_key)
199                 RTE_LOG(WARNING, PMD, "Changing hash key is not supported\n");
200
201         if (rss_conf->rss_hf == 0) {
202                 priv->ppio_params.inqs_params.hash_type = PP2_PPIO_HASH_T_NONE;
203         } else if (rss_conf->rss_hf & ETH_RSS_IPV4) {
204                 priv->ppio_params.inqs_params.hash_type =
205                         PP2_PPIO_HASH_T_2_TUPLE;
206         } else if (rss_conf->rss_hf & ETH_RSS_NONFRAG_IPV4_TCP) {
207                 priv->ppio_params.inqs_params.hash_type =
208                         PP2_PPIO_HASH_T_5_TUPLE;
209                 priv->rss_hf_tcp = 1;
210         } else if (rss_conf->rss_hf & ETH_RSS_NONFRAG_IPV4_UDP) {
211                 priv->ppio_params.inqs_params.hash_type =
212                         PP2_PPIO_HASH_T_5_TUPLE;
213                 priv->rss_hf_tcp = 0;
214         } else {
215                 return -EINVAL;
216         }
217
218         return 0;
219 }
220
221 /**
222  * Ethernet device configuration.
223  *
224  * Prepare the driver for a given number of TX and RX queues and
225  * configure RSS.
226  *
227  * @param dev
228  *   Pointer to Ethernet device structure.
229  *
230  * @return
231  *   0 on success, negative error value otherwise.
232  */
233 static int
234 mrvl_dev_configure(struct rte_eth_dev *dev)
235 {
236         struct mrvl_priv *priv = dev->data->dev_private;
237         int ret;
238
239         if (dev->data->dev_conf.rxmode.mq_mode != ETH_MQ_RX_NONE &&
240             dev->data->dev_conf.rxmode.mq_mode != ETH_MQ_RX_RSS) {
241                 RTE_LOG(INFO, PMD, "Unsupported rx multi queue mode %d\n",
242                         dev->data->dev_conf.rxmode.mq_mode);
243                 return -EINVAL;
244         }
245
246         if (!dev->data->dev_conf.rxmode.hw_strip_crc) {
247                 RTE_LOG(INFO, PMD,
248                         "L2 CRC stripping is always enabled in hw\n");
249                 dev->data->dev_conf.rxmode.hw_strip_crc = 1;
250         }
251
252         if (dev->data->dev_conf.rxmode.hw_vlan_strip) {
253                 RTE_LOG(INFO, PMD, "VLAN stripping not supported\n");
254                 return -EINVAL;
255         }
256
257         if (dev->data->dev_conf.rxmode.split_hdr_size) {
258                 RTE_LOG(INFO, PMD, "Split headers not supported\n");
259                 return -EINVAL;
260         }
261
262         if (dev->data->dev_conf.rxmode.enable_scatter) {
263                 RTE_LOG(INFO, PMD, "RX Scatter/Gather not supported\n");
264                 return -EINVAL;
265         }
266
267         if (dev->data->dev_conf.rxmode.enable_lro) {
268                 RTE_LOG(INFO, PMD, "LRO not supported\n");
269                 return -EINVAL;
270         }
271
272         if (dev->data->dev_conf.rxmode.jumbo_frame)
273                 dev->data->mtu = dev->data->dev_conf.rxmode.max_rx_pkt_len -
274                                  ETHER_HDR_LEN - ETHER_CRC_LEN;
275
276         ret = mrvl_configure_rxqs(priv, dev->data->port_id,
277                                   dev->data->nb_rx_queues);
278         if (ret < 0)
279                 return ret;
280
281         priv->ppio_params.outqs_params.num_outqs = dev->data->nb_tx_queues;
282         priv->nb_rx_queues = dev->data->nb_rx_queues;
283
284         if (dev->data->nb_rx_queues == 1 &&
285             dev->data->dev_conf.rxmode.mq_mode == ETH_MQ_RX_RSS) {
286                 RTE_LOG(WARNING, PMD, "Disabling hash for 1 rx queue\n");
287                 priv->ppio_params.inqs_params.hash_type = PP2_PPIO_HASH_T_NONE;
288
289                 return 0;
290         }
291
292         return mrvl_configure_rss(priv,
293                                   &dev->data->dev_conf.rx_adv_conf.rss_conf);
294 }
295
296 /**
297  * DPDK callback to change the MTU.
298  *
299  * Setting the MTU affects hardware MRU (packets larger than the MRU
300  * will be dropped).
301  *
302  * @param dev
303  *   Pointer to Ethernet device structure.
304  * @param mtu
305  *   New MTU.
306  *
307  * @return
308  *   0 on success, negative error value otherwise.
309  */
310 static int
311 mrvl_mtu_set(struct rte_eth_dev *dev, uint16_t mtu)
312 {
313         struct mrvl_priv *priv = dev->data->dev_private;
314         /* extra MV_MH_SIZE bytes are required for Marvell tag */
315         uint16_t mru = mtu + MV_MH_SIZE + ETHER_HDR_LEN + ETHER_CRC_LEN;
316         int ret;
317
318         if (mtu < ETHER_MIN_MTU || mru > MRVL_PKT_SIZE_MAX)
319                 return -EINVAL;
320
321         ret = pp2_ppio_set_mru(priv->ppio, mru);
322         if (ret)
323                 return ret;
324
325         return pp2_ppio_set_mtu(priv->ppio, mtu);
326 }
327
328 /**
329  * DPDK callback to bring the link up.
330  *
331  * @param dev
332  *   Pointer to Ethernet device structure.
333  *
334  * @return
335  *   0 on success, negative error value otherwise.
336  */
337 static int
338 mrvl_dev_set_link_up(struct rte_eth_dev *dev)
339 {
340         struct mrvl_priv *priv = dev->data->dev_private;
341         int ret;
342
343         ret = pp2_ppio_enable(priv->ppio);
344         if (ret)
345                 return ret;
346
347         /*
348          * mtu/mru can be updated if pp2_ppio_enable() was called at least once
349          * as pp2_ppio_enable() changes port->t_mode from default 0 to
350          * PP2_TRAFFIC_INGRESS_EGRESS.
351          *
352          * Set mtu to default DPDK value here.
353          */
354         ret = mrvl_mtu_set(dev, dev->data->mtu);
355         if (ret)
356                 pp2_ppio_disable(priv->ppio);
357
358         dev->data->dev_link.link_status = ETH_LINK_UP;
359
360         return ret;
361 }
362
363 /**
364  * DPDK callback to bring the link down.
365  *
366  * @param dev
367  *   Pointer to Ethernet device structure.
368  *
369  * @return
370  *   0 on success, negative error value otherwise.
371  */
372 static int
373 mrvl_dev_set_link_down(struct rte_eth_dev *dev)
374 {
375         struct mrvl_priv *priv = dev->data->dev_private;
376         int ret;
377
378         ret = pp2_ppio_disable(priv->ppio);
379         if (ret)
380                 return ret;
381
382         dev->data->dev_link.link_status = ETH_LINK_DOWN;
383
384         return ret;
385 }
386
387 /**
388  * DPDK callback to start the device.
389  *
390  * @param dev
391  *   Pointer to Ethernet device structure.
392  *
393  * @return
394  *   0 on success, negative errno value on failure.
395  */
static int
mrvl_dev_start(struct rte_eth_dev *dev)
{
	struct mrvl_priv *priv = dev->data->dev_private;
	char match[MRVL_MATCH_LEN];
	int ret;

	/* MUSDK matches the ppio by a "ppio-<pp>:<port>" string */
	snprintf(match, sizeof(match), "ppio-%d:%d",
		 priv->pp_id, priv->ppio_id);
	priv->ppio_params.match = match;

	/*
	 * Calculate the maximum bpool size for refill feature to 1.5 of the
	 * configured size. In case the bpool size will exceed this value,
	 * superfluous buffers will be removed
	 */
	priv->bpool_max_size = priv->bpool_init_size +
			      (priv->bpool_init_size >> 1);
	/*
	 * Calculate the minimum bpool size for refill feature as follows:
	 * 2 default burst sizes multiply by number of rx queues.
	 * If the bpool size will be below this value, new buffers will
	 * be added to the pool.
	 */
	priv->bpool_min_size = priv->nb_rx_queues * MRVL_BURST_SIZE * 2;

	ret = pp2_ppio_init(&priv->ppio_params, &priv->ppio);
	if (ret)
		return ret;

	/*
	 * In case there are some stale uc/mc mac addresses flush them
	 * here. It cannot be done during mrvl_dev_close() as port information
	 * is already gone at that point (due to pp2_ppio_deinit() in
	 * mrvl_dev_stop()).
	 */
	if (!priv->uc_mc_flushed) {
		ret = pp2_ppio_flush_mac_addrs(priv->ppio, 1, 1);
		if (ret) {
			RTE_LOG(ERR, PMD,
				"Failed to flush uc/mc filter list\n");
			goto out;
		}
		priv->uc_mc_flushed = 1;
	}

	/* For default QoS config, don't start classifier. */
	if (mrvl_qos_cfg) {
		ret = mrvl_start_qos_mapping(priv);
		if (ret) {
			/* undo the pp2_ppio_init() done above */
			pp2_ppio_deinit(priv->ppio);
			return ret;
		}
	}

	ret = mrvl_dev_set_link_up(dev);
	if (ret)
		goto out;

	return 0;
out:
	/* common error path: tear the ppio down again */
	pp2_ppio_deinit(priv->ppio);
	return ret;
}
460
461 /**
462  * Flush receive queues.
463  *
464  * @param dev
465  *   Pointer to Ethernet device structure.
466  */
467 static void
468 mrvl_flush_rx_queues(struct rte_eth_dev *dev)
469 {
470         int i;
471
472         RTE_LOG(INFO, PMD, "Flushing rx queues\n");
473         for (i = 0; i < dev->data->nb_rx_queues; i++) {
474                 int ret, num;
475
476                 do {
477                         struct mrvl_rxq *q = dev->data->rx_queues[i];
478                         struct pp2_ppio_desc descs[MRVL_PP2_RXD_MAX];
479
480                         num = MRVL_PP2_RXD_MAX;
481                         ret = pp2_ppio_recv(q->priv->ppio,
482                                             q->priv->rxq_map[q->queue_id].tc,
483                                             q->priv->rxq_map[q->queue_id].inq,
484                                             descs, (uint16_t *)&num);
485                 } while (ret == 0 && num);
486         }
487 }
488
489 /**
490  * Flush transmit shadow queues.
491  *
492  * @param dev
493  *   Pointer to Ethernet device structure.
494  */
495 static void
496 mrvl_flush_tx_shadow_queues(struct rte_eth_dev *dev)
497 {
498         int i;
499
500         RTE_LOG(INFO, PMD, "Flushing tx shadow queues\n");
501         for (i = 0; i < RTE_MAX_LCORE; i++) {
502                 struct mrvl_shadow_txq *sq =
503                         &shadow_txqs[dev->data->port_id][i];
504
505                 while (sq->tail != sq->head) {
506                         uint64_t addr = cookie_addr_high |
507                                         sq->ent[sq->tail].buff.cookie;
508                         rte_pktmbuf_free((struct rte_mbuf *)addr);
509                         sq->tail = (sq->tail + 1) & MRVL_PP2_TX_SHADOWQ_MASK;
510                 }
511
512                 memset(sq, 0, sizeof(*sq));
513         }
514 }
515
516 /**
517  * Flush hardware bpool (buffer-pool).
518  *
519  * @param dev
520  *   Pointer to Ethernet device structure.
521  */
522 static void
523 mrvl_flush_bpool(struct rte_eth_dev *dev)
524 {
525         struct mrvl_priv *priv = dev->data->dev_private;
526         uint32_t num;
527         int ret;
528
529         ret = pp2_bpool_get_num_buffs(priv->bpool, &num);
530         if (ret) {
531                 RTE_LOG(ERR, PMD, "Failed to get bpool buffers number\n");
532                 return;
533         }
534
535         while (num--) {
536                 struct pp2_buff_inf inf;
537                 uint64_t addr;
538
539                 ret = pp2_bpool_get_buff(hifs[rte_lcore_id()], priv->bpool,
540                                          &inf);
541                 if (ret)
542                         break;
543
544                 addr = cookie_addr_high | inf.cookie;
545                 rte_pktmbuf_free((struct rte_mbuf *)addr);
546         }
547 }
548
549 /**
550  * DPDK callback to stop the device.
551  *
552  * @param dev
553  *   Pointer to Ethernet device structure.
554  */
static void
mrvl_dev_stop(struct rte_eth_dev *dev)
{
	struct mrvl_priv *priv = dev->data->dev_private;

	/* order matters: take the link down and drain the queues
	 * before tearing the ppio and QoS table down
	 */
	mrvl_dev_set_link_down(dev);
	mrvl_flush_rx_queues(dev);
	mrvl_flush_tx_shadow_queues(dev);
	if (priv->qos_tbl)
		pp2_cls_qos_tbl_deinit(priv->qos_tbl);
	pp2_ppio_deinit(priv->ppio);
	priv->ppio = NULL; /* mark the port as stopped */
}
568
569 /**
570  * DPDK callback to close the device.
571  *
572  * @param dev
573  *   Pointer to Ethernet device structure.
574  */
575 static void
576 mrvl_dev_close(struct rte_eth_dev *dev)
577 {
578         struct mrvl_priv *priv = dev->data->dev_private;
579         size_t i;
580
581         for (i = 0; i < priv->ppio_params.inqs_params.num_tcs; ++i) {
582                 struct pp2_ppio_tc_params *tc_params =
583                         &priv->ppio_params.inqs_params.tcs_params[i];
584
585                 if (tc_params->inqs_params) {
586                         rte_free(tc_params->inqs_params);
587                         tc_params->inqs_params = NULL;
588                 }
589         }
590
591         mrvl_flush_bpool(dev);
592 }
593
594 /**
595  * DPDK callback to retrieve physical link information.
596  *
597  * @param dev
598  *   Pointer to Ethernet device structure.
599  * @param wait_to_complete
600  *   Wait for request completion (ignored).
601  *
602  * @return
603  *   0 on success, negative error value otherwise.
604  */
605 static int
606 mrvl_link_update(struct rte_eth_dev *dev, int wait_to_complete __rte_unused)
607 {
608         /*
609          * TODO
610          * once MUSDK provides necessary API use it here
611          */
612         struct ethtool_cmd edata;
613         struct ifreq req;
614         int ret, fd;
615
616         edata.cmd = ETHTOOL_GSET;
617
618         strcpy(req.ifr_name, dev->data->name);
619         req.ifr_data = (void *)&edata;
620
621         fd = socket(AF_INET, SOCK_DGRAM, 0);
622         if (fd == -1)
623                 return -EFAULT;
624
625         ret = ioctl(fd, SIOCETHTOOL, &req);
626         if (ret == -1) {
627                 close(fd);
628                 return -EFAULT;
629         }
630
631         close(fd);
632
633         switch (ethtool_cmd_speed(&edata)) {
634         case SPEED_10:
635                 dev->data->dev_link.link_speed = ETH_SPEED_NUM_10M;
636                 break;
637         case SPEED_100:
638                 dev->data->dev_link.link_speed = ETH_SPEED_NUM_100M;
639                 break;
640         case SPEED_1000:
641                 dev->data->dev_link.link_speed = ETH_SPEED_NUM_1G;
642                 break;
643         case SPEED_10000:
644                 dev->data->dev_link.link_speed = ETH_SPEED_NUM_10G;
645                 break;
646         default:
647                 dev->data->dev_link.link_speed = ETH_SPEED_NUM_NONE;
648         }
649
650         dev->data->dev_link.link_duplex = edata.duplex ? ETH_LINK_FULL_DUPLEX :
651                                                          ETH_LINK_HALF_DUPLEX;
652         dev->data->dev_link.link_autoneg = edata.autoneg ? ETH_LINK_AUTONEG :
653                                                            ETH_LINK_FIXED;
654
655         return 0;
656 }
657
658 /**
659  * DPDK callback to enable promiscuous mode.
660  *
661  * @param dev
662  *   Pointer to Ethernet device structure.
663  */
664 static void
665 mrvl_promiscuous_enable(struct rte_eth_dev *dev)
666 {
667         struct mrvl_priv *priv = dev->data->dev_private;
668         int ret;
669
670         ret = pp2_ppio_set_uc_promisc(priv->ppio, 1);
671         if (ret)
672                 RTE_LOG(ERR, PMD, "Failed to enable promiscuous mode\n");
673 }
674
675 /**
676  * DPDK callback to enable allmulti mode.
677  *
678  * @param dev
679  *   Pointer to Ethernet device structure.
680  */
681 static void
682 mrvl_allmulticast_enable(struct rte_eth_dev *dev)
683 {
684         struct mrvl_priv *priv = dev->data->dev_private;
685         int ret;
686
687         ret = pp2_ppio_set_mc_promisc(priv->ppio, 1);
688         if (ret)
689                 RTE_LOG(ERR, PMD, "Failed enable all-multicast mode\n");
690 }
691
692 /**
693  * DPDK callback to disable promiscuous mode.
694  *
695  * @param dev
696  *   Pointer to Ethernet device structure.
697  */
698 static void
699 mrvl_promiscuous_disable(struct rte_eth_dev *dev)
700 {
701         struct mrvl_priv *priv = dev->data->dev_private;
702         int ret;
703
704         ret = pp2_ppio_set_uc_promisc(priv->ppio, 0);
705         if (ret)
706                 RTE_LOG(ERR, PMD, "Failed to disable promiscuous mode\n");
707 }
708
709 /**
710  * DPDK callback to disable allmulticast mode.
711  *
712  * @param dev
713  *   Pointer to Ethernet device structure.
714  */
715 static void
716 mrvl_allmulticast_disable(struct rte_eth_dev *dev)
717 {
718         struct mrvl_priv *priv = dev->data->dev_private;
719         int ret;
720
721         ret = pp2_ppio_set_mc_promisc(priv->ppio, 0);
722         if (ret)
723                 RTE_LOG(ERR, PMD, "Failed to disable all-multicast mode\n");
724 }
725
726 /**
727  * DPDK callback to remove a MAC address.
728  *
729  * @param dev
730  *   Pointer to Ethernet device structure.
731  * @param index
732  *   MAC address index.
733  */
734 static void
735 mrvl_mac_addr_remove(struct rte_eth_dev *dev, uint32_t index)
736 {
737         struct mrvl_priv *priv = dev->data->dev_private;
738         char buf[ETHER_ADDR_FMT_SIZE];
739         int ret;
740
741         ret = pp2_ppio_remove_mac_addr(priv->ppio,
742                                        dev->data->mac_addrs[index].addr_bytes);
743         if (ret) {
744                 ether_format_addr(buf, sizeof(buf),
745                                   &dev->data->mac_addrs[index]);
746                 RTE_LOG(ERR, PMD, "Failed to remove mac %s\n", buf);
747         }
748 }
749
750 /**
751  * DPDK callback to add a MAC address.
752  *
753  * @param dev
754  *   Pointer to Ethernet device structure.
755  * @param mac_addr
756  *   MAC address to register.
757  * @param index
758  *   MAC address index.
759  * @param vmdq
760  *   VMDq pool index to associate address with (unused).
761  *
762  * @return
763  *   0 on success, negative error value otherwise.
764  */
765 static int
766 mrvl_mac_addr_add(struct rte_eth_dev *dev, struct ether_addr *mac_addr,
767                   uint32_t index, uint32_t vmdq __rte_unused)
768 {
769         struct mrvl_priv *priv = dev->data->dev_private;
770         char buf[ETHER_ADDR_FMT_SIZE];
771         int ret;
772
773         if (index == 0)
774                 /* For setting index 0, mrvl_mac_addr_set() should be used.*/
775                 return -1;
776
777         /*
778          * Maximum number of uc addresses can be tuned via kernel module mvpp2x
779          * parameter uc_filter_max. Maximum number of mc addresses is then
780          * MRVL_MAC_ADDRS_MAX - uc_filter_max. Currently it defaults to 4 and
781          * 21 respectively.
782          *
783          * If more than uc_filter_max uc addresses were added to filter list
784          * then NIC will switch to promiscuous mode automatically.
785          *
786          * If more than MRVL_MAC_ADDRS_MAX - uc_filter_max number mc addresses
787          * were added to filter list then NIC will switch to all-multicast mode
788          * automatically.
789          */
790         ret = pp2_ppio_add_mac_addr(priv->ppio, mac_addr->addr_bytes);
791         if (ret) {
792                 ether_format_addr(buf, sizeof(buf), mac_addr);
793                 RTE_LOG(ERR, PMD, "Failed to add mac %s\n", buf);
794                 return -1;
795         }
796
797         return 0;
798 }
799
800 /**
801  * DPDK callback to set the primary MAC address.
802  *
803  * @param dev
804  *   Pointer to Ethernet device structure.
805  * @param mac_addr
806  *   MAC address to register.
807  */
static void
mrvl_mac_addr_set(struct rte_eth_dev *dev, struct ether_addr *mac_addr)
{
	struct mrvl_priv *priv = dev->data->dev_private;

	/* NOTE(review): return value is ignored - the callback is void */
	pp2_ppio_set_mac_addr(priv->ppio, mac_addr->addr_bytes);
	/*
	 * TODO
	 * Port stops sending packets if pp2_ppio_set_mac_addr()
	 * was called after pp2_ppio_enable(). As a quick fix issue
	 * enable port once again.
	 */
	pp2_ppio_enable(priv->ppio);
}
822
823 /**
824  * DPDK callback to get information about the device.
825  *
826  * @param dev
827  *   Pointer to Ethernet device structure (unused).
828  * @param info
829  *   Info structure output buffer.
830  */
831 static void
832 mrvl_dev_infos_get(struct rte_eth_dev *dev __rte_unused,
833                    struct rte_eth_dev_info *info)
834 {
835         info->speed_capa = ETH_LINK_SPEED_10M |
836                            ETH_LINK_SPEED_100M |
837                            ETH_LINK_SPEED_1G |
838                            ETH_LINK_SPEED_10G;
839
840         info->max_rx_queues = MRVL_PP2_RXQ_MAX;
841         info->max_tx_queues = MRVL_PP2_TXQ_MAX;
842         info->max_mac_addrs = MRVL_MAC_ADDRS_MAX;
843
844         info->rx_desc_lim.nb_max = MRVL_PP2_RXD_MAX;
845         info->rx_desc_lim.nb_min = MRVL_PP2_RXD_MIN;
846         info->rx_desc_lim.nb_align = MRVL_PP2_RXD_ALIGN;
847
848         info->tx_desc_lim.nb_max = MRVL_PP2_TXD_MAX;
849         info->tx_desc_lim.nb_min = MRVL_PP2_TXD_MIN;
850         info->tx_desc_lim.nb_align = MRVL_PP2_TXD_ALIGN;
851
852         info->rx_offload_capa = DEV_RX_OFFLOAD_JUMBO_FRAME;
853         info->flow_type_rss_offloads = ETH_RSS_IPV4 |
854                                        ETH_RSS_NONFRAG_IPV4_TCP |
855                                        ETH_RSS_NONFRAG_IPV4_UDP;
856
857         /* By default packets are dropped if no descriptors are available */
858         info->default_rxconf.rx_drop_en = 1;
859
860         info->max_rx_pktlen = MRVL_PKT_SIZE_MAX;
861 }
862
863 /**
864  * DPDK callback to get information about specific receive queue.
865  *
866  * @param dev
867  *   Pointer to Ethernet device structure.
868  * @param rx_queue_id
869  *   Receive queue index.
870  * @param qinfo
871  *   Receive queue information structure.
872  */
873 static void mrvl_rxq_info_get(struct rte_eth_dev *dev, uint16_t rx_queue_id,
874                               struct rte_eth_rxq_info *qinfo)
875 {
876         struct mrvl_rxq *q = dev->data->rx_queues[rx_queue_id];
877         struct mrvl_priv *priv = dev->data->dev_private;
878         int inq = priv->rxq_map[rx_queue_id].inq;
879         int tc = priv->rxq_map[rx_queue_id].tc;
880         struct pp2_ppio_tc_params *tc_params =
881                 &priv->ppio_params.inqs_params.tcs_params[tc];
882
883         qinfo->mp = q->mp;
884         qinfo->nb_desc = tc_params->inqs_params[inq].size;
885 }
886
887 /**
888  * DPDK callback to get information about specific transmit queue.
889  *
890  * @param dev
891  *   Pointer to Ethernet device structure.
892  * @param tx_queue_id
893  *   Transmit queue index.
894  * @param qinfo
895  *   Transmit queue information structure.
896  */
897 static void mrvl_txq_info_get(struct rte_eth_dev *dev, uint16_t tx_queue_id,
898                               struct rte_eth_txq_info *qinfo)
899 {
900         struct mrvl_priv *priv = dev->data->dev_private;
901
902         qinfo->nb_desc =
903                 priv->ppio_params.outqs_params.outqs_params[tx_queue_id].size;
904 }
905
906 /**
907  * Release buffers to hardware bpool (buffer-pool)
908  *
909  * @param rxq
910  *   Receive queue pointer.
911  * @param num
912  *   Number of buffers to release to bpool.
913  *
914  * @return
915  *   0 on success, negative error value otherwise.
916  */
917 static int
918 mrvl_fill_bpool(struct mrvl_rxq *rxq, int num)
919 {
920         struct buff_release_entry entries[MRVL_PP2_TXD_MAX];
921         struct rte_mbuf *mbufs[MRVL_PP2_TXD_MAX];
922         int i, ret;
923         unsigned int core_id = rte_lcore_id();
924         struct pp2_hif *hif = hifs[core_id];
925         struct pp2_bpool *bpool = rxq->priv->bpool;
926
927         ret = rte_pktmbuf_alloc_bulk(rxq->mp, mbufs, num);
928         if (ret)
929                 return ret;
930
931         if (cookie_addr_high == MRVL_COOKIE_ADDR_INVALID)
932                 cookie_addr_high =
933                         (uint64_t)mbufs[0] & MRVL_COOKIE_HIGH_ADDR_MASK;
934
935         for (i = 0; i < num; i++) {
936                 if (((uint64_t)mbufs[i] & MRVL_COOKIE_HIGH_ADDR_MASK)
937                         != cookie_addr_high) {
938                         RTE_LOG(ERR, PMD,
939                                 "mbuf virtual addr high 0x%lx out of range\n",
940                                 (uint64_t)mbufs[i] >> 32);
941                         goto out;
942                 }
943
944                 entries[i].buff.addr =
945                         rte_mbuf_data_dma_addr_default(mbufs[i]);
946                 entries[i].buff.cookie = (pp2_cookie_t)(uint64_t)mbufs[i];
947                 entries[i].bpool = bpool;
948         }
949
950         pp2_bpool_put_buffs(hif, entries, (uint16_t *)&i);
951         mrvl_port_bpool_size[bpool->pp2_id][bpool->id][core_id] += i;
952
953         if (i != num)
954                 goto out;
955
956         return 0;
957 out:
958         for (; i < num; i++)
959                 rte_pktmbuf_free(mbufs[i]);
960
961         return -1;
962 }
963
964 /**
965  * DPDK callback to configure the receive queue.
966  *
967  * @param dev
968  *   Pointer to Ethernet device structure.
969  * @param idx
970  *   RX queue index.
971  * @param desc
972  *   Number of descriptors to configure in queue.
973  * @param socket
974  *   NUMA socket on which memory must be allocated.
975  * @param conf
 *   Thresholds parameters (unused).
977  * @param mp
978  *   Memory pool for buffer allocations.
979  *
980  * @return
981  *   0 on success, negative error value otherwise.
982  */
983 static int
984 mrvl_rx_queue_setup(struct rte_eth_dev *dev, uint16_t idx, uint16_t desc,
985                     unsigned int socket,
986                     const struct rte_eth_rxconf *conf __rte_unused,
987                     struct rte_mempool *mp)
988 {
989         struct mrvl_priv *priv = dev->data->dev_private;
990         struct mrvl_rxq *rxq;
991         uint32_t min_size,
992                  max_rx_pkt_len = dev->data->dev_conf.rxmode.max_rx_pkt_len;
993         int ret, tc, inq;
994
995         if (priv->rxq_map[idx].tc == MRVL_UNKNOWN_TC) {
996                 /*
997                  * Unknown TC mapping, mapping will not have a correct queue.
998                  */
999                 RTE_LOG(ERR, PMD, "Unknown TC mapping for queue %hu eth%hhu\n",
1000                         idx, priv->ppio_id);
1001                 return -EFAULT;
1002         }
1003
1004         min_size = rte_pktmbuf_data_room_size(mp) - RTE_PKTMBUF_HEADROOM -
1005                    MRVL_PKT_EFFEC_OFFS;
1006         if (min_size < max_rx_pkt_len) {
1007                 RTE_LOG(ERR, PMD,
1008                         "Mbuf size must be increased to %u bytes to hold up to %u bytes of data.\n",
1009                         max_rx_pkt_len + RTE_PKTMBUF_HEADROOM +
1010                         MRVL_PKT_EFFEC_OFFS,
1011                         max_rx_pkt_len);
1012                 return -EINVAL;
1013         }
1014
1015         if (dev->data->rx_queues[idx]) {
1016                 rte_free(dev->data->rx_queues[idx]);
1017                 dev->data->rx_queues[idx] = NULL;
1018         }
1019
1020         rxq = rte_zmalloc_socket("rxq", sizeof(*rxq), 0, socket);
1021         if (!rxq)
1022                 return -ENOMEM;
1023
1024         rxq->priv = priv;
1025         rxq->mp = mp;
1026         rxq->queue_id = idx;
1027         rxq->port_id = dev->data->port_id;
1028         mrvl_port_to_bpool_lookup[rxq->port_id] = priv->bpool;
1029
1030         tc = priv->rxq_map[rxq->queue_id].tc,
1031         inq = priv->rxq_map[rxq->queue_id].inq;
1032         priv->ppio_params.inqs_params.tcs_params[tc].inqs_params[inq].size =
1033                 desc;
1034
1035         ret = mrvl_fill_bpool(rxq, desc);
1036         if (ret) {
1037                 rte_free(rxq);
1038                 return ret;
1039         }
1040
1041         priv->bpool_init_size += desc;
1042
1043         dev->data->rx_queues[idx] = rxq;
1044
1045         return 0;
1046 }
1047
1048 /**
1049  * DPDK callback to release the receive queue.
1050  *
1051  * @param rxq
1052  *   Generic receive queue pointer.
1053  */
1054 static void
1055 mrvl_rx_queue_release(void *rxq)
1056 {
1057         struct mrvl_rxq *q = rxq;
1058         struct pp2_ppio_tc_params *tc_params;
1059         int i, num, tc, inq;
1060
1061         if (!q)
1062                 return;
1063
1064         tc = q->priv->rxq_map[q->queue_id].tc;
1065         inq = q->priv->rxq_map[q->queue_id].inq;
1066         tc_params = &q->priv->ppio_params.inqs_params.tcs_params[tc];
1067         num = tc_params->inqs_params[inq].size;
1068         for (i = 0; i < num; i++) {
1069                 struct pp2_buff_inf inf;
1070                 uint64_t addr;
1071
1072                 pp2_bpool_get_buff(hifs[rte_lcore_id()], q->priv->bpool, &inf);
1073                 addr = cookie_addr_high | inf.cookie;
1074                 rte_pktmbuf_free((struct rte_mbuf *)addr);
1075         }
1076
1077         rte_free(q);
1078 }
1079
1080 /**
1081  * DPDK callback to configure the transmit queue.
1082  *
1083  * @param dev
1084  *   Pointer to Ethernet device structure.
1085  * @param idx
1086  *   Transmit queue index.
1087  * @param desc
1088  *   Number of descriptors to configure in the queue.
1089  * @param socket
1090  *   NUMA socket on which memory must be allocated.
1091  * @param conf
1092  *   Thresholds parameters (unused).
1093  *
1094  * @return
1095  *   0 on success, negative error value otherwise.
1096  */
1097 static int
1098 mrvl_tx_queue_setup(struct rte_eth_dev *dev, uint16_t idx, uint16_t desc,
1099                     unsigned int socket,
1100                     const struct rte_eth_txconf *conf __rte_unused)
1101 {
1102         struct mrvl_priv *priv = dev->data->dev_private;
1103         struct mrvl_txq *txq;
1104
1105         if (dev->data->tx_queues[idx]) {
1106                 rte_free(dev->data->tx_queues[idx]);
1107                 dev->data->tx_queues[idx] = NULL;
1108         }
1109
1110         txq = rte_zmalloc_socket("txq", sizeof(*txq), 0, socket);
1111         if (!txq)
1112                 return -ENOMEM;
1113
1114         txq->priv = priv;
1115         txq->queue_id = idx;
1116         txq->port_id = dev->data->port_id;
1117         dev->data->tx_queues[idx] = txq;
1118
1119         priv->ppio_params.outqs_params.outqs_params[idx].size = desc;
1120         priv->ppio_params.outqs_params.outqs_params[idx].weight = 1;
1121
1122         return 0;
1123 }
1124
1125 /**
1126  * DPDK callback to release the transmit queue.
1127  *
1128  * @param txq
1129  *   Generic transmit queue pointer.
1130  */
1131 static void
1132 mrvl_tx_queue_release(void *txq)
1133 {
1134         struct mrvl_txq *q = txq;
1135
1136         if (!q)
1137                 return;
1138
1139         rte_free(q);
1140 }
1141
1142 /**
1143  * Update RSS hash configuration
1144  *
1145  * @param dev
1146  *   Pointer to Ethernet device structure.
1147  * @param rss_conf
1148  *   Pointer to RSS configuration.
1149  *
1150  * @return
1151  *   0 on success, negative error value otherwise.
1152  */
1153 static int
1154 mrvl_rss_hash_update(struct rte_eth_dev *dev,
1155                      struct rte_eth_rss_conf *rss_conf)
1156 {
1157         struct mrvl_priv *priv = dev->data->dev_private;
1158
1159         return mrvl_configure_rss(priv, rss_conf);
1160 }
1161
1162 /**
1163  * DPDK callback to get RSS hash configuration.
1164  *
1165  * @param dev
1166  *   Pointer to Ethernet device structure.
 * @param rss_conf
1168  *   Pointer to RSS configuration.
1169  *
1170  * @return
1171  *   Always 0.
1172  */
1173 static int
1174 mrvl_rss_hash_conf_get(struct rte_eth_dev *dev,
1175                        struct rte_eth_rss_conf *rss_conf)
1176 {
1177         struct mrvl_priv *priv = dev->data->dev_private;
1178         enum pp2_ppio_hash_type hash_type =
1179                 priv->ppio_params.inqs_params.hash_type;
1180
1181         rss_conf->rss_key = NULL;
1182
1183         if (hash_type == PP2_PPIO_HASH_T_NONE)
1184                 rss_conf->rss_hf = 0;
1185         else if (hash_type == PP2_PPIO_HASH_T_2_TUPLE)
1186                 rss_conf->rss_hf = ETH_RSS_IPV4;
1187         else if (hash_type == PP2_PPIO_HASH_T_5_TUPLE && priv->rss_hf_tcp)
1188                 rss_conf->rss_hf = ETH_RSS_NONFRAG_IPV4_TCP;
1189         else if (hash_type == PP2_PPIO_HASH_T_5_TUPLE && !priv->rss_hf_tcp)
1190                 rss_conf->rss_hf = ETH_RSS_NONFRAG_IPV4_UDP;
1191
1192         return 0;
1193 }
1194
/**
 * PMD callbacks registered with librte_ether for every mrvl port.
 * Callbacks not listed here fall back to the librte_ether defaults.
 */
static const struct eth_dev_ops mrvl_ops = {
	.dev_configure = mrvl_dev_configure,
	.dev_start = mrvl_dev_start,
	.dev_stop = mrvl_dev_stop,
	.dev_set_link_up = mrvl_dev_set_link_up,
	.dev_set_link_down = mrvl_dev_set_link_down,
	.dev_close = mrvl_dev_close,
	.link_update = mrvl_link_update,
	.promiscuous_enable = mrvl_promiscuous_enable,
	.allmulticast_enable = mrvl_allmulticast_enable,
	.promiscuous_disable = mrvl_promiscuous_disable,
	.allmulticast_disable = mrvl_allmulticast_disable,
	.mac_addr_remove = mrvl_mac_addr_remove,
	.mac_addr_add = mrvl_mac_addr_add,
	.mac_addr_set = mrvl_mac_addr_set,
	.mtu_set = mrvl_mtu_set,
	.dev_infos_get = mrvl_dev_infos_get,
	.rxq_info_get = mrvl_rxq_info_get,
	.txq_info_get = mrvl_txq_info_get,
	.rx_queue_setup = mrvl_rx_queue_setup,
	.rx_queue_release = mrvl_rx_queue_release,
	.tx_queue_setup = mrvl_tx_queue_setup,
	.tx_queue_release = mrvl_tx_queue_release,
	.rss_hash_update = mrvl_rss_hash_update,
	.rss_hash_conf_get = mrvl_rss_hash_conf_get,
};
1221
1222 /**
1223  * DPDK callback for receive.
1224  *
1225  * @param rxq
1226  *   Generic pointer to the receive queue.
1227  * @param rx_pkts
1228  *   Array to store received packets.
1229  * @param nb_pkts
1230  *   Maximum number of packets in array.
1231  *
1232  * @return
1233  *   Number of packets successfully received.
1234  */
static uint16_t
mrvl_rx_pkt_burst(void *rxq, struct rte_mbuf **rx_pkts, uint16_t nb_pkts)
{
	struct mrvl_rxq *q = rxq;
	struct pp2_ppio_desc descs[nb_pkts];
	struct pp2_bpool *bpool;
	int i, ret, rx_done = 0;
	int num;
	unsigned int core_id = rte_lcore_id();

	/* Port not started yet - nothing to receive. */
	if (unlikely(!q->priv->ppio))
		return 0;

	bpool = q->priv->bpool;

	/* nb_pkts is updated in place to the number of descriptors filled. */
	ret = pp2_ppio_recv(q->priv->ppio, q->priv->rxq_map[q->queue_id].tc,
			    q->priv->rxq_map[q->queue_id].inq, descs, &nb_pkts);
	if (unlikely(ret < 0)) {
		RTE_LOG(ERR, PMD, "Failed to receive packets\n");
		return 0;
	}
	/* Received buffers left the hardware pool - update the per-core
	 * accounting used for the refill/shrink decision below.
	 */
	mrvl_port_bpool_size[bpool->pp2_id][bpool->id][core_id] -= nb_pkts;

	for (i = 0; i < nb_pkts; i++) {
		struct rte_mbuf *mbuf;
		enum pp2_inq_desc_status status;
		uint64_t addr;

		/* Prefetch the mbuf a few descriptors ahead to hide the
		 * cache miss when it is actually touched below.
		 */
		if (likely(nb_pkts - i > MRVL_MUSDK_PREFETCH_SHIFT)) {
			struct pp2_ppio_desc *pref_desc;
			u64 pref_addr;

			pref_desc = &descs[i + MRVL_MUSDK_PREFETCH_SHIFT];
			pref_addr = cookie_addr_high |
				    pp2_ppio_inq_desc_get_cookie(pref_desc);
			rte_mbuf_prefetch_part1((struct rte_mbuf *)(pref_addr));
			rte_mbuf_prefetch_part2((struct rte_mbuf *)(pref_addr));
		}

		/* The descriptor cookie carries the low 32 bits of the mbuf
		 * virtual address; combine with the shared high bits latched
		 * in mrvl_fill_bpool().
		 */
		addr = cookie_addr_high |
		       pp2_ppio_inq_desc_get_cookie(&descs[i]);
		mbuf = (struct rte_mbuf *)addr;
		rte_pktmbuf_reset(mbuf);

		/* drop packet in case of mac, overrun or resource error */
		status = pp2_ppio_inq_desc_get_l2_pkt_error(&descs[i]);
		if (unlikely(status != PP2_DESC_ERR_OK)) {
			struct pp2_buff_inf binf = {
				.addr = rte_mbuf_data_dma_addr_default(mbuf),
				.cookie = (pp2_cookie_t)(uint64_t)mbuf,
			};

			/* Return the buffer straight to the hardware pool. */
			pp2_bpool_put_buff(hifs[core_id], bpool, &binf);
			mrvl_port_bpool_size
				[bpool->pp2_id][bpool->id][core_id]++;
			continue;
		}

		mbuf->data_off += MRVL_PKT_EFFEC_OFFS;
		mbuf->pkt_len = pp2_ppio_inq_desc_get_pkt_len(&descs[i]);
		mbuf->data_len = mbuf->pkt_len;
		mbuf->port = q->port_id;

		rx_pkts[rx_done++] = mbuf;
	}

	/* Opportunistically rebalance the bpool; skip entirely if another
	 * lcore already holds the lock.
	 */
	if (rte_spinlock_trylock(&q->priv->lock) == 1) {
		num = mrvl_get_bpool_size(bpool->pp2_id, bpool->id);

		if (unlikely(num <= q->priv->bpool_min_size ||
			     (!rx_done && num < q->priv->bpool_init_size))) {
			/* Pool running low (or starved with no rx) - refill. */
			ret = mrvl_fill_bpool(q, MRVL_BURST_SIZE);
			if (ret)
				RTE_LOG(ERR, PMD, "Failed to fill bpool\n");
		} else if (unlikely(num > q->priv->bpool_max_size)) {
			int i;	/* NOTE(review): shadows the outer i */
			int pkt_to_remove = num - q->priv->bpool_init_size;
			struct rte_mbuf *mbuf;
			struct pp2_buff_inf buff;

			RTE_LOG(DEBUG, PMD,
				"\nport-%d:%d: bpool %d oversize - remove %d buffers (pool size: %d -> %d)\n",
				bpool->pp2_id, q->priv->ppio->port_id,
				bpool->id, pkt_to_remove, num,
				q->priv->bpool_init_size);

			/* Shrink the pool back to its initial size. */
			for (i = 0; i < pkt_to_remove; i++) {
				pp2_bpool_get_buff(hifs[core_id], bpool, &buff);
				mbuf = (struct rte_mbuf *)
					(cookie_addr_high | buff.cookie);
				rte_pktmbuf_free(mbuf);
			}
			mrvl_port_bpool_size
				[bpool->pp2_id][bpool->id][core_id] -=
								pkt_to_remove;
		}
		rte_spinlock_unlock(&q->priv->lock);
	}

	return rx_done;
}
1336
1337 /**
1338  * Release already sent buffers to bpool (buffer-pool).
1339  *
1340  * @param ppio
1341  *   Pointer to the port structure.
1342  * @param hif
1343  *   Pointer to the MUSDK hardware interface.
1344  * @param sq
1345  *   Pointer to the shadow queue.
1346  * @param qid
1347  *   Queue id number.
1348  * @param force
1349  *   Force releasing packets.
1350  */
static inline void
mrvl_free_sent_buffers(struct pp2_ppio *ppio, struct pp2_hif *hif,
		       struct mrvl_shadow_txq *sq, int qid, int force)
{
	struct buff_release_entry *entry;
	uint16_t nb_done = 0, num = 0, skip_bufs = 0;
	int i, core_id = rte_lcore_id();

	pp2_ppio_get_num_outq_done(ppio, hif, qid, &nb_done);

	sq->num_to_release += nb_done;

	/* Batch the releases: do nothing until enough completions have
	 * accumulated, unless the caller forces an immediate flush.
	 */
	if (likely(!force &&
		   sq->num_to_release < MRVL_PP2_BUF_RELEASE_BURST_SIZE))
		return;

	nb_done = sq->num_to_release;
	sq->num_to_release = 0;

	/* Walk the shadow queue from tail, building contiguous runs of
	 * entries that can be handed to pp2_bpool_put_buffs() in one call.
	 * A run is flushed (label "skip") on a bad entry, a non-bpool
	 * entry, or a ring wrap.
	 */
	for (i = 0; i < nb_done; i++) {
		entry = &sq->ent[sq->tail + num];
		if (unlikely(!entry->buff.addr)) {
			RTE_LOG(ERR, PMD,
				"Shadow memory @%d: cookie(%lx), pa(%lx)!\n",
				sq->tail, (u64)entry->buff.cookie,
				(u64)entry->buff.addr);
			skip_bufs = 1;
			goto skip;
		}

		/* bpool == NULL was flagged at transmit time (unknown port
		 * or refcnt > 1): free via the mbuf API instead of returning
		 * the buffer to a hardware pool.
		 */
		if (unlikely(!entry->bpool)) {
			struct rte_mbuf *mbuf;

			mbuf = (struct rte_mbuf *)
			       (cookie_addr_high | entry->buff.cookie);
			rte_pktmbuf_free(mbuf);
			skip_bufs = 1;
			goto skip;
		}

		mrvl_port_bpool_size
			[entry->bpool->pp2_id][entry->bpool->id][core_id]++;
		num++;
		/* Flush at ring wrap so put_buffs sees a contiguous run. */
		if (unlikely(sq->tail + num == MRVL_PP2_TX_SHADOWQ_SIZE))
			goto skip;
		continue;
skip:
		/* NOTE(review): skip_bufs is set to 1 on the error paths but
		 * never reset to 0, so a later wrap-flush would also advance
		 * the tail by one extra slot - verify this is intended.
		 */
		if (likely(num))
			pp2_bpool_put_buffs(hif, &sq->ent[sq->tail], &num);
		num += skip_bufs;
		sq->tail = (sq->tail + num) & MRVL_PP2_TX_SHADOWQ_MASK;
		sq->size -= num;
		num = 0;
	}

	/* Release the final contiguous run, if any. */
	if (likely(num)) {
		pp2_bpool_put_buffs(hif, &sq->ent[sq->tail], &num);
		sq->tail = (sq->tail + num) & MRVL_PP2_TX_SHADOWQ_MASK;
		sq->size -= num;
	}
}
1412
1413 /**
1414  * DPDK callback for transmit.
1415  *
1416  * @param txq
1417  *   Generic pointer transmit queue.
1418  * @param tx_pkts
1419  *   Packets to transmit.
1420  * @param nb_pkts
1421  *   Number of packets in array.
1422  *
1423  * @return
1424  *   Number of packets successfully transmitted.
1425  */
static uint16_t
mrvl_tx_pkt_burst(void *txq, struct rte_mbuf **tx_pkts, uint16_t nb_pkts)
{
	struct mrvl_txq *q = txq;
	struct mrvl_shadow_txq *sq = &shadow_txqs[q->port_id][rte_lcore_id()];
	struct pp2_hif *hif = hifs[rte_lcore_id()];
	struct pp2_ppio_desc descs[nb_pkts];
	int i;
	uint16_t num, sq_free_size;

	/* Port not started yet - nothing to send. */
	if (unlikely(!q->priv->ppio))
		return 0;

	/* Reclaim shadow-queue slots for buffers the hardware completed. */
	if (sq->size)
		mrvl_free_sent_buffers(q->priv->ppio, hif, sq, q->queue_id, 0);

	/* One slot is kept unused to distinguish a full ring from empty. */
	sq_free_size = MRVL_PP2_TX_SHADOWQ_SIZE - sq->size - 1;
	if (unlikely(nb_pkts > sq_free_size)) {
		RTE_LOG(DEBUG, PMD,
			"No room in shadow queue for %d packets! %d packets will be sent.\n",
			nb_pkts, sq_free_size);
		nb_pkts = sq_free_size;
	}

	for (i = 0; i < nb_pkts; i++) {
		struct rte_mbuf *mbuf = tx_pkts[i];

		/* Prefetch a few packets ahead to hide header access cost. */
		if (likely(nb_pkts - i > MRVL_MUSDK_PREFETCH_SHIFT)) {
			struct rte_mbuf *pref_pkt_hdr;

			pref_pkt_hdr = tx_pkts[i + MRVL_MUSDK_PREFETCH_SHIFT];
			rte_mbuf_prefetch_part1(pref_pkt_hdr);
			rte_mbuf_prefetch_part2(pref_pkt_hdr);
		}

		/* Record the buffer in the shadow queue. bpool is set to
		 * NULL when the mbuf must not be returned to a hardware
		 * pool on completion (unknown port 0xff or shared
		 * refcnt > 1) and has to go through rte_pktmbuf_free().
		 */
		sq->ent[sq->head].buff.cookie = (pp2_cookie_t)(uint64_t)mbuf;
		sq->ent[sq->head].buff.addr =
			rte_mbuf_data_dma_addr_default(mbuf);
		sq->ent[sq->head].bpool =
			(unlikely(mbuf->port == 0xff || mbuf->refcnt > 1)) ?
			 NULL : mrvl_port_to_bpool_lookup[mbuf->port];
		sq->head = (sq->head + 1) & MRVL_PP2_TX_SHADOWQ_MASK;
		sq->size++;

		pp2_ppio_outq_desc_reset(&descs[i]);
		pp2_ppio_outq_desc_set_phys_addr(&descs[i],
						 rte_pktmbuf_mtophys(mbuf));
		pp2_ppio_outq_desc_set_pkt_offset(&descs[i], 0);
		pp2_ppio_outq_desc_set_pkt_len(&descs[i],
					       rte_pktmbuf_pkt_len(mbuf));
	}

	/* pp2_ppio_send() updates nb_pkts to the count actually queued. */
	num = nb_pkts;
	pp2_ppio_send(q->priv->ppio, hif, q->queue_id, descs, &nb_pkts);
	/* number of packets that were not sent */
	if (unlikely(num > nb_pkts)) {
		/* Roll the shadow-queue head back over the unsent packets. */
		for (i = nb_pkts; i < num; i++) {
			sq->head = (MRVL_PP2_TX_SHADOWQ_SIZE + sq->head - 1) &
				MRVL_PP2_TX_SHADOWQ_MASK;
		}
		sq->size -= num - nb_pkts;
	}

	return nb_pkts;
}
1491
1492 /**
1493  * Initialize packet processor.
1494  *
1495  * @return
1496  *   0 on success, negative error value otherwise.
1497  */
1498 static int
1499 mrvl_init_pp2(void)
1500 {
1501         struct pp2_init_params init_params;
1502
1503         memset(&init_params, 0, sizeof(init_params));
1504         init_params.hif_reserved_map = MRVL_MUSDK_HIFS_RESERVED;
1505         init_params.bm_pool_reserved_map = MRVL_MUSDK_BPOOLS_RESERVED;
1506         init_params.rss_tbl_reserved_map = MRVL_MUSDK_RSS_RESERVED;
1507
1508         return pp2_init(&init_params);
1509 }
1510
1511 /**
1512  * Deinitialize packet processor.
1513  *
1514  * @return
1515  *   0 on success, negative error value otherwise.
1516  */
static void
mrvl_deinit_pp2(void)
{
	/* Tears down the MUSDK state set up by mrvl_init_pp2(). */
	pp2_deinit();
}
1522
1523 /**
1524  * Create private device structure.
1525  *
1526  * @param dev_name
1527  *   Pointer to the port name passed in the initialization parameters.
1528  *
1529  * @return
1530  *   Pointer to the newly allocated private device structure.
1531  */
static struct mrvl_priv *
mrvl_priv_create(const char *dev_name)
{
	struct pp2_bpool_params bpool_params;
	char match[MRVL_MATCH_LEN];
	struct mrvl_priv *priv;
	int ret, bpool_bit;

	priv = rte_zmalloc_socket(dev_name, sizeof(*priv), 0, rte_socket_id());
	if (!priv)
		return NULL;

	/* Resolve the kernel netdev name to packet processor / port ids. */
	ret = pp2_netdev_get_ppio_info((char *)(uintptr_t)dev_name,
				       &priv->pp_id, &priv->ppio_id);
	if (ret)
		goto out_free_priv;

	/* Claim a free buffer-pool slot on this packet processor from the
	 * file-global used_bpools bitmap.
	 */
	bpool_bit = mrvl_reserve_bit(&used_bpools[priv->pp_id],
				     PP2_BPOOL_NUM_POOLS);
	if (bpool_bit < 0)
		goto out_free_priv;
	priv->bpool_bit = bpool_bit;

	snprintf(match, sizeof(match), "pool-%d:%d", priv->pp_id,
		 priv->bpool_bit);
	memset(&bpool_params, 0, sizeof(bpool_params));
	bpool_params.match = match;
	/* Buffers must fit the largest frame plus the packet offset. */
	bpool_params.buff_len = MRVL_PKT_SIZE_MAX + MRVL_PKT_EFFEC_OFFS;
	ret = pp2_bpool_init(&bpool_params, &priv->bpool);
	if (ret)
		goto out_clear_bpool_bit;

	priv->ppio_params.type = PP2_PPIO_T_NIC;
	rte_spinlock_init(&priv->lock);

	return priv;
out_clear_bpool_bit:
	/* Give the reserved bpool slot back on failure. */
	used_bpools[priv->pp_id] &= ~(1 << priv->bpool_bit);
out_free_priv:
	rte_free(priv);
	return NULL;
}
1574
1575 /**
1576  * Create device representing Ethernet port.
1577  *
1578  * @param name
1579  *   Pointer to the port's name.
1580  *
1581  * @return
1582  *   0 on success, negative error value otherwise.
1583  */
1584 static int
1585 mrvl_eth_dev_create(struct rte_vdev_device *vdev, const char *name)
1586 {
1587         int ret, fd = socket(AF_INET, SOCK_DGRAM, 0);
1588         struct rte_eth_dev *eth_dev;
1589         struct mrvl_priv *priv;
1590         struct ifreq req;
1591
1592         eth_dev = rte_eth_dev_allocate(name);
1593         if (!eth_dev)
1594                 return -ENOMEM;
1595
1596         priv = mrvl_priv_create(name);
1597         if (!priv) {
1598                 ret = -ENOMEM;
1599                 goto out_free_dev;
1600         }
1601
1602         eth_dev->data->mac_addrs =
1603                 rte_zmalloc("mac_addrs",
1604                             ETHER_ADDR_LEN * MRVL_MAC_ADDRS_MAX, 0);
1605         if (!eth_dev->data->mac_addrs) {
1606                 RTE_LOG(ERR, PMD, "Failed to allocate space for eth addrs\n");
1607                 ret = -ENOMEM;
1608                 goto out_free_priv;
1609         }
1610
1611         memset(&req, 0, sizeof(req));
1612         strcpy(req.ifr_name, name);
1613         ret = ioctl(fd, SIOCGIFHWADDR, &req);
1614         if (ret)
1615                 goto out_free_mac;
1616
1617         memcpy(eth_dev->data->mac_addrs[0].addr_bytes,
1618                req.ifr_addr.sa_data, ETHER_ADDR_LEN);
1619
1620         eth_dev->rx_pkt_burst = mrvl_rx_pkt_burst;
1621         eth_dev->tx_pkt_burst = mrvl_tx_pkt_burst;
1622         eth_dev->data->dev_private = priv;
1623         eth_dev->device = &vdev->device;
1624         eth_dev->dev_ops = &mrvl_ops;
1625
1626         return 0;
1627 out_free_mac:
1628         rte_free(eth_dev->data->mac_addrs);
1629 out_free_dev:
1630         rte_eth_dev_release_port(eth_dev);
1631 out_free_priv:
1632         rte_free(priv);
1633
1634         return ret;
1635 }
1636
1637 /**
1638  * Cleanup previously created device representing Ethernet port.
1639  *
1640  * @param name
1641  *   Pointer to the port name.
1642  */
1643 static void
1644 mrvl_eth_dev_destroy(const char *name)
1645 {
1646         struct rte_eth_dev *eth_dev;
1647         struct mrvl_priv *priv;
1648
1649         eth_dev = rte_eth_dev_allocated(name);
1650         if (!eth_dev)
1651                 return;
1652
1653         priv = eth_dev->data->dev_private;
1654         pp2_bpool_deinit(priv->bpool);
1655         rte_free(priv);
1656         rte_free(eth_dev->data->mac_addrs);
1657         rte_eth_dev_release_port(eth_dev);
1658 }
1659
1660 /**
1661  * Callback used by rte_kvargs_process() during argument parsing.
1662  *
1663  * @param key
1664  *   Pointer to the parsed key (unused).
1665  * @param value
1666  *   Pointer to the parsed value.
1667  * @param extra_args
1668  *   Pointer to the extra arguments which contains address of the
1669  *   table of pointers to parsed interface names.
1670  *
1671  * @return
1672  *   Always 0.
1673  */
1674 static int
1675 mrvl_get_ifnames(const char *key __rte_unused, const char *value,
1676                  void *extra_args)
1677 {
1678         const char **ifnames = extra_args;
1679
1680         ifnames[mrvl_ports_nb++] = value;
1681
1682         return 0;
1683 }
1684
1685 /**
1686  * Initialize per-lcore MUSDK hardware interfaces (hifs).
1687  *
1688  * @return
1689  *   0 on success, negative error value otherwise.
1690  */
static int
mrvl_init_hifs(void)
{
	struct pp2_hif_params params;
	char match[MRVL_MATCH_LEN];
	int i, ret;

	/* Create one hardware interface per enabled lcore, each taken from
	 * the used_hifs reservation bitmap.
	 */
	RTE_LCORE_FOREACH(i) {
		ret = mrvl_reserve_bit(&used_hifs, MRVL_MUSDK_HIFS_MAX);
		if (ret < 0)
			return ret;

		snprintf(match, sizeof(match), "hif-%d", ret);
		memset(&params, 0, sizeof(params));
		params.match = match;
		params.out_size = MRVL_PP2_AGGR_TXQD_MAX;
		ret = pp2_hif_init(&params, &hifs[i]);
		if (ret) {
			RTE_LOG(ERR, PMD, "Failed to initialize hif %d\n", i);
			return ret;
		}
	}

	return 0;
}
1716
1717 /**
1718  * Deinitialize per-lcore MUSDK hardware interfaces (hifs).
1719  */
1720 static void
1721 mrvl_deinit_hifs(void)
1722 {
1723         int i;
1724
1725         RTE_LCORE_FOREACH(i) {
1726                 if (hifs[i])
1727                         pp2_hif_deinit(hifs[i]);
1728         }
1729 }
1730
1731 static void mrvl_set_first_last_cores(int core_id)
1732 {
1733         if (core_id < mrvl_lcore_first)
1734                 mrvl_lcore_first = core_id;
1735
1736         if (core_id > mrvl_lcore_last)
1737                 mrvl_lcore_last = core_id;
1738 }
1739
1740 /**
1741  * DPDK callback to register the virtual device.
1742  *
1743  * @param vdev
1744  *   Pointer to the virtual device.
1745  *
1746  * @return
1747  *   0 on success, negative error value otherwise.
1748  */
1749 static int
1750 rte_pmd_mrvl_probe(struct rte_vdev_device *vdev)
1751 {
1752         struct rte_kvargs *kvlist;
1753         const char *ifnames[PP2_NUM_ETH_PPIO * PP2_NUM_PKT_PROC];
1754         int ret = -EINVAL;
1755         uint32_t i, ifnum, cfgnum, core_id;
1756         const char *params;
1757
1758         params = rte_vdev_device_args(vdev);
1759         if (!params)
1760                 return -EINVAL;
1761
1762         kvlist = rte_kvargs_parse(params, valid_args);
1763         if (!kvlist)
1764                 return -EINVAL;
1765
1766         ifnum = rte_kvargs_count(kvlist, MRVL_IFACE_NAME_ARG);
1767         if (ifnum > RTE_DIM(ifnames))
1768                 goto out_free_kvlist;
1769
1770         rte_kvargs_process(kvlist, MRVL_IFACE_NAME_ARG,
1771                            mrvl_get_ifnames, &ifnames);
1772
1773         cfgnum = rte_kvargs_count(kvlist, MRVL_CFG_ARG);
1774         if (cfgnum > 1) {
1775                 RTE_LOG(ERR, PMD, "Cannot handle more than one config file!\n");
1776                 goto out_free_kvlist;
1777         } else if (cfgnum == 1) {
1778                 rte_kvargs_process(kvlist, MRVL_CFG_ARG,
1779                                    mrvl_get_qoscfg, &mrvl_qos_cfg);
1780         }
1781
1782         /*
1783          * ret == -EEXIST is correct, it means DMA
1784          * has been already initialized (by another PMD).
1785          */
1786         ret = mv_sys_dma_mem_init(RTE_MRVL_MUSDK_DMA_MEMSIZE);
1787         if (ret < 0 && ret != -EEXIST)
1788                 goto out_free_kvlist;
1789
1790         ret = mrvl_init_pp2();
1791         if (ret) {
1792                 RTE_LOG(ERR, PMD, "Failed to init PP!\n");
1793                 goto out_deinit_dma;
1794         }
1795
1796         ret = mrvl_init_hifs();
1797         if (ret)
1798                 goto out_deinit_hifs;
1799
1800         for (i = 0; i < ifnum; i++) {
1801                 RTE_LOG(INFO, PMD, "Creating %s\n", ifnames[i]);
1802                 ret = mrvl_eth_dev_create(vdev, ifnames[i]);
1803                 if (ret)
1804                         goto out_cleanup;
1805         }
1806
1807         rte_kvargs_free(kvlist);
1808
1809         memset(mrvl_port_bpool_size, 0, sizeof(mrvl_port_bpool_size));
1810
1811         mrvl_lcore_first = RTE_MAX_LCORE;
1812         mrvl_lcore_last = 0;
1813
1814         RTE_LCORE_FOREACH(core_id) {
1815                 mrvl_set_first_last_cores(core_id);
1816         }
1817
1818         return 0;
1819 out_cleanup:
1820         for (; i > 0; i--)
1821                 mrvl_eth_dev_destroy(ifnames[i]);
1822 out_deinit_hifs:
1823         mrvl_deinit_hifs();
1824         mrvl_deinit_pp2();
1825 out_deinit_dma:
1826         mv_sys_dma_mem_destroy();
1827 out_free_kvlist:
1828         rte_kvargs_free(kvlist);
1829
1830         return ret;
1831 }
1832
1833 /**
1834  * DPDK callback to remove virtual device.
1835  *
1836  * @param vdev
1837  *   Pointer to the removed virtual device.
1838  *
1839  * @return
1840  *   0 on success, negative error value otherwise.
1841  */
static int
rte_pmd_mrvl_remove(struct rte_vdev_device *vdev)
{
	int i;
	const char *name;

	name = rte_vdev_device_name(vdev);
	if (!name)
		return -EINVAL;

	RTE_LOG(INFO, PMD, "Removing %s\n", name);

	/* NOTE(review): this walks every ethdev port in the system, not
	 * only the ports created by this vdev - confirm this is intended
	 * when other PMDs are present.
	 */
	for (i = 0; i < rte_eth_dev_count(); i++) {
		char ifname[RTE_ETH_NAME_MAX_LEN];

		rte_eth_dev_get_name_by_port(i, ifname);
		mrvl_eth_dev_destroy(ifname);
	}

	/* Tear down global MUSDK state in reverse order of probe(). */
	mrvl_deinit_hifs();
	mrvl_deinit_pp2();
	mv_sys_dma_mem_destroy();

	return 0;
}
1867
/** Virtual-device driver hooks registered with the vdev bus below. */
static struct rte_vdev_driver pmd_mrvl_drv = {
	.probe = rte_pmd_mrvl_probe,
	.remove = rte_pmd_mrvl_remove,
};

RTE_PMD_REGISTER_VDEV(net_mrvl, pmd_mrvl_drv);
/* Keep the legacy "eth_mrvl" name working for existing users. */
RTE_PMD_REGISTER_ALIAS(net_mrvl, eth_mrvl);