net/mrvl: support VLAN filtering
[dpdk.git] / drivers / net / mrvl / mrvl_ethdev.c
1 /*-
2  *   BSD LICENSE
3  *
4  *   Copyright(c) 2017 Semihalf. All rights reserved.
5  *
6  *   Redistribution and use in source and binary forms, with or without
7  *   modification, are permitted provided that the following conditions
8  *   are met:
9  *
10  *     * Redistributions of source code must retain the above copyright
11  *       notice, this list of conditions and the following disclaimer.
12  *     * Redistributions in binary form must reproduce the above copyright
13  *       notice, this list of conditions and the following disclaimer in
14  *       the documentation and/or other materials provided with the
15  *       distribution.
16  *     * Neither the name of Semihalf nor the names of its
17  *       contributors may be used to endorse or promote products derived
18  *       from this software without specific prior written permission.
19  *
20  *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
21  *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
22  *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
23  *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
24  *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
25  *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
26  *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
27  *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
28  *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
29  *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
30  *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
31  */
32
33 #include <rte_ethdev.h>
34 #include <rte_kvargs.h>
35 #include <rte_log.h>
36 #include <rte_malloc.h>
37 #include <rte_vdev.h>
38
39 /* Unluckily, container_of is defined by both DPDK and MUSDK,
40  * we'll declare only one version.
41  *
42  * Note that it is not used in this PMD anyway.
43  */
44 #ifdef container_of
45 #undef container_of
46 #endif
47
48 #include <drivers/mv_pp2.h>
49 #include <drivers/mv_pp2_bpool.h>
50 #include <drivers/mv_pp2_hif.h>
51
#include <fcntl.h>
#include <linux/ethtool.h>
#include <linux/sockios.h>
#include <net/if.h>
#include <net/if_arp.h>
#include <stdio.h>
#include <string.h>
#include <sys/ioctl.h>
#include <sys/socket.h>
#include <sys/stat.h>
#include <sys/types.h>
61
62 #include "mrvl_ethdev.h"
63 #include "mrvl_qos.h"
64
65 /* bitmask with reserved hifs */
66 #define MRVL_MUSDK_HIFS_RESERVED 0x0F
67 /* bitmask with reserved bpools */
68 #define MRVL_MUSDK_BPOOLS_RESERVED 0x07
69 /* bitmask with reserved kernel RSS tables */
70 #define MRVL_MUSDK_RSS_RESERVED 0x01
71 /* maximum number of available hifs */
72 #define MRVL_MUSDK_HIFS_MAX 9
73
74 /* prefetch shift */
75 #define MRVL_MUSDK_PREFETCH_SHIFT 2
76
77 /* TCAM has 25 entries reserved for uc/mc filter entries */
78 #define MRVL_MAC_ADDRS_MAX 25
79 #define MRVL_MATCH_LEN 16
80 #define MRVL_PKT_EFFEC_OFFS (MRVL_PKT_OFFS + MV_MH_SIZE)
81 /* Maximum allowable packet size */
82 #define MRVL_PKT_SIZE_MAX (10240 - MV_MH_SIZE)
83
84 #define MRVL_IFACE_NAME_ARG "iface"
85 #define MRVL_CFG_ARG "cfg"
86
87 #define MRVL_BURST_SIZE 64
88
89 #define MRVL_ARP_LENGTH 28
90
91 #define MRVL_COOKIE_ADDR_INVALID ~0ULL
92
93 #define MRVL_COOKIE_HIGH_ADDR_SHIFT     (sizeof(pp2_cookie_t) * 8)
94 #define MRVL_COOKIE_HIGH_ADDR_MASK      (~0ULL << MRVL_COOKIE_HIGH_ADDR_SHIFT)
95
96 static const char * const valid_args[] = {
97         MRVL_IFACE_NAME_ARG,
98         MRVL_CFG_ARG,
99         NULL
100 };
101
102 static int used_hifs = MRVL_MUSDK_HIFS_RESERVED;
103 static struct pp2_hif *hifs[RTE_MAX_LCORE];
104 static int used_bpools[PP2_NUM_PKT_PROC] = {
105         MRVL_MUSDK_BPOOLS_RESERVED,
106         MRVL_MUSDK_BPOOLS_RESERVED
107 };
108
109 struct pp2_bpool *mrvl_port_to_bpool_lookup[RTE_MAX_ETHPORTS];
110 int mrvl_port_bpool_size[PP2_NUM_PKT_PROC][PP2_BPOOL_NUM_POOLS][RTE_MAX_LCORE];
111 uint64_t cookie_addr_high = MRVL_COOKIE_ADDR_INVALID;
112
113 /*
114  * To use buffer harvesting based on loopback port shadow queue structure
115  * was introduced for buffers information bookkeeping.
116  *
117  * Before sending the packet, related buffer information (pp2_buff_inf) is
118  * stored in shadow queue. After packet is transmitted no longer used
119  * packet buffer is released back to it's original hardware pool,
120  * on condition it originated from interface.
121  * In case it  was generated by application itself i.e: mbuf->port field is
122  * 0xff then its released to software mempool.
123  */
124 struct mrvl_shadow_txq {
125         int head;           /* write index - used when sending buffers */
126         int tail;           /* read index - used when releasing buffers */
127         u16 size;           /* queue occupied size */
128         u16 num_to_release; /* number of buffers sent, that can be released */
129         struct buff_release_entry ent[MRVL_PP2_TX_SHADOWQ_SIZE]; /* q entries */
130 };
131
132 struct mrvl_rxq {
133         struct mrvl_priv *priv;
134         struct rte_mempool *mp;
135         int queue_id;
136         int port_id;
137 };
138
139 struct mrvl_txq {
140         struct mrvl_priv *priv;
141         int queue_id;
142         int port_id;
143 };
144
145 /*
146  * Every tx queue should have dedicated shadow tx queue.
147  *
148  * Ports assigned by DPDK might not start at zero or be continuous so
149  * as a workaround define shadow queues for each possible port so that
150  * we eventually fit somewhere.
151  */
152 struct mrvl_shadow_txq shadow_txqs[RTE_MAX_ETHPORTS][RTE_MAX_LCORE];
153
154 /** Number of ports configured. */
155 int mrvl_ports_nb;
156 static int mrvl_lcore_first;
157 static int mrvl_lcore_last;
158
159 static inline int
160 mrvl_get_bpool_size(int pp2_id, int pool_id)
161 {
162         int i;
163         int size = 0;
164
165         for (i = mrvl_lcore_first; i <= mrvl_lcore_last; i++)
166                 size += mrvl_port_bpool_size[pp2_id][pool_id][i];
167
168         return size;
169 }
170
171 static inline int
172 mrvl_reserve_bit(int *bitmap, int max)
173 {
174         int n = sizeof(*bitmap) * 8 - __builtin_clz(*bitmap);
175
176         if (n >= max)
177                 return -1;
178
179         *bitmap |= 1 << n;
180
181         return n;
182 }
183
184 /**
185  * Configure rss based on dpdk rss configuration.
186  *
187  * @param priv
188  *   Pointer to private structure.
189  * @param rss_conf
190  *   Pointer to RSS configuration.
191  *
192  * @return
193  *   0 on success, negative error value otherwise.
194  */
195 static int
196 mrvl_configure_rss(struct mrvl_priv *priv, struct rte_eth_rss_conf *rss_conf)
197 {
198         if (rss_conf->rss_key)
199                 RTE_LOG(WARNING, PMD, "Changing hash key is not supported\n");
200
201         if (rss_conf->rss_hf == 0) {
202                 priv->ppio_params.inqs_params.hash_type = PP2_PPIO_HASH_T_NONE;
203         } else if (rss_conf->rss_hf & ETH_RSS_IPV4) {
204                 priv->ppio_params.inqs_params.hash_type =
205                         PP2_PPIO_HASH_T_2_TUPLE;
206         } else if (rss_conf->rss_hf & ETH_RSS_NONFRAG_IPV4_TCP) {
207                 priv->ppio_params.inqs_params.hash_type =
208                         PP2_PPIO_HASH_T_5_TUPLE;
209                 priv->rss_hf_tcp = 1;
210         } else if (rss_conf->rss_hf & ETH_RSS_NONFRAG_IPV4_UDP) {
211                 priv->ppio_params.inqs_params.hash_type =
212                         PP2_PPIO_HASH_T_5_TUPLE;
213                 priv->rss_hf_tcp = 0;
214         } else {
215                 return -EINVAL;
216         }
217
218         return 0;
219 }
220
221 /**
222  * Ethernet device configuration.
223  *
224  * Prepare the driver for a given number of TX and RX queues and
225  * configure RSS.
226  *
227  * @param dev
228  *   Pointer to Ethernet device structure.
229  *
230  * @return
231  *   0 on success, negative error value otherwise.
232  */
233 static int
234 mrvl_dev_configure(struct rte_eth_dev *dev)
235 {
236         struct mrvl_priv *priv = dev->data->dev_private;
237         int ret;
238
239         if (dev->data->dev_conf.rxmode.mq_mode != ETH_MQ_RX_NONE &&
240             dev->data->dev_conf.rxmode.mq_mode != ETH_MQ_RX_RSS) {
241                 RTE_LOG(INFO, PMD, "Unsupported rx multi queue mode %d\n",
242                         dev->data->dev_conf.rxmode.mq_mode);
243                 return -EINVAL;
244         }
245
246         if (!dev->data->dev_conf.rxmode.hw_strip_crc) {
247                 RTE_LOG(INFO, PMD,
248                         "L2 CRC stripping is always enabled in hw\n");
249                 dev->data->dev_conf.rxmode.hw_strip_crc = 1;
250         }
251
252         if (dev->data->dev_conf.rxmode.hw_vlan_strip) {
253                 RTE_LOG(INFO, PMD, "VLAN stripping not supported\n");
254                 return -EINVAL;
255         }
256
257         if (dev->data->dev_conf.rxmode.split_hdr_size) {
258                 RTE_LOG(INFO, PMD, "Split headers not supported\n");
259                 return -EINVAL;
260         }
261
262         if (dev->data->dev_conf.rxmode.enable_scatter) {
263                 RTE_LOG(INFO, PMD, "RX Scatter/Gather not supported\n");
264                 return -EINVAL;
265         }
266
267         if (dev->data->dev_conf.rxmode.enable_lro) {
268                 RTE_LOG(INFO, PMD, "LRO not supported\n");
269                 return -EINVAL;
270         }
271
272         if (dev->data->dev_conf.rxmode.jumbo_frame)
273                 dev->data->mtu = dev->data->dev_conf.rxmode.max_rx_pkt_len -
274                                  ETHER_HDR_LEN - ETHER_CRC_LEN;
275
276         ret = mrvl_configure_rxqs(priv, dev->data->port_id,
277                                   dev->data->nb_rx_queues);
278         if (ret < 0)
279                 return ret;
280
281         priv->ppio_params.outqs_params.num_outqs = dev->data->nb_tx_queues;
282         priv->nb_rx_queues = dev->data->nb_rx_queues;
283
284         if (dev->data->nb_rx_queues == 1 &&
285             dev->data->dev_conf.rxmode.mq_mode == ETH_MQ_RX_RSS) {
286                 RTE_LOG(WARNING, PMD, "Disabling hash for 1 rx queue\n");
287                 priv->ppio_params.inqs_params.hash_type = PP2_PPIO_HASH_T_NONE;
288
289                 return 0;
290         }
291
292         return mrvl_configure_rss(priv,
293                                   &dev->data->dev_conf.rx_adv_conf.rss_conf);
294 }
295
296 /**
297  * DPDK callback to change the MTU.
298  *
299  * Setting the MTU affects hardware MRU (packets larger than the MRU
300  * will be dropped).
301  *
302  * @param dev
303  *   Pointer to Ethernet device structure.
304  * @param mtu
305  *   New MTU.
306  *
307  * @return
308  *   0 on success, negative error value otherwise.
309  */
310 static int
311 mrvl_mtu_set(struct rte_eth_dev *dev, uint16_t mtu)
312 {
313         struct mrvl_priv *priv = dev->data->dev_private;
314         /* extra MV_MH_SIZE bytes are required for Marvell tag */
315         uint16_t mru = mtu + MV_MH_SIZE + ETHER_HDR_LEN + ETHER_CRC_LEN;
316         int ret;
317
318         if (mtu < ETHER_MIN_MTU || mru > MRVL_PKT_SIZE_MAX)
319                 return -EINVAL;
320
321         ret = pp2_ppio_set_mru(priv->ppio, mru);
322         if (ret)
323                 return ret;
324
325         return pp2_ppio_set_mtu(priv->ppio, mtu);
326 }
327
328 /**
329  * DPDK callback to bring the link up.
330  *
331  * @param dev
332  *   Pointer to Ethernet device structure.
333  *
334  * @return
335  *   0 on success, negative error value otherwise.
336  */
337 static int
338 mrvl_dev_set_link_up(struct rte_eth_dev *dev)
339 {
340         struct mrvl_priv *priv = dev->data->dev_private;
341         int ret;
342
343         ret = pp2_ppio_enable(priv->ppio);
344         if (ret)
345                 return ret;
346
347         /*
348          * mtu/mru can be updated if pp2_ppio_enable() was called at least once
349          * as pp2_ppio_enable() changes port->t_mode from default 0 to
350          * PP2_TRAFFIC_INGRESS_EGRESS.
351          *
352          * Set mtu to default DPDK value here.
353          */
354         ret = mrvl_mtu_set(dev, dev->data->mtu);
355         if (ret)
356                 pp2_ppio_disable(priv->ppio);
357
358         dev->data->dev_link.link_status = ETH_LINK_UP;
359
360         return ret;
361 }
362
363 /**
364  * DPDK callback to bring the link down.
365  *
366  * @param dev
367  *   Pointer to Ethernet device structure.
368  *
369  * @return
370  *   0 on success, negative error value otherwise.
371  */
372 static int
373 mrvl_dev_set_link_down(struct rte_eth_dev *dev)
374 {
375         struct mrvl_priv *priv = dev->data->dev_private;
376         int ret;
377
378         ret = pp2_ppio_disable(priv->ppio);
379         if (ret)
380                 return ret;
381
382         dev->data->dev_link.link_status = ETH_LINK_DOWN;
383
384         return ret;
385 }
386
387 /**
388  * DPDK callback to start the device.
389  *
390  * @param dev
391  *   Pointer to Ethernet device structure.
392  *
393  * @return
394  *   0 on success, negative errno value on failure.
395  */
396 static int
397 mrvl_dev_start(struct rte_eth_dev *dev)
398 {
399         struct mrvl_priv *priv = dev->data->dev_private;
400         char match[MRVL_MATCH_LEN];
401         int ret;
402
403         snprintf(match, sizeof(match), "ppio-%d:%d",
404                  priv->pp_id, priv->ppio_id);
405         priv->ppio_params.match = match;
406
407         /*
408          * Calculate the maximum bpool size for refill feature to 1.5 of the
409          * configured size. In case the bpool size will exceed this value,
410          * superfluous buffers will be removed
411          */
412         priv->bpool_max_size = priv->bpool_init_size +
413                               (priv->bpool_init_size >> 1);
414         /*
415          * Calculate the minimum bpool size for refill feature as follows:
416          * 2 default burst sizes multiply by number of rx queues.
417          * If the bpool size will be below this value, new buffers will
418          * be added to the pool.
419          */
420         priv->bpool_min_size = priv->nb_rx_queues * MRVL_BURST_SIZE * 2;
421
422         ret = pp2_ppio_init(&priv->ppio_params, &priv->ppio);
423         if (ret)
424                 return ret;
425
426         /*
427          * In case there are some some stale uc/mc mac addresses flush them
428          * here. It cannot be done during mrvl_dev_close() as port information
429          * is already gone at that point (due to pp2_ppio_deinit() in
430          * mrvl_dev_stop()).
431          */
432         if (!priv->uc_mc_flushed) {
433                 ret = pp2_ppio_flush_mac_addrs(priv->ppio, 1, 1);
434                 if (ret) {
435                         RTE_LOG(ERR, PMD,
436                                 "Failed to flush uc/mc filter list\n");
437                         goto out;
438                 }
439                 priv->uc_mc_flushed = 1;
440         }
441
442         if (!priv->vlan_flushed) {
443                 ret = pp2_ppio_flush_vlan(priv->ppio);
444                 if (ret) {
445                         RTE_LOG(ERR, PMD, "Failed to flush vlan list\n");
446                         /*
447                          * TODO
448                          * once pp2_ppio_flush_vlan() is supported jump to out
449                          * goto out;
450                          */
451                 }
452                 priv->vlan_flushed = 1;
453         }
454
455         /* For default QoS config, don't start classifier. */
456         if (mrvl_qos_cfg) {
457                 ret = mrvl_start_qos_mapping(priv);
458                 if (ret) {
459                         pp2_ppio_deinit(priv->ppio);
460                         return ret;
461                 }
462         }
463
464         ret = mrvl_dev_set_link_up(dev);
465         if (ret)
466                 goto out;
467
468         return 0;
469 out:
470         pp2_ppio_deinit(priv->ppio);
471         return ret;
472 }
473
474 /**
475  * Flush receive queues.
476  *
477  * @param dev
478  *   Pointer to Ethernet device structure.
479  */
480 static void
481 mrvl_flush_rx_queues(struct rte_eth_dev *dev)
482 {
483         int i;
484
485         RTE_LOG(INFO, PMD, "Flushing rx queues\n");
486         for (i = 0; i < dev->data->nb_rx_queues; i++) {
487                 int ret, num;
488
489                 do {
490                         struct mrvl_rxq *q = dev->data->rx_queues[i];
491                         struct pp2_ppio_desc descs[MRVL_PP2_RXD_MAX];
492
493                         num = MRVL_PP2_RXD_MAX;
494                         ret = pp2_ppio_recv(q->priv->ppio,
495                                             q->priv->rxq_map[q->queue_id].tc,
496                                             q->priv->rxq_map[q->queue_id].inq,
497                                             descs, (uint16_t *)&num);
498                 } while (ret == 0 && num);
499         }
500 }
501
502 /**
503  * Flush transmit shadow queues.
504  *
505  * @param dev
506  *   Pointer to Ethernet device structure.
507  */
508 static void
509 mrvl_flush_tx_shadow_queues(struct rte_eth_dev *dev)
510 {
511         int i;
512
513         RTE_LOG(INFO, PMD, "Flushing tx shadow queues\n");
514         for (i = 0; i < RTE_MAX_LCORE; i++) {
515                 struct mrvl_shadow_txq *sq =
516                         &shadow_txqs[dev->data->port_id][i];
517
518                 while (sq->tail != sq->head) {
519                         uint64_t addr = cookie_addr_high |
520                                         sq->ent[sq->tail].buff.cookie;
521                         rte_pktmbuf_free((struct rte_mbuf *)addr);
522                         sq->tail = (sq->tail + 1) & MRVL_PP2_TX_SHADOWQ_MASK;
523                 }
524
525                 memset(sq, 0, sizeof(*sq));
526         }
527 }
528
529 /**
530  * Flush hardware bpool (buffer-pool).
531  *
532  * @param dev
533  *   Pointer to Ethernet device structure.
534  */
535 static void
536 mrvl_flush_bpool(struct rte_eth_dev *dev)
537 {
538         struct mrvl_priv *priv = dev->data->dev_private;
539         uint32_t num;
540         int ret;
541
542         ret = pp2_bpool_get_num_buffs(priv->bpool, &num);
543         if (ret) {
544                 RTE_LOG(ERR, PMD, "Failed to get bpool buffers number\n");
545                 return;
546         }
547
548         while (num--) {
549                 struct pp2_buff_inf inf;
550                 uint64_t addr;
551
552                 ret = pp2_bpool_get_buff(hifs[rte_lcore_id()], priv->bpool,
553                                          &inf);
554                 if (ret)
555                         break;
556
557                 addr = cookie_addr_high | inf.cookie;
558                 rte_pktmbuf_free((struct rte_mbuf *)addr);
559         }
560 }
561
562 /**
563  * DPDK callback to stop the device.
564  *
565  * @param dev
566  *   Pointer to Ethernet device structure.
567  */
568 static void
569 mrvl_dev_stop(struct rte_eth_dev *dev)
570 {
571         struct mrvl_priv *priv = dev->data->dev_private;
572
573         mrvl_dev_set_link_down(dev);
574         mrvl_flush_rx_queues(dev);
575         mrvl_flush_tx_shadow_queues(dev);
576         if (priv->qos_tbl)
577                 pp2_cls_qos_tbl_deinit(priv->qos_tbl);
578         pp2_ppio_deinit(priv->ppio);
579         priv->ppio = NULL;
580 }
581
582 /**
583  * DPDK callback to close the device.
584  *
585  * @param dev
586  *   Pointer to Ethernet device structure.
587  */
588 static void
589 mrvl_dev_close(struct rte_eth_dev *dev)
590 {
591         struct mrvl_priv *priv = dev->data->dev_private;
592         size_t i;
593
594         for (i = 0; i < priv->ppio_params.inqs_params.num_tcs; ++i) {
595                 struct pp2_ppio_tc_params *tc_params =
596                         &priv->ppio_params.inqs_params.tcs_params[i];
597
598                 if (tc_params->inqs_params) {
599                         rte_free(tc_params->inqs_params);
600                         tc_params->inqs_params = NULL;
601                 }
602         }
603
604         mrvl_flush_bpool(dev);
605 }
606
607 /**
608  * DPDK callback to retrieve physical link information.
609  *
610  * @param dev
611  *   Pointer to Ethernet device structure.
612  * @param wait_to_complete
613  *   Wait for request completion (ignored).
614  *
615  * @return
616  *   0 on success, negative error value otherwise.
617  */
618 static int
619 mrvl_link_update(struct rte_eth_dev *dev, int wait_to_complete __rte_unused)
620 {
621         /*
622          * TODO
623          * once MUSDK provides necessary API use it here
624          */
625         struct ethtool_cmd edata;
626         struct ifreq req;
627         int ret, fd;
628
629         edata.cmd = ETHTOOL_GSET;
630
631         strcpy(req.ifr_name, dev->data->name);
632         req.ifr_data = (void *)&edata;
633
634         fd = socket(AF_INET, SOCK_DGRAM, 0);
635         if (fd == -1)
636                 return -EFAULT;
637
638         ret = ioctl(fd, SIOCETHTOOL, &req);
639         if (ret == -1) {
640                 close(fd);
641                 return -EFAULT;
642         }
643
644         close(fd);
645
646         switch (ethtool_cmd_speed(&edata)) {
647         case SPEED_10:
648                 dev->data->dev_link.link_speed = ETH_SPEED_NUM_10M;
649                 break;
650         case SPEED_100:
651                 dev->data->dev_link.link_speed = ETH_SPEED_NUM_100M;
652                 break;
653         case SPEED_1000:
654                 dev->data->dev_link.link_speed = ETH_SPEED_NUM_1G;
655                 break;
656         case SPEED_10000:
657                 dev->data->dev_link.link_speed = ETH_SPEED_NUM_10G;
658                 break;
659         default:
660                 dev->data->dev_link.link_speed = ETH_SPEED_NUM_NONE;
661         }
662
663         dev->data->dev_link.link_duplex = edata.duplex ? ETH_LINK_FULL_DUPLEX :
664                                                          ETH_LINK_HALF_DUPLEX;
665         dev->data->dev_link.link_autoneg = edata.autoneg ? ETH_LINK_AUTONEG :
666                                                            ETH_LINK_FIXED;
667
668         return 0;
669 }
670
671 /**
672  * DPDK callback to enable promiscuous mode.
673  *
674  * @param dev
675  *   Pointer to Ethernet device structure.
676  */
677 static void
678 mrvl_promiscuous_enable(struct rte_eth_dev *dev)
679 {
680         struct mrvl_priv *priv = dev->data->dev_private;
681         int ret;
682
683         ret = pp2_ppio_set_uc_promisc(priv->ppio, 1);
684         if (ret)
685                 RTE_LOG(ERR, PMD, "Failed to enable promiscuous mode\n");
686 }
687
688 /**
689  * DPDK callback to enable allmulti mode.
690  *
691  * @param dev
692  *   Pointer to Ethernet device structure.
693  */
694 static void
695 mrvl_allmulticast_enable(struct rte_eth_dev *dev)
696 {
697         struct mrvl_priv *priv = dev->data->dev_private;
698         int ret;
699
700         ret = pp2_ppio_set_mc_promisc(priv->ppio, 1);
701         if (ret)
702                 RTE_LOG(ERR, PMD, "Failed enable all-multicast mode\n");
703 }
704
705 /**
706  * DPDK callback to disable promiscuous mode.
707  *
708  * @param dev
709  *   Pointer to Ethernet device structure.
710  */
711 static void
712 mrvl_promiscuous_disable(struct rte_eth_dev *dev)
713 {
714         struct mrvl_priv *priv = dev->data->dev_private;
715         int ret;
716
717         ret = pp2_ppio_set_uc_promisc(priv->ppio, 0);
718         if (ret)
719                 RTE_LOG(ERR, PMD, "Failed to disable promiscuous mode\n");
720 }
721
722 /**
723  * DPDK callback to disable allmulticast mode.
724  *
725  * @param dev
726  *   Pointer to Ethernet device structure.
727  */
728 static void
729 mrvl_allmulticast_disable(struct rte_eth_dev *dev)
730 {
731         struct mrvl_priv *priv = dev->data->dev_private;
732         int ret;
733
734         ret = pp2_ppio_set_mc_promisc(priv->ppio, 0);
735         if (ret)
736                 RTE_LOG(ERR, PMD, "Failed to disable all-multicast mode\n");
737 }
738
739 /**
740  * DPDK callback to remove a MAC address.
741  *
742  * @param dev
743  *   Pointer to Ethernet device structure.
744  * @param index
745  *   MAC address index.
746  */
747 static void
748 mrvl_mac_addr_remove(struct rte_eth_dev *dev, uint32_t index)
749 {
750         struct mrvl_priv *priv = dev->data->dev_private;
751         char buf[ETHER_ADDR_FMT_SIZE];
752         int ret;
753
754         ret = pp2_ppio_remove_mac_addr(priv->ppio,
755                                        dev->data->mac_addrs[index].addr_bytes);
756         if (ret) {
757                 ether_format_addr(buf, sizeof(buf),
758                                   &dev->data->mac_addrs[index]);
759                 RTE_LOG(ERR, PMD, "Failed to remove mac %s\n", buf);
760         }
761 }
762
763 /**
764  * DPDK callback to add a MAC address.
765  *
766  * @param dev
767  *   Pointer to Ethernet device structure.
768  * @param mac_addr
769  *   MAC address to register.
770  * @param index
771  *   MAC address index.
772  * @param vmdq
773  *   VMDq pool index to associate address with (unused).
774  *
775  * @return
776  *   0 on success, negative error value otherwise.
777  */
778 static int
779 mrvl_mac_addr_add(struct rte_eth_dev *dev, struct ether_addr *mac_addr,
780                   uint32_t index, uint32_t vmdq __rte_unused)
781 {
782         struct mrvl_priv *priv = dev->data->dev_private;
783         char buf[ETHER_ADDR_FMT_SIZE];
784         int ret;
785
786         if (index == 0)
787                 /* For setting index 0, mrvl_mac_addr_set() should be used.*/
788                 return -1;
789
790         /*
791          * Maximum number of uc addresses can be tuned via kernel module mvpp2x
792          * parameter uc_filter_max. Maximum number of mc addresses is then
793          * MRVL_MAC_ADDRS_MAX - uc_filter_max. Currently it defaults to 4 and
794          * 21 respectively.
795          *
796          * If more than uc_filter_max uc addresses were added to filter list
797          * then NIC will switch to promiscuous mode automatically.
798          *
799          * If more than MRVL_MAC_ADDRS_MAX - uc_filter_max number mc addresses
800          * were added to filter list then NIC will switch to all-multicast mode
801          * automatically.
802          */
803         ret = pp2_ppio_add_mac_addr(priv->ppio, mac_addr->addr_bytes);
804         if (ret) {
805                 ether_format_addr(buf, sizeof(buf), mac_addr);
806                 RTE_LOG(ERR, PMD, "Failed to add mac %s\n", buf);
807                 return -1;
808         }
809
810         return 0;
811 }
812
/**
 * DPDK callback to set the primary MAC address.
 *
 * NOTE(review): the return value of pp2_ppio_set_mac_addr() is ignored -
 * confirm whether failures should be reported to the caller.
 *
 * @param dev
 *   Pointer to Ethernet device structure.
 * @param mac_addr
 *   MAC address to register.
 */
static void
mrvl_mac_addr_set(struct rte_eth_dev *dev, struct ether_addr *mac_addr)
{
	struct mrvl_priv *priv = dev->data->dev_private;

	pp2_ppio_set_mac_addr(priv->ppio, mac_addr->addr_bytes);
	/*
	 * TODO
	 * Port stops sending packets if pp2_ppio_set_mac_addr()
	 * was called after pp2_ppio_enable(). As a quick fix issue
	 * enable port once again.
	 */
	pp2_ppio_enable(priv->ppio);
}
835
836 /**
837  * DPDK callback to get information about the device.
838  *
839  * @param dev
840  *   Pointer to Ethernet device structure (unused).
841  * @param info
842  *   Info structure output buffer.
843  */
844 static void
845 mrvl_dev_infos_get(struct rte_eth_dev *dev __rte_unused,
846                    struct rte_eth_dev_info *info)
847 {
848         info->speed_capa = ETH_LINK_SPEED_10M |
849                            ETH_LINK_SPEED_100M |
850                            ETH_LINK_SPEED_1G |
851                            ETH_LINK_SPEED_10G;
852
853         info->max_rx_queues = MRVL_PP2_RXQ_MAX;
854         info->max_tx_queues = MRVL_PP2_TXQ_MAX;
855         info->max_mac_addrs = MRVL_MAC_ADDRS_MAX;
856
857         info->rx_desc_lim.nb_max = MRVL_PP2_RXD_MAX;
858         info->rx_desc_lim.nb_min = MRVL_PP2_RXD_MIN;
859         info->rx_desc_lim.nb_align = MRVL_PP2_RXD_ALIGN;
860
861         info->tx_desc_lim.nb_max = MRVL_PP2_TXD_MAX;
862         info->tx_desc_lim.nb_min = MRVL_PP2_TXD_MIN;
863         info->tx_desc_lim.nb_align = MRVL_PP2_TXD_ALIGN;
864
865         info->rx_offload_capa = DEV_RX_OFFLOAD_JUMBO_FRAME |
866                                 DEV_RX_OFFLOAD_VLAN_FILTER;
867         info->flow_type_rss_offloads = ETH_RSS_IPV4 |
868                                        ETH_RSS_NONFRAG_IPV4_TCP |
869                                        ETH_RSS_NONFRAG_IPV4_UDP;
870
871         /* By default packets are dropped if no descriptors are available */
872         info->default_rxconf.rx_drop_en = 1;
873
874         info->max_rx_pktlen = MRVL_PKT_SIZE_MAX;
875 }
876
877 /**
878  * DPDK callback to get information about specific receive queue.
879  *
880  * @param dev
881  *   Pointer to Ethernet device structure.
882  * @param rx_queue_id
883  *   Receive queue index.
884  * @param qinfo
885  *   Receive queue information structure.
886  */
887 static void mrvl_rxq_info_get(struct rte_eth_dev *dev, uint16_t rx_queue_id,
888                               struct rte_eth_rxq_info *qinfo)
889 {
890         struct mrvl_rxq *q = dev->data->rx_queues[rx_queue_id];
891         struct mrvl_priv *priv = dev->data->dev_private;
892         int inq = priv->rxq_map[rx_queue_id].inq;
893         int tc = priv->rxq_map[rx_queue_id].tc;
894         struct pp2_ppio_tc_params *tc_params =
895                 &priv->ppio_params.inqs_params.tcs_params[tc];
896
897         qinfo->mp = q->mp;
898         qinfo->nb_desc = tc_params->inqs_params[inq].size;
899 }
900
/**
 * DPDK callback to get information about specific transmit queue.
 *
 * @param dev
 *   Pointer to Ethernet device structure.
 * @param tx_queue_id
 *   Transmit queue index.
 * @param qinfo
 *   Transmit queue information structure.
 */
static void mrvl_txq_info_get(struct rte_eth_dev *dev, uint16_t tx_queue_id,
			      struct rte_eth_txq_info *qinfo)
{
	struct mrvl_priv *priv = dev->data->dev_private;

	/* Report the descriptor count this outq was configured with. */
	qinfo->nb_desc =
		priv->ppio_params.outqs_params.outqs_params[tx_queue_id].size;
}
919
920 /**
921  * DPDK callback to Configure a VLAN filter.
922  *
923  * @param dev
924  *   Pointer to Ethernet device structure.
925  * @param vlan_id
926  *   VLAN ID to filter.
927  * @param on
928  *   Toggle filter.
929  *
930  * @return
931  *   0 on success, negative error value otherwise.
932  */
933 static int
934 mrvl_vlan_filter_set(struct rte_eth_dev *dev, uint16_t vlan_id, int on)
935 {
936         struct mrvl_priv *priv = dev->data->dev_private;
937
938         return on ? pp2_ppio_add_vlan(priv->ppio, vlan_id) :
939                     pp2_ppio_remove_vlan(priv->ppio, vlan_id);
940 }
941
942 /**
943  * Release buffers to hardware bpool (buffer-pool)
944  *
945  * @param rxq
946  *   Receive queue pointer.
947  * @param num
948  *   Number of buffers to release to bpool.
949  *
950  * @return
951  *   0 on success, negative error value otherwise.
952  */
953 static int
954 mrvl_fill_bpool(struct mrvl_rxq *rxq, int num)
955 {
956         struct buff_release_entry entries[MRVL_PP2_TXD_MAX];
957         struct rte_mbuf *mbufs[MRVL_PP2_TXD_MAX];
958         int i, ret;
959         unsigned int core_id = rte_lcore_id();
960         struct pp2_hif *hif = hifs[core_id];
961         struct pp2_bpool *bpool = rxq->priv->bpool;
962
963         ret = rte_pktmbuf_alloc_bulk(rxq->mp, mbufs, num);
964         if (ret)
965                 return ret;
966
967         if (cookie_addr_high == MRVL_COOKIE_ADDR_INVALID)
968                 cookie_addr_high =
969                         (uint64_t)mbufs[0] & MRVL_COOKIE_HIGH_ADDR_MASK;
970
971         for (i = 0; i < num; i++) {
972                 if (((uint64_t)mbufs[i] & MRVL_COOKIE_HIGH_ADDR_MASK)
973                         != cookie_addr_high) {
974                         RTE_LOG(ERR, PMD,
975                                 "mbuf virtual addr high 0x%lx out of range\n",
976                                 (uint64_t)mbufs[i] >> 32);
977                         goto out;
978                 }
979
980                 entries[i].buff.addr =
981                         rte_mbuf_data_dma_addr_default(mbufs[i]);
982                 entries[i].buff.cookie = (pp2_cookie_t)(uint64_t)mbufs[i];
983                 entries[i].bpool = bpool;
984         }
985
986         pp2_bpool_put_buffs(hif, entries, (uint16_t *)&i);
987         mrvl_port_bpool_size[bpool->pp2_id][bpool->id][core_id] += i;
988
989         if (i != num)
990                 goto out;
991
992         return 0;
993 out:
994         for (; i < num; i++)
995                 rte_pktmbuf_free(mbufs[i]);
996
997         return -1;
998 }
999
1000 /**
1001  * DPDK callback to configure the receive queue.
1002  *
1003  * @param dev
1004  *   Pointer to Ethernet device structure.
1005  * @param idx
1006  *   RX queue index.
1007  * @param desc
1008  *   Number of descriptors to configure in queue.
1009  * @param socket
1010  *   NUMA socket on which memory must be allocated.
1011  * @param conf
1012  *   Thresholds parameters (unused_).
1013  * @param mp
1014  *   Memory pool for buffer allocations.
1015  *
1016  * @return
1017  *   0 on success, negative error value otherwise.
1018  */
1019 static int
1020 mrvl_rx_queue_setup(struct rte_eth_dev *dev, uint16_t idx, uint16_t desc,
1021                     unsigned int socket,
1022                     const struct rte_eth_rxconf *conf __rte_unused,
1023                     struct rte_mempool *mp)
1024 {
1025         struct mrvl_priv *priv = dev->data->dev_private;
1026         struct mrvl_rxq *rxq;
1027         uint32_t min_size,
1028                  max_rx_pkt_len = dev->data->dev_conf.rxmode.max_rx_pkt_len;
1029         int ret, tc, inq;
1030
1031         if (priv->rxq_map[idx].tc == MRVL_UNKNOWN_TC) {
1032                 /*
1033                  * Unknown TC mapping, mapping will not have a correct queue.
1034                  */
1035                 RTE_LOG(ERR, PMD, "Unknown TC mapping for queue %hu eth%hhu\n",
1036                         idx, priv->ppio_id);
1037                 return -EFAULT;
1038         }
1039
1040         min_size = rte_pktmbuf_data_room_size(mp) - RTE_PKTMBUF_HEADROOM -
1041                    MRVL_PKT_EFFEC_OFFS;
1042         if (min_size < max_rx_pkt_len) {
1043                 RTE_LOG(ERR, PMD,
1044                         "Mbuf size must be increased to %u bytes to hold up to %u bytes of data.\n",
1045                         max_rx_pkt_len + RTE_PKTMBUF_HEADROOM +
1046                         MRVL_PKT_EFFEC_OFFS,
1047                         max_rx_pkt_len);
1048                 return -EINVAL;
1049         }
1050
1051         if (dev->data->rx_queues[idx]) {
1052                 rte_free(dev->data->rx_queues[idx]);
1053                 dev->data->rx_queues[idx] = NULL;
1054         }
1055
1056         rxq = rte_zmalloc_socket("rxq", sizeof(*rxq), 0, socket);
1057         if (!rxq)
1058                 return -ENOMEM;
1059
1060         rxq->priv = priv;
1061         rxq->mp = mp;
1062         rxq->queue_id = idx;
1063         rxq->port_id = dev->data->port_id;
1064         mrvl_port_to_bpool_lookup[rxq->port_id] = priv->bpool;
1065
1066         tc = priv->rxq_map[rxq->queue_id].tc,
1067         inq = priv->rxq_map[rxq->queue_id].inq;
1068         priv->ppio_params.inqs_params.tcs_params[tc].inqs_params[inq].size =
1069                 desc;
1070
1071         ret = mrvl_fill_bpool(rxq, desc);
1072         if (ret) {
1073                 rte_free(rxq);
1074                 return ret;
1075         }
1076
1077         priv->bpool_init_size += desc;
1078
1079         dev->data->rx_queues[idx] = rxq;
1080
1081         return 0;
1082 }
1083
1084 /**
1085  * DPDK callback to release the receive queue.
1086  *
1087  * @param rxq
1088  *   Generic receive queue pointer.
1089  */
1090 static void
1091 mrvl_rx_queue_release(void *rxq)
1092 {
1093         struct mrvl_rxq *q = rxq;
1094         struct pp2_ppio_tc_params *tc_params;
1095         int i, num, tc, inq;
1096
1097         if (!q)
1098                 return;
1099
1100         tc = q->priv->rxq_map[q->queue_id].tc;
1101         inq = q->priv->rxq_map[q->queue_id].inq;
1102         tc_params = &q->priv->ppio_params.inqs_params.tcs_params[tc];
1103         num = tc_params->inqs_params[inq].size;
1104         for (i = 0; i < num; i++) {
1105                 struct pp2_buff_inf inf;
1106                 uint64_t addr;
1107
1108                 pp2_bpool_get_buff(hifs[rte_lcore_id()], q->priv->bpool, &inf);
1109                 addr = cookie_addr_high | inf.cookie;
1110                 rte_pktmbuf_free((struct rte_mbuf *)addr);
1111         }
1112
1113         rte_free(q);
1114 }
1115
1116 /**
1117  * DPDK callback to configure the transmit queue.
1118  *
1119  * @param dev
1120  *   Pointer to Ethernet device structure.
1121  * @param idx
1122  *   Transmit queue index.
1123  * @param desc
1124  *   Number of descriptors to configure in the queue.
1125  * @param socket
1126  *   NUMA socket on which memory must be allocated.
1127  * @param conf
1128  *   Thresholds parameters (unused).
1129  *
1130  * @return
1131  *   0 on success, negative error value otherwise.
1132  */
1133 static int
1134 mrvl_tx_queue_setup(struct rte_eth_dev *dev, uint16_t idx, uint16_t desc,
1135                     unsigned int socket,
1136                     const struct rte_eth_txconf *conf __rte_unused)
1137 {
1138         struct mrvl_priv *priv = dev->data->dev_private;
1139         struct mrvl_txq *txq;
1140
1141         if (dev->data->tx_queues[idx]) {
1142                 rte_free(dev->data->tx_queues[idx]);
1143                 dev->data->tx_queues[idx] = NULL;
1144         }
1145
1146         txq = rte_zmalloc_socket("txq", sizeof(*txq), 0, socket);
1147         if (!txq)
1148                 return -ENOMEM;
1149
1150         txq->priv = priv;
1151         txq->queue_id = idx;
1152         txq->port_id = dev->data->port_id;
1153         dev->data->tx_queues[idx] = txq;
1154
1155         priv->ppio_params.outqs_params.outqs_params[idx].size = desc;
1156         priv->ppio_params.outqs_params.outqs_params[idx].weight = 1;
1157
1158         return 0;
1159 }
1160
1161 /**
1162  * DPDK callback to release the transmit queue.
1163  *
1164  * @param txq
1165  *   Generic transmit queue pointer.
1166  */
1167 static void
1168 mrvl_tx_queue_release(void *txq)
1169 {
1170         struct mrvl_txq *q = txq;
1171
1172         if (!q)
1173                 return;
1174
1175         rte_free(q);
1176 }
1177
1178 /**
1179  * Update RSS hash configuration
1180  *
1181  * @param dev
1182  *   Pointer to Ethernet device structure.
1183  * @param rss_conf
1184  *   Pointer to RSS configuration.
1185  *
1186  * @return
1187  *   0 on success, negative error value otherwise.
1188  */
1189 static int
1190 mrvl_rss_hash_update(struct rte_eth_dev *dev,
1191                      struct rte_eth_rss_conf *rss_conf)
1192 {
1193         struct mrvl_priv *priv = dev->data->dev_private;
1194
1195         return mrvl_configure_rss(priv, rss_conf);
1196 }
1197
1198 /**
1199  * DPDK callback to get RSS hash configuration.
1200  *
1201  * @param dev
1202  *   Pointer to Ethernet device structure.
1203  * @rss_conf
1204  *   Pointer to RSS configuration.
1205  *
1206  * @return
1207  *   Always 0.
1208  */
1209 static int
1210 mrvl_rss_hash_conf_get(struct rte_eth_dev *dev,
1211                        struct rte_eth_rss_conf *rss_conf)
1212 {
1213         struct mrvl_priv *priv = dev->data->dev_private;
1214         enum pp2_ppio_hash_type hash_type =
1215                 priv->ppio_params.inqs_params.hash_type;
1216
1217         rss_conf->rss_key = NULL;
1218
1219         if (hash_type == PP2_PPIO_HASH_T_NONE)
1220                 rss_conf->rss_hf = 0;
1221         else if (hash_type == PP2_PPIO_HASH_T_2_TUPLE)
1222                 rss_conf->rss_hf = ETH_RSS_IPV4;
1223         else if (hash_type == PP2_PPIO_HASH_T_5_TUPLE && priv->rss_hf_tcp)
1224                 rss_conf->rss_hf = ETH_RSS_NONFRAG_IPV4_TCP;
1225         else if (hash_type == PP2_PPIO_HASH_T_5_TUPLE && !priv->rss_hf_tcp)
1226                 rss_conf->rss_hf = ETH_RSS_NONFRAG_IPV4_UDP;
1227
1228         return 0;
1229 }
1230
/** Ethernet device callbacks registered with the DPDK ethdev layer. */
static const struct eth_dev_ops mrvl_ops = {
	.dev_configure = mrvl_dev_configure,
	.dev_start = mrvl_dev_start,
	.dev_stop = mrvl_dev_stop,
	.dev_set_link_up = mrvl_dev_set_link_up,
	.dev_set_link_down = mrvl_dev_set_link_down,
	.dev_close = mrvl_dev_close,
	.link_update = mrvl_link_update,
	.promiscuous_enable = mrvl_promiscuous_enable,
	.allmulticast_enable = mrvl_allmulticast_enable,
	.promiscuous_disable = mrvl_promiscuous_disable,
	.allmulticast_disable = mrvl_allmulticast_disable,
	.mac_addr_remove = mrvl_mac_addr_remove,
	.mac_addr_add = mrvl_mac_addr_add,
	.mac_addr_set = mrvl_mac_addr_set,
	.mtu_set = mrvl_mtu_set,
	.dev_infos_get = mrvl_dev_infos_get,
	.rxq_info_get = mrvl_rxq_info_get,
	.txq_info_get = mrvl_txq_info_get,
	.vlan_filter_set = mrvl_vlan_filter_set,
	.rx_queue_setup = mrvl_rx_queue_setup,
	.rx_queue_release = mrvl_rx_queue_release,
	.tx_queue_setup = mrvl_tx_queue_setup,
	.tx_queue_release = mrvl_tx_queue_release,
	.rss_hash_update = mrvl_rss_hash_update,
	.rss_hash_conf_get = mrvl_rss_hash_conf_get,
};
1258
1259 /**
1260  * DPDK callback for receive.
1261  *
1262  * @param rxq
1263  *   Generic pointer to the receive queue.
1264  * @param rx_pkts
1265  *   Array to store received packets.
1266  * @param nb_pkts
1267  *   Maximum number of packets in array.
1268  *
1269  * @return
1270  *   Number of packets successfully received.
1271  */
1272 static uint16_t
1273 mrvl_rx_pkt_burst(void *rxq, struct rte_mbuf **rx_pkts, uint16_t nb_pkts)
1274 {
1275         struct mrvl_rxq *q = rxq;
1276         struct pp2_ppio_desc descs[nb_pkts];
1277         struct pp2_bpool *bpool;
1278         int i, ret, rx_done = 0;
1279         int num;
1280         unsigned int core_id = rte_lcore_id();
1281
1282         if (unlikely(!q->priv->ppio))
1283                 return 0;
1284
1285         bpool = q->priv->bpool;
1286
1287         ret = pp2_ppio_recv(q->priv->ppio, q->priv->rxq_map[q->queue_id].tc,
1288                             q->priv->rxq_map[q->queue_id].inq, descs, &nb_pkts);
1289         if (unlikely(ret < 0)) {
1290                 RTE_LOG(ERR, PMD, "Failed to receive packets\n");
1291                 return 0;
1292         }
1293         mrvl_port_bpool_size[bpool->pp2_id][bpool->id][core_id] -= nb_pkts;
1294
1295         for (i = 0; i < nb_pkts; i++) {
1296                 struct rte_mbuf *mbuf;
1297                 enum pp2_inq_desc_status status;
1298                 uint64_t addr;
1299
1300                 if (likely(nb_pkts - i > MRVL_MUSDK_PREFETCH_SHIFT)) {
1301                         struct pp2_ppio_desc *pref_desc;
1302                         u64 pref_addr;
1303
1304                         pref_desc = &descs[i + MRVL_MUSDK_PREFETCH_SHIFT];
1305                         pref_addr = cookie_addr_high |
1306                                     pp2_ppio_inq_desc_get_cookie(pref_desc);
1307                         rte_mbuf_prefetch_part1((struct rte_mbuf *)(pref_addr));
1308                         rte_mbuf_prefetch_part2((struct rte_mbuf *)(pref_addr));
1309                 }
1310
1311                 addr = cookie_addr_high |
1312                        pp2_ppio_inq_desc_get_cookie(&descs[i]);
1313                 mbuf = (struct rte_mbuf *)addr;
1314                 rte_pktmbuf_reset(mbuf);
1315
1316                 /* drop packet in case of mac, overrun or resource error */
1317                 status = pp2_ppio_inq_desc_get_l2_pkt_error(&descs[i]);
1318                 if (unlikely(status != PP2_DESC_ERR_OK)) {
1319                         struct pp2_buff_inf binf = {
1320                                 .addr = rte_mbuf_data_dma_addr_default(mbuf),
1321                                 .cookie = (pp2_cookie_t)(uint64_t)mbuf,
1322                         };
1323
1324                         pp2_bpool_put_buff(hifs[core_id], bpool, &binf);
1325                         mrvl_port_bpool_size
1326                                 [bpool->pp2_id][bpool->id][core_id]++;
1327                         continue;
1328                 }
1329
1330                 mbuf->data_off += MRVL_PKT_EFFEC_OFFS;
1331                 mbuf->pkt_len = pp2_ppio_inq_desc_get_pkt_len(&descs[i]);
1332                 mbuf->data_len = mbuf->pkt_len;
1333                 mbuf->port = q->port_id;
1334
1335                 rx_pkts[rx_done++] = mbuf;
1336         }
1337
1338         if (rte_spinlock_trylock(&q->priv->lock) == 1) {
1339                 num = mrvl_get_bpool_size(bpool->pp2_id, bpool->id);
1340
1341                 if (unlikely(num <= q->priv->bpool_min_size ||
1342                              (!rx_done && num < q->priv->bpool_init_size))) {
1343                         ret = mrvl_fill_bpool(q, MRVL_BURST_SIZE);
1344                         if (ret)
1345                                 RTE_LOG(ERR, PMD, "Failed to fill bpool\n");
1346                 } else if (unlikely(num > q->priv->bpool_max_size)) {
1347                         int i;
1348                         int pkt_to_remove = num - q->priv->bpool_init_size;
1349                         struct rte_mbuf *mbuf;
1350                         struct pp2_buff_inf buff;
1351
1352                         RTE_LOG(DEBUG, PMD,
1353                                 "\nport-%d:%d: bpool %d oversize - remove %d buffers (pool size: %d -> %d)\n",
1354                                 bpool->pp2_id, q->priv->ppio->port_id,
1355                                 bpool->id, pkt_to_remove, num,
1356                                 q->priv->bpool_init_size);
1357
1358                         for (i = 0; i < pkt_to_remove; i++) {
1359                                 pp2_bpool_get_buff(hifs[core_id], bpool, &buff);
1360                                 mbuf = (struct rte_mbuf *)
1361                                         (cookie_addr_high | buff.cookie);
1362                                 rte_pktmbuf_free(mbuf);
1363                         }
1364                         mrvl_port_bpool_size
1365                                 [bpool->pp2_id][bpool->id][core_id] -=
1366                                                                 pkt_to_remove;
1367                 }
1368                 rte_spinlock_unlock(&q->priv->lock);
1369         }
1370
1371         return rx_done;
1372 }
1373
1374 /**
1375  * Release already sent buffers to bpool (buffer-pool).
1376  *
1377  * @param ppio
1378  *   Pointer to the port structure.
1379  * @param hif
1380  *   Pointer to the MUSDK hardware interface.
1381  * @param sq
1382  *   Pointer to the shadow queue.
1383  * @param qid
1384  *   Queue id number.
1385  * @param force
1386  *   Force releasing packets.
1387  */
1388 static inline void
1389 mrvl_free_sent_buffers(struct pp2_ppio *ppio, struct pp2_hif *hif,
1390                        struct mrvl_shadow_txq *sq, int qid, int force)
1391 {
1392         struct buff_release_entry *entry;
1393         uint16_t nb_done = 0, num = 0, skip_bufs = 0;
1394         int i, core_id = rte_lcore_id();
1395
1396         pp2_ppio_get_num_outq_done(ppio, hif, qid, &nb_done);
1397
1398         sq->num_to_release += nb_done;
1399
1400         if (likely(!force &&
1401                    sq->num_to_release < MRVL_PP2_BUF_RELEASE_BURST_SIZE))
1402                 return;
1403
1404         nb_done = sq->num_to_release;
1405         sq->num_to_release = 0;
1406
1407         for (i = 0; i < nb_done; i++) {
1408                 entry = &sq->ent[sq->tail + num];
1409                 if (unlikely(!entry->buff.addr)) {
1410                         RTE_LOG(ERR, PMD,
1411                                 "Shadow memory @%d: cookie(%lx), pa(%lx)!\n",
1412                                 sq->tail, (u64)entry->buff.cookie,
1413                                 (u64)entry->buff.addr);
1414                         skip_bufs = 1;
1415                         goto skip;
1416                 }
1417
1418                 if (unlikely(!entry->bpool)) {
1419                         struct rte_mbuf *mbuf;
1420
1421                         mbuf = (struct rte_mbuf *)
1422                                (cookie_addr_high | entry->buff.cookie);
1423                         rte_pktmbuf_free(mbuf);
1424                         skip_bufs = 1;
1425                         goto skip;
1426                 }
1427
1428                 mrvl_port_bpool_size
1429                         [entry->bpool->pp2_id][entry->bpool->id][core_id]++;
1430                 num++;
1431                 if (unlikely(sq->tail + num == MRVL_PP2_TX_SHADOWQ_SIZE))
1432                         goto skip;
1433                 continue;
1434 skip:
1435                 if (likely(num))
1436                         pp2_bpool_put_buffs(hif, &sq->ent[sq->tail], &num);
1437                 num += skip_bufs;
1438                 sq->tail = (sq->tail + num) & MRVL_PP2_TX_SHADOWQ_MASK;
1439                 sq->size -= num;
1440                 num = 0;
1441         }
1442
1443         if (likely(num)) {
1444                 pp2_bpool_put_buffs(hif, &sq->ent[sq->tail], &num);
1445                 sq->tail = (sq->tail + num) & MRVL_PP2_TX_SHADOWQ_MASK;
1446                 sq->size -= num;
1447         }
1448 }
1449
1450 /**
1451  * DPDK callback for transmit.
1452  *
1453  * @param txq
1454  *   Generic pointer transmit queue.
1455  * @param tx_pkts
1456  *   Packets to transmit.
1457  * @param nb_pkts
1458  *   Number of packets in array.
1459  *
1460  * @return
1461  *   Number of packets successfully transmitted.
1462  */
1463 static uint16_t
1464 mrvl_tx_pkt_burst(void *txq, struct rte_mbuf **tx_pkts, uint16_t nb_pkts)
1465 {
1466         struct mrvl_txq *q = txq;
1467         struct mrvl_shadow_txq *sq = &shadow_txqs[q->port_id][rte_lcore_id()];
1468         struct pp2_hif *hif = hifs[rte_lcore_id()];
1469         struct pp2_ppio_desc descs[nb_pkts];
1470         int i;
1471         uint16_t num, sq_free_size;
1472
1473         if (unlikely(!q->priv->ppio))
1474                 return 0;
1475
1476         if (sq->size)
1477                 mrvl_free_sent_buffers(q->priv->ppio, hif, sq, q->queue_id, 0);
1478
1479         sq_free_size = MRVL_PP2_TX_SHADOWQ_SIZE - sq->size - 1;
1480         if (unlikely(nb_pkts > sq_free_size)) {
1481                 RTE_LOG(DEBUG, PMD,
1482                         "No room in shadow queue for %d packets! %d packets will be sent.\n",
1483                         nb_pkts, sq_free_size);
1484                 nb_pkts = sq_free_size;
1485         }
1486
1487         for (i = 0; i < nb_pkts; i++) {
1488                 struct rte_mbuf *mbuf = tx_pkts[i];
1489
1490                 if (likely(nb_pkts - i > MRVL_MUSDK_PREFETCH_SHIFT)) {
1491                         struct rte_mbuf *pref_pkt_hdr;
1492
1493                         pref_pkt_hdr = tx_pkts[i + MRVL_MUSDK_PREFETCH_SHIFT];
1494                         rte_mbuf_prefetch_part1(pref_pkt_hdr);
1495                         rte_mbuf_prefetch_part2(pref_pkt_hdr);
1496                 }
1497
1498                 sq->ent[sq->head].buff.cookie = (pp2_cookie_t)(uint64_t)mbuf;
1499                 sq->ent[sq->head].buff.addr =
1500                         rte_mbuf_data_dma_addr_default(mbuf);
1501                 sq->ent[sq->head].bpool =
1502                         (unlikely(mbuf->port == 0xff || mbuf->refcnt > 1)) ?
1503                          NULL : mrvl_port_to_bpool_lookup[mbuf->port];
1504                 sq->head = (sq->head + 1) & MRVL_PP2_TX_SHADOWQ_MASK;
1505                 sq->size++;
1506
1507                 pp2_ppio_outq_desc_reset(&descs[i]);
1508                 pp2_ppio_outq_desc_set_phys_addr(&descs[i],
1509                                                  rte_pktmbuf_mtophys(mbuf));
1510                 pp2_ppio_outq_desc_set_pkt_offset(&descs[i], 0);
1511                 pp2_ppio_outq_desc_set_pkt_len(&descs[i],
1512                                                rte_pktmbuf_pkt_len(mbuf));
1513         }
1514
1515         num = nb_pkts;
1516         pp2_ppio_send(q->priv->ppio, hif, q->queue_id, descs, &nb_pkts);
1517         /* number of packets that were not sent */
1518         if (unlikely(num > nb_pkts)) {
1519                 for (i = nb_pkts; i < num; i++) {
1520                         sq->head = (MRVL_PP2_TX_SHADOWQ_SIZE + sq->head - 1) &
1521                                 MRVL_PP2_TX_SHADOWQ_MASK;
1522                 }
1523                 sq->size -= num - nb_pkts;
1524         }
1525
1526         return nb_pkts;
1527 }
1528
1529 /**
1530  * Initialize packet processor.
1531  *
1532  * @return
1533  *   0 on success, negative error value otherwise.
1534  */
1535 static int
1536 mrvl_init_pp2(void)
1537 {
1538         struct pp2_init_params init_params;
1539
1540         memset(&init_params, 0, sizeof(init_params));
1541         init_params.hif_reserved_map = MRVL_MUSDK_HIFS_RESERVED;
1542         init_params.bm_pool_reserved_map = MRVL_MUSDK_BPOOLS_RESERVED;
1543         init_params.rss_tbl_reserved_map = MRVL_MUSDK_RSS_RESERVED;
1544
1545         return pp2_init(&init_params);
1546 }
1547
1548 /**
1549  * Deinitialize packet processor.
1550  *
1551  * @return
1552  *   0 on success, negative error value otherwise.
1553  */
1554 static void
1555 mrvl_deinit_pp2(void)
1556 {
1557         pp2_deinit();
1558 }
1559
1560 /**
1561  * Create private device structure.
1562  *
1563  * @param dev_name
1564  *   Pointer to the port name passed in the initialization parameters.
1565  *
1566  * @return
1567  *   Pointer to the newly allocated private device structure.
1568  */
1569 static struct mrvl_priv *
1570 mrvl_priv_create(const char *dev_name)
1571 {
1572         struct pp2_bpool_params bpool_params;
1573         char match[MRVL_MATCH_LEN];
1574         struct mrvl_priv *priv;
1575         int ret, bpool_bit;
1576
1577         priv = rte_zmalloc_socket(dev_name, sizeof(*priv), 0, rte_socket_id());
1578         if (!priv)
1579                 return NULL;
1580
1581         ret = pp2_netdev_get_ppio_info((char *)(uintptr_t)dev_name,
1582                                        &priv->pp_id, &priv->ppio_id);
1583         if (ret)
1584                 goto out_free_priv;
1585
1586         bpool_bit = mrvl_reserve_bit(&used_bpools[priv->pp_id],
1587                                      PP2_BPOOL_NUM_POOLS);
1588         if (bpool_bit < 0)
1589                 goto out_free_priv;
1590         priv->bpool_bit = bpool_bit;
1591
1592         snprintf(match, sizeof(match), "pool-%d:%d", priv->pp_id,
1593                  priv->bpool_bit);
1594         memset(&bpool_params, 0, sizeof(bpool_params));
1595         bpool_params.match = match;
1596         bpool_params.buff_len = MRVL_PKT_SIZE_MAX + MRVL_PKT_EFFEC_OFFS;
1597         ret = pp2_bpool_init(&bpool_params, &priv->bpool);
1598         if (ret)
1599                 goto out_clear_bpool_bit;
1600
1601         priv->ppio_params.type = PP2_PPIO_T_NIC;
1602         rte_spinlock_init(&priv->lock);
1603
1604         return priv;
1605 out_clear_bpool_bit:
1606         used_bpools[priv->pp_id] &= ~(1 << priv->bpool_bit);
1607 out_free_priv:
1608         rte_free(priv);
1609         return NULL;
1610 }
1611
1612 /**
1613  * Create device representing Ethernet port.
1614  *
1615  * @param name
1616  *   Pointer to the port's name.
1617  *
1618  * @return
1619  *   0 on success, negative error value otherwise.
1620  */
1621 static int
1622 mrvl_eth_dev_create(struct rte_vdev_device *vdev, const char *name)
1623 {
1624         int ret, fd = socket(AF_INET, SOCK_DGRAM, 0);
1625         struct rte_eth_dev *eth_dev;
1626         struct mrvl_priv *priv;
1627         struct ifreq req;
1628
1629         eth_dev = rte_eth_dev_allocate(name);
1630         if (!eth_dev)
1631                 return -ENOMEM;
1632
1633         priv = mrvl_priv_create(name);
1634         if (!priv) {
1635                 ret = -ENOMEM;
1636                 goto out_free_dev;
1637         }
1638
1639         eth_dev->data->mac_addrs =
1640                 rte_zmalloc("mac_addrs",
1641                             ETHER_ADDR_LEN * MRVL_MAC_ADDRS_MAX, 0);
1642         if (!eth_dev->data->mac_addrs) {
1643                 RTE_LOG(ERR, PMD, "Failed to allocate space for eth addrs\n");
1644                 ret = -ENOMEM;
1645                 goto out_free_priv;
1646         }
1647
1648         memset(&req, 0, sizeof(req));
1649         strcpy(req.ifr_name, name);
1650         ret = ioctl(fd, SIOCGIFHWADDR, &req);
1651         if (ret)
1652                 goto out_free_mac;
1653
1654         memcpy(eth_dev->data->mac_addrs[0].addr_bytes,
1655                req.ifr_addr.sa_data, ETHER_ADDR_LEN);
1656
1657         eth_dev->rx_pkt_burst = mrvl_rx_pkt_burst;
1658         eth_dev->tx_pkt_burst = mrvl_tx_pkt_burst;
1659         eth_dev->data->dev_private = priv;
1660         eth_dev->device = &vdev->device;
1661         eth_dev->dev_ops = &mrvl_ops;
1662
1663         return 0;
1664 out_free_mac:
1665         rte_free(eth_dev->data->mac_addrs);
1666 out_free_dev:
1667         rte_eth_dev_release_port(eth_dev);
1668 out_free_priv:
1669         rte_free(priv);
1670
1671         return ret;
1672 }
1673
1674 /**
1675  * Cleanup previously created device representing Ethernet port.
1676  *
1677  * @param name
1678  *   Pointer to the port name.
1679  */
1680 static void
1681 mrvl_eth_dev_destroy(const char *name)
1682 {
1683         struct rte_eth_dev *eth_dev;
1684         struct mrvl_priv *priv;
1685
1686         eth_dev = rte_eth_dev_allocated(name);
1687         if (!eth_dev)
1688                 return;
1689
1690         priv = eth_dev->data->dev_private;
1691         pp2_bpool_deinit(priv->bpool);
1692         rte_free(priv);
1693         rte_free(eth_dev->data->mac_addrs);
1694         rte_eth_dev_release_port(eth_dev);
1695 }
1696
1697 /**
1698  * Callback used by rte_kvargs_process() during argument parsing.
1699  *
1700  * @param key
1701  *   Pointer to the parsed key (unused).
1702  * @param value
1703  *   Pointer to the parsed value.
1704  * @param extra_args
1705  *   Pointer to the extra arguments which contains address of the
1706  *   table of pointers to parsed interface names.
1707  *
1708  * @return
1709  *   Always 0.
1710  */
1711 static int
1712 mrvl_get_ifnames(const char *key __rte_unused, const char *value,
1713                  void *extra_args)
1714 {
1715         const char **ifnames = extra_args;
1716
1717         ifnames[mrvl_ports_nb++] = value;
1718
1719         return 0;
1720 }
1721
1722 /**
1723  * Initialize per-lcore MUSDK hardware interfaces (hifs).
1724  *
1725  * @return
1726  *   0 on success, negative error value otherwise.
1727  */
1728 static int
1729 mrvl_init_hifs(void)
1730 {
1731         struct pp2_hif_params params;
1732         char match[MRVL_MATCH_LEN];
1733         int i, ret;
1734
1735         RTE_LCORE_FOREACH(i) {
1736                 ret = mrvl_reserve_bit(&used_hifs, MRVL_MUSDK_HIFS_MAX);
1737                 if (ret < 0)
1738                         return ret;
1739
1740                 snprintf(match, sizeof(match), "hif-%d", ret);
1741                 memset(&params, 0, sizeof(params));
1742                 params.match = match;
1743                 params.out_size = MRVL_PP2_AGGR_TXQD_MAX;
1744                 ret = pp2_hif_init(&params, &hifs[i]);
1745                 if (ret) {
1746                         RTE_LOG(ERR, PMD, "Failed to initialize hif %d\n", i);
1747                         return ret;
1748                 }
1749         }
1750
1751         return 0;
1752 }
1753
1754 /**
1755  * Deinitialize per-lcore MUSDK hardware interfaces (hifs).
1756  */
1757 static void
1758 mrvl_deinit_hifs(void)
1759 {
1760         int i;
1761
1762         RTE_LCORE_FOREACH(i) {
1763                 if (hifs[i])
1764                         pp2_hif_deinit(hifs[i]);
1765         }
1766 }
1767
1768 static void mrvl_set_first_last_cores(int core_id)
1769 {
1770         if (core_id < mrvl_lcore_first)
1771                 mrvl_lcore_first = core_id;
1772
1773         if (core_id > mrvl_lcore_last)
1774                 mrvl_lcore_last = core_id;
1775 }
1776
1777 /**
1778  * DPDK callback to register the virtual device.
1779  *
1780  * @param vdev
1781  *   Pointer to the virtual device.
1782  *
1783  * @return
1784  *   0 on success, negative error value otherwise.
1785  */
1786 static int
1787 rte_pmd_mrvl_probe(struct rte_vdev_device *vdev)
1788 {
1789         struct rte_kvargs *kvlist;
1790         const char *ifnames[PP2_NUM_ETH_PPIO * PP2_NUM_PKT_PROC];
1791         int ret = -EINVAL;
1792         uint32_t i, ifnum, cfgnum, core_id;
1793         const char *params;
1794
1795         params = rte_vdev_device_args(vdev);
1796         if (!params)
1797                 return -EINVAL;
1798
1799         kvlist = rte_kvargs_parse(params, valid_args);
1800         if (!kvlist)
1801                 return -EINVAL;
1802
1803         ifnum = rte_kvargs_count(kvlist, MRVL_IFACE_NAME_ARG);
1804         if (ifnum > RTE_DIM(ifnames))
1805                 goto out_free_kvlist;
1806
1807         rte_kvargs_process(kvlist, MRVL_IFACE_NAME_ARG,
1808                            mrvl_get_ifnames, &ifnames);
1809
1810         cfgnum = rte_kvargs_count(kvlist, MRVL_CFG_ARG);
1811         if (cfgnum > 1) {
1812                 RTE_LOG(ERR, PMD, "Cannot handle more than one config file!\n");
1813                 goto out_free_kvlist;
1814         } else if (cfgnum == 1) {
1815                 rte_kvargs_process(kvlist, MRVL_CFG_ARG,
1816                                    mrvl_get_qoscfg, &mrvl_qos_cfg);
1817         }
1818
1819         /*
1820          * ret == -EEXIST is correct, it means DMA
1821          * has been already initialized (by another PMD).
1822          */
1823         ret = mv_sys_dma_mem_init(RTE_MRVL_MUSDK_DMA_MEMSIZE);
1824         if (ret < 0 && ret != -EEXIST)
1825                 goto out_free_kvlist;
1826
1827         ret = mrvl_init_pp2();
1828         if (ret) {
1829                 RTE_LOG(ERR, PMD, "Failed to init PP!\n");
1830                 goto out_deinit_dma;
1831         }
1832
1833         ret = mrvl_init_hifs();
1834         if (ret)
1835                 goto out_deinit_hifs;
1836
1837         for (i = 0; i < ifnum; i++) {
1838                 RTE_LOG(INFO, PMD, "Creating %s\n", ifnames[i]);
1839                 ret = mrvl_eth_dev_create(vdev, ifnames[i]);
1840                 if (ret)
1841                         goto out_cleanup;
1842         }
1843
1844         rte_kvargs_free(kvlist);
1845
1846         memset(mrvl_port_bpool_size, 0, sizeof(mrvl_port_bpool_size));
1847
1848         mrvl_lcore_first = RTE_MAX_LCORE;
1849         mrvl_lcore_last = 0;
1850
1851         RTE_LCORE_FOREACH(core_id) {
1852                 mrvl_set_first_last_cores(core_id);
1853         }
1854
1855         return 0;
1856 out_cleanup:
1857         for (; i > 0; i--)
1858                 mrvl_eth_dev_destroy(ifnames[i]);
1859 out_deinit_hifs:
1860         mrvl_deinit_hifs();
1861         mrvl_deinit_pp2();
1862 out_deinit_dma:
1863         mv_sys_dma_mem_destroy();
1864 out_free_kvlist:
1865         rte_kvargs_free(kvlist);
1866
1867         return ret;
1868 }
1869
1870 /**
1871  * DPDK callback to remove virtual device.
1872  *
1873  * @param vdev
1874  *   Pointer to the removed virtual device.
1875  *
1876  * @return
1877  *   0 on success, negative error value otherwise.
1878  */
1879 static int
1880 rte_pmd_mrvl_remove(struct rte_vdev_device *vdev)
1881 {
1882         int i;
1883         const char *name;
1884
1885         name = rte_vdev_device_name(vdev);
1886         if (!name)
1887                 return -EINVAL;
1888
1889         RTE_LOG(INFO, PMD, "Removing %s\n", name);
1890
1891         for (i = 0; i < rte_eth_dev_count(); i++) {
1892                 char ifname[RTE_ETH_NAME_MAX_LEN];
1893
1894                 rte_eth_dev_get_name_by_port(i, ifname);
1895                 mrvl_eth_dev_destroy(ifname);
1896         }
1897
1898         mrvl_deinit_hifs();
1899         mrvl_deinit_pp2();
1900         mv_sys_dma_mem_destroy();
1901
1902         return 0;
1903 }
1904
/** Virtual device driver hooks registered with the DPDK vdev bus. */
static struct rte_vdev_driver pmd_mrvl_drv = {
	.probe = rte_pmd_mrvl_probe,
	.remove = rte_pmd_mrvl_remove,
};

/* Register under "net_mrvl"; "eth_mrvl" is kept as a legacy alias. */
RTE_PMD_REGISTER_VDEV(net_mrvl, pmd_mrvl_drv);
RTE_PMD_REGISTER_ALIAS(net_mrvl, eth_mrvl);