net/enic: add devarg to specify ingress VLAN rewrite mode
drivers/net/enic/enic_main.c
1 /* SPDX-License-Identifier: BSD-3-Clause
2  * Copyright 2008-2017 Cisco Systems, Inc.  All rights reserved.
3  * Copyright 2007 Nuova Systems, Inc.  All rights reserved.
4  */
5
6 #include <stdio.h>
7
8 #include <sys/stat.h>
9 #include <sys/mman.h>
10 #include <fcntl.h>
11 #include <libgen.h>
12
13 #include <rte_pci.h>
14 #include <rte_bus_pci.h>
15 #include <rte_memzone.h>
16 #include <rte_malloc.h>
17 #include <rte_mbuf.h>
18 #include <rte_string_fns.h>
19 #include <rte_ethdev_driver.h>
20
21 #include "enic_compat.h"
22 #include "enic.h"
23 #include "wq_enet_desc.h"
24 #include "rq_enet_desc.h"
25 #include "cq_enet_desc.h"
26 #include "vnic_enet.h"
27 #include "vnic_dev.h"
28 #include "vnic_wq.h"
29 #include "vnic_rq.h"
30 #include "vnic_cq.h"
31 #include "vnic_intr.h"
32 #include "vnic_nic.h"
33
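/* True when the adapter is the SR-IOV VF flavor of the Cisco VIC. */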
34 static inline int enic_is_sriov_vf(struct enic *enic)
35 {
36         return enic->pdev->id.device_id == PCI_DEVICE_ID_CISCO_VIC_ENET_VF;
37 }
38
39 static int is_zero_addr(uint8_t *addr)
40 {
41         return !(addr[0] |  addr[1] | addr[2] | addr[3] | addr[4] | addr[5]);
42 }
43
44 static int is_mcast_addr(uint8_t *addr)
45 {
46         return addr[0] & 1;
47 }
48
49 static int is_eth_addr_valid(uint8_t *addr)
50 {
51         return !is_mcast_addr(addr) && !is_zero_addr(addr);
52 }
53
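/* Free every mbuf still held in an RQ's software ring. */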
54 static void
55 enic_rxmbuf_queue_release(__rte_unused struct enic *enic, struct vnic_rq *rq)
56 {
57         uint16_t i;
58
59         if (!rq || !rq->mbuf_ring) {
60                 dev_debug(enic, "Pointer to rq or mbuf_ring is NULL");
61                 return;
62         }
63
64         for (i = 0; i < rq->ring.desc_count; i++) {
65                 if (rq->mbuf_ring[i]) {
66                         rte_pktmbuf_free_seg(rq->mbuf_ring[i]);
67                         rq->mbuf_ring[i] = NULL;
68                 }
69         }
70 }
71
72 static void enic_free_wq_buf(struct vnic_wq_buf *buf)
73 {
74         struct rte_mbuf *mbuf = (struct rte_mbuf *)buf->mb;
75
76         rte_pktmbuf_free_seg(mbuf);
77         buf->mb = NULL;
78 }
79
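/* Log the hardware error status of each in-use WQ and RQ. */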
80 static void enic_log_q_error(struct enic *enic)
81 {
82         unsigned int i;
83         u32 error_status;
84
85         for (i = 0; i < enic->wq_count; i++) {
86                 error_status = vnic_wq_error_status(&enic->wq[i]);
87                 if (error_status)
88                         dev_err(enic, "WQ[%d] error_status %d\n", i,
89                                 error_status);
90         }
91
92         for (i = 0; i < enic_vnic_rq_count(enic); i++) {
93                 if (!enic->rq[i].in_use)
94                         continue;
95                 error_status = vnic_rq_error_status(&enic->rq[i]);
96                 if (error_status)
97                         dev_err(enic, "RQ[%d] error_status %d\n", i,
98                                 error_status);
99         }
100 }
101
102 static void enic_clear_soft_stats(struct enic *enic)
103 {
104         struct enic_soft_stats *soft_stats = &enic->soft_stats;
105         rte_atomic64_clear(&soft_stats->rx_nombuf);
106         rte_atomic64_clear(&soft_stats->rx_packet_errors);
107         rte_atomic64_clear(&soft_stats->tx_oversized);
108 }
109
110 static void enic_init_soft_stats(struct enic *enic)
111 {
112         struct enic_soft_stats *soft_stats = &enic->soft_stats;
113         rte_atomic64_init(&soft_stats->rx_nombuf);
114         rte_atomic64_init(&soft_stats->rx_packet_errors);
115         rte_atomic64_init(&soft_stats->tx_oversized);
116         enic_clear_soft_stats(enic);
117 }
118
119 void enic_dev_stats_clear(struct enic *enic)
120 {
121         if (vnic_dev_stats_clear(enic->vdev))
122                 dev_err(enic, "Error in clearing stats\n");
123         enic_clear_soft_stats(enic);
124 }
125
126 int enic_dev_stats_get(struct enic *enic, struct rte_eth_stats *r_stats)
127 {
128         struct vnic_stats *stats;
129         struct enic_soft_stats *soft_stats = &enic->soft_stats;
130         int64_t rx_truncated;
131         uint64_t rx_packet_errors;
132         int ret = vnic_dev_stats_dump(enic->vdev, &stats);
133
134         if (ret) {
135                 dev_err(enic, "Error in getting stats\n");
136                 return ret;
137         }
138
139         /* The number of truncated packets can only be calculated by
140          * subtracting a hardware counter from error packets received by
141          * the driver. Note: this causes transient inaccuracies in the
142          * ipackets count. Also, the lengths of truncated packets are
143          * counted in ibytes even though truncated packets are dropped,
144          * which can make ibytes slightly higher than it should be.
145          */
146         rx_packet_errors = rte_atomic64_read(&soft_stats->rx_packet_errors);
147         rx_truncated = rx_packet_errors - stats->rx.rx_errors;
148
149         r_stats->ipackets = stats->rx.rx_frames_ok - rx_truncated;
150         r_stats->opackets = stats->tx.tx_frames_ok;
151
152         r_stats->ibytes = stats->rx.rx_bytes_ok;
153         r_stats->obytes = stats->tx.tx_bytes_ok;
154
155         r_stats->ierrors = stats->rx.rx_errors + stats->rx.rx_drop;
156         r_stats->oerrors = stats->tx.tx_errors
157                            + rte_atomic64_read(&soft_stats->tx_oversized);
158
159         r_stats->imissed = stats->rx.rx_no_bufs + rx_truncated;
160
161         r_stats->rx_nombuf = rte_atomic64_read(&soft_stats->rx_nombuf);
162         return 0;
163 }
164
165 int enic_del_mac_address(struct enic *enic, int mac_index)
166 {
167         struct rte_eth_dev *eth_dev = enic->rte_dev;
168         uint8_t *mac_addr = eth_dev->data->mac_addrs[mac_index].addr_bytes;
169
170         return vnic_dev_del_addr(enic->vdev, mac_addr);
171 }
172
173 int enic_set_mac_address(struct enic *enic, uint8_t *mac_addr)
174 {
175         int err;
176
177         if (!is_eth_addr_valid(mac_addr)) {
178                 dev_err(enic, "invalid mac address\n");
179                 return -EINVAL;
180         }
181
182         err = vnic_dev_add_addr(enic->vdev, mac_addr);
183         if (err)
184                 dev_err(enic, "add mac addr failed\n");
185         return err;
186 }
187
188 static void
189 enic_free_rq_buf(struct rte_mbuf **mbuf)
190 {
191         if (*mbuf == NULL)
192                 return;
193
194         rte_pktmbuf_free(*mbuf);
195         *mbuf = NULL;
196 }
197
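/*
 * Program the initial state of every vNIC resource: Rx queues (SOP and,
 * when in use, data RQs), Tx queues, their completion queues, and the
 * interrupt controls. Rx CQs post completion entries directly; Tx CQs
 * instead write a completion index to the per-WQ cqmsg memzone.
 */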
198 void enic_init_vnic_resources(struct enic *enic)
199 {
200         unsigned int error_interrupt_enable = 1;
201         unsigned int error_interrupt_offset = 0;
202         unsigned int rxq_interrupt_enable = 0;
203         unsigned int rxq_interrupt_offset = ENICPMD_RXQ_INTR_OFFSET;
204         unsigned int index = 0;
205         unsigned int cq_idx;
206         struct vnic_rq *data_rq;
207
208         if (enic->rte_dev->data->dev_conf.intr_conf.rxq)
209                 rxq_interrupt_enable = 1;
210
211         for (index = 0; index < enic->rq_count; index++) {
212                 cq_idx = enic_cq_rq(enic, enic_rte_rq_idx_to_sop_idx(index));
213
214                 vnic_rq_init(&enic->rq[enic_rte_rq_idx_to_sop_idx(index)],
215                         cq_idx,
216                         error_interrupt_enable,
217                         error_interrupt_offset);
218
219                 data_rq = &enic->rq[enic_rte_rq_idx_to_data_idx(index)];
220                 if (data_rq->in_use)
221                         vnic_rq_init(data_rq,
222                                      cq_idx,
223                                      error_interrupt_enable,
224                                      error_interrupt_offset);
225
226                 vnic_cq_init(&enic->cq[cq_idx],
227                         0 /* flow_control_enable */,
228                         1 /* color_enable */,
229                         0 /* cq_head */,
230                         0 /* cq_tail */,
231                         1 /* cq_tail_color */,
232                         rxq_interrupt_enable,
233                         1 /* cq_entry_enable */,
234                         0 /* cq_message_enable */,
235                         rxq_interrupt_offset,
236                         0 /* cq_message_addr */);
237                 if (rxq_interrupt_enable)
238                         rxq_interrupt_offset++;
239         }
240
241         for (index = 0; index < enic->wq_count; index++) {
242                 vnic_wq_init(&enic->wq[index],
243                         enic_cq_wq(enic, index),
244                         error_interrupt_enable,
245                         error_interrupt_offset);
246                 /* Compute unsupported ol flags for enic_prep_pkts() */
247                 enic->wq[index].tx_offload_notsup_mask =
248                         PKT_TX_OFFLOAD_MASK ^ enic->tx_offload_mask;
249
250                 cq_idx = enic_cq_wq(enic, index);
251                 vnic_cq_init(&enic->cq[cq_idx],
252                         0 /* flow_control_enable */,
253                         1 /* color_enable */,
254                         0 /* cq_head */,
255                         0 /* cq_tail */,
256                         1 /* cq_tail_color */,
257                         0 /* interrupt_enable */,
258                         0 /* cq_entry_enable */,
259                         1 /* cq_message_enable */,
260                         0 /* interrupt offset */,
261                         (u64)enic->wq[index].cqmsg_rz->iova);
262         }
263
264         for (index = 0; index < enic->intr_count; index++) {
265                 vnic_intr_init(&enic->intr[index],
266                                enic->config.intr_timer_usec,
267                                enic->config.intr_timer_type,
268                                /*mask_on_assertion*/1);
269         }
270 }
271
272
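/*
 * Allocate an mbuf for every descriptor of an RQ and encode it into the
 * descriptor ring. The buffers are not handed to the NIC here; that
 * happens in enic_initial_post_rx() once the RQ is enabled.
 */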
273 static int
274 enic_alloc_rx_queue_mbufs(struct enic *enic, struct vnic_rq *rq)
275 {
276         struct rte_mbuf *mb;
277         struct rq_enet_desc *rqd = rq->ring.descs;
278         unsigned i;
279         dma_addr_t dma_addr;
280         uint32_t max_rx_pkt_len;
281         uint16_t rq_buf_len;
282
283         if (!rq->in_use)
284                 return 0;
285
286         dev_debug(enic, "queue %u, allocating %u rx queue mbufs\n", rq->index,
287                   rq->ring.desc_count);
288
289         /*
290          * If *not* using scatter and the mbuf size is greater than the
291          * requested max packet size (max_rx_pkt_len), then reduce the
292          * posted buffer size to max_rx_pkt_len. HW still receives packets
293          * larger than max_rx_pkt_len, but they will be truncated, which we
294          * drop in the rx handler. Not ideal, but better than returning
295          * large packets when the user is not expecting them.
296          */
297         max_rx_pkt_len = enic->rte_dev->data->dev_conf.rxmode.max_rx_pkt_len;
298         rq_buf_len = rte_pktmbuf_data_room_size(rq->mp) - RTE_PKTMBUF_HEADROOM;
299         if (max_rx_pkt_len < rq_buf_len && !rq->data_queue_enable)
300                 rq_buf_len = max_rx_pkt_len;
301         for (i = 0; i < rq->ring.desc_count; i++, rqd++) {
302                 mb = rte_mbuf_raw_alloc(rq->mp);
303                 if (mb == NULL) {
304                         dev_err(enic, "RX mbuf alloc failed queue_id=%u\n",
305                         (unsigned)rq->index);
306                         return -ENOMEM;
307                 }
308
309                 mb->data_off = RTE_PKTMBUF_HEADROOM;
310                 dma_addr = (dma_addr_t)(mb->buf_iova
311                            + RTE_PKTMBUF_HEADROOM);
312                 rq_enet_desc_enc(rqd, dma_addr,
313                                 (rq->is_sop ? RQ_ENET_TYPE_ONLY_SOP
314                                 : RQ_ENET_TYPE_NOT_SOP),
315                                 rq_buf_len);
316                 rq->mbuf_ring[i] = mb;
317         }
318         /*
319          * Do not post the buffers to the NIC until we enable the RQ via
320          * enic_start_rq().
321          */
322         rq->need_initial_post = true;
323         /* Initialize fetch index while RQ is disabled */
324         iowrite32(0, &rq->ctrl->fetch_index);
325         return 0;
326 }
327
328 /*
329  * Post the Rx buffers for the first time. enic_alloc_rx_queue_mbufs() has
330  * allocated the buffers and filled the RQ descriptor ring. Just need to push
331  * the post index to the NIC.
332  */
333 static void
334 enic_initial_post_rx(struct enic *enic, struct vnic_rq *rq)
335 {
336         if (!rq->in_use || !rq->need_initial_post)
337                 return;
338
339         /* make sure all prior writes are complete before doing the PIO write */
340         rte_rmb();
341
342         /* Post all but the last buffer to VIC. */
343         rq->posted_index = rq->ring.desc_count - 1;
344
345         rq->rx_nb_hold = 0;
346
347         dev_debug(enic, "port=%u, qidx=%u, Write %u posted idx, %u sw held\n",
348                 enic->port_id, rq->index, rq->posted_index, rq->rx_nb_hold);
349         iowrite32(rq->posted_index, &rq->ctrl->posted_index);
350         rte_rmb();
351         rq->need_initial_post = false;
352 }
353
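/*
 * DMA-consistent allocator used by the vNIC common code. Memory comes
 * from an IOVA-contiguous memzone, and each allocation is recorded on
 * enic->memzone_list so enic_free_consistent() can locate and free it.
 */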
354 static void *
355 enic_alloc_consistent(void *priv, size_t size,
356         dma_addr_t *dma_handle, u8 *name)
357 {
358         void *vaddr;
359         const struct rte_memzone *rz;
360         *dma_handle = 0;
361         struct enic *enic = (struct enic *)priv;
362         struct enic_memzone_entry *mze;
363
364         rz = rte_memzone_reserve_aligned((const char *)name, size,
365                         SOCKET_ID_ANY, RTE_MEMZONE_IOVA_CONTIG, ENIC_ALIGN);
366         if (!rz) {
367                 pr_err("%s : Failed to allocate memory requested for %s\n",
368                         __func__, name);
369                 return NULL;
370         }
371
372         vaddr = rz->addr;
373         *dma_handle = (dma_addr_t)rz->iova;
374
375         mze = rte_malloc("enic memzone entry",
376                          sizeof(struct enic_memzone_entry), 0);
377
378         if (!mze) {
379                 pr_err("%s : Failed to allocate memory for memzone list\n",
380                        __func__);
381                 rte_memzone_free(rz);
382                 return NULL;
383         }
384
385         mze->rz = rz;
386
387         rte_spinlock_lock(&enic->memzone_list_lock);
388         LIST_INSERT_HEAD(&enic->memzone_list, mze, entries);
389         rte_spinlock_unlock(&enic->memzone_list_lock);
390
391         return vaddr;
392 }
393
394 static void
395 enic_free_consistent(void *priv,
396                      __rte_unused size_t size,
397                      void *vaddr,
398                      dma_addr_t dma_handle)
399 {
400         struct enic_memzone_entry *mze;
401         struct enic *enic = (struct enic *)priv;
402
403         rte_spinlock_lock(&enic->memzone_list_lock);
404         LIST_FOREACH(mze, &enic->memzone_list, entries) {
405                 if (mze->rz->addr == vaddr &&
406                     mze->rz->iova == dma_handle)
407                         break;
408         }
409         if (mze == NULL) {
410                 rte_spinlock_unlock(&enic->memzone_list_lock);
411                 dev_warning(enic,
412                             "Tried to free memory, but couldn't find it in the memzone list\n");
413                 return;
414         }
415         LIST_REMOVE(mze, entries);
416         rte_spinlock_unlock(&enic->memzone_list_lock);
417         rte_memzone_free(mze->rz);
418         rte_free(mze);
419 }
420
421 int enic_link_update(struct enic *enic)
422 {
423         struct rte_eth_dev *eth_dev = enic->rte_dev;
424         struct rte_eth_link link;
425
426         memset(&link, 0, sizeof(link));
427         link.link_status = enic_get_link_status(enic);
428         link.link_duplex = ETH_LINK_FULL_DUPLEX;
429         link.link_speed = vnic_dev_port_speed(enic->vdev);
430
431         return rte_eth_linkstatus_set(eth_dev, &link);
432 }
433
434 static void
435 enic_intr_handler(void *arg)
436 {
437         struct rte_eth_dev *dev = (struct rte_eth_dev *)arg;
438         struct enic *enic = pmd_priv(dev);
439
440         vnic_intr_return_all_credits(&enic->intr[ENICPMD_LSC_INTR_OFFSET]);
441
442         enic_link_update(enic);
443         _rte_eth_dev_callback_process(dev, RTE_ETH_EVENT_INTR_LSC, NULL);
444         enic_log_q_error(enic);
445 }
446
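/* Set up one MSI-X event fd per Rx queue when rxq interrupts are requested. */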
447 static int enic_rxq_intr_init(struct enic *enic)
448 {
449         struct rte_intr_handle *intr_handle;
450         uint32_t rxq_intr_count, i;
451         int err;
452
453         intr_handle = enic->rte_dev->intr_handle;
454         if (!enic->rte_dev->data->dev_conf.intr_conf.rxq)
455                 return 0;
456         /*
457          * Rx queue interrupts only work when we have MSI-X interrupts,
458          * one per queue. Sharing one interrupt is technically
459          * possible with VIC, but it is not worth the complications it brings.
460          */
461         if (!rte_intr_cap_multiple(intr_handle)) {
462                 dev_err(enic, "Rx queue interrupts require MSI-X interrupts"
463                         " (vfio-pci driver)\n");
464                 return -ENOTSUP;
465         }
466         rxq_intr_count = enic->intr_count - ENICPMD_RXQ_INTR_OFFSET;
467         err = rte_intr_efd_enable(intr_handle, rxq_intr_count);
468         if (err) {
469                 dev_err(enic, "Failed to enable event fds for Rx queue"
470                         " interrupts\n");
471                 return err;
472         }
473         intr_handle->intr_vec = rte_zmalloc("enic_intr_vec",
474                                             rxq_intr_count * sizeof(int), 0);
475         if (intr_handle->intr_vec == NULL) {
476                 dev_err(enic, "Failed to allocate intr_vec\n");
477                 return -ENOMEM;
478         }
479         for (i = 0; i < rxq_intr_count; i++)
480                 intr_handle->intr_vec[i] = i + ENICPMD_RXQ_INTR_OFFSET;
481         return 0;
482 }
483
484 static void enic_rxq_intr_deinit(struct enic *enic)
485 {
486         struct rte_intr_handle *intr_handle;
487
488         intr_handle = enic->rte_dev->intr_handle;
489         rte_intr_efd_disable(intr_handle);
490         if (intr_handle->intr_vec != NULL) {
491                 rte_free(intr_handle->intr_vec);
492                 intr_handle->intr_vec = NULL;
493         }
494 }
495
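/*
 * Start the port: fill the Rx rings, enable the WQs and RQs, program the
 * MAC address, and register/unmask the link-status/error interrupt.
 */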
496 int enic_enable(struct enic *enic)
497 {
498         unsigned int index;
499         int err;
500         struct rte_eth_dev *eth_dev = enic->rte_dev;
501
502         eth_dev->data->dev_link.link_speed = vnic_dev_port_speed(enic->vdev);
503         eth_dev->data->dev_link.link_duplex = ETH_LINK_FULL_DUPLEX;
504
505         /* vnic notification of link status has already been turned on in
506          * enic_dev_init() which is called during probe time.  Here we are
507          * just turning on interrupt vector 0 if needed.
508          */
509         if (eth_dev->data->dev_conf.intr_conf.lsc)
510                 vnic_dev_notify_set(enic->vdev, 0);
511
512         err = enic_rxq_intr_init(enic);
513         if (err)
514                 return err;
515         if (enic_clsf_init(enic))
516                 dev_warning(enic, "Init of hash table for clsf failed. "
517                         "Flow director feature will not work\n");
518
519         for (index = 0; index < enic->rq_count; index++) {
520                 err = enic_alloc_rx_queue_mbufs(enic,
521                         &enic->rq[enic_rte_rq_idx_to_sop_idx(index)]);
522                 if (err) {
523                         dev_err(enic, "Failed to alloc sop RX queue mbufs\n");
524                         return err;
525                 }
526                 err = enic_alloc_rx_queue_mbufs(enic,
527                         &enic->rq[enic_rte_rq_idx_to_data_idx(index)]);
528                 if (err) {
529                         /* release the allocated mbufs for the sop rq*/
530                         enic_rxmbuf_queue_release(enic,
531                                 &enic->rq[enic_rte_rq_idx_to_sop_idx(index)]);
532
533                         dev_err(enic, "Failed to alloc data RX queue mbufs\n");
534                         return err;
535                 }
536         }
537
538         for (index = 0; index < enic->wq_count; index++)
539                 enic_start_wq(enic, index);
540         for (index = 0; index < enic->rq_count; index++)
541                 enic_start_rq(enic, index);
542
543         vnic_dev_add_addr(enic->vdev, enic->mac_addr);
544
545         vnic_dev_enable_wait(enic->vdev);
546
547         /* Register and enable error interrupt */
548         rte_intr_callback_register(&(enic->pdev->intr_handle),
549                 enic_intr_handler, (void *)enic->rte_dev);
550
551         rte_intr_enable(&(enic->pdev->intr_handle));
552         /* Unmask LSC interrupt */
553         vnic_intr_unmask(&enic->intr[ENICPMD_LSC_INTR_OFFSET]);
554
555         return 0;
556 }
557
558 int enic_alloc_intr_resources(struct enic *enic)
559 {
560         int err;
561         unsigned int i;
562
563         dev_info(enic, "vNIC resources used:  "\
564                 "wq %d rq %d cq %d intr %d\n",
565                 enic->wq_count, enic_vnic_rq_count(enic),
566                 enic->cq_count, enic->intr_count);
567
568         for (i = 0; i < enic->intr_count; i++) {
569                 err = vnic_intr_alloc(enic->vdev, &enic->intr[i], i);
570                 if (err) {
571                         enic_free_vnic_resources(enic);
572                         return err;
573                 }
574         }
575         return 0;
576 }
577
578 void enic_free_rq(void *rxq)
579 {
580         struct vnic_rq *rq_sop, *rq_data;
581         struct enic *enic;
582
583         if (rxq == NULL)
584                 return;
585
586         rq_sop = (struct vnic_rq *)rxq;
587         enic = vnic_dev_priv(rq_sop->vdev);
588         rq_data = &enic->rq[rq_sop->data_queue_idx];
589
590         enic_rxmbuf_queue_release(enic, rq_sop);
591         if (rq_data->in_use)
592                 enic_rxmbuf_queue_release(enic, rq_data);
593
594         rte_free(rq_sop->mbuf_ring);
595         if (rq_data->in_use)
596                 rte_free(rq_data->mbuf_ring);
597
598         rq_sop->mbuf_ring = NULL;
599         rq_data->mbuf_ring = NULL;
600
601         vnic_rq_free(rq_sop);
602         if (rq_data->in_use)
603                 vnic_rq_free(rq_data);
604
605         vnic_cq_free(&enic->cq[enic_sop_rq_idx_to_cq_idx(rq_sop->index)]);
606
607         rq_sop->in_use = 0;
608         rq_data->in_use = 0;
609 }
610
611 void enic_start_wq(struct enic *enic, uint16_t queue_idx)
612 {
613         struct rte_eth_dev *eth_dev = enic->rte_dev;
614         vnic_wq_enable(&enic->wq[queue_idx]);
615         eth_dev->data->tx_queue_state[queue_idx] = RTE_ETH_QUEUE_STATE_STARTED;
616 }
617
618 int enic_stop_wq(struct enic *enic, uint16_t queue_idx)
619 {
620         struct rte_eth_dev *eth_dev = enic->rte_dev;
621         int ret;
622
623         ret = vnic_wq_disable(&enic->wq[queue_idx]);
624         if (ret)
625                 return ret;
626
627         eth_dev->data->tx_queue_state[queue_idx] = RTE_ETH_QUEUE_STATE_STOPPED;
628         return 0;
629 }
630
631 void enic_start_rq(struct enic *enic, uint16_t queue_idx)
632 {
633         struct vnic_rq *rq_sop;
634         struct vnic_rq *rq_data;
635         rq_sop = &enic->rq[enic_rte_rq_idx_to_sop_idx(queue_idx)];
636         rq_data = &enic->rq[rq_sop->data_queue_idx];
637         struct rte_eth_dev *eth_dev = enic->rte_dev;
638
639         if (rq_data->in_use) {
640                 vnic_rq_enable(rq_data);
641                 enic_initial_post_rx(enic, rq_data);
642         }
643         rte_mb();
644         vnic_rq_enable(rq_sop);
645         enic_initial_post_rx(enic, rq_sop);
646         eth_dev->data->rx_queue_state[queue_idx] = RTE_ETH_QUEUE_STATE_STARTED;
647 }
648
649 int enic_stop_rq(struct enic *enic, uint16_t queue_idx)
650 {
651         int ret1 = 0, ret2 = 0;
652         struct rte_eth_dev *eth_dev = enic->rte_dev;
653         struct vnic_rq *rq_sop;
654         struct vnic_rq *rq_data;
655         rq_sop = &enic->rq[enic_rte_rq_idx_to_sop_idx(queue_idx)];
656         rq_data = &enic->rq[rq_sop->data_queue_idx];
657
658         ret2 = vnic_rq_disable(rq_sop);
659         rte_mb();
660         if (rq_data->in_use)
661                 ret1 = vnic_rq_disable(rq_data);
662
663         if (ret2)
664                 return ret2;
665         else if (ret1)
666                 return ret1;
667
668         eth_dev->data->rx_queue_state[queue_idx] = RTE_ETH_QUEUE_STATE_STOPPED;
669         return 0;
670 }
671
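/*
 * Allocate one eth_dev Rx queue. Each eth_dev Rx queue maps to a
 * start-of-packet (SOP) RQ, an optional data RQ (used only when Rx
 * scatter is in effect), and a single CQ shared by both.
 */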
672 int enic_alloc_rq(struct enic *enic, uint16_t queue_idx,
673         unsigned int socket_id, struct rte_mempool *mp,
674         uint16_t nb_desc, uint16_t free_thresh)
675 {
676         int rc;
677         uint16_t sop_queue_idx = enic_rte_rq_idx_to_sop_idx(queue_idx);
678         uint16_t data_queue_idx = enic_rte_rq_idx_to_data_idx(queue_idx);
679         struct vnic_rq *rq_sop = &enic->rq[sop_queue_idx];
680         struct vnic_rq *rq_data = &enic->rq[data_queue_idx];
681         unsigned int mbuf_size, mbufs_per_pkt;
682         unsigned int nb_sop_desc, nb_data_desc;
683         uint16_t min_sop, max_sop, min_data, max_data;
684         uint32_t max_rx_pkt_len;
685
686         rq_sop->is_sop = 1;
687         rq_sop->data_queue_idx = data_queue_idx;
688         rq_data->is_sop = 0;
689         rq_data->data_queue_idx = 0;
690         rq_sop->socket_id = socket_id;
691         rq_sop->mp = mp;
692         rq_data->socket_id = socket_id;
693         rq_data->mp = mp;
694         rq_sop->in_use = 1;
695         rq_sop->rx_free_thresh = free_thresh;
696         rq_data->rx_free_thresh = free_thresh;
697         dev_debug(enic, "Set queue_id:%u free thresh:%u\n", queue_idx,
698                   free_thresh);
699
700         mbuf_size = (uint16_t)(rte_pktmbuf_data_room_size(mp) -
701                                RTE_PKTMBUF_HEADROOM);
702         /* max_rx_pkt_len includes the ethernet header and CRC. */
703         max_rx_pkt_len = enic->rte_dev->data->dev_conf.rxmode.max_rx_pkt_len;
704
705         if (enic->rte_dev->data->dev_conf.rxmode.offloads &
706             DEV_RX_OFFLOAD_SCATTER) {
707                 dev_info(enic, "Rq %u Scatter rx mode enabled\n", queue_idx);
708                 /* ceil((max pkt len)/mbuf_size) */
709                 mbufs_per_pkt = (max_rx_pkt_len + mbuf_size - 1) / mbuf_size;
710         } else {
711                 dev_info(enic, "Scatter rx mode disabled\n");
712                 mbufs_per_pkt = 1;
713                 if (max_rx_pkt_len > mbuf_size) {
714                         dev_warning(enic, "The maximum Rx packet size (%u) is"
715                                     " larger than the mbuf size (%u), and"
716                                     " scatter is disabled. Larger packets will"
717                                     " be truncated.\n",
718                                     max_rx_pkt_len, mbuf_size);
719                 }
720         }
721
722         if (mbufs_per_pkt > 1) {
723                 dev_info(enic, "Rq %u Scatter rx mode in use\n", queue_idx);
724                 rq_sop->data_queue_enable = 1;
725                 rq_data->in_use = 1;
726                 /*
727                  * HW does not directly support rxmode.max_rx_pkt_len. HW always
728                  * receives packet sizes up to the "max" MTU.
729                  * If not using scatter, we can achieve the effect of dropping
730                  * larger packets by reducing the size of posted buffers.
731                  * See enic_alloc_rx_queue_mbufs().
732                  */
733                 if (max_rx_pkt_len <
734                     enic_mtu_to_max_rx_pktlen(enic->max_mtu)) {
735                         dev_warning(enic, "rxmode.max_rx_pkt_len is ignored"
736                                     " when scatter rx mode is in use.\n");
737                 }
738         } else {
739                 dev_info(enic, "Rq %u Scatter rx mode not being used\n",
740                          queue_idx);
741                 rq_sop->data_queue_enable = 0;
742                 rq_data->in_use = 0;
743         }
744
745         /* The number of descriptors has to be a multiple of 32 */
746         nb_sop_desc = (nb_desc / mbufs_per_pkt) & ENIC_ALIGN_DESCS_MASK;
747         nb_data_desc = (nb_desc - nb_sop_desc) & ENIC_ALIGN_DESCS_MASK;
748
749         rq_sop->max_mbufs_per_pkt = mbufs_per_pkt;
750         rq_data->max_mbufs_per_pkt = mbufs_per_pkt;
751
752         if (mbufs_per_pkt > 1) {
753                 min_sop = 64;
754                 max_sop = ((enic->config.rq_desc_count /
755                             (mbufs_per_pkt - 1)) & ENIC_ALIGN_DESCS_MASK);
756                 min_data = min_sop * (mbufs_per_pkt - 1);
757                 max_data = enic->config.rq_desc_count;
758         } else {
759                 min_sop = 64;
760                 max_sop = enic->config.rq_desc_count;
761                 min_data = 0;
762                 max_data = 0;
763         }
764
765         if (nb_desc < (min_sop + min_data)) {
766                 dev_warning(enic,
767                             "Number of rx descs too low, adjusting to minimum\n");
768                 nb_sop_desc = min_sop;
769                 nb_data_desc = min_data;
770         } else if (nb_desc > (max_sop + max_data)) {
771                 dev_warning(enic,
772                             "Number of rx_descs too high, adjusting to maximum\n");
773                 nb_sop_desc = max_sop;
774                 nb_data_desc = max_data;
775         }
776         if (mbufs_per_pkt > 1) {
777                 dev_info(enic, "For max packet size %u and mbuf size %u valid"
778                          " rx descriptor range is %u to %u\n",
779                          max_rx_pkt_len, mbuf_size, min_sop + min_data,
780                          max_sop + max_data);
781         }
782         dev_info(enic, "Using %d rx descriptors (sop %d, data %d)\n",
783                  nb_sop_desc + nb_data_desc, nb_sop_desc, nb_data_desc);
784
785         /* Allocate sop queue resources */
786         rc = vnic_rq_alloc(enic->vdev, rq_sop, sop_queue_idx,
787                 nb_sop_desc, sizeof(struct rq_enet_desc));
788         if (rc) {
789                 dev_err(enic, "error in allocation of sop rq\n");
790                 goto err_exit;
791         }
792         nb_sop_desc = rq_sop->ring.desc_count;
793
794         if (rq_data->in_use) {
795                 /* Allocate data queue resources */
796                 rc = vnic_rq_alloc(enic->vdev, rq_data, data_queue_idx,
797                                    nb_data_desc,
798                                    sizeof(struct rq_enet_desc));
799                 if (rc) {
800                         dev_err(enic, "error in allocation of data rq\n");
801                         goto err_free_rq_sop;
802                 }
803                 nb_data_desc = rq_data->ring.desc_count;
804         }
805         rc = vnic_cq_alloc(enic->vdev, &enic->cq[queue_idx], queue_idx,
806                            socket_id, nb_sop_desc + nb_data_desc,
807                            sizeof(struct cq_enet_rq_desc));
808         if (rc) {
809                 dev_err(enic, "error in allocation of cq for rq\n");
810                 goto err_free_rq_data;
811         }
812
813         /* Allocate the mbuf rings */
814         rq_sop->mbuf_ring = (struct rte_mbuf **)
815                 rte_zmalloc_socket("rq->mbuf_ring",
816                                    sizeof(struct rte_mbuf *) * nb_sop_desc,
817                                    RTE_CACHE_LINE_SIZE, rq_sop->socket_id);
818         if (rq_sop->mbuf_ring == NULL)
819                 goto err_free_cq;
820
821         if (rq_data->in_use) {
822                 rq_data->mbuf_ring = (struct rte_mbuf **)
823                         rte_zmalloc_socket("rq->mbuf_ring",
824                                 sizeof(struct rte_mbuf *) * nb_data_desc,
825                                 RTE_CACHE_LINE_SIZE, rq_sop->socket_id);
826                 if (rq_data->mbuf_ring == NULL)
827                         goto err_free_sop_mbuf;
828         }
829
830         rq_sop->tot_nb_desc = nb_desc; /* squirrel away for MTU update function */
831
832         return 0;
833
834 err_free_sop_mbuf:
835         rte_free(rq_sop->mbuf_ring);
836 err_free_cq:
837         /* cleanup on error */
838         vnic_cq_free(&enic->cq[queue_idx]);
839 err_free_rq_data:
840         if (rq_data->in_use)
841                 vnic_rq_free(rq_data);
842 err_free_rq_sop:
843         vnic_rq_free(rq_sop);
844 err_exit:
845         return -ENOMEM;
846 }
847
848 void enic_free_wq(void *txq)
849 {
850         struct vnic_wq *wq;
851         struct enic *enic;
852
853         if (txq == NULL)
854                 return;
855
856         wq = (struct vnic_wq *)txq;
857         enic = vnic_dev_priv(wq->vdev);
858         rte_memzone_free(wq->cqmsg_rz);
859         vnic_wq_free(wq);
860         vnic_cq_free(&enic->cq[enic->rq_count + wq->index]);
861 }
862
863 int enic_alloc_wq(struct enic *enic, uint16_t queue_idx,
864         unsigned int socket_id, uint16_t nb_desc)
865 {
866         int err;
867         struct vnic_wq *wq = &enic->wq[queue_idx];
868         unsigned int cq_index = enic_cq_wq(enic, queue_idx);
869         char name[NAME_MAX];
870         static int instance;
871
872         wq->socket_id = socket_id;
873         /*
874          * rte_eth_tx_queue_setup() checks min, max, and alignment. So just
875          * print an info message for diagnostics.
876          */
877         dev_info(enic, "TX Queues - effective number of descs:%d\n", nb_desc);
878
879         /* Allocate queue resources */
880         err = vnic_wq_alloc(enic->vdev, &enic->wq[queue_idx], queue_idx,
881                 nb_desc,
882                 sizeof(struct wq_enet_desc));
883         if (err) {
884                 dev_err(enic, "error in allocation of wq\n");
885                 return err;
886         }
887
888         err = vnic_cq_alloc(enic->vdev, &enic->cq[cq_index], cq_index,
889                 socket_id, nb_desc,
890                 sizeof(struct cq_enet_wq_desc));
891         if (err) {
892                 vnic_wq_free(wq);
893                 dev_err(enic, "error in allocation of cq for wq\n");
894         }
895
896         /* set up CQ message */
897         snprintf((char *)name, sizeof(name),
898                  "vnic_cqmsg-%s-%d-%d", enic->bdf_name, queue_idx,
899                 instance++);
900
901         wq->cqmsg_rz = rte_memzone_reserve_aligned((const char *)name,
902                         sizeof(uint32_t), SOCKET_ID_ANY,
903                         RTE_MEMZONE_IOVA_CONTIG, ENIC_ALIGN);
904         if (!wq->cqmsg_rz)
905                 return -ENOMEM;
906
907         return err;
908 }
909
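/* Stop the port: mask interrupts, disable all queues, and reclaim their buffers. */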
910 int enic_disable(struct enic *enic)
911 {
912         unsigned int i;
913         int err;
914
915         for (i = 0; i < enic->intr_count; i++) {
916                 vnic_intr_mask(&enic->intr[i]);
917                 (void)vnic_intr_masked(&enic->intr[i]); /* flush write */
918         }
919         enic_rxq_intr_deinit(enic);
920         rte_intr_disable(&enic->pdev->intr_handle);
921         rte_intr_callback_unregister(&enic->pdev->intr_handle,
922                                      enic_intr_handler,
923                                      (void *)enic->rte_dev);
924
925         vnic_dev_disable(enic->vdev);
926
927         enic_clsf_destroy(enic);
928
929         if (!enic_is_sriov_vf(enic))
930                 vnic_dev_del_addr(enic->vdev, enic->mac_addr);
931
932         for (i = 0; i < enic->wq_count; i++) {
933                 err = vnic_wq_disable(&enic->wq[i]);
934                 if (err)
935                         return err;
936         }
937         for (i = 0; i < enic_vnic_rq_count(enic); i++) {
938                 if (enic->rq[i].in_use) {
939                         err = vnic_rq_disable(&enic->rq[i]);
940                         if (err)
941                                 return err;
942                 }
943         }
944
945         /* If we were using interrupts, set the interrupt vector to -1
946          * to disable interrupts.  We are not disabling link notifications,
947          * though, as we want the polling of link status to continue working.
948          */
949         if (enic->rte_dev->data->dev_conf.intr_conf.lsc)
950                 vnic_dev_notify_set(enic->vdev, -1);
951
952         vnic_dev_set_reset_flag(enic->vdev, 1);
953
954         for (i = 0; i < enic->wq_count; i++)
955                 vnic_wq_clean(&enic->wq[i], enic_free_wq_buf);
956
957         for (i = 0; i < enic_vnic_rq_count(enic); i++)
958                 if (enic->rq[i].in_use)
959                         vnic_rq_clean(&enic->rq[i], enic_free_rq_buf);
960         for (i = 0; i < enic->cq_count; i++)
961                 vnic_cq_clean(&enic->cq[i]);
962         for (i = 0; i < enic->intr_count; i++)
963                 vnic_intr_clean(&enic->intr[i]);
964
965         return 0;
966 }
967
968 static int enic_dev_wait(struct vnic_dev *vdev,
969         int (*start)(struct vnic_dev *, int),
970         int (*finished)(struct vnic_dev *, int *),
971         int arg)
972 {
973         int done;
974         int err;
975         int i;
976
977         err = start(vdev, arg);
978         if (err)
979                 return err;
980
981         /* Wait for func to complete...2 seconds max */
982         for (i = 0; i < 2000; i++) {
983                 err = finished(vdev, &done);
984                 if (err)
985                         return err;
986                 if (done)
987                         return 0;
988                 usleep(1000);
989         }
990         return -ETIMEDOUT;
991 }
992
993 static int enic_dev_open(struct enic *enic)
994 {
995         int err;
996         int flags = CMD_OPENF_IG_DESCCACHE;
997
998         err = enic_dev_wait(enic->vdev, vnic_dev_open,
999                 vnic_dev_open_done, flags);
1000         if (err)
1001                 dev_err(enic_get_dev(enic),
1002                         "vNIC device open failed, err %d\n", err);
1003
1004         return err;
1005 }
1006
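/*
 * Copy the RSS hash key (ENIC_RSS_HASH_KEY_SIZE bytes) into a DMA-able
 * buffer and hand it to the NIC via enic_set_rss_key().
 */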
1007 static int enic_set_rsskey(struct enic *enic, uint8_t *user_key)
1008 {
1009         dma_addr_t rss_key_buf_pa;
1010         union vnic_rss_key *rss_key_buf_va = NULL;
1011         int err, i;
1012         u8 name[NAME_MAX];
1013
1014         RTE_ASSERT(user_key != NULL);
1015         snprintf((char *)name, NAME_MAX, "rss_key-%s", enic->bdf_name);
1016         rss_key_buf_va = enic_alloc_consistent(enic, sizeof(union vnic_rss_key),
1017                 &rss_key_buf_pa, name);
1018         if (!rss_key_buf_va)
1019                 return -ENOMEM;
1020
1021         for (i = 0; i < ENIC_RSS_HASH_KEY_SIZE; i++)
1022                 rss_key_buf_va->key[i / 10].b[i % 10] = user_key[i];
1023
1024         err = enic_set_rss_key(enic,
1025                 rss_key_buf_pa,
1026                 sizeof(union vnic_rss_key));
1027
1028         /* Save for later queries */
1029         if (!err) {
1030                 rte_memcpy(&enic->rss_key, rss_key_buf_va,
1031                            sizeof(union vnic_rss_key));
1032         }
1033         enic_free_consistent(enic, sizeof(union vnic_rss_key),
1034                 rss_key_buf_va, rss_key_buf_pa);
1035
1036         return err;
1037 }
1038
1039 int enic_set_rss_reta(struct enic *enic, union vnic_rss_cpu *rss_cpu)
1040 {
1041         dma_addr_t rss_cpu_buf_pa;
1042         union vnic_rss_cpu *rss_cpu_buf_va = NULL;
1043         int err;
1044         u8 name[NAME_MAX];
1045
1046         snprintf((char *)name, NAME_MAX, "rss_cpu-%s", enic->bdf_name);
1047         rss_cpu_buf_va = enic_alloc_consistent(enic, sizeof(union vnic_rss_cpu),
1048                 &rss_cpu_buf_pa, name);
1049         if (!rss_cpu_buf_va)
1050                 return -ENOMEM;
1051
1052         rte_memcpy(rss_cpu_buf_va, rss_cpu, sizeof(union vnic_rss_cpu));
1053
1054         err = enic_set_rss_cpu(enic,
1055                 rss_cpu_buf_pa,
1056                 sizeof(union vnic_rss_cpu));
1057
1058         enic_free_consistent(enic, sizeof(union vnic_rss_cpu),
1059                 rss_cpu_buf_va, rss_cpu_buf_pa);
1060
1061         /* Save for later queries */
1062         if (!err)
1063                 rte_memcpy(&enic->rss_cpu, rss_cpu, sizeof(union vnic_rss_cpu));
1064         return err;
1065 }
1066
1067 static int enic_set_niccfg(struct enic *enic, u8 rss_default_cpu,
1068         u8 rss_hash_type, u8 rss_hash_bits, u8 rss_base_cpu, u8 rss_enable)
1069 {
1070         const u8 tso_ipid_split_en = 0;
1071         int err;
1072
1073         err = enic_set_nic_cfg(enic,
1074                 rss_default_cpu, rss_hash_type,
1075                 rss_hash_bits, rss_base_cpu,
1076                 rss_enable, tso_ipid_split_en,
1077                 enic->ig_vlan_strip_en);
1078
1079         return err;
1080 }
1081
1082 /* Initialize RSS with defaults, called from dev_configure */
1083 int enic_init_rss_nic_cfg(struct enic *enic)
1084 {
1085         static uint8_t default_rss_key[] = {
1086                 85, 67, 83, 97, 119, 101, 115, 111, 109, 101,
1087                 80, 65, 76, 79, 117, 110, 105, 113, 117, 101,
1088                 76, 73, 78, 85, 88, 114, 111, 99, 107, 115,
1089                 69, 78, 73, 67, 105, 115, 99, 111, 111, 108,
1090         };
1091         struct rte_eth_rss_conf rss_conf;
1092         union vnic_rss_cpu rss_cpu;
1093         int ret, i;
1094
1095         rss_conf = enic->rte_dev->data->dev_conf.rx_adv_conf.rss_conf;
1096         /*
1097          * If setting key for the first time, and the user gives us none, then
1098          * push the default key to NIC.
1099          */
1100         if (rss_conf.rss_key == NULL) {
1101                 rss_conf.rss_key = default_rss_key;
1102                 rss_conf.rss_key_len = ENIC_RSS_HASH_KEY_SIZE;
1103         }
1104         ret = enic_set_rss_conf(enic, &rss_conf);
1105         if (ret) {
1106                 dev_err(enic, "Failed to configure RSS\n");
1107                 return ret;
1108         }
1109         if (enic->rss_enable) {
1110                 /* If enabling RSS, use the default reta */
1111                 for (i = 0; i < ENIC_RSS_RETA_SIZE; i++) {
1112                         rss_cpu.cpu[i / 4].b[i % 4] =
1113                                 enic_rte_rq_idx_to_sop_idx(i % enic->rq_count);
1114                 }
1115                 ret = enic_set_rss_reta(enic, &rss_cpu);
1116                 if (ret)
1117                         dev_err(enic, "Failed to set RSS indirection table\n");
1118         }
1119         return ret;
1120 }
1121
1122 int enic_setup_finish(struct enic *enic)
1123 {
1124         enic_init_soft_stats(enic);
1125
1126         /* Default conf */
1127         vnic_dev_packet_filter(enic->vdev,
1128                 1 /* directed  */,
1129                 1 /* multicast */,
1130                 1 /* broadcast */,
1131                 0 /* promisc   */,
1132                 1 /* allmulti  */);
1133
1134         enic->promisc = 0;
1135         enic->allmulti = 1;
1136
1137         return 0;
1138 }
1139
1140 static int enic_rss_conf_valid(struct enic *enic,
1141                                struct rte_eth_rss_conf *rss_conf)
1142 {
1143         /* RSS is disabled per VIC settings. Ignore rss_conf. */
1144         if (enic->flow_type_rss_offloads == 0)
1145                 return 0;
1146         if (rss_conf->rss_key != NULL &&
1147             rss_conf->rss_key_len != ENIC_RSS_HASH_KEY_SIZE) {
1148                 dev_err(enic, "Given rss_key is %d bytes, it must be %d\n",
1149                         rss_conf->rss_key_len, ENIC_RSS_HASH_KEY_SIZE);
1150                 return -EINVAL;
1151         }
1152         if (rss_conf->rss_hf != 0 &&
1153             (rss_conf->rss_hf & enic->flow_type_rss_offloads) == 0) {
1154                 dev_err(enic, "Given rss_hf contains none of the supported"
1155                         " types\n");
1156                 return -EINVAL;
1157         }
1158         return 0;
1159 }
1160
1161 /* Set hash type and key according to rss_conf */
1162 int enic_set_rss_conf(struct enic *enic, struct rte_eth_rss_conf *rss_conf)
1163 {
1164         struct rte_eth_dev *eth_dev;
1165         uint64_t rss_hf;
1166         u8 rss_hash_type;
1167         u8 rss_enable;
1168         int ret;
1169
1170         RTE_ASSERT(rss_conf != NULL);
1171         ret = enic_rss_conf_valid(enic, rss_conf);
1172         if (ret) {
1173                 dev_err(enic, "RSS configuration (rss_conf) is invalid\n");
1174                 return ret;
1175         }
1176
1177         eth_dev = enic->rte_dev;
1178         rss_hash_type = 0;
1179         rss_hf = rss_conf->rss_hf & enic->flow_type_rss_offloads;
1180         if (enic->rq_count > 1 &&
1181             (eth_dev->data->dev_conf.rxmode.mq_mode & ETH_MQ_RX_RSS_FLAG) &&
1182             rss_hf != 0) {
1183                 rss_enable = 1;
1184                 if (rss_hf & (ETH_RSS_IPV4 | ETH_RSS_FRAG_IPV4 |
1185                               ETH_RSS_NONFRAG_IPV4_OTHER))
1186                         rss_hash_type |= NIC_CFG_RSS_HASH_TYPE_IPV4;
1187                 if (rss_hf & ETH_RSS_NONFRAG_IPV4_TCP)
1188                         rss_hash_type |= NIC_CFG_RSS_HASH_TYPE_TCP_IPV4;
1189                 if (rss_hf & ETH_RSS_NONFRAG_IPV4_UDP) {
1190                         rss_hash_type |= NIC_CFG_RSS_HASH_TYPE_UDP_IPV4;
1191                         if (enic->udp_rss_weak) {
1192                                 /*
1193                                  * 'TCP' is not a typo. The "weak" version of
1194                                  * UDP RSS requires that both the TCP and UDP bits
1195                                  * be set. It does enable TCP RSS as well.
1196                                  */
1197                                 rss_hash_type |= NIC_CFG_RSS_HASH_TYPE_TCP_IPV4;
1198                         }
1199                 }
1200                 if (rss_hf & (ETH_RSS_IPV6 | ETH_RSS_IPV6_EX |
1201                               ETH_RSS_FRAG_IPV6 | ETH_RSS_NONFRAG_IPV6_OTHER))
1202                         rss_hash_type |= NIC_CFG_RSS_HASH_TYPE_IPV6;
1203                 if (rss_hf & (ETH_RSS_NONFRAG_IPV6_TCP | ETH_RSS_IPV6_TCP_EX))
1204                         rss_hash_type |= NIC_CFG_RSS_HASH_TYPE_TCP_IPV6;
1205                 if (rss_hf & (ETH_RSS_NONFRAG_IPV6_UDP | ETH_RSS_IPV6_UDP_EX)) {
1206                         rss_hash_type |= NIC_CFG_RSS_HASH_TYPE_UDP_IPV6;
1207                         if (enic->udp_rss_weak)
1208                                 rss_hash_type |= NIC_CFG_RSS_HASH_TYPE_TCP_IPV6;
1209                 }
1210         } else {
1211                 rss_enable = 0;
1212                 rss_hf = 0;
1213         }
1214
1215         /* Set the hash key if provided */
1216         if (rss_enable && rss_conf->rss_key) {
1217                 ret = enic_set_rsskey(enic, rss_conf->rss_key);
1218                 if (ret) {
1219                         dev_err(enic, "Failed to set RSS key\n");
1220                         return ret;
1221                 }
1222         }
1223
1224         ret = enic_set_niccfg(enic, ENIC_RSS_DEFAULT_CPU, rss_hash_type,
1225                               ENIC_RSS_HASH_BITS, ENIC_RSS_BASE_CPU,
1226                               rss_enable);
1227         if (!ret) {
1228                 enic->rss_hf = rss_hf;
1229                 enic->rss_hash_type = rss_hash_type;
1230                 enic->rss_enable = rss_enable;
1231         } else {
1232                 dev_err(enic, "Failed to update RSS configurations."
1233                         " hash=0x%x\n", rss_hash_type);
1234         }
1235         return ret;
1236 }
1237
1238 int enic_set_vlan_strip(struct enic *enic)
1239 {
1240         /*
1241          * Unfortunately, VLAN strip on/off and RSS on/off are configured
1242          * together. So, re-do niccfg, preserving the current RSS settings.
1243          */
1244         return enic_set_niccfg(enic, ENIC_RSS_DEFAULT_CPU, enic->rss_hash_type,
1245                                ENIC_RSS_HASH_BITS, ENIC_RSS_BASE_CPU,
1246                                enic->rss_enable);
1247 }
1248
1249 void enic_add_packet_filter(struct enic *enic)
1250 {
1251         /* Args -> directed, multicast, broadcast, promisc, allmulti */
1252         vnic_dev_packet_filter(enic->vdev, 1, 1, 1,
1253                 enic->promisc, enic->allmulti);
1254 }
1255
1256 int enic_get_link_status(struct enic *enic)
1257 {
1258         return vnic_dev_link_status(enic->vdev);
1259 }
1260
1261 static void enic_dev_deinit(struct enic *enic)
1262 {
1263         struct rte_eth_dev *eth_dev = enic->rte_dev;
1264
1265         /* stop link status checking */
1266         vnic_dev_notify_unset(enic->vdev);
1267
1268         rte_free(eth_dev->data->mac_addrs);
1269         rte_free(enic->cq);
1270         rte_free(enic->intr);
1271         rte_free(enic->rq);
1272         rte_free(enic->wq);
1273 }
1274
1275
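/*
 * Check that the vNIC provides enough RQs, WQs, CQs and interrupts for
 * the queue counts requested through the ethdev configuration, and
 * record the counts that will actually be used.
 */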
1276 int enic_set_vnic_res(struct enic *enic)
1277 {
1278         struct rte_eth_dev *eth_dev = enic->rte_dev;
1279         int rc = 0;
1280         unsigned int required_rq, required_wq, required_cq, required_intr;
1281
1282         /* Always use two vNIC RQs per eth_dev RQ, regardless of Rx scatter. */
1283         required_rq = eth_dev->data->nb_rx_queues * 2;
1284         required_wq = eth_dev->data->nb_tx_queues;
1285         required_cq = eth_dev->data->nb_rx_queues + eth_dev->data->nb_tx_queues;
1286         required_intr = 1; /* 1 for LSC even if intr_conf.lsc is 0 */
1287         if (eth_dev->data->dev_conf.intr_conf.rxq) {
1288                 required_intr += eth_dev->data->nb_rx_queues;
1289         }
1290
1291         if (enic->conf_rq_count < required_rq) {
1292                 dev_err(dev, "Not enough Receive queues. Requested:%u which uses %d RQs on VIC, Configured:%u\n",
1293                         eth_dev->data->nb_rx_queues,
1294                         required_rq, enic->conf_rq_count);
1295                 rc = -EINVAL;
1296         }
1297         if (enic->conf_wq_count < required_wq) {
1298                 dev_err(dev, "Not enough Transmit queues. Requested:%u, Configured:%u\n",
1299                         eth_dev->data->nb_tx_queues, enic->conf_wq_count);
1300                 rc = -EINVAL;
1301         }
1302
1303         if (enic->conf_cq_count < required_cq) {
1304                 dev_err(dev, "Not enough Completion queues. Required:%u, Configured:%u\n",
1305                         required_cq, enic->conf_cq_count);
1306                 rc = -EINVAL;
1307         }
1308         if (enic->conf_intr_count < required_intr) {
1309                 dev_err(dev, "Not enough Interrupts to support Rx queue"
1310                         " interrupts. Required:%u, Configured:%u\n",
1311                         required_intr, enic->conf_intr_count);
1312                 rc = -EINVAL;
1313         }
1314
1315         if (rc == 0) {
1316                 enic->rq_count = eth_dev->data->nb_rx_queues;
1317                 enic->wq_count = eth_dev->data->nb_tx_queues;
1318                 enic->cq_count = enic->rq_count + enic->wq_count;
1319                 enic->intr_count = required_intr;
1320         }
1321
1322         return rc;
1323 }
1324
1325 /* Reinitialize an RQ and its completion queue (used on MTU change) */
1326 static int
1327 enic_reinit_rq(struct enic *enic, unsigned int rq_idx)
1328 {
1329         struct vnic_rq *sop_rq, *data_rq;
1330         unsigned int cq_idx;
1331         int rc = 0;
1332
1333         sop_rq = &enic->rq[enic_rte_rq_idx_to_sop_idx(rq_idx)];
1334         data_rq = &enic->rq[enic_rte_rq_idx_to_data_idx(rq_idx)];
1335         cq_idx = rq_idx;
1336
1337         vnic_cq_clean(&enic->cq[cq_idx]);
1338         vnic_cq_init(&enic->cq[cq_idx],
1339                      0 /* flow_control_enable */,
1340                      1 /* color_enable */,
1341                      0 /* cq_head */,
1342                      0 /* cq_tail */,
1343                      1 /* cq_tail_color */,
1344                      0 /* interrupt_enable */,
1345                      1 /* cq_entry_enable */,
1346                      0 /* cq_message_enable */,
1347                      0 /* interrupt offset */,
1348                      0 /* cq_message_addr */);
1349
1350
1351         vnic_rq_init_start(sop_rq, enic_cq_rq(enic,
1352                            enic_rte_rq_idx_to_sop_idx(rq_idx)), 0,
1353                            sop_rq->ring.desc_count - 1, 1, 0);
1354         if (data_rq->in_use) {
1355                 vnic_rq_init_start(data_rq,
1356                                    enic_cq_rq(enic,
1357                                    enic_rte_rq_idx_to_data_idx(rq_idx)), 0,
1358                                    data_rq->ring.desc_count - 1, 1, 0);
1359         }
1360
1361         rc = enic_alloc_rx_queue_mbufs(enic, sop_rq);
1362         if (rc)
1363                 return rc;
1364
1365         if (data_rq->in_use) {
1366                 rc = enic_alloc_rx_queue_mbufs(enic, data_rq);
1367                 if (rc) {
1368                         enic_rxmbuf_queue_release(enic, sop_rq);
1369                         return rc;
1370                 }
1371         }
1372
1373         return 0;
1374 }
1375
1376 /* The Cisco NIC can send and receive packets up to a max packet size
1377  * determined by the NIC type and firmware. There is also an MTU
1378  * configured into the NIC via the CIMC/UCSM management interface
1379  * which can be overridden by this function (up to the max packet size).
1380  * Depending on the network setup, doing so may cause packet drops
1381  * and unexpected behavior.
1382  */
1383 int enic_set_mtu(struct enic *enic, uint16_t new_mtu)
1384 {
1385         unsigned int rq_idx;
1386         struct vnic_rq *rq;
1387         int rc = 0;
1388         uint16_t old_mtu;       /* previous setting */
1389         uint16_t config_mtu;    /* Value configured into NIC via CIMC/UCSM */
1390         struct rte_eth_dev *eth_dev = enic->rte_dev;
1391
1392         old_mtu = eth_dev->data->mtu;
1393         config_mtu = enic->config.mtu;
1394
1395         if (rte_eal_process_type() != RTE_PROC_PRIMARY)
1396                 return -E_RTE_SECONDARY;
1397
1398         if (new_mtu > enic->max_mtu) {
1399                 dev_err(enic,
1400                         "MTU not updated: requested (%u) greater than max (%u)\n",
1401                         new_mtu, enic->max_mtu);
1402                 return -EINVAL;
1403         }
1404         if (new_mtu < ENIC_MIN_MTU) {
1405                 dev_info(enic,
1406                         "MTU not updated: requested (%u) less than min (%u)\n",
1407                         new_mtu, ENIC_MIN_MTU);
1408                 return -EINVAL;
1409         }
1410         if (new_mtu > config_mtu)
1411                 dev_warning(enic,
1412                         "MTU (%u) is greater than value configured in NIC (%u)\n",
1413                         new_mtu, config_mtu);
1414
1415         /* Update the MTU and maximum packet length */
1416         eth_dev->data->mtu = new_mtu;
1417         eth_dev->data->dev_conf.rxmode.max_rx_pkt_len =
1418                 enic_mtu_to_max_rx_pktlen(new_mtu);
1419
1420         /*
1421          * If the device has not started (enic_enable), nothing to do.
1422          * Later, enic_enable() will set up RQs reflecting the new maximum
1423          * packet length.
1424          */
1425         if (!eth_dev->data->dev_started)
1426                 goto set_mtu_done;
1427
1428         /*
1429          * The device has started, re-do RQs on the fly. In the process, we
1430          * pick up the new maximum packet length.
1431          *
1432          * Some applications rely on the ability to change MTU without stopping
1433          * the device. So keep this behavior for now.
1434          */
1435         rte_spinlock_lock(&enic->mtu_lock);
1436
1437         /* Stop traffic on all RQs */
1438         for (rq_idx = 0; rq_idx < enic->rq_count * 2; rq_idx++) {
1439                 rq = &enic->rq[rq_idx];
1440                 if (rq->is_sop && rq->in_use) {
1441                         rc = enic_stop_rq(enic,
1442                                           enic_sop_rq_idx_to_rte_idx(rq_idx));
1443                         if (rc) {
1444                                 dev_err(enic, "Failed to stop Rq %u\n", rq_idx);
1445                                 goto set_mtu_done;
1446                         }
1447                 }
1448         }
1449
1450         /* replace Rx function with a no-op to avoid getting stale pkts */
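        /* The full memory barrier below publishes the new burst function
         * before we start waiting for data-plane lcores to drain out of the
         * old one.
         */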
1451         eth_dev->rx_pkt_burst = enic_dummy_recv_pkts;
1452         rte_mb();
1453
1454         /* Allow time for threads to exit the real Rx function. */
1455         usleep(100000);
1456
1457         /* Now it is safe to reconfigure the RQs. */
1458
1460         /* free and reallocate RQs with the new MTU */
1461         for (rq_idx = 0; rq_idx < enic->rq_count; rq_idx++) {
1462                 rq = &enic->rq[enic_rte_rq_idx_to_sop_idx(rq_idx)];
1463                 if (!rq->in_use)
1464                         continue;
1465
1466                 enic_free_rq(rq);
1467                 rc = enic_alloc_rq(enic, rq_idx, rq->socket_id, rq->mp,
1468                                    rq->tot_nb_desc, rq->rx_free_thresh);
1469                 if (rc) {
1470                         dev_err(enic,
1471                                 "Fatal MTU alloc error - no traffic will pass\n");
1472                         goto set_mtu_done;
1473                 }
1474
1475                 rc = enic_reinit_rq(enic, rq_idx);
1476                 if (rc) {
1477                         dev_err(enic,
1478                                 "Fatal MTU RQ reinit - no traffic will pass\n");
1479                         goto set_mtu_done;
1480                 }
1481         }
1482
1483         /* put back the real receive function */
1484         rte_mb();
1485         eth_dev->rx_pkt_burst = enic_recv_pkts;
1486         rte_mb();
1487
1488         /* restart Rx traffic */
1489         for (rq_idx = 0; rq_idx < enic->rq_count; rq_idx++) {
1490                 rq = &enic->rq[enic_rte_rq_idx_to_sop_idx(rq_idx)];
1491                 if (rq->is_sop && rq->in_use)
1492                         enic_start_rq(enic, rq_idx);
1493         }
1494
1495 set_mtu_done:
1496         dev_info(enic, "MTU changed from %u to %u\n", old_mtu, new_mtu);
1497         rte_spinlock_unlock(&enic->mtu_lock);
1498         return rc;
1499 }
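
/*
 * Usage sketch (illustrative only, not part of the driver): applications
 * normally reach enic_set_mtu() through the ethdev MTU callback, e.g.
 *
 *     int ret = rte_eth_dev_set_mtu(port_id, 9000);  // port_id is assumed
 *     if (ret != 0)
 *             printf("MTU update failed: %d\n", ret);
 *
 * The port may be running; as implemented above, the RQs are then rebuilt
 * on the fly with the new maximum packet length.
 */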
1500
1501 static int enic_dev_init(struct enic *enic)
1502 {
1503         int err;
1504         struct rte_eth_dev *eth_dev = enic->rte_dev;
1505
1506         vnic_dev_intr_coal_timer_info_default(enic->vdev);
1507
1508         /* Get vNIC configuration */
1510         err = enic_get_vnic_config(enic);
1511         if (err) {
1512                 dev_err(enic, "Get vNIC configuration failed, aborting\n");
1513                 return err;
1514         }
1515
1516         /* Get available resource counts */
1517         enic_get_res_counts(enic);
1518         if (enic->conf_rq_count == 1) {
1519                 dev_err(enic, "Running with only 1 RQ configured in the vNIC is not supported.\n");
1520                 dev_err(enic, "Please configure 2 RQs in the vNIC for each Rx queue used by DPDK.\n");
1521                 dev_err(enic, "See the ENIC PMD guide for more information.\n");
1522                 return -EINVAL;
1523         }
1524         /* Queue counts may be zeros. rte_zmalloc returns NULL in that case. */
1525         enic->cq = rte_zmalloc("enic_vnic_cq", sizeof(struct vnic_cq) *
1526                                enic->conf_cq_count, 8);
1527         enic->intr = rte_zmalloc("enic_vnic_intr", sizeof(struct vnic_intr) *
1528                                  enic->conf_intr_count, 8);
1529         enic->rq = rte_zmalloc("enic_vnic_rq", sizeof(struct vnic_rq) *
1530                                enic->conf_rq_count, 8);
1531         enic->wq = rte_zmalloc("enic_vnic_wq", sizeof(struct vnic_wq) *
1532                                enic->conf_wq_count, 8);
1533         if (enic->conf_cq_count > 0 && enic->cq == NULL) {
1534                 dev_err(enic, "failed to allocate vnic_cq, aborting.\n");
1535                 return -1;
1536         }
1537         if (enic->conf_intr_count > 0 && enic->intr == NULL) {
1538                 dev_err(enic, "failed to allocate vnic_intr, aborting.\n");
1539                 return -1;
1540         }
1541         if (enic->conf_rq_count > 0 && enic->rq == NULL) {
1542                 dev_err(enic, "failed to allocate vnic_rq, aborting.\n");
1543                 return -1;
1544         }
1545         if (enic->conf_wq_count > 0 && enic->wq == NULL) {
1546                 dev_err(enic, "failed to allocate vnic_wq, aborting.\n");
1547                 return -1;
1548         }
1549
1550         /* Get the supported filters */
1551         enic_fdir_info(enic);
1552
1553         eth_dev->data->mac_addrs = rte_zmalloc("enic_mac_addr", ETH_ALEN
1554                                                 * ENIC_MAX_MAC_ADDR, 0);
1555         if (!eth_dev->data->mac_addrs) {
1556                 dev_err(enic, "mac addr storage alloc failed, aborting.\n");
1557                 return -1;
1558         }
1559         ether_addr_copy((struct ether_addr *) enic->mac_addr,
1560                         eth_dev->data->mac_addrs);
1561
1562         vnic_dev_set_reset_flag(enic->vdev, 0);
1563
1564         LIST_INIT(&enic->flows);
1565         rte_spinlock_init(&enic->flows_lock);
1566
1567         /* set up link status checking */
1568         vnic_dev_notify_set(enic->vdev, -1); /* No Intr for notify */
1569
1570         enic->overlay_offload = false;
1571         if (!enic->disable_overlay && enic->vxlan &&
1572             /* 'VXLAN feature' enables VXLAN, NVGRE, and GENEVE. */
1573             vnic_dev_overlay_offload_ctrl(enic->vdev,
1574                                           OVERLAY_FEATURE_VXLAN,
1575                                           OVERLAY_OFFLOAD_ENABLE) == 0) {
1576                 enic->tx_offload_capa |=
1577                         DEV_TX_OFFLOAD_OUTER_IPV4_CKSUM |
1578                         DEV_TX_OFFLOAD_GENEVE_TNL_TSO |
1579                         DEV_TX_OFFLOAD_VXLAN_TNL_TSO;
1580                 /*
1581                  * Do not add PKT_TX_OUTER_{IPV4,IPV6} as they are not
1582                  * 'offload' flags (i.e. not part of PKT_TX_OFFLOAD_MASK).
1583                  */
1584                 enic->tx_offload_mask |=
1585                         PKT_TX_OUTER_IP_CKSUM |
1586                         PKT_TX_TUNNEL_MASK;
1587                 enic->overlay_offload = true;
1588                 dev_info(enic, "Overlay offload is enabled\n");
1589         }
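
        /* Illustrative only: an application that wants the tunnel TSO and
         * outer checksum offloads advertised above would request them at
         * configure time, e.g.
         *
         *     port_conf.txmode.offloads |= DEV_TX_OFFLOAD_VXLAN_TNL_TSO |
         *                                  DEV_TX_OFFLOAD_OUTER_IPV4_CKSUM;
         *     rte_eth_dev_configure(port_id, nb_rxq, nb_txq, &port_conf);
         *
         * port_conf, port_id, nb_rxq and nb_txq are placeholders here.
         */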
1590
1591         return 0;
1592
1593 }
1594
1595 int enic_probe(struct enic *enic)
1596 {
1597         struct rte_pci_device *pdev = enic->pdev;
1598         int err = -1;
1599
1600         dev_debug(enic, "Initializing ENIC PMD\n");
1601
1602         /* If this is a secondary process, the hardware is already initialized */
1603         if (rte_eal_process_type() != RTE_PROC_PRIMARY)
1604                 return 0;
1605
1606         enic->bar0.vaddr = (void *)pdev->mem_resource[0].addr;
1607         enic->bar0.len = pdev->mem_resource[0].len;
1608
1609         /* Register vNIC device */
1610         enic->vdev = vnic_dev_register(NULL, enic, enic->pdev, &enic->bar0, 1);
1611         if (!enic->vdev) {
1612                 dev_err(enic, "vNIC registration failed, aborting\n");
1613                 goto err_out;
1614         }
1615
1616         LIST_INIT(&enic->memzone_list);
1617         rte_spinlock_init(&enic->memzone_list_lock);
1618
1619         vnic_register_cbacks(enic->vdev,
1620                 enic_alloc_consistent,
1621                 enic_free_consistent);
1622
1623         /*
1624          * Allocate the consistent memory for stats upfront so both primary and
1625          * secondary processes can dump stats.
1626          */
1627         err = vnic_dev_alloc_stats_mem(enic->vdev);
1628         if (err) {
1629                 dev_err(enic, "Failed to allocate stats memory, aborting\n");
1630                 goto err_out_unregister;
1631         }
1632         /* Issue device open to get device in known state */
1633         err = enic_dev_open(enic);
1634         if (err) {
1635                 dev_err(enic, "vNIC dev open failed, aborting\n");
1636                 goto err_out_unregister;
1637         }
1638
1639         /* Set ingress vlan rewrite mode before vnic initialization */
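        /* enic->ig_vlan_rewrite_mode is filled in during ethdev probe, either
         * with the default or with a value supplied via a device argument.
         */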
1640         dev_debug(enic, "Set ig_vlan_rewrite_mode=%u\n",
1641                   enic->ig_vlan_rewrite_mode);
1642         err = vnic_dev_set_ig_vlan_rewrite_mode(enic->vdev,
1643                 enic->ig_vlan_rewrite_mode);
1644         if (err) {
1645                 dev_err(enic,
1646                         "Failed to set ingress vlan rewrite mode, aborting.\n");
1647                 goto err_out_dev_close;
1648         }
1649
1650         /* Issue device init to initialize the vnic-to-switch link.
1651          * We'll start with carrier off and wait for link UP
1652          * notification later to turn on carrier.  We don't need
1653          * to wait here for the vnic-to-switch link initialization
1654          * to complete; link UP notification is the indication that
1655          * the process is complete.
1656          */
1657
1658         err = vnic_dev_init(enic->vdev, 0);
1659         if (err) {
1660                 dev_err(enic, "vNIC dev init failed, aborting\n");
1661                 goto err_out_dev_close;
1662         }
1663
1664         err = enic_dev_init(enic);
1665         if (err) {
1666                 dev_err(enic, "Device initialization failed, aborting\n");
1667                 goto err_out_dev_close;
1668         }
1669
1670         return 0;
1671
1672 err_out_dev_close:
1673         vnic_dev_close(enic->vdev);
1674 err_out_unregister:
1675         vnic_dev_unregister(enic->vdev);
1676 err_out:
1677         return err;
1678 }
1679
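/* Teardown mirrors enic_probe(): release per-device state, close the vNIC,
 * then unregister it.
 */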
1680 void enic_remove(struct enic *enic)
1681 {
1682         enic_dev_deinit(enic);
1683         vnic_dev_close(enic->vdev);
1684         vnic_dev_unregister(enic->vdev);
1685 }