863d2463c98ff8f5b6b3d076376fb672b37753cc
dpdk.git: drivers/net/enic/enic_main.c
1 /* SPDX-License-Identifier: BSD-3-Clause
2  * Copyright 2008-2017 Cisco Systems, Inc.  All rights reserved.
3  * Copyright 2007 Nuova Systems, Inc.  All rights reserved.
4  */
5
6 #include <stdio.h>
7
8 #include <sys/stat.h>
9 #include <sys/mman.h>
10 #include <fcntl.h>
11 #include <libgen.h>
12
13 #include <rte_pci.h>
14 #include <rte_bus_pci.h>
15 #include <rte_memzone.h>
16 #include <rte_malloc.h>
17 #include <rte_mbuf.h>
18 #include <rte_string_fns.h>
19 #include <rte_ethdev_driver.h>
20
21 #include "enic_compat.h"
22 #include "enic.h"
23 #include "wq_enet_desc.h"
24 #include "rq_enet_desc.h"
25 #include "cq_enet_desc.h"
26 #include "vnic_enet.h"
27 #include "vnic_dev.h"
28 #include "vnic_wq.h"
29 #include "vnic_rq.h"
30 #include "vnic_cq.h"
31 #include "vnic_intr.h"
32 #include "vnic_nic.h"
33
34 static inline int enic_is_sriov_vf(struct enic *enic)
35 {
36         return enic->pdev->id.device_id == PCI_DEVICE_ID_CISCO_VIC_ENET_VF;
37 }
38
39 static int is_zero_addr(uint8_t *addr)
40 {
41         return !(addr[0] | addr[1] | addr[2] | addr[3] | addr[4] | addr[5]);
42 }
43
44 static int is_mcast_addr(uint8_t *addr)
45 {
46         return addr[0] & 1;
47 }
48
49 static int is_eth_addr_valid(uint8_t *addr)
50 {
51         return !is_mcast_addr(addr) && !is_zero_addr(addr);
52 }
53
54 static void
55 enic_rxmbuf_queue_release(__rte_unused struct enic *enic, struct vnic_rq *rq)
56 {
57         uint16_t i;
58
59         if (!rq || !rq->mbuf_ring) {
60                 dev_debug(enic, "Pointer to rq or mbuf_ring is NULL");
61                 return;
62         }
63
64         for (i = 0; i < rq->ring.desc_count; i++) {
65                 if (rq->mbuf_ring[i]) {
66                         rte_pktmbuf_free_seg(rq->mbuf_ring[i]);
67                         rq->mbuf_ring[i] = NULL;
68                 }
69         }
70 }
71
72 static void enic_free_wq_buf(struct vnic_wq_buf *buf)
73 {
74         struct rte_mbuf *mbuf = (struct rte_mbuf *)buf->mb;
75
76         rte_pktmbuf_free_seg(mbuf);
77         buf->mb = NULL;
78 }
79
80 static void enic_log_q_error(struct enic *enic)
81 {
82         unsigned int i;
83         u32 error_status;
84
85         for (i = 0; i < enic->wq_count; i++) {
86                 error_status = vnic_wq_error_status(&enic->wq[i]);
87                 if (error_status)
88                         dev_err(enic, "WQ[%u] error_status %u\n", i,
89                                 error_status);
90         }
91
92         for (i = 0; i < enic_vnic_rq_count(enic); i++) {
93                 if (!enic->rq[i].in_use)
94                         continue;
95                 error_status = vnic_rq_error_status(&enic->rq[i]);
96                 if (error_status)
97                         dev_err(enic, "RQ[%u] error_status %u\n", i,
98                                 error_status);
99         }
100 }
101
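/*
 * "Soft" stats are counters the PMD keeps in software (mbuf allocation
 * failures, Rx packet errors, oversized Tx packets). They supplement the
 * hardware counters fetched with vnic_dev_stats_dump() in
 * enic_dev_stats_get().
 */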
102 static void enic_clear_soft_stats(struct enic *enic)
103 {
104         struct enic_soft_stats *soft_stats = &enic->soft_stats;
105         rte_atomic64_clear(&soft_stats->rx_nombuf);
106         rte_atomic64_clear(&soft_stats->rx_packet_errors);
107         rte_atomic64_clear(&soft_stats->tx_oversized);
108 }
109
110 static void enic_init_soft_stats(struct enic *enic)
111 {
112         struct enic_soft_stats *soft_stats = &enic->soft_stats;
113         rte_atomic64_init(&soft_stats->rx_nombuf);
114         rte_atomic64_init(&soft_stats->rx_packet_errors);
115         rte_atomic64_init(&soft_stats->tx_oversized);
116         enic_clear_soft_stats(enic);
117 }
118
119 void enic_dev_stats_clear(struct enic *enic)
120 {
121         if (vnic_dev_stats_clear(enic->vdev))
122                 dev_err(enic, "Error in clearing stats\n");
123         enic_clear_soft_stats(enic);
124 }
125
126 int enic_dev_stats_get(struct enic *enic, struct rte_eth_stats *r_stats)
127 {
128         struct vnic_stats *stats;
129         struct enic_soft_stats *soft_stats = &enic->soft_stats;
130         int64_t rx_truncated;
131         uint64_t rx_packet_errors;
132         int ret = vnic_dev_stats_dump(enic->vdev, &stats);
133
134         if (ret) {
135                 dev_err(enic, "Error in getting stats\n");
136                 return ret;
137         }
138
139         /* The number of truncated packets can only be calculated by
140          * subtracting a hardware counter from error packets received by
141          * the driver. Note: this causes transient inaccuracies in the
142          * ipackets count. Also, the lengths of truncated packets are
143          * counted in ibytes even though truncated packets are dropped,
144          * which can make ibytes slightly higher than it should be.
145          */
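        /*
         * Illustration (made-up numbers): if the driver has counted 10 Rx
         * packet errors but the HW reports rx_errors = 7, the remaining 3
         * are treated as truncated packets; they are subtracted from
         * ipackets and added to imissed below.
         */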
146         rx_packet_errors = rte_atomic64_read(&soft_stats->rx_packet_errors);
147         rx_truncated = rx_packet_errors - stats->rx.rx_errors;
148
149         r_stats->ipackets = stats->rx.rx_frames_ok - rx_truncated;
150         r_stats->opackets = stats->tx.tx_frames_ok;
151
152         r_stats->ibytes = stats->rx.rx_bytes_ok;
153         r_stats->obytes = stats->tx.tx_bytes_ok;
154
155         r_stats->ierrors = stats->rx.rx_errors + stats->rx.rx_drop;
156         r_stats->oerrors = stats->tx.tx_errors
157                            + rte_atomic64_read(&soft_stats->tx_oversized);
158
159         r_stats->imissed = stats->rx.rx_no_bufs + rx_truncated;
160
161         r_stats->rx_nombuf = rte_atomic64_read(&soft_stats->rx_nombuf);
162         return 0;
163 }
164
165 int enic_del_mac_address(struct enic *enic, int mac_index)
166 {
167         struct rte_eth_dev *eth_dev = enic->rte_dev;
168         uint8_t *mac_addr = eth_dev->data->mac_addrs[mac_index].addr_bytes;
169
170         return vnic_dev_del_addr(enic->vdev, mac_addr);
171 }
172
173 int enic_set_mac_address(struct enic *enic, uint8_t *mac_addr)
174 {
175         int err;
176
177         if (!is_eth_addr_valid(mac_addr)) {
178                 dev_err(enic, "invalid mac address\n");
179                 return -EINVAL;
180         }
181
182         err = vnic_dev_add_addr(enic->vdev, mac_addr);
183         if (err)
184                 dev_err(enic, "add mac addr failed\n");
185         return err;
186 }
187
188 static void
189 enic_free_rq_buf(struct rte_mbuf **mbuf)
190 {
191         if (*mbuf == NULL)
192                 return;
193
194         rte_pktmbuf_free(*mbuf);
195         *mbuf = NULL;
196 }
197
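/*
 * Program the RQ, WQ, CQ and interrupt resources on the vNIC. Each RQ and
 * WQ is bound to its own CQ: Rx CQs run in entry mode (and may be tied to a
 * per-queue MSI-X vector), while Tx CQs run in message mode, writing their
 * completion index to the wq->cqmsg_rz memzone.
 */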
198 void enic_init_vnic_resources(struct enic *enic)
199 {
200         unsigned int error_interrupt_enable = 1;
201         unsigned int error_interrupt_offset = 0;
202         unsigned int rxq_interrupt_enable = 0;
203         unsigned int rxq_interrupt_offset = ENICPMD_RXQ_INTR_OFFSET;
204         unsigned int index = 0;
205         unsigned int cq_idx;
206         struct vnic_rq *data_rq;
207
208         if (enic->rte_dev->data->dev_conf.intr_conf.rxq)
209                 rxq_interrupt_enable = 1;
210
211         for (index = 0; index < enic->rq_count; index++) {
212                 cq_idx = enic_cq_rq(enic, enic_rte_rq_idx_to_sop_idx(index));
213
214                 vnic_rq_init(&enic->rq[enic_rte_rq_idx_to_sop_idx(index)],
215                         cq_idx,
216                         error_interrupt_enable,
217                         error_interrupt_offset);
218
219                 data_rq = &enic->rq[enic_rte_rq_idx_to_data_idx(index)];
220                 if (data_rq->in_use)
221                         vnic_rq_init(data_rq,
222                                      cq_idx,
223                                      error_interrupt_enable,
224                                      error_interrupt_offset);
225
226                 vnic_cq_init(&enic->cq[cq_idx],
227                         0 /* flow_control_enable */,
228                         1 /* color_enable */,
229                         0 /* cq_head */,
230                         0 /* cq_tail */,
231                         1 /* cq_tail_color */,
232                         rxq_interrupt_enable,
233                         1 /* cq_entry_enable */,
234                         0 /* cq_message_enable */,
235                         rxq_interrupt_offset,
236                         0 /* cq_message_addr */);
237                 if (rxq_interrupt_enable)
238                         rxq_interrupt_offset++;
239         }
240
241         for (index = 0; index < enic->wq_count; index++) {
242                 vnic_wq_init(&enic->wq[index],
243                         enic_cq_wq(enic, index),
244                         error_interrupt_enable,
245                         error_interrupt_offset);
246                 /* Compute unsupported ol flags for enic_prep_pkts() */
247                 enic->wq[index].tx_offload_notsup_mask =
248                         PKT_TX_OFFLOAD_MASK ^ enic->tx_offload_mask;
249
250                 cq_idx = enic_cq_wq(enic, index);
251                 vnic_cq_init(&enic->cq[cq_idx],
252                         0 /* flow_control_enable */,
253                         1 /* color_enable */,
254                         0 /* cq_head */,
255                         0 /* cq_tail */,
256                         1 /* cq_tail_color */,
257                         0 /* interrupt_enable */,
258                         0 /* cq_entry_enable */,
259                         1 /* cq_message_enable */,
260                         0 /* interrupt offset */,
261                         (u64)enic->wq[index].cqmsg_rz->iova);
262         }
263
264         for (index = 0; index < enic->intr_count; index++) {
265                 vnic_intr_init(&enic->intr[index],
266                                enic->config.intr_timer_usec,
267                                enic->config.intr_timer_type,
268                                /*mask_on_assertion*/1);
269         }
270 }
271
272
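/*
 * Fill an RQ's descriptor ring with newly allocated mbufs. The buffers are
 * only written to the ring here; they are not made visible to the NIC until
 * enic_start_rq() calls enic_initial_post_rx().
 */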
273 static int
274 enic_alloc_rx_queue_mbufs(struct enic *enic, struct vnic_rq *rq)
275 {
276         struct rte_mbuf *mb;
277         struct rq_enet_desc *rqd = rq->ring.descs;
278         unsigned i;
279         dma_addr_t dma_addr;
280         uint32_t max_rx_pkt_len;
281         uint16_t rq_buf_len;
282
283         if (!rq->in_use)
284                 return 0;
285
286         dev_debug(enic, "queue %u, allocating %u rx queue mbufs\n", rq->index,
287                   rq->ring.desc_count);
288
289         /*
290          * If *not* using scatter and the mbuf size is greater than the
291          * requested max packet size (max_rx_pkt_len), then reduce the
292          * posted buffer size to max_rx_pkt_len. HW still receives packets
293          * larger than max_rx_pkt_len, but they will be truncated, which we
294          * drop in the rx handler. Not ideal, but better than returning
295          * large packets when the user is not expecting them.
296          */
297         max_rx_pkt_len = enic->rte_dev->data->dev_conf.rxmode.max_rx_pkt_len;
298         rq_buf_len = rte_pktmbuf_data_room_size(rq->mp) - RTE_PKTMBUF_HEADROOM;
299         if (max_rx_pkt_len < rq_buf_len && !rq->data_queue_enable)
300                 rq_buf_len = max_rx_pkt_len;
301         for (i = 0; i < rq->ring.desc_count; i++, rqd++) {
302                 mb = rte_mbuf_raw_alloc(rq->mp);
303                 if (mb == NULL) {
304                         dev_err(enic, "RX mbuf alloc failed queue_id=%u\n",
305                         (unsigned)rq->index);
306                         return -ENOMEM;
307                 }
308
309                 mb->data_off = RTE_PKTMBUF_HEADROOM;
310                 dma_addr = (dma_addr_t)(mb->buf_iova
311                            + RTE_PKTMBUF_HEADROOM);
312                 rq_enet_desc_enc(rqd, dma_addr,
313                                 (rq->is_sop ? RQ_ENET_TYPE_ONLY_SOP
314                                 : RQ_ENET_TYPE_NOT_SOP),
315                                 rq_buf_len);
316                 rq->mbuf_ring[i] = mb;
317         }
318         /*
319          * Do not post the buffers to the NIC until we enable the RQ via
320          * enic_start_rq().
321          */
322         rq->need_initial_post = true;
323         /* Initialize fetch index while RQ is disabled */
324         iowrite32(0, &rq->ctrl->fetch_index);
325         return 0;
326 }
327
328 /*
329  * Post the Rx buffers for the first time. enic_alloc_rx_queue_mbufs() has
330  * allocated the buffers and filled the RQ descriptor ring. Just need to push
331  * the post index to the NIC.
332  */
333 static void
334 enic_initial_post_rx(struct enic *enic, struct vnic_rq *rq)
335 {
336         if (!rq->in_use || !rq->need_initial_post)
337                 return;
338
339         /* make sure all prior writes are complete before doing the PIO write */
340         rte_rmb();
341
342         /* Post all but the last buffer to VIC. */
343         rq->posted_index = rq->ring.desc_count - 1;
344
345         rq->rx_nb_hold = 0;
346
347         dev_debug(enic, "port=%u, qidx=%u, Write %u posted idx, %u sw held\n",
348                 enic->port_id, rq->index, rq->posted_index, rq->rx_nb_hold);
349         iowrite32(rq->posted_index, &rq->ctrl->posted_index);
350         rte_rmb();
351         rq->need_initial_post = false;
352 }
353
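/*
 * DMA-consistent allocator used by the vNIC control-plane code. It reserves
 * an IOVA-contiguous memzone and records it on enic->memzone_list so that
 * enic_free_consistent() can look it up by address and release it.
 */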
354 static void *
355 enic_alloc_consistent(void *priv, size_t size,
356         dma_addr_t *dma_handle, u8 *name)
357 {
358         void *vaddr;
359         const struct rte_memzone *rz;
360         *dma_handle = 0;
361         struct enic *enic = (struct enic *)priv;
362         struct enic_memzone_entry *mze;
363
364         rz = rte_memzone_reserve_aligned((const char *)name, size,
365                         SOCKET_ID_ANY, RTE_MEMZONE_IOVA_CONTIG, ENIC_ALIGN);
366         if (!rz) {
367                 pr_err("%s : Failed to allocate memory requested for %s\n",
368                         __func__, name);
369                 return NULL;
370         }
371
372         vaddr = rz->addr;
373         *dma_handle = (dma_addr_t)rz->iova;
374
375         mze = rte_malloc("enic memzone entry",
376                          sizeof(struct enic_memzone_entry), 0);
377
378         if (!mze) {
379                 pr_err("%s : Failed to allocate memory for memzone list\n",
380                        __func__);
381                 rte_memzone_free(rz);
382                 return NULL;
383         }
384
385         mze->rz = rz;
386
387         rte_spinlock_lock(&enic->memzone_list_lock);
388         LIST_INSERT_HEAD(&enic->memzone_list, mze, entries);
389         rte_spinlock_unlock(&enic->memzone_list_lock);
390
391         return vaddr;
392 }
393
394 static void
395 enic_free_consistent(void *priv,
396                      __rte_unused size_t size,
397                      void *vaddr,
398                      dma_addr_t dma_handle)
399 {
400         struct enic_memzone_entry *mze;
401         struct enic *enic = (struct enic *)priv;
402
403         rte_spinlock_lock(&enic->memzone_list_lock);
404         LIST_FOREACH(mze, &enic->memzone_list, entries) {
405                 if (mze->rz->addr == vaddr &&
406                     mze->rz->iova == dma_handle)
407                         break;
408         }
409         if (mze == NULL) {
410                 rte_spinlock_unlock(&enic->memzone_list_lock);
411                 dev_warning(enic,
412                             "Tried to free memory, but couldn't find it in the memzone list\n");
413                 return;
414         }
415         LIST_REMOVE(mze, entries);
416         rte_spinlock_unlock(&enic->memzone_list_lock);
417         rte_memzone_free(mze->rz);
418         rte_free(mze);
419 }
420
421 int enic_link_update(struct enic *enic)
422 {
423         struct rte_eth_dev *eth_dev = enic->rte_dev;
424         struct rte_eth_link link;
425
426         memset(&link, 0, sizeof(link));
427         link.link_status = enic_get_link_status(enic);
428         link.link_duplex = ETH_LINK_FULL_DUPLEX;
429         link.link_speed = vnic_dev_port_speed(enic->vdev);
430
431         return rte_eth_linkstatus_set(eth_dev, &link);
432 }
433
434 static void
435 enic_intr_handler(void *arg)
436 {
437         struct rte_eth_dev *dev = (struct rte_eth_dev *)arg;
438         struct enic *enic = pmd_priv(dev);
439
440         vnic_intr_return_all_credits(&enic->intr[ENICPMD_LSC_INTR_OFFSET]);
441
442         enic_link_update(enic);
443         _rte_eth_dev_callback_process(dev, RTE_ETH_EVENT_INTR_LSC, NULL);
444         enic_log_q_error(enic);
445 }
446
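/*
 * Set up event fds for per-Rx-queue interrupts. Vector 0 is reserved for
 * link-status and error notifications; Rx queue vectors start at
 * ENICPMD_RXQ_INTR_OFFSET and require MSI-X (one vector per queue).
 */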
447 static int enic_rxq_intr_init(struct enic *enic)
448 {
449         struct rte_intr_handle *intr_handle;
450         uint32_t rxq_intr_count, i;
451         int err;
452
453         intr_handle = enic->rte_dev->intr_handle;
454         if (!enic->rte_dev->data->dev_conf.intr_conf.rxq)
455                 return 0;
456         /*
457          * Rx queue interrupts only work when we have MSI-X interrupts,
458          * one per queue. Sharing one interrupt is technically
459          * possible with VIC, but it is not worth the complications it brings.
460          */
461         if (!rte_intr_cap_multiple(intr_handle)) {
462                 dev_err(enic, "Rx queue interrupts require MSI-X interrupts"
463                         " (vfio-pci driver)\n");
464                 return -ENOTSUP;
465         }
466         rxq_intr_count = enic->intr_count - ENICPMD_RXQ_INTR_OFFSET;
467         err = rte_intr_efd_enable(intr_handle, rxq_intr_count);
468         if (err) {
469                 dev_err(enic, "Failed to enable event fds for Rx queue"
470                         " interrupts\n");
471                 return err;
472         }
473         intr_handle->intr_vec = rte_zmalloc("enic_intr_vec",
474                                             rxq_intr_count * sizeof(int), 0);
475         if (intr_handle->intr_vec == NULL) {
476                 dev_err(enic, "Failed to allocate intr_vec\n");
477                 return -ENOMEM;
478         }
479         for (i = 0; i < rxq_intr_count; i++)
480                 intr_handle->intr_vec[i] = i + ENICPMD_RXQ_INTR_OFFSET;
481         return 0;
482 }
483
484 static void enic_rxq_intr_deinit(struct enic *enic)
485 {
486         struct rte_intr_handle *intr_handle;
487
488         intr_handle = enic->rte_dev->intr_handle;
489         rte_intr_efd_disable(intr_handle);
490         if (intr_handle->intr_vec != NULL) {
491                 rte_free(intr_handle->intr_vec);
492                 intr_handle->intr_vec = NULL;
493         }
494 }
495
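/*
 * Bring the port up: enable link-status notification, set up Rx queue
 * interrupts, fill the RQs with mbufs, start all WQs and RQs, program the
 * unicast MAC address, enable the vNIC, and register/unmask the error and
 * link-status interrupt.
 */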
496 int enic_enable(struct enic *enic)
497 {
498         unsigned int index;
499         int err;
500         struct rte_eth_dev *eth_dev = enic->rte_dev;
501
502         eth_dev->data->dev_link.link_speed = vnic_dev_port_speed(enic->vdev);
503         eth_dev->data->dev_link.link_duplex = ETH_LINK_FULL_DUPLEX;
504
505         /* vnic notification of link status has already been turned on in
506          * enic_dev_init() which is called during probe time.  Here we are
507          * just turning on interrupt vector 0 if needed.
508          */
509         if (eth_dev->data->dev_conf.intr_conf.lsc)
510                 vnic_dev_notify_set(enic->vdev, 0);
511
512         err = enic_rxq_intr_init(enic);
513         if (err)
514                 return err;
515         if (enic_clsf_init(enic))
516                 dev_warning(enic, "Init of hash table for clsf failed. "\
517                         "Flow director feature will not work\n");
518
519         for (index = 0; index < enic->rq_count; index++) {
520                 err = enic_alloc_rx_queue_mbufs(enic,
521                         &enic->rq[enic_rte_rq_idx_to_sop_idx(index)]);
522                 if (err) {
523                         dev_err(enic, "Failed to alloc sop RX queue mbufs\n");
524                         return err;
525                 }
526                 err = enic_alloc_rx_queue_mbufs(enic,
527                         &enic->rq[enic_rte_rq_idx_to_data_idx(index)]);
528                 if (err) {
529                         /* release the allocated mbufs for the sop rq*/
530                         enic_rxmbuf_queue_release(enic,
531                                 &enic->rq[enic_rte_rq_idx_to_sop_idx(index)]);
532
533                         dev_err(enic, "Failed to alloc data RX queue mbufs\n");
534                         return err;
535                 }
536         }
537
538         for (index = 0; index < enic->wq_count; index++)
539                 enic_start_wq(enic, index);
540         for (index = 0; index < enic->rq_count; index++)
541                 enic_start_rq(enic, index);
542
543         vnic_dev_add_addr(enic->vdev, enic->mac_addr);
544
545         vnic_dev_enable_wait(enic->vdev);
546
547         /* Register and enable error interrupt */
548         rte_intr_callback_register(&(enic->pdev->intr_handle),
549                 enic_intr_handler, (void *)enic->rte_dev);
550
551         rte_intr_enable(&(enic->pdev->intr_handle));
552         /* Unmask LSC interrupt */
553         vnic_intr_unmask(&enic->intr[ENICPMD_LSC_INTR_OFFSET]);
554
555         return 0;
556 }
557
558 int enic_alloc_intr_resources(struct enic *enic)
559 {
560         int err;
561         unsigned int i;
562
563         dev_info(enic, "vNIC resources used:  "\
564                 "wq %d rq %d cq %d intr %d\n",
565                 enic->wq_count, enic_vnic_rq_count(enic),
566                 enic->cq_count, enic->intr_count);
567
568         for (i = 0; i < enic->intr_count; i++) {
569                 err = vnic_intr_alloc(enic->vdev, &enic->intr[i], i);
570                 if (err) {
571                         enic_free_vnic_resources(enic);
572                         return err;
573                 }
574         }
575         return 0;
576 }
577
578 void enic_free_rq(void *rxq)
579 {
580         struct vnic_rq *rq_sop, *rq_data;
581         struct enic *enic;
582
583         if (rxq == NULL)
584                 return;
585
586         rq_sop = (struct vnic_rq *)rxq;
587         enic = vnic_dev_priv(rq_sop->vdev);
588         rq_data = &enic->rq[rq_sop->data_queue_idx];
589
590         enic_rxmbuf_queue_release(enic, rq_sop);
591         if (rq_data->in_use)
592                 enic_rxmbuf_queue_release(enic, rq_data);
593
594         rte_free(rq_sop->mbuf_ring);
595         if (rq_data->in_use)
596                 rte_free(rq_data->mbuf_ring);
597
598         rq_sop->mbuf_ring = NULL;
599         rq_data->mbuf_ring = NULL;
600
601         vnic_rq_free(rq_sop);
602         if (rq_data->in_use)
603                 vnic_rq_free(rq_data);
604
605         vnic_cq_free(&enic->cq[enic_sop_rq_idx_to_cq_idx(rq_sop->index)]);
606
607         rq_sop->in_use = 0;
608         rq_data->in_use = 0;
609 }
610
611 void enic_start_wq(struct enic *enic, uint16_t queue_idx)
612 {
613         struct rte_eth_dev *eth_dev = enic->rte_dev;
614         vnic_wq_enable(&enic->wq[queue_idx]);
615         eth_dev->data->tx_queue_state[queue_idx] = RTE_ETH_QUEUE_STATE_STARTED;
616 }
617
618 int enic_stop_wq(struct enic *enic, uint16_t queue_idx)
619 {
620         struct rte_eth_dev *eth_dev = enic->rte_dev;
621         int ret;
622
623         ret = vnic_wq_disable(&enic->wq[queue_idx]);
624         if (ret)
625                 return ret;
626
627         eth_dev->data->tx_queue_state[queue_idx] = RTE_ETH_QUEUE_STATE_STOPPED;
628         return 0;
629 }
630
631 void enic_start_rq(struct enic *enic, uint16_t queue_idx)
632 {
633         struct vnic_rq *rq_sop;
634         struct vnic_rq *rq_data;
635         rq_sop = &enic->rq[enic_rte_rq_idx_to_sop_idx(queue_idx)];
636         rq_data = &enic->rq[rq_sop->data_queue_idx];
637         struct rte_eth_dev *eth_dev = enic->rte_dev;
638
639         if (rq_data->in_use) {
640                 vnic_rq_enable(rq_data);
641                 enic_initial_post_rx(enic, rq_data);
642         }
643         rte_mb();
644         vnic_rq_enable(rq_sop);
645         enic_initial_post_rx(enic, rq_sop);
646         eth_dev->data->rx_queue_state[queue_idx] = RTE_ETH_QUEUE_STATE_STARTED;
647 }
648
649 int enic_stop_rq(struct enic *enic, uint16_t queue_idx)
650 {
651         int ret1 = 0, ret2 = 0;
652         struct rte_eth_dev *eth_dev = enic->rte_dev;
653         struct vnic_rq *rq_sop;
654         struct vnic_rq *rq_data;
655         rq_sop = &enic->rq[enic_rte_rq_idx_to_sop_idx(queue_idx)];
656         rq_data = &enic->rq[rq_sop->data_queue_idx];
657
658         ret2 = vnic_rq_disable(rq_sop);
659         rte_mb();
660         if (rq_data->in_use)
661                 ret1 = vnic_rq_disable(rq_data);
662
663         if (ret2)
664                 return ret2;
665         else if (ret1)
666                 return ret1;
667
668         eth_dev->data->rx_queue_state[queue_idx] = RTE_ETH_QUEUE_STATE_STOPPED;
669         return 0;
670 }
671
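/*
 * Allocate the vNIC resources backing one rte Rx queue: a start-of-packet
 * (SOP) RQ, an optional data RQ (only used when scatter Rx needs more than
 * one mbuf per packet), a shared CQ, and the mbuf rings. The requested
 * descriptor count is split between the SOP and data RQs.
 */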
672 int enic_alloc_rq(struct enic *enic, uint16_t queue_idx,
673         unsigned int socket_id, struct rte_mempool *mp,
674         uint16_t nb_desc, uint16_t free_thresh)
675 {
676         int rc;
677         uint16_t sop_queue_idx = enic_rte_rq_idx_to_sop_idx(queue_idx);
678         uint16_t data_queue_idx = enic_rte_rq_idx_to_data_idx(queue_idx);
679         struct vnic_rq *rq_sop = &enic->rq[sop_queue_idx];
680         struct vnic_rq *rq_data = &enic->rq[data_queue_idx];
681         unsigned int mbuf_size, mbufs_per_pkt;
682         unsigned int nb_sop_desc, nb_data_desc;
683         uint16_t min_sop, max_sop, min_data, max_data;
684         uint32_t max_rx_pkt_len;
685
686         rq_sop->is_sop = 1;
687         rq_sop->data_queue_idx = data_queue_idx;
688         rq_data->is_sop = 0;
689         rq_data->data_queue_idx = 0;
690         rq_sop->socket_id = socket_id;
691         rq_sop->mp = mp;
692         rq_data->socket_id = socket_id;
693         rq_data->mp = mp;
694         rq_sop->in_use = 1;
695         rq_sop->rx_free_thresh = free_thresh;
696         rq_data->rx_free_thresh = free_thresh;
697         dev_debug(enic, "Set queue_id:%u free thresh:%u\n", queue_idx,
698                   free_thresh);
699
700         mbuf_size = (uint16_t)(rte_pktmbuf_data_room_size(mp) -
701                                RTE_PKTMBUF_HEADROOM);
702         /* max_rx_pkt_len includes the ethernet header and CRC. */
703         max_rx_pkt_len = enic->rte_dev->data->dev_conf.rxmode.max_rx_pkt_len;
704
705         if (enic->rte_dev->data->dev_conf.rxmode.offloads &
706             DEV_RX_OFFLOAD_SCATTER) {
707                 dev_info(enic, "Rq %u Scatter rx mode enabled\n", queue_idx);
708                 /* ceil((max pkt len)/mbuf_size) */
709                 mbufs_per_pkt = (max_rx_pkt_len + mbuf_size - 1) / mbuf_size;
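                /*
                 * For example (illustrative values): a 9000-byte
                 * max_rx_pkt_len with 2048-byte buffers gives
                 * ceil(9000 / 2048) = 5 mbufs per packet.
                 */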
710         } else {
711                 dev_info(enic, "Scatter rx mode disabled\n");
712                 mbufs_per_pkt = 1;
713                 if (max_rx_pkt_len > mbuf_size) {
714                         dev_warning(enic, "The maximum Rx packet size (%u) is"
715                                     " larger than the mbuf size (%u), and"
716                                     " scatter is disabled. Larger packets will"
717                                     " be truncated.\n",
718                                     max_rx_pkt_len, mbuf_size);
719                 }
720         }
721
722         if (mbufs_per_pkt > 1) {
723                 dev_info(enic, "Rq %u Scatter rx mode in use\n", queue_idx);
724                 rq_sop->data_queue_enable = 1;
725                 rq_data->in_use = 1;
726                 /*
727                  * HW does not directly support rxmode.max_rx_pkt_len. HW always
728                  * receives packet sizes up to the "max" MTU.
729                  * If not using scatter, we can achieve the effect of dropping
730                  * larger packets by reducing the size of posted buffers.
731                  * See enic_alloc_rx_queue_mbufs().
732                  */
733                 if (max_rx_pkt_len <
734                     enic_mtu_to_max_rx_pktlen(enic->max_mtu)) {
735                         dev_warning(enic, "rxmode.max_rx_pkt_len is ignored"
736                                     " when scatter rx mode is in use.\n");
737                 }
738         } else {
739                 dev_info(enic, "Rq %u Scatter rx mode not being used\n",
740                          queue_idx);
741                 rq_sop->data_queue_enable = 0;
742                 rq_data->in_use = 0;
743         }
744
745         /* The number of descriptors has to be a multiple of 32 */
746         nb_sop_desc = (nb_desc / mbufs_per_pkt) & ~0x1F;
747         nb_data_desc = (nb_desc - nb_sop_desc) & ~0x1F;
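        /*
         * For example (illustrative values): nb_desc = 512 with
         * mbufs_per_pkt = 5 gives nb_sop_desc = (512 / 5) & ~0x1F = 96 and
         * nb_data_desc = (512 - 96) & ~0x1F = 416.
         */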
748
749         rq_sop->max_mbufs_per_pkt = mbufs_per_pkt;
750         rq_data->max_mbufs_per_pkt = mbufs_per_pkt;
751
752         if (mbufs_per_pkt > 1) {
753                 min_sop = 64;
754                 max_sop = ((enic->config.rq_desc_count /
755                             (mbufs_per_pkt - 1)) & ~0x1F);
756                 min_data = min_sop * (mbufs_per_pkt - 1);
757                 max_data = enic->config.rq_desc_count;
758         } else {
759                 min_sop = 64;
760                 max_sop = enic->config.rq_desc_count;
761                 min_data = 0;
762                 max_data = 0;
763         }
764
765         if (nb_desc < (min_sop + min_data)) {
766                 dev_warning(enic,
767                             "Number of rx descs too low, adjusting to minimum\n");
768                 nb_sop_desc = min_sop;
769                 nb_data_desc = min_data;
770         } else if (nb_desc > (max_sop + max_data)) {
771                 dev_warning(enic,
772                             "Number of rx_descs too high, adjusting to maximum\n");
773                 nb_sop_desc = max_sop;
774                 nb_data_desc = max_data;
775         }
776         if (mbufs_per_pkt > 1) {
777                 dev_info(enic, "For max packet size %u and mbuf size %u valid"
778                          " rx descriptor range is %u to %u\n",
779                          max_rx_pkt_len, mbuf_size, min_sop + min_data,
780                          max_sop + max_data);
781         }
782         dev_info(enic, "Using %d rx descriptors (sop %d, data %d)\n",
783                  nb_sop_desc + nb_data_desc, nb_sop_desc, nb_data_desc);
784
785         /* Allocate sop queue resources */
786         rc = vnic_rq_alloc(enic->vdev, rq_sop, sop_queue_idx,
787                 nb_sop_desc, sizeof(struct rq_enet_desc));
788         if (rc) {
789                 dev_err(enic, "error in allocation of sop rq\n");
790                 goto err_exit;
791         }
792         nb_sop_desc = rq_sop->ring.desc_count;
793
794         if (rq_data->in_use) {
795                 /* Allocate data queue resources */
796                 rc = vnic_rq_alloc(enic->vdev, rq_data, data_queue_idx,
797                                    nb_data_desc,
798                                    sizeof(struct rq_enet_desc));
799                 if (rc) {
800                         dev_err(enic, "error in allocation of data rq\n");
801                         goto err_free_rq_sop;
802                 }
803                 nb_data_desc = rq_data->ring.desc_count;
804         }
805         rc = vnic_cq_alloc(enic->vdev, &enic->cq[queue_idx], queue_idx,
806                            socket_id, nb_sop_desc + nb_data_desc,
807                            sizeof(struct cq_enet_rq_desc));
808         if (rc) {
809                 dev_err(enic, "error in allocation of cq for rq\n");
810                 goto err_free_rq_data;
811         }
812
813         /* Allocate the mbuf rings */
814         rq_sop->mbuf_ring = (struct rte_mbuf **)
815                 rte_zmalloc_socket("rq->mbuf_ring",
816                                    sizeof(struct rte_mbuf *) * nb_sop_desc,
817                                    RTE_CACHE_LINE_SIZE, rq_sop->socket_id);
818         if (rq_sop->mbuf_ring == NULL)
819                 goto err_free_cq;
820
821         if (rq_data->in_use) {
822                 rq_data->mbuf_ring = (struct rte_mbuf **)
823                         rte_zmalloc_socket("rq->mbuf_ring",
824                                 sizeof(struct rte_mbuf *) * nb_data_desc,
825                                 RTE_CACHE_LINE_SIZE, rq_sop->socket_id);
826                 if (rq_data->mbuf_ring == NULL)
827                         goto err_free_sop_mbuf;
828         }
829
830         rq_sop->tot_nb_desc = nb_desc; /* squirrel away for MTU update function */
831
832         return 0;
833
834 err_free_sop_mbuf:
835         rte_free(rq_sop->mbuf_ring);
836 err_free_cq:
837         /* cleanup on error */
838         vnic_cq_free(&enic->cq[queue_idx]);
839 err_free_rq_data:
840         if (rq_data->in_use)
841                 vnic_rq_free(rq_data);
842 err_free_rq_sop:
843         vnic_rq_free(rq_sop);
844 err_exit:
845         return -ENOMEM;
846 }
847
848 void enic_free_wq(void *txq)
849 {
850         struct vnic_wq *wq;
851         struct enic *enic;
852
853         if (txq == NULL)
854                 return;
855
856         wq = (struct vnic_wq *)txq;
857         enic = vnic_dev_priv(wq->vdev);
858         rte_memzone_free(wq->cqmsg_rz);
859         vnic_wq_free(wq);
860         vnic_cq_free(&enic->cq[enic->rq_count + wq->index]);
861 }
862
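/*
 * Allocate the vNIC resources backing one rte Tx queue: the WQ descriptor
 * ring, its completion queue, and a small IOVA-contiguous memzone
 * (wq->cqmsg_rz) that the CQ writes its completion index into (message
 * mode).
 */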
863 int enic_alloc_wq(struct enic *enic, uint16_t queue_idx,
864         unsigned int socket_id, uint16_t nb_desc)
865 {
866         int err;
867         struct vnic_wq *wq = &enic->wq[queue_idx];
868         unsigned int cq_index = enic_cq_wq(enic, queue_idx);
869         char name[NAME_MAX];
870         static int instance;
871
872         wq->socket_id = socket_id;
873         if (nb_desc > enic->config.wq_desc_count) {
874                 dev_warning(enic,
875                             "WQ %d - number of tx desc in cmd line (%d) "
876                             "is greater than that in the UCSM/CIMC adapter "
877                             "policy.  Applying the value in the adapter "
878                             "policy (%d)\n",
879                             queue_idx, nb_desc, enic->config.wq_desc_count);
880                 nb_desc = enic->config.wq_desc_count;
881         } else if (nb_desc != enic->config.wq_desc_count) {
882                 dev_info(enic,
883                          "TX Queues - effective number of descs:%d\n",
884                          nb_desc);
885         }
886
887         /* Allocate queue resources */
888         err = vnic_wq_alloc(enic->vdev, &enic->wq[queue_idx], queue_idx,
889                 nb_desc,
890                 sizeof(struct wq_enet_desc));
891         if (err) {
892                 dev_err(enic, "error in allocation of wq\n");
893                 return err;
894         }
895
896         err = vnic_cq_alloc(enic->vdev, &enic->cq[cq_index], cq_index,
897                 socket_id, nb_desc,
898                 sizeof(struct cq_enet_wq_desc));
899         if (err) {
900                 vnic_wq_free(wq);
901                 dev_err(enic, "error in allocation of cq for wq\n");
                return err;
902         }
903
904         /* set up CQ message */
905         snprintf((char *)name, sizeof(name),
906                  "vnic_cqmsg-%s-%d-%d", enic->bdf_name, queue_idx,
907                 instance++);
908
909         wq->cqmsg_rz = rte_memzone_reserve_aligned((const char *)name,
910                         sizeof(uint32_t), SOCKET_ID_ANY,
911                         RTE_MEMZONE_IOVA_CONTIG, ENIC_ALIGN);
912         if (!wq->cqmsg_rz)
913                 return -ENOMEM;
914
915         return err;
916 }
917
918 int enic_disable(struct enic *enic)
919 {
920         unsigned int i;
921         int err;
922
923         for (i = 0; i < enic->intr_count; i++) {
924                 vnic_intr_mask(&enic->intr[i]);
925                 (void)vnic_intr_masked(&enic->intr[i]); /* flush write */
926         }
927         enic_rxq_intr_deinit(enic);
928         rte_intr_disable(&enic->pdev->intr_handle);
929         rte_intr_callback_unregister(&enic->pdev->intr_handle,
930                                      enic_intr_handler,
931                                      (void *)enic->rte_dev);
932
933         vnic_dev_disable(enic->vdev);
934
935         enic_clsf_destroy(enic);
936
937         if (!enic_is_sriov_vf(enic))
938                 vnic_dev_del_addr(enic->vdev, enic->mac_addr);
939
940         for (i = 0; i < enic->wq_count; i++) {
941                 err = vnic_wq_disable(&enic->wq[i]);
942                 if (err)
943                         return err;
944         }
945         for (i = 0; i < enic_vnic_rq_count(enic); i++) {
946                 if (enic->rq[i].in_use) {
947                         err = vnic_rq_disable(&enic->rq[i]);
948                         if (err)
949                                 return err;
950                 }
951         }
952
953         /* If we were using interrupts, set the interrupt vector to -1
954          * to disable interrupts.  We are not disabling link notifications,
955          * though, as we want the polling of link status to continue working.
956          */
957         if (enic->rte_dev->data->dev_conf.intr_conf.lsc)
958                 vnic_dev_notify_set(enic->vdev, -1);
959
960         vnic_dev_set_reset_flag(enic->vdev, 1);
961
962         for (i = 0; i < enic->wq_count; i++)
963                 vnic_wq_clean(&enic->wq[i], enic_free_wq_buf);
964
965         for (i = 0; i < enic_vnic_rq_count(enic); i++)
966                 if (enic->rq[i].in_use)
967                         vnic_rq_clean(&enic->rq[i], enic_free_rq_buf);
968         for (i = 0; i < enic->cq_count; i++)
969                 vnic_cq_clean(&enic->cq[i]);
970         for (i = 0; i < enic->intr_count; i++)
971                 vnic_intr_clean(&enic->intr[i]);
972
973         return 0;
974 }
975
976 static int enic_dev_wait(struct vnic_dev *vdev,
977         int (*start)(struct vnic_dev *, int),
978         int (*finished)(struct vnic_dev *, int *),
979         int arg)
980 {
981         int done;
982         int err;
983         int i;
984
985         err = start(vdev, arg);
986         if (err)
987                 return err;
988
989         /* Wait for func to complete...2 seconds max */
990         for (i = 0; i < 2000; i++) {
991                 err = finished(vdev, &done);
992                 if (err)
993                         return err;
994                 if (done)
995                         return 0;
996                 usleep(1000);
997         }
998         return -ETIMEDOUT;
999 }
1000
1001 static int enic_dev_open(struct enic *enic)
1002 {
1003         int err;
1004         int flags = CMD_OPENF_IG_DESCCACHE;
1005
1006         err = enic_dev_wait(enic->vdev, vnic_dev_open,
1007                 vnic_dev_open_done, flags);
1008         if (err)
1009                 dev_err(enic_get_dev(enic),
1010                         "vNIC device open failed, err %d\n", err);
1011
1012         return err;
1013 }
1014
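/*
 * Push a 40-byte RSS hash key to the NIC. The key is copied into the
 * union vnic_rss_key layout (10-byte segments) in DMA memory, handed to
 * the NIC via enic_set_rss_key(), and cached in enic->rss_key for later
 * queries.
 */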
1015 static int enic_set_rsskey(struct enic *enic, uint8_t *user_key)
1016 {
1017         dma_addr_t rss_key_buf_pa;
1018         union vnic_rss_key *rss_key_buf_va = NULL;
1019         int err, i;
1020         u8 name[NAME_MAX];
1021
1022         RTE_ASSERT(user_key != NULL);
1023         snprintf((char *)name, NAME_MAX, "rss_key-%s", enic->bdf_name);
1024         rss_key_buf_va = enic_alloc_consistent(enic, sizeof(union vnic_rss_key),
1025                 &rss_key_buf_pa, name);
1026         if (!rss_key_buf_va)
1027                 return -ENOMEM;
1028
1029         for (i = 0; i < ENIC_RSS_HASH_KEY_SIZE; i++)
1030                 rss_key_buf_va->key[i / 10].b[i % 10] = user_key[i];
1031
1032         err = enic_set_rss_key(enic,
1033                 rss_key_buf_pa,
1034                 sizeof(union vnic_rss_key));
1035
1036         /* Save for later queries */
1037         if (!err) {
1038                 rte_memcpy(&enic->rss_key, rss_key_buf_va,
1039                            sizeof(union vnic_rss_key));
1040         }
1041         enic_free_consistent(enic, sizeof(union vnic_rss_key),
1042                 rss_key_buf_va, rss_key_buf_pa);
1043
1044         return err;
1045 }
1046
1047 int enic_set_rss_reta(struct enic *enic, union vnic_rss_cpu *rss_cpu)
1048 {
1049         dma_addr_t rss_cpu_buf_pa;
1050         union vnic_rss_cpu *rss_cpu_buf_va = NULL;
1051         int err;
1052         u8 name[NAME_MAX];
1053
1054         snprintf((char *)name, NAME_MAX, "rss_cpu-%s", enic->bdf_name);
1055         rss_cpu_buf_va = enic_alloc_consistent(enic, sizeof(union vnic_rss_cpu),
1056                 &rss_cpu_buf_pa, name);
1057         if (!rss_cpu_buf_va)
1058                 return -ENOMEM;
1059
1060         rte_memcpy(rss_cpu_buf_va, rss_cpu, sizeof(union vnic_rss_cpu));
1061
1062         err = enic_set_rss_cpu(enic,
1063                 rss_cpu_buf_pa,
1064                 sizeof(union vnic_rss_cpu));
1065
1066         enic_free_consistent(enic, sizeof(union vnic_rss_cpu),
1067                 rss_cpu_buf_va, rss_cpu_buf_pa);
1068
1069         /* Save for later queries */
1070         if (!err)
1071                 rte_memcpy(&enic->rss_cpu, rss_cpu, sizeof(union vnic_rss_cpu));
1072         return err;
1073 }
1074
1075 static int enic_set_niccfg(struct enic *enic, u8 rss_default_cpu,
1076         u8 rss_hash_type, u8 rss_hash_bits, u8 rss_base_cpu, u8 rss_enable)
1077 {
1078         const u8 tso_ipid_split_en = 0;
1079         int err;
1080
1081         err = enic_set_nic_cfg(enic,
1082                 rss_default_cpu, rss_hash_type,
1083                 rss_hash_bits, rss_base_cpu,
1084                 rss_enable, tso_ipid_split_en,
1085                 enic->ig_vlan_strip_en);
1086
1087         return err;
1088 }
1089
1090 /* Initialize RSS with defaults, called from dev_configure */
1091 int enic_init_rss_nic_cfg(struct enic *enic)
1092 {
1093         static uint8_t default_rss_key[] = {
1094                 85, 67, 83, 97, 119, 101, 115, 111, 109, 101,
1095                 80, 65, 76, 79, 117, 110, 105, 113, 117, 101,
1096                 76, 73, 78, 85, 88, 114, 111, 99, 107, 115,
1097                 69, 78, 73, 67, 105, 115, 99, 111, 111, 108,
1098         };
1099         struct rte_eth_rss_conf rss_conf;
1100         union vnic_rss_cpu rss_cpu;
1101         int ret, i;
1102
1103         rss_conf = enic->rte_dev->data->dev_conf.rx_adv_conf.rss_conf;
1104         /*
1105          * If setting key for the first time, and the user gives us none, then
1106          * push the default key to NIC.
1107          */
1108         if (rss_conf.rss_key == NULL) {
1109                 rss_conf.rss_key = default_rss_key;
1110                 rss_conf.rss_key_len = ENIC_RSS_HASH_KEY_SIZE;
1111         }
1112         ret = enic_set_rss_conf(enic, &rss_conf);
1113         if (ret) {
1114                 dev_err(enic, "Failed to configure RSS\n");
1115                 return ret;
1116         }
1117         if (enic->rss_enable) {
1118                 /* If enabling RSS, use the default reta */
1119                 for (i = 0; i < ENIC_RSS_RETA_SIZE; i++) {
1120                         rss_cpu.cpu[i / 4].b[i % 4] =
1121                                 enic_rte_rq_idx_to_sop_idx(i % enic->rq_count);
1122                 }
1123                 ret = enic_set_rss_reta(enic, &rss_cpu);
1124                 if (ret)
1125                         dev_err(enic, "Failed to set RSS indirection table\n");
1126         }
1127         return ret;
1128 }
1129
1130 int enic_setup_finish(struct enic *enic)
1131 {
1132         enic_init_soft_stats(enic);
1133
1134         /* Default conf */
1135         vnic_dev_packet_filter(enic->vdev,
1136                 1 /* directed  */,
1137                 1 /* multicast */,
1138                 1 /* broadcast */,
1139                 0 /* promisc   */,
1140                 1 /* allmulti  */);
1141
1142         enic->promisc = 0;
1143         enic->allmulti = 1;
1144
1145         return 0;
1146 }
1147
1148 static int enic_rss_conf_valid(struct enic *enic,
1149                                struct rte_eth_rss_conf *rss_conf)
1150 {
1151         /* RSS is disabled per VIC settings. Ignore rss_conf. */
1152         if (enic->flow_type_rss_offloads == 0)
1153                 return 0;
1154         if (rss_conf->rss_key != NULL &&
1155             rss_conf->rss_key_len != ENIC_RSS_HASH_KEY_SIZE) {
1156                 dev_err(enic, "Given rss_key is %d bytes, it must be %d\n",
1157                         rss_conf->rss_key_len, ENIC_RSS_HASH_KEY_SIZE);
1158                 return -EINVAL;
1159         }
1160         if (rss_conf->rss_hf != 0 &&
1161             (rss_conf->rss_hf & enic->flow_type_rss_offloads) == 0) {
1162                 dev_err(enic, "Given rss_hf contains none of the supported"
1163                         " types\n");
1164                 return -EINVAL;
1165         }
1166         return 0;
1167 }
1168
1169 /* Set hash type and key according to rss_conf */
1170 int enic_set_rss_conf(struct enic *enic, struct rte_eth_rss_conf *rss_conf)
1171 {
1172         struct rte_eth_dev *eth_dev;
1173         uint64_t rss_hf;
1174         u8 rss_hash_type;
1175         u8 rss_enable;
1176         int ret;
1177
1178         RTE_ASSERT(rss_conf != NULL);
1179         ret = enic_rss_conf_valid(enic, rss_conf);
1180         if (ret) {
1181                 dev_err(enic, "RSS configuration (rss_conf) is invalid\n");
1182                 return ret;
1183         }
1184
1185         eth_dev = enic->rte_dev;
1186         rss_hash_type = 0;
1187         rss_hf = rss_conf->rss_hf & enic->flow_type_rss_offloads;
1188         if (enic->rq_count > 1 &&
1189             (eth_dev->data->dev_conf.rxmode.mq_mode & ETH_MQ_RX_RSS_FLAG) &&
1190             rss_hf != 0) {
1191                 rss_enable = 1;
1192                 if (rss_hf & (ETH_RSS_IPV4 | ETH_RSS_FRAG_IPV4 |
1193                               ETH_RSS_NONFRAG_IPV4_OTHER))
1194                         rss_hash_type |= NIC_CFG_RSS_HASH_TYPE_IPV4;
1195                 if (rss_hf & ETH_RSS_NONFRAG_IPV4_TCP)
1196                         rss_hash_type |= NIC_CFG_RSS_HASH_TYPE_TCP_IPV4;
1197                 if (rss_hf & ETH_RSS_NONFRAG_IPV4_UDP) {
1198                         rss_hash_type |= NIC_CFG_RSS_HASH_TYPE_UDP_IPV4;
1199                         if (enic->udp_rss_weak) {
1200                                 /*
1201                                  * 'TCP' is not a typo. The "weak" version of
1202                                  * UDP RSS requires both the TCP and UDP bits
1203                                  * be set. It does enable TCP RSS as well.
1204                                  */
1205                                 rss_hash_type |= NIC_CFG_RSS_HASH_TYPE_TCP_IPV4;
1206                         }
1207                 }
1208                 if (rss_hf & (ETH_RSS_IPV6 | ETH_RSS_IPV6_EX |
1209                               ETH_RSS_FRAG_IPV6 | ETH_RSS_NONFRAG_IPV6_OTHER))
1210                         rss_hash_type |= NIC_CFG_RSS_HASH_TYPE_IPV6;
1211                 if (rss_hf & (ETH_RSS_NONFRAG_IPV6_TCP | ETH_RSS_IPV6_TCP_EX))
1212                         rss_hash_type |= NIC_CFG_RSS_HASH_TYPE_TCP_IPV6;
1213                 if (rss_hf & (ETH_RSS_NONFRAG_IPV6_UDP | ETH_RSS_IPV6_UDP_EX)) {
1214                         rss_hash_type |= NIC_CFG_RSS_HASH_TYPE_UDP_IPV6;
1215                         if (enic->udp_rss_weak)
1216                                 rss_hash_type |= NIC_CFG_RSS_HASH_TYPE_TCP_IPV6;
1217                 }
1218         } else {
1219                 rss_enable = 0;
1220                 rss_hf = 0;
1221         }
1222
1223         /* Set the hash key if provided */
1224         if (rss_enable && rss_conf->rss_key) {
1225                 ret = enic_set_rsskey(enic, rss_conf->rss_key);
1226                 if (ret) {
1227                         dev_err(enic, "Failed to set RSS key\n");
1228                         return ret;
1229                 }
1230         }
1231
1232         ret = enic_set_niccfg(enic, ENIC_RSS_DEFAULT_CPU, rss_hash_type,
1233                               ENIC_RSS_HASH_BITS, ENIC_RSS_BASE_CPU,
1234                               rss_enable);
1235         if (!ret) {
1236                 enic->rss_hf = rss_hf;
1237                 enic->rss_hash_type = rss_hash_type;
1238                 enic->rss_enable = rss_enable;
1239         } else {
1240                 dev_err(enic, "Failed to update RSS configurations."
1241                         " hash=0x%x\n", rss_hash_type);
1242         }
1243         return ret;
1244 }
1245
1246 int enic_set_vlan_strip(struct enic *enic)
1247 {
1248         /*
1249          * Unfortunately, VLAN strip on/off and RSS on/off are configured
1250          * together. So, re-do niccfg, preserving the current RSS settings.
1251          */
1252         return enic_set_niccfg(enic, ENIC_RSS_DEFAULT_CPU, enic->rss_hash_type,
1253                                ENIC_RSS_HASH_BITS, ENIC_RSS_BASE_CPU,
1254                                enic->rss_enable);
1255 }
1256
1257 void enic_add_packet_filter(struct enic *enic)
1258 {
1259         /* Args -> directed, multicast, broadcast, promisc, allmulti */
1260         vnic_dev_packet_filter(enic->vdev, 1, 1, 1,
1261                 enic->promisc, enic->allmulti);
1262 }
1263
1264 int enic_get_link_status(struct enic *enic)
1265 {
1266         return vnic_dev_link_status(enic->vdev);
1267 }
1268
1269 static void enic_dev_deinit(struct enic *enic)
1270 {
1271         struct rte_eth_dev *eth_dev = enic->rte_dev;
1272
1273         /* stop link status checking */
1274         vnic_dev_notify_unset(enic->vdev);
1275
1276         rte_free(eth_dev->data->mac_addrs);
1277         rte_free(enic->cq);
1278         rte_free(enic->intr);
1279         rte_free(enic->rq);
1280         rte_free(enic->wq);
1281 }
1282
1283
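/*
 * Check that the vNIC provides enough resources for the requested ethdev
 * configuration: two RQs per Rx queue, one WQ per Tx queue, one CQ per
 * queue, and one interrupt per Rx queue (when Rx interrupts are enabled)
 * plus one for link-status changes.
 */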
1284 int enic_set_vnic_res(struct enic *enic)
1285 {
1286         struct rte_eth_dev *eth_dev = enic->rte_dev;
1287         int rc = 0;
1288         unsigned int required_rq, required_wq, required_cq, required_intr;
1289
1290         /* Always use two vNIC RQs per eth_dev RQ, regardless of Rx scatter. */
1291         required_rq = eth_dev->data->nb_rx_queues * 2;
1292         required_wq = eth_dev->data->nb_tx_queues;
1293         required_cq = eth_dev->data->nb_rx_queues + eth_dev->data->nb_tx_queues;
1294         required_intr = 1; /* 1 for LSC even if intr_conf.lsc is 0 */
1295         if (eth_dev->data->dev_conf.intr_conf.rxq) {
1296                 required_intr += eth_dev->data->nb_rx_queues;
1297         }
1298
1299         if (enic->conf_rq_count < required_rq) {
1300                 dev_err(dev, "Not enough Receive queues. Requested:%u which uses %d RQs on VIC, Configured:%u\n",
1301                         eth_dev->data->nb_rx_queues,
1302                         required_rq, enic->conf_rq_count);
1303                 rc = -EINVAL;
1304         }
1305         if (enic->conf_wq_count < required_wq) {
1306                 dev_err(dev, "Not enough Transmit queues. Requested:%u, Configured:%u\n",
1307                         eth_dev->data->nb_tx_queues, enic->conf_wq_count);
1308                 rc = -EINVAL;
1309         }
1310
1311         if (enic->conf_cq_count < required_cq) {
1312                 dev_err(dev, "Not enough Completion queues. Required:%u, Configured:%u\n",
1313                         required_cq, enic->conf_cq_count);
1314                 rc = -EINVAL;
1315         }
1316         if (enic->conf_intr_count < required_intr) {
1317                 dev_err(dev, "Not enough Interrupts to support Rx queue"
1318                         " interrupts. Required:%u, Configured:%u\n",
1319                         required_intr, enic->conf_intr_count);
1320                 rc = -EINVAL;
1321         }
1322
1323         if (rc == 0) {
1324                 enic->rq_count = eth_dev->data->nb_rx_queues;
1325                 enic->wq_count = eth_dev->data->nb_tx_queues;
1326                 enic->cq_count = enic->rq_count + enic->wq_count;
1327                 enic->intr_count = required_intr;
1328         }
1329
1330         return rc;
1331 }
1332
1333 /* Reinitialize the completion queue and RQs for an Rx queue (called from enic_set_mtu) */
1334 static int
1335 enic_reinit_rq(struct enic *enic, unsigned int rq_idx)
1336 {
1337         struct vnic_rq *sop_rq, *data_rq;
1338         unsigned int cq_idx;
1339         int rc = 0;
1340
1341         sop_rq = &enic->rq[enic_rte_rq_idx_to_sop_idx(rq_idx)];
1342         data_rq = &enic->rq[enic_rte_rq_idx_to_data_idx(rq_idx)];
1343         cq_idx = rq_idx;
1344
1345         vnic_cq_clean(&enic->cq[cq_idx]);
1346         vnic_cq_init(&enic->cq[cq_idx],
1347                      0 /* flow_control_enable */,
1348                      1 /* color_enable */,
1349                      0 /* cq_head */,
1350                      0 /* cq_tail */,
1351                      1 /* cq_tail_color */,
1352                      0 /* interrupt_enable */,
1353                      1 /* cq_entry_enable */,
1354                      0 /* cq_message_enable */,
1355                      0 /* interrupt offset */,
1356                      0 /* cq_message_addr */);
1357
1358
1359         vnic_rq_init_start(sop_rq, enic_cq_rq(enic,
1360                            enic_rte_rq_idx_to_sop_idx(rq_idx)), 0,
1361                            sop_rq->ring.desc_count - 1, 1, 0);
1362         if (data_rq->in_use) {
1363                 vnic_rq_init_start(data_rq,
1364                                    enic_cq_rq(enic,
1365                                    enic_rte_rq_idx_to_data_idx(rq_idx)), 0,
1366                                    data_rq->ring.desc_count - 1, 1, 0);
1367         }
1368
1369         rc = enic_alloc_rx_queue_mbufs(enic, sop_rq);
1370         if (rc)
1371                 return rc;
1372
1373         if (data_rq->in_use) {
1374                 rc = enic_alloc_rx_queue_mbufs(enic, data_rq);
1375                 if (rc) {
1376                         enic_rxmbuf_queue_release(enic, sop_rq);
1377                         return rc;
1378                 }
1379         }
1380
1381         return 0;
1382 }
1383
1384 /* The Cisco NIC can send and receive packets up to a max packet size
1385  * determined by the NIC type and firmware. There is also an MTU
1386  * configured into the NIC via the CIMC/UCSM management interface
1387  * which can be overridden by this function (up to the max packet size).
1388  * Depending on the network setup, doing so may cause packet drops
1389  * and unexpected behavior.
1390  */
1391 int enic_set_mtu(struct enic *enic, uint16_t new_mtu)
1392 {
1393         unsigned int rq_idx;
1394         struct vnic_rq *rq;
1395         int rc = 0;
1396         uint16_t old_mtu;       /* previous setting */
1397         uint16_t config_mtu;    /* Value configured into NIC via CIMC/UCSM */
1398         struct rte_eth_dev *eth_dev = enic->rte_dev;
1399
1400         old_mtu = eth_dev->data->mtu;
1401         config_mtu = enic->config.mtu;
1402
1403         if (rte_eal_process_type() != RTE_PROC_PRIMARY)
1404                 return -E_RTE_SECONDARY;
1405
1406         if (new_mtu > enic->max_mtu) {
1407                 dev_err(enic,
1408                         "MTU not updated: requested (%u) greater than max (%u)\n",
1409                         new_mtu, enic->max_mtu);
1410                 return -EINVAL;
1411         }
1412         if (new_mtu < ENIC_MIN_MTU) {
1413                 dev_info(enic,
1414                         "MTU not updated: requested (%u) less than min (%u)\n",
1415                         new_mtu, ENIC_MIN_MTU);
1416                 return -EINVAL;
1417         }
1418         if (new_mtu > config_mtu)
1419                 dev_warning(enic,
1420                         "MTU (%u) is greater than value configured in NIC (%u)\n",
1421                         new_mtu, config_mtu);
1422
1423         /* Update the MTU and maximum packet length */
1424         eth_dev->data->mtu = new_mtu;
1425         eth_dev->data->dev_conf.rxmode.max_rx_pkt_len =
1426                 enic_mtu_to_max_rx_pktlen(new_mtu);
1427
1428         /*
1429          * If the device has not started (enic_enable), nothing to do.
1430          * Later, enic_enable() will set up RQs reflecting the new maximum
1431          * packet length.
1432          */
1433         if (!eth_dev->data->dev_started)
1434                 goto set_mtu_done;
1435
1436         /*
1437          * The device has started, re-do RQs on the fly. In the process, we
1438          * pick up the new maximum packet length.
1439          *
1440          * Some applications rely on the ability to change MTU without stopping
1441          * the device. So keep this behavior for now.
1442          */
1443         rte_spinlock_lock(&enic->mtu_lock);
1444
1445         /* Stop traffic on all RQs */
1446         for (rq_idx = 0; rq_idx < enic->rq_count * 2; rq_idx++) {
1447                 rq = &enic->rq[rq_idx];
1448                 if (rq->is_sop && rq->in_use) {
1449                         rc = enic_stop_rq(enic,
1450                                           enic_sop_rq_idx_to_rte_idx(rq_idx));
1451                         if (rc) {
1452                                 dev_err(enic, "Failed to stop RQ %u\n", rq_idx);
1453                                 goto set_mtu_done;
1454                         }
1455                 }
1456         }
1457
1458         /* replace Rx function with a no-op to avoid getting stale pkts */
1459         eth_dev->rx_pkt_burst = enic_dummy_recv_pkts;
1460         rte_mb();
1461
1462         /* Allow time for threads to exit the real Rx function. */
1463         usleep(100000);
1464
1465         /* Now it is safe to reconfigure the RQs */
1466
1468         /* free and reallocate RQs with the new MTU */
1469         for (rq_idx = 0; rq_idx < enic->rq_count; rq_idx++) {
1470                 rq = &enic->rq[enic_rte_rq_idx_to_sop_idx(rq_idx)];
1471                 if (!rq->in_use)
1472                         continue;
1473
1474                 enic_free_rq(rq);
1475                 rc = enic_alloc_rq(enic, rq_idx, rq->socket_id, rq->mp,
1476                                    rq->tot_nb_desc, rq->rx_free_thresh);
1477                 if (rc) {
1478                         dev_err(enic,
1479                                 "Fatal MTU alloc error - no traffic will pass\n");
1480                         goto set_mtu_done;
1481                 }
1482
1483                 rc = enic_reinit_rq(enic, rq_idx);
1484                 if (rc) {
1485                         dev_err(enic,
1486                                 "Fatal MTU RQ reinit - no traffic will pass\n");
1487                         goto set_mtu_done;
1488                 }
1489         }
1490
1491         /* put back the real receive function */
1492         rte_mb();
1493         eth_dev->rx_pkt_burst = enic_recv_pkts;
1494         rte_mb();
1495
1496         /* restart Rx traffic */
1497         for (rq_idx = 0; rq_idx < enic->rq_count; rq_idx++) {
1498                 rq = &enic->rq[enic_rte_rq_idx_to_sop_idx(rq_idx)];
1499                 if (rq->is_sop && rq->in_use)
1500                         enic_start_rq(enic, rq_idx);
1501         }
1502
1503 set_mtu_done:
1504         dev_info(enic, "MTU changed from %u to %u\n", old_mtu, new_mtu);
1505         rte_spinlock_unlock(&enic->mtu_lock);
1506         return rc;
1507 }
1508
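/*
 * Per-device software initialization: fetch the vNIC configuration from
 * firmware, allocate the CQ/interrupt/RQ/WQ bookkeeping arrays and the MAC
 * address table, set up link-status notification, and enable overlay
 * (VXLAN/GENEVE) Tx offloads when the adapter supports them.
 */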
1509 static int enic_dev_init(struct enic *enic)
1510 {
1511         int err;
1512         struct rte_eth_dev *eth_dev = enic->rte_dev;
1513
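        /* Initialize the interrupt coalescing timer parameters to defaults */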
1514         vnic_dev_intr_coal_timer_info_default(enic->vdev);
1515
1516         /* Get vNIC configuration */
1518         err = enic_get_vnic_config(enic);
1519         if (err) {
1520                 dev_err(enic, "Get vNIC configuration failed, aborting\n");
1521                 return err;
1522         }
1523
1524         /* Get available resource counts */
1525         enic_get_res_counts(enic);
1526         if (enic->conf_rq_count == 1) {
1527                 dev_err(enic, "Running with only 1 RQ configured in the vNIC is not supported.\n");
1528                 dev_err(enic, "Please configure 2 RQs in the vNIC for each Rx queue used by DPDK.\n");
1529                 dev_err(enic, "See the ENIC PMD guide for more information.\n");
1530                 return -EINVAL;
1531         }
1532         /* Queue counts may be zero. rte_zmalloc returns NULL in that case. */
1533         enic->cq = rte_zmalloc("enic_vnic_cq", sizeof(struct vnic_cq) *
1534                                enic->conf_cq_count, 8);
1535         enic->intr = rte_zmalloc("enic_vnic_intr", sizeof(struct vnic_intr) *
1536                                  enic->conf_intr_count, 8);
1537         enic->rq = rte_zmalloc("enic_vnic_rq", sizeof(struct vnic_rq) *
1538                                enic->conf_rq_count, 8);
1539         enic->wq = rte_zmalloc("enic_vnic_wq", sizeof(struct vnic_wq) *
1540                                enic->conf_wq_count, 8);
1541         if (enic->conf_cq_count > 0 && enic->cq == NULL) {
1542                 dev_err(enic, "failed to allocate vnic_cq, aborting.\n");
1543                 return -1;
1544         }
1545         if (enic->conf_intr_count > 0 && enic->intr == NULL) {
1546                 dev_err(enic, "failed to allocate vnic_intr, aborting.\n");
1547                 return -1;
1548         }
1549         if (enic->conf_rq_count > 0 && enic->rq == NULL) {
1550                 dev_err(enic, "failed to allocate vnic_rq, aborting.\n");
1551                 return -1;
1552         }
1553         if (enic->conf_wq_count > 0 && enic->wq == NULL) {
1554                 dev_err(enic, "failed to allocate vnic_wq, aborting.\n");
1555                 return -1;
1556         }
1557
1558         /* Get the supported filters */
1559         enic_fdir_info(enic);
1560
1561         eth_dev->data->mac_addrs = rte_zmalloc("enic_mac_addr", ETH_ALEN
1562                                                 * ENIC_MAX_MAC_ADDR, 0);
1563         if (!eth_dev->data->mac_addrs) {
1564                 dev_err(enic, "mac addr storage alloc failed, aborting.\n");
1565                 return -1;
1566         }
1567         ether_addr_copy((struct ether_addr *) enic->mac_addr,
1568                         eth_dev->data->mac_addrs);
1569
1570         vnic_dev_set_reset_flag(enic->vdev, 0);
1571
1572         LIST_INIT(&enic->flows);
1573         rte_spinlock_init(&enic->flows_lock);
1574
1575         /* set up link status checking */
1576         vnic_dev_notify_set(enic->vdev, -1); /* No Intr for notify */
1577
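        /*
         * Tunnel (overlay) Tx offload: if the adapter exposes the VXLAN
         * feature and it has not been disabled via the devargs, enable it
         * and advertise the corresponding Tx offload capabilities.
         */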
1578         enic->overlay_offload = false;
1579         if (!enic->disable_overlay && enic->vxlan &&
1580             /* 'VXLAN feature' enables VXLAN, NVGRE, and GENEVE. */
1581             vnic_dev_overlay_offload_ctrl(enic->vdev,
1582                                           OVERLAY_FEATURE_VXLAN,
1583                                           OVERLAY_OFFLOAD_ENABLE) == 0) {
1584                 enic->tx_offload_capa |=
1585                         DEV_TX_OFFLOAD_OUTER_IPV4_CKSUM |
1586                         DEV_TX_OFFLOAD_GENEVE_TNL_TSO |
1587                         DEV_TX_OFFLOAD_VXLAN_TNL_TSO;
1588                 /*
1589                  * Do not add PKT_TX_OUTER_{IPV4,IPV6} as they are not
1590                  * 'offload' flags (i.e. not part of PKT_TX_OFFLOAD_MASK).
1591                  */
1592                 enic->tx_offload_mask |=
1593                         PKT_TX_OUTER_IP_CKSUM |
1594                         PKT_TX_TUNNEL_MASK;
1595                 enic->overlay_offload = true;
1596                 dev_info(enic, "Overlay offload is enabled\n");
1597         }
1598
1599         return 0;
1600
1601 }
1602
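/*
 * One-time probe: map BAR0, register the vNIC with the shared vNIC library
 * code, allocate the stats DMA memory, open and initialize the device, and
 * run the common software initialization (enic_dev_init).
 */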
1603 int enic_probe(struct enic *enic)
1604 {
1605         struct rte_pci_device *pdev = enic->pdev;
1606         int err = -1;
1607
1608         dev_debug(enic, "Initializing ENIC PMD\n");
1609
1610         /* If this is a secondary process, the hardware is already initialized */
1611         if (rte_eal_process_type() != RTE_PROC_PRIMARY)
1612                 return 0;
1613
1614         enic->bar0.vaddr = (void *)pdev->mem_resource[0].addr;
1615         enic->bar0.len = pdev->mem_resource[0].len;
1616
1617         /* Register vNIC device */
1618         enic->vdev = vnic_dev_register(NULL, enic, enic->pdev, &enic->bar0, 1);
1619         if (!enic->vdev) {
1620                 dev_err(enic, "vNIC registration failed, aborting\n");
1621                 goto err_out;
1622         }
1623
1624         LIST_INIT(&enic->memzone_list);
1625         rte_spinlock_init(&enic->memzone_list_lock);
1626
1627         vnic_register_cbacks(enic->vdev,
1628                 enic_alloc_consistent,
1629                 enic_free_consistent);
1630
1631         /*
1632          * Allocate the consistent memory for stats upfront so both primary and
1633          * secondary processes can dump stats.
1634          */
1635         err = vnic_dev_alloc_stats_mem(enic->vdev);
1636         if (err) {
1637                 dev_err(enic, "Failed to allocate cmd memory, aborting\n");
1638                 goto err_out_unregister;
1639         }
1640         /* Issue device open to get device in known state */
1641         err = enic_dev_open(enic);
1642         if (err) {
1643                 dev_err(enic, "vNIC dev open failed, aborting\n");
1644                 goto err_out_unregister;
1645         }
1646
1647         /* Set ingress vlan rewrite mode before vnic initialization */
1648         err = vnic_dev_set_ig_vlan_rewrite_mode(enic->vdev,
1649                 IG_VLAN_REWRITE_MODE_PASS_THRU);
1650         if (err) {
1651                 dev_err(enic,
1652                         "Failed to set ingress vlan rewrite mode, aborting.\n");
1653                 goto err_out_dev_close;
1654         }
1655
1656         /* Issue device init to initialize the vnic-to-switch link.
1657          * We start with carrier off and turn it on once the link UP
1658          * notification arrives.  There is no need to wait here for the
1659          * vnic-to-switch link initialization to complete; the link UP
1660          * notification is the indication that it has finished.
1661          */
1664         err = vnic_dev_init(enic->vdev, 0);
1665         if (err) {
1666                 dev_err(enic, "vNIC dev init failed, aborting\n");
1667                 goto err_out_dev_close;
1668         }
1669
1670         err = enic_dev_init(enic);
1671         if (err) {
1672                 dev_err(enic, "Device initialization failed, aborting\n");
1673                 goto err_out_dev_close;
1674         }
1675
1676         return 0;
1677
1678 err_out_dev_close:
1679         vnic_dev_close(enic->vdev);
1680 err_out_unregister:
1681         vnic_dev_unregister(enic->vdev);
1682 err_out:
1683         return err;
1684 }
1685
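/* Undo enic_probe(): deinitialize, close and unregister the vNIC device */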
1686 void enic_remove(struct enic *enic)
1687 {
1688         enic_dev_deinit(enic);
1689         vnic_dev_close(enic->vdev);
1690         vnic_dev_unregister(enic->vdev);
1691 }