net/enic: add the simple version of Tx handler
drivers/net/enic/enic_main.c (dpdk.git)
1 /* SPDX-License-Identifier: BSD-3-Clause
2  * Copyright 2008-2017 Cisco Systems, Inc.  All rights reserved.
3  * Copyright 2007 Nuova Systems, Inc.  All rights reserved.
4  */
5
6 #include <stdio.h>
7
8 #include <sys/stat.h>
9 #include <sys/mman.h>
10 #include <fcntl.h>
11 #include <libgen.h>
12
13 #include <rte_pci.h>
14 #include <rte_bus_pci.h>
15 #include <rte_memzone.h>
16 #include <rte_malloc.h>
17 #include <rte_mbuf.h>
18 #include <rte_string_fns.h>
19 #include <rte_ethdev_driver.h>
20
21 #include "enic_compat.h"
22 #include "enic.h"
23 #include "wq_enet_desc.h"
24 #include "rq_enet_desc.h"
25 #include "cq_enet_desc.h"
26 #include "vnic_enet.h"
27 #include "vnic_dev.h"
28 #include "vnic_wq.h"
29 #include "vnic_rq.h"
30 #include "vnic_cq.h"
31 #include "vnic_intr.h"
32 #include "vnic_nic.h"
33
34 static inline int enic_is_sriov_vf(struct enic *enic)
35 {
36         return enic->pdev->id.device_id == PCI_DEVICE_ID_CISCO_VIC_ENET_VF;
37 }
38
39 static int is_zero_addr(uint8_t *addr)
40 {
41         return !(addr[0] |  addr[1] | addr[2] | addr[3] | addr[4] | addr[5]);
42 }
43
44 static int is_mcast_addr(uint8_t *addr)
45 {
46         return addr[0] & 1;
47 }
48
49 static int is_eth_addr_valid(uint8_t *addr)
50 {
51         return !is_mcast_addr(addr) && !is_zero_addr(addr);
52 }
53
54 static void
55 enic_rxmbuf_queue_release(__rte_unused struct enic *enic, struct vnic_rq *rq)
56 {
57         uint16_t i;
58
59         if (!rq || !rq->mbuf_ring) {
60                 dev_debug(enic, "Pointer to rq or mbuf_ring is NULL");
61                 return;
62         }
63
64         for (i = 0; i < rq->ring.desc_count; i++) {
65                 if (rq->mbuf_ring[i]) {
66                         rte_pktmbuf_free_seg(rq->mbuf_ring[i]);
67                         rq->mbuf_ring[i] = NULL;
68                 }
69         }
70 }
71
72 static void enic_free_wq_buf(struct rte_mbuf **buf)
73 {
74         struct rte_mbuf *mbuf = *buf;
75
76         rte_pktmbuf_free_seg(mbuf);
77         *buf = NULL;
78 }
79
80 static void enic_log_q_error(struct enic *enic)
81 {
82         unsigned int i;
83         u32 error_status;
84
85         for (i = 0; i < enic->wq_count; i++) {
86                 error_status = vnic_wq_error_status(&enic->wq[i]);
87                 if (error_status)
88                         dev_err(enic, "WQ[%d] error_status %d\n", i,
89                                 error_status);
90         }
91
92         for (i = 0; i < enic_vnic_rq_count(enic); i++) {
93                 if (!enic->rq[i].in_use)
94                         continue;
95                 error_status = vnic_rq_error_status(&enic->rq[i]);
96                 if (error_status)
97                         dev_err(enic, "RQ[%d] error_status %d\n", i,
98                                 error_status);
99         }
100 }
101
102 static void enic_clear_soft_stats(struct enic *enic)
103 {
104         struct enic_soft_stats *soft_stats = &enic->soft_stats;
105         rte_atomic64_clear(&soft_stats->rx_nombuf);
106         rte_atomic64_clear(&soft_stats->rx_packet_errors);
107         rte_atomic64_clear(&soft_stats->tx_oversized);
108 }
109
110 static void enic_init_soft_stats(struct enic *enic)
111 {
112         struct enic_soft_stats *soft_stats = &enic->soft_stats;
113         rte_atomic64_init(&soft_stats->rx_nombuf);
114         rte_atomic64_init(&soft_stats->rx_packet_errors);
115         rte_atomic64_init(&soft_stats->tx_oversized);
116         enic_clear_soft_stats(enic);
117 }
118
119 void enic_dev_stats_clear(struct enic *enic)
120 {
121         if (vnic_dev_stats_clear(enic->vdev))
122                 dev_err(enic, "Error in clearing stats\n");
123         enic_clear_soft_stats(enic);
124 }
125
126 int enic_dev_stats_get(struct enic *enic, struct rte_eth_stats *r_stats)
127 {
128         struct vnic_stats *stats;
129         struct enic_soft_stats *soft_stats = &enic->soft_stats;
130         int64_t rx_truncated;
131         uint64_t rx_packet_errors;
132         int ret = vnic_dev_stats_dump(enic->vdev, &stats);
133
134         if (ret) {
135                 dev_err(enic, "Error in getting stats\n");
136                 return ret;
137         }
138
139         /* The number of truncated packets can only be calculated by
140          * subtracting a hardware counter from error packets received by
141          * the driver. Note: this causes transient inaccuracies in the
142          * ipackets count. Also, the lengths of truncated packets are
143          * counted in ibytes even though truncated packets are dropped,
144          * which can make ibytes slightly higher than it should be.
145          */
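        /*
         * Illustrative example (hypothetical numbers): if the driver has
         * counted 10 rx_packet_errors in software and the HW reports
         * rx_errors = 7, then rx_truncated = 3; those 3 frames are removed
         * from ipackets and added to imissed below.
         */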
146         rx_packet_errors = rte_atomic64_read(&soft_stats->rx_packet_errors);
147         rx_truncated = rx_packet_errors - stats->rx.rx_errors;
148
149         r_stats->ipackets = stats->rx.rx_frames_ok - rx_truncated;
150         r_stats->opackets = stats->tx.tx_frames_ok;
151
152         r_stats->ibytes = stats->rx.rx_bytes_ok;
153         r_stats->obytes = stats->tx.tx_bytes_ok;
154
155         r_stats->ierrors = stats->rx.rx_errors + stats->rx.rx_drop;
156         r_stats->oerrors = stats->tx.tx_errors
157                            + rte_atomic64_read(&soft_stats->tx_oversized);
158
159         r_stats->imissed = stats->rx.rx_no_bufs + rx_truncated;
160
161         r_stats->rx_nombuf = rte_atomic64_read(&soft_stats->rx_nombuf);
162         return 0;
163 }
164
165 int enic_del_mac_address(struct enic *enic, int mac_index)
166 {
167         struct rte_eth_dev *eth_dev = enic->rte_dev;
168         uint8_t *mac_addr = eth_dev->data->mac_addrs[mac_index].addr_bytes;
169
170         return vnic_dev_del_addr(enic->vdev, mac_addr);
171 }
172
173 int enic_set_mac_address(struct enic *enic, uint8_t *mac_addr)
174 {
175         int err;
176
177         if (!is_eth_addr_valid(mac_addr)) {
178                 dev_err(enic, "invalid mac address\n");
179                 return -EINVAL;
180         }
181
182         err = vnic_dev_add_addr(enic->vdev, mac_addr);
183         if (err)
184                 dev_err(enic, "add mac addr failed\n");
185         return err;
186 }
187
188 static void
189 enic_free_rq_buf(struct rte_mbuf **mbuf)
190 {
191         if (*mbuf == NULL)
192                 return;
193
194         rte_pktmbuf_free(*mbuf);
195         *mbuf = NULL;
196 }
197
198 void enic_init_vnic_resources(struct enic *enic)
199 {
200         unsigned int error_interrupt_enable = 1;
201         unsigned int error_interrupt_offset = 0;
202         unsigned int rxq_interrupt_enable = 0;
203         unsigned int rxq_interrupt_offset = ENICPMD_RXQ_INTR_OFFSET;
204         unsigned int index = 0;
205         unsigned int cq_idx;
206         struct vnic_rq *data_rq;
207
208         if (enic->rte_dev->data->dev_conf.intr_conf.rxq)
209                 rxq_interrupt_enable = 1;
210
211         for (index = 0; index < enic->rq_count; index++) {
212                 cq_idx = enic_cq_rq(enic, enic_rte_rq_idx_to_sop_idx(index));
213
214                 vnic_rq_init(&enic->rq[enic_rte_rq_idx_to_sop_idx(index)],
215                         cq_idx,
216                         error_interrupt_enable,
217                         error_interrupt_offset);
218
219                 data_rq = &enic->rq[enic_rte_rq_idx_to_data_idx(index)];
220                 if (data_rq->in_use)
221                         vnic_rq_init(data_rq,
222                                      cq_idx,
223                                      error_interrupt_enable,
224                                      error_interrupt_offset);
225
226                 vnic_cq_init(&enic->cq[cq_idx],
227                         0 /* flow_control_enable */,
228                         1 /* color_enable */,
229                         0 /* cq_head */,
230                         0 /* cq_tail */,
231                         1 /* cq_tail_color */,
232                         rxq_interrupt_enable,
233                         1 /* cq_entry_enable */,
234                         0 /* cq_message_enable */,
235                         rxq_interrupt_offset,
236                         0 /* cq_message_addr */);
237                 if (rxq_interrupt_enable)
238                         rxq_interrupt_offset++;
239         }
240
241         for (index = 0; index < enic->wq_count; index++) {
242                 vnic_wq_init(&enic->wq[index],
243                         enic_cq_wq(enic, index),
244                         error_interrupt_enable,
245                         error_interrupt_offset);
246                 /* Compute unsupported ol flags for enic_prep_pkts() */
247                 enic->wq[index].tx_offload_notsup_mask =
248                         PKT_TX_OFFLOAD_MASK ^ enic->tx_offload_mask;
249
250                 cq_idx = enic_cq_wq(enic, index);
251                 vnic_cq_init(&enic->cq[cq_idx],
252                         0 /* flow_control_enable */,
253                         1 /* color_enable */,
254                         0 /* cq_head */,
255                         0 /* cq_tail */,
256                         1 /* cq_tail_color */,
257                         0 /* interrupt_enable */,
258                         0 /* cq_entry_enable */,
259                         1 /* cq_message_enable */,
260                         0 /* interrupt offset */,
261                         (u64)enic->wq[index].cqmsg_rz->iova);
262         }
263
264         for (index = 0; index < enic->intr_count; index++) {
265                 vnic_intr_init(&enic->intr[index],
266                                enic->config.intr_timer_usec,
267                                enic->config.intr_timer_type,
268                                /*mask_on_assertion*/1);
269         }
270 }
271
272
273 static int
274 enic_alloc_rx_queue_mbufs(struct enic *enic, struct vnic_rq *rq)
275 {
276         struct rte_mbuf *mb;
277         struct rq_enet_desc *rqd = rq->ring.descs;
278         unsigned i;
279         dma_addr_t dma_addr;
280         uint32_t max_rx_pkt_len;
281         uint16_t rq_buf_len;
282
283         if (!rq->in_use)
284                 return 0;
285
286         dev_debug(enic, "queue %u, allocating %u rx queue mbufs\n", rq->index,
287                   rq->ring.desc_count);
288
289         /*
290          * If *not* using scatter and the mbuf size is greater than the
291          * requested max packet size (max_rx_pkt_len), then reduce the
292          * posted buffer size to max_rx_pkt_len. HW still receives packets
293          * larger than max_rx_pkt_len, but they will be truncated, which we
294          * drop in the rx handler. Not ideal, but better than returning
295          * large packets when the user is not expecting them.
296          */
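        /*
         * Illustrative example (hypothetical sizes): with a 2048-byte mbuf
         * data room and max_rx_pkt_len = 1518, 1518-byte buffers are posted,
         * so an oversized frame is truncated by HW and then dropped in the
         * Rx handler instead of being delivered to the application.
         */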
297         max_rx_pkt_len = enic->rte_dev->data->dev_conf.rxmode.max_rx_pkt_len;
298         rq_buf_len = rte_pktmbuf_data_room_size(rq->mp) - RTE_PKTMBUF_HEADROOM;
299         if (max_rx_pkt_len < rq_buf_len && !rq->data_queue_enable)
300                 rq_buf_len = max_rx_pkt_len;
301         for (i = 0; i < rq->ring.desc_count; i++, rqd++) {
302                 mb = rte_mbuf_raw_alloc(rq->mp);
303                 if (mb == NULL) {
304                         dev_err(enic, "RX mbuf alloc failed queue_id=%u\n",
305                         (unsigned)rq->index);
306                         return -ENOMEM;
307                 }
308
309                 mb->data_off = RTE_PKTMBUF_HEADROOM;
310                 dma_addr = (dma_addr_t)(mb->buf_iova
311                            + RTE_PKTMBUF_HEADROOM);
312                 rq_enet_desc_enc(rqd, dma_addr,
313                                 (rq->is_sop ? RQ_ENET_TYPE_ONLY_SOP
314                                 : RQ_ENET_TYPE_NOT_SOP),
315                                 rq_buf_len);
316                 rq->mbuf_ring[i] = mb;
317         }
318         /*
319          * Do not post the buffers to the NIC until we enable the RQ via
320          * enic_start_rq().
321          */
322         rq->need_initial_post = true;
323         /* Initialize fetch index while RQ is disabled */
324         iowrite32(0, &rq->ctrl->fetch_index);
325         return 0;
326 }
327
328 /*
329  * Post the Rx buffers for the first time. enic_alloc_rx_queue_mbufs() has
330  * allocated the buffers and filled the RQ descriptor ring. Just need to push
331  * the post index to the NIC.
332  */
333 static void
334 enic_initial_post_rx(struct enic *enic, struct vnic_rq *rq)
335 {
336         if (!rq->in_use || !rq->need_initial_post)
337                 return;
338
339         /* make sure all prior writes are complete before doing the PIO write */
340         rte_rmb();
341
342         /* Post all but the last buffer to VIC. */
343         rq->posted_index = rq->ring.desc_count - 1;
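        /*
         * Posting desc_count - 1 buffers leaves one descriptor unused; this
         * is assumed here to follow the usual ring convention that keeps a
         * full ring distinguishable from an empty one.
         */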
344
345         rq->rx_nb_hold = 0;
346
347         dev_debug(enic, "port=%u, qidx=%u, Write %u posted idx, %u sw held\n",
348                 enic->port_id, rq->index, rq->posted_index, rq->rx_nb_hold);
349         iowrite32(rq->posted_index, &rq->ctrl->posted_index);
350         rte_rmb();
351         rq->need_initial_post = false;
352 }
353
354 static void *
355 enic_alloc_consistent(void *priv, size_t size,
356         dma_addr_t *dma_handle, u8 *name)
357 {
358         void *vaddr;
359         const struct rte_memzone *rz;
360         *dma_handle = 0;
361         struct enic *enic = (struct enic *)priv;
362         struct enic_memzone_entry *mze;
363
364         rz = rte_memzone_reserve_aligned((const char *)name, size,
365                         SOCKET_ID_ANY, RTE_MEMZONE_IOVA_CONTIG, ENIC_ALIGN);
366         if (!rz) {
367                 pr_err("%s : Failed to allocate memory requested for %s\n",
368                         __func__, name);
369                 return NULL;
370         }
371
372         vaddr = rz->addr;
373         *dma_handle = (dma_addr_t)rz->iova;
374
375         mze = rte_malloc("enic memzone entry",
376                          sizeof(struct enic_memzone_entry), 0);
377
378         if (!mze) {
379                 pr_err("%s : Failed to allocate memory for memzone list\n",
380                        __func__);
381                 rte_memzone_free(rz);
382                 return NULL;
383         }
384
385         mze->rz = rz;
386
387         rte_spinlock_lock(&enic->memzone_list_lock);
388         LIST_INSERT_HEAD(&enic->memzone_list, mze, entries);
389         rte_spinlock_unlock(&enic->memzone_list_lock);
390
391         return vaddr;
392 }
393
394 static void
395 enic_free_consistent(void *priv,
396                      __rte_unused size_t size,
397                      void *vaddr,
398                      dma_addr_t dma_handle)
399 {
400         struct enic_memzone_entry *mze;
401         struct enic *enic = (struct enic *)priv;
402
403         rte_spinlock_lock(&enic->memzone_list_lock);
404         LIST_FOREACH(mze, &enic->memzone_list, entries) {
405                 if (mze->rz->addr == vaddr &&
406                     mze->rz->iova == dma_handle)
407                         break;
408         }
409         if (mze == NULL) {
410                 rte_spinlock_unlock(&enic->memzone_list_lock);
411                 dev_warning(enic,
412                             "Tried to free memory, but couldn't find it in the memzone list\n");
413                 return;
414         }
415         LIST_REMOVE(mze, entries);
416         rte_spinlock_unlock(&enic->memzone_list_lock);
417         rte_memzone_free(mze->rz);
418         rte_free(mze);
419 }
420
421 int enic_link_update(struct enic *enic)
422 {
423         struct rte_eth_dev *eth_dev = enic->rte_dev;
424         struct rte_eth_link link;
425
426         memset(&link, 0, sizeof(link));
427         link.link_status = enic_get_link_status(enic);
428         link.link_duplex = ETH_LINK_FULL_DUPLEX;
429         link.link_speed = vnic_dev_port_speed(enic->vdev);
430
431         return rte_eth_linkstatus_set(eth_dev, &link);
432 }
433
434 static void
435 enic_intr_handler(void *arg)
436 {
437         struct rte_eth_dev *dev = (struct rte_eth_dev *)arg;
438         struct enic *enic = pmd_priv(dev);
439
440         vnic_intr_return_all_credits(&enic->intr[ENICPMD_LSC_INTR_OFFSET]);
441
442         enic_link_update(enic);
443         _rte_eth_dev_callback_process(dev, RTE_ETH_EVENT_INTR_LSC, NULL);
444         enic_log_q_error(enic);
445 }
446
447 static int enic_rxq_intr_init(struct enic *enic)
448 {
449         struct rte_intr_handle *intr_handle;
450         uint32_t rxq_intr_count, i;
451         int err;
452
453         intr_handle = enic->rte_dev->intr_handle;
454         if (!enic->rte_dev->data->dev_conf.intr_conf.rxq)
455                 return 0;
456         /*
457          * Rx queue interrupts only work when we have MSI-X interrupts,
458          * one per queue. Sharing one interrupt is technically
459          * possible with VIC, but it is not worth the complications it brings.
460          */
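        /*
         * Interrupt layout assumed by this function: the vector(s) below
         * ENICPMD_RXQ_INTR_OFFSET carry link-state/error notifications, and
         * vectors ENICPMD_RXQ_INTR_OFFSET .. intr_count - 1 map one-to-one
         * onto Rx queues (see the intr_vec assignment below).
         */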
461         if (!rte_intr_cap_multiple(intr_handle)) {
462                 dev_err(enic, "Rx queue interrupts require MSI-X interrupts"
463                         " (vfio-pci driver)\n");
464                 return -ENOTSUP;
465         }
466         rxq_intr_count = enic->intr_count - ENICPMD_RXQ_INTR_OFFSET;
467         err = rte_intr_efd_enable(intr_handle, rxq_intr_count);
468         if (err) {
469                 dev_err(enic, "Failed to enable event fds for Rx queue"
470                         " interrupts\n");
471                 return err;
472         }
473         intr_handle->intr_vec = rte_zmalloc("enic_intr_vec",
474                                             rxq_intr_count * sizeof(int), 0);
475         if (intr_handle->intr_vec == NULL) {
476                 dev_err(enic, "Failed to allocate intr_vec\n");
477                 return -ENOMEM;
478         }
479         for (i = 0; i < rxq_intr_count; i++)
480                 intr_handle->intr_vec[i] = i + ENICPMD_RXQ_INTR_OFFSET;
481         return 0;
482 }
483
484 static void enic_rxq_intr_deinit(struct enic *enic)
485 {
486         struct rte_intr_handle *intr_handle;
487
488         intr_handle = enic->rte_dev->intr_handle;
489         rte_intr_efd_disable(intr_handle);
490         if (intr_handle->intr_vec != NULL) {
491                 rte_free(intr_handle->intr_vec);
492                 intr_handle->intr_vec = NULL;
493         }
494 }
495
496 static void enic_prep_wq_for_simple_tx(struct enic *enic, uint16_t queue_idx)
497 {
498         struct wq_enet_desc *desc;
499         struct vnic_wq *wq;
500         unsigned int i;
501
502         /*
503          * Fill WQ descriptor fields that never change. Every descriptor is
504          * one packet, so set EOP. Also set CQ_ENTRY every ENIC_WQ_CQ_THRESH
505          * descriptors (i.e. request one completion update every 32 packets).
506          */
507         wq = &enic->wq[queue_idx];
508         desc = (struct wq_enet_desc *)wq->ring.descs;
509         for (i = 0; i < wq->ring.desc_count; i++, desc++) {
510                 desc->header_length_flags = 1 << WQ_ENET_FLAGS_EOP_SHIFT;
511                 if (i % ENIC_WQ_CQ_THRESH == ENIC_WQ_CQ_THRESH - 1)
512                         desc->header_length_flags |=
513                                 (1 << WQ_ENET_FLAGS_CQ_ENTRY_SHIFT);
514         }
515 }
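/*
 * With EOP and the periodic CQ_ENTRY bits pre-filled above, the simple Tx
 * handler only needs to write the per-packet address and length into each
 * descriptor, and the NIC posts roughly one completion per ENIC_WQ_CQ_THRESH
 * packets, which bounds the cleanup work per burst. (This describes the
 * intent of the pre-fill based on the comment above; enic_simple_xmit_pkts
 * itself lives elsewhere in the driver.)
 */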
516
517 int enic_enable(struct enic *enic)
518 {
519         unsigned int index;
520         int err;
521         struct rte_eth_dev *eth_dev = enic->rte_dev;
522
523         eth_dev->data->dev_link.link_speed = vnic_dev_port_speed(enic->vdev);
524         eth_dev->data->dev_link.link_duplex = ETH_LINK_FULL_DUPLEX;
525
526         /* vnic notification of link status has already been turned on in
527          * enic_dev_init() which is called during probe time.  Here we are
528          * just turning on interrupt vector 0 if needed.
529          */
530         if (eth_dev->data->dev_conf.intr_conf.lsc)
531                 vnic_dev_notify_set(enic->vdev, 0);
532
533         err = enic_rxq_intr_init(enic);
534         if (err)
535                 return err;
536         if (enic_clsf_init(enic))
537                 dev_warning(enic, "Init of hash table for clsf failed. "\
538                         "Flow director feature will not work\n");
539
540         for (index = 0; index < enic->rq_count; index++) {
541                 err = enic_alloc_rx_queue_mbufs(enic,
542                         &enic->rq[enic_rte_rq_idx_to_sop_idx(index)]);
543                 if (err) {
544                         dev_err(enic, "Failed to alloc sop RX queue mbufs\n");
545                         return err;
546                 }
547                 err = enic_alloc_rx_queue_mbufs(enic,
548                         &enic->rq[enic_rte_rq_idx_to_data_idx(index)]);
549                 if (err) {
550                         /* release the allocated mbufs for the sop rq*/
551                         enic_rxmbuf_queue_release(enic,
552                                 &enic->rq[enic_rte_rq_idx_to_sop_idx(index)]);
553
554                         dev_err(enic, "Failed to alloc data RX queue mbufs\n");
555                         return err;
556                 }
557         }
558
559         /*
560          * Use the simple TX handler if possible. All offloads must be disabled
561          * except mbuf fast free.
562          */
563         if ((eth_dev->data->dev_conf.txmode.offloads &
564              ~DEV_TX_OFFLOAD_MBUF_FAST_FREE) == 0) {
565                 PMD_INIT_LOG(DEBUG, " use the simple tx handler");
566                 eth_dev->tx_pkt_burst = &enic_simple_xmit_pkts;
567                 for (index = 0; index < enic->wq_count; index++)
568                         enic_prep_wq_for_simple_tx(enic, index);
569         } else {
570                 PMD_INIT_LOG(DEBUG, " use the default tx handler");
571                 eth_dev->tx_pkt_burst = &enic_xmit_pkts;
572         }
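        /*
         * Usage note: an application selects the simple handler by leaving
         * dev_conf.txmode.offloads empty or set to only
         * DEV_TX_OFFLOAD_MBUF_FAST_FREE before rte_eth_dev_configure();
         * requesting any other Tx offload keeps the default enic_xmit_pkts
         * path, which handles the full offload set.
         */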
573
574         for (index = 0; index < enic->wq_count; index++)
575                 enic_start_wq(enic, index);
576         for (index = 0; index < enic->rq_count; index++)
577                 enic_start_rq(enic, index);
578
579         vnic_dev_add_addr(enic->vdev, enic->mac_addr);
580
581         vnic_dev_enable_wait(enic->vdev);
582
583         /* Register and enable error interrupt */
584         rte_intr_callback_register(&(enic->pdev->intr_handle),
585                 enic_intr_handler, (void *)enic->rte_dev);
586
587         rte_intr_enable(&(enic->pdev->intr_handle));
588         /* Unmask LSC interrupt */
589         vnic_intr_unmask(&enic->intr[ENICPMD_LSC_INTR_OFFSET]);
590
591         return 0;
592 }
593
594 int enic_alloc_intr_resources(struct enic *enic)
595 {
596         int err;
597         unsigned int i;
598
599         dev_info(enic, "vNIC resources used:  "\
600                 "wq %d rq %d cq %d intr %d\n",
601                 enic->wq_count, enic_vnic_rq_count(enic),
602                 enic->cq_count, enic->intr_count);
603
604         for (i = 0; i < enic->intr_count; i++) {
605                 err = vnic_intr_alloc(enic->vdev, &enic->intr[i], i);
606                 if (err) {
607                         enic_free_vnic_resources(enic);
608                         return err;
609                 }
610         }
611         return 0;
612 }
613
614 void enic_free_rq(void *rxq)
615 {
616         struct vnic_rq *rq_sop, *rq_data;
617         struct enic *enic;
618
619         if (rxq == NULL)
620                 return;
621
622         rq_sop = (struct vnic_rq *)rxq;
623         enic = vnic_dev_priv(rq_sop->vdev);
624         rq_data = &enic->rq[rq_sop->data_queue_idx];
625
626         enic_rxmbuf_queue_release(enic, rq_sop);
627         if (rq_data->in_use)
628                 enic_rxmbuf_queue_release(enic, rq_data);
629
630         rte_free(rq_sop->mbuf_ring);
631         if (rq_data->in_use)
632                 rte_free(rq_data->mbuf_ring);
633
634         rq_sop->mbuf_ring = NULL;
635         rq_data->mbuf_ring = NULL;
636
637         vnic_rq_free(rq_sop);
638         if (rq_data->in_use)
639                 vnic_rq_free(rq_data);
640
641         vnic_cq_free(&enic->cq[enic_sop_rq_idx_to_cq_idx(rq_sop->index)]);
642
643         rq_sop->in_use = 0;
644         rq_data->in_use = 0;
645 }
646
647 void enic_start_wq(struct enic *enic, uint16_t queue_idx)
648 {
649         struct rte_eth_dev *eth_dev = enic->rte_dev;
650         vnic_wq_enable(&enic->wq[queue_idx]);
651         eth_dev->data->tx_queue_state[queue_idx] = RTE_ETH_QUEUE_STATE_STARTED;
652 }
653
654 int enic_stop_wq(struct enic *enic, uint16_t queue_idx)
655 {
656         struct rte_eth_dev *eth_dev = enic->rte_dev;
657         int ret;
658
659         ret = vnic_wq_disable(&enic->wq[queue_idx]);
660         if (ret)
661                 return ret;
662
663         eth_dev->data->tx_queue_state[queue_idx] = RTE_ETH_QUEUE_STATE_STOPPED;
664         return 0;
665 }
666
667 void enic_start_rq(struct enic *enic, uint16_t queue_idx)
668 {
669         struct vnic_rq *rq_sop;
670         struct vnic_rq *rq_data;
671         rq_sop = &enic->rq[enic_rte_rq_idx_to_sop_idx(queue_idx)];
672         rq_data = &enic->rq[rq_sop->data_queue_idx];
673         struct rte_eth_dev *eth_dev = enic->rte_dev;
674
675         if (rq_data->in_use) {
676                 vnic_rq_enable(rq_data);
677                 enic_initial_post_rx(enic, rq_data);
678         }
679         rte_mb();
680         vnic_rq_enable(rq_sop);
681         enic_initial_post_rx(enic, rq_sop);
682         eth_dev->data->rx_queue_state[queue_idx] = RTE_ETH_QUEUE_STATE_STARTED;
683 }
684
685 int enic_stop_rq(struct enic *enic, uint16_t queue_idx)
686 {
687         int ret1 = 0, ret2 = 0;
688         struct rte_eth_dev *eth_dev = enic->rte_dev;
689         struct vnic_rq *rq_sop;
690         struct vnic_rq *rq_data;
691         rq_sop = &enic->rq[enic_rte_rq_idx_to_sop_idx(queue_idx)];
692         rq_data = &enic->rq[rq_sop->data_queue_idx];
693
694         ret2 = vnic_rq_disable(rq_sop);
695         rte_mb();
696         if (rq_data->in_use)
697                 ret1 = vnic_rq_disable(rq_data);
698
699         if (ret2)
700                 return ret2;
701         else if (ret1)
702                 return ret1;
703
704         eth_dev->data->rx_queue_state[queue_idx] = RTE_ETH_QUEUE_STATE_STOPPED;
705         return 0;
706 }
707
708 int enic_alloc_rq(struct enic *enic, uint16_t queue_idx,
709         unsigned int socket_id, struct rte_mempool *mp,
710         uint16_t nb_desc, uint16_t free_thresh)
711 {
712         int rc;
713         uint16_t sop_queue_idx = enic_rte_rq_idx_to_sop_idx(queue_idx);
714         uint16_t data_queue_idx = enic_rte_rq_idx_to_data_idx(queue_idx);
715         struct vnic_rq *rq_sop = &enic->rq[sop_queue_idx];
716         struct vnic_rq *rq_data = &enic->rq[data_queue_idx];
717         unsigned int mbuf_size, mbufs_per_pkt;
718         unsigned int nb_sop_desc, nb_data_desc;
719         uint16_t min_sop, max_sop, min_data, max_data;
720         uint32_t max_rx_pkt_len;
721
722         rq_sop->is_sop = 1;
723         rq_sop->data_queue_idx = data_queue_idx;
724         rq_data->is_sop = 0;
725         rq_data->data_queue_idx = 0;
726         rq_sop->socket_id = socket_id;
727         rq_sop->mp = mp;
728         rq_data->socket_id = socket_id;
729         rq_data->mp = mp;
730         rq_sop->in_use = 1;
731         rq_sop->rx_free_thresh = free_thresh;
732         rq_data->rx_free_thresh = free_thresh;
733         dev_debug(enic, "Set queue_id:%u free thresh:%u\n", queue_idx,
734                   free_thresh);
735
736         mbuf_size = (uint16_t)(rte_pktmbuf_data_room_size(mp) -
737                                RTE_PKTMBUF_HEADROOM);
738         /* max_rx_pkt_len includes the ethernet header and CRC. */
739         max_rx_pkt_len = enic->rte_dev->data->dev_conf.rxmode.max_rx_pkt_len;
740
741         if (enic->rte_dev->data->dev_conf.rxmode.offloads &
742             DEV_RX_OFFLOAD_SCATTER) {
743                 dev_info(enic, "Rq %u Scatter rx mode enabled\n", queue_idx);
744                 /* ceil((max pkt len)/mbuf_size) */
745                 mbufs_per_pkt = (max_rx_pkt_len + mbuf_size - 1) / mbuf_size;
746         } else {
747                 dev_info(enic, "Scatter rx mode disabled\n");
748                 mbufs_per_pkt = 1;
749                 if (max_rx_pkt_len > mbuf_size) {
750                         dev_warning(enic, "The maximum Rx packet size (%u) is"
751                                     " larger than the mbuf size (%u), and"
752                                     " scatter is disabled. Larger packets will"
753                                     " be truncated.\n",
754                                     max_rx_pkt_len, mbuf_size);
755                 }
756         }
757
758         if (mbufs_per_pkt > 1) {
759                 dev_info(enic, "Rq %u Scatter rx mode in use\n", queue_idx);
760                 rq_sop->data_queue_enable = 1;
761                 rq_data->in_use = 1;
762                 /*
763                  * HW does not directly support rxmode.max_rx_pkt_len. HW always
764                  * receives packet sizes up to the "max" MTU.
765                  * If not using scatter, we can achieve the effect of dropping
766                  * larger packets by reducing the size of posted buffers.
767                  * See enic_alloc_rx_queue_mbufs().
768                  */
769                 if (max_rx_pkt_len <
770                     enic_mtu_to_max_rx_pktlen(enic->max_mtu)) {
771                         dev_warning(enic, "rxmode.max_rx_pkt_len is ignored"
772                                     " when scatter rx mode is in use.\n");
773                 }
774         } else {
775                 dev_info(enic, "Rq %u Scatter rx mode not being used\n",
776                          queue_idx);
777                 rq_sop->data_queue_enable = 0;
778                 rq_data->in_use = 0;
779         }
780
781         /* the number of descriptors has to be a multiple of 32 */
782         nb_sop_desc = (nb_desc / mbufs_per_pkt) & ENIC_ALIGN_DESCS_MASK;
783         nb_data_desc = (nb_desc - nb_sop_desc) & ENIC_ALIGN_DESCS_MASK;
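        /*
         * Illustrative example (hypothetical sizes): max_rx_pkt_len = 9000
         * with 2048-byte mbufs gives mbufs_per_pkt = 5, so nb_desc = 512
         * splits into nb_sop_desc = (512 / 5) & ~31 = 96 and
         * nb_data_desc = (512 - 96) & ~31 = 416.
         */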
784
785         rq_sop->max_mbufs_per_pkt = mbufs_per_pkt;
786         rq_data->max_mbufs_per_pkt = mbufs_per_pkt;
787
788         if (mbufs_per_pkt > 1) {
789                 min_sop = 64;
790                 max_sop = ((enic->config.rq_desc_count /
791                             (mbufs_per_pkt - 1)) & ENIC_ALIGN_DESCS_MASK);
792                 min_data = min_sop * (mbufs_per_pkt - 1);
793                 max_data = enic->config.rq_desc_count;
794         } else {
795                 min_sop = 64;
796                 max_sop = enic->config.rq_desc_count;
797                 min_data = 0;
798                 max_data = 0;
799         }
800
801         if (nb_desc < (min_sop + min_data)) {
802                 dev_warning(enic,
803                             "Number of rx descs too low, adjusting to minimum\n");
804                 nb_sop_desc = min_sop;
805                 nb_data_desc = min_data;
806         } else if (nb_desc > (max_sop + max_data)) {
807                 dev_warning(enic,
808                             "Number of rx_descs too high, adjusting to maximum\n");
809                 nb_sop_desc = max_sop;
810                 nb_data_desc = max_data;
811         }
812         if (mbufs_per_pkt > 1) {
813                 dev_info(enic, "For max packet size %u and mbuf size %u valid"
814                          " rx descriptor range is %u to %u\n",
815                          max_rx_pkt_len, mbuf_size, min_sop + min_data,
816                          max_sop + max_data);
817         }
818         dev_info(enic, "Using %d rx descriptors (sop %d, data %d)\n",
819                  nb_sop_desc + nb_data_desc, nb_sop_desc, nb_data_desc);
820
821         /* Allocate sop queue resources */
822         rc = vnic_rq_alloc(enic->vdev, rq_sop, sop_queue_idx,
823                 nb_sop_desc, sizeof(struct rq_enet_desc));
824         if (rc) {
825                 dev_err(enic, "error in allocation of sop rq\n");
826                 goto err_exit;
827         }
828         nb_sop_desc = rq_sop->ring.desc_count;
829
830         if (rq_data->in_use) {
831                 /* Allocate data queue resources */
832                 rc = vnic_rq_alloc(enic->vdev, rq_data, data_queue_idx,
833                                    nb_data_desc,
834                                    sizeof(struct rq_enet_desc));
835                 if (rc) {
836                         dev_err(enic, "error in allocation of data rq\n");
837                         goto err_free_rq_sop;
838                 }
839                 nb_data_desc = rq_data->ring.desc_count;
840         }
841         rc = vnic_cq_alloc(enic->vdev, &enic->cq[queue_idx], queue_idx,
842                            socket_id, nb_sop_desc + nb_data_desc,
843                            sizeof(struct cq_enet_rq_desc));
844         if (rc) {
845                 dev_err(enic, "error in allocation of cq for rq\n");
846                 goto err_free_rq_data;
847         }
848
849         /* Allocate the mbuf rings */
850         rq_sop->mbuf_ring = (struct rte_mbuf **)
851                 rte_zmalloc_socket("rq->mbuf_ring",
852                                    sizeof(struct rte_mbuf *) * nb_sop_desc,
853                                    RTE_CACHE_LINE_SIZE, rq_sop->socket_id);
854         if (rq_sop->mbuf_ring == NULL)
855                 goto err_free_cq;
856
857         if (rq_data->in_use) {
858                 rq_data->mbuf_ring = (struct rte_mbuf **)
859                         rte_zmalloc_socket("rq->mbuf_ring",
860                                 sizeof(struct rte_mbuf *) * nb_data_desc,
861                                 RTE_CACHE_LINE_SIZE, rq_sop->socket_id);
862                 if (rq_data->mbuf_ring == NULL)
863                         goto err_free_sop_mbuf;
864         }
865
866         rq_sop->tot_nb_desc = nb_desc; /* squirrel away for MTU update function */
867
868         return 0;
869
870 err_free_sop_mbuf:
871         rte_free(rq_sop->mbuf_ring);
872 err_free_cq:
873         /* cleanup on error */
874         vnic_cq_free(&enic->cq[queue_idx]);
875 err_free_rq_data:
876         if (rq_data->in_use)
877                 vnic_rq_free(rq_data);
878 err_free_rq_sop:
879         vnic_rq_free(rq_sop);
880 err_exit:
881         return -ENOMEM;
882 }
883
884 void enic_free_wq(void *txq)
885 {
886         struct vnic_wq *wq;
887         struct enic *enic;
888
889         if (txq == NULL)
890                 return;
891
892         wq = (struct vnic_wq *)txq;
893         enic = vnic_dev_priv(wq->vdev);
894         rte_memzone_free(wq->cqmsg_rz);
895         vnic_wq_free(wq);
896         vnic_cq_free(&enic->cq[enic->rq_count + wq->index]);
897 }
898
899 int enic_alloc_wq(struct enic *enic, uint16_t queue_idx,
900         unsigned int socket_id, uint16_t nb_desc)
901 {
902         int err;
903         struct vnic_wq *wq = &enic->wq[queue_idx];
904         unsigned int cq_index = enic_cq_wq(enic, queue_idx);
905         char name[NAME_MAX];
906         static int instance;
907
908         wq->socket_id = socket_id;
909         /*
910          * rte_eth_tx_queue_setup() checks min, max, and alignment. So just
911          * print an info message for diagnostics.
912          */
913         dev_info(enic, "TX Queues - effective number of descs:%d\n", nb_desc);
914
915         /* Allocate queue resources */
916         err = vnic_wq_alloc(enic->vdev, &enic->wq[queue_idx], queue_idx,
917                 nb_desc,
918                 sizeof(struct wq_enet_desc));
919         if (err) {
920                 dev_err(enic, "error in allocation of wq\n");
921                 return err;
922         }
923
924         err = vnic_cq_alloc(enic->vdev, &enic->cq[cq_index], cq_index,
925                 socket_id, nb_desc,
926                 sizeof(struct cq_enet_wq_desc));
927         if (err) {
928                 vnic_wq_free(wq);
929                 dev_err(enic, "error in allocation of cq for wq\n");
                    return err;
930         }
931
932         /* set up CQ message */
933         snprintf((char *)name, sizeof(name),
934                  "vnic_cqmsg-%s-%d-%d", enic->bdf_name, queue_idx,
935                 instance++);
936
937         wq->cqmsg_rz = rte_memzone_reserve_aligned((const char *)name,
938                         sizeof(uint32_t), SOCKET_ID_ANY,
939                         RTE_MEMZONE_IOVA_CONTIG, ENIC_ALIGN);
940         if (!wq->cqmsg_rz)
941                 return -ENOMEM;
942
943         return err;
944 }
945
946 int enic_disable(struct enic *enic)
947 {
948         unsigned int i;
949         int err;
950
951         for (i = 0; i < enic->intr_count; i++) {
952                 vnic_intr_mask(&enic->intr[i]);
953                 (void)vnic_intr_masked(&enic->intr[i]); /* flush write */
954         }
955         enic_rxq_intr_deinit(enic);
956         rte_intr_disable(&enic->pdev->intr_handle);
957         rte_intr_callback_unregister(&enic->pdev->intr_handle,
958                                      enic_intr_handler,
959                                      (void *)enic->rte_dev);
960
961         vnic_dev_disable(enic->vdev);
962
963         enic_clsf_destroy(enic);
964
965         if (!enic_is_sriov_vf(enic))
966                 vnic_dev_del_addr(enic->vdev, enic->mac_addr);
967
968         for (i = 0; i < enic->wq_count; i++) {
969                 err = vnic_wq_disable(&enic->wq[i]);
970                 if (err)
971                         return err;
972         }
973         for (i = 0; i < enic_vnic_rq_count(enic); i++) {
974                 if (enic->rq[i].in_use) {
975                         err = vnic_rq_disable(&enic->rq[i]);
976                         if (err)
977                                 return err;
978                 }
979         }
980
981         /* If we were using interrupts, set the interrupt vector to -1
982          * to disable interrupts.  We are not disabling link notifications,
983          * though, as we want the polling of link status to continue working.
984          */
985         if (enic->rte_dev->data->dev_conf.intr_conf.lsc)
986                 vnic_dev_notify_set(enic->vdev, -1);
987
988         vnic_dev_set_reset_flag(enic->vdev, 1);
989
990         for (i = 0; i < enic->wq_count; i++)
991                 vnic_wq_clean(&enic->wq[i], enic_free_wq_buf);
992
993         for (i = 0; i < enic_vnic_rq_count(enic); i++)
994                 if (enic->rq[i].in_use)
995                         vnic_rq_clean(&enic->rq[i], enic_free_rq_buf);
996         for (i = 0; i < enic->cq_count; i++)
997                 vnic_cq_clean(&enic->cq[i]);
998         for (i = 0; i < enic->intr_count; i++)
999                 vnic_intr_clean(&enic->intr[i]);
1000
1001         return 0;
1002 }
1003
1004 static int enic_dev_wait(struct vnic_dev *vdev,
1005         int (*start)(struct vnic_dev *, int),
1006         int (*finished)(struct vnic_dev *, int *),
1007         int arg)
1008 {
1009         int done;
1010         int err;
1011         int i;
1012
1013         err = start(vdev, arg);
1014         if (err)
1015                 return err;
1016
1017         /* Wait for func to complete...2 seconds max */
1018         for (i = 0; i < 2000; i++) {
1019                 err = finished(vdev, &done);
1020                 if (err)
1021                         return err;
1022                 if (done)
1023                         return 0;
1024                 usleep(1000);
1025         }
1026         return -ETIMEDOUT;
1027 }
1028
1029 static int enic_dev_open(struct enic *enic)
1030 {
1031         int err;
1032         int flags = CMD_OPENF_IG_DESCCACHE;
1033
1034         err = enic_dev_wait(enic->vdev, vnic_dev_open,
1035                 vnic_dev_open_done, flags);
1036         if (err)
1037                 dev_err(enic_get_dev(enic),
1038                         "vNIC device open failed, err %d\n", err);
1039
1040         return err;
1041 }
1042
1043 static int enic_set_rsskey(struct enic *enic, uint8_t *user_key)
1044 {
1045         dma_addr_t rss_key_buf_pa;
1046         union vnic_rss_key *rss_key_buf_va = NULL;
1047         int err, i;
1048         u8 name[NAME_MAX];
1049
1050         RTE_ASSERT(user_key != NULL);
1051         snprintf((char *)name, NAME_MAX, "rss_key-%s", enic->bdf_name);
1052         rss_key_buf_va = enic_alloc_consistent(enic, sizeof(union vnic_rss_key),
1053                 &rss_key_buf_pa, name);
1054         if (!rss_key_buf_va)
1055                 return -ENOMEM;
1056
1057         for (i = 0; i < ENIC_RSS_HASH_KEY_SIZE; i++)
1058                 rss_key_buf_va->key[i / 10].b[i % 10] = user_key[i];
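        /*
         * The vNIC stores the RSS key as groups of 10 bytes, hence the
         * key[i / 10].b[i % 10] addressing used above.
         */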
1059
1060         err = enic_set_rss_key(enic,
1061                 rss_key_buf_pa,
1062                 sizeof(union vnic_rss_key));
1063
1064         /* Save for later queries */
1065         if (!err) {
1066                 rte_memcpy(&enic->rss_key, rss_key_buf_va,
1067                            sizeof(union vnic_rss_key));
1068         }
1069         enic_free_consistent(enic, sizeof(union vnic_rss_key),
1070                 rss_key_buf_va, rss_key_buf_pa);
1071
1072         return err;
1073 }
1074
1075 int enic_set_rss_reta(struct enic *enic, union vnic_rss_cpu *rss_cpu)
1076 {
1077         dma_addr_t rss_cpu_buf_pa;
1078         union vnic_rss_cpu *rss_cpu_buf_va = NULL;
1079         int err;
1080         u8 name[NAME_MAX];
1081
1082         snprintf((char *)name, NAME_MAX, "rss_cpu-%s", enic->bdf_name);
1083         rss_cpu_buf_va = enic_alloc_consistent(enic, sizeof(union vnic_rss_cpu),
1084                 &rss_cpu_buf_pa, name);
1085         if (!rss_cpu_buf_va)
1086                 return -ENOMEM;
1087
1088         rte_memcpy(rss_cpu_buf_va, rss_cpu, sizeof(union vnic_rss_cpu));
1089
1090         err = enic_set_rss_cpu(enic,
1091                 rss_cpu_buf_pa,
1092                 sizeof(union vnic_rss_cpu));
1093
1094         enic_free_consistent(enic, sizeof(union vnic_rss_cpu),
1095                 rss_cpu_buf_va, rss_cpu_buf_pa);
1096
1097         /* Save for later queries */
1098         if (!err)
1099                 rte_memcpy(&enic->rss_cpu, rss_cpu, sizeof(union vnic_rss_cpu));
1100         return err;
1101 }
1102
1103 static int enic_set_niccfg(struct enic *enic, u8 rss_default_cpu,
1104         u8 rss_hash_type, u8 rss_hash_bits, u8 rss_base_cpu, u8 rss_enable)
1105 {
1106         const u8 tso_ipid_split_en = 0;
1107         int err;
1108
1109         err = enic_set_nic_cfg(enic,
1110                 rss_default_cpu, rss_hash_type,
1111                 rss_hash_bits, rss_base_cpu,
1112                 rss_enable, tso_ipid_split_en,
1113                 enic->ig_vlan_strip_en);
1114
1115         return err;
1116 }
1117
1118 /* Initialize RSS with defaults, called from dev_configure */
1119 int enic_init_rss_nic_cfg(struct enic *enic)
1120 {
1121         static uint8_t default_rss_key[] = {
1122                 85, 67, 83, 97, 119, 101, 115, 111, 109, 101,
1123                 80, 65, 76, 79, 117, 110, 105, 113, 117, 101,
1124                 76, 73, 78, 85, 88, 114, 111, 99, 107, 115,
1125                 69, 78, 73, 67, 105, 115, 99, 111, 111, 108,
1126         };
1127         struct rte_eth_rss_conf rss_conf;
1128         union vnic_rss_cpu rss_cpu;
1129         int ret, i;
1130
1131         rss_conf = enic->rte_dev->data->dev_conf.rx_adv_conf.rss_conf;
1132         /*
1133          * If setting key for the first time, and the user gives us none, then
1134          * push the default key to NIC.
1135          */
1136         if (rss_conf.rss_key == NULL) {
1137                 rss_conf.rss_key = default_rss_key;
1138                 rss_conf.rss_key_len = ENIC_RSS_HASH_KEY_SIZE;
1139         }
1140         ret = enic_set_rss_conf(enic, &rss_conf);
1141         if (ret) {
1142                 dev_err(enic, "Failed to configure RSS\n");
1143                 return ret;
1144         }
1145         if (enic->rss_enable) {
1146                 /* If enabling RSS, use the default reta */
1147                 for (i = 0; i < ENIC_RSS_RETA_SIZE; i++) {
1148                         rss_cpu.cpu[i / 4].b[i % 4] =
1149                                 enic_rte_rq_idx_to_sop_idx(i % enic->rq_count);
1150                 }
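                /*
                 * Illustrative example: with rq_count = 3, the
                 * ENIC_RSS_RETA_SIZE-entry indirection table cycles through
                 * the SOP RQ indexes for queues 0, 1, 2, 0, 1, ... so flows
                 * are spread across all configured RQs.
                 */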
1151                 ret = enic_set_rss_reta(enic, &rss_cpu);
1152                 if (ret)
1153                         dev_err(enic, "Failed to set RSS indirection table\n");
1154         }
1155         return ret;
1156 }
1157
1158 int enic_setup_finish(struct enic *enic)
1159 {
1160         enic_init_soft_stats(enic);
1161
1162         /* Default conf */
1163         vnic_dev_packet_filter(enic->vdev,
1164                 1 /* directed  */,
1165                 1 /* multicast */,
1166                 1 /* broadcast */,
1167                 0 /* promisc   */,
1168                 1 /* allmulti  */);
1169
1170         enic->promisc = 0;
1171         enic->allmulti = 1;
1172
1173         return 0;
1174 }
1175
1176 static int enic_rss_conf_valid(struct enic *enic,
1177                                struct rte_eth_rss_conf *rss_conf)
1178 {
1179         /* RSS is disabled per VIC settings. Ignore rss_conf. */
1180         if (enic->flow_type_rss_offloads == 0)
1181                 return 0;
1182         if (rss_conf->rss_key != NULL &&
1183             rss_conf->rss_key_len != ENIC_RSS_HASH_KEY_SIZE) {
1184                 dev_err(enic, "Given rss_key is %d bytes, it must be %d\n",
1185                         rss_conf->rss_key_len, ENIC_RSS_HASH_KEY_SIZE);
1186                 return -EINVAL;
1187         }
1188         if (rss_conf->rss_hf != 0 &&
1189             (rss_conf->rss_hf & enic->flow_type_rss_offloads) == 0) {
1190                 dev_err(enic, "Given rss_hf contains none of the supported"
1191                         " types\n");
1192                 return -EINVAL;
1193         }
1194         return 0;
1195 }
1196
1197 /* Set hash type and key according to rss_conf */
1198 int enic_set_rss_conf(struct enic *enic, struct rte_eth_rss_conf *rss_conf)
1199 {
1200         struct rte_eth_dev *eth_dev;
1201         uint64_t rss_hf;
1202         u8 rss_hash_type;
1203         u8 rss_enable;
1204         int ret;
1205
1206         RTE_ASSERT(rss_conf != NULL);
1207         ret = enic_rss_conf_valid(enic, rss_conf);
1208         if (ret) {
1209                 dev_err(enic, "RSS configuration (rss_conf) is invalid\n");
1210                 return ret;
1211         }
1212
1213         eth_dev = enic->rte_dev;
1214         rss_hash_type = 0;
1215         rss_hf = rss_conf->rss_hf & enic->flow_type_rss_offloads;
1216         if (enic->rq_count > 1 &&
1217             (eth_dev->data->dev_conf.rxmode.mq_mode & ETH_MQ_RX_RSS_FLAG) &&
1218             rss_hf != 0) {
1219                 rss_enable = 1;
1220                 if (rss_hf & (ETH_RSS_IPV4 | ETH_RSS_FRAG_IPV4 |
1221                               ETH_RSS_NONFRAG_IPV4_OTHER))
1222                         rss_hash_type |= NIC_CFG_RSS_HASH_TYPE_IPV4;
1223                 if (rss_hf & ETH_RSS_NONFRAG_IPV4_TCP)
1224                         rss_hash_type |= NIC_CFG_RSS_HASH_TYPE_TCP_IPV4;
1225                 if (rss_hf & ETH_RSS_NONFRAG_IPV4_UDP) {
1226                         rss_hash_type |= NIC_CFG_RSS_HASH_TYPE_UDP_IPV4;
1227                         if (enic->udp_rss_weak) {
1228                                 /*
1229                                  * 'TCP' is not a typo. The "weak" version of
1230                                  * UDP RSS requires both the TCP and UDP bits
1231                                  * be set. It does enable TCP RSS as well.
1232                                  */
1233                                 rss_hash_type |= NIC_CFG_RSS_HASH_TYPE_TCP_IPV4;
1234                         }
1235                 }
1236                 if (rss_hf & (ETH_RSS_IPV6 | ETH_RSS_IPV6_EX |
1237                               ETH_RSS_FRAG_IPV6 | ETH_RSS_NONFRAG_IPV6_OTHER))
1238                         rss_hash_type |= NIC_CFG_RSS_HASH_TYPE_IPV6;
1239                 if (rss_hf & (ETH_RSS_NONFRAG_IPV6_TCP | ETH_RSS_IPV6_TCP_EX))
1240                         rss_hash_type |= NIC_CFG_RSS_HASH_TYPE_TCP_IPV6;
1241                 if (rss_hf & (ETH_RSS_NONFRAG_IPV6_UDP | ETH_RSS_IPV6_UDP_EX)) {
1242                         rss_hash_type |= NIC_CFG_RSS_HASH_TYPE_UDP_IPV6;
1243                         if (enic->udp_rss_weak)
1244                                 rss_hash_type |= NIC_CFG_RSS_HASH_TYPE_TCP_IPV6;
1245                 }
1246         } else {
1247                 rss_enable = 0;
1248                 rss_hf = 0;
1249         }
1250
1251         /* Set the hash key if provided */
1252         if (rss_enable && rss_conf->rss_key) {
1253                 ret = enic_set_rsskey(enic, rss_conf->rss_key);
1254                 if (ret) {
1255                         dev_err(enic, "Failed to set RSS key\n");
1256                         return ret;
1257                 }
1258         }
1259
1260         ret = enic_set_niccfg(enic, ENIC_RSS_DEFAULT_CPU, rss_hash_type,
1261                               ENIC_RSS_HASH_BITS, ENIC_RSS_BASE_CPU,
1262                               rss_enable);
1263         if (!ret) {
1264                 enic->rss_hf = rss_hf;
1265                 enic->rss_hash_type = rss_hash_type;
1266                 enic->rss_enable = rss_enable;
1267         } else {
1268                 dev_err(enic, "Failed to update RSS configurations."
1269                         " hash=0x%x\n", rss_hash_type);
1270         }
1271         return ret;
1272 }
1273
1274 int enic_set_vlan_strip(struct enic *enic)
1275 {
1276         /*
1277          * Unfortunately, VLAN strip on/off and RSS on/off are configured
1278          * together. So, re-do niccfg, preserving the current RSS settings.
1279          */
1280         return enic_set_niccfg(enic, ENIC_RSS_DEFAULT_CPU, enic->rss_hash_type,
1281                                ENIC_RSS_HASH_BITS, ENIC_RSS_BASE_CPU,
1282                                enic->rss_enable);
1283 }
1284
1285 void enic_add_packet_filter(struct enic *enic)
1286 {
1287         /* Args -> directed, multicast, broadcast, promisc, allmulti */
1288         vnic_dev_packet_filter(enic->vdev, 1, 1, 1,
1289                 enic->promisc, enic->allmulti);
1290 }
1291
1292 int enic_get_link_status(struct enic *enic)
1293 {
1294         return vnic_dev_link_status(enic->vdev);
1295 }
1296
1297 static void enic_dev_deinit(struct enic *enic)
1298 {
1299         struct rte_eth_dev *eth_dev = enic->rte_dev;
1300
1301         /* stop link status checking */
1302         vnic_dev_notify_unset(enic->vdev);
1303
1304         rte_free(eth_dev->data->mac_addrs);
1305         rte_free(enic->cq);
1306         rte_free(enic->intr);
1307         rte_free(enic->rq);
1308         rte_free(enic->wq);
1309 }
1310
1311
1312 int enic_set_vnic_res(struct enic *enic)
1313 {
1314         struct rte_eth_dev *eth_dev = enic->rte_dev;
1315         int rc = 0;
1316         unsigned int required_rq, required_wq, required_cq, required_intr;
1317
1318         /* Always use two vNIC RQs per eth_dev RQ, regardless of Rx scatter. */
1319         required_rq = eth_dev->data->nb_rx_queues * 2;
1320         required_wq = eth_dev->data->nb_tx_queues;
1321         required_cq = eth_dev->data->nb_rx_queues + eth_dev->data->nb_tx_queues;
1322         required_intr = 1; /* 1 for LSC even if intr_conf.lsc is 0 */
1323         if (eth_dev->data->dev_conf.intr_conf.rxq) {
1324                 required_intr += eth_dev->data->nb_rx_queues;
1325         }
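        /*
         * Illustrative example: 4 Rx and 4 Tx queues with intr_conf.rxq set
         * require 8 vNIC RQs, 4 WQs, 8 CQs and 1 + 4 = 5 interrupts.
         */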
1326
1327         if (enic->conf_rq_count < required_rq) {
1328                 dev_err(dev, "Not enough Receive queues. Requested:%u which uses %d RQs on VIC, Configured:%u\n",
1329                         eth_dev->data->nb_rx_queues,
1330                         required_rq, enic->conf_rq_count);
1331                 rc = -EINVAL;
1332         }
1333         if (enic->conf_wq_count < required_wq) {
1334                 dev_err(dev, "Not enough Transmit queues. Requested:%u, Configured:%u\n",
1335                         eth_dev->data->nb_tx_queues, enic->conf_wq_count);
1336                 rc = -EINVAL;
1337         }
1338
1339         if (enic->conf_cq_count < required_cq) {
1340                 dev_err(dev, "Not enough Completion queues. Required:%u, Configured:%u\n",
1341                         required_cq, enic->conf_cq_count);
1342                 rc = -EINVAL;
1343         }
1344         if (enic->conf_intr_count < required_intr) {
1345                 dev_err(dev, "Not enough Interrupts to support Rx queue"
1346                         " interrupts. Required:%u, Configured:%u\n",
1347                         required_intr, enic->conf_intr_count);
1348                 rc = -EINVAL;
1349         }
1350
1351         if (rc == 0) {
1352                 enic->rq_count = eth_dev->data->nb_rx_queues;
1353                 enic->wq_count = eth_dev->data->nb_tx_queues;
1354                 enic->cq_count = enic->rq_count + enic->wq_count;
1355                 enic->intr_count = required_intr;
1356         }
1357
1358         return rc;
1359 }
1360
1361 /* Initialize the completion queue for an RQ */
1362 static int
1363 enic_reinit_rq(struct enic *enic, unsigned int rq_idx)
1364 {
1365         struct vnic_rq *sop_rq, *data_rq;
1366         unsigned int cq_idx;
1367         int rc = 0;
1368
1369         sop_rq = &enic->rq[enic_rte_rq_idx_to_sop_idx(rq_idx)];
1370         data_rq = &enic->rq[enic_rte_rq_idx_to_data_idx(rq_idx)];
1371         cq_idx = rq_idx;
1372
1373         vnic_cq_clean(&enic->cq[cq_idx]);
1374         vnic_cq_init(&enic->cq[cq_idx],
1375                      0 /* flow_control_enable */,
1376                      1 /* color_enable */,
1377                      0 /* cq_head */,
1378                      0 /* cq_tail */,
1379                      1 /* cq_tail_color */,
1380                      0 /* interrupt_enable */,
1381                      1 /* cq_entry_enable */,
1382                      0 /* cq_message_enable */,
1383                      0 /* interrupt offset */,
1384                      0 /* cq_message_addr */);
1385
1386
1387         vnic_rq_init_start(sop_rq, enic_cq_rq(enic,
1388                            enic_rte_rq_idx_to_sop_idx(rq_idx)), 0,
1389                            sop_rq->ring.desc_count - 1, 1, 0);
1390         if (data_rq->in_use) {
1391                 vnic_rq_init_start(data_rq,
1392                                    enic_cq_rq(enic,
1393                                    enic_rte_rq_idx_to_data_idx(rq_idx)), 0,
1394                                    data_rq->ring.desc_count - 1, 1, 0);
1395         }
1396
1397         rc = enic_alloc_rx_queue_mbufs(enic, sop_rq);
1398         if (rc)
1399                 return rc;
1400
1401         if (data_rq->in_use) {
1402                 rc = enic_alloc_rx_queue_mbufs(enic, data_rq);
1403                 if (rc) {
1404                         enic_rxmbuf_queue_release(enic, sop_rq);
1405                         return rc;
1406                 }
1407         }
1408
1409         return 0;
1410 }
1411
1412 /* The Cisco NIC can send and receive packets up to a max packet size
1413  * determined by the NIC type and firmware. There is also an MTU
1414  * configured into the NIC via the CIMC/UCSM management interface
1415  * which can be overridden by this function (up to the max packet size).
1416  * Depending on the network setup, doing so may cause packet drops
1417  * and unexpected behavior.
1418  */
1419 int enic_set_mtu(struct enic *enic, uint16_t new_mtu)
1420 {
1421         unsigned int rq_idx;
1422         struct vnic_rq *rq;
1423         int rc = 0;
1424         uint16_t old_mtu;       /* previous setting */
1425         uint16_t config_mtu;    /* Value configured into NIC via CIMC/UCSM */
1426         struct rte_eth_dev *eth_dev = enic->rte_dev;
1427
1428         old_mtu = eth_dev->data->mtu;
1429         config_mtu = enic->config.mtu;
1430
1431         if (rte_eal_process_type() != RTE_PROC_PRIMARY)
1432                 return -E_RTE_SECONDARY;
1433
1434         if (new_mtu > enic->max_mtu) {
1435                 dev_err(enic,
1436                         "MTU not updated: requested (%u) greater than max (%u)\n",
1437                         new_mtu, enic->max_mtu);
1438                 return -EINVAL;
1439         }
1440         if (new_mtu < ENIC_MIN_MTU) {
1441                 dev_info(enic,
1442                         "MTU not updated: requested (%u) less than min (%u)\n",
1443                         new_mtu, ENIC_MIN_MTU);
1444                 return -EINVAL;
1445         }
1446         if (new_mtu > config_mtu)
1447                 dev_warning(enic,
1448                         "MTU (%u) is greater than value configured in NIC (%u)\n",
1449                         new_mtu, config_mtu);
1450
1451         /* Update the MTU and maximum packet length */
1452         eth_dev->data->mtu = new_mtu;
1453         eth_dev->data->dev_conf.rxmode.max_rx_pkt_len =
1454                 enic_mtu_to_max_rx_pktlen(new_mtu);
1455
1456         /*
1457          * If the device has not started (enic_enable), nothing to do.
1458          * Later, enic_enable() will set up RQs reflecting the new maximum
1459          * packet length.
1460          */
1461         if (!eth_dev->data->dev_started)
1462                 goto set_mtu_done;
1463
1464         /*
1465          * The device has started, re-do RQs on the fly. In the process, we
1466          * pick up the new maximum packet length.
1467          *
1468          * Some applications rely on the ability to change MTU without stopping
1469          * the device. So keep this behavior for now.
1470          */
1471         rte_spinlock_lock(&enic->mtu_lock);
1472
1473         /* Stop traffic on all RQs */
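        /*
         * Each RTE Rx queue is backed by a start-of-packet (SOP) RQ and a
         * data RQ in the vNIC, hence the walk over rq_count * 2 entries.
         */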
1474         for (rq_idx = 0; rq_idx < enic->rq_count * 2; rq_idx++) {
1475                 rq = &enic->rq[rq_idx];
1476                 if (rq->is_sop && rq->in_use) {
1477                         rc = enic_stop_rq(enic,
1478                                           enic_sop_rq_idx_to_rte_idx(rq_idx));
1479                         if (rc) {
1480                                 dev_err(enic, "Failed to stop RQ %u\n", rq_idx);
1481                                 goto set_mtu_done;
1482                         }
1483                 }
1484         }
1485
1486         /* replace Rx function with a no-op to avoid getting stale pkts */
1487         eth_dev->rx_pkt_burst = enic_dummy_recv_pkts;
1488         rte_mb();
1489
1490         /* Allow time for threads to exit the real Rx function. */
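        /*
         * There is no explicit per-lcore synchronization here; the memory
         * barrier plus the sleep below are relied on to quiesce the Rx path.
         */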
1491         usleep(100000);
1492
1493         /*
1494          * Now it is safe to reconfigure the RQs: free and reallocate them
1495          * with the new MTU.
1496          */
1497         for (rq_idx = 0; rq_idx < enic->rq_count; rq_idx++) {
1498                 rq = &enic->rq[enic_rte_rq_idx_to_sop_idx(rq_idx)];
1499                 if (!rq->in_use)
1500                         continue;
1501
1502                 enic_free_rq(rq);
1503                 rc = enic_alloc_rq(enic, rq_idx, rq->socket_id, rq->mp,
1504                                    rq->tot_nb_desc, rq->rx_free_thresh);
1505                 if (rc) {
1506                         dev_err(enic,
1507                                 "Fatal MTU alloc error- No traffic will pass\n");
1508                         goto set_mtu_done;
1509                 }
1510
1511                 rc = enic_reinit_rq(enic, rq_idx);
1512                 if (rc) {
1513                         dev_err(enic,
1514                                 "Fatal MTU RQ reinit- No traffic will pass\n");
1515                         goto set_mtu_done;
1516                 }
1517         }
1518
1519         /* put back the real receive function */
1520         rte_mb();
1521         eth_dev->rx_pkt_burst = enic_recv_pkts;
1522         rte_mb();
1523
1524         /* restart Rx traffic */
1525         for (rq_idx = 0; rq_idx < enic->rq_count; rq_idx++) {
1526                 rq = &enic->rq[enic_rte_rq_idx_to_sop_idx(rq_idx)];
1527                 if (rq->is_sop && rq->in_use)
1528                         enic_start_rq(enic, rq_idx);
1529         }
1530
1531 set_mtu_done:
1532         dev_info(enic, "MTU changed from %u to %u\n", old_mtu, new_mtu);
1533         rte_spinlock_unlock(&enic->mtu_lock);
1534         return rc;
1535 }
1536
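/*
 * Query the vNIC configuration and resource counts, allocate the CQ, INTR,
 * RQ and WQ bookkeeping arrays, set up MAC address storage and the flow
 * list, enable link-status notification, and enable overlay (VXLAN/GENEVE)
 * offload when the adapter supports it.
 */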
1537 static int enic_dev_init(struct enic *enic)
1538 {
1539         int err;
1540         struct rte_eth_dev *eth_dev = enic->rte_dev;
1541
1542         vnic_dev_intr_coal_timer_info_default(enic->vdev);
1543
1544         /* Get the vNIC configuration */
1545
1546         err = enic_get_vnic_config(enic);
1547         if (err) {
1548                 dev_err(enic, "Get vNIC configuration failed, aborting\n");
1549                 return err;
1550         }
1551
1552         /* Get available resource counts */
1553         enic_get_res_counts(enic);
1554         if (enic->conf_rq_count == 1) {
1555                 dev_err(enic, "Running with only 1 RQ configured in the vNIC is not supported.\n");
1556                 dev_err(enic, "Please configure 2 RQs in the vNIC for each Rx queue used by DPDK.\n");
1557                 dev_err(enic, "See the ENIC PMD guide for more information.\n");
1558                 return -EINVAL;
1559         }
1560         /* Queue counts may be zeros. rte_zmalloc returns NULL in that case. */
1561         enic->cq = rte_zmalloc("enic_vnic_cq", sizeof(struct vnic_cq) *
1562                                enic->conf_cq_count, 8);
1563         enic->intr = rte_zmalloc("enic_vnic_intr", sizeof(struct vnic_intr) *
1564                                  enic->conf_intr_count, 8);
1565         enic->rq = rte_zmalloc("enic_vnic_rq", sizeof(struct vnic_rq) *
1566                                enic->conf_rq_count, 8);
1567         enic->wq = rte_zmalloc("enic_vnic_wq", sizeof(struct vnic_wq) *
1568                                enic->conf_wq_count, 8);
1569         if (enic->conf_cq_count > 0 && enic->cq == NULL) {
1570                 dev_err(enic, "failed to allocate vnic_cq, aborting.\n");
1571                 return -1;
1572         }
1573         if (enic->conf_intr_count > 0 && enic->intr == NULL) {
1574                 dev_err(enic, "failed to allocate vnic_intr, aborting.\n");
1575                 return -1;
1576         }
1577         if (enic->conf_rq_count > 0 && enic->rq == NULL) {
1578                 dev_err(enic, "failed to allocate vnic_rq, aborting.\n");
1579                 return -1;
1580         }
1581         if (enic->conf_wq_count > 0 && enic->wq == NULL) {
1582                 dev_err(enic, "failed to allocate vnic_wq, aborting.\n");
1583                 return -1;
1584         }
1585
1586         /* Get the supported filters */
1587         enic_fdir_info(enic);
1588
1589         eth_dev->data->mac_addrs = rte_zmalloc("enic_mac_addr", ETH_ALEN
1590                                                 * ENIC_MAX_MAC_ADDR, 0);
1591         if (!eth_dev->data->mac_addrs) {
1592                 dev_err(enic, "mac addr storage alloc failed, aborting.\n");
1593                 return -1;
1594         }
1595         ether_addr_copy((struct ether_addr *) enic->mac_addr,
1596                         eth_dev->data->mac_addrs);
1597
1598         vnic_dev_set_reset_flag(enic->vdev, 0);
1599
1600         LIST_INIT(&enic->flows);
1601         rte_spinlock_init(&enic->flows_lock);
1602
1603         /* set up link status checking */
1604         vnic_dev_notify_set(enic->vdev, -1); /* No Intr for notify */
1605
1606         enic->overlay_offload = false;
1607         if (!enic->disable_overlay && enic->vxlan &&
1608             /* 'VXLAN feature' enables VXLAN, NVGRE, and GENEVE. */
1609             vnic_dev_overlay_offload_ctrl(enic->vdev,
1610                                           OVERLAY_FEATURE_VXLAN,
1611                                           OVERLAY_OFFLOAD_ENABLE) == 0) {
1612                 enic->tx_offload_capa |=
1613                         DEV_TX_OFFLOAD_OUTER_IPV4_CKSUM |
1614                         DEV_TX_OFFLOAD_GENEVE_TNL_TSO |
1615                         DEV_TX_OFFLOAD_VXLAN_TNL_TSO;
1616                 /*
1617                  * Do not add PKT_TX_OUTER_{IPV4,IPV6} as they are not
1618                  * 'offload' flags (i.e. not part of PKT_TX_OFFLOAD_MASK).
1619                  */
1620                 enic->tx_offload_mask |=
1621                         PKT_TX_OUTER_IP_CKSUM |
1622                         PKT_TX_TUNNEL_MASK;
1623                 enic->overlay_offload = true;
1624                 enic->vxlan_port = ENIC_DEFAULT_VXLAN_PORT;
1625                 dev_info(enic, "Overlay offload is enabled\n");
1626         }
1627
1628         return 0;
1629
1630 }
1631
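/*
 * One-time adapter bring-up, normally reached from the PMD's ethdev
 * initialization path: map BAR0, register the vNIC device, install the
 * DMA alloc/free callbacks, allocate stats memory, open and initialize
 * the device, then finish with enic_dev_init(). Secondary processes
 * return early since the primary has already done this.
 */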
1632 int enic_probe(struct enic *enic)
1633 {
1634         struct rte_pci_device *pdev = enic->pdev;
1635         int err = -1;
1636
1637         dev_debug(enic, "Initializing ENIC PMD\n");
1638
1639         /* if this is a secondary process the hardware is already initialized */
1640         if (rte_eal_process_type() != RTE_PROC_PRIMARY)
1641                 return 0;
1642
1643         enic->bar0.vaddr = (void *)pdev->mem_resource[0].addr;
1644         enic->bar0.len = pdev->mem_resource[0].len;
1645
1646         /* Register vNIC device */
1647         enic->vdev = vnic_dev_register(NULL, enic, enic->pdev, &enic->bar0, 1);
1648         if (!enic->vdev) {
1649                 dev_err(enic, "vNIC registration failed, aborting\n");
1650                 goto err_out;
1651         }
1652
1653         LIST_INIT(&enic->memzone_list);
1654         rte_spinlock_init(&enic->memzone_list_lock);
1655
1656         vnic_register_cbacks(enic->vdev,
1657                 enic_alloc_consistent,
1658                 enic_free_consistent);
1659
1660         /*
1661          * Allocate the consistent memory for stats upfront so both primary and
1662          * secondary processes can dump stats.
1663          */
1664         err = vnic_dev_alloc_stats_mem(enic->vdev);
1665         if (err) {
1666                 dev_err(enic, "Failed to allocate stats memory, aborting\n");
1667                 goto err_out_unregister;
1668         }
1669         /* Issue device open to get the device into a known state */
1670         err = enic_dev_open(enic);
1671         if (err) {
1672                 dev_err(enic, "vNIC dev open failed, aborting\n");
1673                 goto err_out_unregister;
1674         }
1675
1676         /* Set ingress vlan rewrite mode before vnic initialization */
1677         dev_debug(enic, "Set ig_vlan_rewrite_mode=%u\n",
1678                   enic->ig_vlan_rewrite_mode);
1679         err = vnic_dev_set_ig_vlan_rewrite_mode(enic->vdev,
1680                 enic->ig_vlan_rewrite_mode);
1681         if (err) {
1682                 dev_err(enic,
1683                         "Failed to set ingress vlan rewrite mode, aborting.\n");
1684                 goto err_out_dev_close;
1685         }
1686
1687         /* Issue device init to initialize the vnic-to-switch link.
1688          * We'll start with carrier off and wait for link UP
1689          * notification later to turn on carrier.  We don't need
1690          * to wait here for the vnic-to-switch link initialization
1691          * to complete; link UP notification is the indication that
1692          * the process is complete.
1693          */
1694
1695         err = vnic_dev_init(enic->vdev, 0);
1696         if (err) {
1697                 dev_err(enic, "vNIC dev init failed, aborting\n");
1698                 goto err_out_dev_close;
1699         }
1700
1701         err = enic_dev_init(enic);
1702         if (err) {
1703                 dev_err(enic, "Device initialization failed, aborting\n");
1704                 goto err_out_dev_close;
1705         }
1706
1707         return 0;
1708
1709 err_out_dev_close:
1710         vnic_dev_close(enic->vdev);
1711 err_out_unregister:
1712         vnic_dev_unregister(enic->vdev);
1713 err_out:
1714         return err;
1715 }
1716
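/* Tear down in reverse order of enic_probe(): deinit, close, unregister. */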
1717 void enic_remove(struct enic *enic)
1718 {
1719         enic_dev_deinit(enic);
1720         vnic_dev_close(enic->vdev);
1721         vnic_dev_unregister(enic->vdev);
1722 }