net/enic: pick the right Rx handler after changing MTU
drivers/net/enic/enic_main.c (dpdk.git)
1 /* SPDX-License-Identifier: BSD-3-Clause
2  * Copyright 2008-2017 Cisco Systems, Inc.  All rights reserved.
3  * Copyright 2007 Nuova Systems, Inc.  All rights reserved.
4  */
5
6 #include <stdio.h>
7
8 #include <sys/stat.h>
9 #include <sys/mman.h>
10 #include <fcntl.h>
11 #include <libgen.h>
12
13 #include <rte_pci.h>
14 #include <rte_bus_pci.h>
15 #include <rte_memzone.h>
16 #include <rte_malloc.h>
17 #include <rte_mbuf.h>
18 #include <rte_string_fns.h>
19 #include <rte_ethdev_driver.h>
20
21 #include "enic_compat.h"
22 #include "enic.h"
23 #include "wq_enet_desc.h"
24 #include "rq_enet_desc.h"
25 #include "cq_enet_desc.h"
26 #include "vnic_enet.h"
27 #include "vnic_dev.h"
28 #include "vnic_wq.h"
29 #include "vnic_rq.h"
30 #include "vnic_cq.h"
31 #include "vnic_intr.h"
32 #include "vnic_nic.h"
33
34 static inline int enic_is_sriov_vf(struct enic *enic)
35 {
36         return enic->pdev->id.device_id == PCI_DEVICE_ID_CISCO_VIC_ENET_VF;
37 }
38
39 static int is_zero_addr(uint8_t *addr)
40 {
41         return !(addr[0] | addr[1] | addr[2] | addr[3] | addr[4] | addr[5]);
42 }
43
44 static int is_mcast_addr(uint8_t *addr)
45 {
46         return addr[0] & 1;
47 }
48
49 static int is_eth_addr_valid(uint8_t *addr)
50 {
51         return !is_mcast_addr(addr) && !is_zero_addr(addr);
52 }
53
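/*
 * Free any mbufs still held in the RQ's mbuf ring, e.g. when a receive
 * queue is torn down or reinitialized.
 */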
54 static void
55 enic_rxmbuf_queue_release(__rte_unused struct enic *enic, struct vnic_rq *rq)
56 {
57         uint16_t i;
58
59         if (!rq || !rq->mbuf_ring) {
60                 dev_debug(enic, "Pointer to rq or mbuf_ring is NULL");
61                 return;
62         }
63
64         for (i = 0; i < rq->ring.desc_count; i++) {
65                 if (rq->mbuf_ring[i]) {
66                         rte_pktmbuf_free_seg(rq->mbuf_ring[i]);
67                         rq->mbuf_ring[i] = NULL;
68                 }
69         }
70 }
71
72 static void enic_free_wq_buf(struct rte_mbuf **buf)
73 {
74         struct rte_mbuf *mbuf = *buf;
75
76         rte_pktmbuf_free_seg(mbuf);
77         *buf = NULL;
78 }
79
80 static void enic_log_q_error(struct enic *enic)
81 {
82         unsigned int i;
83         u32 error_status;
84
85         for (i = 0; i < enic->wq_count; i++) {
86                 error_status = vnic_wq_error_status(&enic->wq[i]);
87                 if (error_status)
88                         dev_err(enic, "WQ[%d] error_status %d\n", i,
89                                 error_status);
90         }
91
92         for (i = 0; i < enic_vnic_rq_count(enic); i++) {
93                 if (!enic->rq[i].in_use)
94                         continue;
95                 error_status = vnic_rq_error_status(&enic->rq[i]);
96                 if (error_status)
97                         dev_err(enic, "RQ[%d] error_status %d\n", i,
98                                 error_status);
99         }
100 }
101
102 static void enic_clear_soft_stats(struct enic *enic)
103 {
104         struct enic_soft_stats *soft_stats = &enic->soft_stats;
105         rte_atomic64_clear(&soft_stats->rx_nombuf);
106         rte_atomic64_clear(&soft_stats->rx_packet_errors);
107         rte_atomic64_clear(&soft_stats->tx_oversized);
108 }
109
110 static void enic_init_soft_stats(struct enic *enic)
111 {
112         struct enic_soft_stats *soft_stats = &enic->soft_stats;
113         rte_atomic64_init(&soft_stats->rx_nombuf);
114         rte_atomic64_init(&soft_stats->rx_packet_errors);
115         rte_atomic64_init(&soft_stats->tx_oversized);
116         enic_clear_soft_stats(enic);
117 }
118
119 void enic_dev_stats_clear(struct enic *enic)
120 {
121         if (vnic_dev_stats_clear(enic->vdev))
122                 dev_err(enic, "Error in clearing stats\n");
123         enic_clear_soft_stats(enic);
124 }
125
126 int enic_dev_stats_get(struct enic *enic, struct rte_eth_stats *r_stats)
127 {
128         struct vnic_stats *stats;
129         struct enic_soft_stats *soft_stats = &enic->soft_stats;
130         int64_t rx_truncated;
131         uint64_t rx_packet_errors;
132         int ret = vnic_dev_stats_dump(enic->vdev, &stats);
133
134         if (ret) {
135                 dev_err(enic, "Error in getting stats\n");
136                 return ret;
137         }
138
139         /* The number of truncated packets can only be calculated by
140          * subtracting a hardware counter from error packets received by
141          * the driver. Note: this causes transient inaccuracies in the
142          * ipackets count. Also, the lengths of truncated packets are
143          * counted in ibytes even though truncated packets are dropped,
144          * which can make ibytes slightly higher than it should be.
145          */
146         rx_packet_errors = rte_atomic64_read(&soft_stats->rx_packet_errors);
147         rx_truncated = rx_packet_errors - stats->rx.rx_errors;
148
149         r_stats->ipackets = stats->rx.rx_frames_ok - rx_truncated;
150         r_stats->opackets = stats->tx.tx_frames_ok;
151
152         r_stats->ibytes = stats->rx.rx_bytes_ok;
153         r_stats->obytes = stats->tx.tx_bytes_ok;
154
155         r_stats->ierrors = stats->rx.rx_errors + stats->rx.rx_drop;
156         r_stats->oerrors = stats->tx.tx_errors
157                            + rte_atomic64_read(&soft_stats->tx_oversized);
158
159         r_stats->imissed = stats->rx.rx_no_bufs + rx_truncated;
160
161         r_stats->rx_nombuf = rte_atomic64_read(&soft_stats->rx_nombuf);
162         return 0;
163 }
164
165 int enic_del_mac_address(struct enic *enic, int mac_index)
166 {
167         struct rte_eth_dev *eth_dev = enic->rte_dev;
168         uint8_t *mac_addr = eth_dev->data->mac_addrs[mac_index].addr_bytes;
169
170         return vnic_dev_del_addr(enic->vdev, mac_addr);
171 }
172
173 int enic_set_mac_address(struct enic *enic, uint8_t *mac_addr)
174 {
175         int err;
176
177         if (!is_eth_addr_valid(mac_addr)) {
178                 dev_err(enic, "invalid mac address\n");
179                 return -EINVAL;
180         }
181
182         err = vnic_dev_add_addr(enic->vdev, mac_addr);
183         if (err)
184                 dev_err(enic, "add mac addr failed\n");
185         return err;
186 }
187
188 static void
189 enic_free_rq_buf(struct rte_mbuf **mbuf)
190 {
191         if (*mbuf == NULL)
192                 return;
193
194         rte_pktmbuf_free(*mbuf);
195         *mbuf = NULL;
196 }
197
198 void enic_init_vnic_resources(struct enic *enic)
199 {
200         unsigned int error_interrupt_enable = 1;
201         unsigned int error_interrupt_offset = 0;
202         unsigned int rxq_interrupt_enable = 0;
203         unsigned int rxq_interrupt_offset = ENICPMD_RXQ_INTR_OFFSET;
204         unsigned int index = 0;
205         unsigned int cq_idx;
206         struct vnic_rq *data_rq;
207
208         if (enic->rte_dev->data->dev_conf.intr_conf.rxq)
209                 rxq_interrupt_enable = 1;
210
211         for (index = 0; index < enic->rq_count; index++) {
212                 cq_idx = enic_cq_rq(enic, enic_rte_rq_idx_to_sop_idx(index));
213
214                 vnic_rq_init(&enic->rq[enic_rte_rq_idx_to_sop_idx(index)],
215                         cq_idx,
216                         error_interrupt_enable,
217                         error_interrupt_offset);
218
219                 data_rq = &enic->rq[enic_rte_rq_idx_to_data_idx(index)];
220                 if (data_rq->in_use)
221                         vnic_rq_init(data_rq,
222                                      cq_idx,
223                                      error_interrupt_enable,
224                                      error_interrupt_offset);
225
226                 vnic_cq_init(&enic->cq[cq_idx],
227                         0 /* flow_control_enable */,
228                         1 /* color_enable */,
229                         0 /* cq_head */,
230                         0 /* cq_tail */,
231                         1 /* cq_tail_color */,
232                         rxq_interrupt_enable,
233                         1 /* cq_entry_enable */,
234                         0 /* cq_message_enable */,
235                         rxq_interrupt_offset,
236                         0 /* cq_message_addr */);
237                 if (rxq_interrupt_enable)
238                         rxq_interrupt_offset++;
239         }
240
241         for (index = 0; index < enic->wq_count; index++) {
242                 vnic_wq_init(&enic->wq[index],
243                         enic_cq_wq(enic, index),
244                         error_interrupt_enable,
245                         error_interrupt_offset);
246                 /* Compute unsupported ol flags for enic_prep_pkts() */
247                 enic->wq[index].tx_offload_notsup_mask =
248                         PKT_TX_OFFLOAD_MASK ^ enic->tx_offload_mask;
249
250                 cq_idx = enic_cq_wq(enic, index);
251                 vnic_cq_init(&enic->cq[cq_idx],
252                         0 /* flow_control_enable */,
253                         1 /* color_enable */,
254                         0 /* cq_head */,
255                         0 /* cq_tail */,
256                         1 /* cq_tail_color */,
257                         0 /* interrupt_enable */,
258                         0 /* cq_entry_enable */,
259                         1 /* cq_message_enable */,
260                         0 /* interrupt offset */,
261                         (u64)enic->wq[index].cqmsg_rz->iova);
262         }
263
264         for (index = 0; index < enic->intr_count; index++) {
265                 vnic_intr_init(&enic->intr[index],
266                                enic->config.intr_timer_usec,
267                                enic->config.intr_timer_type,
268                                /*mask_on_assertion*/1);
269         }
270 }
271
272
273 static int
274 enic_alloc_rx_queue_mbufs(struct enic *enic, struct vnic_rq *rq)
275 {
276         struct rte_mbuf *mb;
277         struct rq_enet_desc *rqd = rq->ring.descs;
278         unsigned i;
279         dma_addr_t dma_addr;
280         uint32_t max_rx_pkt_len;
281         uint16_t rq_buf_len;
282
283         if (!rq->in_use)
284                 return 0;
285
286         dev_debug(enic, "queue %u, allocating %u rx queue mbufs\n", rq->index,
287                   rq->ring.desc_count);
288
289         /*
290          * If *not* using scatter and the mbuf size is greater than the
291          * requested max packet size (max_rx_pkt_len), then reduce the
292          * posted buffer size to max_rx_pkt_len. HW still receives packets
293          * larger than max_rx_pkt_len, but they will be truncated, which we
294          * drop in the rx handler. Not ideal, but better than returning
295          * large packets when the user is not expecting them.
296          */
297         max_rx_pkt_len = enic->rte_dev->data->dev_conf.rxmode.max_rx_pkt_len;
298         rq_buf_len = rte_pktmbuf_data_room_size(rq->mp) - RTE_PKTMBUF_HEADROOM;
299         if (max_rx_pkt_len < rq_buf_len && !rq->data_queue_enable)
300                 rq_buf_len = max_rx_pkt_len;
301         for (i = 0; i < rq->ring.desc_count; i++, rqd++) {
302                 mb = rte_mbuf_raw_alloc(rq->mp);
303                 if (mb == NULL) {
304                         dev_err(enic, "RX mbuf alloc failed queue_id=%u\n",
305                         (unsigned)rq->index);
306                         return -ENOMEM;
307                 }
308
309                 mb->data_off = RTE_PKTMBUF_HEADROOM;
310                 dma_addr = (dma_addr_t)(mb->buf_iova
311                            + RTE_PKTMBUF_HEADROOM);
312                 rq_enet_desc_enc(rqd, dma_addr,
313                                 (rq->is_sop ? RQ_ENET_TYPE_ONLY_SOP
314                                 : RQ_ENET_TYPE_NOT_SOP),
315                                 rq_buf_len);
316                 rq->mbuf_ring[i] = mb;
317         }
318         /*
319          * Do not post the buffers to the NIC until we enable the RQ via
320          * enic_start_rq().
321          */
322         rq->need_initial_post = true;
323         /* Initialize fetch index while RQ is disabled */
324         iowrite32(0, &rq->ctrl->fetch_index);
325         return 0;
326 }
327
328 /*
329  * Post the Rx buffers for the first time. enic_alloc_rx_queue_mbufs() has
330  * allocated the buffers and filled the RQ descriptor ring. Just need to push
331  * the post index to the NIC.
332  */
333 static void
334 enic_initial_post_rx(struct enic *enic, struct vnic_rq *rq)
335 {
336         if (!rq->in_use || !rq->need_initial_post)
337                 return;
338
339         /* make sure all prior writes are complete before doing the PIO write */
340         rte_rmb();
341
342         /* Post all but the last buffer to VIC. */
343         rq->posted_index = rq->ring.desc_count - 1;
344
345         rq->rx_nb_hold = 0;
346
347         dev_debug(enic, "port=%u, qidx=%u, Write %u posted idx, %u sw held\n",
348                 enic->port_id, rq->index, rq->posted_index, rq->rx_nb_hold);
349         iowrite32(rq->posted_index, &rq->ctrl->posted_index);
350         rte_rmb();
351         rq->need_initial_post = false;
352 }
353
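/*
 * Allocate a DMA-able memzone for the vNIC resource code and remember it on
 * enic->memzone_list so enic_free_consistent() can look it up by address
 * later.
 */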
354 static void *
355 enic_alloc_consistent(void *priv, size_t size,
356         dma_addr_t *dma_handle, u8 *name)
357 {
358         void *vaddr;
359         const struct rte_memzone *rz;
360         *dma_handle = 0;
361         struct enic *enic = (struct enic *)priv;
362         struct enic_memzone_entry *mze;
363
364         rz = rte_memzone_reserve_aligned((const char *)name, size,
365                         SOCKET_ID_ANY, RTE_MEMZONE_IOVA_CONTIG, ENIC_ALIGN);
366         if (!rz) {
367                 pr_err("%s : Failed to allocate memory requested for %s\n",
368                         __func__, name);
369                 return NULL;
370         }
371
372         vaddr = rz->addr;
373         *dma_handle = (dma_addr_t)rz->iova;
374
375         mze = rte_malloc("enic memzone entry",
376                          sizeof(struct enic_memzone_entry), 0);
377
378         if (!mze) {
379                 pr_err("%s : Failed to allocate memory for memzone list\n",
380                        __func__);
381                 rte_memzone_free(rz);
382                 return NULL;
383         }
384
385         mze->rz = rz;
386
387         rte_spinlock_lock(&enic->memzone_list_lock);
388         LIST_INSERT_HEAD(&enic->memzone_list, mze, entries);
389         rte_spinlock_unlock(&enic->memzone_list_lock);
390
391         return vaddr;
392 }
393
394 static void
395 enic_free_consistent(void *priv,
396                      __rte_unused size_t size,
397                      void *vaddr,
398                      dma_addr_t dma_handle)
399 {
400         struct enic_memzone_entry *mze;
401         struct enic *enic = (struct enic *)priv;
402
403         rte_spinlock_lock(&enic->memzone_list_lock);
404         LIST_FOREACH(mze, &enic->memzone_list, entries) {
405                 if (mze->rz->addr == vaddr &&
406                     mze->rz->iova == dma_handle)
407                         break;
408         }
409         if (mze == NULL) {
410                 rte_spinlock_unlock(&enic->memzone_list_lock);
411                 dev_warning(enic,
412                             "Tried to free memory, but couldn't find it in the memzone list\n");
413                 return;
414         }
415         LIST_REMOVE(mze, entries);
416         rte_spinlock_unlock(&enic->memzone_list_lock);
417         rte_memzone_free(mze->rz);
418         rte_free(mze);
419 }
420
421 int enic_link_update(struct enic *enic)
422 {
423         struct rte_eth_dev *eth_dev = enic->rte_dev;
424         struct rte_eth_link link;
425
426         memset(&link, 0, sizeof(link));
427         link.link_status = enic_get_link_status(enic);
428         link.link_duplex = ETH_LINK_FULL_DUPLEX;
429         link.link_speed = vnic_dev_port_speed(enic->vdev);
430
431         return rte_eth_linkstatus_set(eth_dev, &link);
432 }
433
434 static void
435 enic_intr_handler(void *arg)
436 {
437         struct rte_eth_dev *dev = (struct rte_eth_dev *)arg;
438         struct enic *enic = pmd_priv(dev);
439
440         vnic_intr_return_all_credits(&enic->intr[ENICPMD_LSC_INTR_OFFSET]);
441
442         enic_link_update(enic);
443         _rte_eth_dev_callback_process(dev, RTE_ETH_EVENT_INTR_LSC, NULL);
444         enic_log_q_error(enic);
445 }
446
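/*
 * Set up one MSI-X event fd per Rx queue when rxq interrupts are requested;
 * fails if the interrupt handle cannot provide multiple vectors.
 */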
447 static int enic_rxq_intr_init(struct enic *enic)
448 {
449         struct rte_intr_handle *intr_handle;
450         uint32_t rxq_intr_count, i;
451         int err;
452
453         intr_handle = enic->rte_dev->intr_handle;
454         if (!enic->rte_dev->data->dev_conf.intr_conf.rxq)
455                 return 0;
456         /*
457          * Rx queue interrupts only work when we have MSI-X interrupts,
458          * one per queue. Sharing one interrupt is technically
459          * possible with VIC, but it is not worth the complications it brings.
460          */
461         if (!rte_intr_cap_multiple(intr_handle)) {
462                 dev_err(enic, "Rx queue interrupts require MSI-X interrupts"
463                         " (vfio-pci driver)\n");
464                 return -ENOTSUP;
465         }
466         rxq_intr_count = enic->intr_count - ENICPMD_RXQ_INTR_OFFSET;
467         err = rte_intr_efd_enable(intr_handle, rxq_intr_count);
468         if (err) {
469                 dev_err(enic, "Failed to enable event fds for Rx queue"
470                         " interrupts\n");
471                 return err;
472         }
473         intr_handle->intr_vec = rte_zmalloc("enic_intr_vec",
474                                             rxq_intr_count * sizeof(int), 0);
475         if (intr_handle->intr_vec == NULL) {
476                 dev_err(enic, "Failed to allocate intr_vec\n");
477                 return -ENOMEM;
478         }
479         for (i = 0; i < rxq_intr_count; i++)
480                 intr_handle->intr_vec[i] = i + ENICPMD_RXQ_INTR_OFFSET;
481         return 0;
482 }
483
484 static void enic_rxq_intr_deinit(struct enic *enic)
485 {
486         struct rte_intr_handle *intr_handle;
487
488         intr_handle = enic->rte_dev->intr_handle;
489         rte_intr_efd_disable(intr_handle);
490         if (intr_handle->intr_vec != NULL) {
491                 rte_free(intr_handle->intr_vec);
492                 intr_handle->intr_vec = NULL;
493         }
494 }
495
496 static void enic_prep_wq_for_simple_tx(struct enic *enic, uint16_t queue_idx)
497 {
498         struct wq_enet_desc *desc;
499         struct vnic_wq *wq;
500         unsigned int i;
501
502         /*
503          * Fill WQ descriptor fields that never change. Every descriptor is
504          * one packet, so set EOP. Also set CQ_ENTRY every ENIC_WQ_CQ_THRESH
505          * descriptors (i.e. request one completion update every 32 packets).
506          */
507         wq = &enic->wq[queue_idx];
508         desc = (struct wq_enet_desc *)wq->ring.descs;
509         for (i = 0; i < wq->ring.desc_count; i++, desc++) {
510                 desc->header_length_flags = 1 << WQ_ENET_FLAGS_EOP_SHIFT;
511                 if (i % ENIC_WQ_CQ_THRESH == ENIC_WQ_CQ_THRESH - 1)
512                         desc->header_length_flags |=
513                                 (1 << WQ_ENET_FLAGS_CQ_ENTRY_SHIFT);
514         }
515 }
516
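/*
 * Select the Rx burst handler: the simplified non-scatter handler when no RQ
 * uses a data (scatter) queue, otherwise the default handler. Per the commit
 * above, this must be re-done whenever scatter use can change, e.g. after an
 * MTU update.
 */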
517 static void pick_rx_handler(struct enic *enic)
518 {
519         struct rte_eth_dev *eth_dev;
520
521         /* Use the non-scatter, simplified RX handler if possible. */
522         eth_dev = enic->rte_dev;
523         if (enic->rq_count > 0 && enic->rq[0].data_queue_enable == 0) {
524                 PMD_INIT_LOG(DEBUG, " use the non-scatter Rx handler");
525                 eth_dev->rx_pkt_burst = &enic_noscatter_recv_pkts;
526         } else {
527                 PMD_INIT_LOG(DEBUG, " use the normal Rx handler");
528                 eth_dev->rx_pkt_burst = &enic_recv_pkts;
529         }
530 }
531
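/*
 * Bring the port up: allocate Rx buffers for every RQ, pick the Tx and Rx
 * burst handlers, start all work and receive queues, and register/enable the
 * error and link-status interrupt.
 */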
532 int enic_enable(struct enic *enic)
533 {
534         unsigned int index;
535         int err;
536         struct rte_eth_dev *eth_dev = enic->rte_dev;
537
538         eth_dev->data->dev_link.link_speed = vnic_dev_port_speed(enic->vdev);
539         eth_dev->data->dev_link.link_duplex = ETH_LINK_FULL_DUPLEX;
540
541         /* vnic notification of link status has already been turned on in
542          * enic_dev_init() which is called during probe time.  Here we are
543          * just turning on interrupt vector 0 if needed.
544          */
545         if (eth_dev->data->dev_conf.intr_conf.lsc)
546                 vnic_dev_notify_set(enic->vdev, 0);
547
548         err = enic_rxq_intr_init(enic);
549         if (err)
550                 return err;
551         if (enic_clsf_init(enic))
552                 dev_warning(enic, "Init of hash table for clsf failed. "\
553                         "Flow director feature will not work\n");
554
555         for (index = 0; index < enic->rq_count; index++) {
556                 err = enic_alloc_rx_queue_mbufs(enic,
557                         &enic->rq[enic_rte_rq_idx_to_sop_idx(index)]);
558                 if (err) {
559                         dev_err(enic, "Failed to alloc sop RX queue mbufs\n");
560                         return err;
561                 }
562                 err = enic_alloc_rx_queue_mbufs(enic,
563                         &enic->rq[enic_rte_rq_idx_to_data_idx(index)]);
564                 if (err) {
565                         /* release the allocated mbufs for the sop rq */
566                         enic_rxmbuf_queue_release(enic,
567                                 &enic->rq[enic_rte_rq_idx_to_sop_idx(index)]);
568
569                         dev_err(enic, "Failed to alloc data RX queue mbufs\n");
570                         return err;
571                 }
572         }
573
574         /*
575          * Use the simple TX handler if possible. All offloads must be disabled
576          * except mbuf fast free.
577          */
578         if ((eth_dev->data->dev_conf.txmode.offloads &
579              ~DEV_TX_OFFLOAD_MBUF_FAST_FREE) == 0) {
580                 PMD_INIT_LOG(DEBUG, " use the simple tx handler");
581                 eth_dev->tx_pkt_burst = &enic_simple_xmit_pkts;
582                 for (index = 0; index < enic->wq_count; index++)
583                         enic_prep_wq_for_simple_tx(enic, index);
584         } else {
585                 PMD_INIT_LOG(DEBUG, " use the default tx handler");
586                 eth_dev->tx_pkt_burst = &enic_xmit_pkts;
587         }
588
589         pick_rx_handler(enic);
590
591         for (index = 0; index < enic->wq_count; index++)
592                 enic_start_wq(enic, index);
593         for (index = 0; index < enic->rq_count; index++)
594                 enic_start_rq(enic, index);
595
596         vnic_dev_add_addr(enic->vdev, enic->mac_addr);
597
598         vnic_dev_enable_wait(enic->vdev);
599
600         /* Register and enable error interrupt */
601         rte_intr_callback_register(&(enic->pdev->intr_handle),
602                 enic_intr_handler, (void *)enic->rte_dev);
603
604         rte_intr_enable(&(enic->pdev->intr_handle));
605         /* Unmask LSC interrupt */
606         vnic_intr_unmask(&enic->intr[ENICPMD_LSC_INTR_OFFSET]);
607
608         return 0;
609 }
610
611 int enic_alloc_intr_resources(struct enic *enic)
612 {
613         int err;
614         unsigned int i;
615
616         dev_info(enic, "vNIC resources used:  "\
617                 "wq %d rq %d cq %d intr %d\n",
618                 enic->wq_count, enic_vnic_rq_count(enic),
619                 enic->cq_count, enic->intr_count);
620
621         for (i = 0; i < enic->intr_count; i++) {
622                 err = vnic_intr_alloc(enic->vdev, &enic->intr[i], i);
623                 if (err) {
624                         enic_free_vnic_resources(enic);
625                         return err;
626                 }
627         }
628         return 0;
629 }
630
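/*
 * Release everything backing one rte_eth Rx queue: the cached free mbufs,
 * the sop/data mbuf rings, both vNIC RQs, and the associated CQ.
 */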
631 void enic_free_rq(void *rxq)
632 {
633         struct vnic_rq *rq_sop, *rq_data;
634         struct enic *enic;
635
636         if (rxq == NULL)
637                 return;
638
639         rq_sop = (struct vnic_rq *)rxq;
640         enic = vnic_dev_priv(rq_sop->vdev);
641         rq_data = &enic->rq[rq_sop->data_queue_idx];
642
643         if (rq_sop->free_mbufs) {
644                 struct rte_mbuf **mb;
645                 int i;
646
647                 mb = rq_sop->free_mbufs;
648                 for (i = ENIC_RX_BURST_MAX - rq_sop->num_free_mbufs;
649                      i < ENIC_RX_BURST_MAX; i++)
650                         rte_pktmbuf_free(mb[i]);
651                 rte_free(rq_sop->free_mbufs);
652                 rq_sop->free_mbufs = NULL;
653                 rq_sop->num_free_mbufs = 0;
654         }
655
656         enic_rxmbuf_queue_release(enic, rq_sop);
657         if (rq_data->in_use)
658                 enic_rxmbuf_queue_release(enic, rq_data);
659
660         rte_free(rq_sop->mbuf_ring);
661         if (rq_data->in_use)
662                 rte_free(rq_data->mbuf_ring);
663
664         rq_sop->mbuf_ring = NULL;
665         rq_data->mbuf_ring = NULL;
666
667         vnic_rq_free(rq_sop);
668         if (rq_data->in_use)
669                 vnic_rq_free(rq_data);
670
671         vnic_cq_free(&enic->cq[enic_sop_rq_idx_to_cq_idx(rq_sop->index)]);
672
673         rq_sop->in_use = 0;
674         rq_data->in_use = 0;
675 }
676
677 void enic_start_wq(struct enic *enic, uint16_t queue_idx)
678 {
679         struct rte_eth_dev *eth_dev = enic->rte_dev;
680         vnic_wq_enable(&enic->wq[queue_idx]);
681         eth_dev->data->tx_queue_state[queue_idx] = RTE_ETH_QUEUE_STATE_STARTED;
682 }
683
684 int enic_stop_wq(struct enic *enic, uint16_t queue_idx)
685 {
686         struct rte_eth_dev *eth_dev = enic->rte_dev;
687         int ret;
688
689         ret = vnic_wq_disable(&enic->wq[queue_idx]);
690         if (ret)
691                 return ret;
692
693         eth_dev->data->tx_queue_state[queue_idx] = RTE_ETH_QUEUE_STATE_STOPPED;
694         return 0;
695 }
696
697 void enic_start_rq(struct enic *enic, uint16_t queue_idx)
698 {
699         struct vnic_rq *rq_sop;
700         struct vnic_rq *rq_data;
701         rq_sop = &enic->rq[enic_rte_rq_idx_to_sop_idx(queue_idx)];
702         rq_data = &enic->rq[rq_sop->data_queue_idx];
703         struct rte_eth_dev *eth_dev = enic->rte_dev;
704
705         if (rq_data->in_use) {
706                 vnic_rq_enable(rq_data);
707                 enic_initial_post_rx(enic, rq_data);
708         }
709         rte_mb();
710         vnic_rq_enable(rq_sop);
711         enic_initial_post_rx(enic, rq_sop);
712         eth_dev->data->rx_queue_state[queue_idx] = RTE_ETH_QUEUE_STATE_STARTED;
713 }
714
715 int enic_stop_rq(struct enic *enic, uint16_t queue_idx)
716 {
717         int ret1 = 0, ret2 = 0;
718         struct rte_eth_dev *eth_dev = enic->rte_dev;
719         struct vnic_rq *rq_sop;
720         struct vnic_rq *rq_data;
721         rq_sop = &enic->rq[enic_rte_rq_idx_to_sop_idx(queue_idx)];
722         rq_data = &enic->rq[rq_sop->data_queue_idx];
723
724         ret2 = vnic_rq_disable(rq_sop);
725         rte_mb();
726         if (rq_data->in_use)
727                 ret1 = vnic_rq_disable(rq_data);
728
729         if (ret2)
730                 return ret2;
731         else if (ret1)
732                 return ret1;
733
734         eth_dev->data->rx_queue_state[queue_idx] = RTE_ETH_QUEUE_STATE_STOPPED;
735         return 0;
736 }
737
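/*
 * Set up the sop/data RQ pair backing one rte_eth Rx queue. The data
 * (scatter) queue is used only when a received packet can span multiple
 * mbufs; otherwise it is left unused.
 */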
738 int enic_alloc_rq(struct enic *enic, uint16_t queue_idx,
739         unsigned int socket_id, struct rte_mempool *mp,
740         uint16_t nb_desc, uint16_t free_thresh)
741 {
742         int rc;
743         uint16_t sop_queue_idx = enic_rte_rq_idx_to_sop_idx(queue_idx);
744         uint16_t data_queue_idx = enic_rte_rq_idx_to_data_idx(queue_idx);
745         struct vnic_rq *rq_sop = &enic->rq[sop_queue_idx];
746         struct vnic_rq *rq_data = &enic->rq[data_queue_idx];
747         unsigned int mbuf_size, mbufs_per_pkt;
748         unsigned int nb_sop_desc, nb_data_desc;
749         uint16_t min_sop, max_sop, min_data, max_data;
750         uint32_t max_rx_pkt_len;
751
752         rq_sop->is_sop = 1;
753         rq_sop->data_queue_idx = data_queue_idx;
754         rq_data->is_sop = 0;
755         rq_data->data_queue_idx = 0;
756         rq_sop->socket_id = socket_id;
757         rq_sop->mp = mp;
758         rq_data->socket_id = socket_id;
759         rq_data->mp = mp;
760         rq_sop->in_use = 1;
761         rq_sop->rx_free_thresh = free_thresh;
762         rq_data->rx_free_thresh = free_thresh;
763         dev_debug(enic, "Set queue_id:%u free thresh:%u\n", queue_idx,
764                   free_thresh);
765
766         mbuf_size = (uint16_t)(rte_pktmbuf_data_room_size(mp) -
767                                RTE_PKTMBUF_HEADROOM);
768         /* max_rx_pkt_len includes the ethernet header and CRC. */
769         max_rx_pkt_len = enic->rte_dev->data->dev_conf.rxmode.max_rx_pkt_len;
770
771         if (enic->rte_dev->data->dev_conf.rxmode.offloads &
772             DEV_RX_OFFLOAD_SCATTER) {
773                 dev_info(enic, "Rq %u Scatter rx mode enabled\n", queue_idx);
774                 /* ceil((max pkt len)/mbuf_size) */
775                 mbufs_per_pkt = (max_rx_pkt_len + mbuf_size - 1) / mbuf_size;
776         } else {
777                 dev_info(enic, "Scatter rx mode disabled\n");
778                 mbufs_per_pkt = 1;
779                 if (max_rx_pkt_len > mbuf_size) {
780                         dev_warning(enic, "The maximum Rx packet size (%u) is"
781                                     " larger than the mbuf size (%u), and"
782                                     " scatter is disabled. Larger packets will"
783                                     " be truncated.\n",
784                                     max_rx_pkt_len, mbuf_size);
785                 }
786         }
787
788         if (mbufs_per_pkt > 1) {
789                 dev_info(enic, "Rq %u Scatter rx mode in use\n", queue_idx);
790                 rq_sop->data_queue_enable = 1;
791                 rq_data->in_use = 1;
792                 /*
793                  * HW does not directly support rxmode.max_rx_pkt_len. HW always
794                  * receives packet sizes up to the "max" MTU.
795                  * If not using scatter, we can achieve the effect of dropping
796                  * larger packets by reducing the size of posted buffers.
797                  * See enic_alloc_rx_queue_mbufs().
798                  */
799                 if (max_rx_pkt_len <
800                     enic_mtu_to_max_rx_pktlen(enic->max_mtu)) {
801                         dev_warning(enic, "rxmode.max_rx_pkt_len is ignored"
802                                     " when scatter rx mode is in use.\n");
803                 }
804         } else {
805                 dev_info(enic, "Rq %u Scatter rx mode not being used\n",
806                          queue_idx);
807                 rq_sop->data_queue_enable = 0;
808                 rq_data->in_use = 0;
809         }
810
811         /* the number of descriptors has to be a multiple of 32 */
812         nb_sop_desc = (nb_desc / mbufs_per_pkt) & ENIC_ALIGN_DESCS_MASK;
813         nb_data_desc = (nb_desc - nb_sop_desc) & ENIC_ALIGN_DESCS_MASK;
814
815         rq_sop->max_mbufs_per_pkt = mbufs_per_pkt;
816         rq_data->max_mbufs_per_pkt = mbufs_per_pkt;
817
818         if (mbufs_per_pkt > 1) {
819                 min_sop = ENIC_RX_BURST_MAX;
820                 max_sop = ((enic->config.rq_desc_count /
821                             (mbufs_per_pkt - 1)) & ENIC_ALIGN_DESCS_MASK);
822                 min_data = min_sop * (mbufs_per_pkt - 1);
823                 max_data = enic->config.rq_desc_count;
824         } else {
825                 min_sop = ENIC_RX_BURST_MAX;
826                 max_sop = enic->config.rq_desc_count;
827                 min_data = 0;
828                 max_data = 0;
829         }
830
831         if (nb_desc < (min_sop + min_data)) {
832                 dev_warning(enic,
833                             "Number of rx descs too low, adjusting to minimum\n");
834                 nb_sop_desc = min_sop;
835                 nb_data_desc = min_data;
836         } else if (nb_desc > (max_sop + max_data)) {
837                 dev_warning(enic,
838                             "Number of rx_descs too high, adjusting to maximum\n");
839                 nb_sop_desc = max_sop;
840                 nb_data_desc = max_data;
841         }
842         if (mbufs_per_pkt > 1) {
843                 dev_info(enic, "For max packet size %u and mbuf size %u valid"
844                          " rx descriptor range is %u to %u\n",
845                          max_rx_pkt_len, mbuf_size, min_sop + min_data,
846                          max_sop + max_data);
847         }
848         dev_info(enic, "Using %d rx descriptors (sop %d, data %d)\n",
849                  nb_sop_desc + nb_data_desc, nb_sop_desc, nb_data_desc);
850
851         /* Allocate sop queue resources */
852         rc = vnic_rq_alloc(enic->vdev, rq_sop, sop_queue_idx,
853                 nb_sop_desc, sizeof(struct rq_enet_desc));
854         if (rc) {
855                 dev_err(enic, "error in allocation of sop rq\n");
856                 goto err_exit;
857         }
858         nb_sop_desc = rq_sop->ring.desc_count;
859
860         if (rq_data->in_use) {
861                 /* Allocate data queue resources */
862                 rc = vnic_rq_alloc(enic->vdev, rq_data, data_queue_idx,
863                                    nb_data_desc,
864                                    sizeof(struct rq_enet_desc));
865                 if (rc) {
866                         dev_err(enic, "error in allocation of data rq\n");
867                         goto err_free_rq_sop;
868                 }
869                 nb_data_desc = rq_data->ring.desc_count;
870         }
871         rc = vnic_cq_alloc(enic->vdev, &enic->cq[queue_idx], queue_idx,
872                            socket_id, nb_sop_desc + nb_data_desc,
873                            sizeof(struct cq_enet_rq_desc));
874         if (rc) {
875                 dev_err(enic, "error in allocation of cq for rq\n");
876                 goto err_free_rq_data;
877         }
878
879         /* Allocate the mbuf rings */
880         rq_sop->mbuf_ring = (struct rte_mbuf **)
881                 rte_zmalloc_socket("rq->mbuf_ring",
882                                    sizeof(struct rte_mbuf *) * nb_sop_desc,
883                                    RTE_CACHE_LINE_SIZE, rq_sop->socket_id);
884         if (rq_sop->mbuf_ring == NULL)
885                 goto err_free_cq;
886
887         if (rq_data->in_use) {
888                 rq_data->mbuf_ring = (struct rte_mbuf **)
889                         rte_zmalloc_socket("rq->mbuf_ring",
890                                 sizeof(struct rte_mbuf *) * nb_data_desc,
891                                 RTE_CACHE_LINE_SIZE, rq_sop->socket_id);
892                 if (rq_data->mbuf_ring == NULL)
893                         goto err_free_sop_mbuf;
894         }
895
896         rq_sop->free_mbufs = (struct rte_mbuf **)
897                 rte_zmalloc_socket("rq->free_mbufs",
898                                    sizeof(struct rte_mbuf *) *
899                                    ENIC_RX_BURST_MAX,
900                                    RTE_CACHE_LINE_SIZE, rq_sop->socket_id);
901         if (rq_sop->free_mbufs == NULL)
902                 goto err_free_data_mbuf;
903         rq_sop->num_free_mbufs = 0;
904
905         rq_sop->tot_nb_desc = nb_desc; /* squirrel away for MTU update function */
906
907         return 0;
908
909 err_free_data_mbuf:
910         rte_free(rq_data->mbuf_ring);
911 err_free_sop_mbuf:
912         rte_free(rq_sop->mbuf_ring);
913 err_free_cq:
914         /* cleanup on error */
915         vnic_cq_free(&enic->cq[queue_idx]);
916 err_free_rq_data:
917         if (rq_data->in_use)
918                 vnic_rq_free(rq_data);
919 err_free_rq_sop:
920         vnic_rq_free(rq_sop);
921 err_exit:
922         return -ENOMEM;
923 }
924
925 void enic_free_wq(void *txq)
926 {
927         struct vnic_wq *wq;
928         struct enic *enic;
929
930         if (txq == NULL)
931                 return;
932
933         wq = (struct vnic_wq *)txq;
934         enic = vnic_dev_priv(wq->vdev);
935         rte_memzone_free(wq->cqmsg_rz);
936         vnic_wq_free(wq);
937         vnic_cq_free(&enic->cq[enic->rq_count + wq->index]);
938 }
939
940 int enic_alloc_wq(struct enic *enic, uint16_t queue_idx,
941         unsigned int socket_id, uint16_t nb_desc)
942 {
943         int err;
944         struct vnic_wq *wq = &enic->wq[queue_idx];
945         unsigned int cq_index = enic_cq_wq(enic, queue_idx);
946         char name[NAME_MAX];
947         static int instance;
948
949         wq->socket_id = socket_id;
950         /*
951          * rte_eth_tx_queue_setup() checks min, max, and alignment. So just
952          * print an info message for diagnostics.
953          */
954         dev_info(enic, "TX Queues - effective number of descs:%d\n", nb_desc);
955
956         /* Allocate queue resources */
957         err = vnic_wq_alloc(enic->vdev, &enic->wq[queue_idx], queue_idx,
958                 nb_desc,
959                 sizeof(struct wq_enet_desc));
960         if (err) {
961                 dev_err(enic, "error in allocation of wq\n");
962                 return err;
963         }
964
965         err = vnic_cq_alloc(enic->vdev, &enic->cq[cq_index], cq_index,
966                 socket_id, nb_desc,
967                 sizeof(struct cq_enet_wq_desc));
968         if (err) {
969                 vnic_wq_free(wq);
970                 dev_err(enic, "error in allocation of cq for wq\n");
971         }
972
973         /* set up CQ message */
974         snprintf((char *)name, sizeof(name),
975                  "vnic_cqmsg-%s-%d-%d", enic->bdf_name, queue_idx,
976                 instance++);
977
978         wq->cqmsg_rz = rte_memzone_reserve_aligned((const char *)name,
979                         sizeof(uint32_t), SOCKET_ID_ANY,
980                         RTE_MEMZONE_IOVA_CONTIG, ENIC_ALIGN);
981         if (!wq->cqmsg_rz)
982                 return -ENOMEM;
983
984         return err;
985 }
986
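/*
 * Quiesce the port: mask interrupts, disable the vNIC and all WQs/RQs, and
 * clean their rings. Link notification is left enabled so link-status
 * polling keeps working.
 */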
987 int enic_disable(struct enic *enic)
988 {
989         unsigned int i;
990         int err;
991
992         for (i = 0; i < enic->intr_count; i++) {
993                 vnic_intr_mask(&enic->intr[i]);
994                 (void)vnic_intr_masked(&enic->intr[i]); /* flush write */
995         }
996         enic_rxq_intr_deinit(enic);
997         rte_intr_disable(&enic->pdev->intr_handle);
998         rte_intr_callback_unregister(&enic->pdev->intr_handle,
999                                      enic_intr_handler,
1000                                      (void *)enic->rte_dev);
1001
1002         vnic_dev_disable(enic->vdev);
1003
1004         enic_clsf_destroy(enic);
1005
1006         if (!enic_is_sriov_vf(enic))
1007                 vnic_dev_del_addr(enic->vdev, enic->mac_addr);
1008
1009         for (i = 0; i < enic->wq_count; i++) {
1010                 err = vnic_wq_disable(&enic->wq[i]);
1011                 if (err)
1012                         return err;
1013         }
1014         for (i = 0; i < enic_vnic_rq_count(enic); i++) {
1015                 if (enic->rq[i].in_use) {
1016                         err = vnic_rq_disable(&enic->rq[i]);
1017                         if (err)
1018                                 return err;
1019                 }
1020         }
1021
1022         /* If we were using interrupts, set the interrupt vector to -1
1023          * to disable interrupts.  We are not disabling link notifications,
1024          * though, as we want the polling of link status to continue working.
1025          */
1026         if (enic->rte_dev->data->dev_conf.intr_conf.lsc)
1027                 vnic_dev_notify_set(enic->vdev, -1);
1028
1029         vnic_dev_set_reset_flag(enic->vdev, 1);
1030
1031         for (i = 0; i < enic->wq_count; i++)
1032                 vnic_wq_clean(&enic->wq[i], enic_free_wq_buf);
1033
1034         for (i = 0; i < enic_vnic_rq_count(enic); i++)
1035                 if (enic->rq[i].in_use)
1036                         vnic_rq_clean(&enic->rq[i], enic_free_rq_buf);
1037         for (i = 0; i < enic->cq_count; i++)
1038                 vnic_cq_clean(&enic->cq[i]);
1039         for (i = 0; i < enic->intr_count; i++)
1040                 vnic_intr_clean(&enic->intr[i]);
1041
1042         return 0;
1043 }
1044
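/*
 * Kick off an operation via 'start' and poll 'finished' roughly once per
 * millisecond, for up to 2 seconds, before giving up with -ETIMEDOUT.
 */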
1045 static int enic_dev_wait(struct vnic_dev *vdev,
1046         int (*start)(struct vnic_dev *, int),
1047         int (*finished)(struct vnic_dev *, int *),
1048         int arg)
1049 {
1050         int done;
1051         int err;
1052         int i;
1053
1054         err = start(vdev, arg);
1055         if (err)
1056                 return err;
1057
1058         /* Wait for func to complete...2 seconds max */
1059         for (i = 0; i < 2000; i++) {
1060                 err = finished(vdev, &done);
1061                 if (err)
1062                         return err;
1063                 if (done)
1064                         return 0;
1065                 usleep(1000);
1066         }
1067         return -ETIMEDOUT;
1068 }
1069
1070 static int enic_dev_open(struct enic *enic)
1071 {
1072         int err;
1073         int flags = CMD_OPENF_IG_DESCCACHE;
1074
1075         err = enic_dev_wait(enic->vdev, vnic_dev_open,
1076                 vnic_dev_open_done, flags);
1077         if (err)
1078                 dev_err(enic_get_dev(enic),
1079                         "vNIC device open failed, err %d\n", err);
1080
1081         return err;
1082 }
1083
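/*
 * Copy the user RSS key into a DMA buffer using the 10-byte-segment layout
 * of union vnic_rss_key and program it into the NIC.
 */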
1084 static int enic_set_rsskey(struct enic *enic, uint8_t *user_key)
1085 {
1086         dma_addr_t rss_key_buf_pa;
1087         union vnic_rss_key *rss_key_buf_va = NULL;
1088         int err, i;
1089         u8 name[NAME_MAX];
1090
1091         RTE_ASSERT(user_key != NULL);
1092         snprintf((char *)name, NAME_MAX, "rss_key-%s", enic->bdf_name);
1093         rss_key_buf_va = enic_alloc_consistent(enic, sizeof(union vnic_rss_key),
1094                 &rss_key_buf_pa, name);
1095         if (!rss_key_buf_va)
1096                 return -ENOMEM;
1097
1098         for (i = 0; i < ENIC_RSS_HASH_KEY_SIZE; i++)
1099                 rss_key_buf_va->key[i / 10].b[i % 10] = user_key[i];
1100
1101         err = enic_set_rss_key(enic,
1102                 rss_key_buf_pa,
1103                 sizeof(union vnic_rss_key));
1104
1105         /* Save for later queries */
1106         if (!err) {
1107                 rte_memcpy(&enic->rss_key, rss_key_buf_va,
1108                            sizeof(union vnic_rss_key));
1109         }
1110         enic_free_consistent(enic, sizeof(union vnic_rss_key),
1111                 rss_key_buf_va, rss_key_buf_pa);
1112
1113         return err;
1114 }
1115
1116 int enic_set_rss_reta(struct enic *enic, union vnic_rss_cpu *rss_cpu)
1117 {
1118         dma_addr_t rss_cpu_buf_pa;
1119         union vnic_rss_cpu *rss_cpu_buf_va = NULL;
1120         int err;
1121         u8 name[NAME_MAX];
1122
1123         snprintf((char *)name, NAME_MAX, "rss_cpu-%s", enic->bdf_name);
1124         rss_cpu_buf_va = enic_alloc_consistent(enic, sizeof(union vnic_rss_cpu),
1125                 &rss_cpu_buf_pa, name);
1126         if (!rss_cpu_buf_va)
1127                 return -ENOMEM;
1128
1129         rte_memcpy(rss_cpu_buf_va, rss_cpu, sizeof(union vnic_rss_cpu));
1130
1131         err = enic_set_rss_cpu(enic,
1132                 rss_cpu_buf_pa,
1133                 sizeof(union vnic_rss_cpu));
1134
1135         enic_free_consistent(enic, sizeof(union vnic_rss_cpu),
1136                 rss_cpu_buf_va, rss_cpu_buf_pa);
1137
1138         /* Save for later queries */
1139         if (!err)
1140                 rte_memcpy(&enic->rss_cpu, rss_cpu, sizeof(union vnic_rss_cpu));
1141         return err;
1142 }
1143
1144 static int enic_set_niccfg(struct enic *enic, u8 rss_default_cpu,
1145         u8 rss_hash_type, u8 rss_hash_bits, u8 rss_base_cpu, u8 rss_enable)
1146 {
1147         const u8 tso_ipid_split_en = 0;
1148         int err;
1149
1150         err = enic_set_nic_cfg(enic,
1151                 rss_default_cpu, rss_hash_type,
1152                 rss_hash_bits, rss_base_cpu,
1153                 rss_enable, tso_ipid_split_en,
1154                 enic->ig_vlan_strip_en);
1155
1156         return err;
1157 }
1158
1159 /* Initialize RSS with defaults, called from dev_configure */
1160 int enic_init_rss_nic_cfg(struct enic *enic)
1161 {
1162         static uint8_t default_rss_key[] = {
1163                 85, 67, 83, 97, 119, 101, 115, 111, 109, 101,
1164                 80, 65, 76, 79, 117, 110, 105, 113, 117, 101,
1165                 76, 73, 78, 85, 88, 114, 111, 99, 107, 115,
1166                 69, 78, 73, 67, 105, 115, 99, 111, 111, 108,
1167         };
1168         struct rte_eth_rss_conf rss_conf;
1169         union vnic_rss_cpu rss_cpu;
1170         int ret, i;
1171
1172         rss_conf = enic->rte_dev->data->dev_conf.rx_adv_conf.rss_conf;
1173         /*
1174          * If setting key for the first time, and the user gives us none, then
1175          * push the default key to NIC.
1176          */
1177         if (rss_conf.rss_key == NULL) {
1178                 rss_conf.rss_key = default_rss_key;
1179                 rss_conf.rss_key_len = ENIC_RSS_HASH_KEY_SIZE;
1180         }
1181         ret = enic_set_rss_conf(enic, &rss_conf);
1182         if (ret) {
1183                 dev_err(enic, "Failed to configure RSS\n");
1184                 return ret;
1185         }
1186         if (enic->rss_enable) {
1187                 /* If enabling RSS, use the default reta */
1188                 for (i = 0; i < ENIC_RSS_RETA_SIZE; i++) {
1189                         rss_cpu.cpu[i / 4].b[i % 4] =
1190                                 enic_rte_rq_idx_to_sop_idx(i % enic->rq_count);
1191                 }
1192                 ret = enic_set_rss_reta(enic, &rss_cpu);
1193                 if (ret)
1194                         dev_err(enic, "Failed to set RSS indirection table\n");
1195         }
1196         return ret;
1197 }
1198
1199 int enic_setup_finish(struct enic *enic)
1200 {
1201         enic_init_soft_stats(enic);
1202
1203         /* Default conf */
1204         vnic_dev_packet_filter(enic->vdev,
1205                 1 /* directed  */,
1206                 1 /* multicast */,
1207                 1 /* broadcast */,
1208                 0 /* promisc   */,
1209                 1 /* allmulti  */);
1210
1211         enic->promisc = 0;
1212         enic->allmulti = 1;
1213
1214         return 0;
1215 }
1216
1217 static int enic_rss_conf_valid(struct enic *enic,
1218                                struct rte_eth_rss_conf *rss_conf)
1219 {
1220         /* RSS is disabled per VIC settings. Ignore rss_conf. */
1221         if (enic->flow_type_rss_offloads == 0)
1222                 return 0;
1223         if (rss_conf->rss_key != NULL &&
1224             rss_conf->rss_key_len != ENIC_RSS_HASH_KEY_SIZE) {
1225                 dev_err(enic, "Given rss_key is %d bytes, it must be %d\n",
1226                         rss_conf->rss_key_len, ENIC_RSS_HASH_KEY_SIZE);
1227                 return -EINVAL;
1228         }
1229         if (rss_conf->rss_hf != 0 &&
1230             (rss_conf->rss_hf & enic->flow_type_rss_offloads) == 0) {
1231                 dev_err(enic, "Given rss_hf contains none of the supported"
1232                         " types\n");
1233                 return -EINVAL;
1234         }
1235         return 0;
1236 }
1237
1238 /* Set hash type and key according to rss_conf */
1239 int enic_set_rss_conf(struct enic *enic, struct rte_eth_rss_conf *rss_conf)
1240 {
1241         struct rte_eth_dev *eth_dev;
1242         uint64_t rss_hf;
1243         u8 rss_hash_type;
1244         u8 rss_enable;
1245         int ret;
1246
1247         RTE_ASSERT(rss_conf != NULL);
1248         ret = enic_rss_conf_valid(enic, rss_conf);
1249         if (ret) {
1250                 dev_err(enic, "RSS configuration (rss_conf) is invalid\n");
1251                 return ret;
1252         }
1253
1254         eth_dev = enic->rte_dev;
1255         rss_hash_type = 0;
1256         rss_hf = rss_conf->rss_hf & enic->flow_type_rss_offloads;
1257         if (enic->rq_count > 1 &&
1258             (eth_dev->data->dev_conf.rxmode.mq_mode & ETH_MQ_RX_RSS_FLAG) &&
1259             rss_hf != 0) {
1260                 rss_enable = 1;
1261                 if (rss_hf & (ETH_RSS_IPV4 | ETH_RSS_FRAG_IPV4 |
1262                               ETH_RSS_NONFRAG_IPV4_OTHER))
1263                         rss_hash_type |= NIC_CFG_RSS_HASH_TYPE_IPV4;
1264                 if (rss_hf & ETH_RSS_NONFRAG_IPV4_TCP)
1265                         rss_hash_type |= NIC_CFG_RSS_HASH_TYPE_TCP_IPV4;
1266                 if (rss_hf & ETH_RSS_NONFRAG_IPV4_UDP) {
1267                         rss_hash_type |= NIC_CFG_RSS_HASH_TYPE_UDP_IPV4;
1268                         if (enic->udp_rss_weak) {
1269                                 /*
1270                                  * 'TCP' is not a typo. The "weak" version of
1271                                  * UDP RSS requires both the TCP and UDP bits
1272                                  * be set. It does enable TCP RSS as well.
1273                                  */
1274                                 rss_hash_type |= NIC_CFG_RSS_HASH_TYPE_TCP_IPV4;
1275                         }
1276                 }
1277                 if (rss_hf & (ETH_RSS_IPV6 | ETH_RSS_IPV6_EX |
1278                               ETH_RSS_FRAG_IPV6 | ETH_RSS_NONFRAG_IPV6_OTHER))
1279                         rss_hash_type |= NIC_CFG_RSS_HASH_TYPE_IPV6;
1280                 if (rss_hf & (ETH_RSS_NONFRAG_IPV6_TCP | ETH_RSS_IPV6_TCP_EX))
1281                         rss_hash_type |= NIC_CFG_RSS_HASH_TYPE_TCP_IPV6;
1282                 if (rss_hf & (ETH_RSS_NONFRAG_IPV6_UDP | ETH_RSS_IPV6_UDP_EX)) {
1283                         rss_hash_type |= NIC_CFG_RSS_HASH_TYPE_UDP_IPV6;
1284                         if (enic->udp_rss_weak)
1285                                 rss_hash_type |= NIC_CFG_RSS_HASH_TYPE_TCP_IPV6;
1286                 }
1287         } else {
1288                 rss_enable = 0;
1289                 rss_hf = 0;
1290         }
1291
1292         /* Set the hash key if provided */
1293         if (rss_enable && rss_conf->rss_key) {
1294                 ret = enic_set_rsskey(enic, rss_conf->rss_key);
1295                 if (ret) {
1296                         dev_err(enic, "Failed to set RSS key\n");
1297                         return ret;
1298                 }
1299         }
1300
1301         ret = enic_set_niccfg(enic, ENIC_RSS_DEFAULT_CPU, rss_hash_type,
1302                               ENIC_RSS_HASH_BITS, ENIC_RSS_BASE_CPU,
1303                               rss_enable);
1304         if (!ret) {
1305                 enic->rss_hf = rss_hf;
1306                 enic->rss_hash_type = rss_hash_type;
1307                 enic->rss_enable = rss_enable;
1308         } else {
1309                 dev_err(enic, "Failed to update RSS configurations."
1310                         " hash=0x%x\n", rss_hash_type);
1311         }
1312         return ret;
1313 }
1314
1315 int enic_set_vlan_strip(struct enic *enic)
1316 {
1317         /*
1318          * Unfortunately, VLAN strip on/off and RSS on/off are configured
1319          * together. So, re-do niccfg, preserving the current RSS settings.
1320          */
1321         return enic_set_niccfg(enic, ENIC_RSS_DEFAULT_CPU, enic->rss_hash_type,
1322                                ENIC_RSS_HASH_BITS, ENIC_RSS_BASE_CPU,
1323                                enic->rss_enable);
1324 }
1325
1326 void enic_add_packet_filter(struct enic *enic)
1327 {
1328         /* Args -> directed, multicast, broadcast, promisc, allmulti */
1329         vnic_dev_packet_filter(enic->vdev, 1, 1, 1,
1330                 enic->promisc, enic->allmulti);
1331 }
1332
1333 int enic_get_link_status(struct enic *enic)
1334 {
1335         return vnic_dev_link_status(enic->vdev);
1336 }
1337
1338 static void enic_dev_deinit(struct enic *enic)
1339 {
1340         struct rte_eth_dev *eth_dev = enic->rte_dev;
1341
1342         /* stop link status checking */
1343         vnic_dev_notify_unset(enic->vdev);
1344
1345         rte_free(eth_dev->data->mac_addrs);
1346         rte_free(enic->cq);
1347         rte_free(enic->intr);
1348         rte_free(enic->rq);
1349         rte_free(enic->wq);
1350 }
1351
1352
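/*
 * Check that the vNIC provides enough RQs, WQs, CQs and interrupts for the
 * queue counts requested through the ethdev API, remembering that each
 * ethdev Rx queue consumes two vNIC RQs (sop + data).
 */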
1353 int enic_set_vnic_res(struct enic *enic)
1354 {
1355         struct rte_eth_dev *eth_dev = enic->rte_dev;
1356         int rc = 0;
1357         unsigned int required_rq, required_wq, required_cq, required_intr;
1358
1359         /* Always use two vNIC RQs per eth_dev RQ, regardless of Rx scatter. */
1360         required_rq = eth_dev->data->nb_rx_queues * 2;
1361         required_wq = eth_dev->data->nb_tx_queues;
1362         required_cq = eth_dev->data->nb_rx_queues + eth_dev->data->nb_tx_queues;
1363         required_intr = 1; /* 1 for LSC even if intr_conf.lsc is 0 */
1364         if (eth_dev->data->dev_conf.intr_conf.rxq) {
1365                 required_intr += eth_dev->data->nb_rx_queues;
1366         }
1367
1368         if (enic->conf_rq_count < required_rq) {
1369                 dev_err(dev, "Not enough Receive queues. Requested:%u which uses %d RQs on VIC, Configured:%u\n",
1370                         eth_dev->data->nb_rx_queues,
1371                         required_rq, enic->conf_rq_count);
1372                 rc = -EINVAL;
1373         }
1374         if (enic->conf_wq_count < required_wq) {
1375                 dev_err(dev, "Not enough Transmit queues. Requested:%u, Configured:%u\n",
1376                         eth_dev->data->nb_tx_queues, enic->conf_wq_count);
1377                 rc = -EINVAL;
1378         }
1379
1380         if (enic->conf_cq_count < required_cq) {
1381                 dev_err(dev, "Not enough Completion queues. Required:%u, Configured:%u\n",
1382                         required_cq, enic->conf_cq_count);
1383                 rc = -EINVAL;
1384         }
1385         if (enic->conf_intr_count < required_intr) {
1386                 dev_err(dev, "Not enough Interrupts to support Rx queue"
1387                         " interrupts. Required:%u, Configured:%u\n",
1388                         required_intr, enic->conf_intr_count);
1389                 rc = -EINVAL;
1390         }
1391
1392         if (rc == 0) {
1393                 enic->rq_count = eth_dev->data->nb_rx_queues;
1394                 enic->wq_count = eth_dev->data->nb_tx_queues;
1395                 enic->cq_count = enic->rq_count + enic->wq_count;
1396                 enic->intr_count = required_intr;
1397         }
1398
1399         return rc;
1400 }
1401
1402 /* Reinitialize an RQ (sop + data) and its CQ, then refill the Rx buffers */
1403 static int
1404 enic_reinit_rq(struct enic *enic, unsigned int rq_idx)
1405 {
1406         struct vnic_rq *sop_rq, *data_rq;
1407         unsigned int cq_idx;
1408         int rc = 0;
1409
1410         sop_rq = &enic->rq[enic_rte_rq_idx_to_sop_idx(rq_idx)];
1411         data_rq = &enic->rq[enic_rte_rq_idx_to_data_idx(rq_idx)];
1412         cq_idx = rq_idx;
1413
1414         vnic_cq_clean(&enic->cq[cq_idx]);
1415         vnic_cq_init(&enic->cq[cq_idx],
1416                      0 /* flow_control_enable */,
1417                      1 /* color_enable */,
1418                      0 /* cq_head */,
1419                      0 /* cq_tail */,
1420                      1 /* cq_tail_color */,
1421                      0 /* interrupt_enable */,
1422                      1 /* cq_entry_enable */,
1423                      0 /* cq_message_enable */,
1424                      0 /* interrupt offset */,
1425                      0 /* cq_message_addr */);
1426
1427
1428         vnic_rq_init_start(sop_rq, enic_cq_rq(enic,
1429                            enic_rte_rq_idx_to_sop_idx(rq_idx)), 0,
1430                            sop_rq->ring.desc_count - 1, 1, 0);
1431         if (data_rq->in_use) {
1432                 vnic_rq_init_start(data_rq,
1433                                    enic_cq_rq(enic,
1434                                    enic_rte_rq_idx_to_data_idx(rq_idx)), 0,
1435                                    data_rq->ring.desc_count - 1, 1, 0);
1436         }
1437
1438         rc = enic_alloc_rx_queue_mbufs(enic, sop_rq);
1439         if (rc)
1440                 return rc;
1441
1442         if (data_rq->in_use) {
1443                 rc = enic_alloc_rx_queue_mbufs(enic, data_rq);
1444                 if (rc) {
1445                         enic_rxmbuf_queue_release(enic, sop_rq);
1446                         return rc;
1447                 }
1448         }
1449
1450         return 0;
1451 }
1452
1453 /* The Cisco NIC can send and receive packets up to a max packet size
1454  * determined by the NIC type and firmware. There is also an MTU
1455  * configured into the NIC via the CIMC/UCSM management interface
1456  * which can be overridden by this function (up to the max packet size).
1457  * Depending on the network setup, doing so may cause packet drops
1458  * and unexpected behavior.
1459  */
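/*
 * For context, applications normally reach this path through the generic
 * ethdev API rather than calling enic_set_mtu() directly. A minimal,
 * illustrative sketch (the port id and MTU value are arbitrary examples;
 * error handling and port setup are omitted):
 *
 *     uint16_t port_id = 0;
 *     int ret = rte_eth_dev_set_mtu(port_id, 9000);
 *     if (ret != 0)
 *         printf("MTU update failed: %d\n", ret);
 */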
1460 int enic_set_mtu(struct enic *enic, uint16_t new_mtu)
1461 {
1462         unsigned int rq_idx;
1463         struct vnic_rq *rq;
1464         int rc = 0;
1465         uint16_t old_mtu;       /* previous setting */
1466         uint16_t config_mtu;    /* Value configured into NIC via CIMC/UCSM */
1467         struct rte_eth_dev *eth_dev = enic->rte_dev;
1468
1469         old_mtu = eth_dev->data->mtu;
1470         config_mtu = enic->config.mtu;
1471
1472         if (rte_eal_process_type() != RTE_PROC_PRIMARY)
1473                 return -E_RTE_SECONDARY;
1474
1475         if (new_mtu > enic->max_mtu) {
1476                 dev_err(enic,
1477                         "MTU not updated: requested (%u) greater than max (%u)\n",
1478                         new_mtu, enic->max_mtu);
1479                 return -EINVAL;
1480         }
1481         if (new_mtu < ENIC_MIN_MTU) {
1482                 dev_info(enic,
1483                         "MTU not updated: requested (%u) less than min (%u)\n",
1484                         new_mtu, ENIC_MIN_MTU);
1485                 return -EINVAL;
1486         }
1487         if (new_mtu > config_mtu)
1488                 dev_warning(enic,
1489                         "MTU (%u) is greater than value configured in NIC (%u)\n",
1490                         new_mtu, config_mtu);
1491
1492         /* Update the MTU and maximum packet length */
1493         eth_dev->data->mtu = new_mtu;
1494         eth_dev->data->dev_conf.rxmode.max_rx_pkt_len =
1495                 enic_mtu_to_max_rx_pktlen(new_mtu);
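        /*
         * The helper above converts the MTU to a frame length:
         * max_rx_pkt_len includes L2 framing overhead (Ethernet header)
         * that the MTU itself does not.
         */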
1496
1497         /*
1498          * If the device has not started (enic_enable), nothing to do.
1499          * Later, enic_enable() will set up RQs reflecting the new maximum
1500          * packet length.
1501          */
1502         if (!eth_dev->data->dev_started)
1503                 goto set_mtu_done;
1504
1505         /*
1506          * The device has started, re-do RQs on the fly. In the process, we
1507          * pick up the new maximum packet length.
1508          *
1509          * Some applications rely on the ability to change MTU without stopping
1510          * the device. So keep this behavior for now.
1511          */
1512         rte_spinlock_lock(&enic->mtu_lock);
1513
1514         /* Stop traffic on all RQs */
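        /*
         * Each rte Rx queue is backed by a start-of-packet (SOP) RQ and,
         * when Rx scatter is used, a data RQ on the VIC, hence the walk
         * over 2 * rq_count hardware RQs below.
         */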
1515         for (rq_idx = 0; rq_idx < enic->rq_count * 2; rq_idx++) {
1516                 rq = &enic->rq[rq_idx];
1517                 if (rq->is_sop && rq->in_use) {
1518                         rc = enic_stop_rq(enic,
1519                                           enic_sop_rq_idx_to_rte_idx(rq_idx));
1520                         if (rc) {
1521                                 dev_err(enic, "Failed to stop RQ %u\n", rq_idx);
1522                                 goto set_mtu_done;
1523                         }
1524                 }
1525         }
1526
1527         /* Replace the Rx function with a no-op to avoid getting stale packets */
1528         eth_dev->rx_pkt_burst = enic_dummy_recv_pkts;
1529         rte_mb();
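        /*
         * enic_dummy_recv_pkts() simply returns zero packets, so lcores that
         * keep polling during the reconfiguration see an empty queue instead
         * of touching RQ state that is about to be freed.
         */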
1530
1531         /* Allow time for threads to exit the real Rx function. */
1532         usleep(100000);
1533
1534         /* Now it is safe to free and reallocate the RQs with the new MTU */
1538         for (rq_idx = 0; rq_idx < enic->rq_count; rq_idx++) {
1539                 rq = &enic->rq[enic_rte_rq_idx_to_sop_idx(rq_idx)];
1540                 if (!rq->in_use)
1541                         continue;
1542
1543                 enic_free_rq(rq);
1544                 rc = enic_alloc_rq(enic, rq_idx, rq->socket_id, rq->mp,
1545                                    rq->tot_nb_desc, rq->rx_free_thresh);
1546                 if (rc) {
1547                         dev_err(enic,
1548                                 "Fatal MTU alloc error - No traffic will pass\n");
1549                         goto set_mtu_done;
1550                 }
1551
1552                 rc = enic_reinit_rq(enic, rq_idx);
1553                 if (rc) {
1554                         dev_err(enic,
1555                                 "Fatal MTU RQ reinit - No traffic will pass\n");
1556                         goto set_mtu_done;
1557                 }
1558         }
1559
1560         /* put back the real receive function */
1561         rte_mb();
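        /*
         * pick_rx_handler() chooses the Rx burst function that matches the
         * current configuration (for example, whether scatter Rx is needed
         * for the new maximum packet length) instead of unconditionally
         * restoring the default handler.
         */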
1562         pick_rx_handler(enic);
1563         rte_mb();
1564
1565         /* restart Rx traffic */
1566         for (rq_idx = 0; rq_idx < enic->rq_count; rq_idx++) {
1567                 rq = &enic->rq[enic_rte_rq_idx_to_sop_idx(rq_idx)];
1568                 if (rq->is_sop && rq->in_use)
1569                         enic_start_rq(enic, rq_idx);
1570         }
1571
1572 set_mtu_done:
1573         dev_info(enic, "MTU changed from %u to %u\n", old_mtu, new_mtu);
1574         rte_spinlock_unlock(&enic->mtu_lock);
1575         return rc;
1576 }
1577
1578 static int enic_dev_init(struct enic *enic)
1579 {
1580         int err;
1581         struct rte_eth_dev *eth_dev = enic->rte_dev;
1582
1583         vnic_dev_intr_coal_timer_info_default(enic->vdev);
1584
1585         /* Get vNIC configuration */
1587         err = enic_get_vnic_config(enic);
1588         if (err) {
1589                 dev_err(dev, "Get vNIC configuration failed, aborting\n");
1590                 return err;
1591         }
1592
1593         /* Get available resource counts */
1594         enic_get_res_counts(enic);
1595         if (enic->conf_rq_count == 1) {
1596                 dev_err(enic, "Running with only 1 RQ configured in the vNIC is not supported.\n");
1597                 dev_err(enic, "Please configure 2 RQs in the vNIC for each Rx queue used by DPDK.\n");
1598                 dev_err(enic, "See the ENIC PMD guide for more information.\n");
1599                 return -EINVAL;
1600         }
1601         /* Queue counts may be zero; rte_zmalloc returns NULL in that case. */
1602         enic->cq = rte_zmalloc("enic_vnic_cq", sizeof(struct vnic_cq) *
1603                                enic->conf_cq_count, 8);
1604         enic->intr = rte_zmalloc("enic_vnic_intr", sizeof(struct vnic_intr) *
1605                                  enic->conf_intr_count, 8);
1606         enic->rq = rte_zmalloc("enic_vnic_rq", sizeof(struct vnic_rq) *
1607                                enic->conf_rq_count, 8);
1608         enic->wq = rte_zmalloc("enic_vnic_wq", sizeof(struct vnic_wq) *
1609                                enic->conf_wq_count, 8);
1610         if (enic->conf_cq_count > 0 && enic->cq == NULL) {
1611                 dev_err(enic, "failed to allocate vnic_cq, aborting.\n");
1612                 return -1;
1613         }
1614         if (enic->conf_intr_count > 0 && enic->intr == NULL) {
1615                 dev_err(enic, "failed to allocate vnic_intr, aborting.\n");
1616                 return -1;
1617         }
1618         if (enic->conf_rq_count > 0 && enic->rq == NULL) {
1619                 dev_err(enic, "failed to allocate vnic_rq, aborting.\n");
1620                 return -1;
1621         }
1622         if (enic->conf_wq_count > 0 && enic->wq == NULL) {
1623                 dev_err(enic, "failed to allocate vnic_wq, aborting.\n");
1624                 return -1;
1625         }
1626
1627         /* Get the supported filters */
1628         enic_fdir_info(enic);
1629
1630         eth_dev->data->mac_addrs = rte_zmalloc("enic_mac_addr", ETH_ALEN
1631                                                 * ENIC_MAX_MAC_ADDR, 0);
1632         if (!eth_dev->data->mac_addrs) {
1633                 dev_err(enic, "mac addr storage alloc failed, aborting.\n");
1634                 return -1;
1635         }
1636         ether_addr_copy((struct ether_addr *) enic->mac_addr,
1637                         eth_dev->data->mac_addrs);
1638
1639         vnic_dev_set_reset_flag(enic->vdev, 0);
1640
1641         LIST_INIT(&enic->flows);
1642         rte_spinlock_init(&enic->flows_lock);
1643
1644         /* set up link status checking */
1645         vnic_dev_notify_set(enic->vdev, -1); /* No Intr for notify */
1646
1647         enic->overlay_offload = false;
1648         if (!enic->disable_overlay && enic->vxlan &&
1649             /* 'VXLAN feature' enables VXLAN, NVGRE, and GENEVE. */
1650             vnic_dev_overlay_offload_ctrl(enic->vdev,
1651                                           OVERLAY_FEATURE_VXLAN,
1652                                           OVERLAY_OFFLOAD_ENABLE) == 0) {
1653                 enic->tx_offload_capa |=
1654                         DEV_TX_OFFLOAD_OUTER_IPV4_CKSUM |
1655                         DEV_TX_OFFLOAD_GENEVE_TNL_TSO |
1656                         DEV_TX_OFFLOAD_VXLAN_TNL_TSO;
1657                 /*
1658                  * Do not add PKT_TX_OUTER_{IPV4,IPV6} as they are not
1659                  * 'offload' flags (i.e. not part of PKT_TX_OFFLOAD_MASK).
1660                  */
1661                 enic->tx_offload_mask |=
1662                         PKT_TX_OUTER_IP_CKSUM |
1663                         PKT_TX_TUNNEL_MASK;
1664                 enic->overlay_offload = true;
1665                 enic->vxlan_port = ENIC_DEFAULT_VXLAN_PORT;
1666                 dev_info(enic, "Overlay offload is enabled\n");
1667         }
1668
1669         return 0;
1670
1671 }
1672
1673 int enic_probe(struct enic *enic)
1674 {
1675         struct rte_pci_device *pdev = enic->pdev;
1676         int err = -1;
1677
1678         dev_debug(enic, "Initializing ENIC PMD\n");
1679
1680         /* If this is a secondary process, the hardware is already initialized */
1681         if (rte_eal_process_type() != RTE_PROC_PRIMARY)
1682                 return 0;
1683
1684         enic->bar0.vaddr = (void *)pdev->mem_resource[0].addr;
1685         enic->bar0.len = pdev->mem_resource[0].len;
1686
1687         /* Register vNIC device */
1688         enic->vdev = vnic_dev_register(NULL, enic, enic->pdev, &enic->bar0, 1);
1689         if (!enic->vdev) {
1690                 dev_err(enic, "vNIC registration failed, aborting\n");
1691                 goto err_out;
1692         }
1693
1694         LIST_INIT(&enic->memzone_list);
1695         rte_spinlock_init(&enic->memzone_list_lock);
1696
1697         vnic_register_cbacks(enic->vdev,
1698                 enic_alloc_consistent,
1699                 enic_free_consistent);
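        /*
         * These callbacks let the shared vnic_dev code obtain DMA-consistent
         * memory through the PMD; the allocations are tracked on the
         * memzone_list initialized above.
         */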
1700
1701         /*
1702          * Allocate the consistent memory for stats upfront so both primary and
1703          * secondary processes can dump stats.
1704          */
1705         err = vnic_dev_alloc_stats_mem(enic->vdev);
1706         if (err) {
1707                 dev_err(enic, "Failed to allocate stats memory, aborting\n");
1708                 goto err_out_unregister;
1709         }
1710         /* Issue device open to get device in known state */
1711         err = enic_dev_open(enic);
1712         if (err) {
1713                 dev_err(enic, "vNIC dev open failed, aborting\n");
1714                 goto err_out_unregister;
1715         }
1716
1717         /* Set ingress vlan rewrite mode before vnic initialization */
1718         dev_debug(enic, "Set ig_vlan_rewrite_mode=%u\n",
1719                   enic->ig_vlan_rewrite_mode);
1720         err = vnic_dev_set_ig_vlan_rewrite_mode(enic->vdev,
1721                 enic->ig_vlan_rewrite_mode);
1722         if (err) {
1723                 dev_err(enic,
1724                         "Failed to set ingress vlan rewrite mode, aborting.\n");
1725                 goto err_out_dev_close;
1726         }
1727
1728         /* Issue device init to initialize the vnic-to-switch link.
1729          * We'll start with carrier off and wait for link UP
1730          * notification later to turn on carrier.  We don't need
1731          * to wait here for the vnic-to-switch link initialization
1732          * to complete; link UP notification is the indication that
1733          * the process is complete.
1734          */
1735
1736         err = vnic_dev_init(enic->vdev, 0);
1737         if (err) {
1738                 dev_err(enic, "vNIC dev init failed, aborting\n");
1739                 goto err_out_dev_close;
1740         }
1741
1742         err = enic_dev_init(enic);
1743         if (err) {
1744                 dev_err(enic, "Device initialization failed, aborting\n");
1745                 goto err_out_dev_close;
1746         }
1747
1748         return 0;
1749
1750 err_out_dev_close:
1751         vnic_dev_close(enic->vdev);
1752 err_out_unregister:
1753         vnic_dev_unregister(enic->vdev);
1754 err_out:
1755         return err;
1756 }
1757
1758 void enic_remove(struct enic *enic)
1759 {
1760         enic_dev_deinit(enic);
1761         vnic_dev_close(enic->vdev);
1762         vnic_dev_unregister(enic->vdev);
1763 }