net/enic: add VLAN and csum offloads to simple Tx handler
dpdk.git: drivers/net/enic/enic_main.c
1 /* SPDX-License-Identifier: BSD-3-Clause
2  * Copyright 2008-2017 Cisco Systems, Inc.  All rights reserved.
3  * Copyright 2007 Nuova Systems, Inc.  All rights reserved.
4  */
5
6 #include <stdio.h>
7
8 #include <sys/stat.h>
9 #include <sys/mman.h>
10 #include <fcntl.h>
11 #include <libgen.h>
12
13 #include <rte_pci.h>
14 #include <rte_bus_pci.h>
15 #include <rte_memzone.h>
16 #include <rte_malloc.h>
17 #include <rte_mbuf.h>
18 #include <rte_string_fns.h>
19 #include <rte_ethdev_driver.h>
20
21 #include "enic_compat.h"
22 #include "enic.h"
23 #include "wq_enet_desc.h"
24 #include "rq_enet_desc.h"
25 #include "cq_enet_desc.h"
26 #include "vnic_enet.h"
27 #include "vnic_dev.h"
28 #include "vnic_wq.h"
29 #include "vnic_rq.h"
30 #include "vnic_cq.h"
31 #include "vnic_intr.h"
32 #include "vnic_nic.h"
33
34 static inline int enic_is_sriov_vf(struct enic *enic)
35 {
36         return enic->pdev->id.device_id == PCI_DEVICE_ID_CISCO_VIC_ENET_VF;
37 }
38
39 static int is_zero_addr(uint8_t *addr)
40 {
41         return !(addr[0] |  addr[1] | addr[2] | addr[3] | addr[4] | addr[5]);
42 }
43
44 static int is_mcast_addr(uint8_t *addr)
45 {
46         return addr[0] & 1;
47 }
48
49 static int is_eth_addr_valid(uint8_t *addr)
50 {
51         return !is_mcast_addr(addr) && !is_zero_addr(addr);
52 }
53
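/* Free any mbufs still posted to the RQ's mbuf ring. */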
54 static void
55 enic_rxmbuf_queue_release(__rte_unused struct enic *enic, struct vnic_rq *rq)
56 {
57         uint16_t i;
58
59         if (!rq || !rq->mbuf_ring) {
60                 dev_debug(enic, "Pointer to rq or mbuf_ring is NULL");
61                 return;
62         }
63
64         for (i = 0; i < rq->ring.desc_count; i++) {
65                 if (rq->mbuf_ring[i]) {
66                         rte_pktmbuf_free_seg(rq->mbuf_ring[i]);
67                         rq->mbuf_ring[i] = NULL;
68                 }
69         }
70 }
71
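/*
 * Per-buffer callback for vnic_wq_clean(): free the Tx mbuf and clear the
 * ring slot.
 */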
72 static void enic_free_wq_buf(struct rte_mbuf **buf)
73 {
74         struct rte_mbuf *mbuf = *buf;
75
76         rte_pktmbuf_free_seg(mbuf);
77         *buf = NULL;
78 }
79
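/* Log any error status latched by the hardware for the WQs and RQs. */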
80 static void enic_log_q_error(struct enic *enic)
81 {
82         unsigned int i;
83         u32 error_status;
84
85         for (i = 0; i < enic->wq_count; i++) {
86                 error_status = vnic_wq_error_status(&enic->wq[i]);
87                 if (error_status)
88                         dev_err(enic, "WQ[%d] error_status %d\n", i,
89                                 error_status);
90         }
91
92         for (i = 0; i < enic_vnic_rq_count(enic); i++) {
93                 if (!enic->rq[i].in_use)
94                         continue;
95                 error_status = vnic_rq_error_status(&enic->rq[i]);
96                 if (error_status)
97                         dev_err(enic, "RQ[%d] error_status %d\n", i,
98                                 error_status);
99         }
100 }
101
102 static void enic_clear_soft_stats(struct enic *enic)
103 {
104         struct enic_soft_stats *soft_stats = &enic->soft_stats;
105         rte_atomic64_clear(&soft_stats->rx_nombuf);
106         rte_atomic64_clear(&soft_stats->rx_packet_errors);
107         rte_atomic64_clear(&soft_stats->tx_oversized);
108 }
109
110 static void enic_init_soft_stats(struct enic *enic)
111 {
112         struct enic_soft_stats *soft_stats = &enic->soft_stats;
113         rte_atomic64_init(&soft_stats->rx_nombuf);
114         rte_atomic64_init(&soft_stats->rx_packet_errors);
115         rte_atomic64_init(&soft_stats->tx_oversized);
116         enic_clear_soft_stats(enic);
117 }
118
119 void enic_dev_stats_clear(struct enic *enic)
120 {
121         if (vnic_dev_stats_clear(enic->vdev))
122                 dev_err(enic, "Error in clearing stats\n");
123         enic_clear_soft_stats(enic);
124 }
125
126 int enic_dev_stats_get(struct enic *enic, struct rte_eth_stats *r_stats)
127 {
128         struct vnic_stats *stats;
129         struct enic_soft_stats *soft_stats = &enic->soft_stats;
130         int64_t rx_truncated;
131         uint64_t rx_packet_errors;
132         int ret = vnic_dev_stats_dump(enic->vdev, &stats);
133
134         if (ret) {
135                 dev_err(enic, "Error in getting stats\n");
136                 return ret;
137         }
138
139         /* The number of truncated packets can only be calculated by
140          * subtracting a hardware counter from error packets received by
141          * the driver. Note: this causes transient inaccuracies in the
142          * ipackets count. Also, the lengths of truncated packets are
143          * counted in ibytes even though truncated packets are dropped,
144          * which can make ibytes slightly higher than it should be.
145          */
146         rx_packet_errors = rte_atomic64_read(&soft_stats->rx_packet_errors);
147         rx_truncated = rx_packet_errors - stats->rx.rx_errors;
148
149         r_stats->ipackets = stats->rx.rx_frames_ok - rx_truncated;
150         r_stats->opackets = stats->tx.tx_frames_ok;
151
152         r_stats->ibytes = stats->rx.rx_bytes_ok;
153         r_stats->obytes = stats->tx.tx_bytes_ok;
154
155         r_stats->ierrors = stats->rx.rx_errors + stats->rx.rx_drop;
156         r_stats->oerrors = stats->tx.tx_errors
157                            + rte_atomic64_read(&soft_stats->tx_oversized);
158
159         r_stats->imissed = stats->rx.rx_no_bufs + rx_truncated;
160
161         r_stats->rx_nombuf = rte_atomic64_read(&soft_stats->rx_nombuf);
162         return 0;
163 }
164
165 int enic_del_mac_address(struct enic *enic, int mac_index)
166 {
167         struct rte_eth_dev *eth_dev = enic->rte_dev;
168         uint8_t *mac_addr = eth_dev->data->mac_addrs[mac_index].addr_bytes;
169
170         return vnic_dev_del_addr(enic->vdev, mac_addr);
171 }
172
173 int enic_set_mac_address(struct enic *enic, uint8_t *mac_addr)
174 {
175         int err;
176
177         if (!is_eth_addr_valid(mac_addr)) {
178                 dev_err(enic, "invalid mac address\n");
179                 return -EINVAL;
180         }
181
182         err = vnic_dev_add_addr(enic->vdev, mac_addr);
183         if (err)
184                 dev_err(enic, "add mac addr failed\n");
185         return err;
186 }
187
188 static void
189 enic_free_rq_buf(struct rte_mbuf **mbuf)
190 {
191         if (*mbuf == NULL)
192                 return;
193
194         rte_pktmbuf_free(*mbuf);
195         *mbuf = NULL;
196 }
197
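/*
 * Program the vNIC's RQ, WQ, CQ, and interrupt resources with their runtime
 * parameters before the queues are enabled.
 */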
198 void enic_init_vnic_resources(struct enic *enic)
199 {
200         unsigned int error_interrupt_enable = 1;
201         unsigned int error_interrupt_offset = 0;
202         unsigned int rxq_interrupt_enable = 0;
203         unsigned int rxq_interrupt_offset = ENICPMD_RXQ_INTR_OFFSET;
204         unsigned int index = 0;
205         unsigned int cq_idx;
206         struct vnic_rq *data_rq;
207
208         if (enic->rte_dev->data->dev_conf.intr_conf.rxq)
209                 rxq_interrupt_enable = 1;
210
211         for (index = 0; index < enic->rq_count; index++) {
212                 cq_idx = enic_cq_rq(enic, enic_rte_rq_idx_to_sop_idx(index));
213
214                 vnic_rq_init(&enic->rq[enic_rte_rq_idx_to_sop_idx(index)],
215                         cq_idx,
216                         error_interrupt_enable,
217                         error_interrupt_offset);
218
219                 data_rq = &enic->rq[enic_rte_rq_idx_to_data_idx(index)];
220                 if (data_rq->in_use)
221                         vnic_rq_init(data_rq,
222                                      cq_idx,
223                                      error_interrupt_enable,
224                                      error_interrupt_offset);
225
226                 vnic_cq_init(&enic->cq[cq_idx],
227                         0 /* flow_control_enable */,
228                         1 /* color_enable */,
229                         0 /* cq_head */,
230                         0 /* cq_tail */,
231                         1 /* cq_tail_color */,
232                         rxq_interrupt_enable,
233                         1 /* cq_entry_enable */,
234                         0 /* cq_message_enable */,
235                         rxq_interrupt_offset,
236                         0 /* cq_message_addr */);
237                 if (rxq_interrupt_enable)
238                         rxq_interrupt_offset++;
239         }
240
241         for (index = 0; index < enic->wq_count; index++) {
242                 vnic_wq_init(&enic->wq[index],
243                         enic_cq_wq(enic, index),
244                         error_interrupt_enable,
245                         error_interrupt_offset);
246                 /* Compute unsupported ol flags for enic_prep_pkts() */
247                 enic->wq[index].tx_offload_notsup_mask =
248                         PKT_TX_OFFLOAD_MASK ^ enic->tx_offload_mask;
249
250                 cq_idx = enic_cq_wq(enic, index);
251                 vnic_cq_init(&enic->cq[cq_idx],
252                         0 /* flow_control_enable */,
253                         1 /* color_enable */,
254                         0 /* cq_head */,
255                         0 /* cq_tail */,
256                         1 /* cq_tail_color */,
257                         0 /* interrupt_enable */,
258                         0 /* cq_entry_enable */,
259                         1 /* cq_message_enable */,
260                         0 /* interrupt offset */,
261                         (u64)enic->wq[index].cqmsg_rz->iova);
262         }
263
264         for (index = 0; index < enic->intr_count; index++) {
265                 vnic_intr_init(&enic->intr[index],
266                                enic->config.intr_timer_usec,
267                                enic->config.intr_timer_type,
268                                /*mask_on_assertion*/1);
269         }
270 }
271
272
273 static int
274 enic_alloc_rx_queue_mbufs(struct enic *enic, struct vnic_rq *rq)
275 {
276         struct rte_mbuf *mb;
277         struct rq_enet_desc *rqd = rq->ring.descs;
278         unsigned i;
279         dma_addr_t dma_addr;
280         uint32_t max_rx_pkt_len;
281         uint16_t rq_buf_len;
282
283         if (!rq->in_use)
284                 return 0;
285
286         dev_debug(enic, "queue %u, allocating %u rx queue mbufs\n", rq->index,
287                   rq->ring.desc_count);
288
289         /*
290          * If *not* using scatter and the mbuf size is greater than the
291          * requested max packet size (max_rx_pkt_len), then reduce the
292          * posted buffer size to max_rx_pkt_len. HW still receives packets
293          * larger than max_rx_pkt_len, but they will be truncated, which we
294          * drop in the rx handler. Not ideal, but better than returning
295          * large packets when the user is not expecting them.
296          */
297         max_rx_pkt_len = enic->rte_dev->data->dev_conf.rxmode.max_rx_pkt_len;
298         rq_buf_len = rte_pktmbuf_data_room_size(rq->mp) - RTE_PKTMBUF_HEADROOM;
299         if (max_rx_pkt_len < rq_buf_len && !rq->data_queue_enable)
300                 rq_buf_len = max_rx_pkt_len;
301         for (i = 0; i < rq->ring.desc_count; i++, rqd++) {
302                 mb = rte_mbuf_raw_alloc(rq->mp);
303                 if (mb == NULL) {
304                         dev_err(enic, "RX mbuf alloc failed queue_id=%u\n",
305                         (unsigned)rq->index);
306                         return -ENOMEM;
307                 }
308
309                 mb->data_off = RTE_PKTMBUF_HEADROOM;
310                 dma_addr = (dma_addr_t)(mb->buf_iova
311                            + RTE_PKTMBUF_HEADROOM);
312                 rq_enet_desc_enc(rqd, dma_addr,
313                                 (rq->is_sop ? RQ_ENET_TYPE_ONLY_SOP
314                                 : RQ_ENET_TYPE_NOT_SOP),
315                                 rq_buf_len);
316                 rq->mbuf_ring[i] = mb;
317         }
318         /*
319          * Do not post the buffers to the NIC until we enable the RQ via
320          * enic_start_rq().
321          */
322         rq->need_initial_post = true;
323         /* Initialize fetch index while RQ is disabled */
324         iowrite32(0, &rq->ctrl->fetch_index);
325         return 0;
326 }
327
328 /*
329  * Post the Rx buffers for the first time. enic_alloc_rx_queue_mbufs() has
330  * allocated the buffers and filled the RQ descriptor ring. Just need to push
331  * the post index to the NIC.
332  */
333 static void
334 enic_initial_post_rx(struct enic *enic, struct vnic_rq *rq)
335 {
336         if (!rq->in_use || !rq->need_initial_post)
337                 return;
338
339         /* make sure all prior writes are complete before doing the PIO write */
340         rte_rmb();
341
342         /* Post all but the last buffer to VIC. */
343         rq->posted_index = rq->ring.desc_count - 1;
344
345         rq->rx_nb_hold = 0;
346
347         dev_debug(enic, "port=%u, qidx=%u, Write %u posted idx, %u sw held\n",
348                 enic->port_id, rq->index, rq->posted_index, rq->rx_nb_hold);
349         iowrite32(rq->posted_index, &rq->ctrl->posted_index);
350         rte_rmb();
351         rq->need_initial_post = false;
352 }
353
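/*
 * Allocate DMA-consistent memory backed by an rte_memzone and track it on the
 * per-enic memzone list so enic_free_consistent() can find it later.
 */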
354 static void *
355 enic_alloc_consistent(void *priv, size_t size,
356         dma_addr_t *dma_handle, u8 *name)
357 {
358         void *vaddr;
359         const struct rte_memzone *rz;
360         *dma_handle = 0;
361         struct enic *enic = (struct enic *)priv;
362         struct enic_memzone_entry *mze;
363
364         rz = rte_memzone_reserve_aligned((const char *)name, size,
365                         SOCKET_ID_ANY, RTE_MEMZONE_IOVA_CONTIG, ENIC_ALIGN);
366         if (!rz) {
367                 pr_err("%s : Failed to allocate memory requested for %s\n",
368                         __func__, name);
369                 return NULL;
370         }
371
372         vaddr = rz->addr;
373         *dma_handle = (dma_addr_t)rz->iova;
374
375         mze = rte_malloc("enic memzone entry",
376                          sizeof(struct enic_memzone_entry), 0);
377
378         if (!mze) {
379                 pr_err("%s : Failed to allocate memory for memzone list\n",
380                        __func__);
381                 rte_memzone_free(rz);
382                 return NULL;
383         }
384
385         mze->rz = rz;
386
387         rte_spinlock_lock(&enic->memzone_list_lock);
388         LIST_INSERT_HEAD(&enic->memzone_list, mze, entries);
389         rte_spinlock_unlock(&enic->memzone_list_lock);
390
391         return vaddr;
392 }
393
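/*
 * Look up the memzone backing an enic_alloc_consistent() allocation and
 * free it.
 */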
394 static void
395 enic_free_consistent(void *priv,
396                      __rte_unused size_t size,
397                      void *vaddr,
398                      dma_addr_t dma_handle)
399 {
400         struct enic_memzone_entry *mze;
401         struct enic *enic = (struct enic *)priv;
402
403         rte_spinlock_lock(&enic->memzone_list_lock);
404         LIST_FOREACH(mze, &enic->memzone_list, entries) {
405                 if (mze->rz->addr == vaddr &&
406                     mze->rz->iova == dma_handle)
407                         break;
408         }
409         if (mze == NULL) {
410                 rte_spinlock_unlock(&enic->memzone_list_lock);
411                 dev_warning(enic,
412                             "Tried to free memory, but couldn't find it in the memzone list\n");
413                 return;
414         }
415         LIST_REMOVE(mze, entries);
416         rte_spinlock_unlock(&enic->memzone_list_lock);
417         rte_memzone_free(mze->rz);
418         rte_free(mze);
419 }
420
421 int enic_link_update(struct enic *enic)
422 {
423         struct rte_eth_dev *eth_dev = enic->rte_dev;
424         struct rte_eth_link link;
425
426         memset(&link, 0, sizeof(link));
427         link.link_status = enic_get_link_status(enic);
428         link.link_duplex = ETH_LINK_FULL_DUPLEX;
429         link.link_speed = vnic_dev_port_speed(enic->vdev);
430
431         return rte_eth_linkstatus_set(eth_dev, &link);
432 }
433
434 static void
435 enic_intr_handler(void *arg)
436 {
437         struct rte_eth_dev *dev = (struct rte_eth_dev *)arg;
438         struct enic *enic = pmd_priv(dev);
439
440         vnic_intr_return_all_credits(&enic->intr[ENICPMD_LSC_INTR_OFFSET]);
441
442         enic_link_update(enic);
443         _rte_eth_dev_callback_process(dev, RTE_ETH_EVENT_INTR_LSC, NULL);
444         enic_log_q_error(enic);
445 }
446
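/* Set up event fds and the interrupt vector for Rx queue interrupts. */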
447 static int enic_rxq_intr_init(struct enic *enic)
448 {
449         struct rte_intr_handle *intr_handle;
450         uint32_t rxq_intr_count, i;
451         int err;
452
453         intr_handle = enic->rte_dev->intr_handle;
454         if (!enic->rte_dev->data->dev_conf.intr_conf.rxq)
455                 return 0;
456         /*
457          * Rx queue interrupts only work when we have MSI-X interrupts,
458          * one per queue. Sharing one interrupt is technically
459          * possible with VIC, but it is not worth the complications it brings.
460          */
461         if (!rte_intr_cap_multiple(intr_handle)) {
462                 dev_err(enic, "Rx queue interrupts require MSI-X interrupts"
463                         " (vfio-pci driver)\n");
464                 return -ENOTSUP;
465         }
466         rxq_intr_count = enic->intr_count - ENICPMD_RXQ_INTR_OFFSET;
467         err = rte_intr_efd_enable(intr_handle, rxq_intr_count);
468         if (err) {
469                 dev_err(enic, "Failed to enable event fds for Rx queue"
470                         " interrupts\n");
471                 return err;
472         }
473         intr_handle->intr_vec = rte_zmalloc("enic_intr_vec",
474                                             rxq_intr_count * sizeof(int), 0);
475         if (intr_handle->intr_vec == NULL) {
476                 dev_err(enic, "Failed to allocate intr_vec\n");
477                 return -ENOMEM;
478         }
479         for (i = 0; i < rxq_intr_count; i++)
480                 intr_handle->intr_vec[i] = i + ENICPMD_RXQ_INTR_OFFSET;
481         return 0;
482 }
483
484 static void enic_rxq_intr_deinit(struct enic *enic)
485 {
486         struct rte_intr_handle *intr_handle;
487
488         intr_handle = enic->rte_dev->intr_handle;
489         rte_intr_efd_disable(intr_handle);
490         if (intr_handle->intr_vec != NULL) {
491                 rte_free(intr_handle->intr_vec);
492                 intr_handle->intr_vec = NULL;
493         }
494 }
495
496 static void enic_prep_wq_for_simple_tx(struct enic *enic, uint16_t queue_idx)
497 {
498         struct wq_enet_desc *desc;
499         struct vnic_wq *wq;
500         unsigned int i;
501
502         /*
503          * Fill WQ descriptor fields that never change. Every descriptor is
504          * one packet, so set EOP. Also set CQ_ENTRY every ENIC_WQ_CQ_THRESH
505          * descriptors (i.e. request one completion update every 32 packets).
506          */
507         wq = &enic->wq[queue_idx];
508         desc = (struct wq_enet_desc *)wq->ring.descs;
509         for (i = 0; i < wq->ring.desc_count; i++, desc++) {
510                 desc->header_length_flags = 1 << WQ_ENET_FLAGS_EOP_SHIFT;
511                 if (i % ENIC_WQ_CQ_THRESH == ENIC_WQ_CQ_THRESH - 1)
512                         desc->header_length_flags |=
513                                 (1 << WQ_ENET_FLAGS_CQ_ENTRY_SHIFT);
514         }
515 }
516
517 static void pick_rx_handler(struct enic *enic)
518 {
519         struct rte_eth_dev *eth_dev;
520
521         /* Use the non-scatter, simplified RX handler if possible. */
522         eth_dev = enic->rte_dev;
523         if (enic->rq_count > 0 && enic->rq[0].data_queue_enable == 0) {
524                 PMD_INIT_LOG(DEBUG, " use the non-scatter Rx handler");
525                 eth_dev->rx_pkt_burst = &enic_noscatter_recv_pkts;
526         } else {
527                 PMD_INIT_LOG(DEBUG, " use the normal Rx handler");
528                 eth_dev->rx_pkt_burst = &enic_recv_pkts;
529         }
530 }
531
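/*
 * Bring the port up: allocate Rx buffers, select the Tx and Rx burst
 * handlers, start all queues, and enable the error/link-status interrupt.
 */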
532 int enic_enable(struct enic *enic)
533 {
534         unsigned int index;
535         int err;
536         struct rte_eth_dev *eth_dev = enic->rte_dev;
537         uint64_t simple_tx_offloads;
538
539         eth_dev->data->dev_link.link_speed = vnic_dev_port_speed(enic->vdev);
540         eth_dev->data->dev_link.link_duplex = ETH_LINK_FULL_DUPLEX;
541
542         /* vnic notification of link status has already been turned on in
543          * enic_dev_init() which is called during probe time.  Here we are
544          * just turning on interrupt vector 0 if needed.
545          */
546         if (eth_dev->data->dev_conf.intr_conf.lsc)
547                 vnic_dev_notify_set(enic->vdev, 0);
548
549         err = enic_rxq_intr_init(enic);
550         if (err)
551                 return err;
552         if (enic_clsf_init(enic))
553                 dev_warning(enic, "Init of hash table for clsf failed. "\
554                         "Flow director feature will not work\n");
555
556         for (index = 0; index < enic->rq_count; index++) {
557                 err = enic_alloc_rx_queue_mbufs(enic,
558                         &enic->rq[enic_rte_rq_idx_to_sop_idx(index)]);
559                 if (err) {
560                         dev_err(enic, "Failed to alloc sop RX queue mbufs\n");
561                         return err;
562                 }
563                 err = enic_alloc_rx_queue_mbufs(enic,
564                         &enic->rq[enic_rte_rq_idx_to_data_idx(index)]);
565                 if (err) {
566                         /* release the allocated mbufs for the sop rq*/
567                         enic_rxmbuf_queue_release(enic,
568                                 &enic->rq[enic_rte_rq_idx_to_sop_idx(index)]);
569
570                         dev_err(enic, "Failed to alloc data RX queue mbufs\n");
571                         return err;
572                 }
573         }
574
575         /*
576          * Use the simple TX handler if possible. Only checksum offloads
577          * and vlan insertion are supported.
578          */
579         simple_tx_offloads = enic->tx_offload_capa &
580                 (DEV_TX_OFFLOAD_OUTER_IPV4_CKSUM |
581                  DEV_TX_OFFLOAD_VLAN_INSERT |
582                  DEV_TX_OFFLOAD_IPV4_CKSUM |
583                  DEV_TX_OFFLOAD_UDP_CKSUM |
584                  DEV_TX_OFFLOAD_TCP_CKSUM);
585         if ((eth_dev->data->dev_conf.txmode.offloads &
586              ~simple_tx_offloads) == 0) {
587                 PMD_INIT_LOG(DEBUG, " use the simple tx handler");
588                 eth_dev->tx_pkt_burst = &enic_simple_xmit_pkts;
589                 for (index = 0; index < enic->wq_count; index++)
590                         enic_prep_wq_for_simple_tx(enic, index);
591         } else {
592                 PMD_INIT_LOG(DEBUG, " use the default tx handler");
593                 eth_dev->tx_pkt_burst = &enic_xmit_pkts;
594         }
595
596         pick_rx_handler(enic);
597
598         for (index = 0; index < enic->wq_count; index++)
599                 enic_start_wq(enic, index);
600         for (index = 0; index < enic->rq_count; index++)
601                 enic_start_rq(enic, index);
602
603         vnic_dev_add_addr(enic->vdev, enic->mac_addr);
604
605         vnic_dev_enable_wait(enic->vdev);
606
607         /* Register and enable error interrupt */
608         rte_intr_callback_register(&(enic->pdev->intr_handle),
609                 enic_intr_handler, (void *)enic->rte_dev);
610
611         rte_intr_enable(&(enic->pdev->intr_handle));
612         /* Unmask LSC interrupt */
613         vnic_intr_unmask(&enic->intr[ENICPMD_LSC_INTR_OFFSET]);
614
615         return 0;
616 }
617
618 int enic_alloc_intr_resources(struct enic *enic)
619 {
620         int err;
621         unsigned int i;
622
623         dev_info(enic, "vNIC resources used:  "\
624                 "wq %d rq %d cq %d intr %d\n",
625                 enic->wq_count, enic_vnic_rq_count(enic),
626                 enic->cq_count, enic->intr_count);
627
628         for (i = 0; i < enic->intr_count; i++) {
629                 err = vnic_intr_alloc(enic->vdev, &enic->intr[i], i);
630                 if (err) {
631                         enic_free_vnic_resources(enic);
632                         return err;
633                 }
634         }
635         return 0;
636 }
637
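/*
 * Free the mbufs, mbuf rings, and vNIC resources of one Rx queue: the SOP RQ,
 * the data RQ (if scatter is in use), and their completion queue.
 */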
638 void enic_free_rq(void *rxq)
639 {
640         struct vnic_rq *rq_sop, *rq_data;
641         struct enic *enic;
642
643         if (rxq == NULL)
644                 return;
645
646         rq_sop = (struct vnic_rq *)rxq;
647         enic = vnic_dev_priv(rq_sop->vdev);
648         rq_data = &enic->rq[rq_sop->data_queue_idx];
649
650         if (rq_sop->free_mbufs) {
651                 struct rte_mbuf **mb;
652                 int i;
653
654                 mb = rq_sop->free_mbufs;
655                 for (i = ENIC_RX_BURST_MAX - rq_sop->num_free_mbufs;
656                      i < ENIC_RX_BURST_MAX; i++)
657                         rte_pktmbuf_free(mb[i]);
658                 rte_free(rq_sop->free_mbufs);
659                 rq_sop->free_mbufs = NULL;
660                 rq_sop->num_free_mbufs = 0;
661         }
662
663         enic_rxmbuf_queue_release(enic, rq_sop);
664         if (rq_data->in_use)
665                 enic_rxmbuf_queue_release(enic, rq_data);
666
667         rte_free(rq_sop->mbuf_ring);
668         if (rq_data->in_use)
669                 rte_free(rq_data->mbuf_ring);
670
671         rq_sop->mbuf_ring = NULL;
672         rq_data->mbuf_ring = NULL;
673
674         vnic_rq_free(rq_sop);
675         if (rq_data->in_use)
676                 vnic_rq_free(rq_data);
677
678         vnic_cq_free(&enic->cq[enic_sop_rq_idx_to_cq_idx(rq_sop->index)]);
679
680         rq_sop->in_use = 0;
681         rq_data->in_use = 0;
682 }
683
684 void enic_start_wq(struct enic *enic, uint16_t queue_idx)
685 {
686         struct rte_eth_dev *eth_dev = enic->rte_dev;
687         vnic_wq_enable(&enic->wq[queue_idx]);
688         eth_dev->data->tx_queue_state[queue_idx] = RTE_ETH_QUEUE_STATE_STARTED;
689 }
690
691 int enic_stop_wq(struct enic *enic, uint16_t queue_idx)
692 {
693         struct rte_eth_dev *eth_dev = enic->rte_dev;
694         int ret;
695
696         ret = vnic_wq_disable(&enic->wq[queue_idx]);
697         if (ret)
698                 return ret;
699
700         eth_dev->data->tx_queue_state[queue_idx] = RTE_ETH_QUEUE_STATE_STOPPED;
701         return 0;
702 }
703
704 void enic_start_rq(struct enic *enic, uint16_t queue_idx)
705 {
706         struct vnic_rq *rq_sop;
707         struct vnic_rq *rq_data;
708         rq_sop = &enic->rq[enic_rte_rq_idx_to_sop_idx(queue_idx)];
709         rq_data = &enic->rq[rq_sop->data_queue_idx];
710         struct rte_eth_dev *eth_dev = enic->rte_dev;
711
712         if (rq_data->in_use) {
713                 vnic_rq_enable(rq_data);
714                 enic_initial_post_rx(enic, rq_data);
715         }
716         rte_mb();
717         vnic_rq_enable(rq_sop);
718         enic_initial_post_rx(enic, rq_sop);
719         eth_dev->data->rx_queue_state[queue_idx] = RTE_ETH_QUEUE_STATE_STARTED;
720 }
721
722 int enic_stop_rq(struct enic *enic, uint16_t queue_idx)
723 {
724         int ret1 = 0, ret2 = 0;
725         struct rte_eth_dev *eth_dev = enic->rte_dev;
726         struct vnic_rq *rq_sop;
727         struct vnic_rq *rq_data;
728         rq_sop = &enic->rq[enic_rte_rq_idx_to_sop_idx(queue_idx)];
729         rq_data = &enic->rq[rq_sop->data_queue_idx];
730
731         ret2 = vnic_rq_disable(rq_sop);
732         rte_mb();
733         if (rq_data->in_use)
734                 ret1 = vnic_rq_disable(rq_data);
735
736         if (ret2)
737                 return ret2;
738         else if (ret1)
739                 return ret1;
740
741         eth_dev->data->rx_queue_state[queue_idx] = RTE_ETH_QUEUE_STATE_STOPPED;
742         return 0;
743 }
744
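/*
 * Allocate the SOP RQ, the data RQ (used for scatter Rx), their completion
 * queue, and the mbuf rings for one Rx queue, splitting nb_desc between the
 * two RQs.
 */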
745 int enic_alloc_rq(struct enic *enic, uint16_t queue_idx,
746         unsigned int socket_id, struct rte_mempool *mp,
747         uint16_t nb_desc, uint16_t free_thresh)
748 {
749         int rc;
750         uint16_t sop_queue_idx = enic_rte_rq_idx_to_sop_idx(queue_idx);
751         uint16_t data_queue_idx = enic_rte_rq_idx_to_data_idx(queue_idx);
752         struct vnic_rq *rq_sop = &enic->rq[sop_queue_idx];
753         struct vnic_rq *rq_data = &enic->rq[data_queue_idx];
754         unsigned int mbuf_size, mbufs_per_pkt;
755         unsigned int nb_sop_desc, nb_data_desc;
756         uint16_t min_sop, max_sop, min_data, max_data;
757         uint32_t max_rx_pkt_len;
758
759         rq_sop->is_sop = 1;
760         rq_sop->data_queue_idx = data_queue_idx;
761         rq_data->is_sop = 0;
762         rq_data->data_queue_idx = 0;
763         rq_sop->socket_id = socket_id;
764         rq_sop->mp = mp;
765         rq_data->socket_id = socket_id;
766         rq_data->mp = mp;
767         rq_sop->in_use = 1;
768         rq_sop->rx_free_thresh = free_thresh;
769         rq_data->rx_free_thresh = free_thresh;
770         dev_debug(enic, "Set queue_id:%u free thresh:%u\n", queue_idx,
771                   free_thresh);
772
773         mbuf_size = (uint16_t)(rte_pktmbuf_data_room_size(mp) -
774                                RTE_PKTMBUF_HEADROOM);
775         /* max_rx_pkt_len includes the Ethernet header and CRC. */
776         max_rx_pkt_len = enic->rte_dev->data->dev_conf.rxmode.max_rx_pkt_len;
777
778         if (enic->rte_dev->data->dev_conf.rxmode.offloads &
779             DEV_RX_OFFLOAD_SCATTER) {
780                 dev_info(enic, "Rq %u Scatter rx mode enabled\n", queue_idx);
781                 /* ceil((max pkt len)/mbuf_size) */
782                 mbufs_per_pkt = (max_rx_pkt_len + mbuf_size - 1) / mbuf_size;
783         } else {
784                 dev_info(enic, "Scatter rx mode disabled\n");
785                 mbufs_per_pkt = 1;
786                 if (max_rx_pkt_len > mbuf_size) {
787                         dev_warning(enic, "The maximum Rx packet size (%u) is"
788                                     " larger than the mbuf size (%u), and"
789                                     " scatter is disabled. Larger packets will"
790                                     " be truncated.\n",
791                                     max_rx_pkt_len, mbuf_size);
792                 }
793         }
794
795         if (mbufs_per_pkt > 1) {
796                 dev_info(enic, "Rq %u Scatter rx mode in use\n", queue_idx);
797                 rq_sop->data_queue_enable = 1;
798                 rq_data->in_use = 1;
799                 /*
800                  * HW does not directly support rxmode.max_rx_pkt_len. HW always
801                  * receives packet sizes up to the "max" MTU.
802                  * If not using scatter, we can achieve the effect of dropping
803                  * larger packets by reducing the size of posted buffers.
804                  * See enic_alloc_rx_queue_mbufs().
805                  */
806                 if (max_rx_pkt_len <
807                     enic_mtu_to_max_rx_pktlen(enic->max_mtu)) {
808                         dev_warning(enic, "rxmode.max_rx_pkt_len is ignored"
809                                     " when scatter rx mode is in use.\n");
810                 }
811         } else {
812                 dev_info(enic, "Rq %u Scatter rx mode not being used\n",
813                          queue_idx);
814                 rq_sop->data_queue_enable = 0;
815                 rq_data->in_use = 0;
816         }
817
818         /* the number of descriptors has to be a multiple of 32 */
819         nb_sop_desc = (nb_desc / mbufs_per_pkt) & ENIC_ALIGN_DESCS_MASK;
820         nb_data_desc = (nb_desc - nb_sop_desc) & ENIC_ALIGN_DESCS_MASK;
821
822         rq_sop->max_mbufs_per_pkt = mbufs_per_pkt;
823         rq_data->max_mbufs_per_pkt = mbufs_per_pkt;
824
825         if (mbufs_per_pkt > 1) {
826                 min_sop = ENIC_RX_BURST_MAX;
827                 max_sop = ((enic->config.rq_desc_count /
828                             (mbufs_per_pkt - 1)) & ENIC_ALIGN_DESCS_MASK);
829                 min_data = min_sop * (mbufs_per_pkt - 1);
830                 max_data = enic->config.rq_desc_count;
831         } else {
832                 min_sop = ENIC_RX_BURST_MAX;
833                 max_sop = enic->config.rq_desc_count;
834                 min_data = 0;
835                 max_data = 0;
836         }
837
838         if (nb_desc < (min_sop + min_data)) {
839                 dev_warning(enic,
840                             "Number of rx descs too low, adjusting to minimum\n");
841                 nb_sop_desc = min_sop;
842                 nb_data_desc = min_data;
843         } else if (nb_desc > (max_sop + max_data)) {
844                 dev_warning(enic,
845                             "Number of rx_descs too high, adjusting to maximum\n");
846                 nb_sop_desc = max_sop;
847                 nb_data_desc = max_data;
848         }
849         if (mbufs_per_pkt > 1) {
850                 dev_info(enic, "For max packet size %u and mbuf size %u valid"
851                          " rx descriptor range is %u to %u\n",
852                          max_rx_pkt_len, mbuf_size, min_sop + min_data,
853                          max_sop + max_data);
854         }
855         dev_info(enic, "Using %d rx descriptors (sop %d, data %d)\n",
856                  nb_sop_desc + nb_data_desc, nb_sop_desc, nb_data_desc);
857
858         /* Allocate sop queue resources */
859         rc = vnic_rq_alloc(enic->vdev, rq_sop, sop_queue_idx,
860                 nb_sop_desc, sizeof(struct rq_enet_desc));
861         if (rc) {
862                 dev_err(enic, "error in allocation of sop rq\n");
863                 goto err_exit;
864         }
865         nb_sop_desc = rq_sop->ring.desc_count;
866
867         if (rq_data->in_use) {
868                 /* Allocate data queue resources */
869                 rc = vnic_rq_alloc(enic->vdev, rq_data, data_queue_idx,
870                                    nb_data_desc,
871                                    sizeof(struct rq_enet_desc));
872                 if (rc) {
873                         dev_err(enic, "error in allocation of data rq\n");
874                         goto err_free_rq_sop;
875                 }
876                 nb_data_desc = rq_data->ring.desc_count;
877         }
878         rc = vnic_cq_alloc(enic->vdev, &enic->cq[queue_idx], queue_idx,
879                            socket_id, nb_sop_desc + nb_data_desc,
880                            sizeof(struct cq_enet_rq_desc));
881         if (rc) {
882                 dev_err(enic, "error in allocation of cq for rq\n");
883                 goto err_free_rq_data;
884         }
885
886         /* Allocate the mbuf rings */
887         rq_sop->mbuf_ring = (struct rte_mbuf **)
888                 rte_zmalloc_socket("rq->mbuf_ring",
889                                    sizeof(struct rte_mbuf *) * nb_sop_desc,
890                                    RTE_CACHE_LINE_SIZE, rq_sop->socket_id);
891         if (rq_sop->mbuf_ring == NULL)
892                 goto err_free_cq;
893
894         if (rq_data->in_use) {
895                 rq_data->mbuf_ring = (struct rte_mbuf **)
896                         rte_zmalloc_socket("rq->mbuf_ring",
897                                 sizeof(struct rte_mbuf *) * nb_data_desc,
898                                 RTE_CACHE_LINE_SIZE, rq_sop->socket_id);
899                 if (rq_data->mbuf_ring == NULL)
900                         goto err_free_sop_mbuf;
901         }
902
903         rq_sop->free_mbufs = (struct rte_mbuf **)
904                 rte_zmalloc_socket("rq->free_mbufs",
905                                    sizeof(struct rte_mbuf *) *
906                                    ENIC_RX_BURST_MAX,
907                                    RTE_CACHE_LINE_SIZE, rq_sop->socket_id);
908         if (rq_sop->free_mbufs == NULL)
909                 goto err_free_data_mbuf;
910         rq_sop->num_free_mbufs = 0;
911
912         rq_sop->tot_nb_desc = nb_desc; /* squirrel away for the MTU update function */
913
914         return 0;
915
916 err_free_data_mbuf:
917         rte_free(rq_data->mbuf_ring);
918 err_free_sop_mbuf:
919         rte_free(rq_sop->mbuf_ring);
920 err_free_cq:
921         /* cleanup on error */
922         vnic_cq_free(&enic->cq[queue_idx]);
923 err_free_rq_data:
924         if (rq_data->in_use)
925                 vnic_rq_free(rq_data);
926 err_free_rq_sop:
927         vnic_rq_free(rq_sop);
928 err_exit:
929         return -ENOMEM;
930 }
931
932 void enic_free_wq(void *txq)
933 {
934         struct vnic_wq *wq;
935         struct enic *enic;
936
937         if (txq == NULL)
938                 return;
939
940         wq = (struct vnic_wq *)txq;
941         enic = vnic_dev_priv(wq->vdev);
942         rte_memzone_free(wq->cqmsg_rz);
943         vnic_wq_free(wq);
944         vnic_cq_free(&enic->cq[enic->rq_count + wq->index]);
945 }
946
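/*
 * Allocate the WQ, its completion queue, and the memzone used for CQ
 * completion messages for one Tx queue.
 */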
947 int enic_alloc_wq(struct enic *enic, uint16_t queue_idx,
948         unsigned int socket_id, uint16_t nb_desc)
949 {
950         int err;
951         struct vnic_wq *wq = &enic->wq[queue_idx];
952         unsigned int cq_index = enic_cq_wq(enic, queue_idx);
953         char name[NAME_MAX];
954         static int instance;
955
956         wq->socket_id = socket_id;
957         /*
958          * rte_eth_tx_queue_setup() checks min, max, and alignment. So just
959          * print an info message for diagnostics.
960          */
961         dev_info(enic, "TX Queues - effective number of descs:%d\n", nb_desc);
962
963         /* Allocate queue resources */
964         err = vnic_wq_alloc(enic->vdev, &enic->wq[queue_idx], queue_idx,
965                 nb_desc,
966                 sizeof(struct wq_enet_desc));
967         if (err) {
968                 dev_err(enic, "error in allocation of wq\n");
969                 return err;
970         }
971
972         err = vnic_cq_alloc(enic->vdev, &enic->cq[cq_index], cq_index,
973                 socket_id, nb_desc,
974                 sizeof(struct cq_enet_wq_desc));
975         if (err) {
976                 vnic_wq_free(wq);
977                 dev_err(enic, "error in allocation of cq for wq\n");
978         }
979
980         /* set up CQ message */
981         snprintf((char *)name, sizeof(name),
982                  "vnic_cqmsg-%s-%d-%d", enic->bdf_name, queue_idx,
983                 instance++);
984
985         wq->cqmsg_rz = rte_memzone_reserve_aligned((const char *)name,
986                         sizeof(uint32_t), SOCKET_ID_ANY,
987                         RTE_MEMZONE_IOVA_CONTIG, ENIC_ALIGN);
988         if (!wq->cqmsg_rz)
989                 return -ENOMEM;
990
991         return err;
992 }
993
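/*
 * Quiesce the device: mask and unregister interrupts, disable the WQs and
 * RQs, and clean all rings back to their initial state.
 */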
994 int enic_disable(struct enic *enic)
995 {
996         unsigned int i;
997         int err;
998
999         for (i = 0; i < enic->intr_count; i++) {
1000                 vnic_intr_mask(&enic->intr[i]);
1001                 (void)vnic_intr_masked(&enic->intr[i]); /* flush write */
1002         }
1003         enic_rxq_intr_deinit(enic);
1004         rte_intr_disable(&enic->pdev->intr_handle);
1005         rte_intr_callback_unregister(&enic->pdev->intr_handle,
1006                                      enic_intr_handler,
1007                                      (void *)enic->rte_dev);
1008
1009         vnic_dev_disable(enic->vdev);
1010
1011         enic_clsf_destroy(enic);
1012
1013         if (!enic_is_sriov_vf(enic))
1014                 vnic_dev_del_addr(enic->vdev, enic->mac_addr);
1015
1016         for (i = 0; i < enic->wq_count; i++) {
1017                 err = vnic_wq_disable(&enic->wq[i]);
1018                 if (err)
1019                         return err;
1020         }
1021         for (i = 0; i < enic_vnic_rq_count(enic); i++) {
1022                 if (enic->rq[i].in_use) {
1023                         err = vnic_rq_disable(&enic->rq[i]);
1024                         if (err)
1025                                 return err;
1026                 }
1027         }
1028
1029         /* If we were using interrupts, set the interrupt vector to -1
1030          * to disable interrupts.  We are not disabling link notifications,
1031          * though, as we want the polling of link status to continue working.
1032          */
1033         if (enic->rte_dev->data->dev_conf.intr_conf.lsc)
1034                 vnic_dev_notify_set(enic->vdev, -1);
1035
1036         vnic_dev_set_reset_flag(enic->vdev, 1);
1037
1038         for (i = 0; i < enic->wq_count; i++)
1039                 vnic_wq_clean(&enic->wq[i], enic_free_wq_buf);
1040
1041         for (i = 0; i < enic_vnic_rq_count(enic); i++)
1042                 if (enic->rq[i].in_use)
1043                         vnic_rq_clean(&enic->rq[i], enic_free_rq_buf);
1044         for (i = 0; i < enic->cq_count; i++)
1045                 vnic_cq_clean(&enic->cq[i]);
1046         for (i = 0; i < enic->intr_count; i++)
1047                 vnic_intr_clean(&enic->intr[i]);
1048
1049         return 0;
1050 }
1051
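/*
 * Issue a device command via start() and poll finished() until it completes,
 * for at most 2 seconds.
 */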
1052 static int enic_dev_wait(struct vnic_dev *vdev,
1053         int (*start)(struct vnic_dev *, int),
1054         int (*finished)(struct vnic_dev *, int *),
1055         int arg)
1056 {
1057         int done;
1058         int err;
1059         int i;
1060
1061         err = start(vdev, arg);
1062         if (err)
1063                 return err;
1064
1065         /* Wait for func to complete...2 seconds max */
1066         for (i = 0; i < 2000; i++) {
1067                 err = finished(vdev, &done);
1068                 if (err)
1069                         return err;
1070                 if (done)
1071                         return 0;
1072                 usleep(1000);
1073         }
1074         return -ETIMEDOUT;
1075 }
1076
1077 static int enic_dev_open(struct enic *enic)
1078 {
1079         int err;
1080         int flags = CMD_OPENF_IG_DESCCACHE;
1081
1082         err = enic_dev_wait(enic->vdev, vnic_dev_open,
1083                 vnic_dev_open_done, flags);
1084         if (err)
1085                 dev_err(enic_get_dev(enic),
1086                         "vNIC device open failed, err %d\n", err);
1087
1088         return err;
1089 }
1090
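/* Copy the RSS key into a DMA-able buffer and program it into the NIC. */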
1091 static int enic_set_rsskey(struct enic *enic, uint8_t *user_key)
1092 {
1093         dma_addr_t rss_key_buf_pa;
1094         union vnic_rss_key *rss_key_buf_va = NULL;
1095         int err, i;
1096         u8 name[NAME_MAX];
1097
1098         RTE_ASSERT(user_key != NULL);
1099         snprintf((char *)name, NAME_MAX, "rss_key-%s", enic->bdf_name);
1100         rss_key_buf_va = enic_alloc_consistent(enic, sizeof(union vnic_rss_key),
1101                 &rss_key_buf_pa, name);
1102         if (!rss_key_buf_va)
1103                 return -ENOMEM;
1104
1105         for (i = 0; i < ENIC_RSS_HASH_KEY_SIZE; i++)
1106                 rss_key_buf_va->key[i / 10].b[i % 10] = user_key[i];
1107
1108         err = enic_set_rss_key(enic,
1109                 rss_key_buf_pa,
1110                 sizeof(union vnic_rss_key));
1111
1112         /* Save for later queries */
1113         if (!err) {
1114                 rte_memcpy(&enic->rss_key, rss_key_buf_va,
1115                            sizeof(union vnic_rss_key));
1116         }
1117         enic_free_consistent(enic, sizeof(union vnic_rss_key),
1118                 rss_key_buf_va, rss_key_buf_pa);
1119
1120         return err;
1121 }
1122
1123 int enic_set_rss_reta(struct enic *enic, union vnic_rss_cpu *rss_cpu)
1124 {
1125         dma_addr_t rss_cpu_buf_pa;
1126         union vnic_rss_cpu *rss_cpu_buf_va = NULL;
1127         int err;
1128         u8 name[NAME_MAX];
1129
1130         snprintf((char *)name, NAME_MAX, "rss_cpu-%s", enic->bdf_name);
1131         rss_cpu_buf_va = enic_alloc_consistent(enic, sizeof(union vnic_rss_cpu),
1132                 &rss_cpu_buf_pa, name);
1133         if (!rss_cpu_buf_va)
1134                 return -ENOMEM;
1135
1136         rte_memcpy(rss_cpu_buf_va, rss_cpu, sizeof(union vnic_rss_cpu));
1137
1138         err = enic_set_rss_cpu(enic,
1139                 rss_cpu_buf_pa,
1140                 sizeof(union vnic_rss_cpu));
1141
1142         enic_free_consistent(enic, sizeof(union vnic_rss_cpu),
1143                 rss_cpu_buf_va, rss_cpu_buf_pa);
1144
1145         /* Save for later queries */
1146         if (!err)
1147                 rte_memcpy(&enic->rss_cpu, rss_cpu, sizeof(union vnic_rss_cpu));
1148         return err;
1149 }
1150
1151 static int enic_set_niccfg(struct enic *enic, u8 rss_default_cpu,
1152         u8 rss_hash_type, u8 rss_hash_bits, u8 rss_base_cpu, u8 rss_enable)
1153 {
1154         const u8 tso_ipid_split_en = 0;
1155         int err;
1156
1157         err = enic_set_nic_cfg(enic,
1158                 rss_default_cpu, rss_hash_type,
1159                 rss_hash_bits, rss_base_cpu,
1160                 rss_enable, tso_ipid_split_en,
1161                 enic->ig_vlan_strip_en);
1162
1163         return err;
1164 }
1165
1166 /* Initialize RSS with defaults, called from dev_configure */
1167 int enic_init_rss_nic_cfg(struct enic *enic)
1168 {
1169         static uint8_t default_rss_key[] = {
1170                 85, 67, 83, 97, 119, 101, 115, 111, 109, 101,
1171                 80, 65, 76, 79, 117, 110, 105, 113, 117, 101,
1172                 76, 73, 78, 85, 88, 114, 111, 99, 107, 115,
1173                 69, 78, 73, 67, 105, 115, 99, 111, 111, 108,
1174         };
1175         struct rte_eth_rss_conf rss_conf;
1176         union vnic_rss_cpu rss_cpu;
1177         int ret, i;
1178
1179         rss_conf = enic->rte_dev->data->dev_conf.rx_adv_conf.rss_conf;
1180         /*
1181          * If setting key for the first time, and the user gives us none, then
1182          * push the default key to NIC.
1183          */
1184         if (rss_conf.rss_key == NULL) {
1185                 rss_conf.rss_key = default_rss_key;
1186                 rss_conf.rss_key_len = ENIC_RSS_HASH_KEY_SIZE;
1187         }
1188         ret = enic_set_rss_conf(enic, &rss_conf);
1189         if (ret) {
1190                 dev_err(enic, "Failed to configure RSS\n");
1191                 return ret;
1192         }
1193         if (enic->rss_enable) {
1194                 /* If enabling RSS, use the default reta */
1195                 for (i = 0; i < ENIC_RSS_RETA_SIZE; i++) {
1196                         rss_cpu.cpu[i / 4].b[i % 4] =
1197                                 enic_rte_rq_idx_to_sop_idx(i % enic->rq_count);
1198                 }
1199                 ret = enic_set_rss_reta(enic, &rss_cpu);
1200                 if (ret)
1201                         dev_err(enic, "Failed to set RSS indirection table\n");
1202         }
1203         return ret;
1204 }
1205
1206 int enic_setup_finish(struct enic *enic)
1207 {
1208         enic_init_soft_stats(enic);
1209
1210         /* Default conf */
1211         vnic_dev_packet_filter(enic->vdev,
1212                 1 /* directed  */,
1213                 1 /* multicast */,
1214                 1 /* broadcast */,
1215                 0 /* promisc   */,
1216                 1 /* allmulti  */);
1217
1218         enic->promisc = 0;
1219         enic->allmulti = 1;
1220
1221         return 0;
1222 }
1223
1224 static int enic_rss_conf_valid(struct enic *enic,
1225                                struct rte_eth_rss_conf *rss_conf)
1226 {
1227         /* RSS is disabled per VIC settings. Ignore rss_conf. */
1228         if (enic->flow_type_rss_offloads == 0)
1229                 return 0;
1230         if (rss_conf->rss_key != NULL &&
1231             rss_conf->rss_key_len != ENIC_RSS_HASH_KEY_SIZE) {
1232                 dev_err(enic, "Given rss_key is %d bytes, it must be %d\n",
1233                         rss_conf->rss_key_len, ENIC_RSS_HASH_KEY_SIZE);
1234                 return -EINVAL;
1235         }
1236         if (rss_conf->rss_hf != 0 &&
1237             (rss_conf->rss_hf & enic->flow_type_rss_offloads) == 0) {
1238                 dev_err(enic, "Given rss_hf contains none of the supported"
1239                         " types\n");
1240                 return -EINVAL;
1241         }
1242         return 0;
1243 }
1244
1245 /* Set hash type and key according to rss_conf */
1246 int enic_set_rss_conf(struct enic *enic, struct rte_eth_rss_conf *rss_conf)
1247 {
1248         struct rte_eth_dev *eth_dev;
1249         uint64_t rss_hf;
1250         u8 rss_hash_type;
1251         u8 rss_enable;
1252         int ret;
1253
1254         RTE_ASSERT(rss_conf != NULL);
1255         ret = enic_rss_conf_valid(enic, rss_conf);
1256         if (ret) {
1257                 dev_err(enic, "RSS configuration (rss_conf) is invalid\n");
1258                 return ret;
1259         }
1260
1261         eth_dev = enic->rte_dev;
1262         rss_hash_type = 0;
1263         rss_hf = rss_conf->rss_hf & enic->flow_type_rss_offloads;
1264         if (enic->rq_count > 1 &&
1265             (eth_dev->data->dev_conf.rxmode.mq_mode & ETH_MQ_RX_RSS_FLAG) &&
1266             rss_hf != 0) {
1267                 rss_enable = 1;
1268                 if (rss_hf & (ETH_RSS_IPV4 | ETH_RSS_FRAG_IPV4 |
1269                               ETH_RSS_NONFRAG_IPV4_OTHER))
1270                         rss_hash_type |= NIC_CFG_RSS_HASH_TYPE_IPV4;
1271                 if (rss_hf & ETH_RSS_NONFRAG_IPV4_TCP)
1272                         rss_hash_type |= NIC_CFG_RSS_HASH_TYPE_TCP_IPV4;
1273                 if (rss_hf & ETH_RSS_NONFRAG_IPV4_UDP) {
1274                         rss_hash_type |= NIC_CFG_RSS_HASH_TYPE_UDP_IPV4;
1275                         if (enic->udp_rss_weak) {
1276                                 /*
1277                                  * 'TCP' is not a typo. The "weak" version of
1278                                  * UDP RSS requires both the TCP and UDP bits
1279                                  * be set. It does enable TCP RSS as well.
1280                                  */
1281                                 rss_hash_type |= NIC_CFG_RSS_HASH_TYPE_TCP_IPV4;
1282                         }
1283                 }
1284                 if (rss_hf & (ETH_RSS_IPV6 | ETH_RSS_IPV6_EX |
1285                               ETH_RSS_FRAG_IPV6 | ETH_RSS_NONFRAG_IPV6_OTHER))
1286                         rss_hash_type |= NIC_CFG_RSS_HASH_TYPE_IPV6;
1287                 if (rss_hf & (ETH_RSS_NONFRAG_IPV6_TCP | ETH_RSS_IPV6_TCP_EX))
1288                         rss_hash_type |= NIC_CFG_RSS_HASH_TYPE_TCP_IPV6;
1289                 if (rss_hf & (ETH_RSS_NONFRAG_IPV6_UDP | ETH_RSS_IPV6_UDP_EX)) {
1290                         rss_hash_type |= NIC_CFG_RSS_HASH_TYPE_UDP_IPV6;
1291                         if (enic->udp_rss_weak)
1292                                 rss_hash_type |= NIC_CFG_RSS_HASH_TYPE_TCP_IPV6;
1293                 }
1294         } else {
1295                 rss_enable = 0;
1296                 rss_hf = 0;
1297         }
1298
1299         /* Set the hash key if provided */
1300         if (rss_enable && rss_conf->rss_key) {
1301                 ret = enic_set_rsskey(enic, rss_conf->rss_key);
1302                 if (ret) {
1303                         dev_err(enic, "Failed to set RSS key\n");
1304                         return ret;
1305                 }
1306         }
1307
1308         ret = enic_set_niccfg(enic, ENIC_RSS_DEFAULT_CPU, rss_hash_type,
1309                               ENIC_RSS_HASH_BITS, ENIC_RSS_BASE_CPU,
1310                               rss_enable);
1311         if (!ret) {
1312                 enic->rss_hf = rss_hf;
1313                 enic->rss_hash_type = rss_hash_type;
1314                 enic->rss_enable = rss_enable;
1315         } else {
1316                 dev_err(enic, "Failed to update RSS configurations."
1317                         " hash=0x%x\n", rss_hash_type);
1318         }
1319         return ret;
1320 }
1321
1322 int enic_set_vlan_strip(struct enic *enic)
1323 {
1324         /*
1325          * Unfortunately, VLAN strip on/off and RSS on/off are configured
1326          * together. So, re-do niccfg, preserving the current RSS settings.
1327          */
1328         return enic_set_niccfg(enic, ENIC_RSS_DEFAULT_CPU, enic->rss_hash_type,
1329                                ENIC_RSS_HASH_BITS, ENIC_RSS_BASE_CPU,
1330                                enic->rss_enable);
1331 }
1332
1333 void enic_add_packet_filter(struct enic *enic)
1334 {
1335         /* Args -> directed, multicast, broadcast, promisc, allmulti */
1336         vnic_dev_packet_filter(enic->vdev, 1, 1, 1,
1337                 enic->promisc, enic->allmulti);
1338 }
1339
1340 int enic_get_link_status(struct enic *enic)
1341 {
1342         return vnic_dev_link_status(enic->vdev);
1343 }
1344
1345 static void enic_dev_deinit(struct enic *enic)
1346 {
1347         struct rte_eth_dev *eth_dev = enic->rte_dev;
1348
1349         /* stop link status checking */
1350         vnic_dev_notify_unset(enic->vdev);
1351
1352         rte_free(eth_dev->data->mac_addrs);
1353         rte_free(enic->cq);
1354         rte_free(enic->intr);
1355         rte_free(enic->rq);
1356         rte_free(enic->wq);
1357 }
1358
1359
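/*
 * Verify that the vNIC provides enough RQs, WQs, CQs, and interrupts for the
 * configured number of queues, and record the counts that will be used.
 */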
1360 int enic_set_vnic_res(struct enic *enic)
1361 {
1362         struct rte_eth_dev *eth_dev = enic->rte_dev;
1363         int rc = 0;
1364         unsigned int required_rq, required_wq, required_cq, required_intr;
1365
1366         /* Always use two vNIC RQs per eth_dev RQ, regardless of Rx scatter. */
1367         required_rq = eth_dev->data->nb_rx_queues * 2;
1368         required_wq = eth_dev->data->nb_tx_queues;
1369         required_cq = eth_dev->data->nb_rx_queues + eth_dev->data->nb_tx_queues;
1370         required_intr = 1; /* 1 for LSC even if intr_conf.lsc is 0 */
1371         if (eth_dev->data->dev_conf.intr_conf.rxq) {
1372                 required_intr += eth_dev->data->nb_rx_queues;
1373         }
1374
1375         if (enic->conf_rq_count < required_rq) {
1376                 dev_err(dev, "Not enough Receive queues. Requested:%u which uses %d RQs on VIC, Configured:%u\n",
1377                         eth_dev->data->nb_rx_queues,
1378                         required_rq, enic->conf_rq_count);
1379                 rc = -EINVAL;
1380         }
1381         if (enic->conf_wq_count < required_wq) {
1382                 dev_err(dev, "Not enough Transmit queues. Requested:%u, Configured:%u\n",
1383                         eth_dev->data->nb_tx_queues, enic->conf_wq_count);
1384                 rc = -EINVAL;
1385         }
1386
1387         if (enic->conf_cq_count < required_cq) {
1388                 dev_err(dev, "Not enough Completion queues. Required:%u, Configured:%u\n",
1389                         required_cq, enic->conf_cq_count);
1390                 rc = -EINVAL;
1391         }
1392         if (enic->conf_intr_count < required_intr) {
1393                 dev_err(dev, "Not enough Interrupts to support Rx queue"
1394                         " interrupts. Required:%u, Configured:%u\n",
1395                         required_intr, enic->conf_intr_count);
1396                 rc = -EINVAL;
1397         }
1398
1399         if (rc == 0) {
1400                 enic->rq_count = eth_dev->data->nb_rx_queues;
1401                 enic->wq_count = eth_dev->data->nb_tx_queues;
1402                 enic->cq_count = enic->rq_count + enic->wq_count;
1403                 enic->intr_count = required_intr;
1404         }
1405
1406         return rc;
1407 }
1408
1409 /* Reinitialize the CQ and RQs for an Rx queue and refill its mbufs */
1410 static int
1411 enic_reinit_rq(struct enic *enic, unsigned int rq_idx)
1412 {
1413         struct vnic_rq *sop_rq, *data_rq;
1414         unsigned int cq_idx;
1415         int rc = 0;
1416
1417         sop_rq = &enic->rq[enic_rte_rq_idx_to_sop_idx(rq_idx)];
1418         data_rq = &enic->rq[enic_rte_rq_idx_to_data_idx(rq_idx)];
1419         cq_idx = rq_idx;
1420
1421         vnic_cq_clean(&enic->cq[cq_idx]);
1422         vnic_cq_init(&enic->cq[cq_idx],
1423                      0 /* flow_control_enable */,
1424                      1 /* color_enable */,
1425                      0 /* cq_head */,
1426                      0 /* cq_tail */,
1427                      1 /* cq_tail_color */,
1428                      0 /* interrupt_enable */,
1429                      1 /* cq_entry_enable */,
1430                      0 /* cq_message_enable */,
1431                      0 /* interrupt offset */,
1432                      0 /* cq_message_addr */);
1433
1434
1435         vnic_rq_init_start(sop_rq, enic_cq_rq(enic,
1436                            enic_rte_rq_idx_to_sop_idx(rq_idx)), 0,
1437                            sop_rq->ring.desc_count - 1, 1, 0);
1438         if (data_rq->in_use) {
1439                 vnic_rq_init_start(data_rq,
1440                                    enic_cq_rq(enic,
1441                                    enic_rte_rq_idx_to_data_idx(rq_idx)), 0,
1442                                    data_rq->ring.desc_count - 1, 1, 0);
1443         }
1444
1445         rc = enic_alloc_rx_queue_mbufs(enic, sop_rq);
1446         if (rc)
1447                 return rc;
1448
1449         if (data_rq->in_use) {
1450                 rc = enic_alloc_rx_queue_mbufs(enic, data_rq);
1451                 if (rc) {
1452                         enic_rxmbuf_queue_release(enic, sop_rq);
1453                         return rc;
1454                 }
1455         }
1456
1457         return 0;
1458 }
1459
1460 /* The Cisco NIC can send and receive packets up to a max packet size
1461  * determined by the NIC type and firmware. There is also an MTU
1462  * configured into the NIC via the CIMC/UCSM management interface
1463  * which can be overridden by this function (up to the max packet size).
1464  * Depending on the network setup, doing so may cause packet drops
1465  * and unexpected behavior.
1466  */
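     /*
      * Illustrative usage sketch (port_id and the 9000 byte MTU below are
      * example values, not taken from this driver): applications normally
      * reach this function through rte_eth_dev_set_mtu(), e.g.
      *
      *     if (rte_eth_dev_set_mtu(port_id, 9000) != 0)
      *         rte_exit(EXIT_FAILURE, "cannot set MTU\n");
      *
      * which the ethdev layer dispatches to the PMD's mtu_set callback.
      */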
1467 int enic_set_mtu(struct enic *enic, uint16_t new_mtu)
1468 {
1469         unsigned int rq_idx;
1470         struct vnic_rq *rq;
1471         int rc = 0;
1472         uint16_t old_mtu;       /* previous setting */
1473         uint16_t config_mtu;    /* Value configured into NIC via CIMC/UCSM */
1474         struct rte_eth_dev *eth_dev = enic->rte_dev;
1475
1476         old_mtu = eth_dev->data->mtu;
1477         config_mtu = enic->config.mtu;
1478
1479         if (rte_eal_process_type() != RTE_PROC_PRIMARY)
1480                 return -E_RTE_SECONDARY;
1481
1482         if (new_mtu > enic->max_mtu) {
1483                 dev_err(enic,
1484                         "MTU not updated: requested (%u) greater than max (%u)\n",
1485                         new_mtu, enic->max_mtu);
1486                 return -EINVAL;
1487         }
1488         if (new_mtu < ENIC_MIN_MTU) {
1489                 dev_info(enic,
1490                         "MTU not updated: requested (%u) less than min (%u)\n",
1491                         new_mtu, ENIC_MIN_MTU);
1492                 return -EINVAL;
1493         }
1494         if (new_mtu > config_mtu)
1495                 dev_warning(enic,
1496                         "MTU (%u) is greater than value configured in NIC (%u)\n",
1497                         new_mtu, config_mtu);
1498
1499         /* Update the MTU and maximum packet length */
1500         eth_dev->data->mtu = new_mtu;
1501         eth_dev->data->dev_conf.rxmode.max_rx_pkt_len =
1502                 enic_mtu_to_max_rx_pktlen(new_mtu);
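             /*
              * enic_mtu_to_max_rx_pktlen() above translates the L3 MTU into
              * the largest L2 frame the RQs must accept (the MTU plus Ethernet
              * framing overhead).
              */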
1503
1504         /*
1505          * If the device has not started (enic_enable), nothing to do.
1506          * Later, enic_enable() will set up RQs reflecting the new maximum
1507          * packet length.
1508          */
1509         if (!eth_dev->data->dev_started)
1510                 goto set_mtu_done;
1511
1512         /*
1513          * The device has started, so rebuild the RQs on the fly; the new
1514          * maximum packet length is picked up in the process.
1515          *
1516          * Some applications rely on being able to change the MTU without
1517          * stopping the device, so keep this behavior for now.
1518          */
1519         rte_spinlock_lock(&enic->mtu_lock);
1520
1521         /* Stop traffic on all RQs */
1522         for (rq_idx = 0; rq_idx < enic->rq_count * 2; rq_idx++) {
1523                 rq = &enic->rq[rq_idx];
1524                 if (rq->is_sop && rq->in_use) {
1525                         rc = enic_stop_rq(enic,
1526                                           enic_sop_rq_idx_to_rte_idx(rq_idx));
1527                         if (rc) {
1528                                 dev_err(enic, "Failed to stop RQ %u\n", rq_idx);
1529                                 goto set_mtu_done;
1530                         }
1531                 }
1532         }
1533
1534         /* Replace the Rx handler with a no-op to avoid stale packets */
1535         eth_dev->rx_pkt_burst = enic_dummy_recv_pkts;
1536         rte_mb();
1537
1538         /* Allow time for threads to exit the real Rx function. */
1539         usleep(100000);
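             /*
              * There is no synchronization between this control path and
              * lcores calling rx_pkt_burst: rte_mb() above publishes the
              * handler swap, and the sleep is a heuristic grace period for
              * lcores still inside the previous Rx function.
              */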
1540
1541         /* Now it is safe to reconfigure the RQs: free and reallocate them
1542          * with the new MTU.
1543          */
1544
1545         for (rq_idx = 0; rq_idx < enic->rq_count; rq_idx++) {
1546                 rq = &enic->rq[enic_rte_rq_idx_to_sop_idx(rq_idx)];
1547                 if (!rq->in_use)
1548                         continue;
1549
1550                 enic_free_rq(rq);
1551                 rc = enic_alloc_rq(enic, rq_idx, rq->socket_id, rq->mp,
1552                                    rq->tot_nb_desc, rq->rx_free_thresh);
1553                 if (rc) {
1554                         dev_err(enic,
1555                                 "Fatal MTU alloc error - no traffic will pass\n");
1556                         goto set_mtu_done;
1557                 }
1558
1559                 rc = enic_reinit_rq(enic, rq_idx);
1560                 if (rc) {
1561                         dev_err(enic,
1562                                 "Fatal MTU RQ re-init - no traffic will pass\n");
1563                         goto set_mtu_done;
1564                 }
1565         }
1566
1567         /* put back the real receive function */
1568         rte_mb();
1569         pick_rx_handler(enic);
1570         rte_mb();
1571
1572         /* restart Rx traffic */
1573         for (rq_idx = 0; rq_idx < enic->rq_count; rq_idx++) {
1574                 rq = &enic->rq[enic_rte_rq_idx_to_sop_idx(rq_idx)];
1575                 if (rq->is_sop && rq->in_use)
1576                         enic_start_rq(enic, rq_idx);
1577         }
1578
1579 set_mtu_done:
1580         dev_info(enic, "MTU changed from %u to %u\n", old_mtu, new_mtu);
1581         rte_spinlock_unlock(&enic->mtu_lock);
1582         return rc;
1583 }
1584
1585 static int enic_dev_init(struct enic *enic)
1586 {
1587         int err;
1588         struct rte_eth_dev *eth_dev = enic->rte_dev;
1589
1590         vnic_dev_intr_coal_timer_info_default(enic->vdev);
1591
1592         /* Get vNIC configuration */
1594         err = enic_get_vnic_config(enic);
1595         if (err) {
1596                 dev_err(dev, "Get vNIC configuration failed, aborting\n");
1597                 return err;
1598         }
1599
1600         /* Get available resource counts */
1601         enic_get_res_counts(enic);
1602         if (enic->conf_rq_count == 1) {
1603                 dev_err(enic, "Running with only 1 RQ configured in the vNIC is not supported.\n");
1604                 dev_err(enic, "Please configure 2 RQs in the vNIC for each Rx queue used by DPDK.\n");
1605                 dev_err(enic, "See the ENIC PMD guide for more information.\n");
1606                 return -EINVAL;
1607         }
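             /*
              * For example, a vNIC provisioned with 16 RQs can back at most
              * 8 DPDK Rx queues, since each Rx queue consumes a SOP RQ plus
              * a data RQ.
              */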
1608         /* Queue counts may be zeros. rte_zmalloc returns NULL in that case. */
1609         enic->cq = rte_zmalloc("enic_vnic_cq", sizeof(struct vnic_cq) *
1610                                enic->conf_cq_count, 8);
1611         enic->intr = rte_zmalloc("enic_vnic_intr", sizeof(struct vnic_intr) *
1612                                  enic->conf_intr_count, 8);
1613         enic->rq = rte_zmalloc("enic_vnic_rq", sizeof(struct vnic_rq) *
1614                                enic->conf_rq_count, 8);
1615         enic->wq = rte_zmalloc("enic_vnic_wq", sizeof(struct vnic_wq) *
1616                                enic->conf_wq_count, 8);
1617         if (enic->conf_cq_count > 0 && enic->cq == NULL) {
1618                 dev_err(enic, "failed to allocate vnic_cq, aborting.\n");
1619                 return -1;
1620         }
1621         if (enic->conf_intr_count > 0 && enic->intr == NULL) {
1622                 dev_err(enic, "failed to allocate vnic_intr, aborting.\n");
1623                 return -1;
1624         }
1625         if (enic->conf_rq_count > 0 && enic->rq == NULL) {
1626                 dev_err(enic, "failed to allocate vnic_rq, aborting.\n");
1627                 return -1;
1628         }
1629         if (enic->conf_wq_count > 0 && enic->wq == NULL) {
1630                 dev_err(enic, "failed to allocate vnic_wq, aborting.\n");
1631                 return -1;
1632         }
1633
1634         /* Get the supported filters */
1635         enic_fdir_info(enic);
1636
1637         eth_dev->data->mac_addrs = rte_zmalloc("enic_mac_addr", ETH_ALEN
1638                                                 * ENIC_MAX_MAC_ADDR, 0);
1639         if (!eth_dev->data->mac_addrs) {
1640                 dev_err(enic, "mac addr storage alloc failed, aborting.\n");
1641                 return -1;
1642         }
1643         ether_addr_copy((struct ether_addr *) enic->mac_addr,
1644                         eth_dev->data->mac_addrs);
1645
1646         vnic_dev_set_reset_flag(enic->vdev, 0);
1647
1648         LIST_INIT(&enic->flows);
1649         rte_spinlock_init(&enic->flows_lock);
1650
1651         /* set up link status checking */
1652         vnic_dev_notify_set(enic->vdev, -1); /* No Intr for notify */
1653
1654         enic->overlay_offload = false;
1655         if (!enic->disable_overlay && enic->vxlan &&
1656             /* 'VXLAN feature' enables VXLAN, NVGRE, and GENEVE. */
1657             vnic_dev_overlay_offload_ctrl(enic->vdev,
1658                                           OVERLAY_FEATURE_VXLAN,
1659                                           OVERLAY_OFFLOAD_ENABLE) == 0) {
1660                 enic->tx_offload_capa |=
1661                         DEV_TX_OFFLOAD_OUTER_IPV4_CKSUM |
1662                         DEV_TX_OFFLOAD_GENEVE_TNL_TSO |
1663                         DEV_TX_OFFLOAD_VXLAN_TNL_TSO;
1664                 /*
1665                  * Do not add PKT_TX_OUTER_{IPV4,IPV6} as they are not
1666                  * 'offload' flags (i.e. not part of PKT_TX_OFFLOAD_MASK).
1667                  */
1668                 enic->tx_offload_mask |=
1669                         PKT_TX_OUTER_IP_CKSUM |
1670                         PKT_TX_TUNNEL_MASK;
1671                 enic->overlay_offload = true;
1672                 enic->vxlan_port = ENIC_DEFAULT_VXLAN_PORT;
1673                 dev_info(enic, "Overlay offload is enabled\n");
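                     /*
                      * ENIC_DEFAULT_VXLAN_PORT is the IANA-assigned VXLAN UDP
                      * port (4789); applications may later change it through
                      * the ethdev UDP tunnel port API.
                      */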
1674                 /*
1675                  * Reset the vxlan port to the default, as the NIC firmware
1676                  * does not reset it automatically and keeps the old setting.
1677                  */
1678                 if (vnic_dev_overlay_offload_cfg(enic->vdev,
1679                                                  OVERLAY_CFG_VXLAN_PORT_UPDATE,
1680                                                  ENIC_DEFAULT_VXLAN_PORT)) {
1681                         dev_err(enic, "failed to update vxlan port\n");
1682                         return -EINVAL;
1683                 }
1684         }
1685
1686         return 0;
1687
1688 }
1689
1690 int enic_probe(struct enic *enic)
1691 {
1692         struct rte_pci_device *pdev = enic->pdev;
1693         int err = -1;
1694
1695         dev_debug(enic, "Initializing ENIC PMD\n");
1696
1697         /* If this is a secondary process, the hardware is already initialized */
1698         if (rte_eal_process_type() != RTE_PROC_PRIMARY)
1699                 return 0;
1700
1701         enic->bar0.vaddr = (void *)pdev->mem_resource[0].addr;
1702         enic->bar0.len = pdev->mem_resource[0].len;
1703
1704         /* Register vNIC device */
1705         enic->vdev = vnic_dev_register(NULL, enic, enic->pdev, &enic->bar0, 1);
1706         if (!enic->vdev) {
1707                 dev_err(enic, "vNIC registration failed, aborting\n");
1708                 goto err_out;
1709         }
1710
1711         LIST_INIT(&enic->memzone_list);
1712         rte_spinlock_init(&enic->memzone_list_lock);
1713
1714         vnic_register_cbacks(enic->vdev,
1715                 enic_alloc_consistent,
1716                 enic_free_consistent);
1717
1718         /*
1719          * Allocate the consistent memory for stats upfront so both primary and
1720          * secondary processes can dump stats.
1721          */
1722         err = vnic_dev_alloc_stats_mem(enic->vdev);
1723         if (err) {
1724                 dev_err(enic, "Failed to allocate cmd memory, aborting\n");
1725                 goto err_out_unregister;
1726         }
1727         /* Issue device open to get the device into a known state */
1728         err = enic_dev_open(enic);
1729         if (err) {
1730                 dev_err(enic, "vNIC dev open failed, aborting\n");
1731                 goto err_out_unregister;
1732         }
1733
1734         /* Set ingress vlan rewrite mode before vnic initialization */
1735         dev_debug(enic, "Set ig_vlan_rewrite_mode=%u\n",
1736                   enic->ig_vlan_rewrite_mode);
1737         err = vnic_dev_set_ig_vlan_rewrite_mode(enic->vdev,
1738                 enic->ig_vlan_rewrite_mode);
1739         if (err) {
1740                 dev_err(enic,
1741                         "Failed to set ingress vlan rewrite mode, aborting.\n");
1742                 goto err_out_dev_close;
1743         }
1744
1745         /* Issue device init to initialize the vnic-to-switch link.
1746          * We'll start with carrier off and wait for link UP
1747          * notification later to turn on carrier.  We don't need
1748          * to wait here for the vnic-to-switch link initialization
1749          * to complete; link UP notification is the indication that
1750          * the process is complete.
1751          */
1752
1753         err = vnic_dev_init(enic->vdev, 0);
1754         if (err) {
1755                 dev_err(enic, "vNIC dev init failed, aborting\n");
1756                 goto err_out_dev_close;
1757         }
1758
1759         err = enic_dev_init(enic);
1760         if (err) {
1761                 dev_err(enic, "Device initialization failed, aborting\n");
1762                 goto err_out_dev_close;
1763         }
1764
1765         return 0;
1766
1767 err_out_dev_close:
1768         vnic_dev_close(enic->vdev);
1769 err_out_unregister:
1770         vnic_dev_unregister(enic->vdev);
1771 err_out:
1772         return err;
1773 }
1774
1775 void enic_remove(struct enic *enic)
1776 {
1777         enic_dev_deinit(enic);
1778         vnic_dev_close(enic->vdev);
1779         vnic_dev_unregister(enic->vdev);
1780 }