1 /* SPDX-License-Identifier: BSD-3-Clause
2 * Copyright 2008-2017 Cisco Systems, Inc. All rights reserved.
3 * Copyright 2007 Nuova Systems, Inc. All rights reserved.
13 #include <rte_bus_pci.h>
14 #include <rte_memzone.h>
15 #include <rte_malloc.h>
17 #include <rte_string_fns.h>
18 #include <rte_ethdev_driver.h>
20 #include "enic_compat.h"
22 #include "wq_enet_desc.h"
23 #include "rq_enet_desc.h"
24 #include "cq_enet_desc.h"
25 #include "vnic_enet.h"
30 #include "vnic_intr.h"
33 static inline int enic_is_sriov_vf(struct enic *enic)
35 return enic->pdev->id.device_id == PCI_DEVICE_ID_CISCO_VIC_ENET_VF;
38 static int is_zero_addr(uint8_t *addr)
40 return !(addr[0] | addr[1] | addr[2] | addr[3] | addr[4] | addr[5]);
43 static int is_mcast_addr(uint8_t *addr)
48 static int is_eth_addr_valid(uint8_t *addr)
50 return !is_mcast_addr(addr) && !is_zero_addr(addr);
54 enic_rxmbuf_queue_release(__rte_unused struct enic *enic, struct vnic_rq *rq)
58 if (!rq || !rq->mbuf_ring) {
59 dev_debug(enic, "Pointer to rq or mbuf_ring is NULL");
63 for (i = 0; i < rq->ring.desc_count; i++) {
64 if (rq->mbuf_ring[i]) {
65 rte_pktmbuf_free_seg(rq->mbuf_ring[i]);
66 rq->mbuf_ring[i] = NULL;
71 static void enic_free_wq_buf(struct rte_mbuf **buf)
73 struct rte_mbuf *mbuf = *buf;
75 rte_pktmbuf_free_seg(mbuf);
79 static void enic_log_q_error(struct enic *enic)
84 for (i = 0; i < enic->wq_count; i++) {
85 error_status = vnic_wq_error_status(&enic->wq[i]);
87 dev_err(enic, "WQ[%d] error_status %d\n", i,
91 for (i = 0; i < enic_vnic_rq_count(enic); i++) {
92 if (!enic->rq[i].in_use)
94 error_status = vnic_rq_error_status(&enic->rq[i]);
96 dev_err(enic, "RQ[%d] error_status %d\n", i,
101 static void enic_clear_soft_stats(struct enic *enic)
103 struct enic_soft_stats *soft_stats = &enic->soft_stats;
104 rte_atomic64_clear(&soft_stats->rx_nombuf);
105 rte_atomic64_clear(&soft_stats->rx_packet_errors);
106 rte_atomic64_clear(&soft_stats->tx_oversized);
109 static void enic_init_soft_stats(struct enic *enic)
111 struct enic_soft_stats *soft_stats = &enic->soft_stats;
112 rte_atomic64_init(&soft_stats->rx_nombuf);
113 rte_atomic64_init(&soft_stats->rx_packet_errors);
114 rte_atomic64_init(&soft_stats->tx_oversized);
115 enic_clear_soft_stats(enic);
118 int enic_dev_stats_clear(struct enic *enic)
122 ret = vnic_dev_stats_clear(enic->vdev);
124 dev_err(enic, "Error in clearing stats\n");
127 enic_clear_soft_stats(enic);
132 int enic_dev_stats_get(struct enic *enic, struct rte_eth_stats *r_stats)
134 struct vnic_stats *stats;
135 struct enic_soft_stats *soft_stats = &enic->soft_stats;
136 int64_t rx_truncated;
137 uint64_t rx_packet_errors;
138 int ret = vnic_dev_stats_dump(enic->vdev, &stats);
141 dev_err(enic, "Error in getting stats\n");
145 /* The number of truncated packets can only be calculated by
146 * subtracting a hardware counter from error packets received by
147 * the driver. Note: this causes transient inaccuracies in the
148  * ipackets count. Also, the lengths of truncated packets are
149  * counted in ibytes even though truncated packets are dropped,
150  * which can make ibytes slightly higher than it should be.
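 * For example (illustrative numbers): if the driver has counted 10
 * error packets in software while the HW rx_errors counter reads 7,
 * then rx_truncated = 10 - 7 = 3, and those 3 frames are subtracted
 * from ipackets and added to imissed below.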
152 rx_packet_errors = rte_atomic64_read(&soft_stats->rx_packet_errors);
153 rx_truncated = rx_packet_errors - stats->rx.rx_errors;
155 r_stats->ipackets = stats->rx.rx_frames_ok - rx_truncated;
156 r_stats->opackets = stats->tx.tx_frames_ok;
158 r_stats->ibytes = stats->rx.rx_bytes_ok;
159 r_stats->obytes = stats->tx.tx_bytes_ok;
161 r_stats->ierrors = stats->rx.rx_errors + stats->rx.rx_drop;
162 r_stats->oerrors = stats->tx.tx_errors
163 + rte_atomic64_read(&soft_stats->tx_oversized);
165 r_stats->imissed = stats->rx.rx_no_bufs + rx_truncated;
167 r_stats->rx_nombuf = rte_atomic64_read(&soft_stats->rx_nombuf);
171 int enic_del_mac_address(struct enic *enic, int mac_index)
173 struct rte_eth_dev *eth_dev = enic->rte_dev;
174 uint8_t *mac_addr = eth_dev->data->mac_addrs[mac_index].addr_bytes;
176 return vnic_dev_del_addr(enic->vdev, mac_addr);
179 int enic_set_mac_address(struct enic *enic, uint8_t *mac_addr)
183 if (!is_eth_addr_valid(mac_addr)) {
184 dev_err(enic, "invalid mac address\n");
188 err = vnic_dev_add_addr(enic->vdev, mac_addr);
190 dev_err(enic, "add mac addr failed\n");
195 enic_free_rq_buf(struct rte_mbuf **mbuf)
200 rte_pktmbuf_free(*mbuf);
204 void enic_init_vnic_resources(struct enic *enic)
206 unsigned int error_interrupt_enable = 1;
207 unsigned int error_interrupt_offset = 0;
208 unsigned int rxq_interrupt_enable = 0;
209 unsigned int rxq_interrupt_offset = ENICPMD_RXQ_INTR_OFFSET;
210 unsigned int index = 0;
212 struct vnic_rq *data_rq;
214 if (enic->rte_dev->data->dev_conf.intr_conf.rxq)
215 rxq_interrupt_enable = 1;
217 for (index = 0; index < enic->rq_count; index++) {
218 cq_idx = enic_cq_rq(enic, enic_rte_rq_idx_to_sop_idx(index));
220 vnic_rq_init(&enic->rq[enic_rte_rq_idx_to_sop_idx(index)],
222 error_interrupt_enable,
223 error_interrupt_offset);
225 data_rq = &enic->rq[enic_rte_rq_idx_to_data_idx(index)];
227 vnic_rq_init(data_rq,
229 error_interrupt_enable,
230 error_interrupt_offset);
232 vnic_cq_init(&enic->cq[cq_idx],
233 0 /* flow_control_enable */,
234 1 /* color_enable */,
237 1 /* cq_tail_color */,
238 rxq_interrupt_enable,
239 1 /* cq_entry_enable */,
240 0 /* cq_message_enable */,
241 rxq_interrupt_offset,
242 0 /* cq_message_addr */);
243 if (rxq_interrupt_enable)
244 rxq_interrupt_offset++;
247 for (index = 0; index < enic->wq_count; index++) {
248 vnic_wq_init(&enic->wq[index],
249 enic_cq_wq(enic, index),
250 error_interrupt_enable,
251 error_interrupt_offset);
252 /* Compute unsupported ol flags for enic_prep_pkts() */
253 enic->wq[index].tx_offload_notsup_mask =
254 PKT_TX_OFFLOAD_MASK ^ enic->tx_offload_mask;
256 cq_idx = enic_cq_wq(enic, index);
257 vnic_cq_init(&enic->cq[cq_idx],
258 0 /* flow_control_enable */,
259 1 /* color_enable */,
262 1 /* cq_tail_color */,
263 0 /* interrupt_enable */,
264 0 /* cq_entry_enable */,
265 1 /* cq_message_enable */,
266 0 /* interrupt offset */,
267 (u64)enic->wq[index].cqmsg_rz->iova);
270 for (index = 0; index < enic->intr_count; index++) {
271 vnic_intr_init(&enic->intr[index],
272 enic->config.intr_timer_usec,
273 enic->config.intr_timer_type,
274 /*mask_on_assertion*/1);
280 enic_alloc_rx_queue_mbufs(struct enic *enic, struct vnic_rq *rq)
283 struct rq_enet_desc *rqd = rq->ring.descs;
286 uint32_t max_rx_pkt_len;
292 dev_debug(enic, "queue %u, allocating %u rx queue mbufs\n", rq->index,
293 rq->ring.desc_count);
296 * If *not* using scatter and the mbuf size is greater than the
297 * requested max packet size (max_rx_pkt_len), then reduce the
298 * posted buffer size to max_rx_pkt_len. HW still receives packets
299 * larger than max_rx_pkt_len, but they will be truncated, which we
300 * drop in the rx handler. Not ideal, but better than returning
301 * large packets when the user is not expecting them.
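 * For example (illustrative numbers): if the mbufs can hold 2048 bytes
 * of data but max_rx_pkt_len is 1518, buffers are posted as 1518 bytes,
 * so anything longer arrives truncated and is dropped by the Rx
 * handler.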
303 max_rx_pkt_len = enic->rte_dev->data->dev_conf.rxmode.max_rx_pkt_len;
304 rq_buf_len = rte_pktmbuf_data_room_size(rq->mp) - RTE_PKTMBUF_HEADROOM;
305 if (max_rx_pkt_len < rq_buf_len && !rq->data_queue_enable)
306 rq_buf_len = max_rx_pkt_len;
307 for (i = 0; i < rq->ring.desc_count; i++, rqd++) {
308 mb = rte_mbuf_raw_alloc(rq->mp);
310 dev_err(enic, "RX mbuf alloc failed queue_id=%u\n",
311 (unsigned)rq->index);
315 mb->data_off = RTE_PKTMBUF_HEADROOM;
316 dma_addr = (dma_addr_t)(mb->buf_iova
317 + RTE_PKTMBUF_HEADROOM);
318 rq_enet_desc_enc(rqd, dma_addr,
319 (rq->is_sop ? RQ_ENET_TYPE_ONLY_SOP
320 : RQ_ENET_TYPE_NOT_SOP),
322 rq->mbuf_ring[i] = mb;
325  * Do not post the buffers to the NIC until we enable the RQ via enic_start_rq().
328 rq->need_initial_post = true;
329 /* Initialize fetch index while RQ is disabled */
330 iowrite32(0, &rq->ctrl->fetch_index);
335 * Post the Rx buffers for the first time. enic_alloc_rx_queue_mbufs() has
336 * allocated the buffers and filled the RQ descriptor ring. Just need to push
337 * the post index to the NIC.
340 enic_initial_post_rx(struct enic *enic, struct vnic_rq *rq)
342 if (!rq->in_use || !rq->need_initial_post)
345 /* make sure all prior writes are complete before doing the PIO write */
348 /* Post all but the last buffer to VIC. */
349 rq->posted_index = rq->ring.desc_count - 1;
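/*
 * Holding one descriptor back presumably keeps posted_index from ever
 * equalling fetch_index, so a completely full ring is not mistaken for
 * an empty one.
 */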
353 dev_debug(enic, "port=%u, qidx=%u, Write %u posted idx, %u sw held\n",
354 enic->port_id, rq->index, rq->posted_index, rq->rx_nb_hold);
355 iowrite32(rq->posted_index, &rq->ctrl->posted_index);
357 rq->need_initial_post = false;
361 enic_alloc_consistent(void *priv, size_t size,
362 dma_addr_t *dma_handle, u8 *name)
365 const struct rte_memzone *rz;
367 struct enic *enic = (struct enic *)priv;
368 struct enic_memzone_entry *mze;
370 rz = rte_memzone_reserve_aligned((const char *)name, size,
371 SOCKET_ID_ANY, RTE_MEMZONE_IOVA_CONTIG, ENIC_ALIGN);
373 pr_err("%s : Failed to allocate memory requested for %s\n",
379 *dma_handle = (dma_addr_t)rz->iova;
381 mze = rte_malloc("enic memzone entry",
382 sizeof(struct enic_memzone_entry), 0);
385 pr_err("%s : Failed to allocate memory for memzone list\n",
387 rte_memzone_free(rz);
393 rte_spinlock_lock(&enic->memzone_list_lock);
394 LIST_INSERT_HEAD(&enic->memzone_list, mze, entries);
395 rte_spinlock_unlock(&enic->memzone_list_lock);
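/*
 * The entry just added lets enic_free_consistent() below find the
 * memzone again by matching the (virtual address, IOVA) pair when the
 * vNIC layer frees the buffer.
 */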
401 enic_free_consistent(void *priv,
402 __rte_unused size_t size,
404 dma_addr_t dma_handle)
406 struct enic_memzone_entry *mze;
407 struct enic *enic = (struct enic *)priv;
409 rte_spinlock_lock(&enic->memzone_list_lock);
410 LIST_FOREACH(mze, &enic->memzone_list, entries) {
411 if (mze->rz->addr == vaddr &&
412 mze->rz->iova == dma_handle)
416 rte_spinlock_unlock(&enic->memzone_list_lock);
418 "Tried to free memory, but couldn't find it in the memzone list\n");
421 LIST_REMOVE(mze, entries);
422 rte_spinlock_unlock(&enic->memzone_list_lock);
423 rte_memzone_free(mze->rz);
427 int enic_link_update(struct rte_eth_dev *eth_dev)
429 struct enic *enic = pmd_priv(eth_dev);
430 struct rte_eth_link link;
432 memset(&link, 0, sizeof(link));
433 link.link_status = enic_get_link_status(enic);
434 link.link_duplex = ETH_LINK_FULL_DUPLEX;
435 link.link_speed = vnic_dev_port_speed(enic->vdev);
437 return rte_eth_linkstatus_set(eth_dev, &link);
441 enic_intr_handler(void *arg)
443 struct rte_eth_dev *dev = (struct rte_eth_dev *)arg;
444 struct enic *enic = pmd_priv(dev);
446 vnic_intr_return_all_credits(&enic->intr[ENICPMD_LSC_INTR_OFFSET]);
448 enic_link_update(dev);
449 _rte_eth_dev_callback_process(dev, RTE_ETH_EVENT_INTR_LSC, NULL);
450 enic_log_q_error(enic);
453 static int enic_rxq_intr_init(struct enic *enic)
455 struct rte_intr_handle *intr_handle;
456 uint32_t rxq_intr_count, i;
459 intr_handle = enic->rte_dev->intr_handle;
460 if (!enic->rte_dev->data->dev_conf.intr_conf.rxq)
463 * Rx queue interrupts only work when we have MSI-X interrupts,
464 * one per queue. Sharing one interrupt is technically
465 * possible with VIC, but it is not worth the complications it brings.
467 if (!rte_intr_cap_multiple(intr_handle)) {
468 dev_err(enic, "Rx queue interrupts require MSI-X interrupts"
469 " (vfio-pci driver)\n");
472 rxq_intr_count = enic->intr_count - ENICPMD_RXQ_INTR_OFFSET;
473 err = rte_intr_efd_enable(intr_handle, rxq_intr_count);
475 dev_err(enic, "Failed to enable event fds for Rx queue"
479 intr_handle->intr_vec = rte_zmalloc("enic_intr_vec",
480 rxq_intr_count * sizeof(int), 0);
481 if (intr_handle->intr_vec == NULL) {
482 dev_err(enic, "Failed to allocate intr_vec\n");
485 for (i = 0; i < rxq_intr_count; i++)
486 intr_handle->intr_vec[i] = i + ENICPMD_RXQ_INTR_OFFSET;
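/*
 * Resulting layout: the LSC/error vector (ENICPMD_LSC_INTR_OFFSET) is
 * left untouched, and Rx queue i is serviced by MSI-X vector
 * i + ENICPMD_RXQ_INTR_OFFSET, which is what the mapping above records.
 */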
490 static void enic_rxq_intr_deinit(struct enic *enic)
492 struct rte_intr_handle *intr_handle;
494 intr_handle = enic->rte_dev->intr_handle;
495 rte_intr_efd_disable(intr_handle);
496 if (intr_handle->intr_vec != NULL) {
497 rte_free(intr_handle->intr_vec);
498 intr_handle->intr_vec = NULL;
502 static void enic_prep_wq_for_simple_tx(struct enic *enic, uint16_t queue_idx)
504 struct wq_enet_desc *desc;
509 * Fill WQ descriptor fields that never change. Every descriptor is
510 * one packet, so set EOP. Also set CQ_ENTRY every ENIC_WQ_CQ_THRESH
511 * descriptors (i.e. request one completion update every 32 packets).
513 wq = &enic->wq[queue_idx];
514 desc = (struct wq_enet_desc *)wq->ring.descs;
515 for (i = 0; i < wq->ring.desc_count; i++, desc++) {
516 desc->header_length_flags = 1 << WQ_ENET_FLAGS_EOP_SHIFT;
517 if (i % ENIC_WQ_CQ_THRESH == ENIC_WQ_CQ_THRESH - 1)
518 desc->header_length_flags |=
519 (1 << WQ_ENET_FLAGS_CQ_ENTRY_SHIFT);
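/*
 * With ENIC_WQ_CQ_THRESH at 32 (per the comment above), descriptors
 * 31, 63, 95, ... carry the CQ_ENTRY flag, i.e. roughly one completion
 * update is requested for every 32 transmitted packets.
 */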
524  * The 'strong' version is in enic_rxtx_vec_avx2.c. This weak version is
525  * used when that file is not compiled.
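 * At link time the strong definition overrides this weak one when the
 * AVX2 file is built, so no run-time check is needed to choose between
 * them.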
528 enic_use_vector_rx_handler(__rte_unused struct rte_eth_dev *eth_dev)
533 void enic_pick_rx_handler(struct rte_eth_dev *eth_dev)
535 struct enic *enic = pmd_priv(eth_dev);
539 * 1. The vectorized handler if possible and requested.
540 * 2. The non-scatter, simplified handler if scatter Rx is not used.
541 * 3. The default handler as a fallback.
543 if (enic_use_vector_rx_handler(eth_dev))
545 if (enic->rq_count > 0 && enic->rq[0].data_queue_enable == 0) {
546 ENICPMD_LOG(DEBUG, " use the non-scatter Rx handler");
547 eth_dev->rx_pkt_burst = &enic_noscatter_recv_pkts;
549 ENICPMD_LOG(DEBUG, " use the normal Rx handler");
550 eth_dev->rx_pkt_burst = &enic_recv_pkts;
554 /* Secondary process uses this to set the Tx handler */
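/*
 * Burst function pointers are per-process addresses, so a secondary
 * process cannot reuse the primary's pointer and must select a handler
 * itself; use_simple_tx_handler, set by the primary in enic_enable(),
 * tells it which one.
 */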
555 void enic_pick_tx_handler(struct rte_eth_dev *eth_dev)
557 struct enic *enic = pmd_priv(eth_dev);
559 if (enic->use_simple_tx_handler) {
560 ENICPMD_LOG(DEBUG, " use the simple tx handler");
561 eth_dev->tx_pkt_burst = &enic_simple_xmit_pkts;
563 ENICPMD_LOG(DEBUG, " use the default tx handler");
564 eth_dev->tx_pkt_burst = &enic_xmit_pkts;
568 int enic_enable(struct enic *enic)
572 struct rte_eth_dev *eth_dev = enic->rte_dev;
573 uint64_t simple_tx_offloads;
576 if (enic->enable_avx2_rx) {
577 struct rte_mbuf mb_def = { .buf_addr = 0 };
580 * mbuf_initializer contains const-after-init fields of
581 * receive mbufs (i.e. 64 bits of fields from rearm_data).
582 * It is currently used by the vectorized handler.
585 mb_def.data_off = RTE_PKTMBUF_HEADROOM;
586 mb_def.port = enic->port_id;
587 rte_mbuf_refcnt_set(&mb_def, 1);
588 rte_compiler_barrier();
589 p = (uintptr_t)&mb_def.rearm_data;
590 enic->mbuf_initializer = *(uint64_t *)p;
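		/*
		 * rearm_data marks the 8-byte group of rte_mbuf fields
		 * (data_off, refcnt, nb_segs, port), so the vectorized Rx
		 * path can reset all of them with a single 64-bit store of
		 * mbuf_initializer per received mbuf.
		 */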
593 eth_dev->data->dev_link.link_speed = vnic_dev_port_speed(enic->vdev);
594 eth_dev->data->dev_link.link_duplex = ETH_LINK_FULL_DUPLEX;
596 /* vnic notification of link status has already been turned on in
597 * enic_dev_init() which is called during probe time. Here we are
598 * just turning on interrupt vector 0 if needed.
600 if (eth_dev->data->dev_conf.intr_conf.lsc)
601 vnic_dev_notify_set(enic->vdev, 0);
603 err = enic_rxq_intr_init(enic);
606 if (enic_clsf_init(enic))
607 		dev_warning(enic, "Init of hash table for clsf failed. "\
608 			"Flow director feature will not work\n");
610 for (index = 0; index < enic->rq_count; index++) {
611 err = enic_alloc_rx_queue_mbufs(enic,
612 &enic->rq[enic_rte_rq_idx_to_sop_idx(index)]);
614 dev_err(enic, "Failed to alloc sop RX queue mbufs\n");
617 err = enic_alloc_rx_queue_mbufs(enic,
618 &enic->rq[enic_rte_rq_idx_to_data_idx(index)]);
620 			/* release the allocated mbufs for the sop rq */
621 enic_rxmbuf_queue_release(enic,
622 &enic->rq[enic_rte_rq_idx_to_sop_idx(index)]);
624 dev_err(enic, "Failed to alloc data RX queue mbufs\n");
630 * Use the simple TX handler if possible. Only checksum offloads
631 * and vlan insertion are supported.
633 simple_tx_offloads = enic->tx_offload_capa &
634 (DEV_TX_OFFLOAD_OUTER_IPV4_CKSUM |
635 DEV_TX_OFFLOAD_VLAN_INSERT |
636 DEV_TX_OFFLOAD_IPV4_CKSUM |
637 DEV_TX_OFFLOAD_UDP_CKSUM |
638 DEV_TX_OFFLOAD_TCP_CKSUM);
639 if ((eth_dev->data->dev_conf.txmode.offloads &
640 ~simple_tx_offloads) == 0) {
641 ENICPMD_LOG(DEBUG, " use the simple tx handler");
642 eth_dev->tx_pkt_burst = &enic_simple_xmit_pkts;
643 for (index = 0; index < enic->wq_count; index++)
644 enic_prep_wq_for_simple_tx(enic, index);
645 enic->use_simple_tx_handler = 1;
647 ENICPMD_LOG(DEBUG, " use the default tx handler");
648 eth_dev->tx_pkt_burst = &enic_xmit_pkts;
651 enic_pick_rx_handler(eth_dev);
653 for (index = 0; index < enic->wq_count; index++)
654 enic_start_wq(enic, index);
655 for (index = 0; index < enic->rq_count; index++)
656 enic_start_rq(enic, index);
658 vnic_dev_add_addr(enic->vdev, enic->mac_addr);
660 vnic_dev_enable_wait(enic->vdev);
662 /* Register and enable error interrupt */
663 rte_intr_callback_register(&(enic->pdev->intr_handle),
664 enic_intr_handler, (void *)enic->rte_dev);
666 rte_intr_enable(&(enic->pdev->intr_handle));
667 /* Unmask LSC interrupt */
668 vnic_intr_unmask(&enic->intr[ENICPMD_LSC_INTR_OFFSET]);
673 int enic_alloc_intr_resources(struct enic *enic)
678 dev_info(enic, "vNIC resources used: "\
679 "wq %d rq %d cq %d intr %d\n",
680 enic->wq_count, enic_vnic_rq_count(enic),
681 enic->cq_count, enic->intr_count);
683 for (i = 0; i < enic->intr_count; i++) {
684 err = vnic_intr_alloc(enic->vdev, &enic->intr[i], i);
686 enic_free_vnic_resources(enic);
693 void enic_free_rq(void *rxq)
695 struct vnic_rq *rq_sop, *rq_data;
701 rq_sop = (struct vnic_rq *)rxq;
702 enic = vnic_dev_priv(rq_sop->vdev);
703 rq_data = &enic->rq[rq_sop->data_queue_idx];
705 if (rq_sop->free_mbufs) {
706 struct rte_mbuf **mb;
709 mb = rq_sop->free_mbufs;
710 for (i = ENIC_RX_BURST_MAX - rq_sop->num_free_mbufs;
711 i < ENIC_RX_BURST_MAX; i++)
712 rte_pktmbuf_free(mb[i]);
713 rte_free(rq_sop->free_mbufs);
714 rq_sop->free_mbufs = NULL;
715 rq_sop->num_free_mbufs = 0;
718 enic_rxmbuf_queue_release(enic, rq_sop);
720 enic_rxmbuf_queue_release(enic, rq_data);
722 rte_free(rq_sop->mbuf_ring);
724 rte_free(rq_data->mbuf_ring);
726 rq_sop->mbuf_ring = NULL;
727 rq_data->mbuf_ring = NULL;
729 vnic_rq_free(rq_sop);
731 vnic_rq_free(rq_data);
733 vnic_cq_free(&enic->cq[enic_sop_rq_idx_to_cq_idx(rq_sop->index)]);
739 void enic_start_wq(struct enic *enic, uint16_t queue_idx)
741 struct rte_eth_dev_data *data = enic->dev_data;
742 vnic_wq_enable(&enic->wq[queue_idx]);
743 data->tx_queue_state[queue_idx] = RTE_ETH_QUEUE_STATE_STARTED;
746 int enic_stop_wq(struct enic *enic, uint16_t queue_idx)
748 struct rte_eth_dev_data *data = enic->dev_data;
751 ret = vnic_wq_disable(&enic->wq[queue_idx]);
755 data->tx_queue_state[queue_idx] = RTE_ETH_QUEUE_STATE_STOPPED;
759 void enic_start_rq(struct enic *enic, uint16_t queue_idx)
761 struct rte_eth_dev_data *data = enic->dev_data;
762 struct vnic_rq *rq_sop;
763 struct vnic_rq *rq_data;
764 rq_sop = &enic->rq[enic_rte_rq_idx_to_sop_idx(queue_idx)];
765 rq_data = &enic->rq[rq_sop->data_queue_idx];
767 if (rq_data->in_use) {
768 vnic_rq_enable(rq_data);
769 enic_initial_post_rx(enic, rq_data);
772 vnic_rq_enable(rq_sop);
773 enic_initial_post_rx(enic, rq_sop);
774 data->rx_queue_state[queue_idx] = RTE_ETH_QUEUE_STATE_STARTED;
777 int enic_stop_rq(struct enic *enic, uint16_t queue_idx)
779 struct rte_eth_dev_data *data = enic->dev_data;
780 int ret1 = 0, ret2 = 0;
781 struct vnic_rq *rq_sop;
782 struct vnic_rq *rq_data;
783 rq_sop = &enic->rq[enic_rte_rq_idx_to_sop_idx(queue_idx)];
784 rq_data = &enic->rq[rq_sop->data_queue_idx];
786 ret2 = vnic_rq_disable(rq_sop);
789 ret1 = vnic_rq_disable(rq_data);
796 data->rx_queue_state[queue_idx] = RTE_ETH_QUEUE_STATE_STOPPED;
800 int enic_alloc_rq(struct enic *enic, uint16_t queue_idx,
801 unsigned int socket_id, struct rte_mempool *mp,
802 uint16_t nb_desc, uint16_t free_thresh)
805 uint16_t sop_queue_idx = enic_rte_rq_idx_to_sop_idx(queue_idx);
806 uint16_t data_queue_idx = enic_rte_rq_idx_to_data_idx(queue_idx);
807 struct vnic_rq *rq_sop = &enic->rq[sop_queue_idx];
808 struct vnic_rq *rq_data = &enic->rq[data_queue_idx];
809 unsigned int mbuf_size, mbufs_per_pkt;
810 unsigned int nb_sop_desc, nb_data_desc;
811 uint16_t min_sop, max_sop, min_data, max_data;
812 uint32_t max_rx_pkt_len;
815 rq_sop->data_queue_idx = data_queue_idx;
817 rq_data->data_queue_idx = 0;
818 rq_sop->socket_id = socket_id;
820 rq_data->socket_id = socket_id;
823 rq_sop->rx_free_thresh = free_thresh;
824 rq_data->rx_free_thresh = free_thresh;
825 dev_debug(enic, "Set queue_id:%u free thresh:%u\n", queue_idx,
828 mbuf_size = (uint16_t)(rte_pktmbuf_data_room_size(mp) -
829 RTE_PKTMBUF_HEADROOM);
830 /* max_rx_pkt_len includes the ethernet header and CRC. */
831 max_rx_pkt_len = enic->rte_dev->data->dev_conf.rxmode.max_rx_pkt_len;
833 if (enic->rte_dev->data->dev_conf.rxmode.offloads &
834 DEV_RX_OFFLOAD_SCATTER) {
835 dev_info(enic, "Rq %u Scatter rx mode enabled\n", queue_idx);
836 /* ceil((max pkt len)/mbuf_size) */
837 mbufs_per_pkt = (max_rx_pkt_len + mbuf_size - 1) / mbuf_size;
839 dev_info(enic, "Scatter rx mode disabled\n");
841 if (max_rx_pkt_len > mbuf_size) {
842 dev_warning(enic, "The maximum Rx packet size (%u) is"
843 " larger than the mbuf size (%u), and"
844 " scatter is disabled. Larger packets will"
846 max_rx_pkt_len, mbuf_size);
850 if (mbufs_per_pkt > 1) {
851 dev_info(enic, "Rq %u Scatter rx mode in use\n", queue_idx);
852 rq_sop->data_queue_enable = 1;
855 * HW does not directly support rxmode.max_rx_pkt_len. HW always
856 * receives packet sizes up to the "max" MTU.
857 * If not using scatter, we can achieve the effect of dropping
858 * larger packets by reducing the size of posted buffers.
859 * See enic_alloc_rx_queue_mbufs().
862 enic_mtu_to_max_rx_pktlen(enic->max_mtu)) {
863 dev_warning(enic, "rxmode.max_rx_pkt_len is ignored"
864 " when scatter rx mode is in use.\n");
867 dev_info(enic, "Rq %u Scatter rx mode not being used\n",
869 rq_sop->data_queue_enable = 0;
873 	/* number of descriptors has to be a multiple of 32 */
874 nb_sop_desc = (nb_desc / mbufs_per_pkt) & ENIC_ALIGN_DESCS_MASK;
875 nb_data_desc = (nb_desc - nb_sop_desc) & ENIC_ALIGN_DESCS_MASK;
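	/*
	 * Worked example (illustrative numbers): with max_rx_pkt_len = 9000
	 * and roughly 2KB buffers, mbufs_per_pkt = 5, so a request for 1024
	 * descriptors is initially split into nb_sop_desc = (1024 / 5) & ~31
	 * = 192 and nb_data_desc = (1024 - 192) & ~31 = 832, before the
	 * min/max clamping below.
	 */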
877 rq_sop->max_mbufs_per_pkt = mbufs_per_pkt;
878 rq_data->max_mbufs_per_pkt = mbufs_per_pkt;
880 if (mbufs_per_pkt > 1) {
881 min_sop = ENIC_RX_BURST_MAX;
882 max_sop = ((enic->config.rq_desc_count /
883 (mbufs_per_pkt - 1)) & ENIC_ALIGN_DESCS_MASK);
884 min_data = min_sop * (mbufs_per_pkt - 1);
885 max_data = enic->config.rq_desc_count;
887 min_sop = ENIC_RX_BURST_MAX;
888 max_sop = enic->config.rq_desc_count;
893 if (nb_desc < (min_sop + min_data)) {
895 "Number of rx descs too low, adjusting to minimum\n");
896 nb_sop_desc = min_sop;
897 nb_data_desc = min_data;
898 } else if (nb_desc > (max_sop + max_data)) {
900 "Number of rx_descs too high, adjusting to maximum\n");
901 nb_sop_desc = max_sop;
902 nb_data_desc = max_data;
904 if (mbufs_per_pkt > 1) {
905 dev_info(enic, "For max packet size %u and mbuf size %u valid"
906 " rx descriptor range is %u to %u\n",
907 max_rx_pkt_len, mbuf_size, min_sop + min_data,
910 dev_info(enic, "Using %d rx descriptors (sop %d, data %d)\n",
911 nb_sop_desc + nb_data_desc, nb_sop_desc, nb_data_desc);
913 /* Allocate sop queue resources */
914 rc = vnic_rq_alloc(enic->vdev, rq_sop, sop_queue_idx,
915 nb_sop_desc, sizeof(struct rq_enet_desc));
917 dev_err(enic, "error in allocation of sop rq\n");
920 nb_sop_desc = rq_sop->ring.desc_count;
922 if (rq_data->in_use) {
923 /* Allocate data queue resources */
924 rc = vnic_rq_alloc(enic->vdev, rq_data, data_queue_idx,
926 sizeof(struct rq_enet_desc));
928 dev_err(enic, "error in allocation of data rq\n");
929 goto err_free_rq_sop;
931 nb_data_desc = rq_data->ring.desc_count;
933 rc = vnic_cq_alloc(enic->vdev, &enic->cq[queue_idx], queue_idx,
934 socket_id, nb_sop_desc + nb_data_desc,
935 sizeof(struct cq_enet_rq_desc));
937 dev_err(enic, "error in allocation of cq for rq\n");
938 goto err_free_rq_data;
941 /* Allocate the mbuf rings */
942 rq_sop->mbuf_ring = (struct rte_mbuf **)
943 rte_zmalloc_socket("rq->mbuf_ring",
944 sizeof(struct rte_mbuf *) * nb_sop_desc,
945 RTE_CACHE_LINE_SIZE, rq_sop->socket_id);
946 if (rq_sop->mbuf_ring == NULL)
949 if (rq_data->in_use) {
950 rq_data->mbuf_ring = (struct rte_mbuf **)
951 rte_zmalloc_socket("rq->mbuf_ring",
952 sizeof(struct rte_mbuf *) * nb_data_desc,
953 RTE_CACHE_LINE_SIZE, rq_sop->socket_id);
954 if (rq_data->mbuf_ring == NULL)
955 goto err_free_sop_mbuf;
958 rq_sop->free_mbufs = (struct rte_mbuf **)
959 rte_zmalloc_socket("rq->free_mbufs",
960 sizeof(struct rte_mbuf *) *
962 RTE_CACHE_LINE_SIZE, rq_sop->socket_id);
963 if (rq_sop->free_mbufs == NULL)
964 goto err_free_data_mbuf;
965 rq_sop->num_free_mbufs = 0;
967 	rq_sop->tot_nb_desc = nb_desc; /* squirrel away for MTU update function */
972 rte_free(rq_data->mbuf_ring);
974 rte_free(rq_sop->mbuf_ring);
976 /* cleanup on error */
977 vnic_cq_free(&enic->cq[queue_idx]);
980 vnic_rq_free(rq_data);
982 vnic_rq_free(rq_sop);
987 void enic_free_wq(void *txq)
995 wq = (struct vnic_wq *)txq;
996 enic = vnic_dev_priv(wq->vdev);
997 rte_memzone_free(wq->cqmsg_rz);
999 vnic_cq_free(&enic->cq[enic->rq_count + wq->index]);
1002 int enic_alloc_wq(struct enic *enic, uint16_t queue_idx,
1003 unsigned int socket_id, uint16_t nb_desc)
1006 struct vnic_wq *wq = &enic->wq[queue_idx];
1007 unsigned int cq_index = enic_cq_wq(enic, queue_idx);
1008 char name[NAME_MAX];
1009 static int instance;
1011 wq->socket_id = socket_id;
1013 * rte_eth_tx_queue_setup() checks min, max, and alignment. So just
1014 * print an info message for diagnostics.
1016 dev_info(enic, "TX Queues - effective number of descs:%d\n", nb_desc);
1018 /* Allocate queue resources */
1019 err = vnic_wq_alloc(enic->vdev, &enic->wq[queue_idx], queue_idx,
1021 sizeof(struct wq_enet_desc));
1023 dev_err(enic, "error in allocation of wq\n");
1027 err = vnic_cq_alloc(enic->vdev, &enic->cq[cq_index], cq_index,
1029 sizeof(struct cq_enet_wq_desc));
1032 dev_err(enic, "error in allocation of cq for wq\n");
1035 	/* set up CQ message */
1036 snprintf((char *)name, sizeof(name),
1037 "vnic_cqmsg-%s-%d-%d", enic->bdf_name, queue_idx,
1040 wq->cqmsg_rz = rte_memzone_reserve_aligned((const char *)name,
1041 sizeof(uint32_t), SOCKET_ID_ANY,
1042 RTE_MEMZONE_IOVA_CONTIG, ENIC_ALIGN);
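	/*
	 * This 4-byte memzone is the CQ "message" area: enic_init_vnic_resources()
	 * points the WQ's completion queue at cqmsg_rz->iova with
	 * cq_message_enable set, so the NIC reports Tx completions by writing
	 * the completion index here rather than through CQ entries.
	 */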
1049 int enic_disable(struct enic *enic)
1054 for (i = 0; i < enic->intr_count; i++) {
1055 vnic_intr_mask(&enic->intr[i]);
1056 (void)vnic_intr_masked(&enic->intr[i]); /* flush write */
1058 enic_rxq_intr_deinit(enic);
1059 rte_intr_disable(&enic->pdev->intr_handle);
1060 rte_intr_callback_unregister(&enic->pdev->intr_handle,
1062 (void *)enic->rte_dev);
1064 vnic_dev_disable(enic->vdev);
1066 enic_clsf_destroy(enic);
1068 if (!enic_is_sriov_vf(enic))
1069 vnic_dev_del_addr(enic->vdev, enic->mac_addr);
1071 for (i = 0; i < enic->wq_count; i++) {
1072 err = vnic_wq_disable(&enic->wq[i]);
1076 for (i = 0; i < enic_vnic_rq_count(enic); i++) {
1077 if (enic->rq[i].in_use) {
1078 err = vnic_rq_disable(&enic->rq[i]);
1084 /* If we were using interrupts, set the interrupt vector to -1
1085 	 * to disable interrupts. We are not disabling link notifications,
1086 * though, as we want the polling of link status to continue working.
1088 if (enic->rte_dev->data->dev_conf.intr_conf.lsc)
1089 vnic_dev_notify_set(enic->vdev, -1);
1091 vnic_dev_set_reset_flag(enic->vdev, 1);
1093 for (i = 0; i < enic->wq_count; i++)
1094 vnic_wq_clean(&enic->wq[i], enic_free_wq_buf);
1096 for (i = 0; i < enic_vnic_rq_count(enic); i++)
1097 if (enic->rq[i].in_use)
1098 vnic_rq_clean(&enic->rq[i], enic_free_rq_buf);
1099 for (i = 0; i < enic->cq_count; i++)
1100 vnic_cq_clean(&enic->cq[i]);
1101 for (i = 0; i < enic->intr_count; i++)
1102 vnic_intr_clean(&enic->intr[i]);
1107 static int enic_dev_wait(struct vnic_dev *vdev,
1108 int (*start)(struct vnic_dev *, int),
1109 int (*finished)(struct vnic_dev *, int *),
1116 err = start(vdev, arg);
1120 /* Wait for func to complete...2 seconds max */
1121 for (i = 0; i < 2000; i++) {
1122 err = finished(vdev, &done);
1132 static int enic_dev_open(struct enic *enic)
1135 int flags = CMD_OPENF_IG_DESCCACHE;
1137 err = enic_dev_wait(enic->vdev, vnic_dev_open,
1138 vnic_dev_open_done, flags);
1140 dev_err(enic_get_dev(enic),
1141 "vNIC device open failed, err %d\n", err);
1146 static int enic_set_rsskey(struct enic *enic, uint8_t *user_key)
1148 dma_addr_t rss_key_buf_pa;
1149 union vnic_rss_key *rss_key_buf_va = NULL;
1153 RTE_ASSERT(user_key != NULL);
1154 snprintf((char *)name, NAME_MAX, "rss_key-%s", enic->bdf_name);
1155 rss_key_buf_va = enic_alloc_consistent(enic, sizeof(union vnic_rss_key),
1156 &rss_key_buf_pa, name);
1157 if (!rss_key_buf_va)
1160 for (i = 0; i < ENIC_RSS_HASH_KEY_SIZE; i++)
1161 rss_key_buf_va->key[i / 10].b[i % 10] = user_key[i];
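	/*
	 * The i / 10 and i % 10 indexing above reflects the vnic_rss_key
	 * layout, which stores the 40-byte key as 10-byte sub-keys.
	 */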
1163 err = enic_set_rss_key(enic,
1165 sizeof(union vnic_rss_key));
1167 /* Save for later queries */
1169 rte_memcpy(&enic->rss_key, rss_key_buf_va,
1170 sizeof(union vnic_rss_key));
1172 enic_free_consistent(enic, sizeof(union vnic_rss_key),
1173 rss_key_buf_va, rss_key_buf_pa);
1178 int enic_set_rss_reta(struct enic *enic, union vnic_rss_cpu *rss_cpu)
1180 dma_addr_t rss_cpu_buf_pa;
1181 union vnic_rss_cpu *rss_cpu_buf_va = NULL;
1185 snprintf((char *)name, NAME_MAX, "rss_cpu-%s", enic->bdf_name);
1186 rss_cpu_buf_va = enic_alloc_consistent(enic, sizeof(union vnic_rss_cpu),
1187 &rss_cpu_buf_pa, name);
1188 if (!rss_cpu_buf_va)
1191 rte_memcpy(rss_cpu_buf_va, rss_cpu, sizeof(union vnic_rss_cpu));
1193 err = enic_set_rss_cpu(enic,
1195 sizeof(union vnic_rss_cpu));
1197 enic_free_consistent(enic, sizeof(union vnic_rss_cpu),
1198 rss_cpu_buf_va, rss_cpu_buf_pa);
1200 /* Save for later queries */
1202 rte_memcpy(&enic->rss_cpu, rss_cpu, sizeof(union vnic_rss_cpu));
1206 static int enic_set_niccfg(struct enic *enic, u8 rss_default_cpu,
1207 u8 rss_hash_type, u8 rss_hash_bits, u8 rss_base_cpu, u8 rss_enable)
1209 const u8 tso_ipid_split_en = 0;
1212 err = enic_set_nic_cfg(enic,
1213 rss_default_cpu, rss_hash_type,
1214 rss_hash_bits, rss_base_cpu,
1215 rss_enable, tso_ipid_split_en,
1216 enic->ig_vlan_strip_en);
1221 /* Initialize RSS with defaults, called from dev_configure */
1222 int enic_init_rss_nic_cfg(struct enic *enic)
1224 static uint8_t default_rss_key[] = {
1225 85, 67, 83, 97, 119, 101, 115, 111, 109, 101,
1226 80, 65, 76, 79, 117, 110, 105, 113, 117, 101,
1227 76, 73, 78, 85, 88, 114, 111, 99, 107, 115,
1228 69, 78, 73, 67, 105, 115, 99, 111, 111, 108,
1230 struct rte_eth_rss_conf rss_conf;
1231 union vnic_rss_cpu rss_cpu;
1234 rss_conf = enic->rte_dev->data->dev_conf.rx_adv_conf.rss_conf;
1236 * If setting key for the first time, and the user gives us none, then
1237 * push the default key to NIC.
1239 if (rss_conf.rss_key == NULL) {
1240 rss_conf.rss_key = default_rss_key;
1241 rss_conf.rss_key_len = ENIC_RSS_HASH_KEY_SIZE;
1243 ret = enic_set_rss_conf(enic, &rss_conf);
1245 dev_err(enic, "Failed to configure RSS\n");
1248 if (enic->rss_enable) {
1249 /* If enabling RSS, use the default reta */
1250 for (i = 0; i < ENIC_RSS_RETA_SIZE; i++) {
1251 rss_cpu.cpu[i / 4].b[i % 4] =
1252 enic_rte_rq_idx_to_sop_idx(i % enic->rq_count);
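		/*
		 * i.e. the default table simply cycles through the enabled
		 * Rx queues (0, 1, 2, 0, 1, 2, ... with three queues),
		 * translated to their SOP RQ indices, so flows are spread
		 * evenly across queues.
		 */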
1254 ret = enic_set_rss_reta(enic, &rss_cpu);
1256 dev_err(enic, "Failed to set RSS indirection table\n");
1261 int enic_setup_finish(struct enic *enic)
1263 enic_init_soft_stats(enic);
1266 vnic_dev_packet_filter(enic->vdev,
1279 static int enic_rss_conf_valid(struct enic *enic,
1280 struct rte_eth_rss_conf *rss_conf)
1282 /* RSS is disabled per VIC settings. Ignore rss_conf. */
1283 if (enic->flow_type_rss_offloads == 0)
1285 if (rss_conf->rss_key != NULL &&
1286 rss_conf->rss_key_len != ENIC_RSS_HASH_KEY_SIZE) {
1287 dev_err(enic, "Given rss_key is %d bytes, it must be %d\n",
1288 rss_conf->rss_key_len, ENIC_RSS_HASH_KEY_SIZE);
1291 if (rss_conf->rss_hf != 0 &&
1292 (rss_conf->rss_hf & enic->flow_type_rss_offloads) == 0) {
1293 dev_err(enic, "Given rss_hf contains none of the supported"
1300 /* Set hash type and key according to rss_conf */
1301 int enic_set_rss_conf(struct enic *enic, struct rte_eth_rss_conf *rss_conf)
1303 struct rte_eth_dev *eth_dev;
1309 RTE_ASSERT(rss_conf != NULL);
1310 ret = enic_rss_conf_valid(enic, rss_conf);
1312 dev_err(enic, "RSS configuration (rss_conf) is invalid\n");
1316 eth_dev = enic->rte_dev;
1318 rss_hf = rss_conf->rss_hf & enic->flow_type_rss_offloads;
1319 if (enic->rq_count > 1 &&
1320 (eth_dev->data->dev_conf.rxmode.mq_mode & ETH_MQ_RX_RSS_FLAG) &&
1323 if (rss_hf & (ETH_RSS_IPV4 | ETH_RSS_FRAG_IPV4 |
1324 ETH_RSS_NONFRAG_IPV4_OTHER))
1325 rss_hash_type |= NIC_CFG_RSS_HASH_TYPE_IPV4;
1326 if (rss_hf & ETH_RSS_NONFRAG_IPV4_TCP)
1327 rss_hash_type |= NIC_CFG_RSS_HASH_TYPE_TCP_IPV4;
1328 if (rss_hf & ETH_RSS_NONFRAG_IPV4_UDP) {
1329 rss_hash_type |= NIC_CFG_RSS_HASH_TYPE_UDP_IPV4;
1330 if (enic->udp_rss_weak) {
1332 * 'TCP' is not a typo. The "weak" version of
1333 * UDP RSS requires both the TCP and UDP bits
1334 * be set. It does enable TCP RSS as well.
1336 rss_hash_type |= NIC_CFG_RSS_HASH_TYPE_TCP_IPV4;
1339 if (rss_hf & (ETH_RSS_IPV6 | ETH_RSS_IPV6_EX |
1340 ETH_RSS_FRAG_IPV6 | ETH_RSS_NONFRAG_IPV6_OTHER))
1341 rss_hash_type |= NIC_CFG_RSS_HASH_TYPE_IPV6;
1342 if (rss_hf & (ETH_RSS_NONFRAG_IPV6_TCP | ETH_RSS_IPV6_TCP_EX))
1343 rss_hash_type |= NIC_CFG_RSS_HASH_TYPE_TCP_IPV6;
1344 if (rss_hf & (ETH_RSS_NONFRAG_IPV6_UDP | ETH_RSS_IPV6_UDP_EX)) {
1345 rss_hash_type |= NIC_CFG_RSS_HASH_TYPE_UDP_IPV6;
1346 if (enic->udp_rss_weak)
1347 rss_hash_type |= NIC_CFG_RSS_HASH_TYPE_TCP_IPV6;
1354 /* Set the hash key if provided */
1355 if (rss_enable && rss_conf->rss_key) {
1356 ret = enic_set_rsskey(enic, rss_conf->rss_key);
1358 dev_err(enic, "Failed to set RSS key\n");
1363 ret = enic_set_niccfg(enic, ENIC_RSS_DEFAULT_CPU, rss_hash_type,
1364 ENIC_RSS_HASH_BITS, ENIC_RSS_BASE_CPU,
1367 enic->rss_hf = rss_hf;
1368 enic->rss_hash_type = rss_hash_type;
1369 enic->rss_enable = rss_enable;
1371 dev_err(enic, "Failed to update RSS configurations."
1372 " hash=0x%x\n", rss_hash_type);
1377 int enic_set_vlan_strip(struct enic *enic)
1380 * Unfortunately, VLAN strip on/off and RSS on/off are configured
1381 * together. So, re-do niccfg, preserving the current RSS settings.
1383 return enic_set_niccfg(enic, ENIC_RSS_DEFAULT_CPU, enic->rss_hash_type,
1384 ENIC_RSS_HASH_BITS, ENIC_RSS_BASE_CPU,
1388 int enic_add_packet_filter(struct enic *enic)
1390 /* Args -> directed, multicast, broadcast, promisc, allmulti */
1391 return vnic_dev_packet_filter(enic->vdev, 1, 1, 1,
1392 enic->promisc, enic->allmulti);
1395 int enic_get_link_status(struct enic *enic)
1397 return vnic_dev_link_status(enic->vdev);
1400 static void enic_dev_deinit(struct enic *enic)
1402 /* stop link status checking */
1403 vnic_dev_notify_unset(enic->vdev);
1405 /* mac_addrs is freed by rte_eth_dev_release_port() */
1407 rte_free(enic->intr);
1413 int enic_set_vnic_res(struct enic *enic)
1415 struct rte_eth_dev *eth_dev = enic->rte_dev;
1417 unsigned int required_rq, required_wq, required_cq, required_intr;
1419 /* Always use two vNIC RQs per eth_dev RQ, regardless of Rx scatter. */
1420 required_rq = eth_dev->data->nb_rx_queues * 2;
1421 required_wq = eth_dev->data->nb_tx_queues;
1422 required_cq = eth_dev->data->nb_rx_queues + eth_dev->data->nb_tx_queues;
1423 required_intr = 1; /* 1 for LSC even if intr_conf.lsc is 0 */
1424 if (eth_dev->data->dev_conf.intr_conf.rxq) {
1425 required_intr += eth_dev->data->nb_rx_queues;
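	/*
	 * Example (illustrative): a port configured with 4 Rx and 4 Tx queues
	 * plus Rx interrupts needs 8 RQs, 4 WQs, 8 CQs and 5 interrupts
	 * (1 for LSC + 4 for the Rx queues).
	 */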
1428 if (enic->conf_rq_count < required_rq) {
1429 dev_err(dev, "Not enough Receive queues. Requested:%u which uses %d RQs on VIC, Configured:%u\n",
1430 eth_dev->data->nb_rx_queues,
1431 required_rq, enic->conf_rq_count);
1434 if (enic->conf_wq_count < required_wq) {
1435 dev_err(dev, "Not enough Transmit queues. Requested:%u, Configured:%u\n",
1436 eth_dev->data->nb_tx_queues, enic->conf_wq_count);
1440 if (enic->conf_cq_count < required_cq) {
1441 dev_err(dev, "Not enough Completion queues. Required:%u, Configured:%u\n",
1442 required_cq, enic->conf_cq_count);
1445 if (enic->conf_intr_count < required_intr) {
1446 dev_err(dev, "Not enough Interrupts to support Rx queue"
1447 " interrupts. Required:%u, Configured:%u\n",
1448 required_intr, enic->conf_intr_count);
1453 enic->rq_count = eth_dev->data->nb_rx_queues;
1454 enic->wq_count = eth_dev->data->nb_tx_queues;
1455 enic->cq_count = enic->rq_count + enic->wq_count;
1456 enic->intr_count = required_intr;
1462 /* Initialize the completion queue for an RQ */
1464 enic_reinit_rq(struct enic *enic, unsigned int rq_idx)
1466 struct vnic_rq *sop_rq, *data_rq;
1467 unsigned int cq_idx;
1470 sop_rq = &enic->rq[enic_rte_rq_idx_to_sop_idx(rq_idx)];
1471 data_rq = &enic->rq[enic_rte_rq_idx_to_data_idx(rq_idx)];
1474 vnic_cq_clean(&enic->cq[cq_idx]);
1475 vnic_cq_init(&enic->cq[cq_idx],
1476 0 /* flow_control_enable */,
1477 1 /* color_enable */,
1480 1 /* cq_tail_color */,
1481 0 /* interrupt_enable */,
1482 1 /* cq_entry_enable */,
1483 0 /* cq_message_enable */,
1484 0 /* interrupt offset */,
1485 0 /* cq_message_addr */);
1488 vnic_rq_init_start(sop_rq, enic_cq_rq(enic,
1489 enic_rte_rq_idx_to_sop_idx(rq_idx)), 0,
1490 sop_rq->ring.desc_count - 1, 1, 0);
1491 if (data_rq->in_use) {
1492 vnic_rq_init_start(data_rq,
1494 enic_rte_rq_idx_to_data_idx(rq_idx)), 0,
1495 data_rq->ring.desc_count - 1, 1, 0);
1498 rc = enic_alloc_rx_queue_mbufs(enic, sop_rq);
1502 if (data_rq->in_use) {
1503 rc = enic_alloc_rx_queue_mbufs(enic, data_rq);
1505 enic_rxmbuf_queue_release(enic, sop_rq);
1513 /* The Cisco NIC can send and receive packets up to a max packet size
1514 * determined by the NIC type and firmware. There is also an MTU
1515 * configured into the NIC via the CIMC/UCSM management interface
1516 * which can be overridden by this function (up to the max packet size).
1517 * Depending on the network setup, doing so may cause packet drops
1518 * and unexpected behavior.
1520 int enic_set_mtu(struct enic *enic, uint16_t new_mtu)
1522 unsigned int rq_idx;
1525 uint16_t old_mtu; /* previous setting */
1526 uint16_t config_mtu; /* Value configured into NIC via CIMC/UCSM */
1527 struct rte_eth_dev *eth_dev = enic->rte_dev;
1529 old_mtu = eth_dev->data->mtu;
1530 config_mtu = enic->config.mtu;
1532 if (rte_eal_process_type() != RTE_PROC_PRIMARY)
1533 return -E_RTE_SECONDARY;
1535 if (new_mtu > enic->max_mtu) {
1537 "MTU not updated: requested (%u) greater than max (%u)\n",
1538 new_mtu, enic->max_mtu);
1541 if (new_mtu < ENIC_MIN_MTU) {
1543 "MTU not updated: requested (%u) less than min (%u)\n",
1544 new_mtu, ENIC_MIN_MTU);
1547 if (new_mtu > config_mtu)
1549 "MTU (%u) is greater than value configured in NIC (%u)\n",
1550 new_mtu, config_mtu);
1552 /* Update the MTU and maximum packet length */
1553 eth_dev->data->mtu = new_mtu;
1554 eth_dev->data->dev_conf.rxmode.max_rx_pkt_len =
1555 enic_mtu_to_max_rx_pktlen(new_mtu);
1558 * If the device has not started (enic_enable), nothing to do.
1559 	 * Later, enic_enable() will set up RQs reflecting the new maximum packet length.
1562 if (!eth_dev->data->dev_started)
1566 * The device has started, re-do RQs on the fly. In the process, we
1567 * pick up the new maximum packet length.
1569 * Some applications rely on the ability to change MTU without stopping
1570 * the device. So keep this behavior for now.
1572 rte_spinlock_lock(&enic->mtu_lock);
1574 /* Stop traffic on all RQs */
1575 for (rq_idx = 0; rq_idx < enic->rq_count * 2; rq_idx++) {
1576 rq = &enic->rq[rq_idx];
1577 if (rq->is_sop && rq->in_use) {
1578 rc = enic_stop_rq(enic,
1579 enic_sop_rq_idx_to_rte_idx(rq_idx));
1581 dev_err(enic, "Failed to stop Rq %u\n", rq_idx);
1587 /* replace Rx function with a no-op to avoid getting stale pkts */
1588 eth_dev->rx_pkt_burst = enic_dummy_recv_pkts;
1591 /* Allow time for threads to exit the real Rx function. */
1594 /* now it is safe to reconfigure the RQs */
1597 /* free and reallocate RQs with the new MTU */
1598 for (rq_idx = 0; rq_idx < enic->rq_count; rq_idx++) {
1599 rq = &enic->rq[enic_rte_rq_idx_to_sop_idx(rq_idx)];
1604 rc = enic_alloc_rq(enic, rq_idx, rq->socket_id, rq->mp,
1605 rq->tot_nb_desc, rq->rx_free_thresh);
1608 "Fatal MTU alloc error- No traffic will pass\n");
1612 rc = enic_reinit_rq(enic, rq_idx);
1615 "Fatal MTU RQ reinit- No traffic will pass\n");
1620 /* put back the real receive function */
1622 enic_pick_rx_handler(eth_dev);
1625 /* restart Rx traffic */
1626 for (rq_idx = 0; rq_idx < enic->rq_count; rq_idx++) {
1627 rq = &enic->rq[enic_rte_rq_idx_to_sop_idx(rq_idx)];
1628 if (rq->is_sop && rq->in_use)
1629 enic_start_rq(enic, rq_idx);
1633 dev_info(enic, "MTU changed from %u to %u\n", old_mtu, new_mtu);
1634 rte_spinlock_unlock(&enic->mtu_lock);
1638 static int enic_dev_init(struct enic *enic)
1641 struct rte_eth_dev *eth_dev = enic->rte_dev;
1643 vnic_dev_intr_coal_timer_info_default(enic->vdev);
1645 	/* Get vNIC configuration */
1647 err = enic_get_vnic_config(enic);
1649 dev_err(dev, "Get vNIC configuration failed, aborting\n");
1653 /* Get available resource counts */
1654 enic_get_res_counts(enic);
1655 if (enic->conf_rq_count == 1) {
1656 dev_err(enic, "Running with only 1 RQ configured in the vNIC is not supported.\n");
1657 dev_err(enic, "Please configure 2 RQs in the vNIC for each Rx queue used by DPDK.\n");
1658 dev_err(enic, "See the ENIC PMD guide for more information.\n");
1661 /* Queue counts may be zeros. rte_zmalloc returns NULL in that case. */
1662 enic->cq = rte_zmalloc("enic_vnic_cq", sizeof(struct vnic_cq) *
1663 enic->conf_cq_count, 8);
1664 enic->intr = rte_zmalloc("enic_vnic_intr", sizeof(struct vnic_intr) *
1665 enic->conf_intr_count, 8);
1666 enic->rq = rte_zmalloc("enic_vnic_rq", sizeof(struct vnic_rq) *
1667 enic->conf_rq_count, 8);
1668 enic->wq = rte_zmalloc("enic_vnic_wq", sizeof(struct vnic_wq) *
1669 enic->conf_wq_count, 8);
1670 if (enic->conf_cq_count > 0 && enic->cq == NULL) {
1671 dev_err(enic, "failed to allocate vnic_cq, aborting.\n");
1674 if (enic->conf_intr_count > 0 && enic->intr == NULL) {
1675 dev_err(enic, "failed to allocate vnic_intr, aborting.\n");
1678 if (enic->conf_rq_count > 0 && enic->rq == NULL) {
1679 dev_err(enic, "failed to allocate vnic_rq, aborting.\n");
1682 if (enic->conf_wq_count > 0 && enic->wq == NULL) {
1683 dev_err(enic, "failed to allocate vnic_wq, aborting.\n");
1687 /* Get the supported filters */
1688 enic_fdir_info(enic);
1690 eth_dev->data->mac_addrs = rte_zmalloc("enic_mac_addr",
1691 sizeof(struct rte_ether_addr) *
1692 ENIC_UNICAST_PERFECT_FILTERS, 0);
1693 if (!eth_dev->data->mac_addrs) {
1694 dev_err(enic, "mac addr storage alloc failed, aborting.\n");
1697 rte_ether_addr_copy((struct rte_ether_addr *)enic->mac_addr,
1698 eth_dev->data->mac_addrs);
1700 vnic_dev_set_reset_flag(enic->vdev, 0);
1702 LIST_INIT(&enic->flows);
1704 /* set up link status checking */
1705 vnic_dev_notify_set(enic->vdev, -1); /* No Intr for notify */
1708 * When Geneve with options offload is available, always disable it
1709 * first as it can interfere with user flow rules.
1711 if (enic->geneve_opt_avail &&
1712 vnic_dev_overlay_offload_ctrl(enic->vdev,
1713 OVERLAY_FEATURE_GENEVE,
1714 OVERLAY_OFFLOAD_DISABLE)) {
1715 dev_err(enic, "failed to disable geneve+option\n");
1717 enic->overlay_offload = false;
1718 if (enic->disable_overlay && enic->vxlan) {
1720 * Explicitly disable overlay offload as the setting is
1721 * sticky, and resetting vNIC does not disable it.
1723 if (vnic_dev_overlay_offload_ctrl(enic->vdev,
1724 OVERLAY_FEATURE_VXLAN,
1725 OVERLAY_OFFLOAD_DISABLE)) {
1726 dev_err(enic, "failed to disable overlay offload\n");
1728 dev_info(enic, "Overlay offload is disabled\n");
1731 if (!enic->disable_overlay && enic->vxlan &&
1732 /* 'VXLAN feature' enables VXLAN, NVGRE, and GENEVE. */
1733 vnic_dev_overlay_offload_ctrl(enic->vdev,
1734 OVERLAY_FEATURE_VXLAN,
1735 OVERLAY_OFFLOAD_ENABLE) == 0) {
1736 enic->tx_offload_capa |=
1737 DEV_TX_OFFLOAD_OUTER_IPV4_CKSUM |
1738 DEV_TX_OFFLOAD_GENEVE_TNL_TSO |
1739 DEV_TX_OFFLOAD_VXLAN_TNL_TSO;
1740 enic->tx_offload_mask |=
1743 PKT_TX_OUTER_IP_CKSUM |
1745 enic->overlay_offload = true;
1746 dev_info(enic, "Overlay offload is enabled\n");
1748 /* Geneve with options offload requires overlay offload */
1749 if (enic->overlay_offload && enic->geneve_opt_avail &&
1750 enic->geneve_opt_request) {
1751 if (vnic_dev_overlay_offload_ctrl(enic->vdev,
1752 OVERLAY_FEATURE_GENEVE,
1753 OVERLAY_OFFLOAD_ENABLE)) {
1754 dev_err(enic, "failed to enable geneve+option\n");
1756 enic->geneve_opt_enabled = 1;
1757 dev_info(enic, "Geneve with options is enabled\n");
1761 * Reset the vxlan port if HW vxlan parsing is available. It
1762 * is always enabled regardless of overlay offload
1766 enic->vxlan_port = ENIC_DEFAULT_VXLAN_PORT;
1768 * Reset the vxlan port to the default, as the NIC firmware
1769 * does not reset it automatically and keeps the old setting.
1771 if (vnic_dev_overlay_offload_cfg(enic->vdev,
1772 OVERLAY_CFG_VXLAN_PORT_UPDATE,
1773 ENIC_DEFAULT_VXLAN_PORT)) {
1774 dev_err(enic, "failed to update vxlan port\n");
1783 int enic_probe(struct enic *enic)
1785 struct rte_pci_device *pdev = enic->pdev;
1788 dev_debug(enic, "Initializing ENIC PMD\n");
1790 /* if this is a secondary process the hardware is already initialized */
1791 if (rte_eal_process_type() != RTE_PROC_PRIMARY)
1794 enic->bar0.vaddr = (void *)pdev->mem_resource[0].addr;
1795 enic->bar0.len = pdev->mem_resource[0].len;
1797 /* Register vNIC device */
1798 enic->vdev = vnic_dev_register(NULL, enic, enic->pdev, &enic->bar0, 1);
1800 dev_err(enic, "vNIC registration failed, aborting\n");
1804 LIST_INIT(&enic->memzone_list);
1805 rte_spinlock_init(&enic->memzone_list_lock);
1807 vnic_register_cbacks(enic->vdev,
1808 enic_alloc_consistent,
1809 enic_free_consistent);
1812 * Allocate the consistent memory for stats upfront so both primary and
1813 * secondary processes can dump stats.
1815 err = vnic_dev_alloc_stats_mem(enic->vdev);
1817 dev_err(enic, "Failed to allocate cmd memory, aborting\n");
1818 goto err_out_unregister;
1820 	/* Issue device open to get device in a known state */
1821 err = enic_dev_open(enic);
1823 dev_err(enic, "vNIC dev open failed, aborting\n");
1824 goto err_out_unregister;
1827 /* Set ingress vlan rewrite mode before vnic initialization */
1828 dev_debug(enic, "Set ig_vlan_rewrite_mode=%u\n",
1829 enic->ig_vlan_rewrite_mode);
1830 err = vnic_dev_set_ig_vlan_rewrite_mode(enic->vdev,
1831 enic->ig_vlan_rewrite_mode);
1834 "Failed to set ingress vlan rewrite mode, aborting.\n");
1835 goto err_out_dev_close;
1838 /* Issue device init to initialize the vnic-to-switch link.
1839 * We'll start with carrier off and wait for link UP
1840 * notification later to turn on carrier. We don't need
1841 * to wait here for the vnic-to-switch link initialization
1842 * to complete; link UP notification is the indication that
1843 * the process is complete.
1846 err = vnic_dev_init(enic->vdev, 0);
1848 dev_err(enic, "vNIC dev init failed, aborting\n");
1849 goto err_out_dev_close;
1852 err = enic_dev_init(enic);
1854 dev_err(enic, "Device initialization failed, aborting\n");
1855 goto err_out_dev_close;
1861 vnic_dev_close(enic->vdev);
1863 vnic_dev_unregister(enic->vdev);
1868 void enic_remove(struct enic *enic)
1870 enic_dev_deinit(enic);
1871 vnic_dev_close(enic->vdev);
1872 vnic_dev_unregister(enic->vdev);