1 /* SPDX-License-Identifier: BSD-3-Clause
2 * Copyright 2008-2017 Cisco Systems, Inc. All rights reserved.
3 * Copyright 2007 Nuova Systems, Inc. All rights reserved.
13 #include <rte_bus_pci.h>
14 #include <rte_memzone.h>
15 #include <rte_malloc.h>
17 #include <rte_string_fns.h>
18 #include <rte_ethdev_driver.h>
20 #include "enic_compat.h"
22 #include "wq_enet_desc.h"
23 #include "rq_enet_desc.h"
24 #include "cq_enet_desc.h"
25 #include "vnic_enet.h"
30 #include "vnic_intr.h"
33 static inline int enic_is_sriov_vf(struct enic *enic)
35 return enic->pdev->id.device_id == PCI_DEVICE_ID_CISCO_VIC_ENET_VF;
38 static int is_zero_addr(uint8_t *addr)
40 return !(addr[0] | addr[1] | addr[2] | addr[3] | addr[4] | addr[5]);
43 static int is_mcast_addr(uint8_t *addr)
48 static int is_eth_addr_valid(uint8_t *addr)
50 return !is_mcast_addr(addr) && !is_zero_addr(addr);
54 enic_rxmbuf_queue_release(__rte_unused struct enic *enic, struct vnic_rq *rq)
58 if (!rq || !rq->mbuf_ring) {
59 dev_debug(enic, "Pointer to rq or mbuf_ring is NULL");
63 for (i = 0; i < rq->ring.desc_count; i++) {
64 if (rq->mbuf_ring[i]) {
65 rte_pktmbuf_free_seg(rq->mbuf_ring[i]);
66 rq->mbuf_ring[i] = NULL;
71 static void enic_free_wq_buf(struct rte_mbuf **buf)
73 struct rte_mbuf *mbuf = *buf;
75 rte_pktmbuf_free_seg(mbuf);
79 static void enic_log_q_error(struct enic *enic)
82 uint32_t error_status;
84 for (i = 0; i < enic->wq_count; i++) {
85 error_status = vnic_wq_error_status(&enic->wq[i]);
87 dev_err(enic, "WQ[%d] error_status %d\n", i,
91 for (i = 0; i < enic_vnic_rq_count(enic); i++) {
92 if (!enic->rq[i].in_use)
94 error_status = vnic_rq_error_status(&enic->rq[i]);
96 dev_err(enic, "RQ[%d] error_status %d\n", i,
101 static void enic_clear_soft_stats(struct enic *enic)
103 struct enic_soft_stats *soft_stats = &enic->soft_stats;
104 rte_atomic64_clear(&soft_stats->rx_nombuf);
105 rte_atomic64_clear(&soft_stats->rx_packet_errors);
106 rte_atomic64_clear(&soft_stats->tx_oversized);
109 static void enic_init_soft_stats(struct enic *enic)
111 struct enic_soft_stats *soft_stats = &enic->soft_stats;
112 rte_atomic64_init(&soft_stats->rx_nombuf);
113 rte_atomic64_init(&soft_stats->rx_packet_errors);
114 rte_atomic64_init(&soft_stats->tx_oversized);
115 enic_clear_soft_stats(enic);
118 int enic_dev_stats_clear(struct enic *enic)
122 ret = vnic_dev_stats_clear(enic->vdev);
124 dev_err(enic, "Error in clearing stats\n");
127 enic_clear_soft_stats(enic);
132 int enic_dev_stats_get(struct enic *enic, struct rte_eth_stats *r_stats)
134 struct vnic_stats *stats;
135 struct enic_soft_stats *soft_stats = &enic->soft_stats;
136 int64_t rx_truncated;
137 uint64_t rx_packet_errors;
138 int ret = vnic_dev_stats_dump(enic->vdev, &stats);
141 dev_err(enic, "Error in getting stats\n");
145 /* The number of truncated packets can only be calculated by
146 * subtracting a hardware counter from error packets received by
147 * the driver. Note: this causes transient inaccuracies in the
148 * ipackets count. Also, the lengths of truncated packets are
149 * counted in ibytes even though truncated packets are dropped,
150 * which can make ibytes slightly higher than it should be.
152 rx_packet_errors = rte_atomic64_read(&soft_stats->rx_packet_errors);
153 rx_truncated = rx_packet_errors - stats->rx.rx_errors;
155 r_stats->ipackets = stats->rx.rx_frames_ok - rx_truncated;
156 r_stats->opackets = stats->tx.tx_frames_ok;
158 r_stats->ibytes = stats->rx.rx_bytes_ok;
159 r_stats->obytes = stats->tx.tx_bytes_ok;
161 r_stats->ierrors = stats->rx.rx_errors + stats->rx.rx_drop;
162 r_stats->oerrors = stats->tx.tx_errors
163 + rte_atomic64_read(&soft_stats->tx_oversized);
165 r_stats->imissed = stats->rx.rx_no_bufs + rx_truncated;
167 r_stats->rx_nombuf = rte_atomic64_read(&soft_stats->rx_nombuf);
171 int enic_del_mac_address(struct enic *enic, int mac_index)
173 struct rte_eth_dev *eth_dev = enic->rte_dev;
174 uint8_t *mac_addr = eth_dev->data->mac_addrs[mac_index].addr_bytes;
176 return vnic_dev_del_addr(enic->vdev, mac_addr);
179 int enic_set_mac_address(struct enic *enic, uint8_t *mac_addr)
183 if (!is_eth_addr_valid(mac_addr)) {
184 dev_err(enic, "invalid mac address\n");
188 err = vnic_dev_add_addr(enic->vdev, mac_addr);
190 dev_err(enic, "add mac addr failed\n");
195 enic_free_rq_buf(struct rte_mbuf **mbuf)
200 rte_pktmbuf_free(*mbuf);
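/*
 * Program ring bases, CQ mappings, and interrupt settings for all RQs,
 * WQs, CQs, and interrupt resources into the vNIC before it is started.
 */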
204 void enic_init_vnic_resources(struct enic *enic)
206 unsigned int error_interrupt_enable = 1;
207 unsigned int error_interrupt_offset = 0;
208 unsigned int rxq_interrupt_enable = 0;
209 unsigned int rxq_interrupt_offset = ENICPMD_RXQ_INTR_OFFSET;
210 unsigned int index = 0;
212 struct vnic_rq *data_rq;
214 if (enic->rte_dev->data->dev_conf.intr_conf.rxq)
215 rxq_interrupt_enable = 1;
217 for (index = 0; index < enic->rq_count; index++) {
218 cq_idx = enic_cq_rq(enic, enic_rte_rq_idx_to_sop_idx(index));
220 vnic_rq_init(&enic->rq[enic_rte_rq_idx_to_sop_idx(index)],
222 error_interrupt_enable,
223 error_interrupt_offset);
225 data_rq = &enic->rq[enic_rte_rq_idx_to_data_idx(index, enic)];
227 vnic_rq_init(data_rq,
229 error_interrupt_enable,
230 error_interrupt_offset);
231 vnic_cq_init(&enic->cq[cq_idx],
232 0 /* flow_control_enable */,
233 1 /* color_enable */,
236 1 /* cq_tail_color */,
237 rxq_interrupt_enable,
238 1 /* cq_entry_enable */,
239 0 /* cq_message_enable */,
240 rxq_interrupt_offset,
241 0 /* cq_message_addr */);
242 if (rxq_interrupt_enable)
243 rxq_interrupt_offset++;
246 for (index = 0; index < enic->wq_count; index++) {
247 vnic_wq_init(&enic->wq[index],
248 enic_cq_wq(enic, index),
249 error_interrupt_enable,
250 error_interrupt_offset);
251 /* Compute unsupported ol flags for enic_prep_pkts() */
252 enic->wq[index].tx_offload_notsup_mask =
253 PKT_TX_OFFLOAD_MASK ^ enic->tx_offload_mask;
255 cq_idx = enic_cq_wq(enic, index);
256 vnic_cq_init(&enic->cq[cq_idx],
257 0 /* flow_control_enable */,
258 1 /* color_enable */,
261 1 /* cq_tail_color */,
262 0 /* interrupt_enable */,
263 0 /* cq_entry_enable */,
264 1 /* cq_message_enable */,
265 0 /* interrupt offset */,
266 (uint64_t)enic->wq[index].cqmsg_rz->iova);
269 for (index = 0; index < enic->intr_count; index++) {
270 vnic_intr_init(&enic->intr[index],
271 enic->config.intr_timer_usec,
272 enic->config.intr_timer_type,
273 /*mask_on_assertion*/1);
279 enic_alloc_rx_queue_mbufs(struct enic *enic, struct vnic_rq *rq)
282 struct rq_enet_desc *rqd = rq->ring.descs;
285 uint32_t max_rx_pkt_len;
291 dev_debug(enic, "queue %u, allocating %u rx queue mbufs\n", rq->index,
292 rq->ring.desc_count);
295 * If *not* using scatter and the mbuf size is greater than the
296 * requested max packet size (max_rx_pkt_len), then reduce the
297 * posted buffer size to max_rx_pkt_len. HW still receives packets
298 * larger than max_rx_pkt_len, but they will be truncated, which we
299 * drop in the rx handler. Not ideal, but better than returning
300 * large packets when the user is not expecting them.
302 max_rx_pkt_len = enic->rte_dev->data->dev_conf.rxmode.max_rx_pkt_len;
303 rq_buf_len = rte_pktmbuf_data_room_size(rq->mp) - RTE_PKTMBUF_HEADROOM;
304 if (max_rx_pkt_len < rq_buf_len && !rq->data_queue_enable)
305 rq_buf_len = max_rx_pkt_len;
306 for (i = 0; i < rq->ring.desc_count; i++, rqd++) {
307 mb = rte_mbuf_raw_alloc(rq->mp);
309 dev_err(enic, "RX mbuf alloc failed queue_id=%u\n",
310 (unsigned)rq->index);
314 mb->data_off = RTE_PKTMBUF_HEADROOM;
315 dma_addr = (dma_addr_t)(mb->buf_iova
316 + RTE_PKTMBUF_HEADROOM);
317 rq_enet_desc_enc(rqd, dma_addr,
318 (rq->is_sop ? RQ_ENET_TYPE_ONLY_SOP
319 : RQ_ENET_TYPE_NOT_SOP),
321 rq->mbuf_ring[i] = mb;
324 * Do not post the buffers to the NIC until we enable the RQ via enic_start_rq().
327 rq->need_initial_post = true;
328 /* Initialize fetch index while RQ is disabled */
329 iowrite32(0, &rq->ctrl->fetch_index);
334 * Post the Rx buffers for the first time. enic_alloc_rx_queue_mbufs() has
335 * allocated the buffers and filled the RQ descriptor ring. Just need to push
336 * the post index to the NIC.
339 enic_initial_post_rx(struct enic *enic, struct vnic_rq *rq)
341 if (!rq->in_use || !rq->need_initial_post)
344 /* make sure all prior writes are complete before doing the PIO write */
347 /* Post all but the last buffer to VIC. */
348 rq->posted_index = rq->ring.desc_count - 1;
352 dev_debug(enic, "port=%u, qidx=%u, Write %u posted idx, %u sw held\n",
353 enic->port_id, rq->index, rq->posted_index, rq->rx_nb_hold);
354 iowrite32(rq->posted_index, &rq->ctrl->posted_index);
356 rq->need_initial_post = false;
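/*
 * DMA-coherent allocation callback registered with the vnic_dev layer
 * (see enic_probe). Reserves an IOVA-contiguous memzone and records it on
 * enic->memzone_list so enic_free_consistent() can find and free it later.
 */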
360 enic_alloc_consistent(void *priv, size_t size,
361 dma_addr_t *dma_handle, uint8_t *name)
364 const struct rte_memzone *rz;
366 struct enic *enic = (struct enic *)priv;
367 struct enic_memzone_entry *mze;
369 rz = rte_memzone_reserve_aligned((const char *)name, size,
370 SOCKET_ID_ANY, RTE_MEMZONE_IOVA_CONTIG, ENIC_PAGE_SIZE);
372 pr_err("%s : Failed to allocate memory requested for %s\n",
378 *dma_handle = (dma_addr_t)rz->iova;
380 mze = rte_malloc("enic memzone entry",
381 sizeof(struct enic_memzone_entry), 0);
384 pr_err("%s : Failed to allocate memory for memzone list\n",
386 rte_memzone_free(rz);
392 rte_spinlock_lock(&enic->memzone_list_lock);
393 LIST_INSERT_HEAD(&enic->memzone_list, mze, entries);
394 rte_spinlock_unlock(&enic->memzone_list_lock);
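/* Counterpart to enic_alloc_consistent(): look up the memzone by address and free it */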
400 enic_free_consistent(void *priv,
401 __rte_unused size_t size,
403 dma_addr_t dma_handle)
405 struct enic_memzone_entry *mze;
406 struct enic *enic = (struct enic *)priv;
408 rte_spinlock_lock(&enic->memzone_list_lock);
409 LIST_FOREACH(mze, &enic->memzone_list, entries) {
410 if (mze->rz->addr == vaddr &&
411 mze->rz->iova == dma_handle)
415 rte_spinlock_unlock(&enic->memzone_list_lock);
417 "Tried to free memory, but couldn't find it in the memzone list\n");
420 LIST_REMOVE(mze, entries);
421 rte_spinlock_unlock(&enic->memzone_list_lock);
422 rte_memzone_free(mze->rz);
426 int enic_link_update(struct rte_eth_dev *eth_dev)
428 struct enic *enic = pmd_priv(eth_dev);
429 struct rte_eth_link link;
431 memset(&link, 0, sizeof(link));
432 link.link_status = enic_get_link_status(enic);
433 link.link_duplex = ETH_LINK_FULL_DUPLEX;
434 link.link_speed = vnic_dev_port_speed(enic->vdev);
436 return rte_eth_linkstatus_set(eth_dev, &link);
440 enic_intr_handler(void *arg)
442 struct rte_eth_dev *dev = (struct rte_eth_dev *)arg;
443 struct enic *enic = pmd_priv(dev);
445 vnic_intr_return_all_credits(&enic->intr[ENICPMD_LSC_INTR_OFFSET]);
447 enic_link_update(dev);
448 rte_eth_dev_callback_process(dev, RTE_ETH_EVENT_INTR_LSC, NULL);
449 enic_log_q_error(enic);
450 /* Re-enable irq in case of INTx */
451 rte_intr_ack(&enic->pdev->intr_handle);
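/*
 * Set up Rx queue interrupts: enable event fds and map each Rx queue to its
 * own MSI-X vector, starting after the LSC/error vector.
 */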
454 static int enic_rxq_intr_init(struct enic *enic)
456 struct rte_intr_handle *intr_handle;
457 uint32_t rxq_intr_count, i;
460 intr_handle = enic->rte_dev->intr_handle;
461 if (!enic->rte_dev->data->dev_conf.intr_conf.rxq)
464 * Rx queue interrupts only work when we have MSI-X interrupts,
465 * one per queue. Sharing one interrupt is technically
466 * possible with VIC, but it is not worth the complications it brings.
468 if (!rte_intr_cap_multiple(intr_handle)) {
469 dev_err(enic, "Rx queue interrupts require MSI-X interrupts"
470 " (vfio-pci driver)\n");
473 rxq_intr_count = enic->intr_count - ENICPMD_RXQ_INTR_OFFSET;
474 err = rte_intr_efd_enable(intr_handle, rxq_intr_count);
476 dev_err(enic, "Failed to enable event fds for Rx queue"
480 intr_handle->intr_vec = rte_zmalloc("enic_intr_vec",
481 rxq_intr_count * sizeof(int), 0);
482 if (intr_handle->intr_vec == NULL) {
483 dev_err(enic, "Failed to allocate intr_vec\n");
486 for (i = 0; i < rxq_intr_count; i++)
487 intr_handle->intr_vec[i] = i + ENICPMD_RXQ_INTR_OFFSET;
491 static void enic_rxq_intr_deinit(struct enic *enic)
493 struct rte_intr_handle *intr_handle;
495 intr_handle = enic->rte_dev->intr_handle;
496 rte_intr_efd_disable(intr_handle);
497 if (intr_handle->intr_vec != NULL) {
498 rte_free(intr_handle->intr_vec);
499 intr_handle->intr_vec = NULL;
503 static void enic_prep_wq_for_simple_tx(struct enic *enic, uint16_t queue_idx)
505 struct wq_enet_desc *desc;
510 * Fill WQ descriptor fields that never change. Every descriptor is
511 * one packet, so set EOP. Also set CQ_ENTRY every ENIC_WQ_CQ_THRESH
512 * descriptors (i.e. request one completion update every 32 packets).
514 wq = &enic->wq[queue_idx];
515 desc = (struct wq_enet_desc *)wq->ring.descs;
516 for (i = 0; i < wq->ring.desc_count; i++, desc++) {
517 desc->header_length_flags = 1 << WQ_ENET_FLAGS_EOP_SHIFT;
518 if (i % ENIC_WQ_CQ_THRESH == ENIC_WQ_CQ_THRESH - 1)
519 desc->header_length_flags |=
520 (1 << WQ_ENET_FLAGS_CQ_ENTRY_SHIFT);
525 * The 'strong' version is in enic_rxtx_vec_avx2.c. This weak version is used
526 * when that file is not compiled.
529 enic_use_vector_rx_handler(__rte_unused struct rte_eth_dev *eth_dev)
534 void enic_pick_rx_handler(struct rte_eth_dev *eth_dev)
536 struct enic *enic = pmd_priv(eth_dev);
540 * 1. The vectorized handler if possible and requested.
541 * 2. The non-scatter, simplified handler if scatter Rx is not used.
542 * 3. The default handler as a fallback.
544 if (enic_use_vector_rx_handler(eth_dev))
546 if (enic->rq_count > 0 && enic->rq[0].data_queue_enable == 0) {
547 ENICPMD_LOG(DEBUG, " use the non-scatter Rx handler");
548 eth_dev->rx_pkt_burst = &enic_noscatter_recv_pkts;
550 ENICPMD_LOG(DEBUG, " use the normal Rx handler");
551 eth_dev->rx_pkt_burst = &enic_recv_pkts;
555 /* Secondary process uses this to set the Tx handler */
556 void enic_pick_tx_handler(struct rte_eth_dev *eth_dev)
558 struct enic *enic = pmd_priv(eth_dev);
560 if (enic->use_simple_tx_handler) {
561 ENICPMD_LOG(DEBUG, " use the simple tx handler");
562 eth_dev->tx_pkt_burst = &enic_simple_xmit_pkts;
564 ENICPMD_LOG(DEBUG, " use the default tx handler");
565 eth_dev->tx_pkt_burst = &enic_xmit_pkts;
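/*
 * dev_start-time bring-up: fill the Rx rings with mbufs, select the Tx and
 * Rx burst handlers, start all queues, and enable interrupts.
 */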
569 int enic_enable(struct enic *enic)
573 struct rte_eth_dev *eth_dev = enic->rte_dev;
574 uint64_t simple_tx_offloads;
577 if (enic->enable_avx2_rx) {
578 struct rte_mbuf mb_def = { .buf_addr = 0 };
581 * mbuf_initializer contains const-after-init fields of
582 * receive mbufs (i.e. 64 bits of fields from rearm_data).
583 * It is currently used by the vectorized handler.
586 mb_def.data_off = RTE_PKTMBUF_HEADROOM;
587 mb_def.port = enic->port_id;
588 rte_mbuf_refcnt_set(&mb_def, 1);
589 rte_compiler_barrier();
590 p = (uintptr_t)&mb_def.rearm_data;
591 enic->mbuf_initializer = *(uint64_t *)p;
594 eth_dev->data->dev_link.link_speed = vnic_dev_port_speed(enic->vdev);
595 eth_dev->data->dev_link.link_duplex = ETH_LINK_FULL_DUPLEX;
597 /* vnic notification of link status has already been turned on in
598 * enic_dev_init(), which is called at probe time. Here we are
599 * just turning on interrupt vector 0 if needed.
601 if (eth_dev->data->dev_conf.intr_conf.lsc)
602 vnic_dev_notify_set(enic->vdev, 0);
604 err = enic_rxq_intr_init(enic);
607 if (enic_clsf_init(enic))
608 dev_warning(enic, "Init of hash table for clsf failed. "\
609 "Flow director feature will not work\n");
611 /* Initialize flowman if not already initialized during probe */
612 if (enic->fm == NULL && enic_fm_init(enic))
613 dev_warning(enic, "Init of flowman failed.\n");
615 for (index = 0; index < enic->rq_count; index++) {
616 err = enic_alloc_rx_queue_mbufs(enic,
617 &enic->rq[enic_rte_rq_idx_to_sop_idx(index)]);
619 dev_err(enic, "Failed to alloc sop RX queue mbufs\n");
622 err = enic_alloc_rx_queue_mbufs(enic,
623 &enic->rq[enic_rte_rq_idx_to_data_idx(index, enic)]);
625 /* release the allocated mbufs for the sop rq*/
626 enic_rxmbuf_queue_release(enic,
627 &enic->rq[enic_rte_rq_idx_to_sop_idx(index)]);
629 dev_err(enic, "Failed to alloc data RX queue mbufs\n");
635 * Use the simple TX handler if possible. Only checksum offloads
636 * and vlan insertion are supported.
638 simple_tx_offloads = enic->tx_offload_capa &
639 (DEV_TX_OFFLOAD_OUTER_IPV4_CKSUM |
640 DEV_TX_OFFLOAD_VLAN_INSERT |
641 DEV_TX_OFFLOAD_IPV4_CKSUM |
642 DEV_TX_OFFLOAD_UDP_CKSUM |
643 DEV_TX_OFFLOAD_TCP_CKSUM);
644 if ((eth_dev->data->dev_conf.txmode.offloads &
645 ~simple_tx_offloads) == 0) {
646 ENICPMD_LOG(DEBUG, " use the simple tx handler");
647 eth_dev->tx_pkt_burst = &enic_simple_xmit_pkts;
648 for (index = 0; index < enic->wq_count; index++)
649 enic_prep_wq_for_simple_tx(enic, index);
650 enic->use_simple_tx_handler = 1;
652 ENICPMD_LOG(DEBUG, " use the default tx handler");
653 eth_dev->tx_pkt_burst = &enic_xmit_pkts;
656 enic_pick_rx_handler(eth_dev);
658 for (index = 0; index < enic->wq_count; index++)
659 enic_start_wq(enic, index);
660 for (index = 0; index < enic->rq_count; index++)
661 enic_start_rq(enic, index);
663 vnic_dev_add_addr(enic->vdev, enic->mac_addr);
665 vnic_dev_enable_wait(enic->vdev);
667 /* Register and enable error interrupt */
668 rte_intr_callback_register(&(enic->pdev->intr_handle),
669 enic_intr_handler, (void *)enic->rte_dev);
671 rte_intr_enable(&(enic->pdev->intr_handle));
672 /* Unmask LSC interrupt */
673 vnic_intr_unmask(&enic->intr[ENICPMD_LSC_INTR_OFFSET]);
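/* Allocate one vNIC interrupt resource for each configured interrupt vector */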
678 int enic_alloc_intr_resources(struct enic *enic)
683 dev_info(enic, "vNIC resources used: "\
684 "wq %d rq %d cq %d intr %d\n",
685 enic->wq_count, enic_vnic_rq_count(enic),
686 enic->cq_count, enic->intr_count);
688 for (i = 0; i < enic->intr_count; i++) {
689 err = vnic_intr_alloc(enic->vdev, &enic->intr[i], i);
691 enic_free_vnic_resources(enic);
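/* Free an Rx queue: held mbufs, the SOP and data RQs, and their completion queue */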
698 void enic_free_rq(void *rxq)
700 struct vnic_rq *rq_sop, *rq_data;
706 rq_sop = (struct vnic_rq *)rxq;
707 enic = vnic_dev_priv(rq_sop->vdev);
708 rq_data = &enic->rq[rq_sop->data_queue_idx];
710 if (rq_sop->free_mbufs) {
711 struct rte_mbuf **mb;
714 mb = rq_sop->free_mbufs;
715 for (i = ENIC_RX_BURST_MAX - rq_sop->num_free_mbufs;
716 i < ENIC_RX_BURST_MAX; i++)
717 rte_pktmbuf_free(mb[i]);
718 rte_free(rq_sop->free_mbufs);
719 rq_sop->free_mbufs = NULL;
720 rq_sop->num_free_mbufs = 0;
723 enic_rxmbuf_queue_release(enic, rq_sop);
725 enic_rxmbuf_queue_release(enic, rq_data);
727 rte_free(rq_sop->mbuf_ring);
729 rte_free(rq_data->mbuf_ring);
731 rq_sop->mbuf_ring = NULL;
732 rq_data->mbuf_ring = NULL;
734 vnic_rq_free(rq_sop);
736 vnic_rq_free(rq_data);
738 vnic_cq_free(&enic->cq[enic_sop_rq_idx_to_cq_idx(rq_sop->index)]);
744 void enic_start_wq(struct enic *enic, uint16_t queue_idx)
746 struct rte_eth_dev_data *data = enic->dev_data;
747 vnic_wq_enable(&enic->wq[queue_idx]);
748 data->tx_queue_state[queue_idx] = RTE_ETH_QUEUE_STATE_STARTED;
751 int enic_stop_wq(struct enic *enic, uint16_t queue_idx)
753 struct rte_eth_dev_data *data = enic->dev_data;
756 ret = vnic_wq_disable(&enic->wq[queue_idx]);
760 data->tx_queue_state[queue_idx] = RTE_ETH_QUEUE_STATE_STOPPED;
764 void enic_start_rq(struct enic *enic, uint16_t queue_idx)
766 struct rte_eth_dev_data *data = enic->dev_data;
767 struct vnic_rq *rq_sop;
768 struct vnic_rq *rq_data;
769 rq_sop = &enic->rq[enic_rte_rq_idx_to_sop_idx(queue_idx)];
770 rq_data = &enic->rq[rq_sop->data_queue_idx];
772 if (rq_data->in_use) {
773 vnic_rq_enable(rq_data);
774 enic_initial_post_rx(enic, rq_data);
777 vnic_rq_enable(rq_sop);
778 enic_initial_post_rx(enic, rq_sop);
779 data->rx_queue_state[queue_idx] = RTE_ETH_QUEUE_STATE_STARTED;
782 int enic_stop_rq(struct enic *enic, uint16_t queue_idx)
784 struct rte_eth_dev_data *data = enic->dev_data;
785 int ret1 = 0, ret2 = 0;
786 struct vnic_rq *rq_sop;
787 struct vnic_rq *rq_data;
788 rq_sop = &enic->rq[enic_rte_rq_idx_to_sop_idx(queue_idx)];
789 rq_data = &enic->rq[rq_sop->data_queue_idx];
791 ret2 = vnic_rq_disable(rq_sop);
794 ret1 = vnic_rq_disable(rq_data);
801 data->rx_queue_state[queue_idx] = RTE_ETH_QUEUE_STATE_STOPPED;
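/*
 * Allocate an Rx queue: a SOP RQ, an optional data RQ for scatter Rx, their
 * mbuf rings, and the shared completion queue. Descriptor counts are split
 * between the two RQs based on how many mbufs a max-size packet needs.
 */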
805 int enic_alloc_rq(struct enic *enic, uint16_t queue_idx,
806 unsigned int socket_id, struct rte_mempool *mp,
807 uint16_t nb_desc, uint16_t free_thresh)
810 uint16_t sop_queue_idx = enic_rte_rq_idx_to_sop_idx(queue_idx);
811 uint16_t data_queue_idx = enic_rte_rq_idx_to_data_idx(queue_idx, enic);
812 struct vnic_rq *rq_sop = &enic->rq[sop_queue_idx];
813 struct vnic_rq *rq_data = &enic->rq[data_queue_idx];
814 unsigned int mbuf_size, mbufs_per_pkt;
815 unsigned int nb_sop_desc, nb_data_desc;
816 uint16_t min_sop, max_sop, min_data, max_data;
817 uint32_t max_rx_pkt_len;
820 rq_sop->data_queue_idx = data_queue_idx;
822 rq_data->data_queue_idx = 0;
823 rq_sop->socket_id = socket_id;
825 rq_data->socket_id = socket_id;
828 rq_sop->rx_free_thresh = free_thresh;
829 rq_data->rx_free_thresh = free_thresh;
830 dev_debug(enic, "Set queue_id:%u free thresh:%u\n", queue_idx,
833 mbuf_size = (uint16_t)(rte_pktmbuf_data_room_size(mp) -
834 RTE_PKTMBUF_HEADROOM);
835 /* max_rx_pkt_len includes the ethernet header and CRC. */
836 max_rx_pkt_len = enic->rte_dev->data->dev_conf.rxmode.max_rx_pkt_len;
838 if (enic->rte_dev->data->dev_conf.rxmode.offloads &
839 DEV_RX_OFFLOAD_SCATTER) {
840 dev_info(enic, "Rq %u Scatter rx mode enabled\n", queue_idx);
841 /* ceil((max pkt len)/mbuf_size) */
842 mbufs_per_pkt = (max_rx_pkt_len + mbuf_size - 1) / mbuf_size;
844 dev_info(enic, "Scatter rx mode disabled\n");
846 if (max_rx_pkt_len > mbuf_size) {
847 dev_warning(enic, "The maximum Rx packet size (%u) is"
848 " larger than the mbuf size (%u), and"
849 " scatter is disabled. Larger packets will"
851 max_rx_pkt_len, mbuf_size);
855 if (mbufs_per_pkt > 1) {
856 dev_info(enic, "Rq %u Scatter rx mode in use\n", queue_idx);
857 rq_sop->data_queue_enable = 1;
860 * HW does not directly support rxmode.max_rx_pkt_len. HW always
861 * receives packet sizes up to the "max" MTU.
862 * If not using scatter, we can achieve the effect of dropping
863 * larger packets by reducing the size of posted buffers.
864 * See enic_alloc_rx_queue_mbufs().
867 enic_mtu_to_max_rx_pktlen(enic->max_mtu)) {
868 dev_warning(enic, "rxmode.max_rx_pkt_len is ignored"
869 " when scatter rx mode is in use.\n");
872 dev_info(enic, "Rq %u Scatter rx mode not being used\n",
874 rq_sop->data_queue_enable = 0;
878 /* the number of descriptors has to be a multiple of 32 */
879 nb_sop_desc = (nb_desc / mbufs_per_pkt) & ENIC_ALIGN_DESCS_MASK;
880 nb_data_desc = (nb_desc - nb_sop_desc) & ENIC_ALIGN_DESCS_MASK;
882 rq_sop->max_mbufs_per_pkt = mbufs_per_pkt;
883 rq_data->max_mbufs_per_pkt = mbufs_per_pkt;
885 if (mbufs_per_pkt > 1) {
886 min_sop = ENIC_RX_BURST_MAX;
887 max_sop = ((enic->config.rq_desc_count /
888 (mbufs_per_pkt - 1)) & ENIC_ALIGN_DESCS_MASK);
889 min_data = min_sop * (mbufs_per_pkt - 1);
890 max_data = enic->config.rq_desc_count;
892 min_sop = ENIC_RX_BURST_MAX;
893 max_sop = enic->config.rq_desc_count;
898 if (nb_desc < (min_sop + min_data)) {
900 "Number of rx descs too low, adjusting to minimum\n");
901 nb_sop_desc = min_sop;
902 nb_data_desc = min_data;
903 } else if (nb_desc > (max_sop + max_data)) {
905 "Number of rx_descs too high, adjusting to maximum\n");
906 nb_sop_desc = max_sop;
907 nb_data_desc = max_data;
909 if (mbufs_per_pkt > 1) {
910 dev_info(enic, "For max packet size %u and mbuf size %u valid"
911 " rx descriptor range is %u to %u\n",
912 max_rx_pkt_len, mbuf_size, min_sop + min_data,
915 dev_info(enic, "Using %d rx descriptors (sop %d, data %d)\n",
916 nb_sop_desc + nb_data_desc, nb_sop_desc, nb_data_desc);
918 /* Allocate sop queue resources */
919 rc = vnic_rq_alloc(enic->vdev, rq_sop, sop_queue_idx,
920 nb_sop_desc, sizeof(struct rq_enet_desc));
922 dev_err(enic, "error in allocation of sop rq\n");
925 nb_sop_desc = rq_sop->ring.desc_count;
927 if (rq_data->in_use) {
928 /* Allocate data queue resources */
929 rc = vnic_rq_alloc(enic->vdev, rq_data, data_queue_idx,
931 sizeof(struct rq_enet_desc));
933 dev_err(enic, "error in allocation of data rq\n");
934 goto err_free_rq_sop;
936 nb_data_desc = rq_data->ring.desc_count;
938 rc = vnic_cq_alloc(enic->vdev, &enic->cq[queue_idx], queue_idx,
939 socket_id, nb_sop_desc + nb_data_desc,
940 sizeof(struct cq_enet_rq_desc));
942 dev_err(enic, "error in allocation of cq for rq\n");
943 goto err_free_rq_data;
946 /* Allocate the mbuf rings */
947 rq_sop->mbuf_ring = (struct rte_mbuf **)
948 rte_zmalloc_socket("rq->mbuf_ring",
949 sizeof(struct rte_mbuf *) * nb_sop_desc,
950 RTE_CACHE_LINE_SIZE, rq_sop->socket_id);
951 if (rq_sop->mbuf_ring == NULL)
954 if (rq_data->in_use) {
955 rq_data->mbuf_ring = (struct rte_mbuf **)
956 rte_zmalloc_socket("rq->mbuf_ring",
957 sizeof(struct rte_mbuf *) * nb_data_desc,
958 RTE_CACHE_LINE_SIZE, rq_sop->socket_id);
959 if (rq_data->mbuf_ring == NULL)
960 goto err_free_sop_mbuf;
963 rq_sop->free_mbufs = (struct rte_mbuf **)
964 rte_zmalloc_socket("rq->free_mbufs",
965 sizeof(struct rte_mbuf *) *
967 RTE_CACHE_LINE_SIZE, rq_sop->socket_id);
968 if (rq_sop->free_mbufs == NULL)
969 goto err_free_data_mbuf;
970 rq_sop->num_free_mbufs = 0;
972 rq_sop->tot_nb_desc = nb_desc; /* squirrel away for MTU update function */
977 rte_free(rq_data->mbuf_ring);
979 rte_free(rq_sop->mbuf_ring);
981 /* cleanup on error */
982 vnic_cq_free(&enic->cq[queue_idx]);
985 vnic_rq_free(rq_data);
987 vnic_rq_free(rq_sop);
992 void enic_free_wq(void *txq)
1000 wq = (struct vnic_wq *)txq;
1001 enic = vnic_dev_priv(wq->vdev);
1002 rte_memzone_free(wq->cqmsg_rz);
1004 vnic_cq_free(&enic->cq[enic->rq_count + wq->index]);
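/* Allocate a Tx queue: the WQ, its completion queue, and the CQ message memzone */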
1007 int enic_alloc_wq(struct enic *enic, uint16_t queue_idx,
1008 unsigned int socket_id, uint16_t nb_desc)
1011 struct vnic_wq *wq = &enic->wq[queue_idx];
1012 unsigned int cq_index = enic_cq_wq(enic, queue_idx);
1013 char name[RTE_MEMZONE_NAMESIZE];
1014 static int instance;
1016 wq->socket_id = socket_id;
1018 * rte_eth_tx_queue_setup() checks min, max, and alignment. So just
1019 * print an info message for diagnostics.
1021 dev_info(enic, "TX Queues - effective number of descs:%d\n", nb_desc);
1023 /* Allocate queue resources */
1024 err = vnic_wq_alloc(enic->vdev, &enic->wq[queue_idx], queue_idx,
1026 sizeof(struct wq_enet_desc));
1028 dev_err(enic, "error in allocation of wq\n");
1032 err = vnic_cq_alloc(enic->vdev, &enic->cq[cq_index], cq_index,
1034 sizeof(struct cq_enet_wq_desc));
1037 dev_err(enic, "error in allocation of cq for wq\n");
1040 /* set up CQ message */
1041 snprintf((char *)name, sizeof(name),
1042 "vnic_cqmsg-%s-%d-%d", enic->bdf_name, queue_idx,
1045 wq->cqmsg_rz = rte_memzone_reserve_aligned((const char *)name,
1046 sizeof(uint32_t), SOCKET_ID_ANY,
1047 RTE_MEMZONE_IOVA_CONTIG, ENIC_PAGE_SIZE);
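/* dev_stop-time teardown: mask interrupts, disable queues, and clean all rings */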
1054 int enic_disable(struct enic *enic)
1059 for (i = 0; i < enic->intr_count; i++) {
1060 vnic_intr_mask(&enic->intr[i]);
1061 (void)vnic_intr_masked(&enic->intr[i]); /* flush write */
1063 enic_rxq_intr_deinit(enic);
1064 rte_intr_disable(&enic->pdev->intr_handle);
1065 rte_intr_callback_unregister(&enic->pdev->intr_handle,
1067 (void *)enic->rte_dev);
1069 vnic_dev_disable(enic->vdev);
1071 enic_clsf_destroy(enic);
1072 enic_fm_destroy(enic);
1074 if (!enic_is_sriov_vf(enic))
1075 vnic_dev_del_addr(enic->vdev, enic->mac_addr);
1077 for (i = 0; i < enic->wq_count; i++) {
1078 err = vnic_wq_disable(&enic->wq[i]);
1082 for (i = 0; i < enic_vnic_rq_count(enic); i++) {
1083 if (enic->rq[i].in_use) {
1084 err = vnic_rq_disable(&enic->rq[i]);
1090 /* If we were using interrupts, set the interrupt vector to -1
1091 * to disable interrupts. We are not disabling link notifications,
1092 * though, as we want the polling of link status to continue working.
1094 if (enic->rte_dev->data->dev_conf.intr_conf.lsc)
1095 vnic_dev_notify_set(enic->vdev, -1);
1097 vnic_dev_set_reset_flag(enic->vdev, 1);
1099 for (i = 0; i < enic->wq_count; i++)
1100 vnic_wq_clean(&enic->wq[i], enic_free_wq_buf);
1102 for (i = 0; i < enic_vnic_rq_count(enic); i++)
1103 if (enic->rq[i].in_use)
1104 vnic_rq_clean(&enic->rq[i], enic_free_rq_buf);
1105 for (i = 0; i < enic->cq_count; i++)
1106 vnic_cq_clean(&enic->cq[i]);
1107 for (i = 0; i < enic->intr_count; i++)
1108 vnic_intr_clean(&enic->intr[i]);
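/*
 * Issue an asynchronous devcmd via 'start' and poll 'finished' until the
 * firmware reports completion or the (roughly 2 second) timeout expires.
 */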
1113 static int enic_dev_wait(struct vnic_dev *vdev,
1114 int (*start)(struct vnic_dev *, int),
1115 int (*finished)(struct vnic_dev *, int *),
1122 err = start(vdev, arg);
1126 /* Wait for func to complete...2 seconds max */
1127 for (i = 0; i < 2000; i++) {
1128 err = finished(vdev, &done);
1138 static int enic_dev_open(struct enic *enic)
1141 int flags = CMD_OPENF_IG_DESCCACHE;
1143 err = enic_dev_wait(enic->vdev, vnic_dev_open,
1144 vnic_dev_open_done, flags);
1146 dev_err(enic_get_dev(enic),
1147 "vNIC device open failed, err %d\n", err);
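/*
 * Program an RSS hash key into the NIC. The key is staged in a DMA-coherent
 * buffer laid out as union vnic_rss_key (10-byte sub-keys) before the devcmd
 * is issued.
 */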
1152 static int enic_set_rsskey(struct enic *enic, uint8_t *user_key)
1154 dma_addr_t rss_key_buf_pa;
1155 union vnic_rss_key *rss_key_buf_va = NULL;
1157 uint8_t name[RTE_MEMZONE_NAMESIZE];
1159 RTE_ASSERT(user_key != NULL);
1160 snprintf((char *)name, sizeof(name), "rss_key-%s", enic->bdf_name);
1161 rss_key_buf_va = enic_alloc_consistent(enic, sizeof(union vnic_rss_key),
1162 &rss_key_buf_pa, name);
1163 if (!rss_key_buf_va)
1166 for (i = 0; i < ENIC_RSS_HASH_KEY_SIZE; i++)
1167 rss_key_buf_va->key[i / 10].b[i % 10] = user_key[i];
1169 err = enic_set_rss_key(enic,
1171 sizeof(union vnic_rss_key));
1173 /* Save for later queries */
1175 rte_memcpy(&enic->rss_key, rss_key_buf_va,
1176 sizeof(union vnic_rss_key));
1178 enic_free_consistent(enic, sizeof(union vnic_rss_key),
1179 rss_key_buf_va, rss_key_buf_pa);
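/* Program the RSS indirection (RETA) table via a DMA-coherent staging buffer */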
1184 int enic_set_rss_reta(struct enic *enic, union vnic_rss_cpu *rss_cpu)
1186 dma_addr_t rss_cpu_buf_pa;
1187 union vnic_rss_cpu *rss_cpu_buf_va = NULL;
1189 uint8_t name[RTE_MEMZONE_NAMESIZE];
1191 snprintf((char *)name, sizeof(name), "rss_cpu-%s", enic->bdf_name);
1192 rss_cpu_buf_va = enic_alloc_consistent(enic, sizeof(union vnic_rss_cpu),
1193 &rss_cpu_buf_pa, name);
1194 if (!rss_cpu_buf_va)
1197 rte_memcpy(rss_cpu_buf_va, rss_cpu, sizeof(union vnic_rss_cpu));
1199 err = enic_set_rss_cpu(enic,
1201 sizeof(union vnic_rss_cpu));
1203 enic_free_consistent(enic, sizeof(union vnic_rss_cpu),
1204 rss_cpu_buf_va, rss_cpu_buf_pa);
1206 /* Save for later queries */
1208 rte_memcpy(&enic->rss_cpu, rss_cpu, sizeof(union vnic_rss_cpu));
1212 static int enic_set_niccfg(struct enic *enic, uint8_t rss_default_cpu,
1213 uint8_t rss_hash_type, uint8_t rss_hash_bits, uint8_t rss_base_cpu,
1216 const uint8_t tso_ipid_split_en = 0;
1219 err = enic_set_nic_cfg(enic,
1220 rss_default_cpu, rss_hash_type,
1221 rss_hash_bits, rss_base_cpu,
1222 rss_enable, tso_ipid_split_en,
1223 enic->ig_vlan_strip_en);
1228 /* Initialize RSS with defaults, called from dev_configure */
1229 int enic_init_rss_nic_cfg(struct enic *enic)
1231 static uint8_t default_rss_key[] = {
1232 85, 67, 83, 97, 119, 101, 115, 111, 109, 101,
1233 80, 65, 76, 79, 117, 110, 105, 113, 117, 101,
1234 76, 73, 78, 85, 88, 114, 111, 99, 107, 115,
1235 69, 78, 73, 67, 105, 115, 99, 111, 111, 108,
1237 struct rte_eth_rss_conf rss_conf;
1238 union vnic_rss_cpu rss_cpu;
1241 rss_conf = enic->rte_dev->data->dev_conf.rx_adv_conf.rss_conf;
1243 * If setting key for the first time, and the user gives us none, then
1244 * push the default key to NIC.
1246 if (rss_conf.rss_key == NULL) {
1247 rss_conf.rss_key = default_rss_key;
1248 rss_conf.rss_key_len = ENIC_RSS_HASH_KEY_SIZE;
1250 ret = enic_set_rss_conf(enic, &rss_conf);
1252 dev_err(enic, "Failed to configure RSS\n");
1255 if (enic->rss_enable) {
1256 /* If enabling RSS, use the default reta */
1257 for (i = 0; i < ENIC_RSS_RETA_SIZE; i++) {
1258 rss_cpu.cpu[i / 4].b[i % 4] =
1259 enic_rte_rq_idx_to_sop_idx(i % enic->rq_count);
1261 ret = enic_set_rss_reta(enic, &rss_cpu);
1263 dev_err(enic, "Failed to set RSS indirection table\n");
1268 int enic_setup_finish(struct enic *enic)
1270 enic_init_soft_stats(enic);
1272 /* switchdev: enable promisc mode on PF */
1273 if (enic->switchdev_mode) {
1274 vnic_dev_packet_filter(enic->vdev,
1285 vnic_dev_packet_filter(enic->vdev,
1298 static int enic_rss_conf_valid(struct enic *enic,
1299 struct rte_eth_rss_conf *rss_conf)
1301 /* RSS is disabled per VIC settings. Ignore rss_conf. */
1302 if (enic->flow_type_rss_offloads == 0)
1304 if (rss_conf->rss_key != NULL &&
1305 rss_conf->rss_key_len != ENIC_RSS_HASH_KEY_SIZE) {
1306 dev_err(enic, "Given rss_key is %d bytes, it must be %d\n",
1307 rss_conf->rss_key_len, ENIC_RSS_HASH_KEY_SIZE);
1310 if (rss_conf->rss_hf != 0 &&
1311 (rss_conf->rss_hf & enic->flow_type_rss_offloads) == 0) {
1312 dev_err(enic, "Given rss_hf contains none of the supported"
1319 /* Set hash type and key according to rss_conf */
1320 int enic_set_rss_conf(struct enic *enic, struct rte_eth_rss_conf *rss_conf)
1322 struct rte_eth_dev *eth_dev;
1324 uint8_t rss_hash_type;
1328 RTE_ASSERT(rss_conf != NULL);
1329 ret = enic_rss_conf_valid(enic, rss_conf);
1331 dev_err(enic, "RSS configuration (rss_conf) is invalid\n");
1335 eth_dev = enic->rte_dev;
1337 rss_hf = rss_conf->rss_hf & enic->flow_type_rss_offloads;
1338 if (enic->rq_count > 1 &&
1339 (eth_dev->data->dev_conf.rxmode.mq_mode & ETH_MQ_RX_RSS_FLAG) &&
1342 if (rss_hf & (ETH_RSS_IPV4 | ETH_RSS_FRAG_IPV4 |
1343 ETH_RSS_NONFRAG_IPV4_OTHER))
1344 rss_hash_type |= NIC_CFG_RSS_HASH_TYPE_IPV4;
1345 if (rss_hf & ETH_RSS_NONFRAG_IPV4_TCP)
1346 rss_hash_type |= NIC_CFG_RSS_HASH_TYPE_TCP_IPV4;
1347 if (rss_hf & ETH_RSS_NONFRAG_IPV4_UDP) {
1348 rss_hash_type |= NIC_CFG_RSS_HASH_TYPE_UDP_IPV4;
1349 if (enic->udp_rss_weak) {
1351 * 'TCP' is not a typo. The "weak" version of
1352 * UDP RSS requires both the TCP and UDP bits
1353 * be set. It does enable TCP RSS as well.
1355 rss_hash_type |= NIC_CFG_RSS_HASH_TYPE_TCP_IPV4;
1358 if (rss_hf & (ETH_RSS_IPV6 | ETH_RSS_IPV6_EX |
1359 ETH_RSS_FRAG_IPV6 | ETH_RSS_NONFRAG_IPV6_OTHER))
1360 rss_hash_type |= NIC_CFG_RSS_HASH_TYPE_IPV6;
1361 if (rss_hf & (ETH_RSS_NONFRAG_IPV6_TCP | ETH_RSS_IPV6_TCP_EX))
1362 rss_hash_type |= NIC_CFG_RSS_HASH_TYPE_TCP_IPV6;
1363 if (rss_hf & (ETH_RSS_NONFRAG_IPV6_UDP | ETH_RSS_IPV6_UDP_EX)) {
1364 rss_hash_type |= NIC_CFG_RSS_HASH_TYPE_UDP_IPV6;
1365 if (enic->udp_rss_weak)
1366 rss_hash_type |= NIC_CFG_RSS_HASH_TYPE_TCP_IPV6;
1373 /* Set the hash key if provided */
1374 if (rss_enable && rss_conf->rss_key) {
1375 ret = enic_set_rsskey(enic, rss_conf->rss_key);
1377 dev_err(enic, "Failed to set RSS key\n");
1382 ret = enic_set_niccfg(enic, ENIC_RSS_DEFAULT_CPU, rss_hash_type,
1383 ENIC_RSS_HASH_BITS, ENIC_RSS_BASE_CPU,
1386 enic->rss_hf = rss_hf;
1387 enic->rss_hash_type = rss_hash_type;
1388 enic->rss_enable = rss_enable;
1390 dev_err(enic, "Failed to update RSS configurations."
1391 " hash=0x%x\n", rss_hash_type);
1396 int enic_set_vlan_strip(struct enic *enic)
1399 * Unfortunately, VLAN strip on/off and RSS on/off are configured
1400 * together. So, re-do niccfg, preserving the current RSS settings.
1402 return enic_set_niccfg(enic, ENIC_RSS_DEFAULT_CPU, enic->rss_hash_type,
1403 ENIC_RSS_HASH_BITS, ENIC_RSS_BASE_CPU,
1407 int enic_add_packet_filter(struct enic *enic)
1409 /* switchdev ignores packet filters */
1410 if (enic->switchdev_mode) {
1411 ENICPMD_LOG(DEBUG, " switchdev: ignore packet filter");
1414 /* Args -> directed, multicast, broadcast, promisc, allmulti */
1415 return vnic_dev_packet_filter(enic->vdev, 1, 1, 1,
1416 enic->promisc, enic->allmulti);
1419 int enic_get_link_status(struct enic *enic)
1421 return vnic_dev_link_status(enic->vdev);
1424 static void enic_dev_deinit(struct enic *enic)
1426 /* stop link status checking */
1427 vnic_dev_notify_unset(enic->vdev);
1429 /* mac_addrs is freed by rte_eth_dev_release_port() */
1431 rte_free(enic->intr);
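/*
 * Verify that the vNIC provides enough RQ/WQ/CQ/interrupt resources for the
 * requested queue counts and record the counts that will actually be used.
 */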
1437 int enic_set_vnic_res(struct enic *enic)
1439 struct rte_eth_dev *eth_dev = enic->rte_dev;
1441 unsigned int required_rq, required_wq, required_cq, required_intr;
1443 /* Always use two vNIC RQs per eth_dev RQ, regardless of Rx scatter. */
1444 required_rq = eth_dev->data->nb_rx_queues * 2;
1445 required_wq = eth_dev->data->nb_tx_queues;
1446 required_cq = eth_dev->data->nb_rx_queues + eth_dev->data->nb_tx_queues;
1447 required_intr = 1; /* 1 for LSC even if intr_conf.lsc is 0 */
1448 if (eth_dev->data->dev_conf.intr_conf.rxq) {
1449 required_intr += eth_dev->data->nb_rx_queues;
1452 if (enic->conf_rq_count < required_rq) {
1453 dev_err(dev, "Not enough Receive queues. Requested:%u which uses %d RQs on VIC, Configured:%u\n",
1454 eth_dev->data->nb_rx_queues,
1455 required_rq, enic->conf_rq_count);
1458 if (enic->conf_wq_count < required_wq) {
1459 dev_err(dev, "Not enough Transmit queues. Requested:%u, Configured:%u\n",
1460 eth_dev->data->nb_tx_queues, enic->conf_wq_count);
1464 if (enic->conf_cq_count < required_cq) {
1465 dev_err(dev, "Not enough Completion queues. Required:%u, Configured:%u\n",
1466 required_cq, enic->conf_cq_count);
1469 if (enic->conf_intr_count < required_intr) {
1470 dev_err(dev, "Not enough Interrupts to support Rx queue"
1471 " interrupts. Required:%u, Configured:%u\n",
1472 required_intr, enic->conf_intr_count);
1477 enic->rq_count = eth_dev->data->nb_rx_queues;
1478 enic->wq_count = eth_dev->data->nb_tx_queues;
1479 enic->cq_count = enic->rq_count + enic->wq_count;
1480 enic->intr_count = required_intr;
1486 /* Reinitialize an Rx queue: its completion queue, SOP/data RQs, and mbufs */
1488 enic_reinit_rq(struct enic *enic, unsigned int rq_idx)
1490 struct vnic_rq *sop_rq, *data_rq;
1491 unsigned int cq_idx;
1494 sop_rq = &enic->rq[enic_rte_rq_idx_to_sop_idx(rq_idx)];
1495 data_rq = &enic->rq[enic_rte_rq_idx_to_data_idx(rq_idx, enic)];
1498 vnic_cq_clean(&enic->cq[cq_idx]);
1499 vnic_cq_init(&enic->cq[cq_idx],
1500 0 /* flow_control_enable */,
1501 1 /* color_enable */,
1504 1 /* cq_tail_color */,
1505 0 /* interrupt_enable */,
1506 1 /* cq_entry_enable */,
1507 0 /* cq_message_enable */,
1508 0 /* interrupt offset */,
1509 0 /* cq_message_addr */);
1512 vnic_rq_init_start(sop_rq, enic_cq_rq(enic,
1513 enic_rte_rq_idx_to_sop_idx(rq_idx)), 0,
1514 sop_rq->ring.desc_count - 1, 1, 0);
1515 if (data_rq->in_use) {
1516 vnic_rq_init_start(data_rq,
1518 enic_rte_rq_idx_to_data_idx(rq_idx, enic)),
1519 0, data_rq->ring.desc_count - 1, 1, 0);
1522 rc = enic_alloc_rx_queue_mbufs(enic, sop_rq);
1526 if (data_rq->in_use) {
1527 rc = enic_alloc_rx_queue_mbufs(enic, data_rq);
1529 enic_rxmbuf_queue_release(enic, sop_rq);
1537 /* The Cisco NIC can send and receive packets up to a max packet size
1538 * determined by the NIC type and firmware. There is also an MTU
1539 * configured into the NIC via the CIMC/UCSM management interface
1540 * which can be overridden by this function (up to the max packet size).
1541 * Depending on the network setup, doing so may cause packet drops
1542 * and unexpected behavior.
1544 int enic_set_mtu(struct enic *enic, uint16_t new_mtu)
1546 unsigned int rq_idx;
1549 uint16_t old_mtu; /* previous setting */
1550 uint16_t config_mtu; /* Value configured into NIC via CIMC/UCSM */
1551 struct rte_eth_dev *eth_dev = enic->rte_dev;
1553 old_mtu = eth_dev->data->mtu;
1554 config_mtu = enic->config.mtu;
1556 if (rte_eal_process_type() != RTE_PROC_PRIMARY)
1557 return -E_RTE_SECONDARY;
1559 if (new_mtu > enic->max_mtu) {
1561 "MTU not updated: requested (%u) greater than max (%u)\n",
1562 new_mtu, enic->max_mtu);
1565 if (new_mtu < ENIC_MIN_MTU) {
1567 "MTU not updated: requested (%u) less than min (%u)\n",
1568 new_mtu, ENIC_MIN_MTU);
1571 if (new_mtu > config_mtu)
1573 "MTU (%u) is greater than value configured in NIC (%u)\n",
1574 new_mtu, config_mtu);
1576 /* Update the MTU and maximum packet length */
1577 eth_dev->data->mtu = new_mtu;
1578 eth_dev->data->dev_conf.rxmode.max_rx_pkt_len =
1579 enic_mtu_to_max_rx_pktlen(new_mtu);
1582 * If the device has not started (enic_enable), nothing to do.
1583 * Later, enic_enable() will set up RQs reflecting the new maximum
1586 if (!eth_dev->data->dev_started)
1590 * The device has started, re-do RQs on the fly. In the process, we
1591 * pick up the new maximum packet length.
1593 * Some applications rely on the ability to change MTU without stopping
1594 * the device. So keep this behavior for now.
1596 rte_spinlock_lock(&enic->mtu_lock);
1598 /* Stop traffic on all RQs */
1599 for (rq_idx = 0; rq_idx < enic->rq_count * 2; rq_idx++) {
1600 rq = &enic->rq[rq_idx];
1601 if (rq->is_sop && rq->in_use) {
1602 rc = enic_stop_rq(enic,
1603 enic_sop_rq_idx_to_rte_idx(rq_idx));
1605 dev_err(enic, "Failed to stop Rq %u\n", rq_idx);
1611 /* replace Rx function with a no-op to avoid getting stale pkts */
1612 eth_dev->rx_pkt_burst = enic_dummy_recv_pkts;
1615 /* Allow time for threads to exit the real Rx function. */
1618 /* now it is safe to reconfigure the RQs */
1621 /* free and reallocate RQs with the new MTU */
1622 for (rq_idx = 0; rq_idx < enic->rq_count; rq_idx++) {
1623 rq = &enic->rq[enic_rte_rq_idx_to_sop_idx(rq_idx)];
1628 rc = enic_alloc_rq(enic, rq_idx, rq->socket_id, rq->mp,
1629 rq->tot_nb_desc, rq->rx_free_thresh);
1632 "Fatal MTU alloc error - no traffic will pass\n");
1636 rc = enic_reinit_rq(enic, rq_idx);
1639 "Fatal MTU RQ reinit - no traffic will pass\n");
1644 /* put back the real receive function */
1646 enic_pick_rx_handler(eth_dev);
1649 /* restart Rx traffic */
1650 for (rq_idx = 0; rq_idx < enic->rq_count; rq_idx++) {
1651 rq = &enic->rq[enic_rte_rq_idx_to_sop_idx(rq_idx)];
1652 if (rq->is_sop && rq->in_use)
1653 enic_start_rq(enic, rq_idx);
1657 dev_info(enic, "MTU changed from %u to %u\n", old_mtu, new_mtu);
1658 rte_spinlock_unlock(&enic->mtu_lock);
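/*
 * Per-device initialization at probe time: read the vNIC configuration,
 * allocate the queue/interrupt arrays, set up the MAC address, link
 * notification, and overlay (VXLAN/Geneve) offload state.
 */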
1662 static int enic_dev_init(struct enic *enic)
1665 struct rte_eth_dev *eth_dev = enic->rte_dev;
1667 vnic_dev_intr_coal_timer_info_default(enic->vdev);
1669 /* Get vNIC configuration
1671 err = enic_get_vnic_config(enic);
1673 dev_err(dev, "Get vNIC configuration failed, aborting\n");
1677 /* Get available resource counts */
1678 enic_get_res_counts(enic);
1679 if (enic->conf_rq_count == 1) {
1680 dev_err(enic, "Running with only 1 RQ configured in the vNIC is not supported.\n");
1681 dev_err(enic, "Please configure 2 RQs in the vNIC for each Rx queue used by DPDK.\n");
1682 dev_err(enic, "See the ENIC PMD guide for more information.\n");
1685 /* Queue counts may be zeros. rte_zmalloc returns NULL in that case. */
1686 enic->cq = rte_zmalloc("enic_vnic_cq", sizeof(struct vnic_cq) *
1687 enic->conf_cq_count, 8);
1688 enic->intr = rte_zmalloc("enic_vnic_intr", sizeof(struct vnic_intr) *
1689 enic->conf_intr_count, 8);
1690 enic->rq = rte_zmalloc("enic_vnic_rq", sizeof(struct vnic_rq) *
1691 enic->conf_rq_count, 8);
1692 enic->wq = rte_zmalloc("enic_vnic_wq", sizeof(struct vnic_wq) *
1693 enic->conf_wq_count, 8);
1694 if (enic->conf_cq_count > 0 && enic->cq == NULL) {
1695 dev_err(enic, "failed to allocate vnic_cq, aborting.\n");
1698 if (enic->conf_intr_count > 0 && enic->intr == NULL) {
1699 dev_err(enic, "failed to allocate vnic_intr, aborting.\n");
1702 if (enic->conf_rq_count > 0 && enic->rq == NULL) {
1703 dev_err(enic, "failed to allocate vnic_rq, aborting.\n");
1706 if (enic->conf_wq_count > 0 && enic->wq == NULL) {
1707 dev_err(enic, "failed to allocate vnic_wq, aborting.\n");
1711 /* Get the supported filters */
1712 enic_fdir_info(enic);
1714 eth_dev->data->mac_addrs = rte_zmalloc("enic_mac_addr",
1715 sizeof(struct rte_ether_addr) *
1716 ENIC_UNICAST_PERFECT_FILTERS, 0);
1717 if (!eth_dev->data->mac_addrs) {
1718 dev_err(enic, "mac addr storage alloc failed, aborting.\n");
1721 rte_ether_addr_copy((struct rte_ether_addr *)enic->mac_addr,
1722 eth_dev->data->mac_addrs);
1724 vnic_dev_set_reset_flag(enic->vdev, 0);
1726 LIST_INIT(&enic->flows);
1728 /* set up link status checking */
1729 vnic_dev_notify_set(enic->vdev, -1); /* No Intr for notify */
1732 * When Geneve with options offload is available, always disable it
1733 * first as it can interfere with user flow rules.
1735 if (enic->geneve_opt_avail) {
1737 * Disabling fails if the feature is provisioned but
1738 * not enabled. So ignore result and do not log error.
1740 vnic_dev_overlay_offload_ctrl(enic->vdev,
1741 OVERLAY_FEATURE_GENEVE,
1742 OVERLAY_OFFLOAD_DISABLE);
1744 enic->overlay_offload = false;
1745 if (enic->disable_overlay && enic->vxlan) {
1747 * Explicitly disable overlay offload as the setting is
1748 * sticky, and resetting vNIC does not disable it.
1750 if (vnic_dev_overlay_offload_ctrl(enic->vdev,
1751 OVERLAY_FEATURE_VXLAN,
1752 OVERLAY_OFFLOAD_DISABLE)) {
1753 dev_err(enic, "failed to disable overlay offload\n");
1755 dev_info(enic, "Overlay offload is disabled\n");
1758 if (!enic->disable_overlay && enic->vxlan &&
1759 /* 'VXLAN feature' enables VXLAN, NVGRE, and GENEVE. */
1760 vnic_dev_overlay_offload_ctrl(enic->vdev,
1761 OVERLAY_FEATURE_VXLAN,
1762 OVERLAY_OFFLOAD_ENABLE) == 0) {
1763 enic->tx_offload_capa |=
1764 DEV_TX_OFFLOAD_OUTER_IPV4_CKSUM |
1765 DEV_TX_OFFLOAD_GENEVE_TNL_TSO |
1766 DEV_TX_OFFLOAD_VXLAN_TNL_TSO;
1767 enic->tx_offload_mask |=
1770 PKT_TX_OUTER_IP_CKSUM |
1772 enic->overlay_offload = true;
1773 dev_info(enic, "Overlay offload is enabled\n");
1775 /* Geneve with options offload requires overlay offload */
1776 if (enic->overlay_offload && enic->geneve_opt_avail &&
1777 enic->geneve_opt_request) {
1778 if (vnic_dev_overlay_offload_ctrl(enic->vdev,
1779 OVERLAY_FEATURE_GENEVE,
1780 OVERLAY_OFFLOAD_ENABLE)) {
1781 dev_err(enic, "failed to enable geneve+option\n");
1783 enic->geneve_opt_enabled = 1;
1784 dev_info(enic, "Geneve with options is enabled\n");
1788 * Reset the vxlan port if HW vxlan parsing is available. It
1789 * is always enabled regardless of overlay offload
1793 enic->vxlan_port = RTE_VXLAN_DEFAULT_PORT;
1795 * Reset the vxlan port to the default, as the NIC firmware
1796 * does not reset it automatically and keeps the old setting.
1798 if (vnic_dev_overlay_offload_cfg(enic->vdev,
1799 OVERLAY_CFG_VXLAN_PORT_UPDATE,
1800 RTE_VXLAN_DEFAULT_PORT)) {
1801 dev_err(enic, "failed to update vxlan port\n");
1806 if (enic_fm_init(enic))
1807 dev_warning(enic, "Init of flowman failed.\n");
1812 static void lock_devcmd(void *priv)
1814 struct enic *enic = priv;
1816 rte_spinlock_lock(&enic->devcmd_lock);
1819 static void unlock_devcmd(void *priv)
1821 struct enic *enic = priv;
1823 rte_spinlock_unlock(&enic->devcmd_lock);
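/*
 * PCI probe entry: map BAR0, register the vNIC device, open it, set the
 * ingress VLAN rewrite mode, and run enic_dev_init().
 */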
1826 int enic_probe(struct enic *enic)
1828 struct rte_pci_device *pdev = enic->pdev;
1831 dev_debug(enic, "Initializing ENIC PMD\n");
1833 /* if this is a secondary process the hardware is already initialized */
1834 if (rte_eal_process_type() != RTE_PROC_PRIMARY)
1837 enic->bar0.vaddr = (void *)pdev->mem_resource[0].addr;
1838 enic->bar0.len = pdev->mem_resource[0].len;
1840 /* Register vNIC device */
1841 enic->vdev = vnic_dev_register(NULL, enic, enic->pdev, &enic->bar0, 1);
1843 dev_err(enic, "vNIC registration failed, aborting\n");
1847 LIST_INIT(&enic->memzone_list);
1848 rte_spinlock_init(&enic->memzone_list_lock);
1850 vnic_register_cbacks(enic->vdev,
1851 enic_alloc_consistent,
1852 enic_free_consistent);
1855 * Allocate the consistent memory for stats upfront so both primary and
1856 * secondary processes can dump stats.
1858 err = vnic_dev_alloc_stats_mem(enic->vdev);
1860 dev_err(enic, "Failed to allocate cmd memory, aborting\n");
1861 goto err_out_unregister;
1863 /* Issue device open to get device in known state */
1864 err = enic_dev_open(enic);
1866 dev_err(enic, "vNIC dev open failed, aborting\n");
1867 goto err_out_unregister;
1870 /* Set ingress vlan rewrite mode before vnic initialization */
1871 dev_debug(enic, "Set ig_vlan_rewrite_mode=%u\n",
1872 enic->ig_vlan_rewrite_mode);
1873 err = vnic_dev_set_ig_vlan_rewrite_mode(enic->vdev,
1874 enic->ig_vlan_rewrite_mode);
1877 "Failed to set ingress vlan rewrite mode, aborting.\n");
1878 goto err_out_dev_close;
1881 /* Issue device init to initialize the vnic-to-switch link.
1882 * We'll start with carrier off and wait for link UP
1883 * notification later to turn on carrier. We don't need
1884 * to wait here for the vnic-to-switch link initialization
1885 * to complete; link UP notification is the indication that
1886 * the process is complete.
1889 err = vnic_dev_init(enic->vdev, 0);
1891 dev_err(enic, "vNIC dev init failed, aborting\n");
1892 goto err_out_dev_close;
1895 err = enic_dev_init(enic);
1897 dev_err(enic, "Device initialization failed, aborting\n");
1898 goto err_out_dev_close;
1901 /* Use a PF spinlock to serialize devcmd from PF and VF representors */
1902 if (enic->switchdev_mode) {
1903 rte_spinlock_init(&enic->devcmd_lock);
1904 vnic_register_lock(enic->vdev, lock_devcmd, unlock_devcmd);
1909 vnic_dev_close(enic->vdev);
1911 vnic_dev_unregister(enic->vdev);
1916 void enic_remove(struct enic *enic)
1918 enic_dev_deinit(enic);
1919 vnic_dev_close(enic->vdev);
1920 vnic_dev_unregister(enic->vdev);