1 /* SPDX-License-Identifier: BSD-3-Clause
2 * Copyright(c) 2010-2015 Intel Corporation
16 #include <rte_byteorder.h>
17 #include <rte_common.h>
18 #include <rte_cycles.h>
20 #include <rte_debug.h>
21 #include <rte_interrupts.h>
23 #include <rte_memory.h>
24 #include <rte_memzone.h>
25 #include <rte_launch.h>
27 #include <rte_per_lcore.h>
28 #include <rte_lcore.h>
29 #include <rte_atomic.h>
30 #include <rte_branch_prediction.h>
31 #include <rte_mempool.h>
32 #include <rte_malloc.h>
34 #include <rte_ether.h>
35 #include <rte_ethdev_driver.h>
36 #include <rte_prefetch.h>
41 #include <rte_string_fns.h>
42 #include <rte_errno.h>
45 #include "base/vmxnet3_defs.h"
46 #include "vmxnet3_ring.h"
48 #include "vmxnet3_logs.h"
49 #include "vmxnet3_ethdev.h"
51 #define VMXNET3_TX_OFFLOAD_MASK ( \
58 #define VMXNET3_TX_OFFLOAD_NOTSUP_MASK \
59 (PKT_TX_OFFLOAD_MASK ^ VMXNET3_TX_OFFLOAD_MASK)
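/* Any bit that is set in PKT_TX_OFFLOAD_MASK but not in VMXNET3_TX_OFFLOAD_MASK
 * survives the XOR above, so a single
 * (ol_flags & VMXNET3_TX_OFFLOAD_NOTSUP_MASK) test in vmxnet3_prep_pkts()
 * rejects mbufs requesting an offload this PMD does not implement.
 */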
61 static const uint32_t rxprod_reg[2] = {VMXNET3_REG_RXPROD, VMXNET3_REG_RXPROD2};
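/* BAR0 producer registers of the two rx command rings, indexed by ring_id.
 * After new buffers are posted, next2fill is written to the matching register
 * (offset by queue_id * VMXNET3_REG_ALIGN) to tell the device how far the
 * ring has been refilled.
 */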
63 static int vmxnet3_post_rx_bufs(vmxnet3_rx_queue_t*, uint8_t);
64 static void vmxnet3_tq_tx_complete(vmxnet3_tx_queue_t *);
65 #ifdef RTE_LIBRTE_VMXNET3_DEBUG_DRIVER_NOT_USED
66 static void vmxnet3_rxq_dump(struct vmxnet3_rx_queue *);
67 static void vmxnet3_txq_dump(struct vmxnet3_tx_queue *);
70 #ifdef RTE_LIBRTE_VMXNET3_DEBUG_DRIVER_NOT_USED
72 vmxnet3_rxq_dump(struct vmxnet3_rx_queue *rxq)
80 "RXQ: cmd0 base : %p cmd1 base : %p comp ring base : %p.",
81 rxq->cmd_ring[0].base, rxq->cmd_ring[1].base, rxq->comp_ring.base);
83 "RXQ: cmd0 basePA : 0x%lx cmd1 basePA : 0x%lx comp ring basePA : 0x%lx.",
84 (unsigned long)rxq->cmd_ring[0].basePA,
85 (unsigned long)rxq->cmd_ring[1].basePA,
86 (unsigned long)rxq->comp_ring.basePA);
88 avail = vmxnet3_cmd_ring_desc_avail(&rxq->cmd_ring[0]);
90 "RXQ:cmd0: size=%u; free=%u; next2proc=%u; queued=%u",
91 (uint32_t)rxq->cmd_ring[0].size, avail,
92 rxq->comp_ring.next2proc,
93 rxq->cmd_ring[0].size - avail);
95 avail = vmxnet3_cmd_ring_desc_avail(&rxq->cmd_ring[1]);
96 PMD_RX_LOG(DEBUG, "RXQ:cmd1 size=%u; free=%u; next2proc=%u; queued=%u",
97 (uint32_t)rxq->cmd_ring[1].size, avail, rxq->comp_ring.next2proc,
98 rxq->cmd_ring[1].size - avail);
103 vmxnet3_txq_dump(struct vmxnet3_tx_queue *txq)
110 PMD_TX_LOG(DEBUG, "TXQ: cmd base : %p comp ring base : %p data ring base : %p.",
111 txq->cmd_ring.base, txq->comp_ring.base, txq->data_ring.base);
112 PMD_TX_LOG(DEBUG, "TXQ: cmd basePA : 0x%lx comp ring basePA : 0x%lx data ring basePA : 0x%lx.",
113 (unsigned long)txq->cmd_ring.basePA,
114 (unsigned long)txq->comp_ring.basePA,
115 (unsigned long)txq->data_ring.basePA);
117 avail = vmxnet3_cmd_ring_desc_avail(&txq->cmd_ring);
118 PMD_TX_LOG(DEBUG, "TXQ: size=%u; free=%u; next2proc=%u; queued=%u",
119 (uint32_t)txq->cmd_ring.size, avail,
120 txq->comp_ring.next2proc, txq->cmd_ring.size - avail);
125 vmxnet3_tx_cmd_ring_release_mbufs(vmxnet3_cmd_ring_t *ring)
127 while (ring->next2comp != ring->next2fill) {
128 /* No need to worry about desc ownership, device is quiesced by now. */
129 vmxnet3_buf_info_t *buf_info = ring->buf_info + ring->next2comp;
132 rte_pktmbuf_free(buf_info->m);
137 vmxnet3_cmd_ring_adv_next2comp(ring);
142 vmxnet3_rx_cmd_ring_release_mbufs(vmxnet3_cmd_ring_t *ring)
146 for (i = 0; i < ring->size; i++) {
147 /* No need to worry about desc ownership, device is quiesced by now. */
148 vmxnet3_buf_info_t *buf_info = &ring->buf_info[i];
151 rte_pktmbuf_free_seg(buf_info->m);
156 vmxnet3_cmd_ring_adv_next2comp(ring);
161 vmxnet3_cmd_ring_release(vmxnet3_cmd_ring_t *ring)
163 rte_free(ring->buf_info);
164 ring->buf_info = NULL;
168 vmxnet3_dev_tx_queue_release(void *txq)
170 vmxnet3_tx_queue_t *tq = txq;
174 vmxnet3_tx_cmd_ring_release_mbufs(&tq->cmd_ring);
175 /* Release the cmd_ring */
176 vmxnet3_cmd_ring_release(&tq->cmd_ring);
177 /* Release the memzone */
178 rte_memzone_free(tq->mz);
179 /* Release the queue */
185 vmxnet3_dev_rx_queue_release(void *rxq)
188 vmxnet3_rx_queue_t *rq = rxq;
192 for (i = 0; i < VMXNET3_RX_CMDRING_SIZE; i++)
193 vmxnet3_rx_cmd_ring_release_mbufs(&rq->cmd_ring[i]);
195 /* Release both the cmd_rings */
196 for (i = 0; i < VMXNET3_RX_CMDRING_SIZE; i++)
197 vmxnet3_cmd_ring_release(&rq->cmd_ring[i]);
199 /* Release the memzone */
200 rte_memzone_free(rq->mz);
202 /* Release the queue */
208 vmxnet3_dev_tx_queue_reset(void *txq)
210 vmxnet3_tx_queue_t *tq = txq;
211 struct vmxnet3_cmd_ring *ring = &tq->cmd_ring;
212 struct vmxnet3_comp_ring *comp_ring = &tq->comp_ring;
213 struct vmxnet3_data_ring *data_ring = &tq->data_ring;
217 /* Release the cmd_ring mbufs */
218 vmxnet3_tx_cmd_ring_release_mbufs(&tq->cmd_ring);
221 /* Tx vmxnet rings structure initialization */
224 ring->gen = VMXNET3_INIT_GEN;
225 comp_ring->next2proc = 0;
226 comp_ring->gen = VMXNET3_INIT_GEN;
228 size = sizeof(struct Vmxnet3_TxDesc) * ring->size;
229 size += sizeof(struct Vmxnet3_TxCompDesc) * comp_ring->size;
230 size += tq->txdata_desc_size * data_ring->size;
232 memset(ring->base, 0, size);
236 vmxnet3_dev_rx_queue_reset(void *rxq)
239 vmxnet3_rx_queue_t *rq = rxq;
240 struct vmxnet3_hw *hw = rq->hw;
241 struct vmxnet3_cmd_ring *ring0, *ring1;
242 struct vmxnet3_comp_ring *comp_ring;
243 struct vmxnet3_rx_data_ring *data_ring = &rq->data_ring;
246 /* Release both the cmd_rings mbufs */
247 for (i = 0; i < VMXNET3_RX_CMDRING_SIZE; i++)
248 vmxnet3_rx_cmd_ring_release_mbufs(&rq->cmd_ring[i]);
250 ring0 = &rq->cmd_ring[0];
251 ring1 = &rq->cmd_ring[1];
252 comp_ring = &rq->comp_ring;
254 /* Rx vmxnet rings structure initialization */
255 ring0->next2fill = 0;
256 ring1->next2fill = 0;
257 ring0->next2comp = 0;
258 ring1->next2comp = 0;
259 ring0->gen = VMXNET3_INIT_GEN;
260 ring1->gen = VMXNET3_INIT_GEN;
261 comp_ring->next2proc = 0;
262 comp_ring->gen = VMXNET3_INIT_GEN;
264 size = sizeof(struct Vmxnet3_RxDesc) * (ring0->size + ring1->size);
265 size += sizeof(struct Vmxnet3_RxCompDesc) * comp_ring->size;
266 if (VMXNET3_VERSION_GE_3(hw) && rq->data_desc_size)
267 size += rq->data_desc_size * data_ring->size;
269 memset(ring0->base, 0, size);
273 vmxnet3_dev_clear_queues(struct rte_eth_dev *dev)
277 PMD_INIT_FUNC_TRACE();
279 for (i = 0; i < dev->data->nb_tx_queues; i++) {
280 struct vmxnet3_tx_queue *txq = dev->data->tx_queues[i];
284 vmxnet3_dev_tx_queue_reset(txq);
288 for (i = 0; i < dev->data->nb_rx_queues; i++) {
289 struct vmxnet3_rx_queue *rxq = dev->data->rx_queues[i];
293 vmxnet3_dev_rx_queue_reset(rxq);
299 vmxnet3_unmap_pkt(uint16_t eop_idx, vmxnet3_tx_queue_t *txq)
302 struct rte_mbuf *mbuf;
304 /* Release cmd_ring descriptor and free mbuf */
305 RTE_ASSERT(txq->cmd_ring.base[eop_idx].txd.eop == 1);
307 mbuf = txq->cmd_ring.buf_info[eop_idx].m;
309 rte_panic("EOP desc does not point to a valid mbuf");
310 rte_pktmbuf_free(mbuf);
312 txq->cmd_ring.buf_info[eop_idx].m = NULL;
314 while (txq->cmd_ring.next2comp != eop_idx) {
315 /* no out-of-order completion */
316 RTE_ASSERT(txq->cmd_ring.base[txq->cmd_ring.next2comp].txd.cq == 0);
317 vmxnet3_cmd_ring_adv_next2comp(&txq->cmd_ring);
321 /* Mark the txd for which tcd was generated as completed */
322 vmxnet3_cmd_ring_adv_next2comp(&txq->cmd_ring);
324 return completed + 1;
328 vmxnet3_tq_tx_complete(vmxnet3_tx_queue_t *txq)
331 vmxnet3_comp_ring_t *comp_ring = &txq->comp_ring;
332 struct Vmxnet3_TxCompDesc *tcd = (struct Vmxnet3_TxCompDesc *)
333 (comp_ring->base + comp_ring->next2proc);
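/* A completion entry belongs to the driver only while its gen bit matches
 * comp_ring->gen; the device writes new completions with the current gen,
 * so the loop below drains entries until the bits no longer match.
 */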
335 while (tcd->gen == comp_ring->gen) {
336 completed += vmxnet3_unmap_pkt(tcd->txdIdx, txq);
338 vmxnet3_comp_ring_adv_next2proc(comp_ring);
339 tcd = (struct Vmxnet3_TxCompDesc *)(comp_ring->base +
340 comp_ring->next2proc);
343 PMD_TX_LOG(DEBUG, "Processed %d tx comps & command descs.", completed);
347 vmxnet3_prep_pkts(__rte_unused void *tx_queue, struct rte_mbuf **tx_pkts,
355 for (i = 0; i != nb_pkts; i++) {
357 ol_flags = m->ol_flags;
359 /* Non-TSO packet cannot occupy more than
360 * VMXNET3_MAX_TXD_PER_PKT TX descriptors.
362 if ((ol_flags & PKT_TX_TCP_SEG) == 0 &&
363 m->nb_segs > VMXNET3_MAX_TXD_PER_PKT) {
368 /* check that only supported TX offloads are requested. */
369 if ((ol_flags & VMXNET3_TX_OFFLOAD_NOTSUP_MASK) != 0 ||
370 (ol_flags & PKT_TX_L4_MASK) ==
372 rte_errno = ENOTSUP; /* rte_errno takes positive errno values */
376 #ifdef RTE_LIBRTE_ETHDEV_DEBUG
377 ret = rte_validate_tx_offload(m);
383 ret = rte_net_intel_cksum_prepare(m);
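/* rte_net_intel_cksum_prepare() writes the pseudo-header checksum into the
 * packet headers so the device only has to finish the L4 checksum (or TSO)
 * work, which vmxnet3 also expects before the descriptors are handed over.
 */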
394 vmxnet3_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts,
398 vmxnet3_tx_queue_t *txq = tx_queue;
399 struct vmxnet3_hw *hw = txq->hw;
400 Vmxnet3_TxQueueCtrl *txq_ctrl = &txq->shared->ctrl;
401 uint32_t deferred = rte_le_to_cpu_32(txq_ctrl->txNumDeferred);
403 if (unlikely(txq->stopped)) {
404 PMD_TX_LOG(DEBUG, "Tx queue is stopped.");
408 /* Free up the comp_descriptors aggressively */
409 vmxnet3_tq_tx_complete(txq);
412 while (nb_tx < nb_pkts) {
413 Vmxnet3_GenericDesc *gdesc;
414 vmxnet3_buf_info_t *tbi;
415 uint32_t first2fill, avail, dw2;
416 struct rte_mbuf *txm = tx_pkts[nb_tx];
417 struct rte_mbuf *m_seg = txm;
419 bool tso = (txm->ol_flags & PKT_TX_TCP_SEG) != 0;
420 /* # of descriptors needed for a packet. */
421 unsigned count = txm->nb_segs;
423 avail = vmxnet3_cmd_ring_desc_avail(&txq->cmd_ring);
425 /* Is command ring full? */
426 if (unlikely(avail == 0)) {
427 PMD_TX_LOG(DEBUG, "No free ring descriptors");
428 txq->stats.tx_ring_full++;
429 txq->stats.drop_total += (nb_pkts - nb_tx);
433 /* Command ring is not full but cannot handle the
434 * multi-segmented packet. Let's try the next packet
437 PMD_TX_LOG(DEBUG, "Running out of ring descriptors "
438 "(avail %d needed %d)", avail, count);
439 txq->stats.drop_total++;
441 txq->stats.drop_tso++;
442 rte_pktmbuf_free(txm);
447 /* Drop non-TSO packet that is excessively fragmented */
448 if (unlikely(!tso && count > VMXNET3_MAX_TXD_PER_PKT)) {
449 PMD_TX_LOG(ERR, "Non-TSO packet cannot occupy more than %d tx "
450 "descriptors. Packet dropped.", VMXNET3_MAX_TXD_PER_PKT);
451 txq->stats.drop_too_many_segs++;
452 txq->stats.drop_total++;
453 rte_pktmbuf_free(txm);
458 if (txm->nb_segs == 1 &&
459 rte_pktmbuf_pkt_len(txm) <= txq->txdata_desc_size) {
460 struct Vmxnet3_TxDataDesc *tdd;
462 /* Skip empty packets */
463 if (unlikely(rte_pktmbuf_pkt_len(txm) == 0)) {
464 txq->stats.drop_total++;
465 rte_pktmbuf_free(txm);
470 tdd = (struct Vmxnet3_TxDataDesc *)
471 ((uint8 *)txq->data_ring.base +
472 txq->cmd_ring.next2fill *
473 txq->txdata_desc_size);
474 copy_size = rte_pktmbuf_pkt_len(txm);
475 rte_memcpy(tdd->data, rte_pktmbuf_mtod(txm, char *), copy_size);
478 /* use the previous gen bit for the SOP desc */
479 dw2 = (txq->cmd_ring.gen ^ 0x1) << VMXNET3_TXD_GEN_SHIFT;
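/* The SOP descriptor is first written with the inverted gen bit so the
 * device keeps ignoring it while the rest of the chain is filled in; the
 * bit is flipped back after the compiler barrier further below, publishing
 * the whole packet to the device in one step.
 */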
480 first2fill = txq->cmd_ring.next2fill;
482 /* Remember the transmit buffer for cleanup */
483 tbi = txq->cmd_ring.buf_info + txq->cmd_ring.next2fill;
485 /* NB: the following assumes that VMXNET3 maximum
486 * transmit buffer size (16K) is greater than
487 * the maximum size of an mbuf segment.
489 gdesc = txq->cmd_ring.base + txq->cmd_ring.next2fill;
491 /* Skip empty segments */
492 if (unlikely(m_seg->data_len == 0))
497 (uint64)txq->cmd_ring.next2fill *
498 txq->txdata_desc_size;
500 rte_cpu_to_le_64(txq->data_ring.basePA +
503 gdesc->txd.addr = rte_mbuf_data_iova(m_seg);
506 gdesc->dword[2] = dw2 | m_seg->data_len;
509 /* move to the next2fill descriptor */
510 vmxnet3_cmd_ring_adv_next2fill(&txq->cmd_ring);
512 /* use the right gen for non-SOP desc */
513 dw2 = txq->cmd_ring.gen << VMXNET3_TXD_GEN_SHIFT;
514 } while ((m_seg = m_seg->next) != NULL);
516 /* set the last buf_info for the pkt */
518 /* Update the EOP descriptor */
519 gdesc->dword[3] |= VMXNET3_TXD_EOP | VMXNET3_TXD_CQ;
521 /* Add VLAN tag if present */
522 gdesc = txq->cmd_ring.base + first2fill;
523 if (txm->ol_flags & PKT_TX_VLAN_PKT) {
525 gdesc->txd.tci = txm->vlan_tci;
529 uint16_t mss = txm->tso_segsz;
533 gdesc->txd.hlen = txm->l2_len + txm->l3_len + txm->l4_len;
534 gdesc->txd.om = VMXNET3_OM_TSO;
535 gdesc->txd.msscof = mss;
537 deferred += (rte_pktmbuf_pkt_len(txm) - gdesc->txd.hlen + mss - 1) / mss;
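/* For illustration: pkt_len 7342, hlen 54 and mss 1460 give
 * (7342 - 54 + 1459) / 1460 = 5 deferred segments.
 */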
538 } else if (txm->ol_flags & PKT_TX_L4_MASK) {
539 gdesc->txd.om = VMXNET3_OM_CSUM;
540 gdesc->txd.hlen = txm->l2_len + txm->l3_len;
542 switch (txm->ol_flags & PKT_TX_L4_MASK) {
543 case PKT_TX_TCP_CKSUM:
544 gdesc->txd.msscof = gdesc->txd.hlen + offsetof(struct tcp_hdr, cksum);
546 case PKT_TX_UDP_CKSUM:
547 gdesc->txd.msscof = gdesc->txd.hlen + offsetof(struct udp_hdr, dgram_cksum);
550 PMD_TX_LOG(WARNING, "requested cksum offload not supported %#llx",
551 txm->ol_flags & PKT_TX_L4_MASK);
557 gdesc->txd.om = VMXNET3_OM_NONE;
558 gdesc->txd.msscof = 0;
562 /* flip the GEN bit on the SOP */
563 rte_compiler_barrier();
564 gdesc->dword[2] ^= VMXNET3_TXD_GEN;
566 txq_ctrl->txNumDeferred = rte_cpu_to_le_32(deferred);
570 PMD_TX_LOG(DEBUG, "vmxnet3 txThreshold: %u", rte_le_to_cpu_32(txq_ctrl->txThreshold));
572 if (deferred >= rte_le_to_cpu_32(txq_ctrl->txThreshold)) {
573 txq_ctrl->txNumDeferred = 0;
574 /* Notify vSwitch that packets are available. */
575 VMXNET3_WRITE_BAR0_REG(hw, (VMXNET3_REG_TXPROD + txq->queue_id * VMXNET3_REG_ALIGN),
576 txq->cmd_ring.next2fill);
583 vmxnet3_renew_desc(vmxnet3_rx_queue_t *rxq, uint8_t ring_id,
584 struct rte_mbuf *mbuf)
587 struct vmxnet3_cmd_ring *ring = &rxq->cmd_ring[ring_id];
588 struct Vmxnet3_RxDesc *rxd =
589 (struct Vmxnet3_RxDesc *)(ring->base + ring->next2fill);
590 vmxnet3_buf_info_t *buf_info = &ring->buf_info[ring->next2fill];
593 /* Usually: One HEAD type buf per packet
594 * val = (ring->next2fill % rxq->hw->bufs_per_pkt) ?
595 * VMXNET3_RXD_BTYPE_BODY : VMXNET3_RXD_BTYPE_HEAD;
598 /* We use a single buffer per packet, so all descriptors here are HEAD type */
599 val = VMXNET3_RXD_BTYPE_HEAD;
601 /* All BODY type buffers for 2nd ring */
602 val = VMXNET3_RXD_BTYPE_BODY;
606 * Load the mbuf pointer into buf_info[next2fill]
607 * buf_info structure is equivalent to cookie for virtio-virtqueue
610 buf_info->len = (uint16_t)(mbuf->buf_len - RTE_PKTMBUF_HEADROOM);
611 buf_info->bufPA = rte_mbuf_data_iova_default(mbuf);
613 /* Load Rx Descriptor with the buffer's GPA */
614 rxd->addr = buf_info->bufPA;
616 /* After this point rxd->addr MUST not be NULL */
618 rxd->len = buf_info->len;
619 /* Flip gen bit at the end to change ownership */
620 rxd->gen = ring->gen;
622 vmxnet3_cmd_ring_adv_next2fill(ring);
625 * Allocates mbufs and clusters and posts rx descriptors with the buffer details
626 * so that the device can receive packets into those buffers.
628 * Among the two rings, 1st ring contains buffers of type 0 and type 1.
629 * bufs_per_pkt is set such that for non-LRO cases all the buffers required
630 * by a frame will fit in 1st ring (1st buf of type0 and rest of type1).
631 2nd ring contains buffers of type 1 alone; the second ring is mostly used
635 vmxnet3_post_rx_bufs(vmxnet3_rx_queue_t *rxq, uint8_t ring_id)
639 struct vmxnet3_cmd_ring *ring = &rxq->cmd_ring[ring_id];
641 while (vmxnet3_cmd_ring_desc_avail(ring) > 0) {
642 struct rte_mbuf *mbuf;
644 /* Allocate blank mbuf for the current Rx Descriptor */
645 mbuf = rte_mbuf_raw_alloc(rxq->mp);
646 if (unlikely(mbuf == NULL)) {
647 PMD_RX_LOG(ERR, "Error allocating mbuf");
648 rxq->stats.rx_buf_alloc_failure++;
653 vmxnet3_renew_desc(rxq, ring_id, mbuf);
657 /* Return error only if no buffers are posted at present */
658 if (vmxnet3_cmd_ring_desc_avail(ring) >= (ring->size - 1))
664 /* MSS not provided by vmxnet3, guess one with available information */
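/* Roughly: walk the Ethernet/IPv4|IPv6/TCP headers of the first segment to
 * get the total header length; if the completion carried a segment count
 * (stashed in rxm->udata64), MSS ~= ceil((pkt_len - hlen) / segCnt),
 * otherwise fall back to an MTU-derived estimate.
 */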
666 vmxnet3_guess_mss(struct vmxnet3_hw *hw, const Vmxnet3_RxCompDesc *rcd,
667 struct rte_mbuf *rxm)
670 struct ipv4_hdr *ipv4_hdr;
671 struct ipv6_hdr *ipv6_hdr;
672 struct tcp_hdr *tcp_hdr;
675 RTE_ASSERT(rcd->tcp);
677 ptr = rte_pktmbuf_mtod(rxm, char *);
678 slen = rte_pktmbuf_data_len(rxm);
679 hlen = sizeof(struct ether_hdr);
682 if (unlikely(slen < hlen + sizeof(struct ipv4_hdr)))
683 return hw->mtu - sizeof(struct ipv4_hdr)
684 - sizeof(struct tcp_hdr);
686 ipv4_hdr = (struct ipv4_hdr *)(ptr + hlen);
687 hlen += (ipv4_hdr->version_ihl & IPV4_HDR_IHL_MASK) *
689 } else if (rcd->v6) {
690 if (unlikely(slen < hlen + sizeof(struct ipv6_hdr)))
691 return hw->mtu - sizeof(struct ipv6_hdr) -
692 sizeof(struct tcp_hdr);
694 ipv6_hdr = (struct ipv6_hdr *)(ptr + hlen);
695 hlen += sizeof(struct ipv6_hdr);
696 if (unlikely(ipv6_hdr->proto != IPPROTO_TCP)) {
699 rte_net_skip_ip6_ext(ipv6_hdr->proto, rxm,
704 if (unlikely(slen < hlen + sizeof(struct tcp_hdr)))
705 return hw->mtu - hlen - sizeof(struct tcp_hdr) +
706 sizeof(struct ether_hdr);
708 tcp_hdr = (struct tcp_hdr *)(ptr + hlen);
709 hlen += (tcp_hdr->data_off & 0xf0) >> 2;
711 if (rxm->udata64 > 1)
712 return (rte_pktmbuf_pkt_len(rxm) - hlen +
713 rxm->udata64 - 1) / rxm->udata64;
715 return hw->mtu - hlen + sizeof(struct ether_hdr);
718 /* Receive side checksum and other offloads */
720 vmxnet3_rx_offload(struct vmxnet3_hw *hw, const Vmxnet3_RxCompDesc *rcd,
721 struct rte_mbuf *rxm, const uint8_t sop)
723 uint64_t ol_flags = rxm->ol_flags;
724 uint32_t packet_type = rxm->packet_type;
726 /* Offloads set in sop */
728 /* Set packet type */
729 packet_type |= RTE_PTYPE_L2_ETHER;
731 /* Check large packet receive */
732 if (VMXNET3_VERSION_GE_2(hw) &&
733 rcd->type == VMXNET3_CDTYPE_RXCOMP_LRO) {
734 const Vmxnet3_RxCompDescExt *rcde =
735 (const Vmxnet3_RxCompDescExt *)rcd;
737 rxm->tso_segsz = rcde->mss;
738 rxm->udata64 = rcde->segCnt;
739 ol_flags |= PKT_RX_LRO;
741 } else { /* Offloads set in eop */
743 if (rcd->rssType != VMXNET3_RCD_RSS_TYPE_NONE) {
744 ol_flags |= PKT_RX_RSS_HASH;
745 rxm->hash.rss = rcd->rssHash;
748 /* Check for hardware stripped VLAN tag */
750 ol_flags |= (PKT_RX_VLAN | PKT_RX_VLAN_STRIPPED);
751 rxm->vlan_tci = rte_le_to_cpu_16((uint16_t)rcd->tci);
754 /* Check packet type, checksum errors, etc. */
756 ol_flags |= PKT_RX_L4_CKSUM_UNKNOWN;
759 packet_type |= RTE_PTYPE_L3_IPV4_EXT_UNKNOWN;
762 ol_flags |= PKT_RX_IP_CKSUM_GOOD;
764 ol_flags |= PKT_RX_IP_CKSUM_BAD;
767 ol_flags |= PKT_RX_L4_CKSUM_GOOD;
769 packet_type |= RTE_PTYPE_L4_TCP;
771 packet_type |= RTE_PTYPE_L4_UDP;
774 packet_type |= RTE_PTYPE_L4_TCP;
775 ol_flags |= PKT_RX_L4_CKSUM_BAD;
776 } else if (rcd->udp) {
777 packet_type |= RTE_PTYPE_L4_UDP;
778 ol_flags |= PKT_RX_L4_CKSUM_BAD;
781 } else if (rcd->v6) {
782 packet_type |= RTE_PTYPE_L3_IPV6_EXT_UNKNOWN;
785 ol_flags |= PKT_RX_L4_CKSUM_GOOD;
787 packet_type |= RTE_PTYPE_L4_TCP;
789 packet_type |= RTE_PTYPE_L4_UDP;
792 packet_type |= RTE_PTYPE_L4_TCP;
793 ol_flags |= PKT_RX_L4_CKSUM_BAD;
794 } else if (rcd->udp) {
795 packet_type |= RTE_PTYPE_L4_UDP;
796 ol_flags |= PKT_RX_L4_CKSUM_BAD;
800 packet_type |= RTE_PTYPE_UNKNOWN;
803 /* Old variants of vmxnet3 do not provide MSS */
804 if ((ol_flags & PKT_RX_LRO) && rxm->tso_segsz == 0)
805 rxm->tso_segsz = vmxnet3_guess_mss(hw,
810 rxm->ol_flags = ol_flags;
811 rxm->packet_type = packet_type;
815 * Process the Rx Completion Ring of given vmxnet3_rx_queue
816 * for nb_pkts burst and return the number of packets received
819 vmxnet3_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t nb_pkts)
822 uint32_t nb_rxd, idx;
824 vmxnet3_rx_queue_t *rxq;
825 Vmxnet3_RxCompDesc *rcd;
826 vmxnet3_buf_info_t *rbi;
828 struct rte_mbuf *rxm = NULL;
829 struct vmxnet3_hw *hw;
839 rcd = &rxq->comp_ring.base[rxq->comp_ring.next2proc].rcd;
841 if (unlikely(rxq->stopped)) {
842 PMD_RX_LOG(DEBUG, "Rx queue is stopped.");
846 while (rcd->gen == rxq->comp_ring.gen) {
847 struct rte_mbuf *newm;
849 if (nb_rx >= nb_pkts)
852 newm = rte_mbuf_raw_alloc(rxq->mp);
853 if (unlikely(newm == NULL)) {
854 PMD_RX_LOG(ERR, "Error allocating mbuf");
855 rxq->stats.rx_buf_alloc_failure++;
860 ring_idx = vmxnet3_get_ring_idx(hw, rcd->rqID);
861 rxd = (Vmxnet3_RxDesc *)rxq->cmd_ring[ring_idx].base + idx;
862 RTE_SET_USED(rxd); /* used only for assert when enabled */
863 rbi = rxq->cmd_ring[ring_idx].buf_info + idx;
865 PMD_RX_LOG(DEBUG, "rxd idx: %d ring idx: %d.", idx, ring_idx);
867 RTE_ASSERT(rcd->len <= rxd->len);
870 /* Get the packet buffer pointer from buf_info */
873 /* Clear descriptor associated buf_info to be reused */
877 /* Record the index of the just-consumed descriptor in next2comp */
878 rxq->cmd_ring[ring_idx].next2comp = idx;
880 /* For RCD with EOP set, check if there is frame error */
881 if (unlikely(rcd->eop && rcd->err)) {
882 rxq->stats.drop_total++;
883 rxq->stats.drop_err++;
886 rxq->stats.drop_fcs++;
887 PMD_RX_LOG(ERR, "Recv packet dropped due to frame err.");
889 PMD_RX_LOG(ERR, "Error in received packet rcd#:%d rxd:%d",
890 (int)(rcd - (struct Vmxnet3_RxCompDesc *)
891 rxq->comp_ring.base), rcd->rxdIdx);
892 rte_pktmbuf_free_seg(rxm);
893 if (rxq->start_seg) {
894 struct rte_mbuf *start = rxq->start_seg;
896 rxq->start_seg = NULL;
897 rte_pktmbuf_free(start);
902 /* Initialize newly received packet buffer */
903 rxm->port = rxq->port_id;
906 rxm->pkt_len = (uint16_t)rcd->len;
907 rxm->data_len = (uint16_t)rcd->len;
908 rxm->data_off = RTE_PKTMBUF_HEADROOM;
911 rxm->packet_type = 0;
914 * If this is the first buffer of the received packet,
915 * set the pointer to the first mbuf of the packet
916 * Otherwise, update the total length and the number of segments
917 * of the current scattered packet, and update the pointer to
918 * the last mbuf of the current packet.
921 RTE_ASSERT(rxd->btype == VMXNET3_RXD_BTYPE_HEAD);
923 if (unlikely(rcd->len == 0)) {
924 RTE_ASSERT(rcd->eop);
927 "Rx buf was skipped. rxring[%d][%d])",
929 rte_pktmbuf_free_seg(rxm);
933 if (vmxnet3_rx_data_ring(hw, rcd->rqID)) {
934 uint8_t *rdd = rxq->data_ring.base +
935 idx * rxq->data_desc_size;
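/* Small-packet path of vmxnet3 v3: the device placed the frame in the
 * per-queue rx data ring rather than in the posted buffer, so copy the
 * bytes into the mbuf here.
 */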
937 RTE_ASSERT(VMXNET3_VERSION_GE_3(hw));
938 rte_memcpy(rte_pktmbuf_mtod(rxm, char *),
942 rxq->start_seg = rxm;
944 vmxnet3_rx_offload(hw, rcd, rxm, 1);
946 struct rte_mbuf *start = rxq->start_seg;
948 RTE_ASSERT(rxd->btype == VMXNET3_RXD_BTYPE_BODY);
951 start->pkt_len += rxm->data_len;
954 rxq->last_seg->next = rxm;
957 rte_pktmbuf_free_seg(rxm);
962 struct rte_mbuf *start = rxq->start_seg;
964 vmxnet3_rx_offload(hw, rcd, start, 0);
965 rx_pkts[nb_rx++] = start;
966 rxq->start_seg = NULL;
970 rxq->cmd_ring[ring_idx].next2comp = idx;
971 VMXNET3_INC_RING_IDX_ONLY(rxq->cmd_ring[ring_idx].next2comp,
972 rxq->cmd_ring[ring_idx].size);
974 /* It's time to renew descriptors */
975 vmxnet3_renew_desc(rxq, ring_idx, newm);
976 if (unlikely(rxq->shared->ctrl.updateRxProd)) {
977 VMXNET3_WRITE_BAR0_REG(hw, rxprod_reg[ring_idx] + (rxq->queue_id * VMXNET3_REG_ALIGN),
978 rxq->cmd_ring[ring_idx].next2fill);
981 /* Advance to the next descriptor in comp_ring */
982 vmxnet3_comp_ring_adv_next2proc(&rxq->comp_ring);
984 rcd = &rxq->comp_ring.base[rxq->comp_ring.next2proc].rcd;
986 if (nb_rxd > rxq->cmd_ring[0].size) {
987 PMD_RX_LOG(ERR, "Used up quota of receiving packets,"
988 " relinquish control.");
993 if (unlikely(nb_rxd == 0)) {
995 for (ring_idx = 0; ring_idx < VMXNET3_RX_CMDRING_SIZE; ring_idx++) {
996 avail = vmxnet3_cmd_ring_desc_avail(&rxq->cmd_ring[ring_idx]);
997 if (unlikely(avail > 0)) {
998 /* try to alloc new buf and renew descriptors */
999 vmxnet3_post_rx_bufs(rxq, ring_idx);
1002 if (unlikely(rxq->shared->ctrl.updateRxProd)) {
1003 for (ring_idx = 0; ring_idx < VMXNET3_RX_CMDRING_SIZE; ring_idx++) {
1004 VMXNET3_WRITE_BAR0_REG(hw, rxprod_reg[ring_idx] + (rxq->queue_id * VMXNET3_REG_ALIGN),
1005 rxq->cmd_ring[ring_idx].next2fill);
1014 vmxnet3_dev_tx_queue_setup(struct rte_eth_dev *dev,
1017 unsigned int socket_id,
1018 const struct rte_eth_txconf *tx_conf __rte_unused)
1020 struct vmxnet3_hw *hw = dev->data->dev_private;
1021 const struct rte_memzone *mz;
1022 struct vmxnet3_tx_queue *txq;
1023 struct vmxnet3_cmd_ring *ring;
1024 struct vmxnet3_comp_ring *comp_ring;
1025 struct vmxnet3_data_ring *data_ring;
1028 PMD_INIT_FUNC_TRACE();
1030 txq = rte_zmalloc("ethdev_tx_queue", sizeof(struct vmxnet3_tx_queue),
1031 RTE_CACHE_LINE_SIZE);
1033 PMD_INIT_LOG(ERR, "Can not allocate tx queue structure");
1037 txq->queue_id = queue_idx;
1038 txq->port_id = dev->data->port_id;
1039 txq->shared = NULL; /* set in vmxnet3_setup_driver_shared() */
1041 txq->qid = queue_idx;
1042 txq->stopped = TRUE;
1043 txq->txdata_desc_size = hw->txdata_desc_size;
1045 ring = &txq->cmd_ring;
1046 comp_ring = &txq->comp_ring;
1047 data_ring = &txq->data_ring;
1049 /* Tx vmxnet ring length should be between 512-4096 */
1050 if (nb_desc < VMXNET3_DEF_TX_RING_SIZE) {
1051 PMD_INIT_LOG(ERR, "VMXNET3 Tx Ring Size Min: %u",
1052 VMXNET3_DEF_TX_RING_SIZE);
1054 } else if (nb_desc > VMXNET3_TX_RING_MAX_SIZE) {
1055 PMD_INIT_LOG(ERR, "VMXNET3 Tx Ring Size Max: %u",
1056 VMXNET3_TX_RING_MAX_SIZE);
1059 ring->size = nb_desc;
1060 ring->size &= ~VMXNET3_RING_SIZE_MASK;
1062 comp_ring->size = data_ring->size = ring->size;
1064 /* Tx vmxnet rings structure initialization */
1065 ring->next2fill = 0;
1066 ring->next2comp = 0;
1067 ring->gen = VMXNET3_INIT_GEN;
1068 comp_ring->next2proc = 0;
1069 comp_ring->gen = VMXNET3_INIT_GEN;
1071 size = sizeof(struct Vmxnet3_TxDesc) * ring->size;
1072 size += sizeof(struct Vmxnet3_TxCompDesc) * comp_ring->size;
1073 size += txq->txdata_desc_size * data_ring->size;
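/* One memzone backs all three tx rings laid out back to back: command
 * descriptors, then completion descriptors, then the data ring. The
 * base/basePA assignments below simply slice this region.
 */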
1075 mz = rte_eth_dma_zone_reserve(dev, "txdesc", queue_idx, size,
1076 VMXNET3_RING_BA_ALIGN, socket_id);
1078 PMD_INIT_LOG(ERR, "ERROR: Creating queue descriptors zone");
1082 memset(mz->addr, 0, mz->len);
1084 /* cmd_ring initialization */
1085 ring->base = mz->addr;
1086 ring->basePA = mz->iova;
1088 /* comp_ring initialization */
1089 comp_ring->base = ring->base + ring->size;
1090 comp_ring->basePA = ring->basePA +
1091 (sizeof(struct Vmxnet3_TxDesc) * ring->size);
1093 /* data_ring initialization */
1094 data_ring->base = (Vmxnet3_TxDataDesc *)(comp_ring->base + comp_ring->size);
1095 data_ring->basePA = comp_ring->basePA +
1096 (sizeof(struct Vmxnet3_TxCompDesc) * comp_ring->size);
1098 /* cmd_ring0 buf_info allocation */
1099 ring->buf_info = rte_zmalloc("tx_ring_buf_info",
1100 ring->size * sizeof(vmxnet3_buf_info_t), RTE_CACHE_LINE_SIZE);
1101 if (ring->buf_info == NULL) {
1102 PMD_INIT_LOG(ERR, "ERROR: Creating tx_buf_info structure");
1106 /* Update the data portion with txq */
1107 dev->data->tx_queues[queue_idx] = txq;
1113 vmxnet3_dev_rx_queue_setup(struct rte_eth_dev *dev,
1116 unsigned int socket_id,
1117 __rte_unused const struct rte_eth_rxconf *rx_conf,
1118 struct rte_mempool *mp)
1120 const struct rte_memzone *mz;
1121 struct vmxnet3_rx_queue *rxq;
1122 struct vmxnet3_hw *hw = dev->data->dev_private;
1123 struct vmxnet3_cmd_ring *ring0, *ring1, *ring;
1124 struct vmxnet3_comp_ring *comp_ring;
1125 struct vmxnet3_rx_data_ring *data_ring;
1130 PMD_INIT_FUNC_TRACE();
1132 rxq = rte_zmalloc("ethdev_rx_queue", sizeof(struct vmxnet3_rx_queue),
1133 RTE_CACHE_LINE_SIZE);
1135 PMD_INIT_LOG(ERR, "Can not allocate rx queue structure");
1140 rxq->queue_id = queue_idx;
1141 rxq->port_id = dev->data->port_id;
1142 rxq->shared = NULL; /* set in vmxnet3_setup_driver_shared() */
1144 rxq->qid1 = queue_idx;
1145 rxq->qid2 = queue_idx + hw->num_rx_queues;
1146 rxq->data_ring_qid = queue_idx + 2 * hw->num_rx_queues;
1147 rxq->data_desc_size = hw->rxdata_desc_size;
1148 rxq->stopped = TRUE;
1150 ring0 = &rxq->cmd_ring[0];
1151 ring1 = &rxq->cmd_ring[1];
1152 comp_ring = &rxq->comp_ring;
1153 data_ring = &rxq->data_ring;
1155 /* Rx vmxnet rings length should be between 256-4096 */
1156 if (nb_desc < VMXNET3_DEF_RX_RING_SIZE) {
1157 PMD_INIT_LOG(ERR, "VMXNET3 Rx Ring Size Min: 256");
1159 } else if (nb_desc > VMXNET3_RX_RING_MAX_SIZE) {
1160 PMD_INIT_LOG(ERR, "VMXNET3 Rx Ring Size Max: 4096");
1163 ring0->size = nb_desc;
1164 ring0->size &= ~VMXNET3_RING_SIZE_MASK;
1165 ring1->size = ring0->size;
1168 comp_ring->size = ring0->size + ring1->size;
1169 data_ring->size = ring0->size;
1171 /* Rx vmxnet rings structure initialization */
1172 ring0->next2fill = 0;
1173 ring1->next2fill = 0;
1174 ring0->next2comp = 0;
1175 ring1->next2comp = 0;
1176 ring0->gen = VMXNET3_INIT_GEN;
1177 ring1->gen = VMXNET3_INIT_GEN;
1178 comp_ring->next2proc = 0;
1179 comp_ring->gen = VMXNET3_INIT_GEN;
1181 size = sizeof(struct Vmxnet3_RxDesc) * (ring0->size + ring1->size);
1182 size += sizeof(struct Vmxnet3_RxCompDesc) * comp_ring->size;
1183 if (VMXNET3_VERSION_GE_3(hw) && rxq->data_desc_size)
1184 size += rxq->data_desc_size * data_ring->size;
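/* Single memzone layout for the rx queue: cmd ring 0 descriptors, cmd ring 1
 * descriptors, the completion ring, then (on vmxnet3 v3) the optional rx
 * data ring; the base/basePA assignments below carve it up accordingly.
 */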
1186 mz = rte_eth_dma_zone_reserve(dev, "rxdesc", queue_idx, size,
1187 VMXNET3_RING_BA_ALIGN, socket_id);
1189 PMD_INIT_LOG(ERR, "ERROR: Creating queue descriptors zone");
1193 memset(mz->addr, 0, mz->len);
1195 /* cmd_ring0 initialization */
1196 ring0->base = mz->addr;
1197 ring0->basePA = mz->iova;
1199 /* cmd_ring1 initialization */
1200 ring1->base = ring0->base + ring0->size;
1201 ring1->basePA = ring0->basePA + sizeof(struct Vmxnet3_RxDesc) * ring0->size;
1203 /* comp_ring initialization */
1204 comp_ring->base = ring1->base + ring1->size;
1205 comp_ring->basePA = ring1->basePA + sizeof(struct Vmxnet3_RxDesc) *
1208 /* data_ring initialization */
1209 if (VMXNET3_VERSION_GE_3(hw) && rxq->data_desc_size) {
1211 (uint8_t *)(comp_ring->base + comp_ring->size);
1212 data_ring->basePA = comp_ring->basePA +
1213 sizeof(struct Vmxnet3_RxCompDesc) * comp_ring->size;
1216 /* cmd_ring0-cmd_ring1 buf_info allocation */
1217 for (i = 0; i < VMXNET3_RX_CMDRING_SIZE; i++) {
1219 ring = &rxq->cmd_ring[i];
1221 snprintf(mem_name, sizeof(mem_name), "rx_ring_%d_buf_info", i);
1223 ring->buf_info = rte_zmalloc(mem_name,
1224 ring->size * sizeof(vmxnet3_buf_info_t),
1225 RTE_CACHE_LINE_SIZE);
1226 if (ring->buf_info == NULL) {
1227 PMD_INIT_LOG(ERR, "ERROR: Creating rx_buf_info structure");
1232 /* Update the data portion with rxq */
1233 dev->data->rx_queues[queue_idx] = rxq;
1239 * Initializes the Receive Unit:
1240 * loads mbufs into the rx queues in advance and marks the Rx/Tx queues as started.
1243 vmxnet3_dev_rxtx_init(struct rte_eth_dev *dev)
1245 struct vmxnet3_hw *hw = dev->data->dev_private;
1250 PMD_INIT_FUNC_TRACE();
1252 for (i = 0; i < hw->num_rx_queues; i++) {
1253 vmxnet3_rx_queue_t *rxq = dev->data->rx_queues[i];
1255 for (j = 0; j < VMXNET3_RX_CMDRING_SIZE; j++) {
1256 /* Fill command ring j of this rx queue completely with buffers */
1257 ret = vmxnet3_post_rx_bufs(rxq, j);
1260 "ERROR: Posting Rxq: %d buffers ring: %d",
1265 * Update the device with the next2fill index so that it can use
1266 * the newly posted mbufs for incoming packets.
1268 if (unlikely(rxq->shared->ctrl.updateRxProd)) {
1269 VMXNET3_WRITE_BAR0_REG(hw, rxprod_reg[j] + (rxq->queue_id * VMXNET3_REG_ALIGN),
1270 rxq->cmd_ring[j].next2fill);
1273 rxq->stopped = FALSE;
1274 rxq->start_seg = NULL;
1277 for (i = 0; i < dev->data->nb_tx_queues; i++) {
1278 struct vmxnet3_tx_queue *txq = dev->data->tx_queues[i];
1280 txq->stopped = FALSE;
1286 static uint8_t rss_intel_key[40] = {
1287 0x6D, 0x5A, 0x56, 0xDA, 0x25, 0x5B, 0x0E, 0xC2,
1288 0x41, 0x67, 0x25, 0x3D, 0x43, 0xA3, 0x8F, 0xB0,
1289 0xD0, 0xCA, 0x2B, 0xCB, 0xAE, 0x7B, 0x30, 0xB4,
1290 0x77, 0xCB, 0x2D, 0xA3, 0x80, 0x30, 0xF2, 0x0C,
1291 0x6A, 0x42, 0xB7, 0x3B, 0xBE, 0xAC, 0x01, 0xFA,
1295 * Configure RSS feature
1298 vmxnet3_rss_configure(struct rte_eth_dev *dev)
1300 struct vmxnet3_hw *hw = dev->data->dev_private;
1301 struct VMXNET3_RSSConf *dev_rss_conf;
1302 struct rte_eth_rss_conf *port_rss_conf;
1306 PMD_INIT_FUNC_TRACE();
1308 dev_rss_conf = hw->rss_conf;
1309 port_rss_conf = &dev->data->dev_conf.rx_adv_conf.rss_conf;
1311 /* loading hashFunc */
1312 dev_rss_conf->hashFunc = VMXNET3_RSS_HASH_FUNC_TOEPLITZ;
1313 /* loading hashKeySize */
1314 dev_rss_conf->hashKeySize = VMXNET3_RSS_MAX_KEY_SIZE;
1315 /* loading indTableSize: Must not exceed VMXNET3_RSS_MAX_IND_TABLE_SIZE (128) */
1316 dev_rss_conf->indTableSize = (uint16_t)(hw->num_rx_queues * 4);
1318 if (port_rss_conf->rss_key == NULL) {
1319 /* Default hash key */
1320 port_rss_conf->rss_key = rss_intel_key;
1323 /* loading hashKey */
1324 memcpy(&dev_rss_conf->hashKey[0], port_rss_conf->rss_key,
1325 dev_rss_conf->hashKeySize);
1327 /* loading indTable */
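/* For illustration: with 4 rx queues (indTableSize 16) the loop below fills
 * the table with 0,1,2,3,0,1,2,3,... so RSS hash buckets are spread
 * round-robin across the queues.
 */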
1328 for (i = 0, j = 0; i < dev_rss_conf->indTableSize; i++, j++) {
1329 if (j == dev->data->nb_rx_queues)
1331 dev_rss_conf->indTable[i] = j;
1334 /* loading hashType */
1335 dev_rss_conf->hashType = 0;
1336 rss_hf = port_rss_conf->rss_hf & VMXNET3_RSS_OFFLOAD_ALL;
1337 if (rss_hf & ETH_RSS_IPV4)
1338 dev_rss_conf->hashType |= VMXNET3_RSS_HASH_TYPE_IPV4;
1339 if (rss_hf & ETH_RSS_NONFRAG_IPV4_TCP)
1340 dev_rss_conf->hashType |= VMXNET3_RSS_HASH_TYPE_TCP_IPV4;
1341 if (rss_hf & ETH_RSS_IPV6)
1342 dev_rss_conf->hashType |= VMXNET3_RSS_HASH_TYPE_IPV6;
1343 if (rss_hf & ETH_RSS_NONFRAG_IPV6_TCP)
1344 dev_rss_conf->hashType |= VMXNET3_RSS_HASH_TYPE_TCP_IPV6;
1346 return VMXNET3_SUCCESS;