1 /* SPDX-License-Identifier: BSD-3-Clause
2 * Copyright(c) 2010-2015 Intel Corporation
16 #include <rte_byteorder.h>
17 #include <rte_common.h>
18 #include <rte_cycles.h>
20 #include <rte_debug.h>
21 #include <rte_interrupts.h>
23 #include <rte_memory.h>
24 #include <rte_memzone.h>
25 #include <rte_launch.h>
27 #include <rte_per_lcore.h>
28 #include <rte_lcore.h>
29 #include <rte_atomic.h>
30 #include <rte_branch_prediction.h>
31 #include <rte_mempool.h>
32 #include <rte_malloc.h>
34 #include <rte_ether.h>
35 #include <rte_ethdev_driver.h>
36 #include <rte_prefetch.h>
41 #include <rte_string_fns.h>
42 #include <rte_errno.h>
45 #include "base/vmxnet3_defs.h"
46 #include "vmxnet3_ring.h"
48 #include "vmxnet3_logs.h"
49 #include "vmxnet3_ethdev.h"
/*
 * TX offload flag masks.
 * VMXNET3_TX_OFFLOAD_MASK: set of PKT_TX_* flags this PMD supports.
 * NOTE(review): the macro body (original lines 52-55) is not visible in
 * this chunk, so the exact flag list cannot be confirmed from here.
 * VMXNET3_TX_OFFLOAD_NOTSUP_MASK: every PKT_TX_* flag NOT in the
 * supported mask; used by the tx-prepare path to reject packets that
 * request unsupported offloads.
 */
51 #define VMXNET3_TX_OFFLOAD_MASK ( \
56 #define VMXNET3_TX_OFFLOAD_NOTSUP_MASK \
57 (PKT_TX_OFFLOAD_MASK ^ VMXNET3_TX_OFFLOAD_MASK)
/*
 * BAR0 producer-index registers for the two RX command rings,
 * indexed by ring_id (0 -> RXPROD, 1 -> RXPROD2).
 */
59 static const uint32_t rxprod_reg[2] = {VMXNET3_REG_RXPROD, VMXNET3_REG_RXPROD2};
/* Forward declarations for helpers used before their definitions. */
61 static int vmxnet3_post_rx_bufs(vmxnet3_rx_queue_t*, uint8_t);
62 static void vmxnet3_tq_tx_complete(vmxnet3_tx_queue_t *);
/* Debug-only ring dump helpers, compiled out unless the flag is set. */
63 #ifdef RTE_LIBRTE_VMXNET3_DEBUG_DRIVER_NOT_USED
64 static void vmxnet3_rxq_dump(struct vmxnet3_rx_queue *);
65 static void vmxnet3_txq_dump(struct vmxnet3_tx_queue *);
/*
 * Debug-only helper: dump an RX queue's ring base pointers, their
 * physical addresses, and per-ring occupancy via PMD_RX_LOG.
 * Compiled only under RTE_LIBRTE_VMXNET3_DEBUG_DRIVER_NOT_USED.
 */
68 #ifdef RTE_LIBRTE_VMXNET3_DEBUG_DRIVER_NOT_USED
70 vmxnet3_rxq_dump(struct vmxnet3_rx_queue *rxq)
78 "RXQ: cmd0 base : %p cmd1 base : %p comp ring base : %p.",
79 rxq->cmd_ring[0].base, rxq->cmd_ring[1].base, rxq->comp_ring.base);
81 "RXQ: cmd0 basePA : 0x%lx cmd1 basePA : 0x%lx comp ring basePA : 0x%lx.",
82 (unsigned long)rxq->cmd_ring[0].basePA,
83 (unsigned long)rxq->cmd_ring[1].basePA,
84 (unsigned long)rxq->comp_ring.basePA);
/* Occupancy of command ring 0: free vs queued descriptor counts. */
86 avail = vmxnet3_cmd_ring_desc_avail(&rxq->cmd_ring[0]);
88 "RXQ:cmd0: size=%u; free=%u; next2proc=%u; queued=%u",
89 (uint32_t)rxq->cmd_ring[0].size, avail,
90 rxq->comp_ring.next2proc,
91 rxq->cmd_ring[0].size - avail);
/* Occupancy of command ring 1. */
93 avail = vmxnet3_cmd_ring_desc_avail(&rxq->cmd_ring[1]);
94 PMD_RX_LOG(DEBUG, "RXQ:cmd1 size=%u; free=%u; next2proc=%u; queued=%u",
95 (uint32_t)rxq->cmd_ring[1].size, avail, rxq->comp_ring.next2proc,
96 rxq->cmd_ring[1].size - avail);
/*
 * Debug-only helper: dump a TX queue's cmd/comp/data ring base pointers,
 * their physical addresses, and the cmd ring occupancy via PMD_TX_LOG.
 */
101 vmxnet3_txq_dump(struct vmxnet3_tx_queue *txq)
108 PMD_TX_LOG(DEBUG, "TXQ: cmd base : %p comp ring base : %p data ring base : %p.",
109 txq->cmd_ring.base, txq->comp_ring.base, txq->data_ring.base);
110 PMD_TX_LOG(DEBUG, "TXQ: cmd basePA : 0x%lx comp ring basePA : 0x%lx data ring basePA : 0x%lx.",
111 (unsigned long)txq->cmd_ring.basePA,
112 (unsigned long)txq->comp_ring.basePA,
113 (unsigned long)txq->data_ring.basePA);
/* Free vs queued descriptor counts on the command ring. */
115 avail = vmxnet3_cmd_ring_desc_avail(&txq->cmd_ring);
116 PMD_TX_LOG(DEBUG, "TXQ: size=%u; free=%u; next2proc=%u; queued=%u",
117 (uint32_t)txq->cmd_ring.size, avail,
118 txq->comp_ring.next2proc, txq->cmd_ring.size - avail);
/*
 * Free every mbuf still attached to the TX command ring, walking from
 * next2comp up to next2fill. Uses rte_pktmbuf_free() (whole chain) since
 * a TX entry owns the full packet; contrast with the RX variant which
 * frees per-segment. Caller must have quiesced the device first.
 */
123 vmxnet3_tx_cmd_ring_release_mbufs(vmxnet3_cmd_ring_t *ring)
125 while (ring->next2comp != ring->next2fill) {
126 /* No need to worry about desc ownership, device is quiesced by now. */
127 vmxnet3_buf_info_t *buf_info = ring->buf_info + ring->next2comp;
130 rte_pktmbuf_free(buf_info->m);
135 vmxnet3_cmd_ring_adv_next2comp(ring);
/*
 * Free every mbuf posted on an RX command ring. Iterates all ring slots
 * (each slot holds at most one segment, hence rte_pktmbuf_free_seg()).
 * Caller must have quiesced the device first.
 */
140 vmxnet3_rx_cmd_ring_release_mbufs(vmxnet3_cmd_ring_t *ring)
144 for (i = 0; i < ring->size; i++) {
145 /* No need to worry about desc ownership, device is quiesced by now. */
146 vmxnet3_buf_info_t *buf_info = &ring->buf_info[i];
149 rte_pktmbuf_free_seg(buf_info->m);
154 vmxnet3_cmd_ring_adv_next2comp(ring);
/*
 * Release a command ring's per-descriptor bookkeeping array and clear
 * the pointer so a double release is harmless (rte_free(NULL) is a no-op).
 */
159 vmxnet3_cmd_ring_release(vmxnet3_cmd_ring_t *ring)
161 rte_free(ring->buf_info);
162 ring->buf_info = NULL;
/*
 * ethdev tx_queue_release callback: tear down one TX queue —
 * free pending mbufs, release the cmd ring bookkeeping, free the
 * descriptor memzone, then the queue structure itself (the final free
 * is on original lines not visible in this chunk).
 */
166 vmxnet3_dev_tx_queue_release(void *txq)
168 vmxnet3_tx_queue_t *tq = txq;
172 vmxnet3_tx_cmd_ring_release_mbufs(&tq->cmd_ring);
173 /* Release the cmd_ring */
174 vmxnet3_cmd_ring_release(&tq->cmd_ring);
175 /* Release the memzone */
176 rte_memzone_free(tq->mz);
177 /* Release the queue */
/*
 * ethdev rx_queue_release callback: tear down one RX queue — free the
 * mbufs posted on both command rings, release both rings' bookkeeping,
 * free the descriptor memzone, then the queue structure itself (the
 * final free is on original lines not visible in this chunk).
 */
183 vmxnet3_dev_rx_queue_release(void *rxq)
186 vmxnet3_rx_queue_t *rq = rxq;
190 for (i = 0; i < VMXNET3_RX_CMDRING_SIZE; i++)
191 vmxnet3_rx_cmd_ring_release_mbufs(&rq->cmd_ring[i]);
193 /* Release both the cmd_rings */
194 for (i = 0; i < VMXNET3_RX_CMDRING_SIZE; i++)
195 vmxnet3_cmd_ring_release(&rq->cmd_ring[i]);
197 /* Release the memzone */
198 rte_memzone_free(rq->mz);
200 /* Release the queue */
/*
 * Reset a TX queue to its pristine state without freeing its memory:
 * drop any in-flight mbufs, reset ring indices and generation bits to
 * VMXNET3_INIT_GEN, and zero the whole descriptor area (cmd + comp +
 * data rings, which are laid out contiguously from ring->base).
 */
206 vmxnet3_dev_tx_queue_reset(void *txq)
208 vmxnet3_tx_queue_t *tq = txq;
209 struct vmxnet3_cmd_ring *ring = &tq->cmd_ring;
210 struct vmxnet3_comp_ring *comp_ring = &tq->comp_ring;
211 struct vmxnet3_data_ring *data_ring = &tq->data_ring;
215 /* Release the cmd_ring mbufs */
216 vmxnet3_tx_cmd_ring_release_mbufs(&tq->cmd_ring);
219 /* Tx vmxnet rings structure initialization*/
222 ring->gen = VMXNET3_INIT_GEN;
223 comp_ring->next2proc = 0;
224 comp_ring->gen = VMXNET3_INIT_GEN;
/* Total bytes of the contiguous cmd + comp + data descriptor area. */
226 size = sizeof(struct Vmxnet3_TxDesc) * ring->size;
227 size += sizeof(struct Vmxnet3_TxCompDesc) * comp_ring->size;
228 size += tq->txdata_desc_size * data_ring->size;
230 memset(ring->base, 0, size);
/*
 * Reset an RX queue to its pristine state without freeing its memory:
 * drop the mbufs posted on both command rings, reset all ring indices
 * and generation bits, and zero the contiguous descriptor area
 * (ring0 + ring1 + completion ring, plus the data ring on devices that
 * support it, i.e. version >= 3 with a non-zero data_desc_size).
 */
234 vmxnet3_dev_rx_queue_reset(void *rxq)
237 vmxnet3_rx_queue_t *rq = rxq;
238 struct vmxnet3_hw *hw = rq->hw;
239 struct vmxnet3_cmd_ring *ring0, *ring1;
240 struct vmxnet3_comp_ring *comp_ring;
241 struct vmxnet3_rx_data_ring *data_ring = &rq->data_ring;
244 /* Release both the cmd_rings mbufs */
245 for (i = 0; i < VMXNET3_RX_CMDRING_SIZE; i++)
246 vmxnet3_rx_cmd_ring_release_mbufs(&rq->cmd_ring[i]);
248 ring0 = &rq->cmd_ring[0];
249 ring1 = &rq->cmd_ring[1];
250 comp_ring = &rq->comp_ring;
252 /* Rx vmxnet rings structure initialization */
253 ring0->next2fill = 0;
254 ring1->next2fill = 0;
255 ring0->next2comp = 0;
256 ring1->next2comp = 0;
257 ring0->gen = VMXNET3_INIT_GEN;
258 ring1->gen = VMXNET3_INIT_GEN;
259 comp_ring->next2proc = 0;
260 comp_ring->gen = VMXNET3_INIT_GEN;
/* Total bytes to clear; must match the layout set up in queue setup. */
262 size = sizeof(struct Vmxnet3_RxDesc) * (ring0->size + ring1->size);
263 size += sizeof(struct Vmxnet3_RxCompDesc) * comp_ring->size;
264 if (VMXNET3_VERSION_GE_3(hw) && rq->data_desc_size)
265 size += rq->data_desc_size * data_ring->size;
267 memset(ring0->base, 0, size);
/*
 * Reset every configured TX and RX queue of the port back to its
 * initial state (called from the device stop/reset path).
 */
271 vmxnet3_dev_clear_queues(struct rte_eth_dev *dev)
275 PMD_INIT_FUNC_TRACE();
277 for (i = 0; i < dev->data->nb_tx_queues; i++) {
278 struct vmxnet3_tx_queue *txq = dev->data->tx_queues[i];
282 vmxnet3_dev_tx_queue_reset(txq);
286 for (i = 0; i < dev->data->nb_rx_queues; i++) {
287 struct vmxnet3_rx_queue *rxq = dev->data->rx_queues[i];
291 vmxnet3_dev_rx_queue_reset(rxq);
/*
 * Retire one transmitted packet whose completion points at descriptor
 * eop_idx: free the mbuf chain recorded at the EOP slot, then advance
 * next2comp over every descriptor of the packet (no out-of-order
 * completions are possible, asserted below). Returns the number of
 * command-ring descriptors released ('completed' is accumulated in the
 * loop; its declaration is on original lines not visible here).
 */
297 vmxnet3_unmap_pkt(uint16_t eop_idx, vmxnet3_tx_queue_t *txq)
300 struct rte_mbuf *mbuf;
302 /* Release cmd_ring descriptor and free mbuf */
303 RTE_ASSERT(txq->cmd_ring.base[eop_idx].txd.eop == 1);
305 mbuf = txq->cmd_ring.buf_info[eop_idx].m;
307 rte_panic("EOP desc does not point to a valid mbuf");
308 rte_pktmbuf_free(mbuf);
/* Drop the reference so teardown paths do not double-free it. */
310 txq->cmd_ring.buf_info[eop_idx].m = NULL;
312 while (txq->cmd_ring.next2comp != eop_idx) {
313 /* no out-of-order completion */
314 RTE_ASSERT(txq->cmd_ring.base[txq->cmd_ring.next2comp].txd.cq == 0);
315 vmxnet3_cmd_ring_adv_next2comp(&txq->cmd_ring);
319 /* Mark the txd for which tcd was generated as completed */
320 vmxnet3_cmd_ring_adv_next2comp(&txq->cmd_ring);
322 return completed + 1;
/*
 * Drain the TX completion ring: while the descriptor's generation bit
 * matches the ring's current generation (i.e. the device has written
 * it), retire the corresponding packet and advance next2proc.
 */
326 vmxnet3_tq_tx_complete(vmxnet3_tx_queue_t *txq)
329 vmxnet3_comp_ring_t *comp_ring = &txq->comp_ring;
330 struct Vmxnet3_TxCompDesc *tcd = (struct Vmxnet3_TxCompDesc *)
331 (comp_ring->base + comp_ring->next2proc);
333 while (tcd->gen == comp_ring->gen) {
334 completed += vmxnet3_unmap_pkt(tcd->txdIdx, txq);
336 vmxnet3_comp_ring_adv_next2proc(comp_ring);
337 tcd = (struct Vmxnet3_TxCompDesc *)(comp_ring->base +
338 comp_ring->next2proc);
341 PMD_TX_LOG(DEBUG, "Processed %d tx comps & command descs.", completed);
/*
 * ethdev tx_prepare callback: validate a burst before transmit.
 * Rejects (a) non-TSO packets with more than VMXNET3_MAX_TXD_PER_PKT
 * segments and (b) packets requesting offload flags outside
 * VMXNET3_TX_OFFLOAD_MASK, then runs the generic offload validation /
 * Intel-style checksum pseudo-header preparation on each mbuf.
 * On failure, rte_errno is set and the index of the first bad packet
 * is returned (return statements are on original lines not visible here).
 */
345 vmxnet3_prep_pkts(__rte_unused void *tx_queue, struct rte_mbuf **tx_pkts,
353 for (i = 0; i != nb_pkts; i++) {
355 ol_flags = m->ol_flags;
357 /* Non-TSO packet cannot occupy more than
358 * VMXNET3_MAX_TXD_PER_PKT TX descriptors.
360 if ((ol_flags & PKT_TX_TCP_SEG) == 0 &&
361 m->nb_segs > VMXNET3_MAX_TXD_PER_PKT) {
366 /* check that only supported TX offloads are requested. */
367 if ((ol_flags & VMXNET3_TX_OFFLOAD_NOTSUP_MASK) != 0 ||
368 (ol_flags & PKT_TX_L4_MASK) ==
/* NOTE(review): rte_errno is conventionally a positive errno value;
 * "-ENOTSUP" stores a negative one. Upstream DPDK later changed the
 * tx_prepare callbacks to set "rte_errno = ENOTSUP" — confirm against
 * this project's rte_errno convention before relying on the sign.
 */
370 rte_errno = -ENOTSUP;
374 #ifdef RTE_LIBRTE_ETHDEV_DEBUG
375 ret = rte_validate_tx_offload(m);
381 ret = rte_net_intel_cksum_prepare(m);
/*
 * ethdev tx_burst callback: transmit up to nb_pkts mbufs.
 * Per packet: reclaim completions, check ring space, drop empty or
 * over-fragmented packets, copy small single-segment packets into the
 * data ring, fill one Vmxnet3 descriptor per segment (generation bit of
 * the SOP descriptor flipped last, behind a compiler barrier, to hand
 * ownership to the device atomically), program TSO / L4-checksum / VLAN
 * fields on the SOP descriptor, and finally kick the TXPROD doorbell
 * once the deferred count crosses the device-advertised threshold.
 * Returns the number of packets actually queued (the nb_tx bookkeeping
 * and return are on original lines not visible in this chunk).
 */
392 vmxnet3_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts,
396 vmxnet3_tx_queue_t *txq = tx_queue;
397 struct vmxnet3_hw *hw = txq->hw;
398 Vmxnet3_TxQueueCtrl *txq_ctrl = &txq->shared->ctrl;
399 uint32_t deferred = rte_le_to_cpu_32(txq_ctrl->txNumDeferred);
401 if (unlikely(txq->stopped)) {
402 PMD_TX_LOG(DEBUG, "Tx queue is stopped.");
406 /* Free up the comp_descriptors aggressively */
407 vmxnet3_tq_tx_complete(txq);
/* Main per-packet loop. */
410 while (nb_tx < nb_pkts) {
411 Vmxnet3_GenericDesc *gdesc;
412 vmxnet3_buf_info_t *tbi;
413 uint32_t first2fill, avail, dw2;
414 struct rte_mbuf *txm = tx_pkts[nb_tx];
415 struct rte_mbuf *m_seg = txm;
417 bool tso = (txm->ol_flags & PKT_TX_TCP_SEG) != 0;
418 /* # of descriptors needed for a packet. */
419 unsigned count = txm->nb_segs;
421 avail = vmxnet3_cmd_ring_desc_avail(&txq->cmd_ring);
423 /* Is command ring full? */
424 if (unlikely(avail == 0)) {
425 PMD_TX_LOG(DEBUG, "No free ring descriptors");
426 txq->stats.tx_ring_full++;
427 txq->stats.drop_total += (nb_pkts - nb_tx);
431 /* Command ring is not full but cannot handle the
432 * multi-segmented packet. Let's try the next packet
435 PMD_TX_LOG(DEBUG, "Running out of ring descriptors "
436 "(avail %d needed %d)", avail, count);
437 txq->stats.drop_total++;
439 txq->stats.drop_tso++;
440 rte_pktmbuf_free(txm);
445 /* Drop non-TSO packet that is excessively fragmented */
446 if (unlikely(!tso && count > VMXNET3_MAX_TXD_PER_PKT)) {
447 PMD_TX_LOG(ERR, "Non-TSO packet cannot occupy more than %d tx "
448 "descriptors. Packet dropped.", VMXNET3_MAX_TXD_PER_PKT);
449 txq->stats.drop_too_many_segs++;
450 txq->stats.drop_total++;
451 rte_pktmbuf_free(txm);
/* Small single-segment packets are copied into the TX data ring to
 * save the device a DMA of a tiny buffer.
 */
456 if (txm->nb_segs == 1 &&
457 rte_pktmbuf_pkt_len(txm) <= txq->txdata_desc_size) {
458 struct Vmxnet3_TxDataDesc *tdd;
460 /* Skip empty packets */
461 if (unlikely(rte_pktmbuf_pkt_len(txm) == 0)) {
462 txq->stats.drop_total++;
463 rte_pktmbuf_free(txm);
468 tdd = (struct Vmxnet3_TxDataDesc *)
469 ((uint8 *)txq->data_ring.base +
470 txq->cmd_ring.next2fill *
471 txq->txdata_desc_size);
472 copy_size = rte_pktmbuf_pkt_len(txm);
473 rte_memcpy(tdd->data, rte_pktmbuf_mtod(txm, char *), copy_size);
476 /* use the previous gen bit for the SOP desc */
477 dw2 = (txq->cmd_ring.gen ^ 0x1) << VMXNET3_TXD_GEN_SHIFT;
478 first2fill = txq->cmd_ring.next2fill;
/* Per-segment descriptor fill loop (do/while over the mbuf chain). */
480 /* Remember the transmit buffer for cleanup */
481 tbi = txq->cmd_ring.buf_info + txq->cmd_ring.next2fill;
483 /* NB: the following assumes that VMXNET3 maximum
484 * transmit buffer size (16K) is greater than
485 * maximum size of mbuf segment size.
487 gdesc = txq->cmd_ring.base + txq->cmd_ring.next2fill;
489 /* Skip empty segments */
490 if (unlikely(m_seg->data_len == 0))
495 (uint64)txq->cmd_ring.next2fill *
496 txq->txdata_desc_size;
498 rte_cpu_to_le_64(txq->data_ring.basePA +
501 gdesc->txd.addr = rte_mbuf_data_iova(m_seg);
504 gdesc->dword[2] = dw2 | m_seg->data_len;
507 /* move to the next2fill descriptor */
508 vmxnet3_cmd_ring_adv_next2fill(&txq->cmd_ring);
510 /* use the right gen for non-SOP desc */
511 dw2 = txq->cmd_ring.gen << VMXNET3_TXD_GEN_SHIFT;
512 } while ((m_seg = m_seg->next) != NULL);
514 /* set the last buf_info for the pkt */
516 /* Update the EOP descriptor */
517 gdesc->dword[3] |= VMXNET3_TXD_EOP | VMXNET3_TXD_CQ;
519 /* Add VLAN tag if present */
520 gdesc = txq->cmd_ring.base + first2fill;
521 if (txm->ol_flags & PKT_TX_VLAN_PKT) {
523 gdesc->txd.tci = txm->vlan_tci;
/* TSO branch: device segments the packet; deferred counts the number
 * of resulting segments rather than one per packet.
 */
527 uint16_t mss = txm->tso_segsz;
531 gdesc->txd.hlen = txm->l2_len + txm->l3_len + txm->l4_len;
532 gdesc->txd.om = VMXNET3_OM_TSO;
533 gdesc->txd.msscof = mss;
535 deferred += (rte_pktmbuf_pkt_len(txm) - gdesc->txd.hlen + mss - 1) / mss;
536 } else if (txm->ol_flags & PKT_TX_L4_MASK) {
537 gdesc->txd.om = VMXNET3_OM_CSUM;
538 gdesc->txd.hlen = txm->l2_len + txm->l3_len;
/* For checksum offload, msscof holds the offset of the checksum
 * field within the packet.
 */
540 switch (txm->ol_flags & PKT_TX_L4_MASK) {
541 case PKT_TX_TCP_CKSUM:
542 gdesc->txd.msscof = gdesc->txd.hlen + offsetof(struct tcp_hdr, cksum);
544 case PKT_TX_UDP_CKSUM:
545 gdesc->txd.msscof = gdesc->txd.hlen + offsetof(struct udp_hdr, dgram_cksum);
548 PMD_TX_LOG(WARNING, "requested cksum offload not supported %#llx",
549 txm->ol_flags & PKT_TX_L4_MASK);
555 gdesc->txd.om = VMXNET3_OM_NONE;
556 gdesc->txd.msscof = 0;
560 /* flip the GEN bit on the SOP */
561 rte_compiler_barrier();
562 gdesc->dword[2] ^= VMXNET3_TXD_GEN;
/* Publish the deferred count and ring the doorbell when the
 * device-advertised threshold is reached.
 */
564 txq_ctrl->txNumDeferred = rte_cpu_to_le_32(deferred);
568 PMD_TX_LOG(DEBUG, "vmxnet3 txThreshold: %u", rte_le_to_cpu_32(txq_ctrl->txThreshold));
570 if (deferred >= rte_le_to_cpu_32(txq_ctrl->txThreshold)) {
571 txq_ctrl->txNumDeferred = 0;
572 /* Notify vSwitch that packets are available. */
573 VMXNET3_WRITE_BAR0_REG(hw, (VMXNET3_REG_TXPROD + txq->queue_id * VMXNET3_REG_ALIGN),
574 txq->cmd_ring.next2fill);
581 vmxnet3_renew_desc(vmxnet3_rx_queue_t *rxq, uint8_t ring_id,
582 struct rte_mbuf *mbuf)
585 struct vmxnet3_cmd_ring *ring = &rxq->cmd_ring[ring_id];
586 struct Vmxnet3_RxDesc *rxd =
587 (struct Vmxnet3_RxDesc *)(ring->base + ring->next2fill);
588 vmxnet3_buf_info_t *buf_info = &ring->buf_info[ring->next2fill];
591 /* Usually: One HEAD type buf per packet
592 * val = (ring->next2fill % rxq->hw->bufs_per_pkt) ?
593 * VMXNET3_RXD_BTYPE_BODY : VMXNET3_RXD_BTYPE_HEAD;
596 /* We use single packet buffer so all heads here */
597 val = VMXNET3_RXD_BTYPE_HEAD;
599 /* All BODY type buffers for 2nd ring */
600 val = VMXNET3_RXD_BTYPE_BODY;
604 * Load mbuf pointer into buf_info[ring_size]
605 * buf_info structure is equivalent to cookie for virtio-virtqueue
608 buf_info->len = (uint16_t)(mbuf->buf_len - RTE_PKTMBUF_HEADROOM);
609 buf_info->bufPA = rte_mbuf_data_iova_default(mbuf);
611 /* Load Rx Descriptor with the buffer's GPA */
612 rxd->addr = buf_info->bufPA;
614 /* After this point rxd->addr MUST not be NULL */
616 rxd->len = buf_info->len;
617 /* Flip gen bit at the end to change ownership */
618 rxd->gen = ring->gen;
620 vmxnet3_cmd_ring_adv_next2fill(ring);
/* Header comment of vmxnet3_post_rx_bufs (its opening marker is on an
 * original line not visible in this chunk).
 */
623 * Allocates mbufs and clusters. Post rx descriptors with buffer details
624 * so that device can receive packets in those buffers.
626 * Among the two rings, 1st ring contains buffers of type 0 and type 1.
627 * bufs_per_pkt is set such that for non-LRO cases all the buffers required
628 * by a frame will fit in 1st ring (1st buf of type0 and rest of type1).
629 * 2nd ring contains buffers of type 1 alone. Second ring mostly be used
/* Fill every free slot of the given RX command ring with a fresh mbuf
 * from the queue's mempool; stop early on allocation failure. Returns
 * an error only when the ring remains (almost) entirely unposted.
 */
633 vmxnet3_post_rx_bufs(vmxnet3_rx_queue_t *rxq, uint8_t ring_id)
637 struct vmxnet3_cmd_ring *ring = &rxq->cmd_ring[ring_id];
639 while (vmxnet3_cmd_ring_desc_avail(ring) > 0) {
640 struct rte_mbuf *mbuf;
642 /* Allocate blank mbuf for the current Rx Descriptor */
643 mbuf = rte_mbuf_raw_alloc(rxq->mp);
644 if (unlikely(mbuf == NULL)) {
645 PMD_RX_LOG(ERR, "Error allocating mbuf");
646 rxq->stats.rx_buf_alloc_failure++;
651 vmxnet3_renew_desc(rxq, ring_id, mbuf);
655 /* Return error only if no buffers are posted at present */
656 if (vmxnet3_cmd_ring_desc_avail(ring) >= (ring->size - 1))
662 /* MSS not provided by vmxnet3, guess one with available information */
/*
 * Estimate the MSS of an LRO-coalesced TCP packet by parsing its
 * Ethernet + IPv4/IPv6 + TCP headers from the first segment. Whenever
 * the headers do not fit in the segment, fall back to an MTU-based
 * estimate. If the completion reported a segment count (stashed in
 * rxm->udata64 by vmxnet3_rx_offload), derive MSS as payload/segments.
 */
664 vmxnet3_guess_mss(struct vmxnet3_hw *hw, const Vmxnet3_RxCompDesc *rcd,
665 struct rte_mbuf *rxm)
668 struct ipv4_hdr *ipv4_hdr;
669 struct ipv6_hdr *ipv6_hdr;
670 struct tcp_hdr *tcp_hdr;
673 RTE_ASSERT(rcd->tcp);
675 ptr = rte_pktmbuf_mtod(rxm, char *);
676 slen = rte_pktmbuf_data_len(rxm);
677 hlen = sizeof(struct ether_hdr);
/* IPv4 branch: account for a variable IHL. */
680 if (unlikely(slen < hlen + sizeof(struct ipv4_hdr)))
681 return hw->mtu - sizeof(struct ipv4_hdr)
682 - sizeof(struct tcp_hdr);
684 ipv4_hdr = (struct ipv4_hdr *)(ptr + hlen);
685 hlen += (ipv4_hdr->version_ihl & IPV4_HDR_IHL_MASK) *
687 } else if (rcd->v6) {
688 if (unlikely(slen < hlen + sizeof(struct ipv6_hdr)))
689 return hw->mtu - sizeof(struct ipv6_hdr) -
690 sizeof(struct tcp_hdr);
/* IPv6: fixed header, then skip any extension headers. */
692 ipv6_hdr = (struct ipv6_hdr *)(ptr + hlen);
693 hlen += sizeof(struct ipv6_hdr);
694 if (unlikely(ipv6_hdr->proto != IPPROTO_TCP)) {
697 rte_net_skip_ip6_ext(ipv6_hdr->proto, rxm,
702 if (unlikely(slen < hlen + sizeof(struct tcp_hdr)))
703 return hw->mtu - hlen - sizeof(struct tcp_hdr) +
704 sizeof(struct ether_hdr);
/* data_off is in units of 4 bytes, stored in the top nibble. */
706 tcp_hdr = (struct tcp_hdr *)(ptr + hlen);
707 hlen += (tcp_hdr->data_off & 0xf0) >> 2;
/* Prefer payload/segment-count when the device reported segCnt > 1. */
709 if (rxm->udata64 > 1)
710 return (rte_pktmbuf_pkt_len(rxm) - hlen +
711 rxm->udata64 - 1) / rxm->udata64;
713 return hw->mtu - hlen + sizeof(struct ether_hdr);
716 /* Receive side checksum and other offloads */
/*
 * Translate RX completion descriptor flags into mbuf ol_flags and
 * packet_type. With sop != 0 this handles start-of-packet offloads
 * (L2 ptype, LRO detection on v2+ devices — MSS and segment count are
 * stashed in tso_segsz/udata64); with sop == 0 it handles end-of-packet
 * offloads (RSS hash, stripped VLAN, L3/L4 ptype and checksum status).
 */
718 vmxnet3_rx_offload(struct vmxnet3_hw *hw, const Vmxnet3_RxCompDesc *rcd,
719 struct rte_mbuf *rxm, const uint8_t sop)
721 uint64_t ol_flags = rxm->ol_flags;
722 uint32_t packet_type = rxm->packet_type;
724 /* Offloads set in sop */
726 /* Set packet type */
727 packet_type |= RTE_PTYPE_L2_ETHER;
729 /* Check large packet receive */
730 if (VMXNET3_VERSION_GE_2(hw) &&
731 rcd->type == VMXNET3_CDTYPE_RXCOMP_LRO) {
732 const Vmxnet3_RxCompDescExt *rcde =
733 (const Vmxnet3_RxCompDescExt *)rcd;
735 rxm->tso_segsz = rcde->mss;
/* Segment count parked in udata64 for vmxnet3_guess_mss(). */
736 rxm->udata64 = rcde->segCnt;
737 ol_flags |= PKT_RX_LRO;
739 } else { /* Offloads set in eop */
741 if (rcd->rssType != VMXNET3_RCD_RSS_TYPE_NONE) {
742 ol_flags |= PKT_RX_RSS_HASH;
743 rxm->hash.rss = rcd->rssHash;
746 /* Check for hardware stripped VLAN tag */
748 ol_flags |= (PKT_RX_VLAN | PKT_RX_VLAN_STRIPPED);
749 rxm->vlan_tci = rte_le_to_cpu_16((uint16_t)rcd->tci);
752 /* Check packet type, checksum errors, etc. */
754 ol_flags |= PKT_RX_L4_CKSUM_UNKNOWN;
/* IPv4 branch: IP checksum status, then TCP/UDP ptype + L4 status. */
757 packet_type |= RTE_PTYPE_L3_IPV4_EXT_UNKNOWN;
760 ol_flags |= PKT_RX_IP_CKSUM_GOOD;
762 ol_flags |= PKT_RX_IP_CKSUM_BAD;
765 ol_flags |= PKT_RX_L4_CKSUM_GOOD;
767 packet_type |= RTE_PTYPE_L4_TCP;
769 packet_type |= RTE_PTYPE_L4_UDP;
772 packet_type |= RTE_PTYPE_L4_TCP;
773 ol_flags |= PKT_RX_L4_CKSUM_BAD;
774 } else if (rcd->udp) {
775 packet_type |= RTE_PTYPE_L4_UDP;
776 ol_flags |= PKT_RX_L4_CKSUM_BAD;
/* IPv6 branch: no IP checksum; TCP/UDP ptype + L4 status only. */
779 } else if (rcd->v6) {
780 packet_type |= RTE_PTYPE_L3_IPV6_EXT_UNKNOWN;
783 ol_flags |= PKT_RX_L4_CKSUM_GOOD;
785 packet_type |= RTE_PTYPE_L4_TCP;
787 packet_type |= RTE_PTYPE_L4_UDP;
790 packet_type |= RTE_PTYPE_L4_TCP;
791 ol_flags |= PKT_RX_L4_CKSUM_BAD;
792 } else if (rcd->udp) {
793 packet_type |= RTE_PTYPE_L4_UDP;
794 ol_flags |= PKT_RX_L4_CKSUM_BAD;
798 packet_type |= RTE_PTYPE_UNKNOWN;
801 /* Old variants of vmxnet3 do not provide MSS */
802 if ((ol_flags & PKT_RX_LRO) && rxm->tso_segsz == 0)
803 rxm->tso_segsz = vmxnet3_guess_mss(hw,
808 rxm->ol_flags = ol_flags;
809 rxm->packet_type = packet_type;
/* Header comment of vmxnet3_recv_pkts (its opening marker is on an
 * original line not visible in this chunk).
 */
813 * Process the Rx Completion Ring of given vmxnet3_rx_queue
814 * for nb_pkts burst and return the number of packets received
/*
 * ethdev rx_burst callback. For each device-owned completion (gen bit
 * matches): pre-allocate a replacement mbuf, locate the command-ring
 * descriptor and buf_info it refers to, handle frame errors, assemble
 * scattered packets via start_seg/last_seg, apply RX offloads, renew
 * the descriptor with the fresh mbuf, and kick the RXPROD doorbell when
 * the device asks for it. On exit, repost buffers if the burst received
 * nothing but rings have free slots. Returns the number of packets
 * delivered into rx_pkts.
 */
817 vmxnet3_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t nb_pkts)
820 uint32_t nb_rxd, idx;
822 vmxnet3_rx_queue_t *rxq;
823 Vmxnet3_RxCompDesc *rcd;
824 vmxnet3_buf_info_t *rbi;
826 struct rte_mbuf *rxm = NULL;
827 struct vmxnet3_hw *hw;
837 rcd = &rxq->comp_ring.base[rxq->comp_ring.next2proc].rcd;
839 if (unlikely(rxq->stopped)) {
840 PMD_RX_LOG(DEBUG, "Rx queue is stopped.");
/* Main completion-processing loop. */
844 while (rcd->gen == rxq->comp_ring.gen) {
845 struct rte_mbuf *newm;
847 if (nb_rx >= nb_pkts)
/* Allocate the replacement buffer up front so the ring slot is
 * never left empty if processing proceeds.
 */
850 newm = rte_mbuf_raw_alloc(rxq->mp);
851 if (unlikely(newm == NULL)) {
852 PMD_RX_LOG(ERR, "Error allocating mbuf");
853 rxq->stats.rx_buf_alloc_failure++;
858 ring_idx = vmxnet3_get_ring_idx(hw, rcd->rqID);
859 rxd = (Vmxnet3_RxDesc *)rxq->cmd_ring[ring_idx].base + idx;
860 RTE_SET_USED(rxd); /* used only for assert when enabled */
861 rbi = rxq->cmd_ring[ring_idx].buf_info + idx;
863 PMD_RX_LOG(DEBUG, "rxd idx: %d ring idx: %d.", idx, ring_idx);
865 RTE_ASSERT(rcd->len <= rxd->len);
868 /* Get the packet buffer pointer from buf_info */
871 /* Clear descriptor associated buf_info to be reused */
875 /* Update the index that we received a packet */
876 rxq->cmd_ring[ring_idx].next2comp = idx;
878 /* For RCD with EOP set, check if there is frame error */
879 if (unlikely(rcd->eop && rcd->err)) {
880 rxq->stats.drop_total++;
881 rxq->stats.drop_err++;
884 rxq->stats.drop_fcs++;
885 PMD_RX_LOG(ERR, "Recv packet dropped due to frame err.")
886 
887 PMD_RX_LOG(ERR, "Error in received packet rcd#:%d rxd:%d",
888 (int)(rcd - (struct Vmxnet3_RxCompDesc *)
889 rxq->comp_ring.base), rcd->rxdIdx);
890 rte_pktmbuf_free_seg(rxm);
/* Also drop any partially-assembled scattered packet. */
891 if (rxq->start_seg) {
892 struct rte_mbuf *start = rxq->start_seg;
894 rxq->start_seg = NULL;
895 rte_pktmbuf_free(start);
900 /* Initialize newly received packet buffer */
901 rxm->port = rxq->port_id;
904 rxm->pkt_len = (uint16_t)rcd->len;
905 rxm->data_len = (uint16_t)rcd->len;
906 rxm->data_off = RTE_PKTMBUF_HEADROOM;
909 rxm->packet_type = 0;
912 * If this is the first buffer of the received packet,
913 * set the pointer to the first mbuf of the packet
914 * Otherwise, update the total length and the number of segments
915 * of the current scattered packet, and update the pointer to
916 * the last mbuf of the current packet.
919 RTE_ASSERT(rxd->btype == VMXNET3_RXD_BTYPE_HEAD);
921 if (unlikely(rcd->len == 0)) {
922 RTE_ASSERT(rcd->eop);
925 "Rx buf was skipped. rxring[%d][%d])",
927 rte_pktmbuf_free_seg(rxm);
/* V3 devices may deliver small packets through the RX data ring;
 * copy the payload out of it into the mbuf.
 */
931 if (vmxnet3_rx_data_ring(hw, rcd->rqID)) {
932 uint8_t *rdd = rxq->data_ring.base +
933 idx * rxq->data_desc_size;
935 RTE_ASSERT(VMXNET3_VERSION_GE_3(hw));
936 rte_memcpy(rte_pktmbuf_mtod(rxm, char *),
940 rxq->start_seg = rxm;
942 vmxnet3_rx_offload(hw, rcd, rxm, 1);
/* Continuation segment: append to the in-progress chain. */
944 struct rte_mbuf *start = rxq->start_seg;
946 RTE_ASSERT(rxd->btype == VMXNET3_RXD_BTYPE_BODY);
949 start->pkt_len += rxm->data_len;
952 rxq->last_seg->next = rxm;
955 rte_pktmbuf_free_seg(rxm);
/* EOP: finalize offloads on the head mbuf and hand it to the app. */
960 struct rte_mbuf *start = rxq->start_seg;
962 vmxnet3_rx_offload(hw, rcd, start, 0);
963 rx_pkts[nb_rx++] = start;
964 rxq->start_seg = NULL;
968 rxq->cmd_ring[ring_idx].next2comp = idx;
969 VMXNET3_INC_RING_IDX_ONLY(rxq->cmd_ring[ring_idx].next2comp,
970 rxq->cmd_ring[ring_idx].size);
972 /* It's time to renew descriptors */
973 vmxnet3_renew_desc(rxq, ring_idx, newm);
974 if (unlikely(rxq->shared->ctrl.updateRxProd)) {
975 VMXNET3_WRITE_BAR0_REG(hw, rxprod_reg[ring_idx] + (rxq->queue_id * VMXNET3_REG_ALIGN),
976 rxq->cmd_ring[ring_idx].next2fill);
979 /* Advance to the next descriptor in comp_ring */
980 vmxnet3_comp_ring_adv_next2proc(&rxq->comp_ring);
982 rcd = &rxq->comp_ring.base[rxq->comp_ring.next2proc].rcd;
/* Bound one burst to at most a full ring's worth of descriptors. */
984 if (nb_rxd > rxq->cmd_ring[0].size) {
985 PMD_RX_LOG(ERR, "Used up quota of receiving packets,"
986 " relinquish control.");
/* Nothing received this burst: opportunistically repost buffers. */
991 if (unlikely(nb_rxd == 0)) {
993 for (ring_idx = 0; ring_idx < VMXNET3_RX_CMDRING_SIZE; ring_idx++) {
994 avail = vmxnet3_cmd_ring_desc_avail(&rxq->cmd_ring[ring_idx]);
995 if (unlikely(avail > 0)) {
996 /* try to alloc new buf and renew descriptors */
997 vmxnet3_post_rx_bufs(rxq, ring_idx);
1000 if (unlikely(rxq->shared->ctrl.updateRxProd)) {
1001 for (ring_idx = 0; ring_idx < VMXNET3_RX_CMDRING_SIZE; ring_idx++) {
1002 VMXNET3_WRITE_BAR0_REG(hw, rxprod_reg[ring_idx] + (rxq->queue_id * VMXNET3_REG_ALIGN),
1003 rxq->cmd_ring[ring_idx].next2fill);
/*
 * ethdev tx_queue_setup callback: allocate and initialize one TX queue.
 * Validates the txq flags (SCTP checksum offload unsupported) and ring
 * size bounds, then carves a single DMA memzone into three contiguous
 * regions — cmd ring, completion ring, data ring — records their
 * virtual/IOVA bases, allocates the per-descriptor buf_info array, and
 * publishes the queue in dev->data->tx_queues.
 */
1012 vmxnet3_dev_tx_queue_setup(struct rte_eth_dev *dev,
1015 unsigned int socket_id,
1016 const struct rte_eth_txconf *tx_conf)
1018 struct vmxnet3_hw *hw = dev->data->dev_private;
1019 const struct rte_memzone *mz;
1020 struct vmxnet3_tx_queue *txq;
1021 struct vmxnet3_cmd_ring *ring;
1022 struct vmxnet3_comp_ring *comp_ring;
1023 struct vmxnet3_data_ring *data_ring;
1026 PMD_INIT_FUNC_TRACE();
/* SCTP checksum offload cannot be provided by this device. */
1028 if ((tx_conf->txq_flags & ETH_TXQ_FLAGS_NOXSUMSCTP) !=
1029 ETH_TXQ_FLAGS_NOXSUMSCTP) {
1030 PMD_INIT_LOG(ERR, "SCTP checksum offload not supported");
1034 txq = rte_zmalloc("ethdev_tx_queue", sizeof(struct vmxnet3_tx_queue),
1035 RTE_CACHE_LINE_SIZE);
1037 PMD_INIT_LOG(ERR, "Can not allocate tx queue structure");
1041 txq->queue_id = queue_idx;
1042 txq->port_id = dev->data->port_id;
1043 txq->shared = NULL; /* set in vmxnet3_setup_driver_shared() */
1045 txq->qid = queue_idx;
1046 txq->stopped = TRUE;
1047 txq->txdata_desc_size = hw->txdata_desc_size;
1049 ring = &txq->cmd_ring;
1050 comp_ring = &txq->comp_ring;
1051 data_ring = &txq->data_ring;
1053 /* Tx vmxnet ring length should be between 512-4096 */
1054 if (nb_desc < VMXNET3_DEF_TX_RING_SIZE) {
1055 PMD_INIT_LOG(ERR, "VMXNET3 Tx Ring Size Min: %u",
1056 VMXNET3_DEF_TX_RING_SIZE);
1058 } else if (nb_desc > VMXNET3_TX_RING_MAX_SIZE) {
1059 PMD_INIT_LOG(ERR, "VMXNET3 Tx Ring Size Max: %u",
1060 VMXNET3_TX_RING_MAX_SIZE);
/* Round the size down to the device-required alignment. */
1063 ring->size = nb_desc;
1064 ring->size &= ~VMXNET3_RING_SIZE_MASK;
1066 comp_ring->size = data_ring->size = ring->size;
1068 /* Tx vmxnet rings structure initialization*/
1069 ring->next2fill = 0;
1070 ring->next2comp = 0;
1071 ring->gen = VMXNET3_INIT_GEN;
1072 comp_ring->next2proc = 0;
1073 comp_ring->gen = VMXNET3_INIT_GEN;
/* One memzone holds all three rings back to back. */
1075 size = sizeof(struct Vmxnet3_TxDesc) * ring->size;
1076 size += sizeof(struct Vmxnet3_TxCompDesc) * comp_ring->size;
1077 size += txq->txdata_desc_size * data_ring->size;
1079 mz = rte_eth_dma_zone_reserve(dev, "txdesc", queue_idx, size,
1080 VMXNET3_RING_BA_ALIGN, socket_id);
1082 PMD_INIT_LOG(ERR, "ERROR: Creating queue descriptors zone");
1086 memset(mz->addr, 0, mz->len);
1088 /* cmd_ring initialization */
1089 ring->base = mz->addr;
1090 ring->basePA = mz->iova;
1092 /* comp_ring initialization */
1093 comp_ring->base = ring->base + ring->size;
1094 comp_ring->basePA = ring->basePA +
1095 (sizeof(struct Vmxnet3_TxDesc) * ring->size);
1097 /* data_ring initialization */
1098 data_ring->base = (Vmxnet3_TxDataDesc *)(comp_ring->base + comp_ring->size);
1099 data_ring->basePA = comp_ring->basePA +
1100 (sizeof(struct Vmxnet3_TxCompDesc) * comp_ring->size);
1102 /* cmd_ring0 buf_info allocation */
1103 ring->buf_info = rte_zmalloc("tx_ring_buf_info",
1104 ring->size * sizeof(vmxnet3_buf_info_t), RTE_CACHE_LINE_SIZE);
1105 if (ring->buf_info == NULL) {
1106 PMD_INIT_LOG(ERR, "ERROR: Creating tx_buf_info structure");
1110 /* Update the data portion with txq */
1111 dev->data->tx_queues[queue_idx] = txq;
/*
 * ethdev rx_queue_setup callback: allocate and initialize one RX queue.
 * Validates the ring size bounds, carves a single DMA memzone into
 * cmd ring 0, cmd ring 1, the completion ring and (on v3 devices with a
 * non-zero rxdata_desc_size) the RX data ring, allocates per-ring
 * buf_info arrays, and publishes the queue in dev->data->rx_queues.
 * qid1/qid2/data_ring_qid encode the device-visible queue IDs for the
 * two command rings and the data ring respectively.
 */
1117 vmxnet3_dev_rx_queue_setup(struct rte_eth_dev *dev,
1120 unsigned int socket_id,
1121 __rte_unused const struct rte_eth_rxconf *rx_conf,
1122 struct rte_mempool *mp)
1124 const struct rte_memzone *mz;
1125 struct vmxnet3_rx_queue *rxq;
1126 struct vmxnet3_hw *hw = dev->data->dev_private;
1127 struct vmxnet3_cmd_ring *ring0, *ring1, *ring;
1128 struct vmxnet3_comp_ring *comp_ring;
1129 struct vmxnet3_rx_data_ring *data_ring;
1134 PMD_INIT_FUNC_TRACE();
1136 rxq = rte_zmalloc("ethdev_rx_queue", sizeof(struct vmxnet3_rx_queue),
1137 RTE_CACHE_LINE_SIZE);
1139 PMD_INIT_LOG(ERR, "Can not allocate rx queue structure");
1144 rxq->queue_id = queue_idx;
1145 rxq->port_id = dev->data->port_id;
1146 rxq->shared = NULL; /* set in vmxnet3_setup_driver_shared() */
1148 rxq->qid1 = queue_idx;
1149 rxq->qid2 = queue_idx + hw->num_rx_queues;
1150 rxq->data_ring_qid = queue_idx + 2 * hw->num_rx_queues;
1151 rxq->data_desc_size = hw->rxdata_desc_size;
1152 rxq->stopped = TRUE;
1154 ring0 = &rxq->cmd_ring[0];
1155 ring1 = &rxq->cmd_ring[1];
1156 comp_ring = &rxq->comp_ring;
1157 data_ring = &rxq->data_ring;
1159 /* Rx vmxnet rings length should be between 256-4096 */
1160 if (nb_desc < VMXNET3_DEF_RX_RING_SIZE) {
1161 PMD_INIT_LOG(ERR, "VMXNET3 Rx Ring Size Min: 256");
1163 } else if (nb_desc > VMXNET3_RX_RING_MAX_SIZE) {
1164 PMD_INIT_LOG(ERR, "VMXNET3 Rx Ring Size Max: 4096");
/* Round down to the required alignment; both rings share one size. */
1167 ring0->size = nb_desc;
1168 ring0->size &= ~VMXNET3_RING_SIZE_MASK;
1169 ring1->size = ring0->size;
1172 comp_ring->size = ring0->size + ring1->size;
1173 data_ring->size = ring0->size;
1175 /* Rx vmxnet rings structure initialization */
1176 ring0->next2fill = 0;
1177 ring1->next2fill = 0;
1178 ring0->next2comp = 0;
1179 ring1->next2comp = 0;
1180 ring0->gen = VMXNET3_INIT_GEN;
1181 ring1->gen = VMXNET3_INIT_GEN;
1182 comp_ring->next2proc = 0;
1183 comp_ring->gen = VMXNET3_INIT_GEN;
/* One memzone holds all rings back to back. */
1185 size = sizeof(struct Vmxnet3_RxDesc) * (ring0->size + ring1->size);
1186 size += sizeof(struct Vmxnet3_RxCompDesc) * comp_ring->size;
1187 if (VMXNET3_VERSION_GE_3(hw) && rxq->data_desc_size)
1188 size += rxq->data_desc_size * data_ring->size;
1190 mz = rte_eth_dma_zone_reserve(dev, "rxdesc", queue_idx, size,
1191 VMXNET3_RING_BA_ALIGN, socket_id);
1193 PMD_INIT_LOG(ERR, "ERROR: Creating queue descriptors zone");
1197 memset(mz->addr, 0, mz->len);
1199 /* cmd_ring0 initialization */
1200 ring0->base = mz->addr;
1201 ring0->basePA = mz->iova;
1203 /* cmd_ring1 initialization */
1204 ring1->base = ring0->base + ring0->size;
1205 ring1->basePA = ring0->basePA + sizeof(struct Vmxnet3_RxDesc) * ring0->size;
1207 /* comp_ring initialization */
1208 comp_ring->base = ring1->base + ring1->size;
1209 comp_ring->basePA = ring1->basePA + sizeof(struct Vmxnet3_RxDesc) *
1212 /* data_ring initialization */
1213 if (VMXNET3_VERSION_GE_3(hw) && rxq->data_desc_size) {
1215 (uint8_t *)(comp_ring->base + comp_ring->size);
1216 data_ring->basePA = comp_ring->basePA +
1217 sizeof(struct Vmxnet3_RxCompDesc) * comp_ring->size;
1220 /* cmd_ring0-cmd_ring1 buf_info allocation */
1221 for (i = 0; i < VMXNET3_RX_CMDRING_SIZE; i++) {
1223 ring = &rxq->cmd_ring[i];
1225 snprintf(mem_name, sizeof(mem_name), "rx_ring_%d_buf_info", i);
1227 ring->buf_info = rte_zmalloc(mem_name,
1228 ring->size * sizeof(vmxnet3_buf_info_t),
1229 RTE_CACHE_LINE_SIZE);
1230 if (ring->buf_info == NULL) {
1231 PMD_INIT_LOG(ERR, "ERROR: Creating rx_buf_info structure");
1236 /* Update the data portion with rxq */
1237 dev->data->rx_queues[queue_idx] = rxq;
/* Header comment of vmxnet3_dev_rxtx_init (its opening marker is on an
 * original line not visible in this chunk).
 */
1243 * Initializes Receive Unit
1244 * Load mbufs in rx queue in advance
/*
 * Device start helper: pre-post RX buffers on every command ring of
 * every RX queue, program the RXPROD doorbells if the device requests
 * it, and mark all RX and TX queues as running.
 */
1247 vmxnet3_dev_rxtx_init(struct rte_eth_dev *dev)
1249 struct vmxnet3_hw *hw = dev->data->dev_private;
1254 PMD_INIT_FUNC_TRACE();
1256 for (i = 0; i < hw->num_rx_queues; i++) {
1257 vmxnet3_rx_queue_t *rxq = dev->data->rx_queues[i];
1259 for (j = 0; j < VMXNET3_RX_CMDRING_SIZE; j++) {
1260 /* Passing 0 as alloc_num will allocate full ring */
1261 ret = vmxnet3_post_rx_bufs(rxq, j);
1264 "ERROR: Posting Rxq: %d buffers ring: %d",
1269 * Updating device with the index:next2fill to fill the
1270 * mbufs for coming packets.
1272 if (unlikely(rxq->shared->ctrl.updateRxProd)) {
1273 VMXNET3_WRITE_BAR0_REG(hw, rxprod_reg[j] + (rxq->queue_id * VMXNET3_REG_ALIGN),
1274 rxq->cmd_ring[j].next2fill);
1277 rxq->stopped = FALSE;
1278 rxq->start_seg = NULL;
1281 for (i = 0; i < dev->data->nb_tx_queues; i++) {
1282 struct vmxnet3_tx_queue *txq = dev->data->tx_queues[i];
1284 txq->stopped = FALSE;
/*
 * Default 40-byte Toeplitz RSS hash key used when the application does
 * not supply one (only the first five rows are visible in this chunk;
 * the array is declared with 40 elements).
 */
1290 static uint8_t rss_intel_key[40] = {
1291 0x6D, 0x5A, 0x56, 0xDA, 0x25, 0x5B, 0x0E, 0xC2,
1292 0x41, 0x67, 0x25, 0x3D, 0x43, 0xA3, 0x8F, 0xB0,
1293 0xD0, 0xCA, 0x2B, 0xCB, 0xAE, 0x7B, 0x30, 0xB4,
1294 0x77, 0xCB, 0x2D, 0xA3, 0x80, 0x30, 0xF2, 0x0C,
1295 0x6A, 0x42, 0xB7, 0x3B, 0xBE, 0xAC, 0x01, 0xFA,
/* Header comment of vmxnet3_rss_configure (its opening marker is on an
 * original line not visible in this chunk).
 */
1299 * Configure RSS feature
/*
 * Fill the device's shared RSS configuration from the port's RSS conf:
 * Toeplitz hash function, full-size key (defaulting to rss_intel_key if
 * the app gave none), a round-robin indirection table over the
 * configured RX queues, and the hash-type bits translated from the
 * ETH_RSS_* flags the device supports.
 */
1302 vmxnet3_rss_configure(struct rte_eth_dev *dev)
1304 struct vmxnet3_hw *hw = dev->data->dev_private;
1305 struct VMXNET3_RSSConf *dev_rss_conf;
1306 struct rte_eth_rss_conf *port_rss_conf;
1310 PMD_INIT_FUNC_TRACE();
1312 dev_rss_conf = hw->rss_conf;
1313 port_rss_conf = &dev->data->dev_conf.rx_adv_conf.rss_conf;
1315 /* loading hashFunc */
1316 dev_rss_conf->hashFunc = VMXNET3_RSS_HASH_FUNC_TOEPLITZ;
1317 /* loading hashKeySize */
1318 dev_rss_conf->hashKeySize = VMXNET3_RSS_MAX_KEY_SIZE;
1319 /* loading indTableSize: Must not exceed VMXNET3_RSS_MAX_IND_TABLE_SIZE (128)*/
1320 dev_rss_conf->indTableSize = (uint16_t)(hw->num_rx_queues * 4);
1322 if (port_rss_conf->rss_key == NULL) {
1323 /* Default hash key */
1324 port_rss_conf->rss_key = rss_intel_key;
1327 /* loading hashKey */
1328 memcpy(&dev_rss_conf->hashKey[0], port_rss_conf->rss_key,
1329 dev_rss_conf->hashKeySize);
1331 /* loading indTable */
/* Round-robin the indirection table entries over the RX queues. */
1332 for (i = 0, j = 0; i < dev_rss_conf->indTableSize; i++, j++) {
1333 if (j == dev->data->nb_rx_queues)
1335 dev_rss_conf->indTable[i] = j;
1338 /* loading hashType */
1339 dev_rss_conf->hashType = 0;
1340 rss_hf = port_rss_conf->rss_hf & VMXNET3_RSS_OFFLOAD_ALL;
1341 if (rss_hf & ETH_RSS_IPV4)
1342 dev_rss_conf->hashType |= VMXNET3_RSS_HASH_TYPE_IPV4;
1343 if (rss_hf & ETH_RSS_NONFRAG_IPV4_TCP)
1344 dev_rss_conf->hashType |= VMXNET3_RSS_HASH_TYPE_TCP_IPV4;
1345 if (rss_hf & ETH_RSS_IPV6)
1346 dev_rss_conf->hashType |= VMXNET3_RSS_HASH_TYPE_IPV6;
1347 if (rss_hf & ETH_RSS_NONFRAG_IPV6_TCP)
1348 dev_rss_conf->hashType |= VMXNET3_RSS_HASH_TYPE_TCP_IPV6;
1350 return VMXNET3_SUCCESS;