1 /* SPDX-License-Identifier: BSD-3-Clause
2 * Copyright(c) 2010-2015 Intel Corporation
16 #include <rte_byteorder.h>
17 #include <rte_common.h>
18 #include <rte_cycles.h>
20 #include <rte_debug.h>
21 #include <rte_interrupts.h>
23 #include <rte_memory.h>
24 #include <rte_memzone.h>
25 #include <rte_launch.h>
27 #include <rte_per_lcore.h>
28 #include <rte_lcore.h>
29 #include <rte_atomic.h>
30 #include <rte_branch_prediction.h>
31 #include <rte_mempool.h>
32 #include <rte_malloc.h>
34 #include <rte_ether.h>
35 #include <rte_ethdev_driver.h>
36 #include <rte_prefetch.h>
41 #include <rte_string_fns.h>
42 #include <rte_errno.h>
45 #include "base/vmxnet3_defs.h"
46 #include "vmxnet3_ring.h"
48 #include "vmxnet3_logs.h"
49 #include "vmxnet3_ethdev.h"
51 #define VMXNET3_TX_OFFLOAD_MASK ( \
56 #define VMXNET3_TX_OFFLOAD_NOTSUP_MASK \
57 (PKT_TX_OFFLOAD_MASK ^ VMXNET3_TX_OFFLOAD_MASK)
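/*
 * VMXNET3_TX_OFFLOAD_MASK lists the Tx offload flags this device can handle;
 * VMXNET3_TX_OFFLOAD_NOTSUP_MASK is its complement within PKT_TX_OFFLOAD_MASK
 * and is checked in vmxnet3_prep_pkts() to reject packets requesting anything else.
 */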
59 static const uint32_t rxprod_reg[2] = {VMXNET3_REG_RXPROD, VMXNET3_REG_RXPROD2};
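/*
 * rxprod_reg[] maps a ring_id to the BAR0 producer-index register that is
 * written whenever freshly posted Rx buffers are handed to the device.
 */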
61 static int vmxnet3_post_rx_bufs(vmxnet3_rx_queue_t*, uint8_t);
62 static void vmxnet3_tq_tx_complete(vmxnet3_tx_queue_t *);
63 #ifdef RTE_LIBRTE_VMXNET3_DEBUG_DRIVER_NOT_USED
64 static void vmxnet3_rxq_dump(struct vmxnet3_rx_queue *);
65 static void vmxnet3_txq_dump(struct vmxnet3_tx_queue *);
68 #ifdef RTE_LIBRTE_VMXNET3_DEBUG_DRIVER_NOT_USED
70 vmxnet3_rxq_dump(struct vmxnet3_rx_queue *rxq)
78 "RXQ: cmd0 base : %p cmd1 base : %p comp ring base : %p.",
79 rxq->cmd_ring[0].base, rxq->cmd_ring[1].base, rxq->comp_ring.base);
81 "RXQ: cmd0 basePA : 0x%lx cmd1 basePA : 0x%lx comp ring basePA : 0x%lx.",
82 (unsigned long)rxq->cmd_ring[0].basePA,
83 (unsigned long)rxq->cmd_ring[1].basePA,
84 (unsigned long)rxq->comp_ring.basePA);
86 avail = vmxnet3_cmd_ring_desc_avail(&rxq->cmd_ring[0]);
88 "RXQ: cmd0: size=%u; free=%u; next2proc=%u; queued=%u",
89 (uint32_t)rxq->cmd_ring[0].size, avail,
90 rxq->comp_ring.next2proc,
91 rxq->cmd_ring[0].size - avail);
93 avail = vmxnet3_cmd_ring_desc_avail(&rxq->cmd_ring[1]);
94 PMD_RX_LOG(DEBUG, "RXQ: cmd1: size=%u; free=%u; next2proc=%u; queued=%u",
95 (uint32_t)rxq->cmd_ring[1].size, avail, rxq->comp_ring.next2proc,
96 rxq->cmd_ring[1].size - avail);
101 vmxnet3_txq_dump(struct vmxnet3_tx_queue *txq)
108 PMD_TX_LOG(DEBUG, "TXQ: cmd base : %p comp ring base : %p data ring base : %p.",
109 txq->cmd_ring.base, txq->comp_ring.base, txq->data_ring.base);
110 PMD_TX_LOG(DEBUG, "TXQ: cmd basePA : 0x%lx comp ring basePA : 0x%lx data ring basePA : 0x%lx.",
111 (unsigned long)txq->cmd_ring.basePA,
112 (unsigned long)txq->comp_ring.basePA,
113 (unsigned long)txq->data_ring.basePA);
115 avail = vmxnet3_cmd_ring_desc_avail(&txq->cmd_ring);
116 PMD_TX_LOG(DEBUG, "TXQ: size=%u; free=%u; next2proc=%u; queued=%u",
117 (uint32_t)txq->cmd_ring.size, avail,
118 txq->comp_ring.next2proc, txq->cmd_ring.size - avail);
123 vmxnet3_tx_cmd_ring_release_mbufs(vmxnet3_cmd_ring_t *ring)
125 while (ring->next2comp != ring->next2fill) {
126 /* No need to worry about desc ownership, device is quiesced by now. */
127 vmxnet3_buf_info_t *buf_info = ring->buf_info + ring->next2comp;
130 rte_pktmbuf_free(buf_info->m);
135 vmxnet3_cmd_ring_adv_next2comp(ring);
140 vmxnet3_rx_cmd_ring_release_mbufs(vmxnet3_cmd_ring_t *ring)
144 for (i = 0; i < ring->size; i++) {
145 /* No need to worry about desc ownership, device is quiesced by now. */
146 vmxnet3_buf_info_t *buf_info = &ring->buf_info[i];
149 rte_pktmbuf_free_seg(buf_info->m);
154 vmxnet3_cmd_ring_adv_next2comp(ring);
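/*
 * Unlike the Tx release above, each Rx ring entry owns at most one mbuf
 * segment, so the whole ring is walked and rte_pktmbuf_free_seg() is used
 * rather than freeing a chain.
 */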
159 vmxnet3_cmd_ring_release(vmxnet3_cmd_ring_t *ring)
161 rte_free(ring->buf_info);
162 ring->buf_info = NULL;
166 vmxnet3_dev_tx_queue_release(void *txq)
168 vmxnet3_tx_queue_t *tq = txq;
172 vmxnet3_tx_cmd_ring_release_mbufs(&tq->cmd_ring);
173 /* Release the cmd_ring */
174 vmxnet3_cmd_ring_release(&tq->cmd_ring);
175 /* Release the memzone */
176 rte_memzone_free(tq->mz);
177 /* Release the queue */
183 vmxnet3_dev_rx_queue_release(void *rxq)
186 vmxnet3_rx_queue_t *rq = rxq;
190 for (i = 0; i < VMXNET3_RX_CMDRING_SIZE; i++)
191 vmxnet3_rx_cmd_ring_release_mbufs(&rq->cmd_ring[i]);
193 /* Release both the cmd_rings */
194 for (i = 0; i < VMXNET3_RX_CMDRING_SIZE; i++)
195 vmxnet3_cmd_ring_release(&rq->cmd_ring[i]);
197 /* Release the memzone */
198 rte_memzone_free(rq->mz);
200 /* Release the queue */
206 vmxnet3_dev_tx_queue_reset(void *txq)
208 vmxnet3_tx_queue_t *tq = txq;
209 struct vmxnet3_cmd_ring *ring = &tq->cmd_ring;
210 struct vmxnet3_comp_ring *comp_ring = &tq->comp_ring;
211 struct vmxnet3_data_ring *data_ring = &tq->data_ring;
215 /* Release the cmd_ring mbufs */
216 vmxnet3_tx_cmd_ring_release_mbufs(&tq->cmd_ring);
219 /* Tx vmxnet rings structure initialization */
222 ring->gen = VMXNET3_INIT_GEN;
223 comp_ring->next2proc = 0;
224 comp_ring->gen = VMXNET3_INIT_GEN;
226 size = sizeof(struct Vmxnet3_TxDesc) * ring->size;
227 size += sizeof(struct Vmxnet3_TxCompDesc) * comp_ring->size;
228 size += tq->txdata_desc_size * data_ring->size;
230 memset(ring->base, 0, size);
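/*
 * The command, completion and data rings share one contiguous memzone
 * (laid out in vmxnet3_dev_tx_queue_setup()), so a single memset over the
 * summed size clears all three descriptor areas at once.
 */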
234 vmxnet3_dev_rx_queue_reset(void *rxq)
237 vmxnet3_rx_queue_t *rq = rxq;
238 struct vmxnet3_hw *hw = rq->hw;
239 struct vmxnet3_cmd_ring *ring0, *ring1;
240 struct vmxnet3_comp_ring *comp_ring;
241 struct vmxnet3_rx_data_ring *data_ring = &rq->data_ring;
244 /* Release both the cmd_rings mbufs */
245 for (i = 0; i < VMXNET3_RX_CMDRING_SIZE; i++)
246 vmxnet3_rx_cmd_ring_release_mbufs(&rq->cmd_ring[i]);
248 ring0 = &rq->cmd_ring[0];
249 ring1 = &rq->cmd_ring[1];
250 comp_ring = &rq->comp_ring;
252 /* Rx vmxnet rings structure initialization */
253 ring0->next2fill = 0;
254 ring1->next2fill = 0;
255 ring0->next2comp = 0;
256 ring1->next2comp = 0;
257 ring0->gen = VMXNET3_INIT_GEN;
258 ring1->gen = VMXNET3_INIT_GEN;
259 comp_ring->next2proc = 0;
260 comp_ring->gen = VMXNET3_INIT_GEN;
262 size = sizeof(struct Vmxnet3_RxDesc) * (ring0->size + ring1->size);
263 size += sizeof(struct Vmxnet3_RxCompDesc) * comp_ring->size;
264 if (VMXNET3_VERSION_GE_3(hw) && rq->data_desc_size)
265 size += rq->data_desc_size * data_ring->size;
267 memset(ring0->base, 0, size);
271 vmxnet3_dev_clear_queues(struct rte_eth_dev *dev)
275 PMD_INIT_FUNC_TRACE();
277 for (i = 0; i < dev->data->nb_tx_queues; i++) {
278 struct vmxnet3_tx_queue *txq = dev->data->tx_queues[i];
282 vmxnet3_dev_tx_queue_reset(txq);
286 for (i = 0; i < dev->data->nb_rx_queues; i++) {
287 struct vmxnet3_rx_queue *rxq = dev->data->rx_queues[i];
291 vmxnet3_dev_rx_queue_reset(rxq);
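/*
 * Given the index of the EOP descriptor reported by a Tx completion, free
 * the transmitted mbuf chain and advance next2comp over every command
 * descriptor that belonged to that packet. Returns the number of command
 * descriptors reclaimed.
 */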
297 vmxnet3_unmap_pkt(uint16_t eop_idx, vmxnet3_tx_queue_t *txq)
300 struct rte_mbuf *mbuf;
302 /* Release cmd_ring descriptor and free mbuf */
303 RTE_ASSERT(txq->cmd_ring.base[eop_idx].txd.eop == 1);
305 mbuf = txq->cmd_ring.buf_info[eop_idx].m;
307 rte_panic("EOP desc does not point to a valid mbuf");
308 rte_pktmbuf_free(mbuf);
310 txq->cmd_ring.buf_info[eop_idx].m = NULL;
312 while (txq->cmd_ring.next2comp != eop_idx) {
313 /* no out-of-order completion */
314 RTE_ASSERT(txq->cmd_ring.base[txq->cmd_ring.next2comp].txd.cq == 0);
315 vmxnet3_cmd_ring_adv_next2comp(&txq->cmd_ring);
319 /* Mark the txd for which tcd was generated as completed */
320 vmxnet3_cmd_ring_adv_next2comp(&txq->cmd_ring);
322 return completed + 1;
326 vmxnet3_tq_tx_complete(vmxnet3_tx_queue_t *txq)
329 vmxnet3_comp_ring_t *comp_ring = &txq->comp_ring;
330 struct Vmxnet3_TxCompDesc *tcd = (struct Vmxnet3_TxCompDesc *)
331 (comp_ring->base + comp_ring->next2proc);
333 while (tcd->gen == comp_ring->gen) {
334 completed += vmxnet3_unmap_pkt(tcd->txdIdx, txq);
336 vmxnet3_comp_ring_adv_next2proc(comp_ring);
337 tcd = (struct Vmxnet3_TxCompDesc *)(comp_ring->base +
338 comp_ring->next2proc);
341 PMD_TX_LOG(DEBUG, "Processed %d tx comps & command descs.", completed);
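/*
 * Tx prepare callback: rejects non-TSO packets that need more than
 * VMXNET3_MAX_TXD_PER_PKT descriptors and packets requesting unsupported
 * offloads, then lets rte_net_intel_cksum_prepare() set up the pseudo-header
 * checksum the device expects for L4 checksum/TSO offload.
 */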
345 vmxnet3_prep_pkts(__rte_unused void *tx_queue, struct rte_mbuf **tx_pkts,
353 for (i = 0; i != nb_pkts; i++) {
355 ol_flags = m->ol_flags;
357 /* Non-TSO packet cannot occupy more than
358 * VMXNET3_MAX_TXD_PER_PKT TX descriptors.
360 if ((ol_flags & PKT_TX_TCP_SEG) == 0 &&
361 m->nb_segs > VMXNET3_MAX_TXD_PER_PKT) {
366 /* check that only supported TX offloads are requested. */
367 if ((ol_flags & VMXNET3_TX_OFFLOAD_NOTSUP_MASK) != 0 ||
368 (ol_flags & PKT_TX_L4_MASK) ==
370 rte_errno = ENOTSUP;
374 #ifdef RTE_LIBRTE_ETHDEV_DEBUG
375 ret = rte_validate_tx_offload(m);
381 ret = rte_net_intel_cksum_prepare(m);
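/*
 * Burst transmit: completions are reaped first, then each packet is mapped
 * onto one command descriptor per segment (or copied into the Tx data ring
 * when it is a small single-segment packet), offload fields are filled on
 * the SOP descriptor, and the doorbell is written once enough packets have
 * been deferred.
 */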
392 vmxnet3_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts,
396 vmxnet3_tx_queue_t *txq = tx_queue;
397 struct vmxnet3_hw *hw = txq->hw;
398 Vmxnet3_TxQueueCtrl *txq_ctrl = &txq->shared->ctrl;
399 uint32_t deferred = rte_le_to_cpu_32(txq_ctrl->txNumDeferred);
401 if (unlikely(txq->stopped)) {
402 PMD_TX_LOG(DEBUG, "Tx queue is stopped.");
406 /* Free up the comp_descriptors aggressively */
407 vmxnet3_tq_tx_complete(txq);
410 while (nb_tx < nb_pkts) {
411 Vmxnet3_GenericDesc *gdesc;
412 vmxnet3_buf_info_t *tbi;
413 uint32_t first2fill, avail, dw2;
414 struct rte_mbuf *txm = tx_pkts[nb_tx];
415 struct rte_mbuf *m_seg = txm;
417 bool tso = (txm->ol_flags & PKT_TX_TCP_SEG) != 0;
418 /* # of descriptors needed for a packet. */
419 unsigned count = txm->nb_segs;
421 avail = vmxnet3_cmd_ring_desc_avail(&txq->cmd_ring);
423 /* Is command ring full? */
424 if (unlikely(avail == 0)) {
425 PMD_TX_LOG(DEBUG, "No free ring descriptors");
426 txq->stats.tx_ring_full++;
427 txq->stats.drop_total += (nb_pkts - nb_tx);
431 /* Command ring is not full but cannot handle the
432 * multi-segmented packet. Let's try the next packet
435 PMD_TX_LOG(DEBUG, "Running out of ring descriptors "
436 "(avail %d needed %d)", avail, count);
437 txq->stats.drop_total++;
439 txq->stats.drop_tso++;
440 rte_pktmbuf_free(txm);
445 /* Drop non-TSO packet that is excessively fragmented */
446 if (unlikely(!tso && count > VMXNET3_MAX_TXD_PER_PKT)) {
447 PMD_TX_LOG(ERR, "Non-TSO packet cannot occupy more than %d tx "
448 "descriptors. Packet dropped.", VMXNET3_MAX_TXD_PER_PKT);
449 txq->stats.drop_too_many_segs++;
450 txq->stats.drop_total++;
451 rte_pktmbuf_free(txm);
456 if (txm->nb_segs == 1 &&
457 rte_pktmbuf_pkt_len(txm) <= txq->txdata_desc_size) {
458 struct Vmxnet3_TxDataDesc *tdd;
460 /* Skip empty packets */
461 if (unlikely(rte_pktmbuf_pkt_len(txm) == 0)) {
462 txq->stats.drop_total++;
463 rte_pktmbuf_free(txm);
468 tdd = (struct Vmxnet3_TxDataDesc *)
469 ((uint8 *)txq->data_ring.base +
470 txq->cmd_ring.next2fill *
471 txq->txdata_desc_size);
472 copy_size = rte_pktmbuf_pkt_len(txm);
473 rte_memcpy(tdd->data, rte_pktmbuf_mtod(txm, char *), copy_size);
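/*
 * Single-segment packets no larger than txdata_desc_size are copied into
 * the per-queue Tx data ring; the SOP descriptor is then pointed at the
 * data ring, so the device fetches the payload from there instead of the
 * mbuf's own buffer.
 */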
476 /* use the previous gen bit for the SOP desc */
477 dw2 = (txq->cmd_ring.gen ^ 0x1) << VMXNET3_TXD_GEN_SHIFT;
478 first2fill = txq->cmd_ring.next2fill;
480 /* Remember the transmit buffer for cleanup */
481 tbi = txq->cmd_ring.buf_info + txq->cmd_ring.next2fill;
483 /* NB: the following assumes that VMXNET3 maximum
484 * transmit buffer size (16K) is greater than
485 * the maximum mbuf segment size.
487 gdesc = txq->cmd_ring.base + txq->cmd_ring.next2fill;
489 /* Skip empty segments */
490 if (unlikely(m_seg->data_len == 0))
495 (uint64)txq->cmd_ring.next2fill *
496 txq->txdata_desc_size;
498 rte_cpu_to_le_64(txq->data_ring.basePA +
501 gdesc->txd.addr = rte_mbuf_data_iova(m_seg);
504 gdesc->dword[2] = dw2 | m_seg->data_len;
507 /* move to the next2fill descriptor */
508 vmxnet3_cmd_ring_adv_next2fill(&txq->cmd_ring);
510 /* use the right gen for non-SOP desc */
511 dw2 = txq->cmd_ring.gen << VMXNET3_TXD_GEN_SHIFT;
512 } while ((m_seg = m_seg->next) != NULL);
514 /* set the last buf_info for the pkt */
516 /* Update the EOP descriptor */
517 gdesc->dword[3] |= VMXNET3_TXD_EOP | VMXNET3_TXD_CQ;
519 /* Add VLAN tag if present */
520 gdesc = txq->cmd_ring.base + first2fill;
521 if (txm->ol_flags & PKT_TX_VLAN_PKT) {
523 gdesc->txd.tci = txm->vlan_tci;
527 uint16_t mss = txm->tso_segsz;
531 gdesc->txd.hlen = txm->l2_len + txm->l3_len + txm->l4_len;
532 gdesc->txd.om = VMXNET3_OM_TSO;
533 gdesc->txd.msscof = mss;
535 deferred += (rte_pktmbuf_pkt_len(txm) - gdesc->txd.hlen + mss - 1) / mss;
536 } else if (txm->ol_flags & PKT_TX_L4_MASK) {
537 gdesc->txd.om = VMXNET3_OM_CSUM;
538 gdesc->txd.hlen = txm->l2_len + txm->l3_len;
540 switch (txm->ol_flags & PKT_TX_L4_MASK) {
541 case PKT_TX_TCP_CKSUM:
542 gdesc->txd.msscof = gdesc->txd.hlen + offsetof(struct tcp_hdr, cksum);
544 case PKT_TX_UDP_CKSUM:
545 gdesc->txd.msscof = gdesc->txd.hlen + offsetof(struct udp_hdr, dgram_cksum);
548 PMD_TX_LOG(WARNING, "requested cksum offload not supported %#llx",
549 txm->ol_flags & PKT_TX_L4_MASK);
555 gdesc->txd.om = VMXNET3_OM_NONE;
556 gdesc->txd.msscof = 0;
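/*
 * The SOP descriptor was written with the inverted generation bit (dw2 at
 * the top of the loop), so the device ignores the chain while it is being
 * built; the barrier below orders the descriptor writes before the flip
 * that hands the whole packet to the device in one step.
 */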
560 /* flip the GEN bit on the SOP */
561 rte_compiler_barrier();
562 gdesc->dword[2] ^= VMXNET3_TXD_GEN;
564 txq_ctrl->txNumDeferred = rte_cpu_to_le_32(deferred);
568 PMD_TX_LOG(DEBUG, "vmxnet3 txThreshold: %u", rte_le_to_cpu_32(txq_ctrl->txThreshold));
570 if (deferred >= rte_le_to_cpu_32(txq_ctrl->txThreshold)) {
571 txq_ctrl->txNumDeferred = 0;
572 /* Notify vSwitch that packets are available. */
573 VMXNET3_WRITE_BAR0_REG(hw, (VMXNET3_REG_TXPROD + txq->queue_id * VMXNET3_REG_ALIGN),
574 txq->cmd_ring.next2fill);
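/*
 * txNumDeferred counts packets queued since the last doorbell (for TSO,
 * the estimated number of resulting segments); TXPROD is written only once
 * it reaches the txThreshold value exported by the device, batching
 * register writes that are expensive in a VM.
 */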
581 vmxnet3_renew_desc(vmxnet3_rx_queue_t *rxq, uint8_t ring_id,
582 struct rte_mbuf *mbuf)
585 struct vmxnet3_cmd_ring *ring = &rxq->cmd_ring[ring_id];
586 struct Vmxnet3_RxDesc *rxd =
587 (struct Vmxnet3_RxDesc *)(ring->base + ring->next2fill);
588 vmxnet3_buf_info_t *buf_info = &ring->buf_info[ring->next2fill];
591 /* Usually: One HEAD type buf per packet
592 * val = (ring->next2fill % rxq->hw->bufs_per_pkt) ?
593 * VMXNET3_RXD_BTYPE_BODY : VMXNET3_RXD_BTYPE_HEAD;
596 /* We use a single buffer per packet, so all buffers here are HEAD type */
597 val = VMXNET3_RXD_BTYPE_HEAD;
599 /* All BODY type buffers for 2nd ring */
600 val = VMXNET3_RXD_BTYPE_BODY;
604 * Load the mbuf pointer into buf_info[ring->next2fill];
605 * the buf_info structure plays the same role as the cookie in a virtio virtqueue.
608 buf_info->len = (uint16_t)(mbuf->buf_len - RTE_PKTMBUF_HEADROOM);
609 buf_info->bufPA = rte_mbuf_data_iova_default(mbuf);
611 /* Load Rx Descriptor with the buffer's GPA */
612 rxd->addr = buf_info->bufPA;
614 /* After this point rxd->addr MUST not be NULL */
616 rxd->len = buf_info->len;
617 /* Flip gen bit at the end to change ownership */
618 rxd->gen = ring->gen;
620 vmxnet3_cmd_ring_adv_next2fill(ring);
623 * Allocates mbufs and posts Rx descriptors with the buffer details
624 * so that the device can receive packets into those buffers.
626 * Among the two rings, the 1st ring contains buffers of type 0 and type 1.
627 * bufs_per_pkt is set such that for non-LRO cases all the buffers required
628 * by a frame will fit in the 1st ring (the 1st buf of type 0 and the rest of type 1).
629 * The 2nd ring contains only type 1 buffers and is mostly used
633 vmxnet3_post_rx_bufs(vmxnet3_rx_queue_t *rxq, uint8_t ring_id)
637 struct vmxnet3_cmd_ring *ring = &rxq->cmd_ring[ring_id];
639 while (vmxnet3_cmd_ring_desc_avail(ring) > 0) {
640 struct rte_mbuf *mbuf;
642 /* Allocate blank mbuf for the current Rx Descriptor */
643 mbuf = rte_mbuf_raw_alloc(rxq->mp);
644 if (unlikely(mbuf == NULL)) {
645 PMD_RX_LOG(ERR, "Error allocating mbuf");
646 rxq->stats.rx_buf_alloc_failure++;
651 vmxnet3_renew_desc(rxq, ring_id, mbuf);
655 /* Return error only if no buffers are posted at present */
656 if (vmxnet3_cmd_ring_desc_avail(ring) >= (ring->size - 1))
662 /* MSS not provided by vmxnet3, guess one with available information */
664 vmxnet3_guess_mss(struct vmxnet3_hw *hw, const Vmxnet3_RxCompDesc *rcd,
665 struct rte_mbuf *rxm)
668 struct ipv4_hdr *ipv4_hdr;
669 struct ipv6_hdr *ipv6_hdr;
670 struct tcp_hdr *tcp_hdr;
673 RTE_ASSERT(rcd->tcp);
675 ptr = rte_pktmbuf_mtod(rxm, char *);
676 slen = rte_pktmbuf_data_len(rxm);
677 hlen = sizeof(struct ether_hdr);
680 if (unlikely(slen < hlen + sizeof(struct ipv4_hdr)))
681 return hw->mtu - sizeof(struct ipv4_hdr)
682 - sizeof(struct tcp_hdr);
684 ipv4_hdr = (struct ipv4_hdr *)(ptr + hlen);
685 hlen += (ipv4_hdr->version_ihl & IPV4_HDR_IHL_MASK) *
687 } else if (rcd->v6) {
688 if (unlikely(slen < hlen + sizeof(struct ipv6_hdr)))
689 return hw->mtu - sizeof(struct ipv6_hdr) -
690 sizeof(struct tcp_hdr);
692 ipv6_hdr = (struct ipv6_hdr *)(ptr + hlen);
693 hlen += sizeof(struct ipv6_hdr);
694 if (unlikely(ipv6_hdr->proto != IPPROTO_TCP)) {
697 rte_net_skip_ip6_ext(ipv6_hdr->proto, rxm,
702 if (unlikely(slen < hlen + sizeof(struct tcp_hdr)))
703 return hw->mtu - hlen - sizeof(struct tcp_hdr) +
704 sizeof(struct ether_hdr);
706 tcp_hdr = (struct tcp_hdr *)(ptr + hlen);
707 hlen += (tcp_hdr->data_off & 0xf0) >> 2;
709 if (rxm->udata64 > 1)
710 return (rte_pktmbuf_pkt_len(rxm) - hlen +
711 rxm->udata64 - 1) / rxm->udata64;
713 return hw->mtu - hlen + sizeof(struct ether_hdr);
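/*
 * The MSS estimate above parses the Ethernet/IPv4 or IPv6/TCP headers to
 * find the header length: when a segment count is available (stashed in
 * udata64 by the LRO completion) the MSS is the payload length divided by
 * that count, otherwise an MTU-based fallback is returned.
 */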
716 /* Receive side checksum and other offloads */
718 vmxnet3_rx_offload(struct vmxnet3_hw *hw, const Vmxnet3_RxCompDesc *rcd,
719 struct rte_mbuf *rxm, const uint8_t sop)
721 uint64_t ol_flags = rxm->ol_flags;
722 uint32_t packet_type = rxm->packet_type;
724 /* Offloads set in sop */
726 /* Set packet type */
727 packet_type |= RTE_PTYPE_L2_ETHER;
729 /* Check large packet receive */
730 if (VMXNET3_VERSION_GE_2(hw) &&
731 rcd->type == VMXNET3_CDTYPE_RXCOMP_LRO) {
732 const Vmxnet3_RxCompDescExt *rcde =
733 (const Vmxnet3_RxCompDescExt *)rcd;
735 rxm->tso_segsz = rcde->mss;
736 rxm->udata64 = rcde->segCnt;
737 ol_flags |= PKT_RX_LRO;
739 } else { /* Offloads set in eop */
741 if (rcd->rssType != VMXNET3_RCD_RSS_TYPE_NONE) {
742 ol_flags |= PKT_RX_RSS_HASH;
743 rxm->hash.rss = rcd->rssHash;
746 /* Check for hardware stripped VLAN tag */
748 ol_flags |= (PKT_RX_VLAN | PKT_RX_VLAN_STRIPPED);
749 rxm->vlan_tci = rte_le_to_cpu_16((uint16_t)rcd->tci);
752 /* Check packet type, checksum errors, etc. */
754 ol_flags |= PKT_RX_L4_CKSUM_UNKNOWN;
757 packet_type |= RTE_PTYPE_L3_IPV4_EXT_UNKNOWN;
760 ol_flags |= PKT_RX_IP_CKSUM_GOOD;
762 ol_flags |= PKT_RX_IP_CKSUM_BAD;
765 ol_flags |= PKT_RX_L4_CKSUM_GOOD;
767 packet_type |= RTE_PTYPE_L4_TCP;
769 packet_type |= RTE_PTYPE_L4_UDP;
772 packet_type |= RTE_PTYPE_L4_TCP;
773 ol_flags |= PKT_RX_L4_CKSUM_BAD;
774 } else if (rcd->udp) {
775 packet_type |= RTE_PTYPE_L4_UDP;
776 ol_flags |= PKT_RX_L4_CKSUM_BAD;
779 } else if (rcd->v6) {
780 packet_type |= RTE_PTYPE_L3_IPV6_EXT_UNKNOWN;
783 ol_flags |= PKT_RX_L4_CKSUM_GOOD;
785 packet_type |= RTE_PTYPE_L4_TCP;
787 packet_type |= RTE_PTYPE_L4_UDP;
790 packet_type |= RTE_PTYPE_L4_TCP;
791 ol_flags |= PKT_RX_L4_CKSUM_BAD;
792 } else if (rcd->udp) {
793 packet_type |= RTE_PTYPE_L4_UDP;
794 ol_flags |= PKT_RX_L4_CKSUM_BAD;
798 packet_type |= RTE_PTYPE_UNKNOWN;
801 /* Old variants of vmxnet3 do not provide MSS */
802 if ((ol_flags & PKT_RX_LRO) && rxm->tso_segsz == 0)
803 rxm->tso_segsz = vmxnet3_guess_mss(hw,
808 rxm->ol_flags = ol_flags;
809 rxm->packet_type = packet_type;
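/*
 * Offload information is split across completions as handled above: the SOP
 * completion carries the LRO/MSS data and the base packet type, while the
 * RSS hash, stripped VLAN tag and L3/L4 checksum status come from the EOP
 * completion.
 */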
813 * Process the Rx Completion Ring of the given vmxnet3_rx_queue
814 * for an nb_pkts burst and return the number of packets received.
817 vmxnet3_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t nb_pkts)
820 uint32_t nb_rxd, idx;
822 vmxnet3_rx_queue_t *rxq;
823 Vmxnet3_RxCompDesc *rcd;
824 vmxnet3_buf_info_t *rbi;
826 struct rte_mbuf *rxm = NULL;
827 struct vmxnet3_hw *hw;
837 rcd = &rxq->comp_ring.base[rxq->comp_ring.next2proc].rcd;
839 if (unlikely(rxq->stopped)) {
840 PMD_RX_LOG(DEBUG, "Rx queue is stopped.");
844 while (rcd->gen == rxq->comp_ring.gen) {
845 struct rte_mbuf *newm;
847 if (nb_rx >= nb_pkts)
850 newm = rte_mbuf_raw_alloc(rxq->mp);
851 if (unlikely(newm == NULL)) {
852 PMD_RX_LOG(ERR, "Error allocating mbuf");
853 rxq->stats.rx_buf_alloc_failure++;
858 ring_idx = vmxnet3_get_ring_idx(hw, rcd->rqID);
859 rxd = (Vmxnet3_RxDesc *)rxq->cmd_ring[ring_idx].base + idx;
860 RTE_SET_USED(rxd); /* used only for assert when enabled */
861 rbi = rxq->cmd_ring[ring_idx].buf_info + idx;
863 PMD_RX_LOG(DEBUG, "rxd idx: %d ring idx: %d.", idx, ring_idx);
865 RTE_ASSERT(rcd->len <= rxd->len);
868 /* Get the packet buffer pointer from buf_info */
871 /* Clear descriptor associated buf_info to be reused */
875 /* Update next2comp to the descriptor index this packet arrived on */
876 rxq->cmd_ring[ring_idx].next2comp = idx;
878 /* For RCD with EOP set, check if there is frame error */
879 if (unlikely(rcd->eop && rcd->err)) {
880 rxq->stats.drop_total++;
881 rxq->stats.drop_err++;
884 rxq->stats.drop_fcs++;
885 PMD_RX_LOG(ERR, "Recv packet dropped due to frame err.");
887 PMD_RX_LOG(ERR, "Error in received packet rcd#:%d rxd:%d",
888 (int)(rcd - (struct Vmxnet3_RxCompDesc *)
889 rxq->comp_ring.base), rcd->rxdIdx);
890 rte_pktmbuf_free_seg(rxm);
891 if (rxq->start_seg) {
892 struct rte_mbuf *start = rxq->start_seg;
894 rxq->start_seg = NULL;
895 rte_pktmbuf_free(start);
900 /* Initialize newly received packet buffer */
901 rxm->port = rxq->port_id;
904 rxm->pkt_len = (uint16_t)rcd->len;
905 rxm->data_len = (uint16_t)rcd->len;
906 rxm->data_off = RTE_PKTMBUF_HEADROOM;
909 rxm->packet_type = 0;
912 * If this is the first buffer of the received packet,
913 * set the pointer to the first mbuf of the packet
914 * Otherwise, update the total length and the number of segments
915 * of the current scattered packet, and update the pointer to
916 * the last mbuf of the current packet.
919 RTE_ASSERT(rxd->btype == VMXNET3_RXD_BTYPE_HEAD);
921 if (unlikely(rcd->len == 0)) {
922 RTE_ASSERT(rcd->eop);
925 "Rx buf was skipped. rxring[%d][%d]",
927 rte_pktmbuf_free_seg(rxm);
931 if (vmxnet3_rx_data_ring(hw, rcd->rqID)) {
932 uint8_t *rdd = rxq->data_ring.base +
933 idx * rxq->data_desc_size;
935 RTE_ASSERT(VMXNET3_VERSION_GE_3(hw));
936 rte_memcpy(rte_pktmbuf_mtod(rxm, char *),
940 rxq->start_seg = rxm;
942 vmxnet3_rx_offload(hw, rcd, rxm, 1);
944 struct rte_mbuf *start = rxq->start_seg;
946 RTE_ASSERT(rxd->btype == VMXNET3_RXD_BTYPE_BODY);
949 start->pkt_len += rxm->data_len;
952 rxq->last_seg->next = rxm;
955 rte_pktmbuf_free_seg(rxm);
960 struct rte_mbuf *start = rxq->start_seg;
962 vmxnet3_rx_offload(hw, rcd, start, 0);
963 rx_pkts[nb_rx++] = start;
964 rxq->start_seg = NULL;
968 rxq->cmd_ring[ring_idx].next2comp = idx;
969 VMXNET3_INC_RING_IDX_ONLY(rxq->cmd_ring[ring_idx].next2comp,
970 rxq->cmd_ring[ring_idx].size);
972 /* It's time to renew descriptors */
973 vmxnet3_renew_desc(rxq, ring_idx, newm);
974 if (unlikely(rxq->shared->ctrl.updateRxProd)) {
975 VMXNET3_WRITE_BAR0_REG(hw, rxprod_reg[ring_idx] + (rxq->queue_id * VMXNET3_REG_ALIGN),
976 rxq->cmd_ring[ring_idx].next2fill);
979 /* Advance to the next descriptor in comp_ring */
980 vmxnet3_comp_ring_adv_next2proc(&rxq->comp_ring);
982 rcd = &rxq->comp_ring.base[rxq->comp_ring.next2proc].rcd;
984 if (nb_rxd > rxq->cmd_ring[0].size) {
985 PMD_RX_LOG(ERR, "Used up quota of receiving packets,"
986 " relinquish control.");
991 if (unlikely(nb_rxd == 0)) {
993 for (ring_idx = 0; ring_idx < VMXNET3_RX_CMDRING_SIZE; ring_idx++) {
994 avail = vmxnet3_cmd_ring_desc_avail(&rxq->cmd_ring[ring_idx]);
995 if (unlikely(avail > 0)) {
996 /* try to alloc new buf and renew descriptors */
997 vmxnet3_post_rx_bufs(rxq, ring_idx);
1000 if (unlikely(rxq->shared->ctrl.updateRxProd)) {
1001 for (ring_idx = 0; ring_idx < VMXNET3_RX_CMDRING_SIZE; ring_idx++) {
1002 VMXNET3_WRITE_BAR0_REG(hw, rxprod_reg[ring_idx] + (rxq->queue_id * VMXNET3_REG_ALIGN),
1003 rxq->cmd_ring[ring_idx].next2fill);
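/*
 * If this poll returned no packets the rings may have run completely dry
 * (for instance after a run of mbuf allocation failures), so any free
 * descriptors are re-posted with fresh buffers and the producer registers
 * are written to let the device resume receiving.
 */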
1012 vmxnet3_dev_tx_queue_setup(struct rte_eth_dev *dev,
1015 unsigned int socket_id,
1016 const struct rte_eth_txconf *tx_conf __rte_unused)
1018 struct vmxnet3_hw *hw = dev->data->dev_private;
1019 const struct rte_memzone *mz;
1020 struct vmxnet3_tx_queue *txq;
1021 struct vmxnet3_cmd_ring *ring;
1022 struct vmxnet3_comp_ring *comp_ring;
1023 struct vmxnet3_data_ring *data_ring;
1026 PMD_INIT_FUNC_TRACE();
1028 txq = rte_zmalloc("ethdev_tx_queue", sizeof(struct vmxnet3_tx_queue),
1029 RTE_CACHE_LINE_SIZE);
1031 PMD_INIT_LOG(ERR, "Can not allocate tx queue structure");
1035 txq->queue_id = queue_idx;
1036 txq->port_id = dev->data->port_id;
1037 txq->shared = NULL; /* set in vmxnet3_setup_driver_shared() */
1039 txq->qid = queue_idx;
1040 txq->stopped = TRUE;
1041 txq->txdata_desc_size = hw->txdata_desc_size;
1043 ring = &txq->cmd_ring;
1044 comp_ring = &txq->comp_ring;
1045 data_ring = &txq->data_ring;
1047 /* Tx vmxnet ring length should be between 512 and 4096 */
1048 if (nb_desc < VMXNET3_DEF_TX_RING_SIZE) {
1049 PMD_INIT_LOG(ERR, "VMXNET3 Tx Ring Size Min: %u",
1050 VMXNET3_DEF_TX_RING_SIZE);
1052 } else if (nb_desc > VMXNET3_TX_RING_MAX_SIZE) {
1053 PMD_INIT_LOG(ERR, "VMXNET3 Tx Ring Size Max: %u",
1054 VMXNET3_TX_RING_MAX_SIZE);
1057 ring->size = nb_desc;
1058 ring->size &= ~VMXNET3_RING_SIZE_MASK;
1060 comp_ring->size = data_ring->size = ring->size;
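/*
 * nb_desc is rounded down to the ring-size alignment the device requires
 * (VMXNET3_RING_SIZE_MASK), and the completion and data rings are sized to
 * match the command ring one-to-one.
 */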
1062 /* Tx vmxnet rings structure initialization */
1063 ring->next2fill = 0;
1064 ring->next2comp = 0;
1065 ring->gen = VMXNET3_INIT_GEN;
1066 comp_ring->next2proc = 0;
1067 comp_ring->gen = VMXNET3_INIT_GEN;
1069 size = sizeof(struct Vmxnet3_TxDesc) * ring->size;
1070 size += sizeof(struct Vmxnet3_TxCompDesc) * comp_ring->size;
1071 size += txq->txdata_desc_size * data_ring->size;
1073 mz = rte_eth_dma_zone_reserve(dev, "txdesc", queue_idx, size,
1074 VMXNET3_RING_BA_ALIGN, socket_id);
1076 PMD_INIT_LOG(ERR, "ERROR: Creating queue descriptors zone");
1080 memset(mz->addr, 0, mz->len);
1082 /* cmd_ring initialization */
1083 ring->base = mz->addr;
1084 ring->basePA = mz->iova;
1086 /* comp_ring initialization */
1087 comp_ring->base = ring->base + ring->size;
1088 comp_ring->basePA = ring->basePA +
1089 (sizeof(struct Vmxnet3_TxDesc) * ring->size);
1091 /* data_ring initialization */
1092 data_ring->base = (Vmxnet3_TxDataDesc *)(comp_ring->base + comp_ring->size);
1093 data_ring->basePA = comp_ring->basePA +
1094 (sizeof(struct Vmxnet3_TxCompDesc) * comp_ring->size);
1096 /* cmd_ring0 buf_info allocation */
1097 ring->buf_info = rte_zmalloc("tx_ring_buf_info",
1098 ring->size * sizeof(vmxnet3_buf_info_t), RTE_CACHE_LINE_SIZE);
1099 if (ring->buf_info == NULL) {
1100 PMD_INIT_LOG(ERR, "ERROR: Creating tx_buf_info structure");
1104 /* Update the data portion with txq */
1105 dev->data->tx_queues[queue_idx] = txq;
1111 vmxnet3_dev_rx_queue_setup(struct rte_eth_dev *dev,
1114 unsigned int socket_id,
1115 __rte_unused const struct rte_eth_rxconf *rx_conf,
1116 struct rte_mempool *mp)
1118 const struct rte_memzone *mz;
1119 struct vmxnet3_rx_queue *rxq;
1120 struct vmxnet3_hw *hw = dev->data->dev_private;
1121 struct vmxnet3_cmd_ring *ring0, *ring1, *ring;
1122 struct vmxnet3_comp_ring *comp_ring;
1123 struct vmxnet3_rx_data_ring *data_ring;
1128 PMD_INIT_FUNC_TRACE();
1130 rxq = rte_zmalloc("ethdev_rx_queue", sizeof(struct vmxnet3_rx_queue),
1131 RTE_CACHE_LINE_SIZE);
1133 PMD_INIT_LOG(ERR, "Can not allocate rx queue structure");
1138 rxq->queue_id = queue_idx;
1139 rxq->port_id = dev->data->port_id;
1140 rxq->shared = NULL; /* set in vmxnet3_setup_driver_shared() */
1142 rxq->qid1 = queue_idx;
1143 rxq->qid2 = queue_idx + hw->num_rx_queues;
1144 rxq->data_ring_qid = queue_idx + 2 * hw->num_rx_queues;
1145 rxq->data_desc_size = hw->rxdata_desc_size;
1146 rxq->stopped = TRUE;
1148 ring0 = &rxq->cmd_ring[0];
1149 ring1 = &rxq->cmd_ring[1];
1150 comp_ring = &rxq->comp_ring;
1151 data_ring = &rxq->data_ring;
1153 /* Rx vmxnet ring lengths should be between 256 and 4096 */
1154 if (nb_desc < VMXNET3_DEF_RX_RING_SIZE) {
1155 PMD_INIT_LOG(ERR, "VMXNET3 Rx Ring Size Min: 256");
1157 } else if (nb_desc > VMXNET3_RX_RING_MAX_SIZE) {
1158 PMD_INIT_LOG(ERR, "VMXNET3 Rx Ring Size Max: 4096");
1161 ring0->size = nb_desc;
1162 ring0->size &= ~VMXNET3_RING_SIZE_MASK;
1163 ring1->size = ring0->size;
1166 comp_ring->size = ring0->size + ring1->size;
1167 data_ring->size = ring0->size;
1169 /* Rx vmxnet rings structure initialization */
1170 ring0->next2fill = 0;
1171 ring1->next2fill = 0;
1172 ring0->next2comp = 0;
1173 ring1->next2comp = 0;
1174 ring0->gen = VMXNET3_INIT_GEN;
1175 ring1->gen = VMXNET3_INIT_GEN;
1176 comp_ring->next2proc = 0;
1177 comp_ring->gen = VMXNET3_INIT_GEN;
1179 size = sizeof(struct Vmxnet3_RxDesc) * (ring0->size + ring1->size);
1180 size += sizeof(struct Vmxnet3_RxCompDesc) * comp_ring->size;
1181 if (VMXNET3_VERSION_GE_3(hw) && rxq->data_desc_size)
1182 size += rxq->data_desc_size * data_ring->size;
1184 mz = rte_eth_dma_zone_reserve(dev, "rxdesc", queue_idx, size,
1185 VMXNET3_RING_BA_ALIGN, socket_id);
1187 PMD_INIT_LOG(ERR, "ERROR: Creating queue descriptors zone");
1191 memset(mz->addr, 0, mz->len);
1193 /* cmd_ring0 initialization */
1194 ring0->base = mz->addr;
1195 ring0->basePA = mz->iova;
1197 /* cmd_ring1 initialization */
1198 ring1->base = ring0->base + ring0->size;
1199 ring1->basePA = ring0->basePA + sizeof(struct Vmxnet3_RxDesc) * ring0->size;
1201 /* comp_ring initialization */
1202 comp_ring->base = ring1->base + ring1->size;
1203 comp_ring->basePA = ring1->basePA + sizeof(struct Vmxnet3_RxDesc) *
1206 /* data_ring initialization */
1207 if (VMXNET3_VERSION_GE_3(hw) && rxq->data_desc_size) {
1209 (uint8_t *)(comp_ring->base + comp_ring->size);
1210 data_ring->basePA = comp_ring->basePA +
1211 sizeof(struct Vmxnet3_RxCompDesc) * comp_ring->size;
1214 /* cmd_ring0-cmd_ring1 buf_info allocation */
1215 for (i = 0; i < VMXNET3_RX_CMDRING_SIZE; i++) {
1217 ring = &rxq->cmd_ring[i];
1219 snprintf(mem_name, sizeof(mem_name), "rx_ring_%d_buf_info", i);
1221 ring->buf_info = rte_zmalloc(mem_name,
1222 ring->size * sizeof(vmxnet3_buf_info_t),
1223 RTE_CACHE_LINE_SIZE);
1224 if (ring->buf_info == NULL) {
1225 PMD_INIT_LOG(ERR, "ERROR: Creating rx_buf_info structure");
1230 /* Update the data portion with rxq */
1231 dev->data->rx_queues[queue_idx] = rxq;
1237 * Initializes the receive unit.
1238 * Loads mbufs into the Rx queues in advance.
1241 vmxnet3_dev_rxtx_init(struct rte_eth_dev *dev)
1243 struct vmxnet3_hw *hw = dev->data->dev_private;
1248 PMD_INIT_FUNC_TRACE();
1250 for (i = 0; i < hw->num_rx_queues; i++) {
1251 vmxnet3_rx_queue_t *rxq = dev->data->rx_queues[i];
1253 for (j = 0; j < VMXNET3_RX_CMDRING_SIZE; j++) {
1254 /* Post as many buffers as the ring can hold */
1255 ret = vmxnet3_post_rx_bufs(rxq, j);
1258 "ERROR: Posting Rxq: %d buffers ring: %d",
1263 * Update the device with the next2fill index so that it can
1264 * use the posted mbufs for incoming packets.
1266 if (unlikely(rxq->shared->ctrl.updateRxProd)) {
1267 VMXNET3_WRITE_BAR0_REG(hw, rxprod_reg[j] + (rxq->queue_id * VMXNET3_REG_ALIGN),
1268 rxq->cmd_ring[j].next2fill);
1271 rxq->stopped = FALSE;
1272 rxq->start_seg = NULL;
1275 for (i = 0; i < dev->data->nb_tx_queues; i++) {
1276 struct vmxnet3_tx_queue *txq = dev->data->tx_queues[i];
1278 txq->stopped = FALSE;
1284 static uint8_t rss_intel_key[40] = {
1285 0x6D, 0x5A, 0x56, 0xDA, 0x25, 0x5B, 0x0E, 0xC2,
1286 0x41, 0x67, 0x25, 0x3D, 0x43, 0xA3, 0x8F, 0xB0,
1287 0xD0, 0xCA, 0x2B, 0xCB, 0xAE, 0x7B, 0x30, 0xB4,
1288 0x77, 0xCB, 0x2D, 0xA3, 0x80, 0x30, 0xF2, 0x0C,
1289 0x6A, 0x42, 0xB7, 0x3B, 0xBE, 0xAC, 0x01, 0xFA,
1293 * Configure RSS feature
1296 vmxnet3_rss_configure(struct rte_eth_dev *dev)
1298 struct vmxnet3_hw *hw = dev->data->dev_private;
1299 struct VMXNET3_RSSConf *dev_rss_conf;
1300 struct rte_eth_rss_conf *port_rss_conf;
1304 PMD_INIT_FUNC_TRACE();
1306 dev_rss_conf = hw->rss_conf;
1307 port_rss_conf = &dev->data->dev_conf.rx_adv_conf.rss_conf;
1309 /* loading hashFunc */
1310 dev_rss_conf->hashFunc = VMXNET3_RSS_HASH_FUNC_TOEPLITZ;
1311 /* loading hashKeySize */
1312 dev_rss_conf->hashKeySize = VMXNET3_RSS_MAX_KEY_SIZE;
1313 /* loading indTableSize: Must not exceed VMXNET3_RSS_MAX_IND_TABLE_SIZE (128) */
1314 dev_rss_conf->indTableSize = (uint16_t)(hw->num_rx_queues * 4);
1316 if (port_rss_conf->rss_key == NULL) {
1317 /* Default hash key */
1318 port_rss_conf->rss_key = rss_intel_key;
1321 /* loading hashKey */
1322 memcpy(&dev_rss_conf->hashKey[0], port_rss_conf->rss_key,
1323 dev_rss_conf->hashKeySize);
1325 /* loading indTable */
1326 for (i = 0, j = 0; i < dev_rss_conf->indTableSize; i++, j++) {
1327 if (j == dev->data->nb_rx_queues)
1329 dev_rss_conf->indTable[i] = j;
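/*
 * The indirection table entries are spread over the configured Rx queues in
 * round-robin order, so RSS hash buckets map evenly onto the queues.
 */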
1332 /* loading hashType */
1333 dev_rss_conf->hashType = 0;
1334 rss_hf = port_rss_conf->rss_hf & VMXNET3_RSS_OFFLOAD_ALL;
1335 if (rss_hf & ETH_RSS_IPV4)
1336 dev_rss_conf->hashType |= VMXNET3_RSS_HASH_TYPE_IPV4;
1337 if (rss_hf & ETH_RSS_NONFRAG_IPV4_TCP)
1338 dev_rss_conf->hashType |= VMXNET3_RSS_HASH_TYPE_TCP_IPV4;
1339 if (rss_hf & ETH_RSS_IPV6)
1340 dev_rss_conf->hashType |= VMXNET3_RSS_HASH_TYPE_IPV6;
1341 if (rss_hf & ETH_RSS_NONFRAG_IPV6_TCP)
1342 dev_rss_conf->hashType |= VMXNET3_RSS_HASH_TYPE_TCP_IPV6;
1344 return VMXNET3_SUCCESS;