/* SPDX-License-Identifier: BSD-3-Clause
 * Copyright(c) 2010-2015 Intel Corporation
 */
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <stdint.h>

#include <rte_byteorder.h>
#include <rte_common.h>
#include <rte_cycles.h>
#include <rte_log.h>
#include <rte_debug.h>
#include <rte_interrupts.h>
#include <rte_memory.h>
#include <rte_memzone.h>
#include <rte_launch.h>
#include <rte_per_lcore.h>
#include <rte_lcore.h>
#include <rte_atomic.h>
#include <rte_branch_prediction.h>
#include <rte_mempool.h>
#include <rte_malloc.h>
#include <rte_mbuf.h>
#include <rte_ether.h>
#include <rte_ethdev_driver.h>
#include <rte_prefetch.h>
#include <rte_ip.h>
#include <rte_udp.h>
#include <rte_tcp.h>
#include <rte_string_fns.h>
#include <rte_errno.h>
#include <rte_net.h>
#include "base/vmxnet3_defs.h"
#include "vmxnet3_ring.h"

#include "vmxnet3_logs.h"
#include "vmxnet3_ethdev.h"
#define	VMXNET3_TX_OFFLOAD_MASK	( \
		PKT_TX_VLAN_PKT | \
		PKT_TX_L4_MASK |  \
		PKT_TX_IPV6 |     \
		PKT_TX_IPV4 |     \
		PKT_TX_TCP_SEG)

#define	VMXNET3_TX_OFFLOAD_NOTSUP_MASK	\
	(PKT_TX_OFFLOAD_MASK ^ VMXNET3_TX_OFFLOAD_MASK)
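
/*
 * Any mbuf ol_flags bit outside VMXNET3_TX_OFFLOAD_MASK is unsupported by
 * this device; vmxnet3_prep_pkts() uses the NOTSUP mask to reject such
 * packets before they reach vmxnet3_xmit_pkts().
 */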
static const uint32_t rxprod_reg[2] = {VMXNET3_REG_RXPROD, VMXNET3_REG_RXPROD2};
static int vmxnet3_post_rx_bufs(vmxnet3_rx_queue_t *, uint8_t);
static void vmxnet3_tq_tx_complete(vmxnet3_tx_queue_t *);
#ifdef RTE_LIBRTE_VMXNET3_DEBUG_DRIVER_NOT_USED
static void vmxnet3_rxq_dump(struct vmxnet3_rx_queue *);
static void vmxnet3_txq_dump(struct vmxnet3_tx_queue *);
#endif
#ifdef RTE_LIBRTE_VMXNET3_DEBUG_DRIVER_NOT_USED
static void
vmxnet3_rxq_dump(struct vmxnet3_rx_queue *rxq)
{
	uint32_t avail = 0;

	if (rxq == NULL)
		return;

	PMD_RX_LOG(DEBUG,
		   "RXQ: cmd0 base : %p cmd1 base : %p comp ring base : %p.",
		   rxq->cmd_ring[0].base, rxq->cmd_ring[1].base, rxq->comp_ring.base);
	PMD_RX_LOG(DEBUG,
		   "RXQ: cmd0 basePA : 0x%lx cmd1 basePA : 0x%lx comp ring basePA : 0x%lx.",
		   (unsigned long)rxq->cmd_ring[0].basePA,
		   (unsigned long)rxq->cmd_ring[1].basePA,
		   (unsigned long)rxq->comp_ring.basePA);

	avail = vmxnet3_cmd_ring_desc_avail(&rxq->cmd_ring[0]);
	PMD_RX_LOG(DEBUG,
		   "RXQ:cmd0: size=%u; free=%u; next2proc=%u; queued=%u",
		   (uint32_t)rxq->cmd_ring[0].size, avail,
		   rxq->comp_ring.next2proc,
		   rxq->cmd_ring[0].size - avail);

	avail = vmxnet3_cmd_ring_desc_avail(&rxq->cmd_ring[1]);
	PMD_RX_LOG(DEBUG, "RXQ:cmd1 size=%u; free=%u; next2proc=%u; queued=%u",
		   (uint32_t)rxq->cmd_ring[1].size, avail, rxq->comp_ring.next2proc,
		   rxq->cmd_ring[1].size - avail);
}
static void
vmxnet3_txq_dump(struct vmxnet3_tx_queue *txq)
{
	uint32_t avail = 0;

	if (txq == NULL)
		return;

	PMD_TX_LOG(DEBUG, "TXQ: cmd base : %p comp ring base : %p data ring base : %p.",
		   txq->cmd_ring.base, txq->comp_ring.base, txq->data_ring.base);
	PMD_TX_LOG(DEBUG, "TXQ: cmd basePA : 0x%lx comp ring basePA : 0x%lx data ring basePA : 0x%lx.",
		   (unsigned long)txq->cmd_ring.basePA,
		   (unsigned long)txq->comp_ring.basePA,
		   (unsigned long)txq->data_ring.basePA);

	avail = vmxnet3_cmd_ring_desc_avail(&txq->cmd_ring);
	PMD_TX_LOG(DEBUG, "TXQ: size=%u; free=%u; next2proc=%u; queued=%u",
		   (uint32_t)txq->cmd_ring.size, avail,
		   txq->comp_ring.next2proc, txq->cmd_ring.size - avail);
}
#endif
static void
vmxnet3_tx_cmd_ring_release_mbufs(vmxnet3_cmd_ring_t *ring)
{
	while (ring->next2comp != ring->next2fill) {
		/* No need to worry about desc ownership, device is quiesced by now. */
		vmxnet3_buf_info_t *buf_info = ring->buf_info + ring->next2comp;

		if (buf_info->m) {
			rte_pktmbuf_free(buf_info->m);
			buf_info->m = NULL;
			buf_info->bufPA = 0;
			buf_info->len = 0;
		}
		vmxnet3_cmd_ring_adv_next2comp(ring);
	}
}
static void
vmxnet3_rx_cmd_ring_release_mbufs(vmxnet3_cmd_ring_t *ring)
{
	uint32_t i;

	for (i = 0; i < ring->size; i++) {
		/* No need to worry about desc ownership, device is quiesced by now. */
		vmxnet3_buf_info_t *buf_info = &ring->buf_info[i];

		if (buf_info->m) {
			rte_pktmbuf_free_seg(buf_info->m);
			buf_info->m = NULL;
			buf_info->bufPA = 0;
			buf_info->len = 0;
		}
		vmxnet3_cmd_ring_adv_next2comp(ring);
	}
}
static void
vmxnet3_cmd_ring_release(vmxnet3_cmd_ring_t *ring)
{
	rte_free(ring->buf_info);
	ring->buf_info = NULL;
}
void
vmxnet3_dev_tx_queue_release(void *txq)
{
	vmxnet3_tx_queue_t *tq = txq;

	if (tq != NULL) {
		/* Release mbufs */
		vmxnet3_tx_cmd_ring_release_mbufs(&tq->cmd_ring);
		/* Release the cmd_ring */
		vmxnet3_cmd_ring_release(&tq->cmd_ring);
		/* Release the memzone */
		rte_memzone_free(tq->mz);
		/* Release the queue */
		rte_free(tq);
	}
}
void
vmxnet3_dev_rx_queue_release(void *rxq)
{
	int i;
	vmxnet3_rx_queue_t *rq = rxq;

	if (rq != NULL) {
		/* Release mbufs */
		for (i = 0; i < VMXNET3_RX_CMDRING_SIZE; i++)
			vmxnet3_rx_cmd_ring_release_mbufs(&rq->cmd_ring[i]);

		/* Release both the cmd_rings */
		for (i = 0; i < VMXNET3_RX_CMDRING_SIZE; i++)
			vmxnet3_cmd_ring_release(&rq->cmd_ring[i]);

		/* Release the memzone */
		rte_memzone_free(rq->mz);

		/* Release the queue */
		rte_free(rq);
	}
}
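
/*
 * The reset helpers below reinitialize ring indices and gen bits after the
 * mbufs have been released. The cmd, comp and data rings of a queue are
 * carved out of one contiguous memzone (see the *_queue_setup() functions
 * further down), which is why a single memset over the summed ring sizes
 * is enough to clear all of them.
 */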
static void
vmxnet3_dev_tx_queue_reset(void *txq)
{
	vmxnet3_tx_queue_t *tq = txq;
	struct vmxnet3_cmd_ring *ring = &tq->cmd_ring;
	struct vmxnet3_comp_ring *comp_ring = &tq->comp_ring;
	struct vmxnet3_data_ring *data_ring = &tq->data_ring;
	int size;

	if (tq != NULL) {
		/* Release the cmd_ring mbufs */
		vmxnet3_tx_cmd_ring_release_mbufs(&tq->cmd_ring);
	}

	/* Tx vmxnet rings structure initialization */
	ring->next2fill = 0;
	ring->next2comp = 0;
	ring->gen = VMXNET3_INIT_GEN;
	comp_ring->next2proc = 0;
	comp_ring->gen = VMXNET3_INIT_GEN;

	size = sizeof(struct Vmxnet3_TxDesc) * ring->size;
	size += sizeof(struct Vmxnet3_TxCompDesc) * comp_ring->size;
	size += tq->txdata_desc_size * data_ring->size;

	memset(ring->base, 0, size);
}
static void
vmxnet3_dev_rx_queue_reset(void *rxq)
{
	int i;
	vmxnet3_rx_queue_t *rq = rxq;
	struct vmxnet3_hw *hw = rq->hw;
	struct vmxnet3_cmd_ring *ring0, *ring1;
	struct vmxnet3_comp_ring *comp_ring;
	struct vmxnet3_rx_data_ring *data_ring = &rq->data_ring;
	int size;

	/* Release both the cmd_rings mbufs */
	for (i = 0; i < VMXNET3_RX_CMDRING_SIZE; i++)
		vmxnet3_rx_cmd_ring_release_mbufs(&rq->cmd_ring[i]);

	ring0 = &rq->cmd_ring[0];
	ring1 = &rq->cmd_ring[1];
	comp_ring = &rq->comp_ring;

	/* Rx vmxnet rings structure initialization */
	ring0->next2fill = 0;
	ring1->next2fill = 0;
	ring0->next2comp = 0;
	ring1->next2comp = 0;
	ring0->gen = VMXNET3_INIT_GEN;
	ring1->gen = VMXNET3_INIT_GEN;
	comp_ring->next2proc = 0;
	comp_ring->gen = VMXNET3_INIT_GEN;

	size = sizeof(struct Vmxnet3_RxDesc) * (ring0->size + ring1->size);
	size += sizeof(struct Vmxnet3_RxCompDesc) * comp_ring->size;
	if (VMXNET3_VERSION_GE_3(hw) && rq->data_desc_size)
		size += rq->data_desc_size * data_ring->size;

	memset(ring0->base, 0, size);
}
void
vmxnet3_dev_clear_queues(struct rte_eth_dev *dev)
{
	unsigned i;

	PMD_INIT_FUNC_TRACE();

	for (i = 0; i < dev->data->nb_tx_queues; i++) {
		struct vmxnet3_tx_queue *txq = dev->data->tx_queues[i];

		if (txq != NULL) {
			txq->stopped = TRUE;
			vmxnet3_dev_tx_queue_reset(txq);
		}
	}

	for (i = 0; i < dev->data->nb_rx_queues; i++) {
		struct vmxnet3_rx_queue *rxq = dev->data->rx_queues[i];

		if (rxq != NULL) {
			rxq->stopped = TRUE;
			vmxnet3_dev_rx_queue_reset(rxq);
		}
	}
}
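
/*
 * Tx completion handling: the device writes one Tx completion descriptor
 * (tcd) per packet, carrying the index of that packet's EOP command
 * descriptor. vmxnet3_unmap_pkt() walks the command ring from next2comp up
 * to that EOP index, frees the mbuf chain stored at the EOP slot, and
 * returns the number of command descriptors retired.
 */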
static int
vmxnet3_unmap_pkt(uint16_t eop_idx, vmxnet3_tx_queue_t *txq)
{
	int completed = 0;
	struct rte_mbuf *mbuf;

	/* Release cmd_ring descriptor and free mbuf */
	RTE_ASSERT(txq->cmd_ring.base[eop_idx].txd.eop == 1);

	mbuf = txq->cmd_ring.buf_info[eop_idx].m;
	if (mbuf == NULL)
		rte_panic("EOP desc does not point to a valid mbuf");
	rte_pktmbuf_free(mbuf);

	txq->cmd_ring.buf_info[eop_idx].m = NULL;

	while (txq->cmd_ring.next2comp != eop_idx) {
		/* no out-of-order completion */
		RTE_ASSERT(txq->cmd_ring.base[txq->cmd_ring.next2comp].txd.cq == 0);
		vmxnet3_cmd_ring_adv_next2comp(&txq->cmd_ring);
		completed++;
	}

	/* Mark the txd for which tcd was generated as completed */
	vmxnet3_cmd_ring_adv_next2comp(&txq->cmd_ring);

	return completed + 1;
}
static void
vmxnet3_tq_tx_complete(vmxnet3_tx_queue_t *txq)
{
	int completed = 0;
	vmxnet3_comp_ring_t *comp_ring = &txq->comp_ring;
	struct Vmxnet3_TxCompDesc *tcd = (struct Vmxnet3_TxCompDesc *)
		(comp_ring->base + comp_ring->next2proc);

	while (tcd->gen == comp_ring->gen) {
		completed += vmxnet3_unmap_pkt(tcd->txdIdx, txq);

		vmxnet3_comp_ring_adv_next2proc(comp_ring);
		tcd = (struct Vmxnet3_TxCompDesc *)(comp_ring->base +
						    comp_ring->next2proc);
	}

	PMD_TX_LOG(DEBUG, "Processed %d tx comps & command descs.", completed);
}
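
/*
 * Descriptor ownership is tracked with a generation ("gen") bit rather than
 * a head/tail pair: a completion entry belongs to the driver only while
 * tcd->gen matches comp_ring->gen, and the ring's gen value is flipped on
 * every wrap (in the vmxnet3_*_ring_adv_* helpers), so entries left over
 * from the previous lap fail the comparison above.
 */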
uint16_t
vmxnet3_prep_pkts(__rte_unused void *tx_queue, struct rte_mbuf **tx_pkts,
	uint16_t nb_pkts)
{
	int32_t ret;
	uint32_t i;
	uint64_t ol_flags;
	struct rte_mbuf *m;

	for (i = 0; i != nb_pkts; i++) {
		m = tx_pkts[i];
		ol_flags = m->ol_flags;

		/* Non-TSO packet cannot occupy more than
		 * VMXNET3_MAX_TXD_PER_PKT TX descriptors.
		 */
		if ((ol_flags & PKT_TX_TCP_SEG) == 0 &&
				m->nb_segs > VMXNET3_MAX_TXD_PER_PKT) {
			rte_errno = -EINVAL;
			return i;
		}

		/* check that only supported TX offloads are requested. */
		if ((ol_flags & VMXNET3_TX_OFFLOAD_NOTSUP_MASK) != 0 ||
				(ol_flags & PKT_TX_L4_MASK) ==
				PKT_TX_SCTP_CKSUM) {
			rte_errno = -ENOTSUP;
			return i;
		}

#ifdef RTE_LIBRTE_ETHDEV_DEBUG
		ret = rte_validate_tx_offload(m);
		if (ret != 0) {
			rte_errno = ret;
			return i;
		}
#endif
		ret = rte_net_intel_cksum_prepare(m);
		if (ret != 0) {
			rte_errno = ret;
			return i;
		}
	}

	return i;
}
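
/*
 * Usage sketch (illustrative, not part of the driver): an application runs
 * the prepare stage before the burst so that checksum contents are set up
 * and unsupported offload requests are caught early, e.g.:
 *
 *	uint16_t n = rte_eth_tx_prepare(port_id, queue_id, pkts, nb);
 *	if (n != nb)
 *		handle_failed_pkt(pkts[n], rte_errno);
 *	uint16_t sent = rte_eth_tx_burst(port_id, queue_id, pkts, n);
 *
 * rte_eth_tx_prepare() dispatches to vmxnet3_prep_pkts() for this PMD;
 * handle_failed_pkt() is a hypothetical application hook.
 */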
uint16_t
vmxnet3_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts,
		  uint16_t nb_pkts)
{
	uint16_t nb_tx;
	vmxnet3_tx_queue_t *txq = tx_queue;
	struct vmxnet3_hw *hw = txq->hw;
	Vmxnet3_TxQueueCtrl *txq_ctrl = &txq->shared->ctrl;
	uint32_t deferred = rte_le_to_cpu_32(txq_ctrl->txNumDeferred);

	if (unlikely(txq->stopped)) {
		PMD_TX_LOG(DEBUG, "Tx queue is stopped.");
		return 0;
	}

	/* Free up the comp_descriptors aggressively */
	vmxnet3_tq_tx_complete(txq);
	nb_tx = 0;
	while (nb_tx < nb_pkts) {
		Vmxnet3_GenericDesc *gdesc;
		vmxnet3_buf_info_t *tbi;
		uint32_t first2fill, avail, dw2;
		struct rte_mbuf *txm = tx_pkts[nb_tx];
		struct rte_mbuf *m_seg = txm;
		int copy_size = 0;
		bool tso = (txm->ol_flags & PKT_TX_TCP_SEG) != 0;
		/* # of descriptors needed for a packet. */
		unsigned count = txm->nb_segs;

		avail = vmxnet3_cmd_ring_desc_avail(&txq->cmd_ring);
		if (count > avail) {
			/* Is command ring full? */
			if (unlikely(avail == 0)) {
				PMD_TX_LOG(DEBUG, "No free ring descriptors");
				txq->stats.tx_ring_full++;
				txq->stats.drop_total += (nb_pkts - nb_tx);
				break;
			}

			/* Command ring is not full but cannot handle the
			 * multi-segmented packet. Let's try the next packet
			 * in this case.
			 */
			PMD_TX_LOG(DEBUG, "Running out of ring descriptors "
				   "(avail %d needed %d)", avail, count);
			txq->stats.drop_total++;
			if (tso)
				txq->stats.drop_tso++;
			rte_pktmbuf_free(txm);
			nb_tx++;
			continue;
		}

		/* Drop non-TSO packet that is excessively fragmented */
		if (unlikely(!tso && count > VMXNET3_MAX_TXD_PER_PKT)) {
			PMD_TX_LOG(ERR, "Non-TSO packet cannot occupy more than %d tx "
				   "descriptors. Packet dropped.", VMXNET3_MAX_TXD_PER_PKT);
			txq->stats.drop_too_many_segs++;
			txq->stats.drop_total++;
			rte_pktmbuf_free(txm);
			nb_tx++;
			continue;
		}
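
		/* Packets that fit in a single Tx data-ring slot are copied
		 * there below, so the device can fetch the payload from the
		 * descriptor area instead of issuing a separate DMA read of
		 * the mbuf; gdesc->txd.addr then points into the data ring.
		 */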
		if (txm->nb_segs == 1 &&
		    rte_pktmbuf_pkt_len(txm) <= txq->txdata_desc_size) {
			struct Vmxnet3_TxDataDesc *tdd;

			tdd = (struct Vmxnet3_TxDataDesc *)
				((uint8 *)txq->data_ring.base +
				 txq->cmd_ring.next2fill *
				 txq->txdata_desc_size);
			copy_size = rte_pktmbuf_pkt_len(txm);
			rte_memcpy(tdd->data, rte_pktmbuf_mtod(txm, char *), copy_size);
		}
		/* use the previous gen bit for the SOP desc */
		dw2 = (txq->cmd_ring.gen ^ 0x1) << VMXNET3_TXD_GEN_SHIFT;
		first2fill = txq->cmd_ring.next2fill;
		do {
			/* Remember the transmit buffer for cleanup */
			tbi = txq->cmd_ring.buf_info + txq->cmd_ring.next2fill;

			/* NB: the following assumes that VMXNET3 maximum
			 * transmit buffer size (16K) is greater than
			 * maximum size of mbuf segment size.
			 */
			gdesc = txq->cmd_ring.base + txq->cmd_ring.next2fill;
			if (copy_size) {
				uint64 offset =
					(uint64)txq->cmd_ring.next2fill *
						txq->txdata_desc_size;
				gdesc->txd.addr =
					rte_cpu_to_le_64(txq->data_ring.basePA +
							 offset);
			} else {
				gdesc->txd.addr = rte_mbuf_data_iova(m_seg);
			}

			gdesc->dword[2] = dw2 | m_seg->data_len;
			gdesc->dword[3] = 0;

			/* move to the next2fill descriptor */
			vmxnet3_cmd_ring_adv_next2fill(&txq->cmd_ring);

			/* use the right gen for non-SOP desc */
			dw2 = txq->cmd_ring.gen << VMXNET3_TXD_GEN_SHIFT;
		} while ((m_seg = m_seg->next) != NULL);

		/* set the last buf_info for the pkt */
		tbi->m = txm;
		/* Update the EOP descriptor */
		gdesc->dword[3] |= VMXNET3_TXD_EOP | VMXNET3_TXD_CQ;
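
		/* Offload state (om/hlen/msscof) lives in the SOP descriptor,
		 * so gdesc is rewound to the first descriptor of this packet
		 * below. For TSO, "deferred" advances by the number of
		 * resulting segments: ceil((pkt_len - hlen) / mss).
		 */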
		/* Add VLAN tag if present */
		gdesc = txq->cmd_ring.base + first2fill;
		if (txm->ol_flags & PKT_TX_VLAN_PKT) {
			gdesc->txd.ti = 1;
			gdesc->txd.tci = txm->vlan_tci;
		}

		if (tso) {
			uint16_t mss = txm->tso_segsz;

			RTE_ASSERT(mss > 0);

			gdesc->txd.hlen = txm->l2_len + txm->l3_len + txm->l4_len;
			gdesc->txd.om = VMXNET3_OM_TSO;
			gdesc->txd.msscof = mss;

			deferred += (rte_pktmbuf_pkt_len(txm) - gdesc->txd.hlen + mss - 1) / mss;
		} else if (txm->ol_flags & PKT_TX_L4_MASK) {
			gdesc->txd.om = VMXNET3_OM_CSUM;
			gdesc->txd.hlen = txm->l2_len + txm->l3_len;

			switch (txm->ol_flags & PKT_TX_L4_MASK) {
			case PKT_TX_TCP_CKSUM:
				gdesc->txd.msscof = gdesc->txd.hlen +
					offsetof(struct tcp_hdr, cksum);
				break;
			case PKT_TX_UDP_CKSUM:
				gdesc->txd.msscof = gdesc->txd.hlen +
					offsetof(struct udp_hdr, dgram_cksum);
				break;
			default:
				PMD_TX_LOG(WARNING, "requested cksum offload not supported %#llx",
					   txm->ol_flags & PKT_TX_L4_MASK);
				abort();
			}
			deferred++;
		} else {
			gdesc->txd.hlen = 0;
			gdesc->txd.om = VMXNET3_OM_NONE;
			gdesc->txd.msscof = 0;
			deferred++;
		}
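
		/* The barrier below orders all descriptor writes before the
		 * SOP gen-bit flip that hands the packet chain to the device.
		 * The TXPROD doorbell write after the loop is batched via
		 * txThreshold to limit the number of (expensive) BAR0 writes.
		 */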
		/* flip the GEN bit on the SOP */
		rte_compiler_barrier();
		gdesc->dword[2] ^= VMXNET3_TXD_GEN;

		txq_ctrl->txNumDeferred = rte_cpu_to_le_32(deferred);
		nb_tx++;
	}

	PMD_TX_LOG(DEBUG, "vmxnet3 txThreshold: %u", rte_le_to_cpu_32(txq_ctrl->txThreshold));

	if (deferred >= rte_le_to_cpu_32(txq_ctrl->txThreshold)) {
		txq_ctrl->txNumDeferred = 0;
		/* Notify vSwitch that packets are available. */
		VMXNET3_WRITE_BAR0_REG(hw, (VMXNET3_REG_TXPROD + txq->queue_id * VMXNET3_REG_ALIGN),
				       txq->cmd_ring.next2fill);
	}

	return nb_tx;
}
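
/*
 * Refill one Rx descriptor with a freshly allocated mbuf. The descriptor's
 * gen bit is written last so the device never sees a partially initialized
 * descriptor.
 */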
static inline void
vmxnet3_renew_desc(vmxnet3_rx_queue_t *rxq, uint8_t ring_id,
		   struct rte_mbuf *mbuf)
{
	uint32_t val;
	struct vmxnet3_cmd_ring *ring = &rxq->cmd_ring[ring_id];
	struct Vmxnet3_RxDesc *rxd =
		(struct Vmxnet3_RxDesc *)(ring->base + ring->next2fill);
	vmxnet3_buf_info_t *buf_info = &ring->buf_info[ring->next2fill];

	if (ring_id == 0) {
		/* Usually: One HEAD type buf per packet
		 * val = (ring->next2fill % rxq->hw->bufs_per_pkt) ?
		 * VMXNET3_RXD_BTYPE_BODY : VMXNET3_RXD_BTYPE_HEAD;
		 */

		/* We use single packet buffer so all heads here */
		val = VMXNET3_RXD_BTYPE_HEAD;
	} else {
		/* All BODY type buffers for 2nd ring */
		val = VMXNET3_RXD_BTYPE_BODY;
	}

	/*
	 * Load mbuf pointer into buf_info[ring_size]
	 * buf_info structure is equivalent to cookie for virtio-virtqueue
	 */
	buf_info->m = mbuf;
	buf_info->len = (uint16_t)(mbuf->buf_len - RTE_PKTMBUF_HEADROOM);
	buf_info->bufPA = rte_mbuf_data_iova_default(mbuf);

	/* Load Rx Descriptor with the buffer's GPA */
	rxd->addr = buf_info->bufPA;

	/* After this point rxd->addr MUST not be NULL */
	rxd->btype = val;
	rxd->len = buf_info->len;
	/* Flip gen bit at the end to change ownership */
	rxd->gen = ring->gen;

	vmxnet3_cmd_ring_adv_next2fill(ring);
}
/*
 * Allocates mbufs and clusters. Posts Rx descriptors with buffer details
 * so that the device can receive packets in those buffers.
 * Ring layout:
 *      Among the two rings, the 1st ring contains buffers of type 0 and type 1.
 *      bufs_per_pkt is set such that for non-LRO cases all the buffers required
 *      by a frame will fit in the 1st ring (1st buf of type0 and rest of type1).
 *      The 2nd ring contains buffers of type 1 alone and is used mostly for LRO.
 */
static int
vmxnet3_post_rx_bufs(vmxnet3_rx_queue_t *rxq, uint8_t ring_id)
{
	int err = 0;
	uint32_t i = 0;
	struct vmxnet3_cmd_ring *ring = &rxq->cmd_ring[ring_id];

	while (vmxnet3_cmd_ring_desc_avail(ring) > 0) {
		struct rte_mbuf *mbuf;

		/* Allocate blank mbuf for the current Rx Descriptor */
		mbuf = rte_mbuf_raw_alloc(rxq->mp);
		if (unlikely(mbuf == NULL)) {
			PMD_RX_LOG(ERR, "Error allocating mbuf");
			rxq->stats.rx_buf_alloc_failure++;
			err = ENOMEM;
			break;
		}

		vmxnet3_renew_desc(rxq, ring_id, mbuf);
		i++;
	}

	/* Return error only if no buffers are posted at present */
	if (vmxnet3_cmd_ring_desc_avail(ring) >= (ring->size - 1))
		return -err;
	else
		return i;
}
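
/*
 * Older device versions do not report an MSS for LRO-coalesced packets, so
 * vmxnet3_guess_mss() reconstructs one: it walks the Ethernet/IP/TCP headers
 * to find the payload offset and, when the coalesced segment count is known
 * (stashed in rxm->udata64 by vmxnet3_rx_offload()), computes
 * mss = ceil((pkt_len - hlen) / segCnt); otherwise it falls back to an
 * MTU-based estimate.
 */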
/* MSS not provided by vmxnet3, guess one with available information */
static uint16_t
vmxnet3_guess_mss(struct vmxnet3_hw *hw, const Vmxnet3_RxCompDesc *rcd,
		  struct rte_mbuf *rxm)
{
	uint32_t hlen, slen;
	struct ipv4_hdr *ipv4_hdr;
	struct ipv6_hdr *ipv6_hdr;
	struct tcp_hdr *tcp_hdr;
	char *ptr;

	RTE_ASSERT(rcd->tcp);

	ptr = rte_pktmbuf_mtod(rxm, char *);
	slen = rte_pktmbuf_data_len(rxm);
	hlen = sizeof(struct ether_hdr);

	if (rcd->v4) {
		if (unlikely(slen < hlen + sizeof(struct ipv4_hdr)))
			return hw->mtu - sizeof(struct ipv4_hdr)
					- sizeof(struct tcp_hdr);

		ipv4_hdr = (struct ipv4_hdr *)(ptr + hlen);
		hlen += (ipv4_hdr->version_ihl & IPV4_HDR_IHL_MASK) *
				IPV4_IHL_MULTIPLIER;
	} else if (rcd->v6) {
		if (unlikely(slen < hlen + sizeof(struct ipv6_hdr)))
			return hw->mtu - sizeof(struct ipv6_hdr) -
					sizeof(struct tcp_hdr);

		ipv6_hdr = (struct ipv6_hdr *)(ptr + hlen);
		hlen += sizeof(struct ipv6_hdr);
		if (unlikely(ipv6_hdr->proto != IPPROTO_TCP)) {
			int frag;

			rte_net_skip_ip6_ext(ipv6_hdr->proto, rxm,
					     &hlen, &frag);
		}
	}

	if (unlikely(slen < hlen + sizeof(struct tcp_hdr)))
		return hw->mtu - hlen - sizeof(struct tcp_hdr) +
				sizeof(struct ether_hdr);

	tcp_hdr = (struct tcp_hdr *)(ptr + hlen);
	hlen += (tcp_hdr->data_off & 0xf0) >> 2;

	if (rxm->udata64 > 1)
		return (rte_pktmbuf_pkt_len(rxm) - hlen +
				rxm->udata64 - 1) / rxm->udata64;
	else
		return hw->mtu - hlen + sizeof(struct ether_hdr);
}
/* Receive side checksum and other offloads */
static inline void
vmxnet3_rx_offload(struct vmxnet3_hw *hw, const Vmxnet3_RxCompDesc *rcd,
		   struct rte_mbuf *rxm, const uint8_t sop)
{
	uint64_t ol_flags = rxm->ol_flags;
	uint32_t packet_type = rxm->packet_type;

	/* Offloads set in sop */
	if (sop) {
		/* Set packet type */
		packet_type |= RTE_PTYPE_L2_ETHER;

		/* Check large packet receive */
		if (VMXNET3_VERSION_GE_2(hw) &&
		    rcd->type == VMXNET3_CDTYPE_RXCOMP_LRO) {
			const Vmxnet3_RxCompDescExt *rcde =
					(const Vmxnet3_RxCompDescExt *)rcd;

			rxm->tso_segsz = rcde->mss;
			rxm->udata64 = rcde->segCnt;
			ol_flags |= PKT_RX_LRO;
		}
	} else { /* Offloads set in eop */
		/* Check for RSS */
		if (rcd->rssType != VMXNET3_RCD_RSS_TYPE_NONE) {
			ol_flags |= PKT_RX_RSS_HASH;
			rxm->hash.rss = rcd->rssHash;
		}

		/* Check for hardware stripped VLAN tag */
		if (rcd->ts) {
			ol_flags |= (PKT_RX_VLAN | PKT_RX_VLAN_STRIPPED);
			rxm->vlan_tci = rte_le_to_cpu_16((uint16_t)rcd->tci);
		}

		/* Check packet type, checksum errors, etc. */
		if (rcd->cnc) {
			ol_flags |= PKT_RX_L4_CKSUM_UNKNOWN;
		} else {
			if (rcd->v4) {
				packet_type |= RTE_PTYPE_L3_IPV4_EXT_UNKNOWN;

				if (rcd->ipc)
					ol_flags |= PKT_RX_IP_CKSUM_GOOD;
				else
					ol_flags |= PKT_RX_IP_CKSUM_BAD;

				if (rcd->tuc) {
					ol_flags |= PKT_RX_L4_CKSUM_GOOD;
					if (rcd->tcp)
						packet_type |= RTE_PTYPE_L4_TCP;
					else
						packet_type |= RTE_PTYPE_L4_UDP;
				} else {
					if (rcd->tcp) {
						packet_type |= RTE_PTYPE_L4_TCP;
						ol_flags |= PKT_RX_L4_CKSUM_BAD;
					} else if (rcd->udp) {
						packet_type |= RTE_PTYPE_L4_UDP;
						ol_flags |= PKT_RX_L4_CKSUM_BAD;
					}
				}
			} else if (rcd->v6) {
				packet_type |= RTE_PTYPE_L3_IPV6_EXT_UNKNOWN;

				if (rcd->tuc) {
					ol_flags |= PKT_RX_L4_CKSUM_GOOD;
					if (rcd->tcp)
						packet_type |= RTE_PTYPE_L4_TCP;
					else
						packet_type |= RTE_PTYPE_L4_UDP;
				} else {
					if (rcd->tcp) {
						packet_type |= RTE_PTYPE_L4_TCP;
						ol_flags |= PKT_RX_L4_CKSUM_BAD;
					} else if (rcd->udp) {
						packet_type |= RTE_PTYPE_L4_UDP;
						ol_flags |= PKT_RX_L4_CKSUM_BAD;
					}
				}
			} else {
				packet_type |= RTE_PTYPE_UNKNOWN;
			}

			/* Old variants of vmxnet3 do not provide MSS */
			if ((ol_flags & PKT_RX_LRO) && rxm->tso_segsz == 0)
				rxm->tso_segsz = vmxnet3_guess_mss(hw,
						rcd, rxm);
		}
	}

	rxm->ol_flags = ol_flags;
	rxm->packet_type = packet_type;
}
/*
 * Process the Rx Completion Ring of given vmxnet3_rx_queue
 * for nb_pkts burst and return the number of packets received
 */
uint16_t
vmxnet3_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t nb_pkts)
{
	uint16_t nb_rx;
	uint32_t nb_rxd, idx;
	uint8_t ring_idx;
	vmxnet3_rx_queue_t *rxq;
	Vmxnet3_RxCompDesc *rcd;
	vmxnet3_buf_info_t *rbi;
	Vmxnet3_RxDesc *rxd;
	struct rte_mbuf *rxm = NULL;
	struct vmxnet3_hw *hw;

	nb_rx = 0;
	ring_idx = 0;
	nb_rxd = 0;
	idx = 0;

	rxq = rx_queue;
	hw = rxq->hw;

	rcd = &rxq->comp_ring.base[rxq->comp_ring.next2proc].rcd;

	if (unlikely(rxq->stopped)) {
		PMD_RX_LOG(DEBUG, "Rx queue is stopped.");
		return 0;
	}

	while (rcd->gen == rxq->comp_ring.gen) {
		struct rte_mbuf *newm;

		if (nb_rx >= nb_pkts)
			break;

		newm = rte_mbuf_raw_alloc(rxq->mp);
		if (unlikely(newm == NULL)) {
			PMD_RX_LOG(ERR, "Error allocating mbuf");
			rxq->stats.rx_buf_alloc_failure++;
			break;
		}

		idx = rcd->rxdIdx;
		ring_idx = vmxnet3_get_ring_idx(hw, rcd->rqID);
		rxd = (Vmxnet3_RxDesc *)rxq->cmd_ring[ring_idx].base + idx;
		RTE_SET_USED(rxd); /* used only for assert when enabled */
		rbi = rxq->cmd_ring[ring_idx].buf_info + idx;

		PMD_RX_LOG(DEBUG, "rxd idx: %d ring idx: %d.", idx, ring_idx);

		RTE_ASSERT(rcd->len <= rxd->len);
		RTE_ASSERT(rbi->m);

		/* Get the packet buffer pointer from buf_info */
		rxm = rbi->m;

		/* Clear descriptor associated buf_info to be reused */
		rbi->m = NULL;
		rbi->bufPA = 0;

		/* Update the index that we received a packet */
		rxq->cmd_ring[ring_idx].next2comp = idx;

		/* For RCD with EOP set, check if there is frame error */
		if (unlikely(rcd->eop && rcd->err)) {
			rxq->stats.drop_total++;
			rxq->stats.drop_err++;

			if (!rcd->fcs) {
				rxq->stats.drop_fcs++;
				PMD_RX_LOG(ERR, "Recv packet dropped due to frame err.");
			}
			PMD_RX_LOG(ERR, "Error in received packet rcd#:%d rxd:%d",
				   (int)(rcd - (struct Vmxnet3_RxCompDesc *)
					 rxq->comp_ring.base), rcd->rxdIdx);
			rte_pktmbuf_free_seg(rxm);
			if (rxq->start_seg) {
				struct rte_mbuf *start = rxq->start_seg;

				rxq->start_seg = NULL;
				rte_pktmbuf_free(start);
			}
			goto rcd_done;
		}

		/* Initialize newly received packet buffer */
		rxm->port = rxq->port_id;
		rxm->nb_segs = 1;
		rxm->next = NULL;
		rxm->pkt_len = (uint16_t)rcd->len;
		rxm->data_len = (uint16_t)rcd->len;
		rxm->data_off = RTE_PKTMBUF_HEADROOM;
		rxm->ol_flags = 0;
		rxm->vlan_tci = 0;
		rxm->packet_type = 0;

		/*
		 * If this is the first buffer of the received packet,
		 * set the pointer to the first mbuf of the packet
		 * Otherwise, update the total length and the number of segments
		 * of the current scattered packet, and update the pointer to
		 * the last mbuf of the current packet.
		 */
		if (rcd->sop) {
			RTE_ASSERT(rxd->btype == VMXNET3_RXD_BTYPE_HEAD);

			if (unlikely(rcd->len == 0)) {
				RTE_ASSERT(rcd->eop);

				PMD_RX_LOG(DEBUG,
					   "Rx buf was skipped. rxring[%d][%d])",
					   ring_idx, idx);
				rte_pktmbuf_free_seg(rxm);
				goto rcd_done;
			}

			if (vmxnet3_rx_data_ring(hw, rcd->rqID)) {
				uint8_t *rdd = rxq->data_ring.base +
					idx * rxq->data_desc_size;

				RTE_ASSERT(VMXNET3_VERSION_GE_3(hw));
				rte_memcpy(rte_pktmbuf_mtod(rxm, char *),
					   rdd, rcd->len);
			}

			rxq->start_seg = rxm;
			rxq->last_seg = rxm;
			vmxnet3_rx_offload(hw, rcd, rxm, 1);
		} else {
			struct rte_mbuf *start = rxq->start_seg;

			RTE_ASSERT(rxd->btype == VMXNET3_RXD_BTYPE_BODY);

			if (rxm->data_len) {
				start->pkt_len += rxm->data_len;
				start->nb_segs++;

				rxq->last_seg->next = rxm;
				rxq->last_seg = rxm;
			} else {
				rte_pktmbuf_free_seg(rxm);
			}
		}

		if (rcd->eop) {
			struct rte_mbuf *start = rxq->start_seg;

			vmxnet3_rx_offload(hw, rcd, start, 0);
			rx_pkts[nb_rx++] = start;
			rxq->start_seg = NULL;
		}

rcd_done:
		rxq->cmd_ring[ring_idx].next2comp = idx;
		VMXNET3_INC_RING_IDX_ONLY(rxq->cmd_ring[ring_idx].next2comp,
					  rxq->cmd_ring[ring_idx].size);

		/* It's time to renew descriptors */
		vmxnet3_renew_desc(rxq, ring_idx, newm);
		if (unlikely(rxq->shared->ctrl.updateRxProd)) {
			VMXNET3_WRITE_BAR0_REG(hw, rxprod_reg[ring_idx] + (rxq->queue_id * VMXNET3_REG_ALIGN),
					       rxq->cmd_ring[ring_idx].next2fill);
		}

		/* Advance to the next descriptor in comp_ring */
		vmxnet3_comp_ring_adv_next2proc(&rxq->comp_ring);

		rcd = &rxq->comp_ring.base[rxq->comp_ring.next2proc].rcd;
		nb_rxd++;
		if (nb_rxd > rxq->cmd_ring[0].size) {
			PMD_RX_LOG(ERR, "Used up quota of receiving packets,"
				   " relinquish control.");
			break;
		}
	}

	if (unlikely(nb_rxd == 0)) {
		uint32_t avail;

		for (ring_idx = 0; ring_idx < VMXNET3_RX_CMDRING_SIZE; ring_idx++) {
			avail = vmxnet3_cmd_ring_desc_avail(&rxq->cmd_ring[ring_idx]);
			if (unlikely(avail > 0)) {
				/* try to alloc new buf and renew descriptors */
				vmxnet3_post_rx_bufs(rxq, ring_idx);
			}
		}
		if (unlikely(rxq->shared->ctrl.updateRxProd)) {
			for (ring_idx = 0; ring_idx < VMXNET3_RX_CMDRING_SIZE; ring_idx++) {
				VMXNET3_WRITE_BAR0_REG(hw, rxprod_reg[ring_idx] + (rxq->queue_id * VMXNET3_REG_ALIGN),
						       rxq->cmd_ring[ring_idx].next2fill);
			}
		}
	}

	return nb_rx;
}
int
vmxnet3_dev_tx_queue_setup(struct rte_eth_dev *dev,
			   uint16_t queue_idx,
			   uint16_t nb_desc,
			   unsigned int socket_id,
			   const struct rte_eth_txconf *tx_conf)
{
	struct vmxnet3_hw *hw = dev->data->dev_private;
	const struct rte_memzone *mz;
	struct vmxnet3_tx_queue *txq;
	struct vmxnet3_cmd_ring *ring;
	struct vmxnet3_comp_ring *comp_ring;
	struct vmxnet3_data_ring *data_ring;
	int size;

	PMD_INIT_FUNC_TRACE();

	if ((tx_conf->txq_flags & ETH_TXQ_FLAGS_NOXSUMSCTP) !=
	    ETH_TXQ_FLAGS_NOXSUMSCTP) {
		PMD_INIT_LOG(ERR, "SCTP checksum offload not supported");
		return -EINVAL;
	}

	txq = rte_zmalloc("ethdev_tx_queue", sizeof(struct vmxnet3_tx_queue),
			  RTE_CACHE_LINE_SIZE);
	if (txq == NULL) {
		PMD_INIT_LOG(ERR, "Can not allocate tx queue structure");
		return -ENOMEM;
	}

	txq->queue_id = queue_idx;
	txq->port_id = dev->data->port_id;
	txq->shared = NULL; /* set in vmxnet3_setup_driver_shared() */
	txq->hw = hw;
	txq->qid = queue_idx;
	txq->stopped = TRUE;
	txq->txdata_desc_size = hw->txdata_desc_size;

	ring = &txq->cmd_ring;
	comp_ring = &txq->comp_ring;
	data_ring = &txq->data_ring;

	/* Tx vmxnet ring length should be between 512-4096 */
	if (nb_desc < VMXNET3_DEF_TX_RING_SIZE) {
		PMD_INIT_LOG(ERR, "VMXNET3 Tx Ring Size Min: %u",
			     VMXNET3_DEF_TX_RING_SIZE);
		return -EINVAL;
	} else if (nb_desc > VMXNET3_TX_RING_MAX_SIZE) {
		PMD_INIT_LOG(ERR, "VMXNET3 Tx Ring Size Max: %u",
			     VMXNET3_TX_RING_MAX_SIZE);
		return -EINVAL;
	} else {
		ring->size = nb_desc;
		ring->size &= ~VMXNET3_RING_SIZE_MASK;
	}
	comp_ring->size = data_ring->size = ring->size;
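
	/* Note: the masking above rounds the requested descriptor count down
	 * to the device's ring-size alignment (VMXNET3_RING_SIZE_MASK is
	 * assumed to be that alignment minus one), so the effective ring may
	 * be smaller than nb_desc.
	 */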
	/* Tx vmxnet rings structure initialization */
	ring->next2fill = 0;
	ring->next2comp = 0;
	ring->gen = VMXNET3_INIT_GEN;
	comp_ring->next2proc = 0;
	comp_ring->gen = VMXNET3_INIT_GEN;

	size = sizeof(struct Vmxnet3_TxDesc) * ring->size;
	size += sizeof(struct Vmxnet3_TxCompDesc) * comp_ring->size;
	size += txq->txdata_desc_size * data_ring->size;

	mz = rte_eth_dma_zone_reserve(dev, "txdesc", queue_idx, size,
				      VMXNET3_RING_BA_ALIGN, socket_id);
	if (mz == NULL) {
		PMD_INIT_LOG(ERR, "ERROR: Creating queue descriptors zone");
		return -ENOMEM;
	}
	txq->mz = mz;
	memset(mz->addr, 0, mz->len);

	/* cmd_ring initialization */
	ring->base = mz->addr;
	ring->basePA = mz->iova;

	/* comp_ring initialization */
	comp_ring->base = ring->base + ring->size;
	comp_ring->basePA = ring->basePA +
		(sizeof(struct Vmxnet3_TxDesc) * ring->size);

	/* data_ring initialization */
	data_ring->base = (Vmxnet3_TxDataDesc *)(comp_ring->base + comp_ring->size);
	data_ring->basePA = comp_ring->basePA +
		(sizeof(struct Vmxnet3_TxCompDesc) * comp_ring->size);

	/* cmd_ring0 buf_info allocation */
	ring->buf_info = rte_zmalloc("tx_ring_buf_info",
				     ring->size * sizeof(vmxnet3_buf_info_t),
				     RTE_CACHE_LINE_SIZE);
	if (ring->buf_info == NULL) {
		PMD_INIT_LOG(ERR, "ERROR: Creating tx_buf_info structure");
		return -ENOMEM;
	}

	/* Update the data portion with txq */
	dev->data->tx_queues[queue_idx] = txq;

	return 0;
}
int
vmxnet3_dev_rx_queue_setup(struct rte_eth_dev *dev,
			   uint16_t queue_idx,
			   uint16_t nb_desc,
			   unsigned int socket_id,
			   __rte_unused const struct rte_eth_rxconf *rx_conf,
			   struct rte_mempool *mp)
{
	const struct rte_memzone *mz;
	struct vmxnet3_rx_queue *rxq;
	struct vmxnet3_hw *hw = dev->data->dev_private;
	struct vmxnet3_cmd_ring *ring0, *ring1, *ring;
	struct vmxnet3_comp_ring *comp_ring;
	struct vmxnet3_rx_data_ring *data_ring;
	int size;
	uint8_t i;
	char mem_name[32];

	PMD_INIT_FUNC_TRACE();

	rxq = rte_zmalloc("ethdev_rx_queue", sizeof(struct vmxnet3_rx_queue),
			  RTE_CACHE_LINE_SIZE);
	if (rxq == NULL) {
		PMD_INIT_LOG(ERR, "Can not allocate rx queue structure");
		return -ENOMEM;
	}

	rxq->mp = mp;
	rxq->queue_id = queue_idx;
	rxq->port_id = dev->data->port_id;
	rxq->shared = NULL; /* set in vmxnet3_setup_driver_shared() */
	rxq->hw = hw;
	rxq->qid1 = queue_idx;
	rxq->qid2 = queue_idx + hw->num_rx_queues;
	rxq->data_ring_qid = queue_idx + 2 * hw->num_rx_queues;
	rxq->data_desc_size = hw->rxdata_desc_size;
	rxq->stopped = TRUE;

	ring0 = &rxq->cmd_ring[0];
	ring1 = &rxq->cmd_ring[1];
	comp_ring = &rxq->comp_ring;
	data_ring = &rxq->data_ring;

	/* Rx vmxnet rings length should be between 256-4096 */
	if (nb_desc < VMXNET3_DEF_RX_RING_SIZE) {
		PMD_INIT_LOG(ERR, "VMXNET3 Rx Ring Size Min: 256");
		return -EINVAL;
	} else if (nb_desc > VMXNET3_RX_RING_MAX_SIZE) {
		PMD_INIT_LOG(ERR, "VMXNET3 Rx Ring Size Max: 4096");
		return -EINVAL;
	} else {
		ring0->size = nb_desc;
		ring0->size &= ~VMXNET3_RING_SIZE_MASK;
		ring1->size = ring0->size;
	}

	comp_ring->size = ring0->size + ring1->size;
	data_ring->size = ring0->size;

	/* Rx vmxnet rings structure initialization */
	ring0->next2fill = 0;
	ring1->next2fill = 0;
	ring0->next2comp = 0;
	ring1->next2comp = 0;
	ring0->gen = VMXNET3_INIT_GEN;
	ring1->gen = VMXNET3_INIT_GEN;
	comp_ring->next2proc = 0;
	comp_ring->gen = VMXNET3_INIT_GEN;

	size = sizeof(struct Vmxnet3_RxDesc) * (ring0->size + ring1->size);
	size += sizeof(struct Vmxnet3_RxCompDesc) * comp_ring->size;
	if (VMXNET3_VERSION_GE_3(hw) && rxq->data_desc_size)
		size += rxq->data_desc_size * data_ring->size;

	mz = rte_eth_dma_zone_reserve(dev, "rxdesc", queue_idx, size,
				      VMXNET3_RING_BA_ALIGN, socket_id);
	if (mz == NULL) {
		PMD_INIT_LOG(ERR, "ERROR: Creating queue descriptors zone");
		return -ENOMEM;
	}
	rxq->mz = mz;
	memset(mz->addr, 0, mz->len);

	/* cmd_ring0 initialization */
	ring0->base = mz->addr;
	ring0->basePA = mz->iova;

	/* cmd_ring1 initialization */
	ring1->base = ring0->base + ring0->size;
	ring1->basePA = ring0->basePA + sizeof(struct Vmxnet3_RxDesc) * ring0->size;

	/* comp_ring initialization */
	comp_ring->base = ring1->base + ring1->size;
	comp_ring->basePA = ring1->basePA + sizeof(struct Vmxnet3_RxDesc) *
		ring1->size;

	/* data_ring initialization */
	if (VMXNET3_VERSION_GE_3(hw) && rxq->data_desc_size) {
		data_ring->base =
			(uint8_t *)(comp_ring->base + comp_ring->size);
		data_ring->basePA = comp_ring->basePA +
			sizeof(struct Vmxnet3_RxCompDesc) * comp_ring->size;
	}

	/* cmd_ring0-cmd_ring1 buf_info allocation */
	for (i = 0; i < VMXNET3_RX_CMDRING_SIZE; i++) {
		ring = &rxq->cmd_ring[i];
		ring->rid = i;
		snprintf(mem_name, sizeof(mem_name), "rx_ring_%d_buf_info", i);

		ring->buf_info = rte_zmalloc(mem_name,
					     ring->size * sizeof(vmxnet3_buf_info_t),
					     RTE_CACHE_LINE_SIZE);
		if (ring->buf_info == NULL) {
			PMD_INIT_LOG(ERR, "ERROR: Creating rx_buf_info structure");
			return -ENOMEM;
		}
	}

	/* Update the data portion with rxq */
	dev->data->rx_queues[queue_idx] = rxq;

	return 0;
}
/*
 * Initializes Receive Unit
 * Load mbufs in rx queue in advance
 */
int
vmxnet3_dev_rxtx_init(struct rte_eth_dev *dev)
{
	struct vmxnet3_hw *hw = dev->data->dev_private;

	int i, ret;
	uint8_t j;

	PMD_INIT_FUNC_TRACE();

	for (i = 0; i < hw->num_rx_queues; i++) {
		vmxnet3_rx_queue_t *rxq = dev->data->rx_queues[i];

		for (j = 0; j < VMXNET3_RX_CMDRING_SIZE; j++) {
			/* Post the full ring of buffers for this cmd ring */
			ret = vmxnet3_post_rx_bufs(rxq, j);
			if (ret <= 0) {
				PMD_INIT_LOG(ERR,
					     "ERROR: Posting Rxq: %d buffers ring: %d",
					     i, j);
				return -ret;
			}
			/*
			 * Updating device with the index:next2fill to fill the
			 * mbufs for coming packets.
			 */
			if (unlikely(rxq->shared->ctrl.updateRxProd)) {
				VMXNET3_WRITE_BAR0_REG(hw, rxprod_reg[j] + (rxq->queue_id * VMXNET3_REG_ALIGN),
						       rxq->cmd_ring[j].next2fill);
			}
		}
		rxq->stopped = FALSE;
		rxq->start_seg = NULL;
	}

	for (i = 0; i < dev->data->nb_tx_queues; i++) {
		struct vmxnet3_tx_queue *txq = dev->data->tx_queues[i];

		txq->stopped = FALSE;
	}

	return 0;
}
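
/*
 * Default 40-byte Toeplitz RSS hash key, the same default key used by the
 * Intel PMDs (hence the name).
 */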
static uint8_t rss_intel_key[40] = {
	0x6D, 0x5A, 0x56, 0xDA, 0x25, 0x5B, 0x0E, 0xC2,
	0x41, 0x67, 0x25, 0x3D, 0x43, 0xA3, 0x8F, 0xB0,
	0xD0, 0xCA, 0x2B, 0xCB, 0xAE, 0x7B, 0x30, 0xB4,
	0x77, 0xCB, 0x2D, 0xA3, 0x80, 0x30, 0xF2, 0x0C,
	0x6A, 0x42, 0xB7, 0x3B, 0xBE, 0xAC, 0x01, 0xFA,
};
/*
 * Configure RSS feature
 */
int
vmxnet3_rss_configure(struct rte_eth_dev *dev)
{
	struct vmxnet3_hw *hw = dev->data->dev_private;
	struct VMXNET3_RSSConf *dev_rss_conf;
	struct rte_eth_rss_conf *port_rss_conf;
	uint64_t rss_hf;
	uint8_t i, j;

	PMD_INIT_FUNC_TRACE();

	dev_rss_conf = hw->rss_conf;
	port_rss_conf = &dev->data->dev_conf.rx_adv_conf.rss_conf;

	/* loading hashFunc */
	dev_rss_conf->hashFunc = VMXNET3_RSS_HASH_FUNC_TOEPLITZ;
	/* loading hashKeySize */
	dev_rss_conf->hashKeySize = VMXNET3_RSS_MAX_KEY_SIZE;
	/* loading indTableSize: Must not exceed VMXNET3_RSS_MAX_IND_TABLE_SIZE (128) */
	dev_rss_conf->indTableSize = (uint16_t)(hw->num_rx_queues * 4);

	if (port_rss_conf->rss_key == NULL) {
		/* Default hash key */
		port_rss_conf->rss_key = rss_intel_key;
	}

	/* loading hashKey */
	memcpy(&dev_rss_conf->hashKey[0], port_rss_conf->rss_key,
	       dev_rss_conf->hashKeySize);
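
	/* The indirection table is filled round-robin across the configured
	 * Rx queues; e.g. with 4 Rx queues, indTableSize is 16 and the table
	 * becomes {0, 1, 2, 3, 0, 1, 2, 3, ...}.
	 */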
	/* loading indTable */
	for (i = 0, j = 0; i < dev_rss_conf->indTableSize; i++, j++) {
		if (j == dev->data->nb_rx_queues)
			j = 0;

		dev_rss_conf->indTable[i] = j;
	}

	/* loading hashType */
	dev_rss_conf->hashType = 0;
	rss_hf = port_rss_conf->rss_hf & VMXNET3_RSS_OFFLOAD_ALL;
	if (rss_hf & ETH_RSS_IPV4)
		dev_rss_conf->hashType |= VMXNET3_RSS_HASH_TYPE_IPV4;
	if (rss_hf & ETH_RSS_NONFRAG_IPV4_TCP)
		dev_rss_conf->hashType |= VMXNET3_RSS_HASH_TYPE_TCP_IPV4;
	if (rss_hf & ETH_RSS_IPV6)
		dev_rss_conf->hashType |= VMXNET3_RSS_HASH_TYPE_IPV6;
	if (rss_hf & ETH_RSS_NONFRAG_IPV6_TCP)
		dev_rss_conf->hashType |= VMXNET3_RSS_HASH_TYPE_TCP_IPV6;

	return VMXNET3_SUCCESS;
}