/*-
 *   BSD LICENSE
 *
 *   Copyright(c) 2010-2015 Intel Corporation. All rights reserved.
 *   All rights reserved.
 *
 *   Redistribution and use in source and binary forms, with or without
 *   modification, are permitted provided that the following conditions
 *   are met:
 *
 *     * Redistributions of source code must retain the above copyright
 *       notice, this list of conditions and the following disclaimer.
 *     * Redistributions in binary form must reproduce the above copyright
 *       notice, this list of conditions and the following disclaimer in
 *       the documentation and/or other materials provided with the
 *       distribution.
 *     * Neither the name of Intel Corporation nor the names of its
 *       contributors may be used to endorse or promote products derived
 *       from this software without specific prior written permission.
 *
 *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */
#include <sys/queue.h>

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <errno.h>
#include <stdint.h>
#include <stdarg.h>
#include <unistd.h>
#include <inttypes.h>

#include <rte_byteorder.h>
#include <rte_common.h>
#include <rte_cycles.h>
#include <rte_log.h>
#include <rte_debug.h>
#include <rte_interrupts.h>
#include <rte_pci.h>
#include <rte_memory.h>
#include <rte_memzone.h>
#include <rte_launch.h>
#include <rte_eal.h>
#include <rte_per_lcore.h>
#include <rte_lcore.h>
#include <rte_atomic.h>
#include <rte_branch_prediction.h>
#include <rte_mempool.h>
#include <rte_malloc.h>
#include <rte_mbuf.h>
#include <rte_ether.h>
#include <rte_ethdev.h>
#include <rte_prefetch.h>
#include <rte_ip.h>
#include <rte_udp.h>
#include <rte_tcp.h>
#include <rte_sctp.h>
#include <rte_string_fns.h>
#include <rte_errno.h>
#include <rte_net.h>

#include "base/vmxnet3_defs.h"
#include "vmxnet3_ring.h"

#include "vmxnet3_logs.h"
#include "vmxnet3_ethdev.h"
#define	VMXNET3_TX_OFFLOAD_MASK	( \
		PKT_TX_VLAN_PKT | \
		PKT_TX_L4_MASK |  \
		PKT_TX_TCP_SEG)

#define	VMXNET3_TX_OFFLOAD_NOTSUP_MASK	\
	(PKT_TX_OFFLOAD_MASK ^ VMXNET3_TX_OFFLOAD_MASK)
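
/* BAR0 producer registers for the two Rx command rings, indexed by ring_id,
 * so the receive path can bump the right register after refilling either ring.
 */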
static const uint32_t rxprod_reg[2] = {VMXNET3_REG_RXPROD, VMXNET3_REG_RXPROD2};

static int vmxnet3_post_rx_bufs(vmxnet3_rx_queue_t*, uint8_t);
static void vmxnet3_tq_tx_complete(vmxnet3_tx_queue_t *);
#ifdef RTE_LIBRTE_VMXNET3_DEBUG_DRIVER_NOT_USED
static void vmxnet3_rxq_dump(struct vmxnet3_rx_queue *);
static void vmxnet3_txq_dump(struct vmxnet3_tx_queue *);
#endif
#ifdef RTE_LIBRTE_VMXNET3_DEBUG_DRIVER_NOT_USED
static void
vmxnet3_rxq_dump(struct vmxnet3_rx_queue *rxq)
{
	uint32_t avail = 0;

	if (rxq == NULL)
		return;

	PMD_RX_LOG(DEBUG,
		   "RXQ: cmd0 base : %p cmd1 base : %p comp ring base : %p.",
		   rxq->cmd_ring[0].base, rxq->cmd_ring[1].base, rxq->comp_ring.base);
	PMD_RX_LOG(DEBUG,
		   "RXQ: cmd0 basePA : 0x%lx cmd1 basePA : 0x%lx comp ring basePA : 0x%lx.",
		   (unsigned long)rxq->cmd_ring[0].basePA,
		   (unsigned long)rxq->cmd_ring[1].basePA,
		   (unsigned long)rxq->comp_ring.basePA);

	avail = vmxnet3_cmd_ring_desc_avail(&rxq->cmd_ring[0]);
	PMD_RX_LOG(DEBUG,
		   "RXQ:cmd0: size=%u; free=%u; next2proc=%u; queued=%u",
		   (uint32_t)rxq->cmd_ring[0].size, avail,
		   rxq->comp_ring.next2proc,
		   rxq->cmd_ring[0].size - avail);

	avail = vmxnet3_cmd_ring_desc_avail(&rxq->cmd_ring[1]);
	PMD_RX_LOG(DEBUG, "RXQ:cmd1 size=%u; free=%u; next2proc=%u; queued=%u",
		   (uint32_t)rxq->cmd_ring[1].size, avail, rxq->comp_ring.next2proc,
		   rxq->cmd_ring[1].size - avail);
}

static void
vmxnet3_txq_dump(struct vmxnet3_tx_queue *txq)
{
	uint32_t avail = 0;

	if (txq == NULL)
		return;

	PMD_TX_LOG(DEBUG, "TXQ: cmd base : %p comp ring base : %p data ring base : %p.",
		   txq->cmd_ring.base, txq->comp_ring.base, txq->data_ring.base);
	PMD_TX_LOG(DEBUG, "TXQ: cmd basePA : 0x%lx comp ring basePA : 0x%lx data ring basePA : 0x%lx.",
		   (unsigned long)txq->cmd_ring.basePA,
		   (unsigned long)txq->comp_ring.basePA,
		   (unsigned long)txq->data_ring.basePA);

	avail = vmxnet3_cmd_ring_desc_avail(&txq->cmd_ring);
	PMD_TX_LOG(DEBUG, "TXQ: size=%u; free=%u; next2proc=%u; queued=%u",
		   (uint32_t)txq->cmd_ring.size, avail,
		   txq->comp_ring.next2proc, txq->cmd_ring.size - avail);
}
#endif

static void
vmxnet3_tx_cmd_ring_release_mbufs(vmxnet3_cmd_ring_t *ring)
{
	while (ring->next2comp != ring->next2fill) {
		/* No need to worry about desc ownership, device is quiesced by now. */
		vmxnet3_buf_info_t *buf_info = ring->buf_info + ring->next2comp;

		if (buf_info->m) {
			rte_pktmbuf_free(buf_info->m);
			buf_info->m = NULL;
			buf_info->bufPA = 0;
			buf_info->len = 0;
		}
		vmxnet3_cmd_ring_adv_next2comp(ring);
	}
}

static void
vmxnet3_rx_cmd_ring_release_mbufs(vmxnet3_cmd_ring_t *ring)
{
	uint32_t i;

	for (i = 0; i < ring->size; i++) {
		/* No need to worry about desc ownership, device is quiesced by now. */
		vmxnet3_buf_info_t *buf_info = &ring->buf_info[i];

		if (buf_info->m) {
			rte_pktmbuf_free_seg(buf_info->m);
			buf_info->m = NULL;
			buf_info->bufPA = 0;
			buf_info->len = 0;
		}
		vmxnet3_cmd_ring_adv_next2comp(ring);
	}
}

static void
vmxnet3_cmd_ring_release(vmxnet3_cmd_ring_t *ring)
{
	rte_free(ring->buf_info);
	ring->buf_info = NULL;
}
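
/* ethdev queue-release hook: drop any mbufs still held by the command ring,
 * then free the host-side buf_info shadow array. The descriptor memzone is
 * deliberately not freed; ring_dma_zone_reserve() finds and reuses it if the
 * queue is set up again.
 */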
void
vmxnet3_dev_tx_queue_release(void *txq)
{
	vmxnet3_tx_queue_t *tq = txq;

	if (tq != NULL) {
		/* Release mbufs */
		vmxnet3_tx_cmd_ring_release_mbufs(&tq->cmd_ring);
		/* Release the cmd_ring */
		vmxnet3_cmd_ring_release(&tq->cmd_ring);
	}
}

void
vmxnet3_dev_rx_queue_release(void *rxq)
{
	int i;
	vmxnet3_rx_queue_t *rq = rxq;

	if (rq != NULL) {
		/* Release mbufs */
		for (i = 0; i < VMXNET3_RX_CMDRING_SIZE; i++)
			vmxnet3_rx_cmd_ring_release_mbufs(&rq->cmd_ring[i]);

		/* Release both the cmd_rings */
		for (i = 0; i < VMXNET3_RX_CMDRING_SIZE; i++)
			vmxnet3_cmd_ring_release(&rq->cmd_ring[i]);
	}
}
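
/* Reset a Tx queue to its post-setup state: free in-flight mbufs, rewind the
 * fill/completion indices, restore the initial generation bits and zero the
 * whole descriptor area. The cmd, comp and data rings live back-to-back in
 * one memzone, so a single memset covers all three.
 */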
static void
vmxnet3_dev_tx_queue_reset(void *txq)
{
	vmxnet3_tx_queue_t *tq = txq;
	struct vmxnet3_cmd_ring *ring = &tq->cmd_ring;
	struct vmxnet3_comp_ring *comp_ring = &tq->comp_ring;
	struct vmxnet3_data_ring *data_ring = &tq->data_ring;
	int size;

	if (tq != NULL) {
		/* Release the cmd_ring mbufs */
		vmxnet3_tx_cmd_ring_release_mbufs(&tq->cmd_ring);
	}

	/* Tx vmxnet rings structure initialization*/
	ring->next2fill = 0;
	ring->next2comp = 0;
	ring->gen = VMXNET3_INIT_GEN;
	comp_ring->next2proc = 0;
	comp_ring->gen = VMXNET3_INIT_GEN;

	size = sizeof(struct Vmxnet3_TxDesc) * ring->size;
	size += sizeof(struct Vmxnet3_TxCompDesc) * comp_ring->size;
	size += tq->txdata_desc_size * data_ring->size;

	memset(ring->base, 0, size);
}

static void
vmxnet3_dev_rx_queue_reset(void *rxq)
{
	int i;
	vmxnet3_rx_queue_t *rq = rxq;
	struct vmxnet3_cmd_ring *ring0, *ring1;
	struct vmxnet3_comp_ring *comp_ring;
	int size;

	if (rq != NULL) {
		/* Release both the cmd_rings mbufs */
		for (i = 0; i < VMXNET3_RX_CMDRING_SIZE; i++)
			vmxnet3_rx_cmd_ring_release_mbufs(&rq->cmd_ring[i]);
	}

	ring0 = &rq->cmd_ring[0];
	ring1 = &rq->cmd_ring[1];
	comp_ring = &rq->comp_ring;

	/* Rx vmxnet rings structure initialization */
	ring0->next2fill = 0;
	ring1->next2fill = 0;
	ring0->next2comp = 0;
	ring1->next2comp = 0;
	ring0->gen = VMXNET3_INIT_GEN;
	ring1->gen = VMXNET3_INIT_GEN;
	comp_ring->next2proc = 0;
	comp_ring->gen = VMXNET3_INIT_GEN;

	size = sizeof(struct Vmxnet3_RxDesc) * (ring0->size + ring1->size);
	size += sizeof(struct Vmxnet3_RxCompDesc) * comp_ring->size;

	memset(ring0->base, 0, size);
}

void
vmxnet3_dev_clear_queues(struct rte_eth_dev *dev)
{
	unsigned i;

	PMD_INIT_FUNC_TRACE();

	for (i = 0; i < dev->data->nb_tx_queues; i++) {
		struct vmxnet3_tx_queue *txq = dev->data->tx_queues[i];

		if (txq != NULL) {
			txq->stopped = TRUE;
			vmxnet3_dev_tx_queue_reset(txq);
		}
	}

	for (i = 0; i < dev->data->nb_rx_queues; i++) {
		struct vmxnet3_rx_queue *rxq = dev->data->rx_queues[i];

		if (rxq != NULL) {
			rxq->stopped = TRUE;
			vmxnet3_dev_rx_queue_reset(rxq);
		}
	}
}
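
/* Reclaim the command-ring descriptors of one completed packet. The device
 * posts a single Tx completion for the EOP descriptor, so every descriptor up
 * to and including eop_idx can be retired in one sweep. Returns the number of
 * descriptors freed.
 */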
static int
vmxnet3_unmap_pkt(uint16_t eop_idx, vmxnet3_tx_queue_t *txq)
{
	int completed = 0;
	struct rte_mbuf *mbuf;

	/* Release cmd_ring descriptor and free mbuf */
	RTE_ASSERT(txq->cmd_ring.base[eop_idx].txd.eop == 1);

	mbuf = txq->cmd_ring.buf_info[eop_idx].m;
	if (mbuf == NULL)
		rte_panic("EOP desc does not point to a valid mbuf");
	rte_pktmbuf_free(mbuf);

	txq->cmd_ring.buf_info[eop_idx].m = NULL;

	while (txq->cmd_ring.next2comp != eop_idx) {
		/* no out-of-order completion */
		RTE_ASSERT(txq->cmd_ring.base[txq->cmd_ring.next2comp].txd.cq == 0);
		vmxnet3_cmd_ring_adv_next2comp(&txq->cmd_ring);
		completed++;
	}

	/* Mark the txd for which tcd was generated as completed */
	vmxnet3_cmd_ring_adv_next2comp(&txq->cmd_ring);

	return completed + 1;
}
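
/* Drain the Tx completion ring: each completion descriptor owned by the
 * driver (its gen bit matches the ring's current generation) carries the EOP
 * index of a finished packet, which vmxnet3_unmap_pkt() then retires.
 */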
static void
vmxnet3_tq_tx_complete(vmxnet3_tx_queue_t *txq)
{
	int completed = 0;
	vmxnet3_comp_ring_t *comp_ring = &txq->comp_ring;
	struct Vmxnet3_TxCompDesc *tcd = (struct Vmxnet3_TxCompDesc *)
		(comp_ring->base + comp_ring->next2proc);

	while (tcd->gen == comp_ring->gen) {
		completed += vmxnet3_unmap_pkt(tcd->txdIdx, txq);

		vmxnet3_comp_ring_adv_next2proc(comp_ring);
		tcd = (struct Vmxnet3_TxCompDesc *)(comp_ring->base +
						    comp_ring->next2proc);
	}

	PMD_TX_LOG(DEBUG, "Processed %d tx comps & command descs.", completed);
}
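
/* Tx prepare callback: reject packets whose offload requests the device
 * cannot honor, and for checksum/TSO offloads fix up the pseudo-header
 * checksum in place via rte_net_intel_cksum_prepare(). Returns the index of
 * the first bad packet, with rte_errno set.
 */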
uint16_t
vmxnet3_prep_pkts(__rte_unused void *tx_queue, struct rte_mbuf **tx_pkts,
	uint16_t nb_pkts)
{
	int32_t ret;
	uint32_t i;
	uint64_t ol_flags;
	struct rte_mbuf *m;

	for (i = 0; i != nb_pkts; i++) {
		m = tx_pkts[i];
		ol_flags = m->ol_flags;

		/* Non-TSO packet cannot occupy more than
		 * VMXNET3_MAX_TXD_PER_PKT TX descriptors.
		 */
		if ((ol_flags & PKT_TX_TCP_SEG) == 0 &&
		    m->nb_segs > VMXNET3_MAX_TXD_PER_PKT) {
			rte_errno = -EINVAL;
			return i;
		}

		/* check that only supported TX offloads are requested. */
		if ((ol_flags & VMXNET3_TX_OFFLOAD_NOTSUP_MASK) != 0 ||
		    (ol_flags & PKT_TX_L4_MASK) ==
		    PKT_TX_SCTP_CKSUM) {
			rte_errno = -ENOTSUP;
			return i;
		}

#ifdef RTE_LIBRTE_ETHDEV_DEBUG
		ret = rte_validate_tx_offload(m);
		if (ret != 0) {
			rte_errno = ret;
			return i;
		}
#endif
		ret = rte_net_intel_cksum_prepare(m);
		if (ret != 0) {
			rte_errno = ret;
			return i;
		}
	}

	return i;
}
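
/* Tx burst. Descriptor ownership is conveyed by the generation bit: every
 * non-SOP descriptor is written with the ring's current gen value, while the
 * SOP descriptor keeps the previous gen until the whole chain is filled and
 * is flipped last, after a compiler barrier, so the device never sees a
 * partially written packet. Packets small enough for a data-ring slot are
 * copied there to save the device a guest-memory read.
 */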
uint16_t
vmxnet3_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts,
		  uint16_t nb_pkts)
{
	uint16_t nb_tx;
	vmxnet3_tx_queue_t *txq = tx_queue;
	struct vmxnet3_hw *hw = txq->hw;
	Vmxnet3_TxQueueCtrl *txq_ctrl = &txq->shared->ctrl;
	uint32_t deferred = rte_le_to_cpu_32(txq_ctrl->txNumDeferred);

	if (unlikely(txq->stopped)) {
		PMD_TX_LOG(DEBUG, "Tx queue is stopped.");
		return 0;
	}

	/* Free up the comp_descriptors aggressively */
	vmxnet3_tq_tx_complete(txq);

	nb_tx = 0;
	while (nb_tx < nb_pkts) {
		Vmxnet3_GenericDesc *gdesc;
		vmxnet3_buf_info_t *tbi;
		uint32_t first2fill, avail, dw2;
		struct rte_mbuf *txm = tx_pkts[nb_tx];
		struct rte_mbuf *m_seg = txm;
		int copy_size = 0;
		bool tso = (txm->ol_flags & PKT_TX_TCP_SEG) != 0;
		/* # of descriptors needed for a packet. */
		unsigned count = txm->nb_segs;

		avail = vmxnet3_cmd_ring_desc_avail(&txq->cmd_ring);
		if (count > avail) {
			/* Is command ring full? */
			if (unlikely(avail == 0)) {
				PMD_TX_LOG(DEBUG, "No free ring descriptors");
				txq->stats.tx_ring_full++;
				txq->stats.drop_total += (nb_pkts - nb_tx);
				break;
			}

			/* Command ring is not full but cannot handle the
			 * multi-segmented packet. Let's try the next packet
			 * in this case.
			 */
			PMD_TX_LOG(DEBUG, "Running out of ring descriptors "
				   "(avail %d needed %d)", avail, count);
			txq->stats.drop_total++;
			if (tso)
				txq->stats.drop_tso++;
			rte_pktmbuf_free(txm);
			nb_tx++;
			continue;
		}

		/* Drop non-TSO packet that is excessively fragmented */
		if (unlikely(!tso && count > VMXNET3_MAX_TXD_PER_PKT)) {
			PMD_TX_LOG(ERR, "Non-TSO packet cannot occupy more than %d tx "
				   "descriptors. Packet dropped.", VMXNET3_MAX_TXD_PER_PKT);
			txq->stats.drop_too_many_segs++;
			txq->stats.drop_total++;
			rte_pktmbuf_free(txm);
			nb_tx++;
			continue;
		}

		if (txm->nb_segs == 1 &&
		    rte_pktmbuf_pkt_len(txm) <= txq->txdata_desc_size) {
			struct Vmxnet3_TxDataDesc *tdd;

			tdd = (struct Vmxnet3_TxDataDesc *)
				((uint8 *)txq->data_ring.base +
				 txq->cmd_ring.next2fill *
				 txq->txdata_desc_size);
			copy_size = rte_pktmbuf_pkt_len(txm);
			rte_memcpy(tdd->data, rte_pktmbuf_mtod(txm, char *), copy_size);
		}

		/* use the previous gen bit for the SOP desc */
		dw2 = (txq->cmd_ring.gen ^ 0x1) << VMXNET3_TXD_GEN_SHIFT;
		first2fill = txq->cmd_ring.next2fill;
		do {
			/* Remember the transmit buffer for cleanup */
			tbi = txq->cmd_ring.buf_info + txq->cmd_ring.next2fill;

			/* NB: the following assumes that VMXNET3 maximum
			 * transmit buffer size (16K) is greater than
			 * maximum size of mbuf segment size.
			 */
			gdesc = txq->cmd_ring.base + txq->cmd_ring.next2fill;
			if (copy_size) {
				uint64 offset = txq->cmd_ring.next2fill *
						txq->txdata_desc_size;
				gdesc->txd.addr =
					rte_cpu_to_le_64(txq->data_ring.basePA +
							 offset);
			} else {
				gdesc->txd.addr = rte_mbuf_data_dma_addr(m_seg);
			}

			gdesc->dword[2] = dw2 | m_seg->data_len;
			gdesc->dword[3] = 0;

			/* move to the next2fill descriptor */
			vmxnet3_cmd_ring_adv_next2fill(&txq->cmd_ring);

			/* use the right gen for non-SOP desc */
			dw2 = txq->cmd_ring.gen << VMXNET3_TXD_GEN_SHIFT;
		} while ((m_seg = m_seg->next) != NULL);

		/* set the last buf_info for the pkt */
		tbi->m = txm;
		/* Update the EOP descriptor */
		gdesc->dword[3] |= VMXNET3_TXD_EOP | VMXNET3_TXD_CQ;

		/* Add VLAN tag if present */
		gdesc = txq->cmd_ring.base + first2fill;
		if (txm->ol_flags & PKT_TX_VLAN_PKT) {
			gdesc->txd.ti = 1;
			gdesc->txd.tci = txm->vlan_tci;
		}

		if (tso) {
			uint16_t mss = txm->tso_segsz;

			RTE_ASSERT(mss > 0);

			gdesc->txd.hlen = txm->l2_len + txm->l3_len + txm->l4_len;
			gdesc->txd.om = VMXNET3_OM_TSO;
			gdesc->txd.msscof = mss;

			deferred += (rte_pktmbuf_pkt_len(txm) - gdesc->txd.hlen + mss - 1) / mss;
		} else if (txm->ol_flags & PKT_TX_L4_MASK) {
			gdesc->txd.om = VMXNET3_OM_CSUM;
			gdesc->txd.hlen = txm->l2_len + txm->l3_len;

			switch (txm->ol_flags & PKT_TX_L4_MASK) {
			case PKT_TX_TCP_CKSUM:
				gdesc->txd.msscof = gdesc->txd.hlen + offsetof(struct tcp_hdr, cksum);
				break;
			case PKT_TX_UDP_CKSUM:
				gdesc->txd.msscof = gdesc->txd.hlen + offsetof(struct udp_hdr, dgram_cksum);
				break;
			default:
				PMD_TX_LOG(WARNING, "requested cksum offload not supported %#llx",
					   txm->ol_flags & PKT_TX_L4_MASK);
				abort();
			}
			deferred++;
		} else {
			gdesc->txd.hlen = 0;
			gdesc->txd.om = VMXNET3_OM_NONE;
			gdesc->txd.msscof = 0;
			deferred++;
		}

		/* flip the GEN bit on the SOP */
		rte_compiler_barrier();
		gdesc->dword[2] ^= VMXNET3_TXD_GEN;

		txq_ctrl->txNumDeferred = rte_cpu_to_le_32(deferred);
		nb_tx++;
	}

	PMD_TX_LOG(DEBUG, "vmxnet3 txThreshold: %u", rte_le_to_cpu_32(txq_ctrl->txThreshold));

	if (deferred >= rte_le_to_cpu_32(txq_ctrl->txThreshold)) {
		txq_ctrl->txNumDeferred = 0;
		/* Notify vSwitch that packets are available. */
		VMXNET3_WRITE_BAR0_REG(hw, (VMXNET3_REG_TXPROD + txq->queue_id * VMXNET3_REG_ALIGN),
				       txq->cmd_ring.next2fill);
	}

	return nb_tx;
}
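
/* Repost one freshly allocated mbuf into the Rx slot that was just consumed.
 * Ring 0 holds HEAD-type buffers and ring 1 BODY-type buffers, matching the
 * layout described above vmxnet3_post_rx_bufs().
 */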
static inline void
vmxnet3_renew_desc(vmxnet3_rx_queue_t *rxq, uint8_t ring_id,
		   struct rte_mbuf *mbuf)
{
	uint32_t val;
	struct vmxnet3_cmd_ring *ring = &rxq->cmd_ring[ring_id];
	struct Vmxnet3_RxDesc *rxd =
		(struct Vmxnet3_RxDesc *)(ring->base + ring->next2fill);
	vmxnet3_buf_info_t *buf_info = &ring->buf_info[ring->next2fill];

	if (ring_id == 0)
		val = VMXNET3_RXD_BTYPE_HEAD;
	else
		val = VMXNET3_RXD_BTYPE_BODY;

	buf_info->m = mbuf;
	buf_info->len = (uint16_t)(mbuf->buf_len - RTE_PKTMBUF_HEADROOM);
	buf_info->bufPA = rte_mbuf_data_dma_addr_default(mbuf);

	rxd->addr = buf_info->bufPA;
	rxd->btype = val;
	rxd->len = buf_info->len;
	rxd->gen = ring->gen;

	vmxnet3_cmd_ring_adv_next2fill(ring);
}
/*
 *  Allocates mbufs and clusters. Post rx descriptors with buffer details
 *  so that device can receive packets in those buffers.
 *  Ring layout:
 *      Among the two rings, 1st ring contains buffers of type 0 and type 1.
 *      bufs_per_pkt is set such that for non-LRO cases all the buffers required
 *      by a frame will fit in 1st ring (1st buf of type0 and rest of type1).
 *      2nd ring contains buffers of type 1 alone. Second ring mostly be used
 *      only for LRO.
 */
static int
vmxnet3_post_rx_bufs(vmxnet3_rx_queue_t *rxq, uint8_t ring_id)
{
	int err = 0;
	uint32_t i = 0, val = 0;
	struct vmxnet3_cmd_ring *ring = &rxq->cmd_ring[ring_id];

	if (ring_id == 0) {
		/* Usually: One HEAD type buf per packet
		 * val = (ring->next2fill % rxq->hw->bufs_per_pkt) ?
		 * VMXNET3_RXD_BTYPE_BODY : VMXNET3_RXD_BTYPE_HEAD;
		 */

		/* We use single packet buffer so all heads here */
		val = VMXNET3_RXD_BTYPE_HEAD;
	} else {
		/* All BODY type buffers for 2nd ring */
		val = VMXNET3_RXD_BTYPE_BODY;
	}

	while (vmxnet3_cmd_ring_desc_avail(ring) > 0) {
		struct Vmxnet3_RxDesc *rxd;
		struct rte_mbuf *mbuf;
		vmxnet3_buf_info_t *buf_info = &ring->buf_info[ring->next2fill];

		rxd = (struct Vmxnet3_RxDesc *)(ring->base + ring->next2fill);

		/* Allocate blank mbuf for the current Rx Descriptor */
		mbuf = rte_mbuf_raw_alloc(rxq->mp);
		if (unlikely(mbuf == NULL)) {
			PMD_RX_LOG(ERR, "Error allocating mbuf");
			rxq->stats.rx_buf_alloc_failure++;
			err = ENOMEM;
			break;
		}

		/*
		 * Load mbuf pointer into buf_info[ring_size]
		 * buf_info structure is equivalent to cookie for virtio-virtqueue
		 */
		buf_info->m = mbuf;
		buf_info->len = (uint16_t)(mbuf->buf_len -
					   RTE_PKTMBUF_HEADROOM);
		buf_info->bufPA = rte_mbuf_data_dma_addr_default(mbuf);

		/* Load Rx Descriptor with the buffer's GPA */
		rxd->addr = buf_info->bufPA;

		/* After this point rxd->addr MUST not be NULL */
		rxd->btype = val;
		rxd->len = buf_info->len;
		/* Flip gen bit at the end to change ownership */
		rxd->gen = ring->gen;

		vmxnet3_cmd_ring_adv_next2fill(ring);
		i++;
	}

	/* Return error only if no buffers are posted at present */
	if (vmxnet3_cmd_ring_desc_avail(ring) >= (ring->size - 1))
		return -err;
	else
		return i;
}

/* Receive side checksum and other offloads */
static void
vmxnet3_rx_offload(const Vmxnet3_RxCompDesc *rcd, struct rte_mbuf *rxm)
{
	/* Check for RSS */
	if (rcd->rssType != VMXNET3_RCD_RSS_TYPE_NONE) {
		rxm->ol_flags |= PKT_RX_RSS_HASH;
		rxm->hash.rss = rcd->rssHash;
	}

	/* Check packet type, checksum errors, etc. Only support IPv4 for now. */
	if (rcd->v4) {
		struct ether_hdr *eth = rte_pktmbuf_mtod(rxm, struct ether_hdr *);
		struct ipv4_hdr *ip = (struct ipv4_hdr *)(eth + 1);

		if (((ip->version_ihl & 0xf) << 2) > (int)sizeof(struct ipv4_hdr))
			rxm->packet_type = RTE_PTYPE_L3_IPV4_EXT;
		else
			rxm->packet_type = RTE_PTYPE_L3_IPV4;

		if (!rcd->cnc) {
			if (!rcd->ipc)
				rxm->ol_flags |= PKT_RX_IP_CKSUM_BAD;

			if ((rcd->tcp || rcd->udp) && !rcd->tuc)
				rxm->ol_flags |= PKT_RX_L4_CKSUM_BAD;
		}
	}
}
/*
 * Process the Rx Completion Ring of given vmxnet3_rx_queue
 * for nb_pkts burst and return the number of packets received
 */
uint16_t
vmxnet3_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t nb_pkts)
{
	uint16_t nb_rx;
	uint32_t nb_rxd, idx;
	uint8_t ring_idx;
	vmxnet3_rx_queue_t *rxq;
	Vmxnet3_RxCompDesc *rcd;
	vmxnet3_buf_info_t *rbi;
	Vmxnet3_RxDesc *rxd;
	struct rte_mbuf *rxm = NULL;
	struct vmxnet3_hw *hw;

	nb_rx = 0;
	ring_idx = 0;
	nb_rxd = 0;
	idx = 0;

	rxq = rx_queue;
	hw = rxq->hw;

	rcd = &rxq->comp_ring.base[rxq->comp_ring.next2proc].rcd;

	if (unlikely(rxq->stopped)) {
		PMD_RX_LOG(DEBUG, "Rx queue is stopped.");
		return 0;
	}

	while (rcd->gen == rxq->comp_ring.gen) {
		struct rte_mbuf *newm;

		if (nb_rx >= nb_pkts)
			break;

		newm = rte_mbuf_raw_alloc(rxq->mp);
		if (unlikely(newm == NULL)) {
			PMD_RX_LOG(ERR, "Error allocating mbuf");
			rxq->stats.rx_buf_alloc_failure++;
			break;
		}

		idx = rcd->rxdIdx;
		ring_idx = (uint8_t)((rcd->rqID == rxq->qid1) ? 0 : 1);
		rxd = (Vmxnet3_RxDesc *)rxq->cmd_ring[ring_idx].base + idx;
		RTE_SET_USED(rxd); /* used only for assert when enabled */
		rbi = rxq->cmd_ring[ring_idx].buf_info + idx;

		PMD_RX_LOG(DEBUG, "rxd idx: %d ring idx: %d.", idx, ring_idx);

		RTE_ASSERT(rcd->len <= rxd->len);
		RTE_ASSERT(rbi->m);

		/* Get the packet buffer pointer from buf_info */
		rxm = rbi->m;

		/* Clear descriptor associated buf_info to be reused */
		rbi->m = NULL;
		rbi->bufPA = 0;

		/* Update the index that we received a packet */
		rxq->cmd_ring[ring_idx].next2comp = idx;

		/* For RCD with EOP set, check if there is frame error */
		if (unlikely(rcd->eop && rcd->err)) {
			rxq->stats.drop_total++;
			rxq->stats.drop_err++;

			if (!rcd->fcs) {
				rxq->stats.drop_fcs++;
				PMD_RX_LOG(ERR, "Recv packet dropped due to frame err.");
			}
			PMD_RX_LOG(ERR, "Error in received packet rcd#:%d rxd:%d",
				   (int)(rcd - (struct Vmxnet3_RxCompDesc *)
					 rxq->comp_ring.base), rcd->rxdIdx);
			rte_pktmbuf_free_seg(rxm);
			goto rcd_done;
		}

		/* Initialize newly received packet buffer */
		rxm->port = rxq->port_id;
		rxm->nb_segs = 1;
		rxm->next = NULL;
		rxm->pkt_len = (uint16_t)rcd->len;
		rxm->data_len = (uint16_t)rcd->len;
		rxm->data_off = RTE_PKTMBUF_HEADROOM;
		rxm->ol_flags = 0;
		rxm->vlan_tci = 0;

		/*
		 * If this is the first buffer of the received packet,
		 * set the pointer to the first mbuf of the packet
		 * Otherwise, update the total length and the number of segments
		 * of the current scattered packet, and update the pointer to
		 * the last mbuf of the current packet.
		 */
		if (rcd->sop) {
			RTE_ASSERT(rxd->btype == VMXNET3_RXD_BTYPE_HEAD);

			if (unlikely(rcd->len == 0)) {
				RTE_ASSERT(rcd->eop);

				PMD_RX_LOG(DEBUG,
					   "Rx buf was skipped. rxring[%d][%d])",
					   ring_idx, idx);
				rte_pktmbuf_free_seg(rxm);
				goto rcd_done;
			}

			rxq->start_seg = rxm;
			vmxnet3_rx_offload(rcd, rxm);
		} else {
			struct rte_mbuf *start = rxq->start_seg;

			RTE_ASSERT(rxd->btype == VMXNET3_RXD_BTYPE_BODY);

			start->pkt_len += rxm->data_len;
			start->nb_segs++;

			rxq->last_seg->next = rxm;
		}
		rxq->last_seg = rxm;

		if (rcd->eop) {
			struct rte_mbuf *start = rxq->start_seg;

			/* Check for hardware stripped VLAN tag */
			if (rcd->ts) {
				start->ol_flags |= (PKT_RX_VLAN_PKT | PKT_RX_VLAN_STRIPPED);
				start->vlan_tci = rte_le_to_cpu_16((uint16_t)rcd->tci);
			}

			rx_pkts[nb_rx++] = start;
			rxq->start_seg = NULL;
		}

rcd_done:
		rxq->cmd_ring[ring_idx].next2comp = idx;
		VMXNET3_INC_RING_IDX_ONLY(rxq->cmd_ring[ring_idx].next2comp,
					  rxq->cmd_ring[ring_idx].size);

		/* It's time to renew descriptors */
		vmxnet3_renew_desc(rxq, ring_idx, newm);
		if (unlikely(rxq->shared->ctrl.updateRxProd)) {
			VMXNET3_WRITE_BAR0_REG(hw, rxprod_reg[ring_idx] + (rxq->queue_id * VMXNET3_REG_ALIGN),
					       rxq->cmd_ring[ring_idx].next2fill);
		}

		/* Advance to the next descriptor in comp_ring */
		vmxnet3_comp_ring_adv_next2proc(&rxq->comp_ring);

		rcd = &rxq->comp_ring.base[rxq->comp_ring.next2proc].rcd;
		nb_rxd++;
		if (nb_rxd > rxq->cmd_ring[0].size) {
			PMD_RX_LOG(ERR, "Used up quota of receiving packets,"
				   " relinquish control.");
			break;
		}
	}

	return nb_rx;
}
/*
 * Create memzone for device rings. malloc can't be used as the physical address is
 * needed. If the memzone is already created, then this function returns a ptr
 * to the old one.
 */
static const struct rte_memzone *
ring_dma_zone_reserve(struct rte_eth_dev *dev, const char *ring_name,
		      uint16_t queue_id, uint32_t ring_size, int socket_id)
{
	char z_name[RTE_MEMZONE_NAMESIZE];
	const struct rte_memzone *mz;

	snprintf(z_name, sizeof(z_name), "%s_%s_%d_%d",
		 dev->driver->pci_drv.driver.name, ring_name,
		 dev->data->port_id, queue_id);

	mz = rte_memzone_lookup(z_name);
	if (mz)
		return mz;

	return rte_memzone_reserve_aligned(z_name, ring_size,
					   socket_id, 0, VMXNET3_RING_BA_ALIGN);
}
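
/* Set up one Tx queue: a single memzone is carved into the command ring,
 * completion ring and data ring (in that order), plus a host-side buf_info
 * array that shadows the command ring for mbuf bookkeeping.
 */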
int
vmxnet3_dev_tx_queue_setup(struct rte_eth_dev *dev,
			   uint16_t queue_idx,
			   uint16_t nb_desc,
			   unsigned int socket_id,
			   __rte_unused const struct rte_eth_txconf *tx_conf)
{
	struct vmxnet3_hw *hw = dev->data->dev_private;
	const struct rte_memzone *mz;
	struct vmxnet3_tx_queue *txq;
	struct vmxnet3_cmd_ring *ring;
	struct vmxnet3_comp_ring *comp_ring;
	struct vmxnet3_data_ring *data_ring;
	int size;

	PMD_INIT_FUNC_TRACE();

	if ((tx_conf->txq_flags & ETH_TXQ_FLAGS_NOXSUMSCTP) !=
	    ETH_TXQ_FLAGS_NOXSUMSCTP) {
		PMD_INIT_LOG(ERR, "SCTP checksum offload not supported");
		return -EINVAL;
	}

	txq = rte_zmalloc("ethdev_tx_queue", sizeof(struct vmxnet3_tx_queue),
			  RTE_CACHE_LINE_SIZE);
	if (txq == NULL) {
		PMD_INIT_LOG(ERR, "Can not allocate tx queue structure");
		return -ENOMEM;
	}

	txq->queue_id = queue_idx;
	txq->port_id = dev->data->port_id;
	txq->shared = &hw->tqd_start[queue_idx];
	txq->hw = hw;
	txq->qid = queue_idx;
	txq->stopped = TRUE;
	txq->txdata_desc_size = hw->txdata_desc_size;

	ring = &txq->cmd_ring;
	comp_ring = &txq->comp_ring;
	data_ring = &txq->data_ring;

	/* Tx vmxnet ring length should be between 512-4096 */
	if (nb_desc < VMXNET3_DEF_TX_RING_SIZE) {
		PMD_INIT_LOG(ERR, "VMXNET3 Tx Ring Size Min: %u",
			     VMXNET3_DEF_TX_RING_SIZE);
		return -EINVAL;
	} else if (nb_desc > VMXNET3_TX_RING_MAX_SIZE) {
		PMD_INIT_LOG(ERR, "VMXNET3 Tx Ring Size Max: %u",
			     VMXNET3_TX_RING_MAX_SIZE);
		return -EINVAL;
	} else {
		ring->size = nb_desc;
		ring->size &= ~VMXNET3_RING_SIZE_MASK;
	}
	comp_ring->size = data_ring->size = ring->size;

	/* Tx vmxnet rings structure initialization*/
	ring->next2fill = 0;
	ring->next2comp = 0;
	ring->gen = VMXNET3_INIT_GEN;
	comp_ring->next2proc = 0;
	comp_ring->gen = VMXNET3_INIT_GEN;

	size = sizeof(struct Vmxnet3_TxDesc) * ring->size;
	size += sizeof(struct Vmxnet3_TxCompDesc) * comp_ring->size;
	size += txq->txdata_desc_size * data_ring->size;

	mz = ring_dma_zone_reserve(dev, "txdesc", queue_idx, size, socket_id);
	if (mz == NULL) {
		PMD_INIT_LOG(ERR, "ERROR: Creating queue descriptors zone");
		return -ENOMEM;
	}
	memset(mz->addr, 0, mz->len);

	/* cmd_ring initialization */
	ring->base = mz->addr;
	ring->basePA = mz->phys_addr;

	/* comp_ring initialization */
	comp_ring->base = ring->base + ring->size;
	comp_ring->basePA = ring->basePA +
		(sizeof(struct Vmxnet3_TxDesc) * ring->size);

	/* data_ring initialization */
	data_ring->base = (Vmxnet3_TxDataDesc *)(comp_ring->base + comp_ring->size);
	data_ring->basePA = comp_ring->basePA +
			(sizeof(struct Vmxnet3_TxCompDesc) * comp_ring->size);

	/* cmd_ring0 buf_info allocation */
	ring->buf_info = rte_zmalloc("tx_ring_buf_info",
				     ring->size * sizeof(vmxnet3_buf_info_t), RTE_CACHE_LINE_SIZE);
	if (ring->buf_info == NULL) {
		PMD_INIT_LOG(ERR, "ERROR: Creating tx_buf_info structure");
		return -ENOMEM;
	}

	/* Update the data portion with txq */
	dev->data->tx_queues[queue_idx] = txq;

	return 0;
}
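
/* Set up one Rx queue: two command rings (HEAD and BODY buffers) followed by
 * one completion ring sized for both, again carved from a single memzone,
 * with a buf_info shadow array per command ring.
 */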
int
vmxnet3_dev_rx_queue_setup(struct rte_eth_dev *dev,
			   uint16_t queue_idx,
			   uint16_t nb_desc,
			   unsigned int socket_id,
			   __rte_unused const struct rte_eth_rxconf *rx_conf,
			   struct rte_mempool *mp)
{
	const struct rte_memzone *mz;
	struct vmxnet3_rx_queue *rxq;
	struct vmxnet3_hw *hw = dev->data->dev_private;
	struct vmxnet3_cmd_ring *ring0, *ring1, *ring;
	struct vmxnet3_comp_ring *comp_ring;
	int size;
	uint8_t i;
	char mem_name[32];

	PMD_INIT_FUNC_TRACE();

	rxq = rte_zmalloc("ethdev_rx_queue", sizeof(struct vmxnet3_rx_queue),
			  RTE_CACHE_LINE_SIZE);
	if (rxq == NULL) {
		PMD_INIT_LOG(ERR, "Can not allocate rx queue structure");
		return -ENOMEM;
	}

	rxq->mp = mp;
	rxq->queue_id = queue_idx;
	rxq->port_id = dev->data->port_id;
	rxq->shared = &hw->rqd_start[queue_idx];
	rxq->hw = hw;
	rxq->qid1 = queue_idx;
	rxq->qid2 = queue_idx + hw->num_rx_queues;
	rxq->stopped = TRUE;

	ring0 = &rxq->cmd_ring[0];
	ring1 = &rxq->cmd_ring[1];
	comp_ring = &rxq->comp_ring;

	/* Rx vmxnet rings length should be between 256-4096 */
	if (nb_desc < VMXNET3_DEF_RX_RING_SIZE) {
		PMD_INIT_LOG(ERR, "VMXNET3 Rx Ring Size Min: 256");
		return -EINVAL;
	} else if (nb_desc > VMXNET3_RX_RING_MAX_SIZE) {
		PMD_INIT_LOG(ERR, "VMXNET3 Rx Ring Size Max: 4096");
		return -EINVAL;
	} else {
		ring0->size = nb_desc;
		ring0->size &= ~VMXNET3_RING_SIZE_MASK;
		ring1->size = ring0->size;
	}

	comp_ring->size = ring0->size + ring1->size;

	/* Rx vmxnet rings structure initialization */
	ring0->next2fill = 0;
	ring1->next2fill = 0;
	ring0->next2comp = 0;
	ring1->next2comp = 0;
	ring0->gen = VMXNET3_INIT_GEN;
	ring1->gen = VMXNET3_INIT_GEN;
	comp_ring->next2proc = 0;
	comp_ring->gen = VMXNET3_INIT_GEN;

	size = sizeof(struct Vmxnet3_RxDesc) * (ring0->size + ring1->size);
	size += sizeof(struct Vmxnet3_RxCompDesc) * comp_ring->size;

	mz = ring_dma_zone_reserve(dev, "rxdesc", queue_idx, size, socket_id);
	if (mz == NULL) {
		PMD_INIT_LOG(ERR, "ERROR: Creating queue descriptors zone");
		return -ENOMEM;
	}
	memset(mz->addr, 0, mz->len);

	/* cmd_ring0 initialization */
	ring0->base = mz->addr;
	ring0->basePA = mz->phys_addr;

	/* cmd_ring1 initialization */
	ring1->base = ring0->base + ring0->size;
	ring1->basePA = ring0->basePA + sizeof(struct Vmxnet3_RxDesc) * ring0->size;

	/* comp_ring initialization */
	comp_ring->base = ring1->base + ring1->size;
	comp_ring->basePA = ring1->basePA + sizeof(struct Vmxnet3_RxDesc) *
		ring1->size;

	/* cmd_ring0-cmd_ring1 buf_info allocation */
	for (i = 0; i < VMXNET3_RX_CMDRING_SIZE; i++) {

		ring = &rxq->cmd_ring[i];
		ring->rid = i;
		snprintf(mem_name, sizeof(mem_name), "rx_ring_%d_buf_info", i);

		ring->buf_info = rte_zmalloc(mem_name,
					     ring->size * sizeof(vmxnet3_buf_info_t),
					     RTE_CACHE_LINE_SIZE);
		if (ring->buf_info == NULL) {
			PMD_INIT_LOG(ERR, "ERROR: Creating rx_buf_info structure");
			return -ENOMEM;
		}
	}

	/* Update the data portion with rxq */
	dev->data->rx_queues[queue_idx] = rxq;

	return 0;
}
/*
 * Initializes Receive Unit
 * Load mbufs in rx queue in advance
 */
int
vmxnet3_dev_rxtx_init(struct rte_eth_dev *dev)
{
	struct vmxnet3_hw *hw = dev->data->dev_private;

	int i, ret;
	uint8_t j;

	PMD_INIT_FUNC_TRACE();

	for (i = 0; i < hw->num_rx_queues; i++) {
		vmxnet3_rx_queue_t *rxq = dev->data->rx_queues[i];

		for (j = 0; j < VMXNET3_RX_CMDRING_SIZE; j++) {
			/* Passing 0 as alloc_num will allocate full ring */
			ret = vmxnet3_post_rx_bufs(rxq, j);
			if (ret <= 0) {
				PMD_INIT_LOG(ERR,
					     "ERROR: Posting Rxq: %d buffers ring: %d",
					     i, j);
				return -ret;
			}
			/*
			 * Updating device with the index:next2fill to fill the
			 * mbufs for coming packets.
			 */
			if (unlikely(rxq->shared->ctrl.updateRxProd)) {
				VMXNET3_WRITE_BAR0_REG(hw, rxprod_reg[j] + (rxq->queue_id * VMXNET3_REG_ALIGN),
						       rxq->cmd_ring[j].next2fill);
			}
		}
		rxq->stopped = FALSE;
		rxq->start_seg = NULL;
	}

	for (i = 0; i < dev->data->nb_tx_queues; i++) {
		struct vmxnet3_tx_queue *txq = dev->data->tx_queues[i];

		txq->stopped = FALSE;
	}

	return 0;
}
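
/* Default 40-byte Toeplitz RSS key, the same well-known default used by
 * several Intel NIC PMDs.
 */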
static uint8_t rss_intel_key[40] = {
	0x6D, 0x5A, 0x56, 0xDA, 0x25, 0x5B, 0x0E, 0xC2,
	0x41, 0x67, 0x25, 0x3D, 0x43, 0xA3, 0x8F, 0xB0,
	0xD0, 0xCA, 0x2B, 0xCB, 0xAE, 0x7B, 0x30, 0xB4,
	0x77, 0xCB, 0x2D, 0xA3, 0x80, 0x30, 0xF2, 0x0C,
	0x6A, 0x42, 0xB7, 0x3B, 0xBE, 0xAC, 0x01, 0xFA,
};
/*
 * Configure RSS feature
 */
int
vmxnet3_rss_configure(struct rte_eth_dev *dev)
{
	struct vmxnet3_hw *hw = dev->data->dev_private;
	struct VMXNET3_RSSConf *dev_rss_conf;
	struct rte_eth_rss_conf *port_rss_conf;
	uint64_t rss_hf;
	uint8_t i, j;

	PMD_INIT_FUNC_TRACE();

	dev_rss_conf = hw->rss_conf;
	port_rss_conf = &dev->data->dev_conf.rx_adv_conf.rss_conf;

	/* loading hashFunc */
	dev_rss_conf->hashFunc = VMXNET3_RSS_HASH_FUNC_TOEPLITZ;
	/* loading hashKeySize */
	dev_rss_conf->hashKeySize = VMXNET3_RSS_MAX_KEY_SIZE;
	/* loading indTableSize: Must not exceed VMXNET3_RSS_MAX_IND_TABLE_SIZE (128)*/
	dev_rss_conf->indTableSize = (uint16_t)(hw->num_rx_queues * 4);

	if (port_rss_conf->rss_key == NULL) {
		/* Default hash key */
		port_rss_conf->rss_key = rss_intel_key;
	}

	/* loading hashKey */
	memcpy(&dev_rss_conf->hashKey[0], port_rss_conf->rss_key,
	       dev_rss_conf->hashKeySize);

	/* loading indTable */
	for (i = 0, j = 0; i < dev_rss_conf->indTableSize; i++, j++) {
		if (j == dev->data->nb_rx_queues)
			j = 0;

		dev_rss_conf->indTable[i] = j;
	}

	/* loading hashType */
	dev_rss_conf->hashType = 0;
	rss_hf = port_rss_conf->rss_hf & VMXNET3_RSS_OFFLOAD_ALL;
	if (rss_hf & ETH_RSS_IPV4)
		dev_rss_conf->hashType |= VMXNET3_RSS_HASH_TYPE_IPV4;
	if (rss_hf & ETH_RSS_NONFRAG_IPV4_TCP)
		dev_rss_conf->hashType |= VMXNET3_RSS_HASH_TYPE_TCP_IPV4;
	if (rss_hf & ETH_RSS_IPV6)
		dev_rss_conf->hashType |= VMXNET3_RSS_HASH_TYPE_IPV6;
	if (rss_hf & ETH_RSS_NONFRAG_IPV6_TCP)
		dev_rss_conf->hashType |= VMXNET3_RSS_HASH_TYPE_TCP_IPV6;

	return VMXNET3_SUCCESS;
}