/*-
 *   BSD LICENSE
 *
 *   Copyright(c) 2010-2015 Intel Corporation. All rights reserved.
 *   All rights reserved.
 *
 *   Redistribution and use in source and binary forms, with or without
 *   modification, are permitted provided that the following conditions
 *   are met:
 *
 *     * Redistributions of source code must retain the above copyright
 *       notice, this list of conditions and the following disclaimer.
 *     * Redistributions in binary form must reproduce the above copyright
 *       notice, this list of conditions and the following disclaimer in
 *       the documentation and/or other materials provided with the
 *       distribution.
 *     * Neither the name of Intel Corporation nor the names of its
 *       contributors may be used to endorse or promote products derived
 *       from this software without specific prior written permission.
 *
 *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

#include <sys/queue.h>

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <errno.h>
#include <stdint.h>
#include <stdbool.h>

#include <rte_byteorder.h>
#include <rte_common.h>
#include <rte_cycles.h>
#include <rte_log.h>
#include <rte_debug.h>
#include <rte_interrupts.h>
#include <rte_pci.h>
#include <rte_memory.h>
#include <rte_memzone.h>
#include <rte_launch.h>
#include <rte_eal.h>
#include <rte_per_lcore.h>
#include <rte_lcore.h>
#include <rte_atomic.h>
#include <rte_branch_prediction.h>
#include <rte_mempool.h>
#include <rte_malloc.h>
#include <rte_mbuf.h>
#include <rte_ether.h>
#include <rte_ethdev.h>
#include <rte_prefetch.h>
#include <rte_ip.h>
#include <rte_udp.h>
#include <rte_tcp.h>
#include <rte_sctp.h>
#include <rte_string_fns.h>
#include <rte_errno.h>
#include <rte_net.h>

#include "base/vmxnet3_defs.h"
#include "vmxnet3_ring.h"

#include "vmxnet3_logs.h"
#include "vmxnet3_ethdev.h"

/* Tx offloads this PMD handles: VLAN insertion, L4 checksum and TSO. */
#define	VMXNET3_TX_OFFLOAD_MASK	( \
		PKT_TX_VLAN_PKT | \
		PKT_TX_L4_MASK |  \
		PKT_TX_TCP_SEG)

#define	VMXNET3_TX_OFFLOAD_NOTSUP_MASK	\
	(PKT_TX_OFFLOAD_MASK ^ VMXNET3_TX_OFFLOAD_MASK)
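
/*
 * Note: PKT_TX_OFFLOAD_MASK covers every Tx offload flag an mbuf can carry,
 * so XOR-ing it with the mask of offloads this PMD supports leaves exactly
 * the unsupported bits set; vmxnet3_prep_pkts() below rejects any packet
 * that carries one of those bits.
 */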

static const uint32_t rxprod_reg[2] = {VMXNET3_REG_RXPROD, VMXNET3_REG_RXPROD2};
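
/*
 * rxprod_reg[] above: one BAR0 producer ("doorbell") register per RX command
 * ring. The receive path writes a ring's next2fill index there so the device
 * can see newly posted buffers.
 */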

static int vmxnet3_post_rx_bufs(vmxnet3_rx_queue_t*, uint8_t);
static void vmxnet3_tq_tx_complete(vmxnet3_tx_queue_t *);
#ifdef RTE_LIBRTE_VMXNET3_DEBUG_DRIVER_NOT_USED
static void vmxnet3_rxq_dump(struct vmxnet3_rx_queue *);
static void vmxnet3_txq_dump(struct vmxnet3_tx_queue *);
#endif

#ifdef RTE_LIBRTE_VMXNET3_DEBUG_DRIVER_NOT_USED
static void
vmxnet3_rxq_dump(struct vmxnet3_rx_queue *rxq)
{
	uint32_t avail = 0;

	if (rxq == NULL)
		return;

	PMD_RX_LOG(DEBUG,
		   "RXQ: cmd0 base : %p cmd1 base : %p comp ring base : %p.",
		   rxq->cmd_ring[0].base, rxq->cmd_ring[1].base, rxq->comp_ring.base);
	PMD_RX_LOG(DEBUG,
		   "RXQ: cmd0 basePA : 0x%lx cmd1 basePA : 0x%lx comp ring basePA : 0x%lx.",
		   (unsigned long)rxq->cmd_ring[0].basePA,
		   (unsigned long)rxq->cmd_ring[1].basePA,
		   (unsigned long)rxq->comp_ring.basePA);

	avail = vmxnet3_cmd_ring_desc_avail(&rxq->cmd_ring[0]);
	PMD_RX_LOG(DEBUG,
		   "RXQ: cmd0: size=%u; free=%u; next2proc=%u; queued=%u",
		   (uint32_t)rxq->cmd_ring[0].size, avail,
		   rxq->comp_ring.next2proc,
		   rxq->cmd_ring[0].size - avail);

	avail = vmxnet3_cmd_ring_desc_avail(&rxq->cmd_ring[1]);
	PMD_RX_LOG(DEBUG, "RXQ: cmd1: size=%u; free=%u; next2proc=%u; queued=%u",
		   (uint32_t)rxq->cmd_ring[1].size, avail, rxq->comp_ring.next2proc,
		   rxq->cmd_ring[1].size - avail);
}

static void
vmxnet3_txq_dump(struct vmxnet3_tx_queue *txq)
{
	uint32_t avail = 0;

	if (txq == NULL)
		return;

	PMD_TX_LOG(DEBUG, "TXQ: cmd base : %p comp ring base : %p data ring base : %p.",
		   txq->cmd_ring.base, txq->comp_ring.base, txq->data_ring.base);
	PMD_TX_LOG(DEBUG, "TXQ: cmd basePA : 0x%lx comp ring basePA : 0x%lx data ring basePA : 0x%lx.",
		   (unsigned long)txq->cmd_ring.basePA,
		   (unsigned long)txq->comp_ring.basePA,
		   (unsigned long)txq->data_ring.basePA);

	avail = vmxnet3_cmd_ring_desc_avail(&txq->cmd_ring);
	PMD_TX_LOG(DEBUG, "TXQ: size=%u; free=%u; next2proc=%u; queued=%u",
		   (uint32_t)txq->cmd_ring.size, avail,
		   txq->comp_ring.next2proc, txq->cmd_ring.size - avail);
}
#endif

static void
vmxnet3_tx_cmd_ring_release_mbufs(vmxnet3_cmd_ring_t *ring)
{
	while (ring->next2comp != ring->next2fill) {
		/* No need to worry about desc ownership, device is quiesced by now. */
		vmxnet3_buf_info_t *buf_info = ring->buf_info + ring->next2comp;

		if (buf_info->m) {
			rte_pktmbuf_free(buf_info->m);
			buf_info->m = NULL;
			buf_info->bufPA = 0;
			buf_info->len = 0;
		}
		vmxnet3_cmd_ring_adv_next2comp(ring);
	}
}

static void
vmxnet3_rx_cmd_ring_release_mbufs(vmxnet3_cmd_ring_t *ring)
{
	uint32_t i;

	for (i = 0; i < ring->size; i++) {
		/* No need to worry about desc ownership, device is quiesced by now. */
		vmxnet3_buf_info_t *buf_info = &ring->buf_info[i];

		if (buf_info->m) {
			rte_pktmbuf_free_seg(buf_info->m);
			buf_info->m = NULL;
			buf_info->bufPA = 0;
			buf_info->len = 0;
		}
		vmxnet3_cmd_ring_adv_next2comp(ring);
	}
}

static void
vmxnet3_cmd_ring_release(vmxnet3_cmd_ring_t *ring)
{
	rte_free(ring->buf_info);
	ring->buf_info = NULL;
}

void
vmxnet3_dev_tx_queue_release(void *txq)
{
	vmxnet3_tx_queue_t *tq = txq;

	if (tq != NULL) {
		/* Release mbufs */
		vmxnet3_tx_cmd_ring_release_mbufs(&tq->cmd_ring);
		/* Release the cmd_ring */
		vmxnet3_cmd_ring_release(&tq->cmd_ring);
	}
}

void
vmxnet3_dev_rx_queue_release(void *rxq)
{
	int i;
	vmxnet3_rx_queue_t *rq = rxq;

	if (rq != NULL) {
		/* Release mbufs */
		for (i = 0; i < VMXNET3_RX_CMDRING_SIZE; i++)
			vmxnet3_rx_cmd_ring_release_mbufs(&rq->cmd_ring[i]);

		/* Release both the cmd_rings */
		for (i = 0; i < VMXNET3_RX_CMDRING_SIZE; i++)
			vmxnet3_cmd_ring_release(&rq->cmd_ring[i]);
	}
}

static void
vmxnet3_dev_tx_queue_reset(void *txq)
{
	vmxnet3_tx_queue_t *tq = txq;
	struct vmxnet3_cmd_ring *ring = &tq->cmd_ring;
	struct vmxnet3_comp_ring *comp_ring = &tq->comp_ring;
	struct vmxnet3_data_ring *data_ring = &tq->data_ring;
	int size;

	if (tq != NULL) {
		/* Release the cmd_ring mbufs */
		vmxnet3_tx_cmd_ring_release_mbufs(&tq->cmd_ring);
	}

	/* Tx vmxnet rings structure initialization */
	ring->next2fill = 0;
	ring->next2comp = 0;
	ring->gen = VMXNET3_INIT_GEN;
	comp_ring->next2proc = 0;
	comp_ring->gen = VMXNET3_INIT_GEN;

	size = sizeof(struct Vmxnet3_TxDesc) * ring->size;
	size += sizeof(struct Vmxnet3_TxCompDesc) * comp_ring->size;
	size += tq->txdata_desc_size * data_ring->size;

	memset(ring->base, 0, size);
}

static void
vmxnet3_dev_rx_queue_reset(void *rxq)
{
	int i;
	vmxnet3_rx_queue_t *rq = rxq;
	struct vmxnet3_hw *hw = rq->hw;
	struct vmxnet3_cmd_ring *ring0, *ring1;
	struct vmxnet3_comp_ring *comp_ring;
	struct vmxnet3_rx_data_ring *data_ring = &rq->data_ring;
	int size;

	if (rq != NULL) {
		/* Release both the cmd_rings mbufs */
		for (i = 0; i < VMXNET3_RX_CMDRING_SIZE; i++)
			vmxnet3_rx_cmd_ring_release_mbufs(&rq->cmd_ring[i]);
	}

	ring0 = &rq->cmd_ring[0];
	ring1 = &rq->cmd_ring[1];
	comp_ring = &rq->comp_ring;

	/* Rx vmxnet rings structure initialization */
	ring0->next2fill = 0;
	ring1->next2fill = 0;
	ring0->next2comp = 0;
	ring1->next2comp = 0;
	ring0->gen = VMXNET3_INIT_GEN;
	ring1->gen = VMXNET3_INIT_GEN;
	comp_ring->next2proc = 0;
	comp_ring->gen = VMXNET3_INIT_GEN;

	size = sizeof(struct Vmxnet3_RxDesc) * (ring0->size + ring1->size);
	size += sizeof(struct Vmxnet3_RxCompDesc) * comp_ring->size;
	if (VMXNET3_VERSION_GE_3(hw) && rq->data_desc_size)
		size += rq->data_desc_size * data_ring->size;

	memset(ring0->base, 0, size);
}

void
vmxnet3_dev_clear_queues(struct rte_eth_dev *dev)
{
	unsigned i;

	PMD_INIT_FUNC_TRACE();

	for (i = 0; i < dev->data->nb_tx_queues; i++) {
		struct vmxnet3_tx_queue *txq = dev->data->tx_queues[i];

		if (txq != NULL) {
			txq->stopped = TRUE;
			vmxnet3_dev_tx_queue_reset(txq);
		}
	}

	for (i = 0; i < dev->data->nb_rx_queues; i++) {
		struct vmxnet3_rx_queue *rxq = dev->data->rx_queues[i];

		if (rxq != NULL) {
			rxq->stopped = TRUE;
			vmxnet3_dev_rx_queue_reset(rxq);
		}
	}
}

static int
vmxnet3_unmap_pkt(uint16_t eop_idx, vmxnet3_tx_queue_t *txq)
{
	int completed = 0;
	struct rte_mbuf *mbuf;

	/* Release cmd_ring descriptor and free mbuf */
	RTE_ASSERT(txq->cmd_ring.base[eop_idx].txd.eop == 1);

	mbuf = txq->cmd_ring.buf_info[eop_idx].m;
	if (mbuf == NULL)
		rte_panic("EOP desc does not point to a valid mbuf");
	rte_pktmbuf_free(mbuf);

	txq->cmd_ring.buf_info[eop_idx].m = NULL;

	while (txq->cmd_ring.next2comp != eop_idx) {
		/* no out-of-order completion */
		RTE_ASSERT(txq->cmd_ring.base[txq->cmd_ring.next2comp].txd.cq == 0);
		vmxnet3_cmd_ring_adv_next2comp(&txq->cmd_ring);
		completed++;
	}

	/* Mark the txd for which tcd was generated as completed */
	vmxnet3_cmd_ring_adv_next2comp(&txq->cmd_ring);

	return completed + 1;
}
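
/*
 * Completion-ring convention (applies to the loop below): the device flips a
 * generation bit in every completion descriptor it writes. A descriptor
 * belongs to the host only while tcd->gen matches the ring's current gen;
 * the ring's own gen toggles each time next2proc wraps, so stale descriptors
 * from the previous pass are never re-processed.
 */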

static void
vmxnet3_tq_tx_complete(vmxnet3_tx_queue_t *txq)
{
	int completed = 0;
	vmxnet3_comp_ring_t *comp_ring = &txq->comp_ring;
	struct Vmxnet3_TxCompDesc *tcd = (struct Vmxnet3_TxCompDesc *)
		(comp_ring->base + comp_ring->next2proc);

	while (tcd->gen == comp_ring->gen) {
		completed += vmxnet3_unmap_pkt(tcd->txdIdx, txq);

		vmxnet3_comp_ring_adv_next2proc(comp_ring);
		tcd = (struct Vmxnet3_TxCompDesc *)(comp_ring->base +
						    comp_ring->next2proc);
	}

	PMD_TX_LOG(DEBUG, "Processed %d tx comps & command descs.", completed);
}

uint16_t
vmxnet3_prep_pkts(__rte_unused void *tx_queue, struct rte_mbuf **tx_pkts,
	uint16_t nb_pkts)
{
	int32_t ret;
	uint32_t i;
	uint64_t ol_flags;
	struct rte_mbuf *m;

	for (i = 0; i != nb_pkts; i++) {
		m = tx_pkts[i];
		ol_flags = m->ol_flags;

		/* Non-TSO packet cannot occupy more than
		 * VMXNET3_MAX_TXD_PER_PKT TX descriptors.
		 */
		if ((ol_flags & PKT_TX_TCP_SEG) == 0 &&
		    m->nb_segs > VMXNET3_MAX_TXD_PER_PKT) {
			rte_errno = EINVAL;
			return i;
		}

		/* check that only supported TX offloads are requested. */
		if ((ol_flags & VMXNET3_TX_OFFLOAD_NOTSUP_MASK) != 0 ||
		    (ol_flags & PKT_TX_L4_MASK) == PKT_TX_SCTP_CKSUM) {
			/* rte_errno holds positive errno values */
			rte_errno = ENOTSUP;
			return i;
		}

#ifdef RTE_LIBRTE_ETHDEV_DEBUG
		ret = rte_validate_tx_offload(m);
		if (ret != 0) {
			rte_errno = -ret;
			return i;
		}
#endif
		ret = rte_net_intel_cksum_prepare(m);
		if (ret != 0) {
			rte_errno = -ret;
			return i;
		}
	}

	return nb_pkts;
}

uint16_t
vmxnet3_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts,
		  uint16_t nb_pkts)
{
	uint16_t nb_tx;
	vmxnet3_tx_queue_t *txq = tx_queue;
	struct vmxnet3_hw *hw = txq->hw;
	Vmxnet3_TxQueueCtrl *txq_ctrl = &txq->shared->ctrl;
	uint32_t deferred = rte_le_to_cpu_32(txq_ctrl->txNumDeferred);

	if (unlikely(txq->stopped)) {
		PMD_TX_LOG(DEBUG, "Tx queue is stopped.");
		return 0;
	}

	/* Free up the comp_descriptors aggressively */
	vmxnet3_tq_tx_complete(txq);

	nb_tx = 0;
	while (nb_tx < nb_pkts) {
		Vmxnet3_GenericDesc *gdesc;
		vmxnet3_buf_info_t *tbi;
		uint32_t first2fill, avail, dw2;
		struct rte_mbuf *txm = tx_pkts[nb_tx];
		struct rte_mbuf *m_seg = txm;
		int copy_size = 0;
		bool tso = (txm->ol_flags & PKT_TX_TCP_SEG) != 0;
		/* # of descriptors needed for a packet. */
		unsigned count = txm->nb_segs;

		avail = vmxnet3_cmd_ring_desc_avail(&txq->cmd_ring);
		if (count > avail) {
			/* Is command ring full? */
			if (unlikely(avail == 0)) {
				PMD_TX_LOG(DEBUG, "No free ring descriptors");
				txq->stats.tx_ring_full++;
				txq->stats.drop_total += (nb_pkts - nb_tx);
				break;
			}

			/* Command ring is not full but cannot handle the
			 * multi-segmented packet. Let's try the next packet
			 * in this case.
			 */
			PMD_TX_LOG(DEBUG, "Running out of ring descriptors "
				   "(avail %d needed %d)", avail, count);
			txq->stats.drop_total++;
			if (tso)
				txq->stats.drop_tso++;
			rte_pktmbuf_free(txm);
			nb_tx++;
			continue;
		}

		/* Drop non-TSO packet that is excessively fragmented */
		if (unlikely(!tso && count > VMXNET3_MAX_TXD_PER_PKT)) {
			PMD_TX_LOG(ERR, "Non-TSO packet cannot occupy more than %d tx "
				   "descriptors. Packet dropped.", VMXNET3_MAX_TXD_PER_PKT);
			txq->stats.drop_too_many_segs++;
			txq->stats.drop_total++;
			rte_pktmbuf_free(txm);
			nb_tx++;
			continue;
		}

		/* Small single-segment packets are copied into the Tx data ring */
		if (txm->nb_segs == 1 &&
		    rte_pktmbuf_pkt_len(txm) <= txq->txdata_desc_size) {
			struct Vmxnet3_TxDataDesc *tdd;

			tdd = (struct Vmxnet3_TxDataDesc *)
				((uint8 *)txq->data_ring.base +
				 txq->cmd_ring.next2fill *
				 txq->txdata_desc_size);
			copy_size = rte_pktmbuf_pkt_len(txm);
			rte_memcpy(tdd->data, rte_pktmbuf_mtod(txm, char *), copy_size);
		}

		/* use the previous gen bit for the SOP desc */
		dw2 = (txq->cmd_ring.gen ^ 0x1) << VMXNET3_TXD_GEN_SHIFT;
		first2fill = txq->cmd_ring.next2fill;
		do {
			/* Remember the transmit buffer for cleanup */
			tbi = txq->cmd_ring.buf_info + txq->cmd_ring.next2fill;

			/* NB: the following assumes that VMXNET3 maximum
			 * transmit buffer size (16K) is greater than
			 * maximum size of mbuf segment size.
			 */
			gdesc = txq->cmd_ring.base + txq->cmd_ring.next2fill;
			if (copy_size) {
				uint64 offset = txq->cmd_ring.next2fill *
						txq->txdata_desc_size;
				gdesc->txd.addr =
					rte_cpu_to_le_64(txq->data_ring.basePA +
							 offset);
			} else {
				gdesc->txd.addr = rte_mbuf_data_dma_addr(m_seg);
			}

			gdesc->dword[2] = dw2 | m_seg->data_len;
			gdesc->dword[3] = 0;

			/* move to the next2fill descriptor */
			vmxnet3_cmd_ring_adv_next2fill(&txq->cmd_ring);

			/* use the right gen for non-SOP desc */
			dw2 = txq->cmd_ring.gen << VMXNET3_TXD_GEN_SHIFT;
		} while ((m_seg = m_seg->next) != NULL);

		/* set the last buf_info for the pkt */
		tbi->m = txm;
		/* Update the EOP descriptor */
		gdesc->dword[3] |= VMXNET3_TXD_EOP | VMXNET3_TXD_CQ;

		/* Add VLAN tag if present */
		gdesc = txq->cmd_ring.base + first2fill;
		if (txm->ol_flags & PKT_TX_VLAN_PKT) {
			gdesc->txd.ti = 1;
			gdesc->txd.tci = txm->vlan_tci;
		}

		if (tso) {
			uint16_t mss = txm->tso_segsz;

			RTE_ASSERT(mss > 0);

			gdesc->txd.hlen = txm->l2_len + txm->l3_len + txm->l4_len;
			gdesc->txd.om = VMXNET3_OM_TSO;
			gdesc->txd.msscof = mss;

			deferred += (rte_pktmbuf_pkt_len(txm) - gdesc->txd.hlen + mss - 1) / mss;
		} else if (txm->ol_flags & PKT_TX_L4_MASK) {
			gdesc->txd.om = VMXNET3_OM_CSUM;
			gdesc->txd.hlen = txm->l2_len + txm->l3_len;

			switch (txm->ol_flags & PKT_TX_L4_MASK) {
			case PKT_TX_TCP_CKSUM:
				gdesc->txd.msscof = gdesc->txd.hlen +
					offsetof(struct tcp_hdr, cksum);
				break;
			case PKT_TX_UDP_CKSUM:
				gdesc->txd.msscof = gdesc->txd.hlen +
					offsetof(struct udp_hdr, dgram_cksum);
				break;
			default:
				PMD_TX_LOG(WARNING, "requested cksum offload not supported %#llx",
					   (unsigned long long)(txm->ol_flags & PKT_TX_L4_MASK));
				abort();
			}
			deferred++;
		} else {
			gdesc->txd.hlen = 0;
			gdesc->txd.om = VMXNET3_OM_NONE;
			gdesc->txd.msscof = 0;
			deferred++;
		}

		/* flip the GEN bit on the SOP */
		rte_compiler_barrier();
		gdesc->dword[2] ^= VMXNET3_TXD_GEN;

		txq_ctrl->txNumDeferred = rte_cpu_to_le_32(deferred);
		nb_tx++;
	}

	PMD_TX_LOG(DEBUG, "vmxnet3 txThreshold: %u", rte_le_to_cpu_32(txq_ctrl->txThreshold));

	if (deferred >= rte_le_to_cpu_32(txq_ctrl->txThreshold)) {
		txq_ctrl->txNumDeferred = 0;
		/* Notify vSwitch that packets are available. */
		VMXNET3_WRITE_BAR0_REG(hw, (VMXNET3_REG_TXPROD + txq->queue_id * VMXNET3_REG_ALIGN),
				       txq->cmd_ring.next2fill);
	}

	return nb_tx;
}
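
/*
 * Usage sketch (not part of the driver): applications do not call
 * vmxnet3_xmit_pkts() directly; it is installed as the port's tx_pkt_burst
 * handler, so a burst send looks like:
 *
 *	uint16_t sent = rte_eth_tx_burst(port_id, queue_id, pkts, n);
 */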

static inline void
vmxnet3_renew_desc(vmxnet3_rx_queue_t *rxq, uint8_t ring_id,
		   struct rte_mbuf *mbuf)
{
	uint32_t val;
	struct vmxnet3_cmd_ring *ring = &rxq->cmd_ring[ring_id];
	struct Vmxnet3_RxDesc *rxd =
		(struct Vmxnet3_RxDesc *)(ring->base + ring->next2fill);
	vmxnet3_buf_info_t *buf_info = &ring->buf_info[ring->next2fill];

	if (ring_id == 0)
		val = VMXNET3_RXD_BTYPE_HEAD;
	else
		val = VMXNET3_RXD_BTYPE_BODY;

	buf_info->m = mbuf;
	buf_info->len = (uint16_t)(mbuf->buf_len - RTE_PKTMBUF_HEADROOM);
	buf_info->bufPA = rte_mbuf_data_dma_addr_default(mbuf);

	rxd->addr = buf_info->bufPA;
	rxd->btype = val;
	rxd->len = buf_info->len;
	rxd->gen = ring->gen;

	vmxnet3_cmd_ring_adv_next2fill(ring);
}
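
/*
 * Note: the receive loop calls vmxnet3_renew_desc() once per completed
 * descriptor, with a freshly allocated mbuf, so the command ring stays
 * (nearly) full without a separate refill pass.
 */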

/*
 * Allocates mbufs and clusters. Post rx descriptors with buffer details
 * so that device can receive packets in those buffers.
 *
 * Among the two rings, the 1st ring contains buffers of type 0 and type 1.
 * bufs_per_pkt is set such that for non-LRO cases all the buffers required
 * by a frame fit in the 1st ring (1st buf of type 0, rest of type 1).
 * The 2nd ring contains only type-1 buffers; it is mostly used for LRO and
 * stays empty in the common case.
 */
static int
vmxnet3_post_rx_bufs(vmxnet3_rx_queue_t *rxq, uint8_t ring_id)
{
	int err = 0;
	uint32_t i = 0, val = 0;
	struct vmxnet3_cmd_ring *ring = &rxq->cmd_ring[ring_id];

	if (ring_id == 0) {
		/* Usually: One HEAD type buf per packet
		 * val = (ring->next2fill % rxq->hw->bufs_per_pkt) ?
		 * VMXNET3_RXD_BTYPE_BODY : VMXNET3_RXD_BTYPE_HEAD;
		 */

		/* We use single packet buffer so all heads here */
		val = VMXNET3_RXD_BTYPE_HEAD;
	} else {
		/* All BODY type buffers for 2nd ring */
		val = VMXNET3_RXD_BTYPE_BODY;
	}

	while (vmxnet3_cmd_ring_desc_avail(ring) > 0) {
		struct Vmxnet3_RxDesc *rxd;
		struct rte_mbuf *mbuf;
		vmxnet3_buf_info_t *buf_info = &ring->buf_info[ring->next2fill];

		rxd = (struct Vmxnet3_RxDesc *)(ring->base + ring->next2fill);

		/* Allocate blank mbuf for the current Rx Descriptor */
		mbuf = rte_mbuf_raw_alloc(rxq->mp);
		if (unlikely(mbuf == NULL)) {
			PMD_RX_LOG(ERR, "Error allocating mbuf");
			rxq->stats.rx_buf_alloc_failure++;
			err = ENOMEM;
			break;
		}

		/*
		 * Load mbuf pointer into buf_info[ring_size]
		 * buf_info structure is equivalent to cookie for virtio-virtqueue
		 */
		buf_info->m = mbuf;
		buf_info->len = (uint16_t)(mbuf->buf_len -
					   RTE_PKTMBUF_HEADROOM);
		buf_info->bufPA = rte_mbuf_data_dma_addr_default(mbuf);

		/* Load Rx Descriptor with the buffer's GPA */
		rxd->addr = buf_info->bufPA;

		/* After this point rxd->addr MUST not be NULL */
		rxd->btype = val;
		rxd->len = buf_info->len;
		/* Flip gen bit at the end to change ownership */
		rxd->gen = ring->gen;

		vmxnet3_cmd_ring_adv_next2fill(ring);
		i++;
	}

	/* Return error only if no buffers are posted at present */
	if (vmxnet3_cmd_ring_desc_avail(ring) >= (ring->size - 1))
		return -err;
	else
		return i;
}
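
/*
 * Resulting layout for the default (non-LRO) configuration, as posted above
 * (a sketch; both rings are the same size, chosen at queue setup):
 *
 *	cmd_ring[0]: HEAD HEAD HEAD ... HEAD   <- one buffer per frame
 *	cmd_ring[1]: BODY BODY BODY ... BODY   <- spill-over, normally unused
 */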

/* Receive side checksum and other offloads */
static void
vmxnet3_rx_offload(const Vmxnet3_RxCompDesc *rcd, struct rte_mbuf *rxm)
{
	/* Check for RSS */
	if (rcd->rssType != VMXNET3_RCD_RSS_TYPE_NONE) {
		rxm->ol_flags |= PKT_RX_RSS_HASH;
		rxm->hash.rss = rcd->rssHash;
	}

	/* Check packet type, checksum errors, etc. Only support IPv4 for now. */
	if (rcd->v4) {
		struct ether_hdr *eth = rte_pktmbuf_mtod(rxm, struct ether_hdr *);
		struct ipv4_hdr *ip = (struct ipv4_hdr *)(eth + 1);

		if (((ip->version_ihl & 0xf) << 2) > (int)sizeof(struct ipv4_hdr))
			rxm->packet_type = RTE_PTYPE_L3_IPV4_EXT;
		else
			rxm->packet_type = RTE_PTYPE_L3_IPV4;

		if (!rcd->cnc) {
			if (!rcd->ipc)
				rxm->ol_flags |= PKT_RX_IP_CKSUM_BAD;

			if ((rcd->tcp || rcd->udp) && !rcd->tuc)
				rxm->ol_flags |= PKT_RX_L4_CKSUM_BAD;
		}
	}
}
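
/*
 * Example of the flag mapping above: an IPv4/TCP frame whose TCP checksum
 * failed verification arrives with rcd->v4 = 1, rcd->tcp = 1 and
 * rcd->tuc = 0, so the mbuf is tagged PKT_RX_L4_CKSUM_BAD (and classified
 * RTE_PTYPE_L3_IPV4 or _EXT depending on the IP header length).
 */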

/*
 * Process the Rx Completion Ring of given vmxnet3_rx_queue
 * for nb_pkts burst and return the number of packets received
 */
uint16_t
vmxnet3_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t nb_pkts)
{
	uint16_t nb_rx;
	uint32_t nb_rxd, idx;
	uint8_t ring_idx;
	vmxnet3_rx_queue_t *rxq;
	Vmxnet3_RxCompDesc *rcd;
	vmxnet3_buf_info_t *rbi;
	Vmxnet3_RxDesc *rxd;
	struct rte_mbuf *rxm = NULL;
	struct vmxnet3_hw *hw;

	nb_rx = 0;
	ring_idx = 0;
	nb_rxd = 0;
	idx = 0;

	rxq = rx_queue;
	hw = rxq->hw;

	rcd = &rxq->comp_ring.base[rxq->comp_ring.next2proc].rcd;

	if (unlikely(rxq->stopped)) {
		PMD_RX_LOG(DEBUG, "Rx queue is stopped.");
		return 0;
	}

	while (rcd->gen == rxq->comp_ring.gen) {
		struct rte_mbuf *newm;

		if (nb_rx >= nb_pkts)
			break;

		newm = rte_mbuf_raw_alloc(rxq->mp);
		if (unlikely(newm == NULL)) {
			PMD_RX_LOG(ERR, "Error allocating mbuf");
			rxq->stats.rx_buf_alloc_failure++;
			break;
		}

		idx = rcd->rxdIdx;
		ring_idx = vmxnet3_get_ring_idx(hw, rcd->rqID);
		rxd = (Vmxnet3_RxDesc *)rxq->cmd_ring[ring_idx].base + idx;
		RTE_SET_USED(rxd); /* used only for assert when enabled */
		rbi = rxq->cmd_ring[ring_idx].buf_info + idx;

		PMD_RX_LOG(DEBUG, "rxd idx: %d ring idx: %d.", idx, ring_idx);

		RTE_ASSERT(rcd->len <= rxd->len);
		RTE_ASSERT(rbi->m);

		/* Get the packet buffer pointer from buf_info */
		rxm = rbi->m;

		/* Clear descriptor associated buf_info to be reused */
		rbi->m = NULL;
		rbi->bufPA = 0;

		/* Update the index that we received a packet */
		rxq->cmd_ring[ring_idx].next2comp = idx;

		/* For RCD with EOP set, check if there is frame error */
		if (unlikely(rcd->eop && rcd->err)) {
			rxq->stats.drop_total++;
			rxq->stats.drop_err++;

			if (!rcd->fcs) {
				rxq->stats.drop_fcs++;
				PMD_RX_LOG(ERR, "Recv packet dropped due to frame err.");
			}
			PMD_RX_LOG(ERR, "Error in received packet rcd#:%d rxd:%d",
				   (int)(rcd - (struct Vmxnet3_RxCompDesc *)
					 rxq->comp_ring.base), rcd->rxdIdx);
			rte_pktmbuf_free_seg(rxm);
			goto rcd_done;
		}

		/* Initialize newly received packet buffer */
		rxm->port = rxq->port_id;
		rxm->nb_segs = 1;
		rxm->next = NULL;
		rxm->pkt_len = (uint16_t)rcd->len;
		rxm->data_len = (uint16_t)rcd->len;
		rxm->data_off = RTE_PKTMBUF_HEADROOM;
		rxm->ol_flags = 0;
		rxm->vlan_tci = 0;

		/*
		 * If this is the first buffer of the received packet,
		 * set the pointer to the first mbuf of the packet.
		 * Otherwise, update the total length and the number of segments
		 * of the current scattered packet, and update the pointer to
		 * the last mbuf of the current packet.
		 */
		if (rcd->sop) {
			RTE_ASSERT(rxd->btype == VMXNET3_RXD_BTYPE_HEAD);

			if (unlikely(rcd->len == 0)) {
				RTE_ASSERT(rcd->eop);

				PMD_RX_LOG(DEBUG,
					   "Rx buf was skipped. rxring[%d][%d])",
					   ring_idx, idx);
				rte_pktmbuf_free_seg(rxm);
				goto rcd_done;
			}

			if (vmxnet3_rx_data_ring(hw, rcd->rqID)) {
				uint8_t *rdd = rxq->data_ring.base +
					idx * rxq->data_desc_size;

				RTE_ASSERT(VMXNET3_VERSION_GE_3(hw));
				rte_memcpy(rte_pktmbuf_mtod(rxm, char *),
					   rdd, rcd->len);
			}

			rxq->start_seg = rxm;
			vmxnet3_rx_offload(rcd, rxm);
		} else {
			struct rte_mbuf *start = rxq->start_seg;

			RTE_ASSERT(rxd->btype == VMXNET3_RXD_BTYPE_BODY);

			start->pkt_len += rxm->data_len;
			start->nb_segs++;

			rxq->last_seg->next = rxm;
		}
		rxq->last_seg = rxm;

		if (rcd->eop) {
			struct rte_mbuf *start = rxq->start_seg;

			/* Check for hardware stripped VLAN tag */
			if (rcd->ts) {
				start->ol_flags |= (PKT_RX_VLAN_PKT | PKT_RX_VLAN_STRIPPED);
				start->vlan_tci = rte_le_to_cpu_16((uint16_t)rcd->tci);
			}

			rx_pkts[nb_rx++] = start;
			rxq->start_seg = NULL;
		}

rcd_done:
		rxq->cmd_ring[ring_idx].next2comp = idx;
		VMXNET3_INC_RING_IDX_ONLY(rxq->cmd_ring[ring_idx].next2comp,
					  rxq->cmd_ring[ring_idx].size);

		/* It's time to renew descriptors */
		vmxnet3_renew_desc(rxq, ring_idx, newm);
		if (unlikely(rxq->shared->ctrl.updateRxProd)) {
			VMXNET3_WRITE_BAR0_REG(hw, rxprod_reg[ring_idx] + (rxq->queue_id * VMXNET3_REG_ALIGN),
					       rxq->cmd_ring[ring_idx].next2fill);
		}

		/* Advance to the next descriptor in comp_ring */
		vmxnet3_comp_ring_adv_next2proc(&rxq->comp_ring);

		rcd = &rxq->comp_ring.base[rxq->comp_ring.next2proc].rcd;
		nb_rxd++;
		if (nb_rxd > rxq->cmd_ring[0].size) {
			PMD_RX_LOG(ERR, "Used up quota of receiving packets,"
				   " relinquish control.");
			break;
		}
	}

	return nb_rx;
}
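
/*
 * Usage sketch (not part of the driver): vmxnet3_recv_pkts() is installed
 * as the port's rx_pkt_burst handler, so a polling loop looks like:
 *
 *	struct rte_mbuf *bufs[32];
 *	uint16_t n = rte_eth_rx_burst(port_id, queue_id, bufs, 32);
 */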

/*
 * Create memzone for device rings. malloc can't be used as the physical
 * address is needed. If the memzone is already created, then this function
 * returns a pointer to the existing memzone.
 */
static const struct rte_memzone *
ring_dma_zone_reserve(struct rte_eth_dev *dev, const char *ring_name,
		      uint16_t queue_id, uint32_t ring_size, int socket_id)
{
	char z_name[RTE_MEMZONE_NAMESIZE];
	const struct rte_memzone *mz;

	snprintf(z_name, sizeof(z_name), "%s_%s_%d_%d",
		 dev->driver->pci_drv.driver.name, ring_name,
		 dev->data->port_id, queue_id);

	mz = rte_memzone_lookup(z_name);
	if (mz)
		return mz;

	return rte_memzone_reserve_aligned(z_name, ring_size,
					   socket_id, 0, VMXNET3_RING_BA_ALIGN);
}
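
/*
 * Note: on a lookup hit the zone is returned as-is, possibly with stale
 * contents from a previous run; both queue-setup paths below therefore
 * memset() the zone before laying out their rings in it.
 */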

int
vmxnet3_dev_tx_queue_setup(struct rte_eth_dev *dev,
			   uint16_t queue_idx,
			   uint16_t nb_desc,
			   unsigned int socket_id,
			   const struct rte_eth_txconf *tx_conf)
{
	struct vmxnet3_hw *hw = dev->data->dev_private;
	const struct rte_memzone *mz;
	struct vmxnet3_tx_queue *txq;
	struct vmxnet3_cmd_ring *ring;
	struct vmxnet3_comp_ring *comp_ring;
	struct vmxnet3_data_ring *data_ring;
	int size;

	PMD_INIT_FUNC_TRACE();

	if ((tx_conf->txq_flags & ETH_TXQ_FLAGS_NOXSUMSCTP) !=
	    ETH_TXQ_FLAGS_NOXSUMSCTP) {
		PMD_INIT_LOG(ERR, "SCTP checksum offload not supported");
		return -EINVAL;
	}

	txq = rte_zmalloc("ethdev_tx_queue", sizeof(struct vmxnet3_tx_queue),
			  RTE_CACHE_LINE_SIZE);
	if (txq == NULL) {
		PMD_INIT_LOG(ERR, "Can not allocate tx queue structure");
		return -ENOMEM;
	}

	txq->queue_id = queue_idx;
	txq->port_id = dev->data->port_id;
	txq->shared = &hw->tqd_start[queue_idx];
	txq->hw = hw;
	txq->qid = queue_idx;
	txq->stopped = TRUE;
	txq->txdata_desc_size = hw->txdata_desc_size;

	ring = &txq->cmd_ring;
	comp_ring = &txq->comp_ring;
	data_ring = &txq->data_ring;

	/* Tx vmxnet ring length should be between 512-4096 */
	if (nb_desc < VMXNET3_DEF_TX_RING_SIZE) {
		PMD_INIT_LOG(ERR, "VMXNET3 Tx Ring Size Min: %u",
			     VMXNET3_DEF_TX_RING_SIZE);
		return -EINVAL;
	} else if (nb_desc > VMXNET3_TX_RING_MAX_SIZE) {
		PMD_INIT_LOG(ERR, "VMXNET3 Tx Ring Size Max: %u",
			     VMXNET3_TX_RING_MAX_SIZE);
		return -EINVAL;
	} else {
		ring->size = nb_desc;
		ring->size &= ~VMXNET3_RING_SIZE_MASK;
	}
	comp_ring->size = data_ring->size = ring->size;

	/* Tx vmxnet rings structure initialization */
	ring->next2fill = 0;
	ring->next2comp = 0;
	ring->gen = VMXNET3_INIT_GEN;
	comp_ring->next2proc = 0;
	comp_ring->gen = VMXNET3_INIT_GEN;

	size = sizeof(struct Vmxnet3_TxDesc) * ring->size;
	size += sizeof(struct Vmxnet3_TxCompDesc) * comp_ring->size;
	size += txq->txdata_desc_size * data_ring->size;

	mz = ring_dma_zone_reserve(dev, "txdesc", queue_idx, size, socket_id);
	if (mz == NULL) {
		PMD_INIT_LOG(ERR, "ERROR: Creating queue descriptors zone");
		return -ENOMEM;
	}
	memset(mz->addr, 0, mz->len);

	/* cmd_ring initialization */
	ring->base = mz->addr;
	ring->basePA = mz->phys_addr;

	/* comp_ring initialization */
	comp_ring->base = ring->base + ring->size;
	comp_ring->basePA = ring->basePA +
		(sizeof(struct Vmxnet3_TxDesc) * ring->size);

	/* data_ring initialization */
	data_ring->base = (Vmxnet3_TxDataDesc *)(comp_ring->base + comp_ring->size);
	data_ring->basePA = comp_ring->basePA +
		(sizeof(struct Vmxnet3_TxCompDesc) * comp_ring->size);

	/* cmd_ring0 buf_info allocation */
	ring->buf_info = rte_zmalloc("tx_ring_buf_info",
				     ring->size * sizeof(vmxnet3_buf_info_t),
				     RTE_CACHE_LINE_SIZE);
	if (ring->buf_info == NULL) {
		PMD_INIT_LOG(ERR, "ERROR: Creating tx_buf_info structure");
		return -ENOMEM;
	}

	/* Update the data portion with txq */
	dev->data->tx_queues[queue_idx] = txq;

	return 0;
}
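
/*
 * Descriptor memory sizing, worked through (assuming the ring sizes chosen
 * above): for nb_desc = 512 the memzone holds 512 Tx descriptors, 512
 * completion descriptors and 512 data-ring slots of txdata_desc_size bytes
 * each, laid out back-to-back in that order.
 */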

int
vmxnet3_dev_rx_queue_setup(struct rte_eth_dev *dev,
			   uint16_t queue_idx,
			   uint16_t nb_desc,
			   unsigned int socket_id,
			   __rte_unused const struct rte_eth_rxconf *rx_conf,
			   struct rte_mempool *mp)
{
	const struct rte_memzone *mz;
	struct vmxnet3_rx_queue *rxq;
	struct vmxnet3_hw *hw = dev->data->dev_private;
	struct vmxnet3_cmd_ring *ring0, *ring1, *ring;
	struct vmxnet3_comp_ring *comp_ring;
	struct vmxnet3_rx_data_ring *data_ring;
	int size;
	uint8_t i;
	char mem_name[32];

	PMD_INIT_FUNC_TRACE();

	rxq = rte_zmalloc("ethdev_rx_queue", sizeof(struct vmxnet3_rx_queue),
			  RTE_CACHE_LINE_SIZE);
	if (rxq == NULL) {
		PMD_INIT_LOG(ERR, "Can not allocate rx queue structure");
		return -ENOMEM;
	}

	rxq->mp = mp;
	rxq->queue_id = queue_idx;
	rxq->port_id = dev->data->port_id;
	rxq->shared = &hw->rqd_start[queue_idx];
	rxq->hw = hw;
	rxq->qid1 = queue_idx;
	rxq->qid2 = queue_idx + hw->num_rx_queues;
	rxq->data_ring_qid = queue_idx + 2 * hw->num_rx_queues;
	rxq->data_desc_size = hw->rxdata_desc_size;
	rxq->stopped = TRUE;

	ring0 = &rxq->cmd_ring[0];
	ring1 = &rxq->cmd_ring[1];
	comp_ring = &rxq->comp_ring;
	data_ring = &rxq->data_ring;

	/* Rx vmxnet rings length should be between 256-4096 */
	if (nb_desc < VMXNET3_DEF_RX_RING_SIZE) {
		PMD_INIT_LOG(ERR, "VMXNET3 Rx Ring Size Min: 256");
		return -EINVAL;
	} else if (nb_desc > VMXNET3_RX_RING_MAX_SIZE) {
		PMD_INIT_LOG(ERR, "VMXNET3 Rx Ring Size Max: 4096");
		return -EINVAL;
	} else {
		ring0->size = nb_desc;
		ring0->size &= ~VMXNET3_RING_SIZE_MASK;
		ring1->size = ring0->size;
	}

	comp_ring->size = ring0->size + ring1->size;
	data_ring->size = ring0->size;

	/* Rx vmxnet rings structure initialization */
	ring0->next2fill = 0;
	ring1->next2fill = 0;
	ring0->next2comp = 0;
	ring1->next2comp = 0;
	ring0->gen = VMXNET3_INIT_GEN;
	ring1->gen = VMXNET3_INIT_GEN;
	comp_ring->next2proc = 0;
	comp_ring->gen = VMXNET3_INIT_GEN;

	size = sizeof(struct Vmxnet3_RxDesc) * (ring0->size + ring1->size);
	size += sizeof(struct Vmxnet3_RxCompDesc) * comp_ring->size;
	if (VMXNET3_VERSION_GE_3(hw) && rxq->data_desc_size)
		size += rxq->data_desc_size * data_ring->size;

	mz = ring_dma_zone_reserve(dev, "rxdesc", queue_idx, size, socket_id);
	if (mz == NULL) {
		PMD_INIT_LOG(ERR, "ERROR: Creating queue descriptors zone");
		return -ENOMEM;
	}
	memset(mz->addr, 0, mz->len);

	/* cmd_ring0 initialization */
	ring0->base = mz->addr;
	ring0->basePA = mz->phys_addr;

	/* cmd_ring1 initialization */
	ring1->base = ring0->base + ring0->size;
	ring1->basePA = ring0->basePA + sizeof(struct Vmxnet3_RxDesc) * ring0->size;

	/* comp_ring initialization */
	comp_ring->base = ring1->base + ring1->size;
	comp_ring->basePA = ring1->basePA + sizeof(struct Vmxnet3_RxDesc) *
		ring1->size;

	/* data_ring initialization */
	if (VMXNET3_VERSION_GE_3(hw) && rxq->data_desc_size) {
		data_ring->base =
			(uint8_t *)(comp_ring->base + comp_ring->size);
		data_ring->basePA = comp_ring->basePA +
			sizeof(struct Vmxnet3_RxCompDesc) * comp_ring->size;
	}

	/* cmd_ring0-cmd_ring1 buf_info allocation */
	for (i = 0; i < VMXNET3_RX_CMDRING_SIZE; i++) {

		ring = &rxq->cmd_ring[i];
		snprintf(mem_name, sizeof(mem_name), "rx_ring_%d_buf_info", i);

		ring->buf_info = rte_zmalloc(mem_name,
					     ring->size * sizeof(vmxnet3_buf_info_t),
					     RTE_CACHE_LINE_SIZE);
		if (ring->buf_info == NULL) {
			PMD_INIT_LOG(ERR, "ERROR: Creating rx_buf_info structure");
			return -ENOMEM;
		}
	}

	/* Update the data portion with rxq */
	dev->data->rx_queues[queue_idx] = rxq;

	return 0;
}
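
/*
 * Note on the queue ids assigned above: the device views each RX queue as
 * up to three queues -- qid1 for cmd_ring[0], qid2 (= qid1 + num_rx_queues)
 * for cmd_ring[1], and data_ring_qid beyond those when the RX data ring is
 * enabled.
 */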

/*
 * Initializes Receive Unit
 * Load mbufs in rx queue in advance
 */
int
vmxnet3_dev_rxtx_init(struct rte_eth_dev *dev)
{
	struct vmxnet3_hw *hw = dev->data->dev_private;

	int i, ret;
	uint8_t j;

	PMD_INIT_FUNC_TRACE();

	for (i = 0; i < hw->num_rx_queues; i++) {
		vmxnet3_rx_queue_t *rxq = dev->data->rx_queues[i];

		for (j = 0; j < VMXNET3_RX_CMDRING_SIZE; j++) {
			/* Fill command ring j of this queue completely */
			ret = vmxnet3_post_rx_bufs(rxq, j);
			if (ret <= 0) {
				PMD_INIT_LOG(ERR,
					     "ERROR: Posting Rxq: %d buffers ring: %d",
					     i, j);
				return -ret;
			}
			/*
			 * Updating device with the index:next2fill to fill the
			 * mbufs for coming packets.
			 */
			if (unlikely(rxq->shared->ctrl.updateRxProd)) {
				VMXNET3_WRITE_BAR0_REG(hw, rxprod_reg[j] + (rxq->queue_id * VMXNET3_REG_ALIGN),
						       rxq->cmd_ring[j].next2fill);
			}
		}
		rxq->stopped = FALSE;
		rxq->start_seg = NULL;
	}

	for (i = 0; i < dev->data->nb_tx_queues; i++) {
		struct vmxnet3_tx_queue *txq = dev->data->tx_queues[i];

		txq->stopped = FALSE;
	}

	return 0;
}
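
/*
 * Expected call order (a sketch of the usual ethdev flow): the application
 * runs rte_eth_dev_configure(), then the per-queue setup functions above,
 * then rte_eth_dev_start(), which reaches vmxnet3_dev_rxtx_init() so every
 * RX ring is pre-filled before the first packet can arrive.
 */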

static uint8_t rss_intel_key[40] = {
	0x6D, 0x5A, 0x56, 0xDA, 0x25, 0x5B, 0x0E, 0xC2,
	0x41, 0x67, 0x25, 0x3D, 0x43, 0xA3, 0x8F, 0xB0,
	0xD0, 0xCA, 0x2B, 0xCB, 0xAE, 0x7B, 0x30, 0xB4,
	0x77, 0xCB, 0x2D, 0xA3, 0x80, 0x30, 0xF2, 0x0C,
	0x6A, 0x42, 0xB7, 0x3B, 0xBE, 0xAC, 0x01, 0xFA,
};

/*
 * Configure RSS feature
 */
int
vmxnet3_rss_configure(struct rte_eth_dev *dev)
{
	struct vmxnet3_hw *hw = dev->data->dev_private;
	struct VMXNET3_RSSConf *dev_rss_conf;
	struct rte_eth_rss_conf *port_rss_conf;
	uint64_t rss_hf;
	uint8_t i, j;

	PMD_INIT_FUNC_TRACE();

	dev_rss_conf = hw->rss_conf;
	port_rss_conf = &dev->data->dev_conf.rx_adv_conf.rss_conf;

	/* loading hashFunc */
	dev_rss_conf->hashFunc = VMXNET3_RSS_HASH_FUNC_TOEPLITZ;
	/* loading hashKeySize */
	dev_rss_conf->hashKeySize = VMXNET3_RSS_MAX_KEY_SIZE;
	/* loading indTableSize: Must not exceed VMXNET3_RSS_MAX_IND_TABLE_SIZE (128) */
	dev_rss_conf->indTableSize = (uint16_t)(hw->num_rx_queues * 4);

	if (port_rss_conf->rss_key == NULL) {
		/* Default hash key */
		port_rss_conf->rss_key = rss_intel_key;
	}

	/* loading hashKey */
	memcpy(&dev_rss_conf->hashKey[0], port_rss_conf->rss_key,
	       dev_rss_conf->hashKeySize);

	/* loading indTable: spread the RX queues round-robin, e.g. with
	 * 4 queues the table reads 0,1,2,3,0,1,2,3,...
	 */
	for (i = 0, j = 0; i < dev_rss_conf->indTableSize; i++, j++) {
		if (j == dev->data->nb_rx_queues)
			j = 0;
		dev_rss_conf->indTable[i] = j;
	}

	/* loading hashType */
	dev_rss_conf->hashType = 0;
	rss_hf = port_rss_conf->rss_hf & VMXNET3_RSS_OFFLOAD_ALL;
	if (rss_hf & ETH_RSS_IPV4)
		dev_rss_conf->hashType |= VMXNET3_RSS_HASH_TYPE_IPV4;
	if (rss_hf & ETH_RSS_NONFRAG_IPV4_TCP)
		dev_rss_conf->hashType |= VMXNET3_RSS_HASH_TYPE_TCP_IPV4;
	if (rss_hf & ETH_RSS_IPV6)
		dev_rss_conf->hashType |= VMXNET3_RSS_HASH_TYPE_IPV6;
	if (rss_hf & ETH_RSS_NONFRAG_IPV6_TCP)
		dev_rss_conf->hashType |= VMXNET3_RSS_HASH_TYPE_TCP_IPV6;

	return VMXNET3_SUCCESS;
}