/*-
 *   BSD LICENSE
 *
 *   Copyright(c) 2010-2015 Intel Corporation. All rights reserved.
 *   All rights reserved.
 *
 *   Redistribution and use in source and binary forms, with or without
 *   modification, are permitted provided that the following conditions
 *   are met:
 *
 *     * Redistributions of source code must retain the above copyright
 *       notice, this list of conditions and the following disclaimer.
 *     * Redistributions in binary form must reproduce the above copyright
 *       notice, this list of conditions and the following disclaimer in
 *       the documentation and/or other materials provided with the
 *       distribution.
 *     * Neither the name of Intel Corporation nor the names of its
 *       contributors may be used to endorse or promote products derived
 *       from this software without specific prior written permission.
 *
 *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

#include <sys/queue.h>

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <errno.h>
#include <stdint.h>
#include <stdarg.h>
#include <unistd.h>
#include <inttypes.h>

#include <rte_byteorder.h>
#include <rte_common.h>
#include <rte_cycles.h>
#include <rte_log.h>
#include <rte_debug.h>
#include <rte_interrupts.h>
#include <rte_pci.h>
#include <rte_memory.h>
#include <rte_memzone.h>
#include <rte_launch.h>
#include <rte_eal.h>
#include <rte_per_lcore.h>
#include <rte_lcore.h>
#include <rte_atomic.h>
#include <rte_branch_prediction.h>
#include <rte_mempool.h>
#include <rte_malloc.h>
#include <rte_mbuf.h>
#include <rte_ether.h>
#include <rte_ethdev.h>
#include <rte_prefetch.h>
#include <rte_ip.h>
#include <rte_udp.h>
#include <rte_tcp.h>
#include <rte_sctp.h>
#include <rte_string_fns.h>
#include <rte_errno.h>
#include <rte_net.h>

#include "base/vmxnet3_defs.h"
#include "vmxnet3_ring.h"

#include "vmxnet3_logs.h"
#include "vmxnet3_ethdev.h"

/* Tx offloads this PMD supports: VLAN insertion, L4 checksum and TSO. */
#define	VMXNET3_TX_OFFLOAD_MASK	( \
		PKT_TX_VLAN_PKT | \
		PKT_TX_L4_MASK |  \
		PKT_TX_TCP_SEG)

#define	VMXNET3_TX_OFFLOAD_NOTSUP_MASK	\
	(PKT_TX_OFFLOAD_MASK ^ VMXNET3_TX_OFFLOAD_MASK)
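
/*
 * Any flag set in PKT_TX_OFFLOAD_MASK but absent from
 * VMXNET3_TX_OFFLOAD_MASK is an offload this device cannot perform;
 * vmxnet3_prep_pkts() below checks packets against the NOTSUP mask and
 * rejects offenders with rte_errno set to -ENOTSUP.
 */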

/* One Rx producer doorbell register per hardware command ring. */
static const uint32_t rxprod_reg[2] = {VMXNET3_REG_RXPROD, VMXNET3_REG_RXPROD2};

static int vmxnet3_post_rx_bufs(vmxnet3_rx_queue_t*, uint8_t);
static void vmxnet3_tq_tx_complete(vmxnet3_tx_queue_t *);
#ifdef RTE_LIBRTE_VMXNET3_DEBUG_DRIVER_NOT_USED
static void vmxnet3_rxq_dump(struct vmxnet3_rx_queue *);
static void vmxnet3_txq_dump(struct vmxnet3_tx_queue *);
#endif

#ifdef RTE_LIBRTE_VMXNET3_DEBUG_DRIVER_NOT_USED
static void
vmxnet3_rxq_dump(struct vmxnet3_rx_queue *rxq)
{
	uint32_t avail = 0;

	if (rxq == NULL)
		return;

	PMD_RX_LOG(DEBUG,
		   "RXQ: cmd0 base : %p cmd1 base : %p comp ring base : %p.",
		   rxq->cmd_ring[0].base, rxq->cmd_ring[1].base, rxq->comp_ring.base);
	PMD_RX_LOG(DEBUG,
		   "RXQ: cmd0 basePA : 0x%lx cmd1 basePA : 0x%lx comp ring basePA : 0x%lx.",
		   (unsigned long)rxq->cmd_ring[0].basePA,
		   (unsigned long)rxq->cmd_ring[1].basePA,
		   (unsigned long)rxq->comp_ring.basePA);

	avail = vmxnet3_cmd_ring_desc_avail(&rxq->cmd_ring[0]);
	PMD_RX_LOG(DEBUG,
		   "RXQ:cmd0: size=%u; free=%u; next2proc=%u; queued=%u",
		   (uint32_t)rxq->cmd_ring[0].size, avail,
		   rxq->comp_ring.next2proc,
		   rxq->cmd_ring[0].size - avail);

	avail = vmxnet3_cmd_ring_desc_avail(&rxq->cmd_ring[1]);
	PMD_RX_LOG(DEBUG, "RXQ:cmd1 size=%u; free=%u; next2proc=%u; queued=%u",
		   (uint32_t)rxq->cmd_ring[1].size, avail, rxq->comp_ring.next2proc,
		   rxq->cmd_ring[1].size - avail);
}

static void
vmxnet3_txq_dump(struct vmxnet3_tx_queue *txq)
{
	uint32_t avail = 0;

	if (txq == NULL)
		return;

	PMD_TX_LOG(DEBUG, "TXQ: cmd base : %p comp ring base : %p data ring base : %p.",
		   txq->cmd_ring.base, txq->comp_ring.base, txq->data_ring.base);
	PMD_TX_LOG(DEBUG, "TXQ: cmd basePA : 0x%lx comp ring basePA : 0x%lx data ring basePA : 0x%lx.",
		   (unsigned long)txq->cmd_ring.basePA,
		   (unsigned long)txq->comp_ring.basePA,
		   (unsigned long)txq->data_ring.basePA);

	avail = vmxnet3_cmd_ring_desc_avail(&txq->cmd_ring);
	PMD_TX_LOG(DEBUG, "TXQ: size=%u; free=%u; next2proc=%u; queued=%u",
		   (uint32_t)txq->cmd_ring.size, avail,
		   txq->comp_ring.next2proc, txq->cmd_ring.size - avail);
}
#endif

static void
vmxnet3_tx_cmd_ring_release_mbufs(vmxnet3_cmd_ring_t *ring)
{
	while (ring->next2comp != ring->next2fill) {
		/* No need to worry about desc ownership, device is quiesced by now. */
		vmxnet3_buf_info_t *buf_info = ring->buf_info + ring->next2comp;

		if (buf_info->m) {
			rte_pktmbuf_free(buf_info->m);
			buf_info->m = NULL;
			buf_info->bufPA = 0;
			buf_info->len = 0;
		}
		vmxnet3_cmd_ring_adv_next2comp(ring);
	}
}

static void
vmxnet3_rx_cmd_ring_release_mbufs(vmxnet3_cmd_ring_t *ring)
{
	uint32_t i;

	for (i = 0; i < ring->size; i++) {
		/* No need to worry about desc ownership, device is quiesced by now. */
		vmxnet3_buf_info_t *buf_info = &ring->buf_info[i];

		if (buf_info->m) {
			rte_pktmbuf_free_seg(buf_info->m);
			buf_info->m = NULL;
			buf_info->bufPA = 0;
			buf_info->len = 0;
		}
		vmxnet3_cmd_ring_adv_next2comp(ring);
	}
}

static void
vmxnet3_cmd_ring_release(vmxnet3_cmd_ring_t *ring)
{
	rte_free(ring->buf_info);
	ring->buf_info = NULL;
}

void
vmxnet3_dev_tx_queue_release(void *txq)
{
	vmxnet3_tx_queue_t *tq = txq;

	if (tq != NULL) {
		/* Release mbufs */
		vmxnet3_tx_cmd_ring_release_mbufs(&tq->cmd_ring);
		/* Release the cmd_ring */
		vmxnet3_cmd_ring_release(&tq->cmd_ring);
	}
}

void
vmxnet3_dev_rx_queue_release(void *rxq)
{
	int i;
	vmxnet3_rx_queue_t *rq = rxq;

	if (rq != NULL) {
		/* Release mbufs */
		for (i = 0; i < VMXNET3_RX_CMDRING_SIZE; i++)
			vmxnet3_rx_cmd_ring_release_mbufs(&rq->cmd_ring[i]);

		/* Release both the cmd_rings */
		for (i = 0; i < VMXNET3_RX_CMDRING_SIZE; i++)
			vmxnet3_cmd_ring_release(&rq->cmd_ring[i]);
	}
}

static void
vmxnet3_dev_tx_queue_reset(void *txq)
{
	vmxnet3_tx_queue_t *tq = txq;
	struct vmxnet3_cmd_ring *ring = &tq->cmd_ring;
	struct vmxnet3_comp_ring *comp_ring = &tq->comp_ring;
	struct vmxnet3_data_ring *data_ring = &tq->data_ring;
	int size;

	if (tq != NULL) {
		/* Release the cmd_ring mbufs */
		vmxnet3_tx_cmd_ring_release_mbufs(&tq->cmd_ring);
	}

	/* Tx vmxnet rings structure initialization*/
	ring->next2fill = 0;
	ring->next2comp = 0;
	ring->gen = VMXNET3_INIT_GEN;
	comp_ring->next2proc = 0;
	comp_ring->gen = VMXNET3_INIT_GEN;

	size = sizeof(struct Vmxnet3_TxDesc) * ring->size;
	size += sizeof(struct Vmxnet3_TxCompDesc) * comp_ring->size;
	size += sizeof(struct Vmxnet3_TxDataDesc) * data_ring->size;

	memset(ring->base, 0, size);
}
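
/*
 * Note: a single memset can clear all three rings because the queue setup
 * code later in this file carves the command, completion and data rings out
 * of one contiguous memzone, in exactly this order.
 */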

static void
vmxnet3_dev_rx_queue_reset(void *rxq)
{
	int i;
	vmxnet3_rx_queue_t *rq = rxq;
	struct vmxnet3_cmd_ring *ring0, *ring1;
	struct vmxnet3_comp_ring *comp_ring;
	int size;

	if (rq != NULL) {
		/* Release both the cmd_rings mbufs */
		for (i = 0; i < VMXNET3_RX_CMDRING_SIZE; i++)
			vmxnet3_rx_cmd_ring_release_mbufs(&rq->cmd_ring[i]);
	}

	ring0 = &rq->cmd_ring[0];
	ring1 = &rq->cmd_ring[1];
	comp_ring = &rq->comp_ring;

	/* Rx vmxnet rings structure initialization */
	ring0->next2fill = 0;
	ring1->next2fill = 0;
	ring0->next2comp = 0;
	ring1->next2comp = 0;
	ring0->gen = VMXNET3_INIT_GEN;
	ring1->gen = VMXNET3_INIT_GEN;
	comp_ring->next2proc = 0;
	comp_ring->gen = VMXNET3_INIT_GEN;

	size = sizeof(struct Vmxnet3_RxDesc) * (ring0->size + ring1->size);
	size += sizeof(struct Vmxnet3_RxCompDesc) * comp_ring->size;

	memset(ring0->base, 0, size);
}
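
/*
 * As on the Tx side, both Rx command rings and the completion ring live in
 * one contiguous memzone, so a single memset starting at ring0->base clears
 * them all.
 */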

void
vmxnet3_dev_clear_queues(struct rte_eth_dev *dev)
{
	unsigned i;

	PMD_INIT_FUNC_TRACE();

	for (i = 0; i < dev->data->nb_tx_queues; i++) {
		struct vmxnet3_tx_queue *txq = dev->data->tx_queues[i];

		if (txq != NULL) {
			txq->stopped = TRUE;
			vmxnet3_dev_tx_queue_reset(txq);
		}
	}

	for (i = 0; i < dev->data->nb_rx_queues; i++) {
		struct vmxnet3_rx_queue *rxq = dev->data->rx_queues[i];

		if (rxq != NULL) {
			rxq->stopped = TRUE;
			vmxnet3_dev_rx_queue_reset(rxq);
		}
	}
}

static int
vmxnet3_unmap_pkt(uint16_t eop_idx, vmxnet3_tx_queue_t *txq)
{
	int completed = 0;
	struct rte_mbuf *mbuf;

	/* Release cmd_ring descriptor and free mbuf */
	RTE_ASSERT(txq->cmd_ring.base[eop_idx].txd.eop == 1);

	mbuf = txq->cmd_ring.buf_info[eop_idx].m;
	if (mbuf == NULL)
		rte_panic("EOP desc does not point to a valid mbuf");
	rte_pktmbuf_free(mbuf);

	txq->cmd_ring.buf_info[eop_idx].m = NULL;

	while (txq->cmd_ring.next2comp != eop_idx) {
		/* no out-of-order completion */
		RTE_ASSERT(txq->cmd_ring.base[txq->cmd_ring.next2comp].txd.cq == 0);
		vmxnet3_cmd_ring_adv_next2comp(&txq->cmd_ring);
		completed++;
	}

	/* Mark the txd for which tcd was generated as completed */
	vmxnet3_cmd_ring_adv_next2comp(&txq->cmd_ring);

	return completed + 1;
}
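
/*
 * The device completes descriptors in ring order and raises one completion
 * (on the EOP descriptor) per packet, which is why the loop above can simply
 * advance next2comp until it reaches eop_idx and count everything on the way
 * as completed.
 */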

static void
vmxnet3_tq_tx_complete(vmxnet3_tx_queue_t *txq)
{
	int completed = 0;
	vmxnet3_comp_ring_t *comp_ring = &txq->comp_ring;
	struct Vmxnet3_TxCompDesc *tcd = (struct Vmxnet3_TxCompDesc *)
		(comp_ring->base + comp_ring->next2proc);

	while (tcd->gen == comp_ring->gen) {
		completed += vmxnet3_unmap_pkt(tcd->txdIdx, txq);

		vmxnet3_comp_ring_adv_next2proc(comp_ring);
		tcd = (struct Vmxnet3_TxCompDesc *)(comp_ring->base +
						    comp_ring->next2proc);
	}

	PMD_TX_LOG(DEBUG, "Processed %d tx comps & command descs.", completed);
}

uint16_t
vmxnet3_prep_pkts(__rte_unused void *tx_queue, struct rte_mbuf **tx_pkts,
	uint16_t nb_pkts)
{
	int32_t ret;
	uint32_t i;
	uint64_t ol_flags;
	struct rte_mbuf *m;

	for (i = 0; i != nb_pkts; i++) {
		m = tx_pkts[i];
		ol_flags = m->ol_flags;

		/* Non-TSO packet cannot occupy more than
		 * VMXNET3_MAX_TXD_PER_PKT TX descriptors.
		 */
		if ((ol_flags & PKT_TX_TCP_SEG) == 0 &&
				m->nb_segs > VMXNET3_MAX_TXD_PER_PKT) {
			rte_errno = -EINVAL;
			return i;
		}

		/* check that only supported TX offloads are requested. */
		if ((ol_flags & VMXNET3_TX_OFFLOAD_NOTSUP_MASK) != 0 ||
				(ol_flags & PKT_TX_L4_MASK) ==
				PKT_TX_SCTP_CKSUM) {
			rte_errno = -ENOTSUP;
			return i;
		}

#ifdef RTE_LIBRTE_ETHDEV_DEBUG
		ret = rte_validate_tx_offload(m);
		if (ret != 0) {
			rte_errno = ret;
			return i;
		}
#endif
		ret = rte_net_intel_cksum_prepare(m);
		if (ret != 0) {
			rte_errno = ret;
			return i;
		}
	}

	return i;
}
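
/*
 * vmxnet3_prep_pkts() is meant to be installed as the device's
 * tx_pkt_prepare callback, i.e. applications reach it through
 * rte_eth_tx_prepare() before calling rte_eth_tx_burst().
 */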

uint16_t
vmxnet3_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts,
		  uint16_t nb_pkts)
{
	uint16_t nb_tx;
	vmxnet3_tx_queue_t *txq = tx_queue;
	struct vmxnet3_hw *hw = txq->hw;
	Vmxnet3_TxQueueCtrl *txq_ctrl = &txq->shared->ctrl;
	uint32_t deferred = rte_le_to_cpu_32(txq_ctrl->txNumDeferred);

	if (unlikely(txq->stopped)) {
		PMD_TX_LOG(DEBUG, "Tx queue is stopped.");
		return 0;
	}

	/* Free up the comp_descriptors aggressively */
	vmxnet3_tq_tx_complete(txq);

	nb_tx = 0;
	while (nb_tx < nb_pkts) {
		Vmxnet3_GenericDesc *gdesc;
		vmxnet3_buf_info_t *tbi;
		uint32_t first2fill, avail, dw2;
		struct rte_mbuf *txm = tx_pkts[nb_tx];
		struct rte_mbuf *m_seg = txm;
		int copy_size = 0;
		bool tso = (txm->ol_flags & PKT_TX_TCP_SEG) != 0;
		/* # of descriptors needed for a packet. */
		unsigned count = txm->nb_segs;

		avail = vmxnet3_cmd_ring_desc_avail(&txq->cmd_ring);
		if (count > avail) {
			/* Is command ring full? */
			if (unlikely(avail == 0)) {
				PMD_TX_LOG(DEBUG, "No free ring descriptors");
				txq->stats.tx_ring_full++;
				txq->stats.drop_total += (nb_pkts - nb_tx);
				break;
			}

			/* Command ring is not full but cannot handle the
			 * multi-segmented packet. Let's try the next packet
			 * in this case.
			 */
			PMD_TX_LOG(DEBUG, "Running out of ring descriptors "
				   "(avail %d needed %d)", avail, count);
			txq->stats.drop_total++;
			if (tso)
				txq->stats.drop_tso++;
			rte_pktmbuf_free(txm);
			nb_tx++;
			continue;
		}

		/* Drop non-TSO packet that is excessively fragmented */
		if (unlikely(!tso && count > VMXNET3_MAX_TXD_PER_PKT)) {
			PMD_TX_LOG(ERR, "Non-TSO packet cannot occupy more than %d tx "
				   "descriptors. Packet dropped.", VMXNET3_MAX_TXD_PER_PKT);
			txq->stats.drop_too_many_segs++;
			txq->stats.drop_total++;
			rte_pktmbuf_free(txm);
			nb_tx++;
			continue;
		}

		if (txm->nb_segs == 1 &&
		    rte_pktmbuf_pkt_len(txm) <= VMXNET3_HDR_COPY_SIZE) {
			struct Vmxnet3_TxDataDesc *tdd;

			tdd = txq->data_ring.base + txq->cmd_ring.next2fill;
			copy_size = rte_pktmbuf_pkt_len(txm);
			rte_memcpy(tdd->data, rte_pktmbuf_mtod(txm, char *), copy_size);
		}
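
		/*
		 * Packets short enough for the data ring were copied above;
		 * their SOP descriptor is then pointed at the data ring entry
		 * instead of the mbuf (see the copy_size test below), letting
		 * the backend read tiny frames from one contiguous,
		 * already-mapped area.
		 */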

		/* use the previous gen bit for the SOP desc */
		dw2 = (txq->cmd_ring.gen ^ 0x1) << VMXNET3_TXD_GEN_SHIFT;
		first2fill = txq->cmd_ring.next2fill;
		do {
			/* Remember the transmit buffer for cleanup */
			tbi = txq->cmd_ring.buf_info + txq->cmd_ring.next2fill;

			/* NB: the following assumes that VMXNET3 maximum
			 * transmit buffer size (16K) is greater than
			 * maximum size of mbuf segment size.
			 */
			gdesc = txq->cmd_ring.base + txq->cmd_ring.next2fill;
			if (copy_size)
				gdesc->txd.addr = rte_cpu_to_le_64(txq->data_ring.basePA +
								   txq->cmd_ring.next2fill *
								   sizeof(struct Vmxnet3_TxDataDesc));
			else
				gdesc->txd.addr = rte_mbuf_data_dma_addr(m_seg);

			gdesc->dword[2] = dw2 | m_seg->data_len;
			gdesc->dword[3] = 0;

			/* move to the next2fill descriptor */
			vmxnet3_cmd_ring_adv_next2fill(&txq->cmd_ring);

			/* use the right gen for non-SOP desc */
			dw2 = txq->cmd_ring.gen << VMXNET3_TXD_GEN_SHIFT;
		} while ((m_seg = m_seg->next) != NULL);

		/* set the last buf_info for the pkt */
		tbi->m = txm;
		/* Update the EOP descriptor */
		gdesc->dword[3] |= VMXNET3_TXD_EOP | VMXNET3_TXD_CQ;

		/* Add VLAN tag if present */
		gdesc = txq->cmd_ring.base + first2fill;
		if (txm->ol_flags & PKT_TX_VLAN_PKT) {
			gdesc->txd.ti = 1;
			gdesc->txd.tci = txm->vlan_tci;
		}

		if (tso) {
			uint16_t mss = txm->tso_segsz;

			RTE_ASSERT(mss > 0);

			gdesc->txd.hlen = txm->l2_len + txm->l3_len + txm->l4_len;
			gdesc->txd.om = VMXNET3_OM_TSO;
			/* in TSO mode, msscof carries the MSS */
			gdesc->txd.msscof = mss;

			deferred += (rte_pktmbuf_pkt_len(txm) - gdesc->txd.hlen + mss - 1) / mss;
		} else if (txm->ol_flags & PKT_TX_L4_MASK) {
			gdesc->txd.om = VMXNET3_OM_CSUM;
			gdesc->txd.hlen = txm->l2_len + txm->l3_len;

			/* in checksum mode, msscof holds the L4 checksum offset */
			switch (txm->ol_flags & PKT_TX_L4_MASK) {
			case PKT_TX_TCP_CKSUM:
				gdesc->txd.msscof = gdesc->txd.hlen + offsetof(struct tcp_hdr, cksum);
				break;
			case PKT_TX_UDP_CKSUM:
				gdesc->txd.msscof = gdesc->txd.hlen + offsetof(struct udp_hdr, dgram_cksum);
				break;
			default:
				PMD_TX_LOG(WARNING, "requested cksum offload not supported %#llx",
					   txm->ol_flags & PKT_TX_L4_MASK);
				abort();
			}
			deferred++;
		} else {
			gdesc->txd.hlen = 0;
			gdesc->txd.om = VMXNET3_OM_NONE;
			gdesc->txd.msscof = 0;
			deferred++;
		}

		/* flip the GEN bit on the SOP */
		rte_compiler_barrier();
		gdesc->dword[2] ^= VMXNET3_TXD_GEN;

		txq_ctrl->txNumDeferred = rte_cpu_to_le_32(deferred);
		nb_tx++;
	}

	PMD_TX_LOG(DEBUG, "vmxnet3 txThreshold: %u", rte_le_to_cpu_32(txq_ctrl->txThreshold));

	if (deferred >= rte_le_to_cpu_32(txq_ctrl->txThreshold)) {
		txq_ctrl->txNumDeferred = 0;
		/* Notify vSwitch that packets are available. */
		VMXNET3_WRITE_BAR0_REG(hw, (VMXNET3_REG_TXPROD + txq->queue_id * VMXNET3_REG_ALIGN),
				       txq->cmd_ring.next2fill);
	}

	return nb_tx;
}
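
/*
 * Doorbell batching: descriptors are published to the device by flipping the
 * SOP generation bit, but the TXPROD register is written only once the number
 * of deferred packets crosses the device-supplied txThreshold. Each register
 * write typically costs a VM exit, so batching these writes matters.
 */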

static inline void
vmxnet3_renew_desc(vmxnet3_rx_queue_t *rxq, uint8_t ring_id,
		   struct rte_mbuf *mbuf)
{
	uint32_t val;
	struct vmxnet3_cmd_ring *ring = &rxq->cmd_ring[ring_id];
	struct Vmxnet3_RxDesc *rxd =
		(struct Vmxnet3_RxDesc *)(ring->base + ring->next2fill);
	vmxnet3_buf_info_t *buf_info = &ring->buf_info[ring->next2fill];

	if (ring_id == 0)
		val = VMXNET3_RXD_BTYPE_HEAD;
	else
		val = VMXNET3_RXD_BTYPE_BODY;

	buf_info->m = mbuf;
	buf_info->len = (uint16_t)(mbuf->buf_len - RTE_PKTMBUF_HEADROOM);
	buf_info->bufPA = rte_mbuf_data_dma_addr_default(mbuf);

	rxd->addr = buf_info->bufPA;
	rxd->btype = val;
	rxd->len = buf_info->len;
	rxd->gen = ring->gen;

	vmxnet3_cmd_ring_adv_next2fill(ring);
}

/*
 * Allocates mbufs and clusters. Posts Rx descriptors with buffer details
 * so that the device can receive packets in those buffers.
 *
 * Among the two rings, the 1st ring contains buffers of type 0 and type 1.
 * bufs_per_pkt is set such that for non-LRO cases all the buffers required
 * by a frame will fit in the 1st ring (1st buf of type0 and rest of type1).
 * The 2nd ring contains buffers of type 1 alone and is mostly used for LRO.
 */
static int
vmxnet3_post_rx_bufs(vmxnet3_rx_queue_t *rxq, uint8_t ring_id)
{
	int err = 0;
	uint32_t i = 0, val = 0;
	struct vmxnet3_cmd_ring *ring = &rxq->cmd_ring[ring_id];

	if (ring_id == 0) {
		/* Usually: One HEAD type buf per packet
		 * val = (ring->next2fill % rxq->hw->bufs_per_pkt) ?
		 * VMXNET3_RXD_BTYPE_BODY : VMXNET3_RXD_BTYPE_HEAD;
		 */

		/* We use single packet buffer so all heads here */
		val = VMXNET3_RXD_BTYPE_HEAD;
	} else {
		/* All BODY type buffers for 2nd ring */
		val = VMXNET3_RXD_BTYPE_BODY;
	}

	while (vmxnet3_cmd_ring_desc_avail(ring) > 0) {
		struct Vmxnet3_RxDesc *rxd;
		struct rte_mbuf *mbuf;
		vmxnet3_buf_info_t *buf_info = &ring->buf_info[ring->next2fill];

		rxd = (struct Vmxnet3_RxDesc *)(ring->base + ring->next2fill);

		/* Allocate blank mbuf for the current Rx Descriptor */
		mbuf = rte_mbuf_raw_alloc(rxq->mp);
		if (unlikely(mbuf == NULL)) {
			PMD_RX_LOG(ERR, "Error allocating mbuf");
			rxq->stats.rx_buf_alloc_failure++;
			err = ENOMEM;
			break;
		}

		/*
		 * Load mbuf pointer into buf_info[next2fill];
		 * buf_info structure is equivalent to cookie for virtio-virtqueue
		 */
		buf_info->m = mbuf;
		buf_info->len = (uint16_t)(mbuf->buf_len -
					   RTE_PKTMBUF_HEADROOM);
		buf_info->bufPA = rte_mbuf_data_dma_addr_default(mbuf);

		/* Load Rx Descriptor with the buffer's GPA */
		rxd->addr = buf_info->bufPA;

		/* After this point rxd->addr MUST not be NULL */
		rxd->btype = val;
		rxd->len = buf_info->len;
		/* Flip gen bit at the end to change ownership */
		rxd->gen = ring->gen;

		vmxnet3_cmd_ring_adv_next2fill(ring);
		i++;
	}

	/* Return error only if no buffers are posted at present */
	if (vmxnet3_cmd_ring_desc_avail(ring) >= (ring->size - 1))
		return -err;
	else
		return i;
}

/* Receive side checksum and other offloads */
static void
vmxnet3_rx_offload(const Vmxnet3_RxCompDesc *rcd, struct rte_mbuf *rxm)
{
	/* Check for RSS */
	if (rcd->rssType != VMXNET3_RCD_RSS_TYPE_NONE) {
		rxm->ol_flags |= PKT_RX_RSS_HASH;
		rxm->hash.rss = rcd->rssHash;
	}

	/* Check packet type, checksum errors, etc. Only support IPv4 for now. */
	if (rcd->v4) {
		struct ether_hdr *eth = rte_pktmbuf_mtod(rxm, struct ether_hdr *);
		struct ipv4_hdr *ip = (struct ipv4_hdr *)(eth + 1);

		if (((ip->version_ihl & 0xf) << 2) > (int)sizeof(struct ipv4_hdr))
			rxm->packet_type = RTE_PTYPE_L3_IPV4_EXT;
		else
			rxm->packet_type = RTE_PTYPE_L3_IPV4;

		if (!rcd->cnc) {
			if (!rcd->ipc)
				rxm->ol_flags |= PKT_RX_IP_CKSUM_BAD;

			if ((rcd->tcp || rcd->udp) && !rcd->tuc)
				rxm->ol_flags |= PKT_RX_L4_CKSUM_BAD;
		}
	}
}

/*
 * Process the Rx Completion Ring of given vmxnet3_rx_queue
 * for nb_pkts burst and return the number of packets received
 */
uint16_t
vmxnet3_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t nb_pkts)
{
	uint16_t nb_rx;
	uint32_t nb_rxd, idx;
	uint8_t ring_idx;
	vmxnet3_rx_queue_t *rxq;
	Vmxnet3_RxCompDesc *rcd;
	vmxnet3_buf_info_t *rbi;
	Vmxnet3_RxDesc *rxd;
	struct rte_mbuf *rxm = NULL;
	struct vmxnet3_hw *hw;

	nb_rx = 0;
	nb_rxd = 0;
	idx = 0;

	rxq = rx_queue;
	hw = rxq->hw;

	rcd = &rxq->comp_ring.base[rxq->comp_ring.next2proc].rcd;

	if (unlikely(rxq->stopped)) {
		PMD_RX_LOG(DEBUG, "Rx queue is stopped.");
		return 0;
	}

	while (rcd->gen == rxq->comp_ring.gen) {
		struct rte_mbuf *newm;

		if (nb_rx >= nb_pkts)
			break;

		newm = rte_mbuf_raw_alloc(rxq->mp);
		if (unlikely(newm == NULL)) {
			PMD_RX_LOG(ERR, "Error allocating mbuf");
			rxq->stats.rx_buf_alloc_failure++;
			break;
		}

		idx = rcd->rxdIdx;
		ring_idx = (uint8_t)((rcd->rqID == rxq->qid1) ? 0 : 1);
		rxd = (Vmxnet3_RxDesc *)rxq->cmd_ring[ring_idx].base + idx;
		RTE_SET_USED(rxd); /* used only for assert when enabled */
		rbi = rxq->cmd_ring[ring_idx].buf_info + idx;

		PMD_RX_LOG(DEBUG, "rxd idx: %d ring idx: %d.", idx, ring_idx);

		RTE_ASSERT(rcd->len <= rxd->len);
		RTE_ASSERT(rbi->m);

		/* Get the packet buffer pointer from buf_info */
		rxm = rbi->m;

		/* Clear descriptor associated buf_info to be reused */
		rbi->m = NULL;
		rbi->bufPA = 0;

		/* Update the index that we received a packet */
		rxq->cmd_ring[ring_idx].next2comp = idx;

		/* For RCD with EOP set, check if there is frame error */
		if (unlikely(rcd->eop && rcd->err)) {
			rxq->stats.drop_total++;
			rxq->stats.drop_err++;

			if (!rcd->fcs) {
				rxq->stats.drop_fcs++;
				PMD_RX_LOG(ERR, "Recv packet dropped due to frame err.");
			}
			PMD_RX_LOG(ERR, "Error in received packet rcd#:%d rxd:%d",
				   (int)(rcd - (struct Vmxnet3_RxCompDesc *)
					 rxq->comp_ring.base), rcd->rxdIdx);
			rte_pktmbuf_free_seg(rxm);
			goto rcd_done;
		}

		/* Initialize newly received packet buffer */
		rxm->port = rxq->port_id;
		rxm->nb_segs = 1;
		rxm->next = NULL;
		rxm->pkt_len = (uint16_t)rcd->len;
		rxm->data_len = (uint16_t)rcd->len;
		rxm->data_off = RTE_PKTMBUF_HEADROOM;
		rxm->ol_flags = 0;
		rxm->vlan_tci = 0;

		/*
		 * If this is the first buffer of the received packet,
		 * set the pointer to the first mbuf of the packet
		 * Otherwise, update the total length and the number of segments
		 * of the current scattered packet, and update the pointer to
		 * the last mbuf of the current packet.
		 */
		if (rcd->sop) {
			RTE_ASSERT(rxd->btype == VMXNET3_RXD_BTYPE_HEAD);

			if (unlikely(rcd->len == 0)) {
				RTE_ASSERT(rcd->eop);

				PMD_RX_LOG(DEBUG,
					   "Rx buf was skipped. rxring[%d][%d])",
					   ring_idx, idx);
				rte_pktmbuf_free_seg(rxm);
				goto rcd_done;
			}

			rxq->start_seg = rxm;
			vmxnet3_rx_offload(rcd, rxm);
		} else {
			struct rte_mbuf *start = rxq->start_seg;

			RTE_ASSERT(rxd->btype == VMXNET3_RXD_BTYPE_BODY);

			start->pkt_len += rxm->data_len;
			start->nb_segs++;

			rxq->last_seg->next = rxm;
		}
		rxq->last_seg = rxm;

		if (rcd->eop) {
			struct rte_mbuf *start = rxq->start_seg;

			/* Check for hardware stripped VLAN tag */
			if (rcd->ts) {
				start->ol_flags |= (PKT_RX_VLAN_PKT | PKT_RX_VLAN_STRIPPED);
				start->vlan_tci = rte_le_to_cpu_16((uint16_t)rcd->tci);
			}

			rx_pkts[nb_rx++] = start;
			rxq->start_seg = NULL;
		}

rcd_done:
		rxq->cmd_ring[ring_idx].next2comp = idx;
		VMXNET3_INC_RING_IDX_ONLY(rxq->cmd_ring[ring_idx].next2comp,
					  rxq->cmd_ring[ring_idx].size);

		/* It's time to renew descriptors */
		vmxnet3_renew_desc(rxq, ring_idx, newm);
		if (unlikely(rxq->shared->ctrl.updateRxProd)) {
			VMXNET3_WRITE_BAR0_REG(hw, rxprod_reg[ring_idx] + (rxq->queue_id * VMXNET3_REG_ALIGN),
					       rxq->cmd_ring[ring_idx].next2fill);
		}

		/* Advance to the next descriptor in comp_ring */
		vmxnet3_comp_ring_adv_next2proc(&rxq->comp_ring);

		rcd = &rxq->comp_ring.base[rxq->comp_ring.next2proc].rcd;
		nb_rxd++;
		if (nb_rxd > rxq->cmd_ring[0].size) {
			PMD_RX_LOG(ERR, "Used up quota of receiving packets,"
				   " relinquish control.");
			break;
		}
	}

	return nb_rx;
}

/*
 * Create memzone for device rings. malloc can't be used as the physical address is
 * needed. If the memzone is already created, then this function returns a ptr
 * to the old one.
 */
static const struct rte_memzone *
ring_dma_zone_reserve(struct rte_eth_dev *dev, const char *ring_name,
		      uint16_t queue_id, uint32_t ring_size, int socket_id)
{
	char z_name[RTE_MEMZONE_NAMESIZE];
	const struct rte_memzone *mz;

	snprintf(z_name, sizeof(z_name), "%s_%s_%d_%d",
		 dev->driver->pci_drv.driver.name, ring_name,
		 dev->data->port_id, queue_id);

	mz = rte_memzone_lookup(z_name);
	if (mz)
		return mz;

	return rte_memzone_reserve_aligned(z_name, ring_size,
					   socket_id, 0, VMXNET3_RING_BA_ALIGN);
}
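
/*
 * Caveat: rte_memzone_lookup() returns an existing zone as-is, without
 * verifying that its size still covers ring_size; this relies on ring sizes
 * not growing across a reconfiguration of the same queue.
 */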

int
vmxnet3_dev_tx_queue_setup(struct rte_eth_dev *dev,
			   uint16_t queue_idx,
			   uint16_t nb_desc,
			   unsigned int socket_id,
			   __rte_unused const struct rte_eth_txconf *tx_conf)
{
	struct vmxnet3_hw *hw = dev->data->dev_private;
	const struct rte_memzone *mz;
	struct vmxnet3_tx_queue *txq;
	struct vmxnet3_cmd_ring *ring;
	struct vmxnet3_comp_ring *comp_ring;
	struct vmxnet3_data_ring *data_ring;
	int size;

	PMD_INIT_FUNC_TRACE();

	if ((tx_conf->txq_flags & ETH_TXQ_FLAGS_NOXSUMSCTP) !=
	    ETH_TXQ_FLAGS_NOXSUMSCTP) {
		PMD_INIT_LOG(ERR, "SCTP checksum offload not supported");
		return -EINVAL;
	}

	txq = rte_zmalloc("ethdev_tx_queue", sizeof(struct vmxnet3_tx_queue),
			  RTE_CACHE_LINE_SIZE);
	if (txq == NULL) {
		PMD_INIT_LOG(ERR, "Can not allocate tx queue structure");
		return -ENOMEM;
	}

	txq->queue_id = queue_idx;
	txq->port_id = dev->data->port_id;
	txq->shared = &hw->tqd_start[queue_idx];
	txq->hw = hw;
	txq->qid = queue_idx;
	txq->stopped = TRUE;

	ring = &txq->cmd_ring;
	comp_ring = &txq->comp_ring;
	data_ring = &txq->data_ring;

	/* Tx vmxnet ring length should be between 512-4096 */
	if (nb_desc < VMXNET3_DEF_TX_RING_SIZE) {
		PMD_INIT_LOG(ERR, "VMXNET3 Tx Ring Size Min: %u",
			     VMXNET3_DEF_TX_RING_SIZE);
		return -EINVAL;
	} else if (nb_desc > VMXNET3_TX_RING_MAX_SIZE) {
		PMD_INIT_LOG(ERR, "VMXNET3 Tx Ring Size Max: %u",
			     VMXNET3_TX_RING_MAX_SIZE);
		return -EINVAL;
	} else {
		ring->size = nb_desc;
		ring->size &= ~VMXNET3_RING_SIZE_MASK;
	}
	comp_ring->size = data_ring->size = ring->size;

	/* Tx vmxnet rings structure initialization*/
	ring->next2fill = 0;
	ring->next2comp = 0;
	ring->gen = VMXNET3_INIT_GEN;
	comp_ring->next2proc = 0;
	comp_ring->gen = VMXNET3_INIT_GEN;

	size = sizeof(struct Vmxnet3_TxDesc) * ring->size;
	size += sizeof(struct Vmxnet3_TxCompDesc) * comp_ring->size;
	size += sizeof(struct Vmxnet3_TxDataDesc) * data_ring->size;

	mz = ring_dma_zone_reserve(dev, "txdesc", queue_idx, size, socket_id);
	if (mz == NULL) {
		PMD_INIT_LOG(ERR, "ERROR: Creating queue descriptors zone");
		return -ENOMEM;
	}
	memset(mz->addr, 0, mz->len);

	/* cmd_ring initialization */
	ring->base = mz->addr;
	ring->basePA = mz->phys_addr;

	/* comp_ring initialization */
	comp_ring->base = ring->base + ring->size;
	comp_ring->basePA = ring->basePA +
		(sizeof(struct Vmxnet3_TxDesc) * ring->size);

	/* data_ring initialization */
	data_ring->base = (Vmxnet3_TxDataDesc *)(comp_ring->base + comp_ring->size);
	data_ring->basePA = comp_ring->basePA +
		(sizeof(struct Vmxnet3_TxCompDesc) * comp_ring->size);

	/* cmd_ring0 buf_info allocation */
	ring->buf_info = rte_zmalloc("tx_ring_buf_info",
				     ring->size * sizeof(vmxnet3_buf_info_t),
				     RTE_CACHE_LINE_SIZE);
	if (ring->buf_info == NULL) {
		PMD_INIT_LOG(ERR, "ERROR: Creating tx_buf_info structure");
		return -ENOMEM;
	}

	/* Update the data portion with txq */
	dev->data->tx_queues[queue_idx] = txq;

	return 0;
}

int
vmxnet3_dev_rx_queue_setup(struct rte_eth_dev *dev,
			   uint16_t queue_idx,
			   uint16_t nb_desc,
			   unsigned int socket_id,
			   __rte_unused const struct rte_eth_rxconf *rx_conf,
			   struct rte_mempool *mp)
{
	const struct rte_memzone *mz;
	struct vmxnet3_rx_queue *rxq;
	struct vmxnet3_hw *hw = dev->data->dev_private;
	struct vmxnet3_cmd_ring *ring0, *ring1, *ring;
	struct vmxnet3_comp_ring *comp_ring;
	int size;
	uint8_t i;
	char mem_name[32];

	PMD_INIT_FUNC_TRACE();

	rxq = rte_zmalloc("ethdev_rx_queue", sizeof(struct vmxnet3_rx_queue),
			  RTE_CACHE_LINE_SIZE);
	if (rxq == NULL) {
		PMD_INIT_LOG(ERR, "Can not allocate rx queue structure");
		return -ENOMEM;
	}

	rxq->mp = mp;
	rxq->queue_id = queue_idx;
	rxq->port_id = dev->data->port_id;
	rxq->shared = &hw->rqd_start[queue_idx];
	rxq->hw = hw;
	rxq->qid1 = queue_idx;
	rxq->qid2 = queue_idx + hw->num_rx_queues;
	rxq->stopped = TRUE;

	ring0 = &rxq->cmd_ring[0];
	ring1 = &rxq->cmd_ring[1];
	comp_ring = &rxq->comp_ring;

	/* Rx vmxnet rings length should be between 256-4096 */
	if (nb_desc < VMXNET3_DEF_RX_RING_SIZE) {
		PMD_INIT_LOG(ERR, "VMXNET3 Rx Ring Size Min: 256");
		return -EINVAL;
	} else if (nb_desc > VMXNET3_RX_RING_MAX_SIZE) {
		PMD_INIT_LOG(ERR, "VMXNET3 Rx Ring Size Max: 4096");
		return -EINVAL;
	} else {
		ring0->size = nb_desc;
		ring0->size &= ~VMXNET3_RING_SIZE_MASK;
		ring1->size = ring0->size;
	}

	comp_ring->size = ring0->size + ring1->size;

	/* Rx vmxnet rings structure initialization */
	ring0->next2fill = 0;
	ring1->next2fill = 0;
	ring0->next2comp = 0;
	ring1->next2comp = 0;
	ring0->gen = VMXNET3_INIT_GEN;
	ring1->gen = VMXNET3_INIT_GEN;
	comp_ring->next2proc = 0;
	comp_ring->gen = VMXNET3_INIT_GEN;

	size = sizeof(struct Vmxnet3_RxDesc) * (ring0->size + ring1->size);
	size += sizeof(struct Vmxnet3_RxCompDesc) * comp_ring->size;

	mz = ring_dma_zone_reserve(dev, "rxdesc", queue_idx, size, socket_id);
	if (mz == NULL) {
		PMD_INIT_LOG(ERR, "ERROR: Creating queue descriptors zone");
		return -ENOMEM;
	}
	memset(mz->addr, 0, mz->len);

	/* cmd_ring0 initialization */
	ring0->base = mz->addr;
	ring0->basePA = mz->phys_addr;

	/* cmd_ring1 initialization */
	ring1->base = ring0->base + ring0->size;
	ring1->basePA = ring0->basePA + sizeof(struct Vmxnet3_RxDesc) * ring0->size;

	/* comp_ring initialization */
	comp_ring->base = ring1->base + ring1->size;
	comp_ring->basePA = ring1->basePA + sizeof(struct Vmxnet3_RxDesc) *
		ring1->size;

	/* cmd_ring0-cmd_ring1 buf_info allocation */
	for (i = 0; i < VMXNET3_RX_CMDRING_SIZE; i++) {
		ring = &rxq->cmd_ring[i];
		ring->rid = i;
		snprintf(mem_name, sizeof(mem_name), "rx_ring_%d_buf_info", i);

		ring->buf_info = rte_zmalloc(mem_name,
					     ring->size * sizeof(vmxnet3_buf_info_t),
					     RTE_CACHE_LINE_SIZE);
		if (ring->buf_info == NULL) {
			PMD_INIT_LOG(ERR, "ERROR: Creating rx_buf_info structure");
			return -ENOMEM;
		}
	}

	/* Update the data portion with rxq */
	dev->data->rx_queues[queue_idx] = rxq;

	return 0;
}

/*
 * Initializes Receive Unit
 * Load mbufs in rx queue in advance
 */
int
vmxnet3_dev_rxtx_init(struct rte_eth_dev *dev)
{
	struct vmxnet3_hw *hw = dev->data->dev_private;

	int i, ret;
	uint8_t j;

	PMD_INIT_FUNC_TRACE();

	for (i = 0; i < hw->num_rx_queues; i++) {
		vmxnet3_rx_queue_t *rxq = dev->data->rx_queues[i];

		for (j = 0; j < VMXNET3_RX_CMDRING_SIZE; j++) {
			/* Passing 0 as alloc_num will allocate full ring */
			ret = vmxnet3_post_rx_bufs(rxq, j);
			if (ret <= 0) {
				PMD_INIT_LOG(ERR,
					     "ERROR: Posting Rxq: %d buffers ring: %d",
					     i, j);
				return -ret;
			}
			/*
			 * Updating device with the index:next2fill to fill the
			 * mbufs for coming packets.
			 */
			if (unlikely(rxq->shared->ctrl.updateRxProd)) {
				VMXNET3_WRITE_BAR0_REG(hw, rxprod_reg[j] + (rxq->queue_id * VMXNET3_REG_ALIGN),
						       rxq->cmd_ring[j].next2fill);
			}
		}
		rxq->stopped = FALSE;
		rxq->start_seg = NULL;
	}

	for (i = 0; i < dev->data->nb_tx_queues; i++) {
		struct vmxnet3_tx_queue *txq = dev->data->tx_queues[i];

		txq->stopped = FALSE;
	}

	return 0;
}

static uint8_t rss_intel_key[40] = {
	0x6D, 0x5A, 0x56, 0xDA, 0x25, 0x5B, 0x0E, 0xC2,
	0x41, 0x67, 0x25, 0x3D, 0x43, 0xA3, 0x8F, 0xB0,
	0xD0, 0xCA, 0x2B, 0xCB, 0xAE, 0x7B, 0x30, 0xB4,
	0x77, 0xCB, 0x2D, 0xA3, 0x80, 0x30, 0xF2, 0x0C,
	0x6A, 0x42, 0xB7, 0x3B, 0xBE, 0xAC, 0x01, 0xFA,
};
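
/*
 * The bytes above are the well-known default Toeplitz hash key shared by
 * Intel PMDs (e.g. ixgbe); reusing it keeps RSS hash results consistent
 * across drivers when the application does not supply its own key.
 */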

/*
 * Configure RSS feature
 */
int
vmxnet3_rss_configure(struct rte_eth_dev *dev)
{
	struct vmxnet3_hw *hw = dev->data->dev_private;
	struct VMXNET3_RSSConf *dev_rss_conf;
	struct rte_eth_rss_conf *port_rss_conf;
	uint64_t rss_hf;
	uint8_t i, j;

	PMD_INIT_FUNC_TRACE();

	dev_rss_conf = hw->rss_conf;
	port_rss_conf = &dev->data->dev_conf.rx_adv_conf.rss_conf;

	/* loading hashFunc */
	dev_rss_conf->hashFunc = VMXNET3_RSS_HASH_FUNC_TOEPLITZ;
	/* loading hashKeySize */
	dev_rss_conf->hashKeySize = VMXNET3_RSS_MAX_KEY_SIZE;
	/* loading indTableSize: Must not exceed VMXNET3_RSS_MAX_IND_TABLE_SIZE (128)*/
	dev_rss_conf->indTableSize = (uint16_t)(hw->num_rx_queues * 4);

	if (port_rss_conf->rss_key == NULL) {
		/* Default hash key */
		port_rss_conf->rss_key = rss_intel_key;
	}

	/* loading hashKey */
	memcpy(&dev_rss_conf->hashKey[0], port_rss_conf->rss_key,
	       dev_rss_conf->hashKeySize);

	/* loading indTable: spread entries round-robin across Rx queues */
	for (i = 0, j = 0; i < dev_rss_conf->indTableSize; i++, j++) {
		if (j == dev->data->nb_rx_queues)
			j = 0;
		dev_rss_conf->indTable[i] = j;
	}

	/* loading hashType */
	dev_rss_conf->hashType = 0;
	rss_hf = port_rss_conf->rss_hf & VMXNET3_RSS_OFFLOAD_ALL;
	if (rss_hf & ETH_RSS_IPV4)
		dev_rss_conf->hashType |= VMXNET3_RSS_HASH_TYPE_IPV4;
	if (rss_hf & ETH_RSS_NONFRAG_IPV4_TCP)
		dev_rss_conf->hashType |= VMXNET3_RSS_HASH_TYPE_TCP_IPV4;
	if (rss_hf & ETH_RSS_IPV6)
		dev_rss_conf->hashType |= VMXNET3_RSS_HASH_TYPE_IPV6;
	if (rss_hf & ETH_RSS_NONFRAG_IPV6_TCP)
		dev_rss_conf->hashType |= VMXNET3_RSS_HASH_TYPE_TCP_IPV6;

	return VMXNET3_SUCCESS;
}