/*-
 *   Copyright(c) 2010-2015 Intel Corporation. All rights reserved.
 *   Redistribution and use in source and binary forms, with or without
 *   modification, are permitted provided that the following conditions
 *     * Redistributions of source code must retain the above copyright
 *       notice, this list of conditions and the following disclaimer.
 *     * Redistributions in binary form must reproduce the above copyright
 *       notice, this list of conditions and the following disclaimer in
 *       the documentation and/or other materials provided with the
 *     * Neither the name of Intel Corporation nor the names of its
 *       contributors may be used to endorse or promote products derived
 *       from this software without specific prior written permission.
 *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */
#include <sys/queue.h>

#include <rte_byteorder.h>
#include <rte_common.h>
#include <rte_cycles.h>
#include <rte_debug.h>
#include <rte_interrupts.h>
#include <rte_memory.h>
#include <rte_memzone.h>
#include <rte_launch.h>
#include <rte_per_lcore.h>
#include <rte_lcore.h>
#include <rte_atomic.h>
#include <rte_branch_prediction.h>
#include <rte_mempool.h>
#include <rte_malloc.h>
#include <rte_ether.h>
#include <rte_ethdev.h>
#include <rte_prefetch.h>
#include <rte_string_fns.h>
#include <rte_errno.h>

#include "base/vmxnet3_defs.h"
#include "vmxnet3_ring.h"
#include "vmxnet3_logs.h"
#include "vmxnet3_ethdev.h"
#define	VMXNET3_TX_OFFLOAD_MASK	( \

#define	VMXNET3_TX_OFFLOAD_NOTSUP_MASK	\
	(PKT_TX_OFFLOAD_MASK ^ VMXNET3_TX_OFFLOAD_MASK)
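/*
 * Any ol_flags bit that is set in PKT_TX_OFFLOAD_MASK but absent from the
 * supported mask above stays set in VMXNET3_TX_OFFLOAD_NOTSUP_MASK, so a
 * single "ol_flags & VMXNET3_TX_OFFLOAD_NOTSUP_MASK" test (see
 * vmxnet3_prep_pkts() below) catches every offload request the device
 * cannot honour.
 */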
static const uint32_t rxprod_reg[2] = {VMXNET3_REG_RXPROD, VMXNET3_REG_RXPROD2};

static int vmxnet3_post_rx_bufs(vmxnet3_rx_queue_t *, uint8_t);
static void vmxnet3_tq_tx_complete(vmxnet3_tx_queue_t *);

#ifdef RTE_LIBRTE_VMXNET3_DEBUG_DRIVER_NOT_USED
static void vmxnet3_rxq_dump(struct vmxnet3_rx_queue *);
static void vmxnet3_txq_dump(struct vmxnet3_tx_queue *);

#ifdef RTE_LIBRTE_VMXNET3_DEBUG_DRIVER_NOT_USED
vmxnet3_rxq_dump(struct vmxnet3_rx_queue *rxq)
		   "RXQ: cmd0 base : %p cmd1 base : %p comp ring base : %p.",
		   rxq->cmd_ring[0].base, rxq->cmd_ring[1].base, rxq->comp_ring.base);
		   "RXQ: cmd0 basePA : 0x%lx cmd1 basePA : 0x%lx comp ring basePA : 0x%lx.",
		   (unsigned long)rxq->cmd_ring[0].basePA,
		   (unsigned long)rxq->cmd_ring[1].basePA,
		   (unsigned long)rxq->comp_ring.basePA);

	avail = vmxnet3_cmd_ring_desc_avail(&rxq->cmd_ring[0]);
		   "RXQ:cmd0: size=%u; free=%u; next2proc=%u; queued=%u",
		   (uint32_t)rxq->cmd_ring[0].size, avail,
		   rxq->comp_ring.next2proc,
		   rxq->cmd_ring[0].size - avail);

	avail = vmxnet3_cmd_ring_desc_avail(&rxq->cmd_ring[1]);
	PMD_RX_LOG(DEBUG, "RXQ:cmd1 size=%u; free=%u; next2proc=%u; queued=%u",
		   (uint32_t)rxq->cmd_ring[1].size, avail, rxq->comp_ring.next2proc,
		   rxq->cmd_ring[1].size - avail);
vmxnet3_txq_dump(struct vmxnet3_tx_queue *txq)
	PMD_TX_LOG(DEBUG, "TXQ: cmd base : %p comp ring base : %p data ring base : %p.",
		   txq->cmd_ring.base, txq->comp_ring.base, txq->data_ring.base);
	PMD_TX_LOG(DEBUG, "TXQ: cmd basePA : 0x%lx comp ring basePA : 0x%lx data ring basePA : 0x%lx.",
		   (unsigned long)txq->cmd_ring.basePA,
		   (unsigned long)txq->comp_ring.basePA,
		   (unsigned long)txq->data_ring.basePA);

	avail = vmxnet3_cmd_ring_desc_avail(&txq->cmd_ring);
	PMD_TX_LOG(DEBUG, "TXQ: size=%u; free=%u; next2proc=%u; queued=%u",
		   (uint32_t)txq->cmd_ring.size, avail,
		   txq->comp_ring.next2proc, txq->cmd_ring.size - avail);
vmxnet3_tx_cmd_ring_release_mbufs(vmxnet3_cmd_ring_t *ring)
	while (ring->next2comp != ring->next2fill) {
		/* No need to worry about desc ownership, device is quiesced by now. */
		vmxnet3_buf_info_t *buf_info = ring->buf_info + ring->next2comp;

			rte_pktmbuf_free(buf_info->m);
		vmxnet3_cmd_ring_adv_next2comp(ring);
vmxnet3_rx_cmd_ring_release_mbufs(vmxnet3_cmd_ring_t *ring)
	for (i = 0; i < ring->size; i++) {
		/* No need to worry about desc ownership, device is quiesced by now. */
		vmxnet3_buf_info_t *buf_info = &ring->buf_info[i];

			rte_pktmbuf_free_seg(buf_info->m);
		vmxnet3_cmd_ring_adv_next2comp(ring);
vmxnet3_cmd_ring_release(vmxnet3_cmd_ring_t *ring)
	rte_free(ring->buf_info);
	ring->buf_info = NULL;
vmxnet3_dev_tx_queue_release(void *txq)
	vmxnet3_tx_queue_t *tq = txq;

		vmxnet3_tx_cmd_ring_release_mbufs(&tq->cmd_ring);
		/* Release the cmd_ring */
		vmxnet3_cmd_ring_release(&tq->cmd_ring);
vmxnet3_dev_rx_queue_release(void *rxq)
	vmxnet3_rx_queue_t *rq = rxq;

		for (i = 0; i < VMXNET3_RX_CMDRING_SIZE; i++)
			vmxnet3_rx_cmd_ring_release_mbufs(&rq->cmd_ring[i]);

		/* Release both the cmd_rings */
		for (i = 0; i < VMXNET3_RX_CMDRING_SIZE; i++)
			vmxnet3_cmd_ring_release(&rq->cmd_ring[i]);
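/*
 * Drop any mbufs still held by the Tx queue and re-initialise its command,
 * completion and data rings to their initial (empty) state.
 */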
vmxnet3_dev_tx_queue_reset(void *txq)
	vmxnet3_tx_queue_t *tq = txq;
	struct vmxnet3_cmd_ring *ring = &tq->cmd_ring;
	struct vmxnet3_comp_ring *comp_ring = &tq->comp_ring;
	struct vmxnet3_data_ring *data_ring = &tq->data_ring;

		/* Release the cmd_ring mbufs */
		vmxnet3_tx_cmd_ring_release_mbufs(&tq->cmd_ring);

	/* Tx vmxnet rings structure initialization */
	ring->gen = VMXNET3_INIT_GEN;
	comp_ring->next2proc = 0;
	comp_ring->gen = VMXNET3_INIT_GEN;

	size = sizeof(struct Vmxnet3_TxDesc) * ring->size;
	size += sizeof(struct Vmxnet3_TxCompDesc) * comp_ring->size;
	size += sizeof(struct Vmxnet3_TxDataDesc) * data_ring->size;

	memset(ring->base, 0, size);
vmxnet3_dev_rx_queue_reset(void *rxq)
	vmxnet3_rx_queue_t *rq = rxq;
	struct vmxnet3_cmd_ring *ring0, *ring1;
	struct vmxnet3_comp_ring *comp_ring;

		/* Release both the cmd_rings mbufs */
		for (i = 0; i < VMXNET3_RX_CMDRING_SIZE; i++)
			vmxnet3_rx_cmd_ring_release_mbufs(&rq->cmd_ring[i]);

	ring0 = &rq->cmd_ring[0];
	ring1 = &rq->cmd_ring[1];
	comp_ring = &rq->comp_ring;

	/* Rx vmxnet rings structure initialization */
	ring0->next2fill = 0;
	ring1->next2fill = 0;
	ring0->next2comp = 0;
	ring1->next2comp = 0;
	ring0->gen = VMXNET3_INIT_GEN;
	ring1->gen = VMXNET3_INIT_GEN;
	comp_ring->next2proc = 0;
	comp_ring->gen = VMXNET3_INIT_GEN;

	size = sizeof(struct Vmxnet3_RxDesc) * (ring0->size + ring1->size);
	size += sizeof(struct Vmxnet3_RxCompDesc) * comp_ring->size;

	memset(ring0->base, 0, size);
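/* Reset every Tx and Rx queue of the port back to its initial, empty state. */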
vmxnet3_dev_clear_queues(struct rte_eth_dev *dev)
	PMD_INIT_FUNC_TRACE();

	for (i = 0; i < dev->data->nb_tx_queues; i++) {
		struct vmxnet3_tx_queue *txq = dev->data->tx_queues[i];

			vmxnet3_dev_tx_queue_reset(txq);

	for (i = 0; i < dev->data->nb_rx_queues; i++) {
		struct vmxnet3_rx_queue *rxq = dev->data->rx_queues[i];

			vmxnet3_dev_rx_queue_reset(rxq);
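/*
 * Free the mbuf chain transmitted via the descriptor chain ending at eop_idx
 * and advance next2comp past all of that packet's command-ring descriptors.
 * Returns the number of descriptors released.
 */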
vmxnet3_unmap_pkt(uint16_t eop_idx, vmxnet3_tx_queue_t *txq)
	struct rte_mbuf *mbuf;

	/* Release cmd_ring descriptor and free mbuf */
	RTE_ASSERT(txq->cmd_ring.base[eop_idx].txd.eop == 1);

	mbuf = txq->cmd_ring.buf_info[eop_idx].m;
		rte_panic("EOP desc does not point to a valid mbuf");
	rte_pktmbuf_free(mbuf);

	txq->cmd_ring.buf_info[eop_idx].m = NULL;

	while (txq->cmd_ring.next2comp != eop_idx) {
		/* no out-of-order completion */
		RTE_ASSERT(txq->cmd_ring.base[txq->cmd_ring.next2comp].txd.cq == 0);
		vmxnet3_cmd_ring_adv_next2comp(&txq->cmd_ring);

	/* Mark the txd for which tcd was generated as completed */
	vmxnet3_cmd_ring_adv_next2comp(&txq->cmd_ring);

	return completed + 1;
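/*
 * Walk the Tx completion ring and release the command-ring descriptors (and
 * mbufs) of every packet the device has finished transmitting.
 */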
vmxnet3_tq_tx_complete(vmxnet3_tx_queue_t *txq)
	vmxnet3_comp_ring_t *comp_ring = &txq->comp_ring;
	struct Vmxnet3_TxCompDesc *tcd = (struct Vmxnet3_TxCompDesc *)
		(comp_ring->base + comp_ring->next2proc);

	while (tcd->gen == comp_ring->gen) {
		completed += vmxnet3_unmap_pkt(tcd->txdIdx, txq);

		vmxnet3_comp_ring_adv_next2proc(comp_ring);
		tcd = (struct Vmxnet3_TxCompDesc *)(comp_ring->base +
						    comp_ring->next2proc);

	PMD_TX_LOG(DEBUG, "Processed %d tx comps & command descs.", completed);
vmxnet3_prep_pkts(__rte_unused void *tx_queue, struct rte_mbuf **tx_pkts,
	for (i = 0; i != nb_pkts; i++) {
		ol_flags = m->ol_flags;

		/* Non-TSO packet cannot occupy more than
		 * VMXNET3_MAX_TXD_PER_PKT TX descriptors.
		 */
		if ((ol_flags & PKT_TX_TCP_SEG) == 0 &&
		    m->nb_segs > VMXNET3_MAX_TXD_PER_PKT) {

		/* check that only supported TX offloads are requested. */
		if ((ol_flags & VMXNET3_TX_OFFLOAD_NOTSUP_MASK) != 0 ||
		    (ol_flags & PKT_TX_L4_MASK) ==
			rte_errno = ENOTSUP;

#ifdef RTE_LIBRTE_ETHDEV_DEBUG
		ret = rte_validate_tx_offload(m);

		ret = rte_net_intel_cksum_prepare(m);
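/*
 * Illustrative application-side sketch (an assumption for documentation, not
 * part of the driver): rte_eth_tx_prepare() runs the prepare handler above so
 * that unsupported offload requests are rejected (rte_errno is set) and the
 * checksum fields are fixed up before the burst is handed to the Tx routine.
 */
static inline uint16_t
vmxnet3_example_prepare_and_xmit(uint8_t port_id, uint16_t queue_id,
				 struct rte_mbuf **pkts, uint16_t nb_pkts)
{
	/* Packets from index nb_prep onward were rejected; rte_errno says why. */
	uint16_t nb_prep = rte_eth_tx_prepare(port_id, queue_id, pkts, nb_pkts);

	return rte_eth_tx_burst(port_id, queue_id, pkts, nb_prep);
}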
vmxnet3_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts,
	vmxnet3_tx_queue_t *txq = tx_queue;
	struct vmxnet3_hw *hw = txq->hw;
	Vmxnet3_TxQueueCtrl *txq_ctrl = &txq->shared->ctrl;
	uint32_t deferred = rte_le_to_cpu_32(txq_ctrl->txNumDeferred);

	if (unlikely(txq->stopped)) {
		PMD_TX_LOG(DEBUG, "Tx queue is stopped.");

	/* Free up the comp_descriptors aggressively */
	vmxnet3_tq_tx_complete(txq);

	while (nb_tx < nb_pkts) {
		Vmxnet3_GenericDesc *gdesc;
		vmxnet3_buf_info_t *tbi;
		uint32_t first2fill, avail, dw2;
		struct rte_mbuf *txm = tx_pkts[nb_tx];
		struct rte_mbuf *m_seg = txm;

		bool tso = (txm->ol_flags & PKT_TX_TCP_SEG) != 0;
		/* # of descriptors needed for a packet. */
		unsigned count = txm->nb_segs;

		avail = vmxnet3_cmd_ring_desc_avail(&txq->cmd_ring);

			/* Is command ring full? */
			if (unlikely(avail == 0)) {
				PMD_TX_LOG(DEBUG, "No free ring descriptors");
				txq->stats.tx_ring_full++;
				txq->stats.drop_total += (nb_pkts - nb_tx);

			/* Command ring is not full but cannot handle the
			 * multi-segmented packet. Let's try the next packet.
			 */
			PMD_TX_LOG(DEBUG, "Running out of ring descriptors "
				   "(avail %d needed %d)", avail, count);
			txq->stats.drop_total++;
				txq->stats.drop_tso++;
			rte_pktmbuf_free(txm);

		/* Drop non-TSO packet that is excessively fragmented */
		if (unlikely(!tso && count > VMXNET3_MAX_TXD_PER_PKT)) {
			PMD_TX_LOG(ERR, "Non-TSO packet cannot occupy more than %d tx "
				   "descriptors. Packet dropped.", VMXNET3_MAX_TXD_PER_PKT);
			txq->stats.drop_too_many_segs++;
			txq->stats.drop_total++;
			rte_pktmbuf_free(txm);
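		/* Single-segment packets no longer than VMXNET3_HDR_COPY_SIZE
		 * are copied into the Tx data ring; the SOP descriptor then
		 * points at the data-ring slot (see the copy_size test below)
		 * instead of at the mbuf itself.
		 */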
		if (txm->nb_segs == 1 &&
		    rte_pktmbuf_pkt_len(txm) <= VMXNET3_HDR_COPY_SIZE) {
			struct Vmxnet3_TxDataDesc *tdd;

			tdd = txq->data_ring.base + txq->cmd_ring.next2fill;
			copy_size = rte_pktmbuf_pkt_len(txm);
			rte_memcpy(tdd->data, rte_pktmbuf_mtod(txm, char *), copy_size);

		/* use the previous gen bit for the SOP desc */
		dw2 = (txq->cmd_ring.gen ^ 0x1) << VMXNET3_TXD_GEN_SHIFT;
		first2fill = txq->cmd_ring.next2fill;

			/* Remember the transmit buffer for cleanup */
			tbi = txq->cmd_ring.buf_info + txq->cmd_ring.next2fill;

			/* NB: the following assumes that VMXNET3 maximum
			 * transmit buffer size (16K) is greater than the
			 * maximum mbuf segment size.
			 */
			gdesc = txq->cmd_ring.base + txq->cmd_ring.next2fill;

				gdesc->txd.addr = rte_cpu_to_le_64(txq->data_ring.basePA +
								   txq->cmd_ring.next2fill *
								   sizeof(struct Vmxnet3_TxDataDesc));

				gdesc->txd.addr = rte_mbuf_data_dma_addr(m_seg);

			gdesc->dword[2] = dw2 | m_seg->data_len;

			/* move to the next2fill descriptor */
			vmxnet3_cmd_ring_adv_next2fill(&txq->cmd_ring);

			/* use the right gen for non-SOP desc */
			dw2 = txq->cmd_ring.gen << VMXNET3_TXD_GEN_SHIFT;
		} while ((m_seg = m_seg->next) != NULL);

		/* set the last buf_info for the pkt */

		/* Update the EOP descriptor */
		gdesc->dword[3] |= VMXNET3_TXD_EOP | VMXNET3_TXD_CQ;

		/* Add VLAN tag if present */
		gdesc = txq->cmd_ring.base + first2fill;
		if (txm->ol_flags & PKT_TX_VLAN_PKT) {
			gdesc->txd.tci = txm->vlan_tci;

			uint16_t mss = txm->tso_segsz;

			gdesc->txd.hlen = txm->l2_len + txm->l3_len + txm->l4_len;
			gdesc->txd.om = VMXNET3_OM_TSO;
			gdesc->txd.msscof = mss;

			deferred += (rte_pktmbuf_pkt_len(txm) - gdesc->txd.hlen + mss - 1) / mss;
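			/* e.g. a 9000-byte TSO packet with 54 bytes of headers
			 * and an MSS of 1460 adds (9000 - 54 + 1459) / 1460 = 7
			 * to the deferred count, one per segment the device
			 * will emit.
			 */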
		} else if (txm->ol_flags & PKT_TX_L4_MASK) {
			gdesc->txd.om = VMXNET3_OM_CSUM;
			gdesc->txd.hlen = txm->l2_len + txm->l3_len;

			switch (txm->ol_flags & PKT_TX_L4_MASK) {
			case PKT_TX_TCP_CKSUM:
				gdesc->txd.msscof = gdesc->txd.hlen + offsetof(struct tcp_hdr, cksum);
			case PKT_TX_UDP_CKSUM:
				gdesc->txd.msscof = gdesc->txd.hlen + offsetof(struct udp_hdr, dgram_cksum);
				PMD_TX_LOG(WARNING, "requested cksum offload not supported %#llx",
					   txm->ol_flags & PKT_TX_L4_MASK);

			gdesc->txd.om = VMXNET3_OM_NONE;
			gdesc->txd.msscof = 0;

		/* flip the GEN bit on the SOP */
		rte_compiler_barrier();
		gdesc->dword[2] ^= VMXNET3_TXD_GEN;

		txq_ctrl->txNumDeferred = rte_cpu_to_le_32(deferred);

	PMD_TX_LOG(DEBUG, "vmxnet3 txThreshold: %u", rte_le_to_cpu_32(txq_ctrl->txThreshold));

	if (deferred >= rte_le_to_cpu_32(txq_ctrl->txThreshold)) {
		txq_ctrl->txNumDeferred = 0;
		/* Notify vSwitch that packets are available. */
		VMXNET3_WRITE_BAR0_REG(hw, (VMXNET3_REG_TXPROD + txq->queue_id * VMXNET3_REG_ALIGN),
				       txq->cmd_ring.next2fill);
/*
 *  Allocates mbufs and clusters. Post rx descriptors with buffer details
 *  so that device can receive packets in those buffers.
 *
 *  Among the two rings, the 1st ring contains buffers of type 0 and type 1.
 *  bufs_per_pkt is set such that for non-LRO cases all the buffers required
 *  by a frame will fit in the 1st ring (1st buf of type0 and rest of type1).
 *  The 2nd ring contains buffers of type 1 alone and is mostly used for LRO.
 */
vmxnet3_post_rx_bufs(vmxnet3_rx_queue_t *rxq, uint8_t ring_id)
	uint32_t i = 0, val = 0;
	struct vmxnet3_cmd_ring *ring = &rxq->cmd_ring[ring_id];

		/* Usually: One HEAD type buf per packet
		 * val = (ring->next2fill % rxq->hw->bufs_per_pkt) ?
		 *       VMXNET3_RXD_BTYPE_BODY : VMXNET3_RXD_BTYPE_HEAD;
		 */

		/* We use single packet buffer so all heads here */
		val = VMXNET3_RXD_BTYPE_HEAD;

		/* All BODY type buffers for 2nd ring */
		val = VMXNET3_RXD_BTYPE_BODY;

	while (vmxnet3_cmd_ring_desc_avail(ring) > 0) {
		struct Vmxnet3_RxDesc *rxd;
		struct rte_mbuf *mbuf;
		vmxnet3_buf_info_t *buf_info = &ring->buf_info[ring->next2fill];

		rxd = (struct Vmxnet3_RxDesc *)(ring->base + ring->next2fill);

		/* Allocate blank mbuf for the current Rx Descriptor */
		mbuf = rte_mbuf_raw_alloc(rxq->mp);
		if (unlikely(mbuf == NULL)) {
			PMD_RX_LOG(ERR, "Error allocating mbuf");
			rxq->stats.rx_buf_alloc_failure++;
		/*
		 * Load the mbuf pointer into buf_info[ring->next2fill];
		 * the buf_info entry plays the same role as the cookie in a virtio virtqueue.
		 */
		buf_info->len = (uint16_t)(mbuf->buf_len -
					   RTE_PKTMBUF_HEADROOM);
		buf_info->bufPA = rte_mbuf_data_dma_addr_default(mbuf);

		/* Load Rx Descriptor with the buffer's GPA */
		rxd->addr = buf_info->bufPA;

		/* After this point rxd->addr MUST not be NULL */
		rxd->len = buf_info->len;
		/* Flip gen bit at the end to change ownership */
		rxd->gen = ring->gen;

		vmxnet3_cmd_ring_adv_next2fill(ring);

	/* Return error only if no buffers are posted at present */
	if (vmxnet3_cmd_ring_desc_avail(ring) >= (ring->size - 1))
/* Receive side checksum and other offloads */
vmxnet3_rx_offload(const Vmxnet3_RxCompDesc *rcd, struct rte_mbuf *rxm)
	if (rcd->rssType != VMXNET3_RCD_RSS_TYPE_NONE) {
		rxm->ol_flags |= PKT_RX_RSS_HASH;
		rxm->hash.rss = rcd->rssHash;

	/* Check packet type, checksum errors, etc. Only support IPv4 for now. */
		struct ether_hdr *eth = rte_pktmbuf_mtod(rxm, struct ether_hdr *);
		struct ipv4_hdr *ip = (struct ipv4_hdr *)(eth + 1);

		if (((ip->version_ihl & 0xf) << 2) > (int)sizeof(struct ipv4_hdr))
			rxm->packet_type = RTE_PTYPE_L3_IPV4_EXT;
			rxm->packet_type = RTE_PTYPE_L3_IPV4;

			rxm->ol_flags |= PKT_RX_IP_CKSUM_BAD;

		if ((rcd->tcp || rcd->udp) && !rcd->tuc)
			rxm->ol_flags |= PKT_RX_L4_CKSUM_BAD;
/*
 * Process the Rx Completion Ring of given vmxnet3_rx_queue
 * for nb_pkts burst and return the number of packets received
 */
vmxnet3_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t nb_pkts)
	uint32_t nb_rxd, idx;
	vmxnet3_rx_queue_t *rxq;
	Vmxnet3_RxCompDesc *rcd;
	vmxnet3_buf_info_t *rbi;
	struct rte_mbuf *rxm = NULL;
	struct vmxnet3_hw *hw;

	rcd = &rxq->comp_ring.base[rxq->comp_ring.next2proc].rcd;

	if (unlikely(rxq->stopped)) {
		PMD_RX_LOG(DEBUG, "Rx queue is stopped.");

	while (rcd->gen == rxq->comp_ring.gen) {
		if (nb_rx >= nb_pkts)

		ring_idx = (uint8_t)((rcd->rqID == rxq->qid1) ? 0 : 1);
		rxd = (Vmxnet3_RxDesc *)rxq->cmd_ring[ring_idx].base + idx;
		RTE_SET_USED(rxd); /* used only for assert when enabled */
		rbi = rxq->cmd_ring[ring_idx].buf_info + idx;

		PMD_RX_LOG(DEBUG, "rxd idx: %d ring idx: %d.", idx, ring_idx);

		RTE_ASSERT(rcd->len <= rxd->len);

		/* Get the packet buffer pointer from buf_info */

		/* Clear descriptor associated buf_info to be reused */

		/* Update the index that we received a packet */
		rxq->cmd_ring[ring_idx].next2comp = idx;

		/* For RCD with EOP set, check if there is frame error */
		if (unlikely(rcd->eop && rcd->err)) {
			rxq->stats.drop_total++;
			rxq->stats.drop_err++;

				rxq->stats.drop_fcs++;
				PMD_RX_LOG(ERR, "Recv packet dropped due to frame err.");

			PMD_RX_LOG(ERR, "Error in received packet rcd#:%d rxd:%d",
				   (int)(rcd - (struct Vmxnet3_RxCompDesc *)
					 rxq->comp_ring.base), rcd->rxdIdx);
			rte_pktmbuf_free_seg(rxm);
		/* Initialize newly received packet buffer */
		rxm->port = rxq->port_id;
		rxm->pkt_len = (uint16_t)rcd->len;
		rxm->data_len = (uint16_t)rcd->len;
		rxm->data_off = RTE_PKTMBUF_HEADROOM;

		/*
		 * If this is the first buffer of the received packet,
		 * set the pointer to the first mbuf of the packet.
		 * Otherwise, update the total length and the number of segments
		 * of the current scattered packet, and update the pointer to
		 * the last mbuf of the current packet.
		 */
			RTE_ASSERT(rxd->btype == VMXNET3_RXD_BTYPE_HEAD);

			if (unlikely(rcd->len == 0)) {
				RTE_ASSERT(rcd->eop);
					   "Rx buf was skipped. rxring[%d][%d]",
				rte_pktmbuf_free_seg(rxm);

			rxq->start_seg = rxm;
			vmxnet3_rx_offload(rcd, rxm);

			struct rte_mbuf *start = rxq->start_seg;

			RTE_ASSERT(rxd->btype == VMXNET3_RXD_BTYPE_BODY);

			start->pkt_len += rxm->data_len;
			rxq->last_seg->next = rxm;

			struct rte_mbuf *start = rxq->start_seg;

			/* Check for hardware stripped VLAN tag */
				start->ol_flags |= (PKT_RX_VLAN_PKT | PKT_RX_VLAN_STRIPPED);
				start->vlan_tci = rte_le_to_cpu_16((uint16_t)rcd->tci);

			rx_pkts[nb_rx++] = start;
			rxq->start_seg = NULL;

		rxq->cmd_ring[ring_idx].next2comp = idx;
		VMXNET3_INC_RING_IDX_ONLY(rxq->cmd_ring[ring_idx].next2comp,
					  rxq->cmd_ring[ring_idx].size);

		/* It's time to allocate some new buf and renew descriptors */
		vmxnet3_post_rx_bufs(rxq, ring_idx);
		if (unlikely(rxq->shared->ctrl.updateRxProd)) {
			VMXNET3_WRITE_BAR0_REG(hw, rxprod_reg[ring_idx] + (rxq->queue_id * VMXNET3_REG_ALIGN),
					       rxq->cmd_ring[ring_idx].next2fill);

		/* Advance to the next descriptor in comp_ring */
		vmxnet3_comp_ring_adv_next2proc(&rxq->comp_ring);

		rcd = &rxq->comp_ring.base[rxq->comp_ring.next2proc].rcd;
		if (nb_rxd > rxq->cmd_ring[0].size) {
			PMD_RX_LOG(ERR, "Used up quota of receiving packets,"
				   " relinquish control.");
/*
 * Create memzone for device rings. malloc can't be used as the physical address is
 * needed. If the memzone is already created, then this function returns a ptr
 * to the old one.
 */
static const struct rte_memzone *
ring_dma_zone_reserve(struct rte_eth_dev *dev, const char *ring_name,
		      uint16_t queue_id, uint32_t ring_size, int socket_id)
	char z_name[RTE_MEMZONE_NAMESIZE];
	const struct rte_memzone *mz;

	snprintf(z_name, sizeof(z_name), "%s_%s_%d_%d",
		 dev->driver->pci_drv.driver.name, ring_name,
		 dev->data->port_id, queue_id);

	mz = rte_memzone_lookup(z_name);

	return rte_memzone_reserve_aligned(z_name, ring_size,
					   socket_id, 0, VMXNET3_RING_BA_ALIGN);
vmxnet3_dev_tx_queue_setup(struct rte_eth_dev *dev,
			   unsigned int socket_id,
			   const struct rte_eth_txconf *tx_conf)
	struct vmxnet3_hw *hw = dev->data->dev_private;
	const struct rte_memzone *mz;
	struct vmxnet3_tx_queue *txq;
	struct vmxnet3_cmd_ring *ring;
	struct vmxnet3_comp_ring *comp_ring;
	struct vmxnet3_data_ring *data_ring;

	PMD_INIT_FUNC_TRACE();

	if ((tx_conf->txq_flags & ETH_TXQ_FLAGS_NOXSUMSCTP) !=
	    ETH_TXQ_FLAGS_NOXSUMSCTP) {
		PMD_INIT_LOG(ERR, "SCTP checksum offload not supported");

	txq = rte_zmalloc("ethdev_tx_queue", sizeof(struct vmxnet3_tx_queue),
			  RTE_CACHE_LINE_SIZE);
		PMD_INIT_LOG(ERR, "Can not allocate tx queue structure");

	txq->queue_id = queue_idx;
	txq->port_id = dev->data->port_id;
	txq->shared = &hw->tqd_start[queue_idx];
	txq->qid = queue_idx;

	ring = &txq->cmd_ring;
	comp_ring = &txq->comp_ring;
	data_ring = &txq->data_ring;

	/* Tx vmxnet ring length should be between 512-4096 */
	if (nb_desc < VMXNET3_DEF_TX_RING_SIZE) {
		PMD_INIT_LOG(ERR, "VMXNET3 Tx Ring Size Min: %u",
			     VMXNET3_DEF_TX_RING_SIZE);
	} else if (nb_desc > VMXNET3_TX_RING_MAX_SIZE) {
		PMD_INIT_LOG(ERR, "VMXNET3 Tx Ring Size Max: %u",
			     VMXNET3_TX_RING_MAX_SIZE);
		ring->size = nb_desc;
		ring->size &= ~VMXNET3_RING_SIZE_MASK;
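		/* ring->size is rounded down here to the descriptor-count
		 * alignment the device requires (VMXNET3_RING_SIZE_MASK is
		 * the alignment minus one).
		 */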
	comp_ring->size = data_ring->size = ring->size;

	/* Tx vmxnet rings structure initialization */
	ring->gen = VMXNET3_INIT_GEN;
	comp_ring->next2proc = 0;
	comp_ring->gen = VMXNET3_INIT_GEN;

	size = sizeof(struct Vmxnet3_TxDesc) * ring->size;
	size += sizeof(struct Vmxnet3_TxCompDesc) * comp_ring->size;
	size += sizeof(struct Vmxnet3_TxDataDesc) * data_ring->size;

	mz = ring_dma_zone_reserve(dev, "txdesc", queue_idx, size, socket_id);
		PMD_INIT_LOG(ERR, "ERROR: Creating queue descriptors zone");
	memset(mz->addr, 0, mz->len);

	/* cmd_ring initialization */
	ring->base = mz->addr;
	ring->basePA = mz->phys_addr;

	/* comp_ring initialization */
	comp_ring->base = ring->base + ring->size;
	comp_ring->basePA = ring->basePA +
			    (sizeof(struct Vmxnet3_TxDesc) * ring->size);

	/* data_ring initialization */
	data_ring->base = (Vmxnet3_TxDataDesc *)(comp_ring->base + comp_ring->size);
	data_ring->basePA = comp_ring->basePA +
			    (sizeof(struct Vmxnet3_TxCompDesc) * comp_ring->size);

	/* cmd_ring0 buf_info allocation */
	ring->buf_info = rte_zmalloc("tx_ring_buf_info",
				     ring->size * sizeof(vmxnet3_buf_info_t), RTE_CACHE_LINE_SIZE);
	if (ring->buf_info == NULL) {
		PMD_INIT_LOG(ERR, "ERROR: Creating tx_buf_info structure");

	/* Update the data portion with txq */
	dev->data->tx_queues[queue_idx] = txq;
vmxnet3_dev_rx_queue_setup(struct rte_eth_dev *dev,
			   unsigned int socket_id,
			   __rte_unused const struct rte_eth_rxconf *rx_conf,
			   struct rte_mempool *mp)
	const struct rte_memzone *mz;
	struct vmxnet3_rx_queue *rxq;
	struct vmxnet3_hw *hw = dev->data->dev_private;
	struct vmxnet3_cmd_ring *ring0, *ring1, *ring;
	struct vmxnet3_comp_ring *comp_ring;

	PMD_INIT_FUNC_TRACE();

	rxq = rte_zmalloc("ethdev_rx_queue", sizeof(struct vmxnet3_rx_queue),
			  RTE_CACHE_LINE_SIZE);
		PMD_INIT_LOG(ERR, "Can not allocate rx queue structure");

	rxq->queue_id = queue_idx;
	rxq->port_id = dev->data->port_id;
	rxq->shared = &hw->rqd_start[queue_idx];
	rxq->qid1 = queue_idx;
	rxq->qid2 = queue_idx + hw->num_rx_queues;

	ring0 = &rxq->cmd_ring[0];
	ring1 = &rxq->cmd_ring[1];
	comp_ring = &rxq->comp_ring;

	/* Rx vmxnet rings length should be between 256-4096 */
	if (nb_desc < VMXNET3_DEF_RX_RING_SIZE) {
		PMD_INIT_LOG(ERR, "VMXNET3 Rx Ring Size Min: 256");
	} else if (nb_desc > VMXNET3_RX_RING_MAX_SIZE) {
		PMD_INIT_LOG(ERR, "VMXNET3 Rx Ring Size Max: 4096");
		ring0->size = nb_desc;
		ring0->size &= ~VMXNET3_RING_SIZE_MASK;
		ring1->size = ring0->size;

	comp_ring->size = ring0->size + ring1->size;

	/* Rx vmxnet rings structure initialization */
	ring0->next2fill = 0;
	ring1->next2fill = 0;
	ring0->next2comp = 0;
	ring1->next2comp = 0;
	ring0->gen = VMXNET3_INIT_GEN;
	ring1->gen = VMXNET3_INIT_GEN;
	comp_ring->next2proc = 0;
	comp_ring->gen = VMXNET3_INIT_GEN;

	size = sizeof(struct Vmxnet3_RxDesc) * (ring0->size + ring1->size);
	size += sizeof(struct Vmxnet3_RxCompDesc) * comp_ring->size;

	mz = ring_dma_zone_reserve(dev, "rxdesc", queue_idx, size, socket_id);
		PMD_INIT_LOG(ERR, "ERROR: Creating queue descriptors zone");
	memset(mz->addr, 0, mz->len);

	/* cmd_ring0 initialization */
	ring0->base = mz->addr;
	ring0->basePA = mz->phys_addr;

	/* cmd_ring1 initialization */
	ring1->base = ring0->base + ring0->size;
	ring1->basePA = ring0->basePA + sizeof(struct Vmxnet3_RxDesc) * ring0->size;

	/* comp_ring initialization */
	comp_ring->base = ring1->base + ring1->size;
	comp_ring->basePA = ring1->basePA + sizeof(struct Vmxnet3_RxDesc) *

	/* cmd_ring0-cmd_ring1 buf_info allocation */
	for (i = 0; i < VMXNET3_RX_CMDRING_SIZE; i++) {
		ring = &rxq->cmd_ring[i];

		snprintf(mem_name, sizeof(mem_name), "rx_ring_%d_buf_info", i);

		ring->buf_info = rte_zmalloc(mem_name,
					     ring->size * sizeof(vmxnet3_buf_info_t),
					     RTE_CACHE_LINE_SIZE);
		if (ring->buf_info == NULL) {
			PMD_INIT_LOG(ERR, "ERROR: Creating rx_buf_info structure");

	/* Update the data portion with rxq */
	dev->data->rx_queues[queue_idx] = rxq;
/*
 * Initializes Receive Unit
 * Load mbufs in rx queue in advance
 */
vmxnet3_dev_rxtx_init(struct rte_eth_dev *dev)
	struct vmxnet3_hw *hw = dev->data->dev_private;

	PMD_INIT_FUNC_TRACE();

	for (i = 0; i < hw->num_rx_queues; i++) {
		vmxnet3_rx_queue_t *rxq = dev->data->rx_queues[i];

		for (j = 0; j < VMXNET3_RX_CMDRING_SIZE; j++) {
			/* vmxnet3_post_rx_bufs() fills the whole command ring */
			ret = vmxnet3_post_rx_bufs(rxq, j);
					     "ERROR: Posting Rxq: %d buffers ring: %d",

			/*
			 * Update the device with the next2fill index so that it
			 * can use the newly posted mbufs for incoming packets.
			 */
			if (unlikely(rxq->shared->ctrl.updateRxProd)) {
				VMXNET3_WRITE_BAR0_REG(hw, rxprod_reg[j] + (rxq->queue_id * VMXNET3_REG_ALIGN),
						       rxq->cmd_ring[j].next2fill);

		rxq->stopped = FALSE;
		rxq->start_seg = NULL;

	for (i = 0; i < dev->data->nb_tx_queues; i++) {
		struct vmxnet3_tx_queue *txq = dev->data->tx_queues[i];

		txq->stopped = FALSE;
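/* Default 40-byte Toeplitz RSS hash key, used when the application does not supply one. */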
static uint8_t rss_intel_key[40] = {
	0x6D, 0x5A, 0x56, 0xDA, 0x25, 0x5B, 0x0E, 0xC2,
	0x41, 0x67, 0x25, 0x3D, 0x43, 0xA3, 0x8F, 0xB0,
	0xD0, 0xCA, 0x2B, 0xCB, 0xAE, 0x7B, 0x30, 0xB4,
	0x77, 0xCB, 0x2D, 0xA3, 0x80, 0x30, 0xF2, 0x0C,
	0x6A, 0x42, 0xB7, 0x3B, 0xBE, 0xAC, 0x01, 0xFA,
/*
 * Configure RSS feature
 */
vmxnet3_rss_configure(struct rte_eth_dev *dev)
	struct vmxnet3_hw *hw = dev->data->dev_private;
	struct VMXNET3_RSSConf *dev_rss_conf;
	struct rte_eth_rss_conf *port_rss_conf;

	PMD_INIT_FUNC_TRACE();

	dev_rss_conf = hw->rss_conf;
	port_rss_conf = &dev->data->dev_conf.rx_adv_conf.rss_conf;

	/* loading hashFunc */
	dev_rss_conf->hashFunc = VMXNET3_RSS_HASH_FUNC_TOEPLITZ;
	/* loading hashKeySize */
	dev_rss_conf->hashKeySize = VMXNET3_RSS_MAX_KEY_SIZE;
	/* loading indTableSize: Must not exceed VMXNET3_RSS_MAX_IND_TABLE_SIZE (128) */
	dev_rss_conf->indTableSize = (uint16_t)(hw->num_rx_queues * 4);

	if (port_rss_conf->rss_key == NULL) {
		/* Default hash key */
		port_rss_conf->rss_key = rss_intel_key;

	/* loading hashKey */
	memcpy(&dev_rss_conf->hashKey[0], port_rss_conf->rss_key,
	       dev_rss_conf->hashKeySize);

	/* loading indTable */
	for (i = 0, j = 0; i < dev_rss_conf->indTableSize; i++, j++) {
		if (j == dev->data->nb_rx_queues)

		dev_rss_conf->indTable[i] = j;
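		/* e.g. with two Rx queues indTableSize is 8 and the table
		 * becomes {0, 1, 0, 1, 0, 1, 0, 1}.
		 */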
	/* loading hashType */
	dev_rss_conf->hashType = 0;
	rss_hf = port_rss_conf->rss_hf & VMXNET3_RSS_OFFLOAD_ALL;
	if (rss_hf & ETH_RSS_IPV4)
		dev_rss_conf->hashType |= VMXNET3_RSS_HASH_TYPE_IPV4;
	if (rss_hf & ETH_RSS_NONFRAG_IPV4_TCP)
		dev_rss_conf->hashType |= VMXNET3_RSS_HASH_TYPE_TCP_IPV4;
	if (rss_hf & ETH_RSS_IPV6)
		dev_rss_conf->hashType |= VMXNET3_RSS_HASH_TYPE_IPV6;
	if (rss_hf & ETH_RSS_NONFRAG_IPV6_TCP)
		dev_rss_conf->hashType |= VMXNET3_RSS_HASH_TYPE_TCP_IPV6;

	return VMXNET3_SUCCESS;