/*-
 *   BSD LICENSE
 *
 *   Copyright(c) 2010-2015 Intel Corporation. All rights reserved.
 *
 *   Redistribution and use in source and binary forms, with or without
 *   modification, are permitted provided that the following conditions
 *   are met:
 *
 *     * Redistributions of source code must retain the above copyright
 *       notice, this list of conditions and the following disclaimer.
 *     * Redistributions in binary form must reproduce the above copyright
 *       notice, this list of conditions and the following disclaimer in
 *       the documentation and/or other materials provided with the
 *       distribution.
 *     * Neither the name of Intel Corporation nor the names of its
 *       contributors may be used to endorse or promote products derived
 *       from this software without specific prior written permission.
 *
 *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

#include <sys/queue.h>

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <stdbool.h>
#include <stdint.h>
#include <errno.h>

#include <rte_byteorder.h>
#include <rte_common.h>
#include <rte_cycles.h>
#include <rte_debug.h>
#include <rte_interrupts.h>
#include <rte_memory.h>
#include <rte_memcpy.h>
#include <rte_memzone.h>
#include <rte_launch.h>
#include <rte_per_lcore.h>
#include <rte_lcore.h>
#include <rte_atomic.h>
#include <rte_branch_prediction.h>
#include <rte_mempool.h>
#include <rte_malloc.h>
#include <rte_mbuf.h>
#include <rte_ether.h>
#include <rte_ethdev.h>
#include <rte_prefetch.h>
#include <rte_ip.h>
#include <rte_udp.h>
#include <rte_tcp.h>
#include <rte_string_fns.h>
#include <rte_errno.h>
#include <rte_net.h>

#include "base/vmxnet3_defs.h"
#include "vmxnet3_ring.h"

#include "vmxnet3_logs.h"
#include "vmxnet3_ethdev.h"

#define	VMXNET3_TX_OFFLOAD_MASK	( \
		PKT_TX_VLAN_PKT | \
		PKT_TX_L4_MASK |  \
		PKT_TX_TCP_SEG)

#define	VMXNET3_TX_OFFLOAD_NOTSUP_MASK	\
	(PKT_TX_OFFLOAD_MASK ^ VMXNET3_TX_OFFLOAD_MASK)
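
/*
 * VMXNET3_TX_OFFLOAD_NOTSUP_MASK covers every PKT_TX_* flag the transmit path
 * does not handle; vmxnet3_prep_pkts() below uses it to reject packets that
 * request unsupported offloads.
 */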

static const uint32_t rxprod_reg[2] = {VMXNET3_REG_RXPROD, VMXNET3_REG_RXPROD2};

static int vmxnet3_post_rx_bufs(vmxnet3_rx_queue_t *, uint8_t);
static void vmxnet3_tq_tx_complete(vmxnet3_tx_queue_t *);
#ifdef RTE_LIBRTE_VMXNET3_DEBUG_DRIVER_NOT_USED
static void vmxnet3_rxq_dump(struct vmxnet3_rx_queue *);
static void vmxnet3_txq_dump(struct vmxnet3_tx_queue *);
#endif

#ifdef RTE_LIBRTE_VMXNET3_DEBUG_DRIVER_NOT_USED
static void
vmxnet3_rxq_dump(struct vmxnet3_rx_queue *rxq)
{
	uint32_t avail = 0;

	if (rxq == NULL)
		return;

	PMD_RX_LOG(DEBUG,
		   "RXQ: cmd0 base : %p cmd1 base : %p comp ring base : %p.",
		   rxq->cmd_ring[0].base, rxq->cmd_ring[1].base, rxq->comp_ring.base);
	PMD_RX_LOG(DEBUG,
		   "RXQ: cmd0 basePA : 0x%lx cmd1 basePA : 0x%lx comp ring basePA : 0x%lx.",
		   (unsigned long)rxq->cmd_ring[0].basePA,
		   (unsigned long)rxq->cmd_ring[1].basePA,
		   (unsigned long)rxq->comp_ring.basePA);

	avail = vmxnet3_cmd_ring_desc_avail(&rxq->cmd_ring[0]);
	PMD_RX_LOG(DEBUG,
		   "RXQ:cmd0: size=%u; free=%u; next2proc=%u; queued=%u",
		   (uint32_t)rxq->cmd_ring[0].size, avail,
		   rxq->comp_ring.next2proc,
		   rxq->cmd_ring[0].size - avail);

	avail = vmxnet3_cmd_ring_desc_avail(&rxq->cmd_ring[1]);
	PMD_RX_LOG(DEBUG, "RXQ:cmd1 size=%u; free=%u; next2proc=%u; queued=%u",
		   (uint32_t)rxq->cmd_ring[1].size, avail, rxq->comp_ring.next2proc,
		   rxq->cmd_ring[1].size - avail);
}

static void
vmxnet3_txq_dump(struct vmxnet3_tx_queue *txq)
{
	uint32_t avail = 0;

	if (txq == NULL)
		return;

	PMD_TX_LOG(DEBUG, "TXQ: cmd base : %p comp ring base : %p data ring base : %p.",
		   txq->cmd_ring.base, txq->comp_ring.base, txq->data_ring.base);
	PMD_TX_LOG(DEBUG, "TXQ: cmd basePA : 0x%lx comp ring basePA : 0x%lx data ring basePA : 0x%lx.",
		   (unsigned long)txq->cmd_ring.basePA,
		   (unsigned long)txq->comp_ring.basePA,
		   (unsigned long)txq->data_ring.basePA);

	avail = vmxnet3_cmd_ring_desc_avail(&txq->cmd_ring);
	PMD_TX_LOG(DEBUG, "TXQ: size=%u; free=%u; next2proc=%u; queued=%u",
		   (uint32_t)txq->cmd_ring.size, avail,
		   txq->comp_ring.next2proc, txq->cmd_ring.size - avail);
}
#endif

static void
vmxnet3_tx_cmd_ring_release_mbufs(vmxnet3_cmd_ring_t *ring)
{
	while (ring->next2comp != ring->next2fill) {
		/* No need to worry about desc ownership, device is quiesced by now. */
		vmxnet3_buf_info_t *buf_info = ring->buf_info + ring->next2comp;

		if (buf_info->m != NULL) {
			rte_pktmbuf_free(buf_info->m);
			buf_info->m = NULL;
		}
		vmxnet3_cmd_ring_adv_next2comp(ring);
	}
}

static void
vmxnet3_rx_cmd_ring_release_mbufs(vmxnet3_cmd_ring_t *ring)
{
	uint32_t i;

	for (i = 0; i < ring->size; i++) {
		/* No need to worry about desc ownership, device is quiesced by now. */
		vmxnet3_buf_info_t *buf_info = &ring->buf_info[i];

		if (buf_info->m != NULL) {
			rte_pktmbuf_free_seg(buf_info->m);
			buf_info->m = NULL;
		}
		vmxnet3_cmd_ring_adv_next2comp(ring);
	}
}

static void
vmxnet3_cmd_ring_release(vmxnet3_cmd_ring_t *ring)
{
	rte_free(ring->buf_info);
	ring->buf_info = NULL;
}
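
/*
 * Queue release order: free the mbufs still attached to the command ring(s),
 * free the per-descriptor buf_info array, then free the memzone that backs
 * the descriptor rings themselves.
 */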
void
vmxnet3_dev_tx_queue_release(void *txq)
{
	vmxnet3_tx_queue_t *tq = txq;

	if (tq == NULL)
		return;

	/* Release mbufs */
	vmxnet3_tx_cmd_ring_release_mbufs(&tq->cmd_ring);
	/* Release the cmd_ring */
	vmxnet3_cmd_ring_release(&tq->cmd_ring);
	/* Release the memzone */
	rte_memzone_free(tq->mz);
}

void
vmxnet3_dev_rx_queue_release(void *rxq)
{
	int i;
	vmxnet3_rx_queue_t *rq = rxq;

	if (rq == NULL)
		return;

	/* Release mbufs */
	for (i = 0; i < VMXNET3_RX_CMDRING_SIZE; i++)
		vmxnet3_rx_cmd_ring_release_mbufs(&rq->cmd_ring[i]);

	/* Release both the cmd_rings */
	for (i = 0; i < VMXNET3_RX_CMDRING_SIZE; i++)
		vmxnet3_cmd_ring_release(&rq->cmd_ring[i]);

	/* Release the memzone */
	rte_memzone_free(rq->mz);
}

static void
vmxnet3_dev_tx_queue_reset(void *txq)
{
	vmxnet3_tx_queue_t *tq = txq;
	struct vmxnet3_cmd_ring *ring = &tq->cmd_ring;
	struct vmxnet3_comp_ring *comp_ring = &tq->comp_ring;
	struct vmxnet3_data_ring *data_ring = &tq->data_ring;
	int size;

	/* Release the cmd_ring mbufs */
	vmxnet3_tx_cmd_ring_release_mbufs(&tq->cmd_ring);

	/* Tx vmxnet rings structure initialization */
	ring->next2fill = 0;
	ring->next2comp = 0;
	ring->gen = VMXNET3_INIT_GEN;
	comp_ring->next2proc = 0;
	comp_ring->gen = VMXNET3_INIT_GEN;

	size = sizeof(struct Vmxnet3_TxDesc) * ring->size;
	size += sizeof(struct Vmxnet3_TxCompDesc) * comp_ring->size;
	size += tq->txdata_desc_size * data_ring->size;

	memset(ring->base, 0, size);
}

static void
vmxnet3_dev_rx_queue_reset(void *rxq)
{
	int i;
	vmxnet3_rx_queue_t *rq = rxq;
	struct vmxnet3_hw *hw = rq->hw;
	struct vmxnet3_cmd_ring *ring0, *ring1;
	struct vmxnet3_comp_ring *comp_ring;
	struct vmxnet3_rx_data_ring *data_ring = &rq->data_ring;
	int size;

	/* Release both the cmd_rings mbufs */
	for (i = 0; i < VMXNET3_RX_CMDRING_SIZE; i++)
		vmxnet3_rx_cmd_ring_release_mbufs(&rq->cmd_ring[i]);

	ring0 = &rq->cmd_ring[0];
	ring1 = &rq->cmd_ring[1];
	comp_ring = &rq->comp_ring;

	/* Rx vmxnet rings structure initialization */
	ring0->next2fill = 0;
	ring1->next2fill = 0;
	ring0->next2comp = 0;
	ring1->next2comp = 0;
	ring0->gen = VMXNET3_INIT_GEN;
	ring1->gen = VMXNET3_INIT_GEN;
	comp_ring->next2proc = 0;
	comp_ring->gen = VMXNET3_INIT_GEN;

	size = sizeof(struct Vmxnet3_RxDesc) * (ring0->size + ring1->size);
	size += sizeof(struct Vmxnet3_RxCompDesc) * comp_ring->size;
	if (VMXNET3_VERSION_GE_3(hw) && rq->data_desc_size)
		size += rq->data_desc_size * data_ring->size;

	memset(ring0->base, 0, size);
}
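
/*
 * Reset every configured Tx and Rx queue back to its initial state: indices
 * zeroed, gen bits reset and descriptor memory cleared, so a later start
 * begins from clean rings.
 */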
void
vmxnet3_dev_clear_queues(struct rte_eth_dev *dev)
{
	unsigned int i;

	PMD_INIT_FUNC_TRACE();

	for (i = 0; i < dev->data->nb_tx_queues; i++) {
		struct vmxnet3_tx_queue *txq = dev->data->tx_queues[i];

		if (txq != NULL)
			vmxnet3_dev_tx_queue_reset(txq);
	}

	for (i = 0; i < dev->data->nb_rx_queues; i++) {
		struct vmxnet3_rx_queue *rxq = dev->data->rx_queues[i];

		if (rxq != NULL)
			vmxnet3_dev_rx_queue_reset(rxq);
	}
}

static int
vmxnet3_unmap_pkt(uint16_t eop_idx, vmxnet3_tx_queue_t *txq)
{
	int completed = 0;
	struct rte_mbuf *mbuf;

	/* Release cmd_ring descriptor and free mbuf */
	RTE_ASSERT(txq->cmd_ring.base[eop_idx].txd.eop == 1);

	mbuf = txq->cmd_ring.buf_info[eop_idx].m;
	if (mbuf == NULL)
		rte_panic("EOP desc does not point to a valid mbuf");
	rte_pktmbuf_free(mbuf);

	txq->cmd_ring.buf_info[eop_idx].m = NULL;

	while (txq->cmd_ring.next2comp != eop_idx) {
		/* no out-of-order completion */
		RTE_ASSERT(txq->cmd_ring.base[txq->cmd_ring.next2comp].txd.cq == 0);
		vmxnet3_cmd_ring_adv_next2comp(&txq->cmd_ring);
		completed++;
	}

	/* Mark the txd for which tcd was generated as completed */
	vmxnet3_cmd_ring_adv_next2comp(&txq->cmd_ring);

	return completed + 1;
}
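
/*
 * Walk the Tx completion ring: each completion descriptor whose gen bit
 * matches the ring's current gen belongs to the driver, so the mbuf it refers
 * to is freed via vmxnet3_unmap_pkt() and the command-ring indices advance.
 */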
static void
vmxnet3_tq_tx_complete(vmxnet3_tx_queue_t *txq)
{
	int completed = 0;
	vmxnet3_comp_ring_t *comp_ring = &txq->comp_ring;
	struct Vmxnet3_TxCompDesc *tcd = (struct Vmxnet3_TxCompDesc *)
		(comp_ring->base + comp_ring->next2proc);

	while (tcd->gen == comp_ring->gen) {
		completed += vmxnet3_unmap_pkt(tcd->txdIdx, txq);

		vmxnet3_comp_ring_adv_next2proc(comp_ring);
		tcd = (struct Vmxnet3_TxCompDesc *)(comp_ring->base +
						    comp_ring->next2proc);
	}

	PMD_TX_LOG(DEBUG, "Processed %d tx comps & command descs.", completed);
}

uint16_t
vmxnet3_prep_pkts(__rte_unused void *tx_queue, struct rte_mbuf **tx_pkts,
		  uint16_t nb_pkts)
{
	int32_t ret;
	uint32_t i;
	uint64_t ol_flags;
	struct rte_mbuf *m;

	for (i = 0; i != nb_pkts; i++) {
		m = tx_pkts[i];
		ol_flags = m->ol_flags;

		/* Non-TSO packet cannot occupy more than
		 * VMXNET3_MAX_TXD_PER_PKT TX descriptors.
		 */
		if ((ol_flags & PKT_TX_TCP_SEG) == 0 &&
		    m->nb_segs > VMXNET3_MAX_TXD_PER_PKT) {
			rte_errno = -EINVAL;
			return i;
		}

		/* check that only supported TX offloads are requested. */
		if ((ol_flags & VMXNET3_TX_OFFLOAD_NOTSUP_MASK) != 0 ||
		    (ol_flags & PKT_TX_L4_MASK) == PKT_TX_SCTP_CKSUM) {
			rte_errno = -ENOTSUP;
			return i;
		}

#ifdef RTE_LIBRTE_ETHDEV_DEBUG
		ret = rte_validate_tx_offload(m);
		if (ret != 0) {
			rte_errno = ret;
			return i;
		}
#endif
		ret = rte_net_intel_cksum_prepare(m);
		if (ret != 0) {
			rte_errno = ret;
			return i;
		}
	}

	return nb_pkts;
}
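
/*
 * Transmit burst function. Single-segment packets that fit in the Tx data
 * ring are copied there so the descriptor can point at ring-local memory;
 * larger or multi-segment packets are sent by reference, one descriptor per
 * segment. The SOP descriptor's gen bit is flipped only after the whole chain
 * has been written, and the TXPROD doorbell is rung only once the number of
 * deferred packets reaches the device-provided txThreshold.
 */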
uint16_t
vmxnet3_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts,
		  uint16_t nb_pkts)
{
	uint16_t nb_tx;
	vmxnet3_tx_queue_t *txq = tx_queue;
	struct vmxnet3_hw *hw = txq->hw;
	Vmxnet3_TxQueueCtrl *txq_ctrl = &txq->shared->ctrl;
	uint32_t deferred = rte_le_to_cpu_32(txq_ctrl->txNumDeferred);

	if (unlikely(txq->stopped)) {
		PMD_TX_LOG(DEBUG, "Tx queue is stopped.");
		return 0;
	}

	/* Free up the comp_descriptors aggressively */
	vmxnet3_tq_tx_complete(txq);

	nb_tx = 0;
	while (nb_tx < nb_pkts) {
		Vmxnet3_GenericDesc *gdesc;
		vmxnet3_buf_info_t *tbi;
		uint32_t first2fill, avail, dw2;
		struct rte_mbuf *txm = tx_pkts[nb_tx];
		struct rte_mbuf *m_seg = txm;
		int copy_size = 0;
		bool tso = (txm->ol_flags & PKT_TX_TCP_SEG) != 0;
		/* # of descriptors needed for a packet. */
		unsigned int count = txm->nb_segs;

		avail = vmxnet3_cmd_ring_desc_avail(&txq->cmd_ring);
		if (count > avail) {
			/* Is command ring full? */
			if (unlikely(avail == 0)) {
				PMD_TX_LOG(DEBUG, "No free ring descriptors");
				txq->stats.tx_ring_full++;
				txq->stats.drop_total += (nb_pkts - nb_tx);
				break;
			}

			/* Command ring is not full but cannot handle the
			 * multi-segmented packet. Let's try the next packet
			 * in this case.
			 */
			PMD_TX_LOG(DEBUG, "Running out of ring descriptors "
				   "(avail %d needed %d)", avail, count);
			txq->stats.drop_total++;
			if (tso)
				txq->stats.drop_tso++;
			rte_pktmbuf_free(txm);
			nb_tx++;
			continue;
		}

		/* Drop non-TSO packet that is excessively fragmented */
		if (unlikely(!tso && count > VMXNET3_MAX_TXD_PER_PKT)) {
			PMD_TX_LOG(ERR, "Non-TSO packet cannot occupy more than %d tx "
				   "descriptors. Packet dropped.", VMXNET3_MAX_TXD_PER_PKT);
			txq->stats.drop_too_many_segs++;
			txq->stats.drop_total++;
			rte_pktmbuf_free(txm);
			nb_tx++;
			continue;
		}

		if (txm->nb_segs == 1 &&
		    rte_pktmbuf_pkt_len(txm) <= txq->txdata_desc_size) {
			struct Vmxnet3_TxDataDesc *tdd;

			tdd = (struct Vmxnet3_TxDataDesc *)
				((uint8 *)txq->data_ring.base +
				 txq->cmd_ring.next2fill *
				 txq->txdata_desc_size);
			copy_size = rte_pktmbuf_pkt_len(txm);
			rte_memcpy(tdd->data, rte_pktmbuf_mtod(txm, char *), copy_size);
		}

		/* use the previous gen bit for the SOP desc */
		dw2 = (txq->cmd_ring.gen ^ 0x1) << VMXNET3_TXD_GEN_SHIFT;
		first2fill = txq->cmd_ring.next2fill;
		do {
			/* Remember the transmit buffer for cleanup */
			tbi = txq->cmd_ring.buf_info + txq->cmd_ring.next2fill;

			/* NB: the following assumes that VMXNET3 maximum
			 * transmit buffer size (16K) is greater than
			 * maximum size of mbuf segment size.
			 */
			gdesc = txq->cmd_ring.base + txq->cmd_ring.next2fill;
			if (copy_size) {
				uint64 offset =
					(uint64)txq->cmd_ring.next2fill *
						txq->txdata_desc_size;
				gdesc->txd.addr =
					rte_cpu_to_le_64(txq->data_ring.basePA +
							 offset);
			} else {
				gdesc->txd.addr = rte_mbuf_data_dma_addr(m_seg);
			}

			gdesc->dword[2] = dw2 | m_seg->data_len;
			gdesc->dword[3] = 0;

			/* move to the next2fill descriptor */
			vmxnet3_cmd_ring_adv_next2fill(&txq->cmd_ring);

			/* use the right gen for non-SOP desc */
			dw2 = txq->cmd_ring.gen << VMXNET3_TXD_GEN_SHIFT;
		} while ((m_seg = m_seg->next) != NULL);

		/* set the last buf_info for the pkt */
		tbi->m = txm;
		/* Update the EOP descriptor */
		gdesc->dword[3] |= VMXNET3_TXD_EOP | VMXNET3_TXD_CQ;

		/* Add VLAN tag if present */
		gdesc = txq->cmd_ring.base + first2fill;
		if (txm->ol_flags & PKT_TX_VLAN_PKT) {
			gdesc->txd.ti = 1;
			gdesc->txd.tci = txm->vlan_tci;
		}

		if (tso) {
			uint16_t mss = txm->tso_segsz;

			RTE_ASSERT(mss > 0);

			gdesc->txd.hlen = txm->l2_len + txm->l3_len + txm->l4_len;
			gdesc->txd.om = VMXNET3_OM_TSO;
			gdesc->txd.msscof = mss;

			deferred += (rte_pktmbuf_pkt_len(txm) - gdesc->txd.hlen + mss - 1) / mss;
		} else if (txm->ol_flags & PKT_TX_L4_MASK) {
			gdesc->txd.om = VMXNET3_OM_CSUM;
			gdesc->txd.hlen = txm->l2_len + txm->l3_len;

			switch (txm->ol_flags & PKT_TX_L4_MASK) {
			case PKT_TX_TCP_CKSUM:
				gdesc->txd.msscof = gdesc->txd.hlen + offsetof(struct tcp_hdr, cksum);
				break;
			case PKT_TX_UDP_CKSUM:
				gdesc->txd.msscof = gdesc->txd.hlen + offsetof(struct udp_hdr, dgram_cksum);
				break;
			default:
				PMD_TX_LOG(WARNING, "requested cksum offload not supported %#llx",
					   txm->ol_flags & PKT_TX_L4_MASK);
				break;
			}
			deferred++;
		} else {
			gdesc->txd.hlen = 0;
			gdesc->txd.om = VMXNET3_OM_NONE;
			gdesc->txd.msscof = 0;
			deferred++;
		}

		/* flip the GEN bit on the SOP */
		rte_compiler_barrier();
		gdesc->dword[2] ^= VMXNET3_TXD_GEN;

		txq_ctrl->txNumDeferred = rte_cpu_to_le_32(deferred);
		nb_tx++;
	}

	PMD_TX_LOG(DEBUG, "vmxnet3 txThreshold: %u", rte_le_to_cpu_32(txq_ctrl->txThreshold));

	if (deferred >= rte_le_to_cpu_32(txq_ctrl->txThreshold)) {
		txq_ctrl->txNumDeferred = 0;
		/* Notify vSwitch that packets are available. */
		VMXNET3_WRITE_BAR0_REG(hw, (VMXNET3_REG_TXPROD + txq->queue_id * VMXNET3_REG_ALIGN),
				       txq->cmd_ring.next2fill);
	}

	return nb_tx;
}
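
/*
 * Refill one Rx descriptor with a fresh mbuf. Descriptors in command ring 0
 * are posted as HEAD buffers and those in ring 1 as BODY buffers; the
 * descriptor's gen bit is written last so the device only sees a fully
 * initialized entry.
 */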
static void
vmxnet3_renew_desc(vmxnet3_rx_queue_t *rxq, uint8_t ring_id,
		   struct rte_mbuf *mbuf)
{
	uint32_t val;
	struct vmxnet3_cmd_ring *ring = &rxq->cmd_ring[ring_id];
	struct Vmxnet3_RxDesc *rxd =
		(struct Vmxnet3_RxDesc *)(ring->base + ring->next2fill);
	vmxnet3_buf_info_t *buf_info = &ring->buf_info[ring->next2fill];

	if (ring_id == 0) {
		/* Usually: One HEAD type buf per packet
		 * val = (ring->next2fill % rxq->hw->bufs_per_pkt) ?
		 * VMXNET3_RXD_BTYPE_BODY : VMXNET3_RXD_BTYPE_HEAD;
		 */

		/* We use single packet buffer so all heads here */
		val = VMXNET3_RXD_BTYPE_HEAD;
	} else {
		/* All BODY type buffers for 2nd ring */
		val = VMXNET3_RXD_BTYPE_BODY;
	}

	/*
	 * Load mbuf pointer into buf_info[ring_size]
	 * buf_info structure is equivalent to cookie for virtio-virtqueue
	 */
	buf_info->m = mbuf;
	buf_info->len = (uint16_t)(mbuf->buf_len - RTE_PKTMBUF_HEADROOM);
	buf_info->bufPA = rte_mbuf_data_dma_addr_default(mbuf);

	/* Load Rx Descriptor with the buffer's GPA */
	rxd->addr = buf_info->bufPA;

	/* After this point rxd->addr MUST not be NULL */
	rxd->btype = val;
	rxd->len = buf_info->len;
	/* Flip gen bit at the end to change ownership */
	rxd->gen = ring->gen;

	vmxnet3_cmd_ring_adv_next2fill(ring);
}

/*
 * Allocates mbufs and clusters. Post rx descriptors with buffer details
 * so that device can receive packets in those buffers.
 * Among the two rings, 1st ring contains buffers of type 0 and type 1.
 * bufs_per_pkt is set such that for non-LRO cases all the buffers required
 * by a frame will fit in 1st ring (1st buf of type0 and rest of type1).
 * 2nd ring contains buffers of type 1 alone; it is mostly used only in the
 * LRO case.
 */
static int
vmxnet3_post_rx_bufs(vmxnet3_rx_queue_t *rxq, uint8_t ring_id)
{
	int err = 0;
	uint32_t i = 0;
	struct vmxnet3_cmd_ring *ring = &rxq->cmd_ring[ring_id];

	while (vmxnet3_cmd_ring_desc_avail(ring) > 0) {
		struct rte_mbuf *mbuf;

		/* Allocate blank mbuf for the current Rx Descriptor */
		mbuf = rte_mbuf_raw_alloc(rxq->mp);
		if (unlikely(mbuf == NULL)) {
			PMD_RX_LOG(ERR, "Error allocating mbuf");
			rxq->stats.rx_buf_alloc_failure++;
			err = ENOMEM;
			break;
		}

		vmxnet3_renew_desc(rxq, ring_id, mbuf);
		i++;
	}

	/* Return error only if no buffers are posted at present */
	if (vmxnet3_cmd_ring_desc_avail(ring) >= (ring->size - 1))
		return -err;
	else
		return i;
}

/* Receive side checksum and other offloads */
static void
vmxnet3_rx_offload(const Vmxnet3_RxCompDesc *rcd, struct rte_mbuf *rxm)
{
	/* Check for RSS */
	if (rcd->rssType != VMXNET3_RCD_RSS_TYPE_NONE) {
		rxm->ol_flags |= PKT_RX_RSS_HASH;
		rxm->hash.rss = rcd->rssHash;
	}

	/* Check packet type, checksum errors, etc. Only support IPv4 for now. */
	if (rcd->v4) {
		struct ether_hdr *eth = rte_pktmbuf_mtod(rxm, struct ether_hdr *);
		struct ipv4_hdr *ip = (struct ipv4_hdr *)(eth + 1);

		if (((ip->version_ihl & 0xf) << 2) > (int)sizeof(struct ipv4_hdr))
			rxm->packet_type = RTE_PTYPE_L3_IPV4_EXT;
		else
			rxm->packet_type = RTE_PTYPE_L3_IPV4;

		if (!rcd->ipc)
			rxm->ol_flags |= PKT_RX_IP_CKSUM_BAD;

		if ((rcd->tcp || rcd->udp) && !rcd->tuc)
			rxm->ol_flags |= PKT_RX_L4_CKSUM_BAD;
	}
}

/*
 * Process the Rx Completion Ring of given vmxnet3_rx_queue
 * for nb_pkts burst and return the number of packets received
 */
uint16_t
vmxnet3_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t nb_pkts)
{
	uint16_t nb_rx;
	uint32_t nb_rxd, idx;
	uint8_t ring_idx;
	vmxnet3_rx_queue_t *rxq;
	Vmxnet3_RxCompDesc *rcd;
	vmxnet3_buf_info_t *rbi;
	Vmxnet3_RxDesc *rxd;
	struct rte_mbuf *rxm = NULL;
	struct vmxnet3_hw *hw;

	nb_rx = 0;
	ring_idx = 0;
	nb_rxd = 0;
	idx = 0;

	rxq = rx_queue;
	hw = rxq->hw;

	rcd = &rxq->comp_ring.base[rxq->comp_ring.next2proc].rcd;

	if (unlikely(rxq->stopped)) {
		PMD_RX_LOG(DEBUG, "Rx queue is stopped.");
		return 0;
	}

	while (rcd->gen == rxq->comp_ring.gen) {
		struct rte_mbuf *newm;

		if (nb_rx >= nb_pkts)
			break;

		newm = rte_mbuf_raw_alloc(rxq->mp);
		if (unlikely(newm == NULL)) {
			PMD_RX_LOG(ERR, "Error allocating mbuf");
			rxq->stats.rx_buf_alloc_failure++;
			break;
		}

		idx = rcd->rxdIdx;
		ring_idx = vmxnet3_get_ring_idx(hw, rcd->rqID);
		rxd = (Vmxnet3_RxDesc *)rxq->cmd_ring[ring_idx].base + idx;
		RTE_SET_USED(rxd); /* used only for assert when enabled */
		rbi = rxq->cmd_ring[ring_idx].buf_info + idx;

		PMD_RX_LOG(DEBUG, "rxd idx: %d ring idx: %d.", idx, ring_idx);

		RTE_ASSERT(rcd->len <= rxd->len);
		RTE_ASSERT(rbi->m);

		/* Get the packet buffer pointer from buf_info */
		rxm = rbi->m;

		/* Clear descriptor associated buf_info to be reused */
		rbi->m = NULL;
		rbi->bufPA = 0;

		/* Update the index that we received a packet */
		rxq->cmd_ring[ring_idx].next2comp = idx;

		/* For RCD with EOP set, check if there is frame error */
		if (unlikely(rcd->eop && rcd->err)) {
			rxq->stats.drop_total++;
			rxq->stats.drop_err++;

			if (!rcd->fcs) {
				rxq->stats.drop_fcs++;
				PMD_RX_LOG(ERR, "Recv packet dropped due to frame err.");
			}
			PMD_RX_LOG(ERR, "Error in received packet rcd#:%d rxd:%d",
				   (int)(rcd - (struct Vmxnet3_RxCompDesc *)
					 rxq->comp_ring.base), rcd->rxdIdx);
			rte_pktmbuf_free_seg(rxm);
			if (rxq->start_seg) {
				struct rte_mbuf *start = rxq->start_seg;

				rxq->start_seg = NULL;
				rte_pktmbuf_free(start);
			}

			goto rcd_done;
		}

		/* Initialize newly received packet buffer */
		rxm->port = rxq->port_id;
		rxm->nb_segs = 1;
		rxm->next = NULL;
		rxm->pkt_len = (uint16_t)rcd->len;
		rxm->data_len = (uint16_t)rcd->len;
		rxm->data_off = RTE_PKTMBUF_HEADROOM;
		rxm->ol_flags = 0;
		rxm->vlan_tci = 0;

		/*
		 * If this is the first buffer of the received packet,
		 * set the pointer to the first mbuf of the packet
		 * Otherwise, update the total length and the number of segments
		 * of the current scattered packet, and update the pointer to
		 * the last mbuf of the current packet.
		 */
		if (rcd->sop) {
			RTE_ASSERT(rxd->btype == VMXNET3_RXD_BTYPE_HEAD);

			if (unlikely(rcd->len == 0)) {
				RTE_ASSERT(rcd->eop);

				PMD_RX_LOG(DEBUG,
					   "Rx buf was skipped. rxring[%d][%d])",
					   ring_idx, idx);
				rte_pktmbuf_free_seg(rxm);
				goto rcd_done;
			}

			if (vmxnet3_rx_data_ring(hw, rcd->rqID)) {
				uint8_t *rdd = rxq->data_ring.base +
					idx * rxq->data_desc_size;

				RTE_ASSERT(VMXNET3_VERSION_GE_3(hw));
				rte_memcpy(rte_pktmbuf_mtod(rxm, char *),
					   rdd, rcd->len);
			}

			rxq->start_seg = rxm;
			vmxnet3_rx_offload(rcd, rxm);
		} else {
			struct rte_mbuf *start = rxq->start_seg;

			RTE_ASSERT(rxd->btype == VMXNET3_RXD_BTYPE_BODY);

			start->pkt_len += rxm->data_len;
			start->nb_segs++;

			rxq->last_seg->next = rxm;
		}
		rxq->last_seg = rxm;

		if (rcd->eop) {
			struct rte_mbuf *start = rxq->start_seg;

			/* Check for hardware stripped VLAN tag */
			if (rcd->ts) {
				start->ol_flags |= (PKT_RX_VLAN_PKT | PKT_RX_VLAN_STRIPPED);
				start->vlan_tci = rte_le_to_cpu_16((uint16_t)rcd->tci);
			}

			rx_pkts[nb_rx++] = start;
			rxq->start_seg = NULL;
		}

rcd_done:
		rxq->cmd_ring[ring_idx].next2comp = idx;
		VMXNET3_INC_RING_IDX_ONLY(rxq->cmd_ring[ring_idx].next2comp,
					  rxq->cmd_ring[ring_idx].size);

		/* It's time to renew descriptors */
		vmxnet3_renew_desc(rxq, ring_idx, newm);
		if (unlikely(rxq->shared->ctrl.updateRxProd)) {
			VMXNET3_WRITE_BAR0_REG(hw, rxprod_reg[ring_idx] + (rxq->queue_id * VMXNET3_REG_ALIGN),
					       rxq->cmd_ring[ring_idx].next2fill);
		}

		/* Advance to the next descriptor in comp_ring */
		vmxnet3_comp_ring_adv_next2proc(&rxq->comp_ring);

		rcd = &rxq->comp_ring.base[rxq->comp_ring.next2proc].rcd;
		nb_rxd++;
		if (nb_rxd > rxq->cmd_ring[0].size) {
			PMD_RX_LOG(ERR, "Used up quota of receiving packets,"
				   " relinquish control.");
			break;
		}
	}

	if (unlikely(nb_rxd == 0)) {
		uint32_t avail;

		for (ring_idx = 0; ring_idx < VMXNET3_RX_CMDRING_SIZE; ring_idx++) {
			avail = vmxnet3_cmd_ring_desc_avail(&rxq->cmd_ring[ring_idx]);
			if (unlikely(avail > 0)) {
				/* try to alloc new buf and renew descriptors */
				vmxnet3_post_rx_bufs(rxq, ring_idx);
			}
		}
		if (unlikely(rxq->shared->ctrl.updateRxProd)) {
			for (ring_idx = 0; ring_idx < VMXNET3_RX_CMDRING_SIZE; ring_idx++) {
				VMXNET3_WRITE_BAR0_REG(hw, rxprod_reg[ring_idx] + (rxq->queue_id * VMXNET3_REG_ALIGN),
						       rxq->cmd_ring[ring_idx].next2fill);
			}
		}
	}

	return nb_rx;
}
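
/*
 * Tx queue setup: the descriptor count is bounded by VMXNET3_DEF_TX_RING_SIZE
 * and VMXNET3_TX_RING_MAX_SIZE and aligned down using VMXNET3_RING_SIZE_MASK.
 * A single memzone holds the command, completion and data rings back to back,
 * and a separate buf_info array tracks the mbuf attached to each descriptor.
 */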
int
vmxnet3_dev_tx_queue_setup(struct rte_eth_dev *dev,
			   uint16_t queue_idx,
			   uint16_t nb_desc,
			   unsigned int socket_id,
			   const struct rte_eth_txconf *tx_conf)
{
	struct vmxnet3_hw *hw = dev->data->dev_private;
	const struct rte_memzone *mz;
	struct vmxnet3_tx_queue *txq;
	struct vmxnet3_cmd_ring *ring;
	struct vmxnet3_comp_ring *comp_ring;
	struct vmxnet3_data_ring *data_ring;
	int size;

	PMD_INIT_FUNC_TRACE();

	if ((tx_conf->txq_flags & ETH_TXQ_FLAGS_NOXSUMSCTP) !=
	    ETH_TXQ_FLAGS_NOXSUMSCTP) {
		PMD_INIT_LOG(ERR, "SCTP checksum offload not supported");
		return -EINVAL;
	}

	txq = rte_zmalloc("ethdev_tx_queue", sizeof(struct vmxnet3_tx_queue),
			  RTE_CACHE_LINE_SIZE);
	if (txq == NULL) {
		PMD_INIT_LOG(ERR, "Can not allocate tx queue structure");
		return -ENOMEM;
	}

	txq->queue_id = queue_idx;
	txq->port_id = dev->data->port_id;
	txq->shared = &hw->tqd_start[queue_idx];
	txq->hw = hw;
	txq->qid = queue_idx;
	txq->stopped = TRUE;
	txq->txdata_desc_size = hw->txdata_desc_size;

	ring = &txq->cmd_ring;
	comp_ring = &txq->comp_ring;
	data_ring = &txq->data_ring;

	/* Tx vmxnet ring length should be between 512-4096 */
	if (nb_desc < VMXNET3_DEF_TX_RING_SIZE) {
		PMD_INIT_LOG(ERR, "VMXNET3 Tx Ring Size Min: %u",
			     VMXNET3_DEF_TX_RING_SIZE);
		return -EINVAL;
	} else if (nb_desc > VMXNET3_TX_RING_MAX_SIZE) {
		PMD_INIT_LOG(ERR, "VMXNET3 Tx Ring Size Max: %u",
			     VMXNET3_TX_RING_MAX_SIZE);
		return -EINVAL;
	} else {
		ring->size = nb_desc;
		ring->size &= ~VMXNET3_RING_SIZE_MASK;
	}
	comp_ring->size = data_ring->size = ring->size;

	/* Tx vmxnet rings structure initialization */
	ring->next2fill = 0;
	ring->next2comp = 0;
	ring->gen = VMXNET3_INIT_GEN;
	comp_ring->next2proc = 0;
	comp_ring->gen = VMXNET3_INIT_GEN;

	size = sizeof(struct Vmxnet3_TxDesc) * ring->size;
	size += sizeof(struct Vmxnet3_TxCompDesc) * comp_ring->size;
	size += txq->txdata_desc_size * data_ring->size;

	mz = rte_eth_dma_zone_reserve(dev, "txdesc", queue_idx, size,
				      VMXNET3_RING_BA_ALIGN, socket_id);
	if (mz == NULL) {
		PMD_INIT_LOG(ERR, "ERROR: Creating queue descriptors zone");
		return -ENOMEM;
	}
	txq->mz = mz;
	memset(mz->addr, 0, mz->len);

	/* cmd_ring initialization */
	ring->base = mz->addr;
	ring->basePA = mz->phys_addr;

	/* comp_ring initialization */
	comp_ring->base = ring->base + ring->size;
	comp_ring->basePA = ring->basePA +
		(sizeof(struct Vmxnet3_TxDesc) * ring->size);

	/* data_ring initialization */
	data_ring->base = (Vmxnet3_TxDataDesc *)(comp_ring->base + comp_ring->size);
	data_ring->basePA = comp_ring->basePA +
		(sizeof(struct Vmxnet3_TxCompDesc) * comp_ring->size);

	/* cmd_ring0 buf_info allocation */
	ring->buf_info = rte_zmalloc("tx_ring_buf_info",
				     ring->size * sizeof(vmxnet3_buf_info_t),
				     RTE_CACHE_LINE_SIZE);
	if (ring->buf_info == NULL) {
		PMD_INIT_LOG(ERR, "ERROR: Creating tx_buf_info structure");
		return -ENOMEM;
	}

	/* Update the data portion with txq */
	dev->data->tx_queues[queue_idx] = txq;

	return 0;
}
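
/*
 * Rx queue setup: each queue owns two command rings (ring 0 for HEAD buffers,
 * ring 1 for BODY buffers) that share a single completion ring, plus an
 * optional data ring on vmxnet3 version 3+ devices. qid1, qid2 and
 * data_ring_qid give the device-visible ring identifiers for this queue.
 */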
int
vmxnet3_dev_rx_queue_setup(struct rte_eth_dev *dev,
			   uint16_t queue_idx,
			   uint16_t nb_desc,
			   unsigned int socket_id,
			   __rte_unused const struct rte_eth_rxconf *rx_conf,
			   struct rte_mempool *mp)
{
	const struct rte_memzone *mz;
	struct vmxnet3_rx_queue *rxq;
	struct vmxnet3_hw *hw = dev->data->dev_private;
	struct vmxnet3_cmd_ring *ring0, *ring1, *ring;
	struct vmxnet3_comp_ring *comp_ring;
	struct vmxnet3_rx_data_ring *data_ring;
	int size;
	uint8_t i;
	char mem_name[32];

	PMD_INIT_FUNC_TRACE();

	rxq = rte_zmalloc("ethdev_rx_queue", sizeof(struct vmxnet3_rx_queue),
			  RTE_CACHE_LINE_SIZE);
	if (rxq == NULL) {
		PMD_INIT_LOG(ERR, "Can not allocate rx queue structure");
		return -ENOMEM;
	}

	rxq->mp = mp;
	rxq->queue_id = queue_idx;
	rxq->port_id = dev->data->port_id;
	rxq->shared = &hw->rqd_start[queue_idx];
	rxq->hw = hw;
	rxq->qid1 = queue_idx;
	rxq->qid2 = queue_idx + hw->num_rx_queues;
	rxq->data_ring_qid = queue_idx + 2 * hw->num_rx_queues;
	rxq->data_desc_size = hw->rxdata_desc_size;
	rxq->stopped = TRUE;

	ring0 = &rxq->cmd_ring[0];
	ring1 = &rxq->cmd_ring[1];
	comp_ring = &rxq->comp_ring;
	data_ring = &rxq->data_ring;

	/* Rx vmxnet rings length should be between 256-4096 */
	if (nb_desc < VMXNET3_DEF_RX_RING_SIZE) {
		PMD_INIT_LOG(ERR, "VMXNET3 Rx Ring Size Min: 256");
		return -EINVAL;
	} else if (nb_desc > VMXNET3_RX_RING_MAX_SIZE) {
		PMD_INIT_LOG(ERR, "VMXNET3 Rx Ring Size Max: 4096");
		return -EINVAL;
	} else {
		ring0->size = nb_desc;
		ring0->size &= ~VMXNET3_RING_SIZE_MASK;
		ring1->size = ring0->size;
	}

	comp_ring->size = ring0->size + ring1->size;
	data_ring->size = ring0->size;

	/* Rx vmxnet rings structure initialization */
	ring0->next2fill = 0;
	ring1->next2fill = 0;
	ring0->next2comp = 0;
	ring1->next2comp = 0;
	ring0->gen = VMXNET3_INIT_GEN;
	ring1->gen = VMXNET3_INIT_GEN;
	comp_ring->next2proc = 0;
	comp_ring->gen = VMXNET3_INIT_GEN;

	size = sizeof(struct Vmxnet3_RxDesc) * (ring0->size + ring1->size);
	size += sizeof(struct Vmxnet3_RxCompDesc) * comp_ring->size;
	if (VMXNET3_VERSION_GE_3(hw) && rxq->data_desc_size)
		size += rxq->data_desc_size * data_ring->size;

	mz = rte_eth_dma_zone_reserve(dev, "rxdesc", queue_idx, size,
				      VMXNET3_RING_BA_ALIGN, socket_id);
	if (mz == NULL) {
		PMD_INIT_LOG(ERR, "ERROR: Creating queue descriptors zone");
		return -ENOMEM;
	}
	rxq->mz = mz;
	memset(mz->addr, 0, mz->len);

	/* cmd_ring0 initialization */
	ring0->base = mz->addr;
	ring0->basePA = mz->phys_addr;

	/* cmd_ring1 initialization */
	ring1->base = ring0->base + ring0->size;
	ring1->basePA = ring0->basePA + sizeof(struct Vmxnet3_RxDesc) * ring0->size;

	/* comp_ring initialization */
	comp_ring->base = ring1->base + ring1->size;
	comp_ring->basePA = ring1->basePA + sizeof(struct Vmxnet3_RxDesc) *
		ring1->size;

	/* data_ring initialization */
	if (VMXNET3_VERSION_GE_3(hw) && rxq->data_desc_size) {
		data_ring->base =
			(uint8_t *)(comp_ring->base + comp_ring->size);
		data_ring->basePA = comp_ring->basePA +
			sizeof(struct Vmxnet3_RxCompDesc) * comp_ring->size;
	}

	/* cmd_ring0-cmd_ring1 buf_info allocation */
	for (i = 0; i < VMXNET3_RX_CMDRING_SIZE; i++) {
		ring = &rxq->cmd_ring[i];

		snprintf(mem_name, sizeof(mem_name), "rx_ring_%d_buf_info", i);

		ring->buf_info = rte_zmalloc(mem_name,
					     ring->size * sizeof(vmxnet3_buf_info_t),
					     RTE_CACHE_LINE_SIZE);
		if (ring->buf_info == NULL) {
			PMD_INIT_LOG(ERR, "ERROR: Creating rx_buf_info structure");
			return -ENOMEM;
		}
	}

	/* Update the data portion with rxq */
	dev->data->rx_queues[queue_idx] = rxq;

	return 0;
}

/*
 * Initializes Receive Unit
 * Load mbufs in rx queue in advance
 */
int
vmxnet3_dev_rxtx_init(struct rte_eth_dev *dev)
{
	struct vmxnet3_hw *hw = dev->data->dev_private;

	int i, ret;
	uint8_t j;

	PMD_INIT_FUNC_TRACE();

	for (i = 0; i < hw->num_rx_queues; i++) {
		vmxnet3_rx_queue_t *rxq = dev->data->rx_queues[i];

		for (j = 0; j < VMXNET3_RX_CMDRING_SIZE; j++) {
			/* Post enough buffers to fill the whole ring */
			ret = vmxnet3_post_rx_bufs(rxq, j);
			if (ret <= 0) {
				PMD_INIT_LOG(ERR,
					     "ERROR: Posting Rxq: %d buffers ring: %d",
					     i, j);
				return -ret;
			}
			/*
			 * Updating device with the index:next2fill to fill the
			 * mbufs for coming packets.
			 */
			if (unlikely(rxq->shared->ctrl.updateRxProd)) {
				VMXNET3_WRITE_BAR0_REG(hw, rxprod_reg[j] + (rxq->queue_id * VMXNET3_REG_ALIGN),
						       rxq->cmd_ring[j].next2fill);
			}
		}
		rxq->stopped = FALSE;
		rxq->start_seg = NULL;
	}

	for (i = 0; i < dev->data->nb_tx_queues; i++) {
		struct vmxnet3_tx_queue *txq = dev->data->tx_queues[i];

		txq->stopped = FALSE;
	}

	return 0;
}
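
/* Default 40-byte Toeplitz RSS hash key, used when the application does not
 * supply one in its rte_eth_rss_conf.
 */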
static uint8_t rss_intel_key[40] = {
	0x6D, 0x5A, 0x56, 0xDA, 0x25, 0x5B, 0x0E, 0xC2,
	0x41, 0x67, 0x25, 0x3D, 0x43, 0xA3, 0x8F, 0xB0,
	0xD0, 0xCA, 0x2B, 0xCB, 0xAE, 0x7B, 0x30, 0xB4,
	0x77, 0xCB, 0x2D, 0xA3, 0x80, 0x30, 0xF2, 0x0C,
	0x6A, 0x42, 0xB7, 0x3B, 0xBE, 0xAC, 0x01, 0xFA,
};
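
/*
 * The RSS indirection table is filled round-robin over the configured Rx
 * queues. For example, with two Rx queues and an eight-entry table the
 * entries become 0, 1, 0, 1, 0, 1, 0, 1, spreading completions evenly.
 */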

/*
 * Configure RSS feature
 */
int
vmxnet3_rss_configure(struct rte_eth_dev *dev)
{
	struct vmxnet3_hw *hw = dev->data->dev_private;
	struct VMXNET3_RSSConf *dev_rss_conf;
	struct rte_eth_rss_conf *port_rss_conf;
	uint64_t rss_hf;
	uint8_t i, j;

	PMD_INIT_FUNC_TRACE();

	dev_rss_conf = hw->rss_conf;
	port_rss_conf = &dev->data->dev_conf.rx_adv_conf.rss_conf;

	/* loading hashFunc */
	dev_rss_conf->hashFunc = VMXNET3_RSS_HASH_FUNC_TOEPLITZ;
	/* loading hashKeySize */
	dev_rss_conf->hashKeySize = VMXNET3_RSS_MAX_KEY_SIZE;
	/* loading indTableSize: Must not exceed VMXNET3_RSS_MAX_IND_TABLE_SIZE (128) */
	dev_rss_conf->indTableSize = (uint16_t)(hw->num_rx_queues * 4);

	if (port_rss_conf->rss_key == NULL) {
		/* Default hash key */
		port_rss_conf->rss_key = rss_intel_key;
	}

	/* loading hashKey */
	memcpy(&dev_rss_conf->hashKey[0], port_rss_conf->rss_key,
	       dev_rss_conf->hashKeySize);

	/* loading indTable */
	for (i = 0, j = 0; i < dev_rss_conf->indTableSize; i++, j++) {
		if (j == dev->data->nb_rx_queues)
			j = 0;

		dev_rss_conf->indTable[i] = j;
	}

	/* loading hashType */
	dev_rss_conf->hashType = 0;
	rss_hf = port_rss_conf->rss_hf & VMXNET3_RSS_OFFLOAD_ALL;
	if (rss_hf & ETH_RSS_IPV4)
		dev_rss_conf->hashType |= VMXNET3_RSS_HASH_TYPE_IPV4;
	if (rss_hf & ETH_RSS_NONFRAG_IPV4_TCP)
		dev_rss_conf->hashType |= VMXNET3_RSS_HASH_TYPE_TCP_IPV4;
	if (rss_hf & ETH_RSS_IPV6)
		dev_rss_conf->hashType |= VMXNET3_RSS_HASH_TYPE_IPV6;
	if (rss_hf & ETH_RSS_NONFRAG_IPV6_TCP)
		dev_rss_conf->hashType |= VMXNET3_RSS_HASH_TYPE_TCP_IPV6;

	return VMXNET3_SUCCESS;
}