/*-
 *   BSD LICENSE
 *
 *   Copyright(c) 2010-2015 Intel Corporation. All rights reserved.
 *   All rights reserved.
 *
 *   Redistribution and use in source and binary forms, with or without
 *   modification, are permitted provided that the following conditions
 *   are met:
 *
 *     * Redistributions of source code must retain the above copyright
 *       notice, this list of conditions and the following disclaimer.
 *     * Redistributions in binary form must reproduce the above copyright
 *       notice, this list of conditions and the following disclaimer in
 *       the documentation and/or other materials provided with the
 *       distribution.
 *     * Neither the name of Intel Corporation nor the names of its
 *       contributors may be used to endorse or promote products derived
 *       from this software without specific prior written permission.
 *
 *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */
#include <sys/queue.h>

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <errno.h>
#include <stdint.h>

#include <rte_byteorder.h>
#include <rte_common.h>
#include <rte_cycles.h>
#include <rte_log.h>
#include <rte_debug.h>
#include <rte_interrupts.h>
#include <rte_memory.h>
#include <rte_memzone.h>
#include <rte_launch.h>
#include <rte_per_lcore.h>
#include <rte_lcore.h>
#include <rte_atomic.h>
#include <rte_branch_prediction.h>
#include <rte_mempool.h>
#include <rte_malloc.h>
#include <rte_mbuf.h>
#include <rte_ether.h>
#include <rte_ethdev.h>
#include <rte_prefetch.h>
#include <rte_ip.h>
#include <rte_udp.h>
#include <rte_tcp.h>
#include <rte_string_fns.h>
#include <rte_errno.h>
#include <rte_net.h>
74 #include "base/vmxnet3_defs.h"
75 #include "vmxnet3_ring.h"
77 #include "vmxnet3_logs.h"
78 #include "vmxnet3_ethdev.h"
#define	VMXNET3_TX_OFFLOAD_MASK	( \
		PKT_TX_VLAN_PKT | \
		PKT_TX_L4_MASK |  \
		PKT_TX_IPV6 |     \
		PKT_TX_IPV4 |     \
		PKT_TX_TCP_SEG)

#define	VMXNET3_TX_OFFLOAD_NOTSUP_MASK	\
	(PKT_TX_OFFLOAD_MASK ^ VMXNET3_TX_OFFLOAD_MASK)
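/*
 * Any flag present in PKT_TX_OFFLOAD_MASK but absent from
 * VMXNET3_TX_OFFLOAD_MASK (outer/tunnel checksum flags, for instance)
 * falls into the NOTSUP mask and makes vmxnet3_prep_pkts() reject the
 * packet with rte_errno = ENOTSUP.
 */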
static const uint32_t rxprod_reg[2] = {VMXNET3_REG_RXPROD, VMXNET3_REG_RXPROD2};

static int vmxnet3_post_rx_bufs(vmxnet3_rx_queue_t*, uint8_t);
static void vmxnet3_tq_tx_complete(vmxnet3_tx_queue_t *);
#ifdef RTE_LIBRTE_VMXNET3_DEBUG_DRIVER_NOT_USED
static void vmxnet3_rxq_dump(struct vmxnet3_rx_queue *);
static void vmxnet3_txq_dump(struct vmxnet3_tx_queue *);
#endif
#ifdef RTE_LIBRTE_VMXNET3_DEBUG_DRIVER_NOT_USED
static void
vmxnet3_rxq_dump(struct vmxnet3_rx_queue *rxq)
{
	uint32_t avail = 0;

	if (rxq == NULL)
		return;

	PMD_RX_LOG(DEBUG,
		   "RXQ: cmd0 base : %p cmd1 base : %p comp ring base : %p.",
		   rxq->cmd_ring[0].base, rxq->cmd_ring[1].base, rxq->comp_ring.base);
	PMD_RX_LOG(DEBUG,
		   "RXQ: cmd0 basePA : 0x%lx cmd1 basePA : 0x%lx comp ring basePA : 0x%lx.",
		   (unsigned long)rxq->cmd_ring[0].basePA,
		   (unsigned long)rxq->cmd_ring[1].basePA,
		   (unsigned long)rxq->comp_ring.basePA);

	avail = vmxnet3_cmd_ring_desc_avail(&rxq->cmd_ring[0]);
	PMD_RX_LOG(DEBUG,
		   "RXQ:cmd0: size=%u; free=%u; next2proc=%u; queued=%u",
		   (uint32_t)rxq->cmd_ring[0].size, avail,
		   rxq->comp_ring.next2proc,
		   rxq->cmd_ring[0].size - avail);

	avail = vmxnet3_cmd_ring_desc_avail(&rxq->cmd_ring[1]);
	PMD_RX_LOG(DEBUG, "RXQ:cmd1 size=%u; free=%u; next2proc=%u; queued=%u",
		   (uint32_t)rxq->cmd_ring[1].size, avail, rxq->comp_ring.next2proc,
		   rxq->cmd_ring[1].size - avail);
}

static void
vmxnet3_txq_dump(struct vmxnet3_tx_queue *txq)
{
	uint32_t avail = 0;

	if (txq == NULL)
		return;

	PMD_TX_LOG(DEBUG, "TXQ: cmd base : %p comp ring base : %p data ring base : %p.",
		   txq->cmd_ring.base, txq->comp_ring.base, txq->data_ring.base);
	PMD_TX_LOG(DEBUG, "TXQ: cmd basePA : 0x%lx comp ring basePA : 0x%lx data ring basePA : 0x%lx.",
		   (unsigned long)txq->cmd_ring.basePA,
		   (unsigned long)txq->comp_ring.basePA,
		   (unsigned long)txq->data_ring.basePA);

	avail = vmxnet3_cmd_ring_desc_avail(&txq->cmd_ring);
	PMD_TX_LOG(DEBUG, "TXQ: size=%u; free=%u; next2proc=%u; queued=%u",
		   (uint32_t)txq->cmd_ring.size, avail,
		   txq->comp_ring.next2proc, txq->cmd_ring.size - avail);
}
#endif
static void
vmxnet3_tx_cmd_ring_release_mbufs(vmxnet3_cmd_ring_t *ring)
{
	while (ring->next2comp != ring->next2fill) {
		/* No need to worry about desc ownership, device is quiesced by now. */
		vmxnet3_buf_info_t *buf_info = ring->buf_info + ring->next2comp;

		if (buf_info->m) {
			rte_pktmbuf_free(buf_info->m);
			buf_info->m = NULL;
			buf_info->bufPA = 0;
			buf_info->len = 0;
		}
		vmxnet3_cmd_ring_adv_next2comp(ring);
	}
}
static void
vmxnet3_rx_cmd_ring_release_mbufs(vmxnet3_cmd_ring_t *ring)
{
	uint32_t i;

	for (i = 0; i < ring->size; i++) {
		/* No need to worry about desc ownership, device is quiesced by now. */
		vmxnet3_buf_info_t *buf_info = &ring->buf_info[i];

		if (buf_info->m) {
			rte_pktmbuf_free_seg(buf_info->m);
			buf_info->m = NULL;
			buf_info->bufPA = 0;
			buf_info->len = 0;
		}
		vmxnet3_cmd_ring_adv_next2comp(ring);
	}
}
static void
vmxnet3_cmd_ring_release(vmxnet3_cmd_ring_t *ring)
{
	rte_free(ring->buf_info);
	ring->buf_info = NULL;
}
void
vmxnet3_dev_tx_queue_release(void *txq)
{
	vmxnet3_tx_queue_t *tq = txq;

	if (tq != NULL) {
		/* Release mbufs */
		vmxnet3_tx_cmd_ring_release_mbufs(&tq->cmd_ring);
		/* Release the cmd_ring */
		vmxnet3_cmd_ring_release(&tq->cmd_ring);
		/* Release the memzone */
		rte_memzone_free(tq->mz);
		/* Release the queue */
		rte_free(tq);
	}
}
void
vmxnet3_dev_rx_queue_release(void *rxq)
{
	int i;
	vmxnet3_rx_queue_t *rq = rxq;

	if (rq != NULL) {
		/* Release mbufs */
		for (i = 0; i < VMXNET3_RX_CMDRING_SIZE; i++)
			vmxnet3_rx_cmd_ring_release_mbufs(&rq->cmd_ring[i]);

		/* Release both the cmd_rings */
		for (i = 0; i < VMXNET3_RX_CMDRING_SIZE; i++)
			vmxnet3_cmd_ring_release(&rq->cmd_ring[i]);

		/* Release the memzone */
		rte_memzone_free(rq->mz);

		/* Release the queue */
		rte_free(rq);
	}
}
static void
vmxnet3_dev_tx_queue_reset(void *txq)
{
	vmxnet3_tx_queue_t *tq = txq;
	struct vmxnet3_cmd_ring *ring = &tq->cmd_ring;
	struct vmxnet3_comp_ring *comp_ring = &tq->comp_ring;
	struct vmxnet3_data_ring *data_ring = &tq->data_ring;
	int size;

	if (tq != NULL) {
		/* Release the cmd_ring mbufs */
		vmxnet3_tx_cmd_ring_release_mbufs(&tq->cmd_ring);
	}

	/* Tx vmxnet rings structure initialization */
	ring->next2fill = 0;
	ring->next2comp = 0;
	ring->gen = VMXNET3_INIT_GEN;
	comp_ring->next2proc = 0;
	comp_ring->gen = VMXNET3_INIT_GEN;

	size = sizeof(struct Vmxnet3_TxDesc) * ring->size;
	size += sizeof(struct Vmxnet3_TxCompDesc) * comp_ring->size;
	size += tq->txdata_desc_size * data_ring->size;

	memset(ring->base, 0, size);
}
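/*
 * Note on VMXNET3_INIT_GEN: descriptor ownership is tracked with a
 * generation bit instead of head/tail registers. The producer flips the
 * bit each time it wraps the ring, so a descriptor whose gen field
 * matches the consumer's current generation is known to be freshly
 * written; resetting a queue restarts both sides at the initial value.
 */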
static void
vmxnet3_dev_rx_queue_reset(void *rxq)
{
	int i;
	vmxnet3_rx_queue_t *rq = rxq;
	struct vmxnet3_hw *hw = rq->hw;
	struct vmxnet3_cmd_ring *ring0, *ring1;
	struct vmxnet3_comp_ring *comp_ring;
	struct vmxnet3_rx_data_ring *data_ring = &rq->data_ring;
	int size;

	/* Release both the cmd_rings mbufs */
	for (i = 0; i < VMXNET3_RX_CMDRING_SIZE; i++)
		vmxnet3_rx_cmd_ring_release_mbufs(&rq->cmd_ring[i]);

	ring0 = &rq->cmd_ring[0];
	ring1 = &rq->cmd_ring[1];
	comp_ring = &rq->comp_ring;

	/* Rx vmxnet rings structure initialization */
	ring0->next2fill = 0;
	ring1->next2fill = 0;
	ring0->next2comp = 0;
	ring1->next2comp = 0;
	ring0->gen = VMXNET3_INIT_GEN;
	ring1->gen = VMXNET3_INIT_GEN;
	comp_ring->next2proc = 0;
	comp_ring->gen = VMXNET3_INIT_GEN;

	size = sizeof(struct Vmxnet3_RxDesc) * (ring0->size + ring1->size);
	size += sizeof(struct Vmxnet3_RxCompDesc) * comp_ring->size;
	if (VMXNET3_VERSION_GE_3(hw) && rq->data_desc_size)
		size += rq->data_desc_size * data_ring->size;

	memset(ring0->base, 0, size);
}
void
vmxnet3_dev_clear_queues(struct rte_eth_dev *dev)
{
	unsigned i;

	PMD_INIT_FUNC_TRACE();

	for (i = 0; i < dev->data->nb_tx_queues; i++) {
		struct vmxnet3_tx_queue *txq = dev->data->tx_queues[i];

		if (txq != NULL) {
			txq->stopped = TRUE;
			vmxnet3_dev_tx_queue_reset(txq);
		}
	}

	for (i = 0; i < dev->data->nb_rx_queues; i++) {
		struct vmxnet3_rx_queue *rxq = dev->data->rx_queues[i];

		if (rxq != NULL) {
			rxq->stopped = TRUE;
			vmxnet3_dev_rx_queue_reset(rxq);
		}
	}
}
static int
vmxnet3_unmap_pkt(uint16_t eop_idx, vmxnet3_tx_queue_t *txq)
{
	int completed = 0;
	struct rte_mbuf *mbuf;

	/* Release cmd_ring descriptor and free mbuf */
	RTE_ASSERT(txq->cmd_ring.base[eop_idx].txd.eop == 1);

	mbuf = txq->cmd_ring.buf_info[eop_idx].m;
	if (mbuf == NULL)
		rte_panic("EOP desc does not point to a valid mbuf");
	rte_pktmbuf_free(mbuf);

	txq->cmd_ring.buf_info[eop_idx].m = NULL;

	while (txq->cmd_ring.next2comp != eop_idx) {
		/* no out-of-order completion */
		RTE_ASSERT(txq->cmd_ring.base[txq->cmd_ring.next2comp].txd.cq == 0);
		vmxnet3_cmd_ring_adv_next2comp(&txq->cmd_ring);
		completed++;
	}

	/* Mark the txd for which tcd was generated as completed */
	vmxnet3_cmd_ring_adv_next2comp(&txq->cmd_ring);

	return completed + 1;
}
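/*
 * The device generates one completion per packet (for the descriptor
 * with CQ set), so vmxnet3_unmap_pkt() above retires every command
 * descriptor of that packet, SOP through EOP, and returns the total
 * number of descriptors freed.
 */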
static void
vmxnet3_tq_tx_complete(vmxnet3_tx_queue_t *txq)
{
	int completed = 0;
	vmxnet3_comp_ring_t *comp_ring = &txq->comp_ring;
	struct Vmxnet3_TxCompDesc *tcd = (struct Vmxnet3_TxCompDesc *)
		(comp_ring->base + comp_ring->next2proc);

	while (tcd->gen == comp_ring->gen) {
		completed += vmxnet3_unmap_pkt(tcd->txdIdx, txq);

		vmxnet3_comp_ring_adv_next2proc(comp_ring);
		tcd = (struct Vmxnet3_TxCompDesc *)(comp_ring->base +
						    comp_ring->next2proc);
	}

	PMD_TX_LOG(DEBUG, "Processed %d tx comps & command descs.", completed);
}
uint16_t
vmxnet3_prep_pkts(__rte_unused void *tx_queue, struct rte_mbuf **tx_pkts,
	uint16_t nb_pkts)
{
	int32_t ret;
	uint32_t i;
	uint64_t ol_flags;
	struct rte_mbuf *m;

	for (i = 0; i != nb_pkts; i++) {
		m = tx_pkts[i];
		ol_flags = m->ol_flags;

		/* Non-TSO packet cannot occupy more than
		 * VMXNET3_MAX_TXD_PER_PKT TX descriptors.
		 */
		if ((ol_flags & PKT_TX_TCP_SEG) == 0 &&
		    m->nb_segs > VMXNET3_MAX_TXD_PER_PKT) {
			rte_errno = EINVAL;
			return i;
		}

		/* check that only supported TX offloads are requested. */
		if ((ol_flags & VMXNET3_TX_OFFLOAD_NOTSUP_MASK) != 0 ||
		    (ol_flags & PKT_TX_L4_MASK) ==
		    PKT_TX_SCTP_CKSUM) {
			rte_errno = ENOTSUP;
			return i;
		}

#ifdef RTE_LIBRTE_ETHDEV_DEBUG
		ret = rte_validate_tx_offload(m);
		if (ret != 0) {
			rte_errno = -ret;
			return i;
		}
#endif
		ret = rte_net_intel_cksum_prepare(m);
		if (ret != 0) {
			rte_errno = -ret;
			return i;
		}
	}

	return i;
}
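/*
 * rte_net_intel_cksum_prepare() seeds the L4 checksum field with the
 * pseudo-header checksum in software; vmxnet3 appears to follow the
 * same convention as Intel NICs here, expecting that seed to be in
 * place before the device fills in the rest on transmit.
 */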
uint16_t
vmxnet3_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts,
		  uint16_t nb_pkts)
{
	uint16_t nb_tx;
	vmxnet3_tx_queue_t *txq = tx_queue;
	struct vmxnet3_hw *hw = txq->hw;
	Vmxnet3_TxQueueCtrl *txq_ctrl = &txq->shared->ctrl;
	uint32_t deferred = rte_le_to_cpu_32(txq_ctrl->txNumDeferred);

	if (unlikely(txq->stopped)) {
		PMD_TX_LOG(DEBUG, "Tx queue is stopped.");
		return 0;
	}

	/* Free up the comp_descriptors aggressively */
	vmxnet3_tq_tx_complete(txq);

	nb_tx = 0;
	while (nb_tx < nb_pkts) {
		Vmxnet3_GenericDesc *gdesc;
		vmxnet3_buf_info_t *tbi;
		uint32_t first2fill, avail, dw2;
		struct rte_mbuf *txm = tx_pkts[nb_tx];
		struct rte_mbuf *m_seg = txm;
		int copy_size = 0;
		bool tso = (txm->ol_flags & PKT_TX_TCP_SEG) != 0;
		/* # of descriptors needed for a packet. */
		unsigned count = txm->nb_segs;

		avail = vmxnet3_cmd_ring_desc_avail(&txq->cmd_ring);
		if (count > avail) {
			/* Is command ring full? */
			if (unlikely(avail == 0)) {
				PMD_TX_LOG(DEBUG, "No free ring descriptors");
				txq->stats.tx_ring_full++;
				txq->stats.drop_total += (nb_pkts - nb_tx);
				break;
			}

			/* Command ring is not full but cannot handle the
			 * multi-segmented packet. Let's try the next packet
			 * in this case.
			 */
			PMD_TX_LOG(DEBUG, "Running out of ring descriptors "
				   "(avail %d needed %d)", avail, count);
			txq->stats.drop_total++;
			if (tso)
				txq->stats.drop_tso++;
			rte_pktmbuf_free(txm);
			nb_tx++;
			continue;
		}

		/* Drop non-TSO packet that is excessively fragmented */
		if (unlikely(!tso && count > VMXNET3_MAX_TXD_PER_PKT)) {
			PMD_TX_LOG(ERR, "Non-TSO packet cannot occupy more than %d tx "
				   "descriptors. Packet dropped.", VMXNET3_MAX_TXD_PER_PKT);
			txq->stats.drop_too_many_segs++;
			txq->stats.drop_total++;
			rte_pktmbuf_free(txm);
			nb_tx++;
			continue;
		}

		if (txm->nb_segs == 1 &&
		    rte_pktmbuf_pkt_len(txm) <= txq->txdata_desc_size) {
			struct Vmxnet3_TxDataDesc *tdd;

			tdd = (struct Vmxnet3_TxDataDesc *)
				((uint8 *)txq->data_ring.base +
				 txq->cmd_ring.next2fill *
				 txq->txdata_desc_size);
			copy_size = rte_pktmbuf_pkt_len(txm);
			rte_memcpy(tdd->data, rte_pktmbuf_mtod(txm, char *), copy_size);
		}

		/* use the previous gen bit for the SOP desc */
		dw2 = (txq->cmd_ring.gen ^ 0x1) << VMXNET3_TXD_GEN_SHIFT;
		first2fill = txq->cmd_ring.next2fill;
		do {
			/* Remember the transmit buffer for cleanup */
			tbi = txq->cmd_ring.buf_info + txq->cmd_ring.next2fill;

			/* NB: the following assumes that VMXNET3 maximum
			 * transmit buffer size (16K) is greater than
			 * maximum size of mbuf segment size.
			 */
			gdesc = txq->cmd_ring.base + txq->cmd_ring.next2fill;
			if (copy_size) {
				uint64 offset =
					(uint64)txq->cmd_ring.next2fill *
						txq->txdata_desc_size;
				gdesc->txd.addr =
					rte_cpu_to_le_64(txq->data_ring.basePA +
							 offset);
			} else {
				gdesc->txd.addr = rte_mbuf_data_iova(m_seg);
			}

			gdesc->dword[2] = dw2 | m_seg->data_len;
			gdesc->dword[3] = 0;

			/* move to the next2fill descriptor */
			vmxnet3_cmd_ring_adv_next2fill(&txq->cmd_ring);

			/* use the right gen for non-SOP desc */
			dw2 = txq->cmd_ring.gen << VMXNET3_TXD_GEN_SHIFT;
		} while ((m_seg = m_seg->next) != NULL);

		/* set the last buf_info for the pkt */
		tbi->m = txm;
		/* Update the EOP descriptor */
		gdesc->dword[3] |= VMXNET3_TXD_EOP | VMXNET3_TXD_CQ;

		/* Add VLAN tag if present */
		gdesc = txq->cmd_ring.base + first2fill;
		if (txm->ol_flags & PKT_TX_VLAN_PKT) {
			gdesc->txd.ti = 1;
			gdesc->txd.tci = txm->vlan_tci;
		}

		if (tso) {
			uint16_t mss = txm->tso_segsz;

			RTE_ASSERT(mss > 0);

			gdesc->txd.hlen = txm->l2_len + txm->l3_len + txm->l4_len;
			gdesc->txd.om = VMXNET3_OM_TSO;
			gdesc->txd.msscof = mss;

			deferred += (rte_pktmbuf_pkt_len(txm) - gdesc->txd.hlen + mss - 1) / mss;
		} else if (txm->ol_flags & PKT_TX_L4_MASK) {
			gdesc->txd.om = VMXNET3_OM_CSUM;
			gdesc->txd.hlen = txm->l2_len + txm->l3_len;

			switch (txm->ol_flags & PKT_TX_L4_MASK) {
			case PKT_TX_TCP_CKSUM:
				gdesc->txd.msscof = gdesc->txd.hlen + offsetof(struct tcp_hdr, cksum);
				break;
			case PKT_TX_UDP_CKSUM:
				gdesc->txd.msscof = gdesc->txd.hlen + offsetof(struct udp_hdr, dgram_cksum);
				break;
			default:
				PMD_TX_LOG(WARNING, "requested cksum offload not supported %#llx",
					   txm->ol_flags & PKT_TX_L4_MASK);
				abort();
			}
			deferred++;
		} else {
			gdesc->txd.hlen = 0;
			gdesc->txd.om = VMXNET3_OM_NONE;
			gdesc->txd.msscof = 0;
			deferred++;
		}

		/* flip the GEN bit on the SOP */
		rte_compiler_barrier();
		gdesc->dword[2] ^= VMXNET3_TXD_GEN;
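		/*
		 * The SOP descriptor was written with the inverted gen bit,
		 * so the device ignored it while the rest of the chain was
		 * being filled in. Flipping it here, after the compiler
		 * barrier, publishes the whole packet to the device at once.
		 */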
		txq_ctrl->txNumDeferred = rte_cpu_to_le_32(deferred);
		nb_tx++;
	}

	PMD_TX_LOG(DEBUG, "vmxnet3 txThreshold: %u", rte_le_to_cpu_32(txq_ctrl->txThreshold));

	if (deferred >= rte_le_to_cpu_32(txq_ctrl->txThreshold)) {
		txq_ctrl->txNumDeferred = 0;
		/* Notify vSwitch that packets are available. */
		VMXNET3_WRITE_BAR0_REG(hw, (VMXNET3_REG_TXPROD + txq->queue_id * VMXNET3_REG_ALIGN),
				       txq->cmd_ring.next2fill);
	}

	return nb_tx;
}
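/*
 * Doorbell batching: vmxnet3_xmit_pkts() above writes the TXPROD
 * register only once txNumDeferred crosses txThreshold, amortizing the
 * cost of the BAR access across several packets instead of kicking the
 * device per packet.
 */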
static void
vmxnet3_renew_desc(vmxnet3_rx_queue_t *rxq, uint8_t ring_id,
		   struct rte_mbuf *mbuf)
{
	uint32_t val;
	struct vmxnet3_cmd_ring *ring = &rxq->cmd_ring[ring_id];
	struct Vmxnet3_RxDesc *rxd =
		(struct Vmxnet3_RxDesc *)(ring->base + ring->next2fill);
	vmxnet3_buf_info_t *buf_info = &ring->buf_info[ring->next2fill];

	if (ring_id == 0) {
		/* Usually: One HEAD type buf per packet
		 * val = (ring->next2fill % rxq->hw->bufs_per_pkt) ?
		 * VMXNET3_RXD_BTYPE_BODY : VMXNET3_RXD_BTYPE_HEAD;
		 */

		/* We use single packet buffer so all heads here */
		val = VMXNET3_RXD_BTYPE_HEAD;
	} else {
		/* All BODY type buffers for 2nd ring */
		val = VMXNET3_RXD_BTYPE_BODY;
	}

	/*
	 * Load mbuf pointer into buf_info[ring_size]
	 * buf_info structure is equivalent to cookie for virtio-virtqueue
	 */
	buf_info->m = mbuf;
	buf_info->len = (uint16_t)(mbuf->buf_len - RTE_PKTMBUF_HEADROOM);
	buf_info->bufPA = rte_mbuf_data_iova_default(mbuf);

	/* Load Rx Descriptor with the buffer's GPA */
	rxd->addr = buf_info->bufPA;

	/* After this point rxd->addr MUST not be NULL */
	rxd->btype = val;
	rxd->len = buf_info->len;
	/* Flip gen bit at the end to change ownership */
	rxd->gen = ring->gen;

	vmxnet3_cmd_ring_adv_next2fill(ring);
}
/*
 * Allocates mbufs and clusters. Post rx descriptors with buffer details
 * so that device can receive packets in those buffers.
 *
 * Among the two rings, the 1st ring contains buffers of type 0 and type 1.
 * bufs_per_pkt is set such that for non-LRO cases all the buffers required
 * by a frame fit in the 1st ring (the 1st buf of type 0 and the rest of
 * type 1). The 2nd ring contains buffers of type 1 alone and is mostly
 * used for frames that need more buffers, e.g. in the LRO case.
 */
static int
vmxnet3_post_rx_bufs(vmxnet3_rx_queue_t *rxq, uint8_t ring_id)
{
	int err = 0;
	uint32_t i = 0;
	struct vmxnet3_cmd_ring *ring = &rxq->cmd_ring[ring_id];

	while (vmxnet3_cmd_ring_desc_avail(ring) > 0) {
		struct rte_mbuf *mbuf;

		/* Allocate blank mbuf for the current Rx Descriptor */
		mbuf = rte_mbuf_raw_alloc(rxq->mp);
		if (unlikely(mbuf == NULL)) {
			PMD_RX_LOG(ERR, "Error allocating mbuf");
			rxq->stats.rx_buf_alloc_failure++;
			err = ENOMEM;
			break;
		}

		vmxnet3_renew_desc(rxq, ring_id, mbuf);
		i++;
	}

	/* Return error only if no buffers are posted at present */
	if (vmxnet3_cmd_ring_desc_avail(ring) >= (ring->size - 1))
		return -err;
	else
		return i;
}
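/*
 * Note the return convention above: a failed allocation mid-loop is not
 * fatal as long as at least one buffer was posted; the caller only sees
 * an error when the ring is still (almost) entirely empty afterwards.
 */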
/* Receive side checksum and other offloads */
static void
vmxnet3_rx_offload(const Vmxnet3_RxCompDesc *rcd, struct rte_mbuf *rxm)
{
	/* Check for RSS */
	if (rcd->rssType != VMXNET3_RCD_RSS_TYPE_NONE) {
		rxm->ol_flags |= PKT_RX_RSS_HASH;
		rxm->hash.rss = rcd->rssHash;
	}

	/* Check packet type, checksum errors, etc. Only support IPv4 for now. */
	if (rcd->v4) {
		struct ether_hdr *eth = rte_pktmbuf_mtod(rxm, struct ether_hdr *);
		struct ipv4_hdr *ip = (struct ipv4_hdr *)(eth + 1);

		if (((ip->version_ihl & 0xf) << 2) > (int)sizeof(struct ipv4_hdr))
			rxm->packet_type = RTE_PTYPE_L3_IPV4_EXT;
		else
			rxm->packet_type = RTE_PTYPE_L3_IPV4;

		if (!rcd->cnc) {
			if (!rcd->ipc)
				rxm->ol_flags |= PKT_RX_IP_CKSUM_BAD;

			if ((rcd->tcp || rcd->udp) && !rcd->tuc)
				rxm->ol_flags |= PKT_RX_L4_CKSUM_BAD;
		}
	}
}
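/*
 * rcd->cnc ("checksum not calculated") guards the error flags above:
 * when the device skipped checksum validation entirely, neither
 * PKT_RX_IP_CKSUM_BAD nor PKT_RX_L4_CKSUM_BAD may be reported.
 */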
/*
 * Process the Rx Completion Ring of given vmxnet3_rx_queue
 * for nb_pkts burst and return the number of packets received
 */
uint16_t
vmxnet3_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t nb_pkts)
{
	uint16_t nb_rx;
	uint32_t nb_rxd, idx;
	uint8_t ring_idx;
	vmxnet3_rx_queue_t *rxq;
	Vmxnet3_RxCompDesc *rcd;
	vmxnet3_buf_info_t *rbi;
	Vmxnet3_RxDesc *rxd;
	struct rte_mbuf *rxm = NULL;
	struct vmxnet3_hw *hw;

	nb_rx = 0;
	ring_idx = 0;
	nb_rxd = 0;
	idx = 0;

	rxq = rx_queue;
	hw = rxq->hw;

	rcd = &rxq->comp_ring.base[rxq->comp_ring.next2proc].rcd;

	if (unlikely(rxq->stopped)) {
		PMD_RX_LOG(DEBUG, "Rx queue is stopped.");
		return 0;
	}

	while (rcd->gen == rxq->comp_ring.gen) {
		struct rte_mbuf *newm;

		if (nb_rx >= nb_pkts)
			break;

		newm = rte_mbuf_raw_alloc(rxq->mp);
		if (unlikely(newm == NULL)) {
			PMD_RX_LOG(ERR, "Error allocating mbuf");
			rxq->stats.rx_buf_alloc_failure++;
			break;
		}

		idx = rcd->rxdIdx;
		ring_idx = vmxnet3_get_ring_idx(hw, rcd->rqID);
		rxd = (Vmxnet3_RxDesc *)rxq->cmd_ring[ring_idx].base + idx;
		RTE_SET_USED(rxd); /* used only for assert when enabled */
		rbi = rxq->cmd_ring[ring_idx].buf_info + idx;

		PMD_RX_LOG(DEBUG, "rxd idx: %d ring idx: %d.", idx, ring_idx);

		RTE_ASSERT(rcd->len <= rxd->len);
		RTE_ASSERT(rbi->m);

		/* Get the packet buffer pointer from buf_info */
		rxm = rbi->m;

		/* Clear descriptor associated buf_info to be reused */
		rbi->m = NULL;
		rbi->bufPA = 0;

		/* Update the index that we received a packet */
		rxq->cmd_ring[ring_idx].next2comp = idx;

		/* For RCD with EOP set, check if there is frame error */
		if (unlikely(rcd->eop && rcd->err)) {
			rxq->stats.drop_total++;
			rxq->stats.drop_err++;

			if (!rcd->fcs) {
				rxq->stats.drop_fcs++;
				PMD_RX_LOG(ERR, "Recv packet dropped due to frame err.");
			}
			PMD_RX_LOG(ERR, "Error in received packet rcd#:%d rxd:%d",
				   (int)(rcd - (struct Vmxnet3_RxCompDesc *)
					 rxq->comp_ring.base), rcd->rxdIdx);
			rte_pktmbuf_free_seg(rxm);
			if (rxq->start_seg) {
				struct rte_mbuf *start = rxq->start_seg;

				rxq->start_seg = NULL;
				rte_pktmbuf_free(start);
			}
			goto rcd_done;
		}

		/* Initialize newly received packet buffer */
		rxm->port = rxq->port_id;
		rxm->nb_segs = 1;
		rxm->next = NULL;
		rxm->pkt_len = (uint16_t)rcd->len;
		rxm->data_len = (uint16_t)rcd->len;
		rxm->data_off = RTE_PKTMBUF_HEADROOM;
		rxm->ol_flags = 0;
		rxm->vlan_tci = 0;

		/*
		 * If this is the first buffer of the received packet,
		 * set the pointer to the first mbuf of the packet
		 * Otherwise, update the total length and the number of segments
		 * of the current scattered packet, and update the pointer to
		 * the last mbuf of the current packet.
		 */
		if (rcd->sop) {
			RTE_ASSERT(rxd->btype == VMXNET3_RXD_BTYPE_HEAD);

			if (unlikely(rcd->len == 0)) {
				RTE_ASSERT(rcd->eop);

				PMD_RX_LOG(DEBUG,
					   "Rx buf was skipped. rxring[%d][%d])",
					   ring_idx, idx);
				rte_pktmbuf_free_seg(rxm);
				goto rcd_done;
			}

			if (vmxnet3_rx_data_ring(hw, rcd->rqID)) {
				uint8_t *rdd = rxq->data_ring.base +
					idx * rxq->data_desc_size;

				RTE_ASSERT(VMXNET3_VERSION_GE_3(hw));
				rte_memcpy(rte_pktmbuf_mtod(rxm, char *),
					   rdd, rcd->len);
			}

			rxq->start_seg = rxm;
			vmxnet3_rx_offload(rcd, rxm);
		} else {
			struct rte_mbuf *start = rxq->start_seg;

			RTE_ASSERT(rxd->btype == VMXNET3_RXD_BTYPE_BODY);

			start->pkt_len += rxm->data_len;
			start->nb_segs++;

			rxq->last_seg->next = rxm;
		}
		rxq->last_seg = rxm;

		if (rcd->eop) {
			struct rte_mbuf *start = rxq->start_seg;

			/* Check for hardware stripped VLAN tag */
			if (rcd->ts) {
				start->ol_flags |= (PKT_RX_VLAN |
						    PKT_RX_VLAN_STRIPPED);
				start->vlan_tci = rte_le_to_cpu_16((uint16_t)rcd->tci);
			}

			rx_pkts[nb_rx++] = start;
			rxq->start_seg = NULL;
		}

rcd_done:
		rxq->cmd_ring[ring_idx].next2comp = idx;
		VMXNET3_INC_RING_IDX_ONLY(rxq->cmd_ring[ring_idx].next2comp,
					  rxq->cmd_ring[ring_idx].size);

		/* It's time to renew descriptors */
		vmxnet3_renew_desc(rxq, ring_idx, newm);
		if (unlikely(rxq->shared->ctrl.updateRxProd)) {
			VMXNET3_WRITE_BAR0_REG(hw, rxprod_reg[ring_idx] + (rxq->queue_id * VMXNET3_REG_ALIGN),
					       rxq->cmd_ring[ring_idx].next2fill);
		}

		/* Advance to the next descriptor in comp_ring */
		vmxnet3_comp_ring_adv_next2proc(&rxq->comp_ring);

		rcd = &rxq->comp_ring.base[rxq->comp_ring.next2proc].rcd;
		nb_rxd++;
		if (nb_rxd > rxq->cmd_ring[0].size) {
			PMD_RX_LOG(ERR, "Used up quota of receiving packets,"
				   " relinquish control.");
			break;
		}
	}

	if (unlikely(nb_rxd == 0)) {
		uint32_t avail;

		for (ring_idx = 0; ring_idx < VMXNET3_RX_CMDRING_SIZE; ring_idx++) {
			avail = vmxnet3_cmd_ring_desc_avail(&rxq->cmd_ring[ring_idx]);
			if (unlikely(avail > 0)) {
				/* try to alloc new buf and renew descriptors */
				vmxnet3_post_rx_bufs(rxq, ring_idx);
			}
		}
		if (unlikely(rxq->shared->ctrl.updateRxProd)) {
			for (ring_idx = 0; ring_idx < VMXNET3_RX_CMDRING_SIZE; ring_idx++) {
				VMXNET3_WRITE_BAR0_REG(hw, rxprod_reg[ring_idx] + (rxq->queue_id * VMXNET3_REG_ALIGN),
						       rxq->cmd_ring[ring_idx].next2fill);
			}
		}
	}

	return nb_rx;
}
int
vmxnet3_dev_tx_queue_setup(struct rte_eth_dev *dev,
			   uint16_t queue_idx,
			   uint16_t nb_desc,
			   unsigned int socket_id,
			   const struct rte_eth_txconf *tx_conf)
{
	struct vmxnet3_hw *hw = dev->data->dev_private;
	const struct rte_memzone *mz;
	struct vmxnet3_tx_queue *txq;
	struct vmxnet3_cmd_ring *ring;
	struct vmxnet3_comp_ring *comp_ring;
	struct vmxnet3_data_ring *data_ring;
	int size;

	PMD_INIT_FUNC_TRACE();

	if ((tx_conf->txq_flags & ETH_TXQ_FLAGS_NOXSUMSCTP) !=
	    ETH_TXQ_FLAGS_NOXSUMSCTP) {
		PMD_INIT_LOG(ERR, "SCTP checksum offload not supported");
		return -EINVAL;
	}

	txq = rte_zmalloc("ethdev_tx_queue", sizeof(struct vmxnet3_tx_queue),
			  RTE_CACHE_LINE_SIZE);
	if (txq == NULL) {
		PMD_INIT_LOG(ERR, "Can not allocate tx queue structure");
		return -ENOMEM;
	}

	txq->queue_id = queue_idx;
	txq->port_id = dev->data->port_id;
	txq->shared = &hw->tqd_start[queue_idx];
	txq->hw = hw;
	txq->qid = queue_idx;
	txq->stopped = TRUE;
	txq->txdata_desc_size = hw->txdata_desc_size;

	ring = &txq->cmd_ring;
	comp_ring = &txq->comp_ring;
	data_ring = &txq->data_ring;

	/* Tx vmxnet ring length should be between 512-4096 */
	if (nb_desc < VMXNET3_DEF_TX_RING_SIZE) {
		PMD_INIT_LOG(ERR, "VMXNET3 Tx Ring Size Min: %u",
			     VMXNET3_DEF_TX_RING_SIZE);
		return -EINVAL;
	} else if (nb_desc > VMXNET3_TX_RING_MAX_SIZE) {
		PMD_INIT_LOG(ERR, "VMXNET3 Tx Ring Size Max: %u",
			     VMXNET3_TX_RING_MAX_SIZE);
		return -EINVAL;
	} else {
		ring->size = nb_desc;
		ring->size &= ~VMXNET3_RING_SIZE_MASK;
	}
	comp_ring->size = data_ring->size = ring->size;
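	/*
	 * Worked example: nb_desc = 1000 passes the range check
	 * (512 <= 1000 <= 4096) and the mask then rounds it down to a
	 * multiple of the ring alignment (32 descriptors), i.e. 992.
	 */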
	/* Tx vmxnet rings structure initialization */
	ring->next2fill = 0;
	ring->next2comp = 0;
	ring->gen = VMXNET3_INIT_GEN;
	comp_ring->next2proc = 0;
	comp_ring->gen = VMXNET3_INIT_GEN;

	size = sizeof(struct Vmxnet3_TxDesc) * ring->size;
	size += sizeof(struct Vmxnet3_TxCompDesc) * comp_ring->size;
	size += txq->txdata_desc_size * data_ring->size;

	mz = rte_eth_dma_zone_reserve(dev, "txdesc", queue_idx, size,
				      VMXNET3_RING_BA_ALIGN, socket_id);
	if (mz == NULL) {
		PMD_INIT_LOG(ERR, "ERROR: Creating queue descriptors zone");
		return -ENOMEM;
	}
	txq->mz = mz;
	memset(mz->addr, 0, mz->len);

	/* cmd_ring initialization */
	ring->base = mz->addr;
	ring->basePA = mz->iova;

	/* comp_ring initialization */
	comp_ring->base = ring->base + ring->size;
	comp_ring->basePA = ring->basePA +
		(sizeof(struct Vmxnet3_TxDesc) * ring->size);

	/* data_ring initialization */
	data_ring->base = (Vmxnet3_TxDataDesc *)(comp_ring->base + comp_ring->size);
	data_ring->basePA = comp_ring->basePA +
		(sizeof(struct Vmxnet3_TxCompDesc) * comp_ring->size);

	/* cmd_ring0 buf_info allocation */
	ring->buf_info = rte_zmalloc("tx_ring_buf_info",
				     ring->size * sizeof(vmxnet3_buf_info_t), RTE_CACHE_LINE_SIZE);
	if (ring->buf_info == NULL) {
		PMD_INIT_LOG(ERR, "ERROR: Creating tx_buf_info structure");
		return -ENOMEM;
	}

	/* Update the data portion with txq */
	dev->data->tx_queues[queue_idx] = txq;

	return 0;
}
int
vmxnet3_dev_rx_queue_setup(struct rte_eth_dev *dev,
			   uint16_t queue_idx,
			   uint16_t nb_desc,
			   unsigned int socket_id,
			   __rte_unused const struct rte_eth_rxconf *rx_conf,
			   struct rte_mempool *mp)
{
	const struct rte_memzone *mz;
	struct vmxnet3_rx_queue *rxq;
	struct vmxnet3_hw *hw = dev->data->dev_private;
	struct vmxnet3_cmd_ring *ring0, *ring1, *ring;
	struct vmxnet3_comp_ring *comp_ring;
	struct vmxnet3_rx_data_ring *data_ring;
	int size;
	uint8_t i;
	char mem_name[32];

	PMD_INIT_FUNC_TRACE();

	rxq = rte_zmalloc("ethdev_rx_queue", sizeof(struct vmxnet3_rx_queue),
			  RTE_CACHE_LINE_SIZE);
	if (rxq == NULL) {
		PMD_INIT_LOG(ERR, "Can not allocate rx queue structure");
		return -ENOMEM;
	}

	rxq->mp = mp;
	rxq->queue_id = queue_idx;
	rxq->port_id = dev->data->port_id;
	rxq->shared = &hw->rqd_start[queue_idx];
	rxq->hw = hw;
	rxq->qid1 = queue_idx;
	rxq->qid2 = queue_idx + hw->num_rx_queues;
	rxq->data_ring_qid = queue_idx + 2 * hw->num_rx_queues;
	rxq->data_desc_size = hw->rxdata_desc_size;
	rxq->stopped = TRUE;

	ring0 = &rxq->cmd_ring[0];
	ring1 = &rxq->cmd_ring[1];
	comp_ring = &rxq->comp_ring;
	data_ring = &rxq->data_ring;

	/* Rx vmxnet rings length should be between 256-4096 */
	if (nb_desc < VMXNET3_DEF_RX_RING_SIZE) {
		PMD_INIT_LOG(ERR, "VMXNET3 Rx Ring Size Min: 256");
		return -EINVAL;
	} else if (nb_desc > VMXNET3_RX_RING_MAX_SIZE) {
		PMD_INIT_LOG(ERR, "VMXNET3 Rx Ring Size Max: 4096");
		return -EINVAL;
	} else {
		ring0->size = nb_desc;
		ring0->size &= ~VMXNET3_RING_SIZE_MASK;
		ring1->size = ring0->size;
	}

	comp_ring->size = ring0->size + ring1->size;
	data_ring->size = ring0->size;

	/* Rx vmxnet rings structure initialization */
	ring0->next2fill = 0;
	ring1->next2fill = 0;
	ring0->next2comp = 0;
	ring1->next2comp = 0;
	ring0->gen = VMXNET3_INIT_GEN;
	ring1->gen = VMXNET3_INIT_GEN;
	comp_ring->next2proc = 0;
	comp_ring->gen = VMXNET3_INIT_GEN;

	size = sizeof(struct Vmxnet3_RxDesc) * (ring0->size + ring1->size);
	size += sizeof(struct Vmxnet3_RxCompDesc) * comp_ring->size;
	if (VMXNET3_VERSION_GE_3(hw) && rxq->data_desc_size)
		size += rxq->data_desc_size * data_ring->size;

	mz = rte_eth_dma_zone_reserve(dev, "rxdesc", queue_idx, size,
				      VMXNET3_RING_BA_ALIGN, socket_id);
	if (mz == NULL) {
		PMD_INIT_LOG(ERR, "ERROR: Creating queue descriptors zone");
		return -ENOMEM;
	}
	rxq->mz = mz;
	memset(mz->addr, 0, mz->len);

	/* cmd_ring0 initialization */
	ring0->base = mz->addr;
	ring0->basePA = mz->iova;

	/* cmd_ring1 initialization */
	ring1->base = ring0->base + ring0->size;
	ring1->basePA = ring0->basePA + sizeof(struct Vmxnet3_RxDesc) * ring0->size;

	/* comp_ring initialization */
	comp_ring->base = ring1->base + ring1->size;
	comp_ring->basePA = ring1->basePA + sizeof(struct Vmxnet3_RxDesc) *
		ring1->size;

	/* data_ring initialization */
	if (VMXNET3_VERSION_GE_3(hw) && rxq->data_desc_size) {
		data_ring->base =
			(uint8_t *)(comp_ring->base + comp_ring->size);
		data_ring->basePA = comp_ring->basePA +
			sizeof(struct Vmxnet3_RxCompDesc) * comp_ring->size;
	}

	/* cmd_ring0-cmd_ring1 buf_info allocation */
	for (i = 0; i < VMXNET3_RX_CMDRING_SIZE; i++) {
		ring = &rxq->cmd_ring[i];
		ring->rid = i;
		snprintf(mem_name, sizeof(mem_name), "rx_ring_%d_buf_info", i);

		ring->buf_info = rte_zmalloc(mem_name,
					     ring->size * sizeof(vmxnet3_buf_info_t),
					     RTE_CACHE_LINE_SIZE);
		if (ring->buf_info == NULL) {
			PMD_INIT_LOG(ERR, "ERROR: Creating rx_buf_info structure");
			return -ENOMEM;
		}
	}

	/* Update the data portion with rxq */
	dev->data->rx_queues[queue_idx] = rxq;

	return 0;
}
/*
 * Initializes Receive Unit
 * Load mbufs in rx queue in advance
 */
int
vmxnet3_dev_rxtx_init(struct rte_eth_dev *dev)
{
	struct vmxnet3_hw *hw = dev->data->dev_private;

	int i, ret;
	uint8_t j;

	PMD_INIT_FUNC_TRACE();

	for (i = 0; i < hw->num_rx_queues; i++) {
		vmxnet3_rx_queue_t *rxq = dev->data->rx_queues[i];

		for (j = 0; j < VMXNET3_RX_CMDRING_SIZE; j++) {
			/* Passing 0 as alloc_num will allocate full ring */
			ret = vmxnet3_post_rx_bufs(rxq, j);
			if (ret <= 0) {
				PMD_INIT_LOG(ERR,
					     "ERROR: Posting Rxq: %d buffers ring: %d",
					     i, j);
				return -ret;
			}
			/*
			 * Updating device with the index:next2fill to fill the
			 * mbufs for coming packets.
			 */
			if (unlikely(rxq->shared->ctrl.updateRxProd)) {
				VMXNET3_WRITE_BAR0_REG(hw, rxprod_reg[j] + (rxq->queue_id * VMXNET3_REG_ALIGN),
						       rxq->cmd_ring[j].next2fill);
			}
		}
		rxq->stopped = FALSE;
		rxq->start_seg = NULL;
	}

	for (i = 0; i < dev->data->nb_tx_queues; i++) {
		struct vmxnet3_tx_queue *txq = dev->data->tx_queues[i];

		txq->stopped = FALSE;
	}

	return 0;
}
static uint8_t rss_intel_key[40] = {
	0x6D, 0x5A, 0x56, 0xDA, 0x25, 0x5B, 0x0E, 0xC2,
	0x41, 0x67, 0x25, 0x3D, 0x43, 0xA3, 0x8F, 0xB0,
	0xD0, 0xCA, 0x2B, 0xCB, 0xAE, 0x7B, 0x30, 0xB4,
	0x77, 0xCB, 0x2D, 0xA3, 0x80, 0x30, 0xF2, 0x0C,
	0x6A, 0x42, 0xB7, 0x3B, 0xBE, 0xAC, 0x01, 0xFA,
};
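/*
 * This is the well-known 40-byte Toeplitz key from Microsoft's RSS
 * verification suite, the same default the Intel PMDs ship as
 * rss_intel_key (hence the name); it is used only when the application
 * does not provide its own key.
 */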
/*
 * Configure RSS feature
 */
int
vmxnet3_rss_configure(struct rte_eth_dev *dev)
{
	struct vmxnet3_hw *hw = dev->data->dev_private;
	struct VMXNET3_RSSConf *dev_rss_conf;
	struct rte_eth_rss_conf *port_rss_conf;
	uint64_t rss_hf;
	uint8_t i, j;

	PMD_INIT_FUNC_TRACE();

	dev_rss_conf = hw->rss_conf;
	port_rss_conf = &dev->data->dev_conf.rx_adv_conf.rss_conf;

	/* loading hashFunc */
	dev_rss_conf->hashFunc = VMXNET3_RSS_HASH_FUNC_TOEPLITZ;
	/* loading hashKeySize */
	dev_rss_conf->hashKeySize = VMXNET3_RSS_MAX_KEY_SIZE;
	/* loading indTableSize: Must not exceed VMXNET3_RSS_MAX_IND_TABLE_SIZE (128) */
	dev_rss_conf->indTableSize = (uint16_t)(hw->num_rx_queues * 4);

	if (port_rss_conf->rss_key == NULL) {
		/* Default hash key */
		port_rss_conf->rss_key = rss_intel_key;
	}

	/* loading hashKey */
	memcpy(&dev_rss_conf->hashKey[0], port_rss_conf->rss_key,
	       dev_rss_conf->hashKeySize);

	/* loading indTable */
	for (i = 0, j = 0; i < dev_rss_conf->indTableSize; i++, j++) {
		if (j == dev->data->nb_rx_queues)
			j = 0;
		dev_rss_conf->indTable[i] = j;
	}

	/* loading hashType */
	dev_rss_conf->hashType = 0;
	rss_hf = port_rss_conf->rss_hf & VMXNET3_RSS_OFFLOAD_ALL;
	if (rss_hf & ETH_RSS_IPV4)
		dev_rss_conf->hashType |= VMXNET3_RSS_HASH_TYPE_IPV4;
	if (rss_hf & ETH_RSS_NONFRAG_IPV4_TCP)
		dev_rss_conf->hashType |= VMXNET3_RSS_HASH_TYPE_TCP_IPV4;
	if (rss_hf & ETH_RSS_IPV6)
		dev_rss_conf->hashType |= VMXNET3_RSS_HASH_TYPE_IPV6;
	if (rss_hf & ETH_RSS_NONFRAG_IPV6_TCP)
		dev_rss_conf->hashType |= VMXNET3_RSS_HASH_TYPE_TCP_IPV6;

	return VMXNET3_SUCCESS;
}
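/*
 * Worked example for the indirection table: with 4 Rx queues,
 * indTableSize is 16 and the loop fills indTable[] with the round-robin
 * pattern 0,1,2,3,0,1,2,3,... so hash values spread evenly across all
 * queues.
 */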