 *   Copyright(c) 2010-2015 Intel Corporation. All rights reserved.
 *   Redistribution and use in source and binary forms, with or without
 *   modification, are permitted provided that the following conditions
 *     * Redistributions of source code must retain the above copyright
 *       notice, this list of conditions and the following disclaimer.
 *     * Redistributions in binary form must reproduce the above copyright
 *       notice, this list of conditions and the following disclaimer in
 *       the documentation and/or other materials provided with the
 *     * Neither the name of Intel Corporation nor the names of its
 *       contributors may be used to endorse or promote products derived
 *       from this software without specific prior written permission.
 *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#include <sys/queue.h>

#include <rte_byteorder.h>
#include <rte_common.h>
#include <rte_cycles.h>
#include <rte_debug.h>
#include <rte_interrupts.h>
#include <rte_memory.h>
#include <rte_memzone.h>
#include <rte_launch.h>
#include <rte_per_lcore.h>
#include <rte_lcore.h>
#include <rte_atomic.h>
#include <rte_branch_prediction.h>
#include <rte_mempool.h>
#include <rte_malloc.h>
#include <rte_ether.h>
#include <rte_ethdev.h>
#include <rte_prefetch.h>
#include <rte_string_fns.h>
#include <rte_errno.h>

#include "base/vmxnet3_defs.h"
#include "vmxnet3_ring.h"
#include "vmxnet3_logs.h"
#include "vmxnet3_ethdev.h"
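
/*
 * Tx offload flags this PMD can handle; vmxnet3_prep_pkts() rejects any
 * packet that requests an offload outside VMXNET3_TX_OFFLOAD_MASK.
 */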
#define VMXNET3_TX_OFFLOAD_MASK ( \

#define VMXNET3_TX_OFFLOAD_NOTSUP_MASK \
        (PKT_TX_OFFLOAD_MASK ^ VMXNET3_TX_OFFLOAD_MASK)
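
/* BAR0 producer-index registers for the two Rx command rings of a queue. */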
static const uint32_t rxprod_reg[2] = {VMXNET3_REG_RXPROD, VMXNET3_REG_RXPROD2};

static int vmxnet3_post_rx_bufs(vmxnet3_rx_queue_t*, uint8_t);
static void vmxnet3_tq_tx_complete(vmxnet3_tx_queue_t *);

#ifdef RTE_LIBRTE_VMXNET3_DEBUG_DRIVER_NOT_USED
static void vmxnet3_rxq_dump(struct vmxnet3_rx_queue *);
static void vmxnet3_txq_dump(struct vmxnet3_tx_queue *);

#ifdef RTE_LIBRTE_VMXNET3_DEBUG_DRIVER_NOT_USED
vmxnet3_rxq_dump(struct vmxnet3_rx_queue *rxq)
                   "RXQ: cmd0 base : %p cmd1 base : %p comp ring base : %p.",
                   rxq->cmd_ring[0].base, rxq->cmd_ring[1].base, rxq->comp_ring.base);
                   "RXQ: cmd0 basePA : 0x%lx cmd1 basePA : 0x%lx comp ring basePA : 0x%lx.",
                   (unsigned long)rxq->cmd_ring[0].basePA,
                   (unsigned long)rxq->cmd_ring[1].basePA,
                   (unsigned long)rxq->comp_ring.basePA);

        avail = vmxnet3_cmd_ring_desc_avail(&rxq->cmd_ring[0]);
                   "RXQ:cmd0: size=%u; free=%u; next2proc=%u; queued=%u",
                   (uint32_t)rxq->cmd_ring[0].size, avail,
                   rxq->comp_ring.next2proc,
                   rxq->cmd_ring[0].size - avail);

        avail = vmxnet3_cmd_ring_desc_avail(&rxq->cmd_ring[1]);
        PMD_RX_LOG(DEBUG, "RXQ:cmd1 size=%u; free=%u; next2proc=%u; queued=%u",
                   (uint32_t)rxq->cmd_ring[1].size, avail, rxq->comp_ring.next2proc,
                   rxq->cmd_ring[1].size - avail);

vmxnet3_txq_dump(struct vmxnet3_tx_queue *txq)
        PMD_TX_LOG(DEBUG, "TXQ: cmd base : %p comp ring base : %p data ring base : %p.",
                   txq->cmd_ring.base, txq->comp_ring.base, txq->data_ring.base);
        PMD_TX_LOG(DEBUG, "TXQ: cmd basePA : 0x%lx comp ring basePA : 0x%lx data ring basePA : 0x%lx.",
                   (unsigned long)txq->cmd_ring.basePA,
                   (unsigned long)txq->comp_ring.basePA,
                   (unsigned long)txq->data_ring.basePA);

        avail = vmxnet3_cmd_ring_desc_avail(&txq->cmd_ring);
        PMD_TX_LOG(DEBUG, "TXQ: size=%u; free=%u; next2proc=%u; queued=%u",
                   (uint32_t)txq->cmd_ring.size, avail,
                   txq->comp_ring.next2proc, txq->cmd_ring.size - avail);
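
/* Free any mbufs still attached to a Tx command ring; the device is assumed quiesced. */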
vmxnet3_tx_cmd_ring_release_mbufs(vmxnet3_cmd_ring_t *ring)
        while (ring->next2comp != ring->next2fill) {
                /* No need to worry about desc ownership, device is quiesced by now. */
                vmxnet3_buf_info_t *buf_info = ring->buf_info + ring->next2comp;

                rte_pktmbuf_free(buf_info->m);
                vmxnet3_cmd_ring_adv_next2comp(ring);
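
/* Free the mbufs attached to every descriptor of an Rx command ring. */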
vmxnet3_rx_cmd_ring_release_mbufs(vmxnet3_cmd_ring_t *ring)
        for (i = 0; i < ring->size; i++) {
                /* No need to worry about desc ownership, device is quiesced by now. */
                vmxnet3_buf_info_t *buf_info = &ring->buf_info[i];

                rte_pktmbuf_free_seg(buf_info->m);
                vmxnet3_cmd_ring_adv_next2comp(ring);
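
/* Free the buf_info array that backs a command ring. */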
vmxnet3_cmd_ring_release(vmxnet3_cmd_ring_t *ring)
        rte_free(ring->buf_info);
        ring->buf_info = NULL;
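
/* tx_queue_release handler: drop queued mbufs, the ring metadata and the descriptor memzone. */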
vmxnet3_dev_tx_queue_release(void *txq)
        vmxnet3_tx_queue_t *tq = txq;

        vmxnet3_tx_cmd_ring_release_mbufs(&tq->cmd_ring);
        /* Release the cmd_ring */
        vmxnet3_cmd_ring_release(&tq->cmd_ring);
        /* Release the memzone */
        rte_memzone_free(tq->mz);
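
/* rx_queue_release handler: drop mbufs posted on both command rings and free the ring memory. */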
vmxnet3_dev_rx_queue_release(void *rxq)
        vmxnet3_rx_queue_t *rq = rxq;

        for (i = 0; i < VMXNET3_RX_CMDRING_SIZE; i++)
                vmxnet3_rx_cmd_ring_release_mbufs(&rq->cmd_ring[i]);

        /* Release both the cmd_rings */
        for (i = 0; i < VMXNET3_RX_CMDRING_SIZE; i++)
                vmxnet3_cmd_ring_release(&rq->cmd_ring[i]);

        /* Release the memzone */
        rte_memzone_free(rq->mz);
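
/*
 * Return a Tx queue to its initial state: free in-flight mbufs, reset the
 * ring indices and generation bits, and zero the descriptor memory.
 */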
vmxnet3_dev_tx_queue_reset(void *txq)
        vmxnet3_tx_queue_t *tq = txq;
        struct vmxnet3_cmd_ring *ring = &tq->cmd_ring;
        struct vmxnet3_comp_ring *comp_ring = &tq->comp_ring;
        struct vmxnet3_data_ring *data_ring = &tq->data_ring;

        /* Release the cmd_ring mbufs */
        vmxnet3_tx_cmd_ring_release_mbufs(&tq->cmd_ring);

        /* Tx vmxnet rings structure initialization */
        ring->gen = VMXNET3_INIT_GEN;
        comp_ring->next2proc = 0;
        comp_ring->gen = VMXNET3_INIT_GEN;

        size = sizeof(struct Vmxnet3_TxDesc) * ring->size;
        size += sizeof(struct Vmxnet3_TxCompDesc) * comp_ring->size;
        size += tq->txdata_desc_size * data_ring->size;

        memset(ring->base, 0, size);
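
/*
 * Return an Rx queue to its initial state: free the mbufs posted on both
 * command rings, reset indices and generation bits, and zero the descriptor
 * memory.
 */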
vmxnet3_dev_rx_queue_reset(void *rxq)
        vmxnet3_rx_queue_t *rq = rxq;
        struct vmxnet3_hw *hw = rq->hw;
        struct vmxnet3_cmd_ring *ring0, *ring1;
        struct vmxnet3_comp_ring *comp_ring;
        struct vmxnet3_rx_data_ring *data_ring = &rq->data_ring;

        /* Release both the cmd_rings mbufs */
        for (i = 0; i < VMXNET3_RX_CMDRING_SIZE; i++)
                vmxnet3_rx_cmd_ring_release_mbufs(&rq->cmd_ring[i]);

        ring0 = &rq->cmd_ring[0];
        ring1 = &rq->cmd_ring[1];
        comp_ring = &rq->comp_ring;

        /* Rx vmxnet rings structure initialization */
        ring0->next2fill = 0;
        ring1->next2fill = 0;
        ring0->next2comp = 0;
        ring1->next2comp = 0;
        ring0->gen = VMXNET3_INIT_GEN;
        ring1->gen = VMXNET3_INIT_GEN;
        comp_ring->next2proc = 0;
        comp_ring->gen = VMXNET3_INIT_GEN;

        size = sizeof(struct Vmxnet3_RxDesc) * (ring0->size + ring1->size);
        size += sizeof(struct Vmxnet3_RxCompDesc) * comp_ring->size;
        if (VMXNET3_VERSION_GE_3(hw) && rq->data_desc_size)
                size += rq->data_desc_size * data_ring->size;

        memset(ring0->base, 0, size);
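
/* Reset every configured Tx and Rx queue of the port. */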
vmxnet3_dev_clear_queues(struct rte_eth_dev *dev)
        PMD_INIT_FUNC_TRACE();

        for (i = 0; i < dev->data->nb_tx_queues; i++) {
                struct vmxnet3_tx_queue *txq = dev->data->tx_queues[i];

                vmxnet3_dev_tx_queue_reset(txq);

        for (i = 0; i < dev->data->nb_rx_queues; i++) {
                struct vmxnet3_rx_queue *rxq = dev->data->rx_queues[i];

                vmxnet3_dev_rx_queue_reset(rxq);
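
/*
 * Free the mbuf chain whose last descriptor is eop_idx and advance next2comp
 * past every descriptor of that packet; returns the number of descriptors
 * released.
 */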
vmxnet3_unmap_pkt(uint16_t eop_idx, vmxnet3_tx_queue_t *txq)
        struct rte_mbuf *mbuf;

        /* Release cmd_ring descriptor and free mbuf */
        RTE_ASSERT(txq->cmd_ring.base[eop_idx].txd.eop == 1);

        mbuf = txq->cmd_ring.buf_info[eop_idx].m;
                rte_panic("EOP desc does not point to a valid mbuf");
        rte_pktmbuf_free(mbuf);

        txq->cmd_ring.buf_info[eop_idx].m = NULL;

        while (txq->cmd_ring.next2comp != eop_idx) {
                /* no out-of-order completion */
                RTE_ASSERT(txq->cmd_ring.base[txq->cmd_ring.next2comp].txd.cq == 0);
                vmxnet3_cmd_ring_adv_next2comp(&txq->cmd_ring);

        /* Mark the txd for which tcd was generated as completed */
        vmxnet3_cmd_ring_adv_next2comp(&txq->cmd_ring);

        return completed + 1;
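
/* Walk the Tx completion ring and release descriptors/mbufs for every completed packet. */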
vmxnet3_tq_tx_complete(vmxnet3_tx_queue_t *txq)
        vmxnet3_comp_ring_t *comp_ring = &txq->comp_ring;
        struct Vmxnet3_TxCompDesc *tcd = (struct Vmxnet3_TxCompDesc *)
                (comp_ring->base + comp_ring->next2proc);

        while (tcd->gen == comp_ring->gen) {
                completed += vmxnet3_unmap_pkt(tcd->txdIdx, txq);

                vmxnet3_comp_ring_adv_next2proc(comp_ring);
                tcd = (struct Vmxnet3_TxCompDesc *)(comp_ring->base +
                                                    comp_ring->next2proc);

        PMD_TX_LOG(DEBUG, "Processed %d tx comps & command descs.", completed);
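
/*
 * tx_pkt_prepare handler: validate offload requests before transmission and
 * set rte_errno for packets the device cannot handle.
 */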
vmxnet3_prep_pkts(__rte_unused void *tx_queue, struct rte_mbuf **tx_pkts,
        for (i = 0; i != nb_pkts; i++) {
                ol_flags = m->ol_flags;

                /* Non-TSO packet cannot occupy more than
                 * VMXNET3_MAX_TXD_PER_PKT TX descriptors.
                 */
                if ((ol_flags & PKT_TX_TCP_SEG) == 0 &&
                    m->nb_segs > VMXNET3_MAX_TXD_PER_PKT) {

                /* check that only supported TX offloads are requested. */
                if ((ol_flags & VMXNET3_TX_OFFLOAD_NOTSUP_MASK) != 0 ||
                    (ol_flags & PKT_TX_L4_MASK) ==
                        rte_errno = ENOTSUP;
#ifdef RTE_LIBRTE_ETHDEV_DEBUG
                ret = rte_validate_tx_offload(m);

                ret = rte_net_intel_cksum_prepare(m);
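
/*
 * Burst transmit: post up to nb_pkts packets on the Tx command ring and, once
 * the deferred count reaches txThreshold, notify the device by writing the
 * producer index to VMXNET3_REG_TXPROD.
 */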
vmxnet3_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts,
        vmxnet3_tx_queue_t *txq = tx_queue;
        struct vmxnet3_hw *hw = txq->hw;
        Vmxnet3_TxQueueCtrl *txq_ctrl = &txq->shared->ctrl;
        uint32_t deferred = rte_le_to_cpu_32(txq_ctrl->txNumDeferred);

        if (unlikely(txq->stopped)) {
                PMD_TX_LOG(DEBUG, "Tx queue is stopped.");

        /* Free up the comp_descriptors aggressively */
        vmxnet3_tq_tx_complete(txq);

        while (nb_tx < nb_pkts) {
                Vmxnet3_GenericDesc *gdesc;
                vmxnet3_buf_info_t *tbi;
                uint32_t first2fill, avail, dw2;
                struct rte_mbuf *txm = tx_pkts[nb_tx];
                struct rte_mbuf *m_seg = txm;

                bool tso = (txm->ol_flags & PKT_TX_TCP_SEG) != 0;
                /* # of descriptors needed for a packet. */
                unsigned count = txm->nb_segs;

                avail = vmxnet3_cmd_ring_desc_avail(&txq->cmd_ring);

                /* Is command ring full? */
                if (unlikely(avail == 0)) {
                        PMD_TX_LOG(DEBUG, "No free ring descriptors");
                        txq->stats.tx_ring_full++;
                        txq->stats.drop_total += (nb_pkts - nb_tx);

                        /* Command ring is not full but cannot handle the
                         * multi-segmented packet. Let's try the next packet
                         */
                        PMD_TX_LOG(DEBUG, "Running out of ring descriptors "
                                   "(avail %d needed %d)", avail, count);
                        txq->stats.drop_total++;
                                txq->stats.drop_tso++;
                        rte_pktmbuf_free(txm);

                /* Drop non-TSO packet that is excessively fragmented */
                if (unlikely(!tso && count > VMXNET3_MAX_TXD_PER_PKT)) {
                        PMD_TX_LOG(ERR, "Non-TSO packet cannot occupy more than %d tx "
                                   "descriptors. Packet dropped.", VMXNET3_MAX_TXD_PER_PKT);
                        txq->stats.drop_too_many_segs++;
                        txq->stats.drop_total++;
                        rte_pktmbuf_free(txm);

                if (txm->nb_segs == 1 &&
                    rte_pktmbuf_pkt_len(txm) <= txq->txdata_desc_size) {
                        struct Vmxnet3_TxDataDesc *tdd;

                        tdd = (struct Vmxnet3_TxDataDesc *)
                                ((uint8 *)txq->data_ring.base +
                                 txq->cmd_ring.next2fill *
                                 txq->txdata_desc_size);
                        copy_size = rte_pktmbuf_pkt_len(txm);
                        rte_memcpy(tdd->data, rte_pktmbuf_mtod(txm, char *), copy_size);

                /* use the previous gen bit for the SOP desc */
                dw2 = (txq->cmd_ring.gen ^ 0x1) << VMXNET3_TXD_GEN_SHIFT;
                first2fill = txq->cmd_ring.next2fill;

                        /* Remember the transmit buffer for cleanup */
                        tbi = txq->cmd_ring.buf_info + txq->cmd_ring.next2fill;
                        /* NB: the following assumes that the VMXNET3 maximum
                         * transmit buffer size (16K) is greater than the
                         * maximum mbuf segment size.
                        gdesc = txq->cmd_ring.base + txq->cmd_ring.next2fill;

                                        (uint64)txq->cmd_ring.next2fill *
                                        txq->txdata_desc_size;
                                        rte_cpu_to_le_64(txq->data_ring.basePA +
                                gdesc->txd.addr = rte_mbuf_data_dma_addr(m_seg);

                        gdesc->dword[2] = dw2 | m_seg->data_len;

                        /* move to the next2fill descriptor */
                        vmxnet3_cmd_ring_adv_next2fill(&txq->cmd_ring);

                        /* use the right gen for non-SOP desc */
                        dw2 = txq->cmd_ring.gen << VMXNET3_TXD_GEN_SHIFT;
                } while ((m_seg = m_seg->next) != NULL);

                /* set the last buf_info for the pkt */
                /* Update the EOP descriptor */
                gdesc->dword[3] |= VMXNET3_TXD_EOP | VMXNET3_TXD_CQ;

                /* Add VLAN tag if present */
                gdesc = txq->cmd_ring.base + first2fill;
                if (txm->ol_flags & PKT_TX_VLAN_PKT) {
                        gdesc->txd.tci = txm->vlan_tci;

                        uint16_t mss = txm->tso_segsz;

                        gdesc->txd.hlen = txm->l2_len + txm->l3_len + txm->l4_len;
                        gdesc->txd.om = VMXNET3_OM_TSO;
                        gdesc->txd.msscof = mss;

                        deferred += (rte_pktmbuf_pkt_len(txm) - gdesc->txd.hlen + mss - 1) / mss;
                } else if (txm->ol_flags & PKT_TX_L4_MASK) {
                        gdesc->txd.om = VMXNET3_OM_CSUM;
                        gdesc->txd.hlen = txm->l2_len + txm->l3_len;

                        switch (txm->ol_flags & PKT_TX_L4_MASK) {
                        case PKT_TX_TCP_CKSUM:
                                gdesc->txd.msscof = gdesc->txd.hlen + offsetof(struct tcp_hdr, cksum);
                        case PKT_TX_UDP_CKSUM:
                                gdesc->txd.msscof = gdesc->txd.hlen + offsetof(struct udp_hdr, dgram_cksum);
                                PMD_TX_LOG(WARNING, "requested cksum offload not supported %#llx",
                                           txm->ol_flags & PKT_TX_L4_MASK);
                        gdesc->txd.om = VMXNET3_OM_NONE;
                        gdesc->txd.msscof = 0;

                /* flip the GEN bit on the SOP */
                rte_compiler_barrier();
                gdesc->dword[2] ^= VMXNET3_TXD_GEN;

                txq_ctrl->txNumDeferred = rte_cpu_to_le_32(deferred);

        PMD_TX_LOG(DEBUG, "vmxnet3 txThreshold: %u", rte_le_to_cpu_32(txq_ctrl->txThreshold));

        if (deferred >= rte_le_to_cpu_32(txq_ctrl->txThreshold)) {
                txq_ctrl->txNumDeferred = 0;
                /* Notify vSwitch that packets are available. */
                VMXNET3_WRITE_BAR0_REG(hw, (VMXNET3_REG_TXPROD + txq->queue_id * VMXNET3_REG_ALIGN),
                                       txq->cmd_ring.next2fill);
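
/*
 * Attach a fresh mbuf to the next free descriptor of the given Rx command
 * ring and hand it to the device by setting the descriptor's generation bit.
 */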
vmxnet3_renew_desc(vmxnet3_rx_queue_t *rxq, uint8_t ring_id,
                   struct rte_mbuf *mbuf)
        struct vmxnet3_cmd_ring *ring = &rxq->cmd_ring[ring_id];
        struct Vmxnet3_RxDesc *rxd =
                (struct Vmxnet3_RxDesc *)(ring->base + ring->next2fill);
        vmxnet3_buf_info_t *buf_info = &ring->buf_info[ring->next2fill];

                /* Usually: One HEAD type buf per packet
                 * val = (ring->next2fill % rxq->hw->bufs_per_pkt) ?
                 * VMXNET3_RXD_BTYPE_BODY : VMXNET3_RXD_BTYPE_HEAD;
                 */

                /* We use single packet buffer so all heads here */
                val = VMXNET3_RXD_BTYPE_HEAD;
                /* All BODY type buffers for 2nd ring */
                val = VMXNET3_RXD_BTYPE_BODY;
         * Load the mbuf pointer into buf_info[ring->next2fill];
         * the buf_info structure is equivalent to the cookie in a virtio virtqueue
        buf_info->len = (uint16_t)(mbuf->buf_len - RTE_PKTMBUF_HEADROOM);
        buf_info->bufPA = rte_mbuf_data_dma_addr_default(mbuf);

        /* Load Rx Descriptor with the buffer's GPA */
        rxd->addr = buf_info->bufPA;

        /* After this point rxd->addr MUST not be NULL */
        rxd->len = buf_info->len;
        /* Flip gen bit at the end to change ownership */
        rxd->gen = ring->gen;

        vmxnet3_cmd_ring_adv_next2fill(ring);
/*
 * Allocate mbufs and post Rx descriptors with the buffer details so that the
 * device can receive packets into those buffers.
 *
 * Of the two rings, the 1st ring contains buffers of type 0 and type 1;
 * bufs_per_pkt is set such that for non-LRO cases all the buffers required
 * by a frame fit in the 1st ring (the 1st buf of type 0, the rest of type 1).
 * The 2nd ring contains buffers of type 1 alone and is mostly used for LRO.
 */
vmxnet3_post_rx_bufs(vmxnet3_rx_queue_t *rxq, uint8_t ring_id)
        struct vmxnet3_cmd_ring *ring = &rxq->cmd_ring[ring_id];

        while (vmxnet3_cmd_ring_desc_avail(ring) > 0) {
                struct rte_mbuf *mbuf;

                /* Allocate blank mbuf for the current Rx Descriptor */
                mbuf = rte_mbuf_raw_alloc(rxq->mp);
                if (unlikely(mbuf == NULL)) {
                        PMD_RX_LOG(ERR, "Error allocating mbuf");
                        rxq->stats.rx_buf_alloc_failure++;

                vmxnet3_renew_desc(rxq, ring_id, mbuf);

        /* Return error only if no buffers are posted at present */
        if (vmxnet3_cmd_ring_desc_avail(ring) >= (ring->size - 1))

/* Receive side checksum and other offloads */
vmxnet3_rx_offload(const Vmxnet3_RxCompDesc *rcd, struct rte_mbuf *rxm)
        if (rcd->rssType != VMXNET3_RCD_RSS_TYPE_NONE) {
                rxm->ol_flags |= PKT_RX_RSS_HASH;
                rxm->hash.rss = rcd->rssHash;

        /* Check packet type, checksum errors, etc. Only support IPv4 for now. */
                struct ether_hdr *eth = rte_pktmbuf_mtod(rxm, struct ether_hdr *);
                struct ipv4_hdr *ip = (struct ipv4_hdr *)(eth + 1);

                if (((ip->version_ihl & 0xf) << 2) > (int)sizeof(struct ipv4_hdr))
                        rxm->packet_type = RTE_PTYPE_L3_IPV4_EXT;
                        rxm->packet_type = RTE_PTYPE_L3_IPV4;

                        rxm->ol_flags |= PKT_RX_IP_CKSUM_BAD;

                if ((rcd->tcp || rcd->udp) && !rcd->tuc)
                        rxm->ol_flags |= PKT_RX_L4_CKSUM_BAD;

/*
 * Process the Rx Completion Ring of given vmxnet3_rx_queue
 * for nb_pkts burst and return the number of packets received
 */
vmxnet3_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t nb_pkts)
        uint32_t nb_rxd, idx;
        vmxnet3_rx_queue_t *rxq;
        Vmxnet3_RxCompDesc *rcd;
        vmxnet3_buf_info_t *rbi;
        struct rte_mbuf *rxm = NULL;
        struct vmxnet3_hw *hw;

        rcd = &rxq->comp_ring.base[rxq->comp_ring.next2proc].rcd;

        if (unlikely(rxq->stopped)) {
                PMD_RX_LOG(DEBUG, "Rx queue is stopped.");

        while (rcd->gen == rxq->comp_ring.gen) {
                struct rte_mbuf *newm;

                if (nb_rx >= nb_pkts)

                newm = rte_mbuf_raw_alloc(rxq->mp);
                if (unlikely(newm == NULL)) {
                        PMD_RX_LOG(ERR, "Error allocating mbuf");
                        rxq->stats.rx_buf_alloc_failure++;

                ring_idx = vmxnet3_get_ring_idx(hw, rcd->rqID);
                rxd = (Vmxnet3_RxDesc *)rxq->cmd_ring[ring_idx].base + idx;
                RTE_SET_USED(rxd); /* used only for assert when enabled */
                rbi = rxq->cmd_ring[ring_idx].buf_info + idx;

                PMD_RX_LOG(DEBUG, "rxd idx: %d ring idx: %d.", idx, ring_idx);

                RTE_ASSERT(rcd->len <= rxd->len);

                /* Get the packet buffer pointer from buf_info */
                /* Clear descriptor associated buf_info to be reused */
                /* Update the index that we received a packet */
                rxq->cmd_ring[ring_idx].next2comp = idx;

                /* For RCD with EOP set, check if there is frame error */
                if (unlikely(rcd->eop && rcd->err)) {
                        rxq->stats.drop_total++;
                        rxq->stats.drop_err++;
                                rxq->stats.drop_fcs++;
                                PMD_RX_LOG(ERR, "Recv packet dropped due to frame err.");
                        PMD_RX_LOG(ERR, "Error in received packet rcd#:%d rxd:%d",
                                   (int)(rcd - (struct Vmxnet3_RxCompDesc *)
                                         rxq->comp_ring.base), rcd->rxdIdx);
                        rte_pktmbuf_free_seg(rxm);
                        if (rxq->start_seg) {
                                struct rte_mbuf *start = rxq->start_seg;

                                rxq->start_seg = NULL;
                                rte_pktmbuf_free(start);

                /* Initialize newly received packet buffer */
                rxm->port = rxq->port_id;
                rxm->pkt_len = (uint16_t)rcd->len;
                rxm->data_len = (uint16_t)rcd->len;
                rxm->data_off = RTE_PKTMBUF_HEADROOM;

                /*
                 * If this is the first buffer of the received packet,
                 * set the pointer to the first mbuf of the packet
                 * Otherwise, update the total length and the number of segments
                 * of the current scattered packet, and update the pointer to
                 * the last mbuf of the current packet.
                 */
                        RTE_ASSERT(rxd->btype == VMXNET3_RXD_BTYPE_HEAD);

                        if (unlikely(rcd->len == 0)) {
                                RTE_ASSERT(rcd->eop);
                                           "Rx buf was skipped. rxring[%d][%d]",
                                rte_pktmbuf_free_seg(rxm);

                        if (vmxnet3_rx_data_ring(hw, rcd->rqID)) {
                                uint8_t *rdd = rxq->data_ring.base +
                                        idx * rxq->data_desc_size;

                                RTE_ASSERT(VMXNET3_VERSION_GE_3(hw));
                                rte_memcpy(rte_pktmbuf_mtod(rxm, char *),

                        rxq->start_seg = rxm;
                        vmxnet3_rx_offload(rcd, rxm);

                        struct rte_mbuf *start = rxq->start_seg;

                        RTE_ASSERT(rxd->btype == VMXNET3_RXD_BTYPE_BODY);

                        start->pkt_len += rxm->data_len;

                        rxq->last_seg->next = rxm;

                        struct rte_mbuf *start = rxq->start_seg;

                        /* Check for hardware stripped VLAN tag */
                                start->ol_flags |= (PKT_RX_VLAN_PKT | PKT_RX_VLAN_STRIPPED);
                                start->vlan_tci = rte_le_to_cpu_16((uint16_t)rcd->tci);

                        rx_pkts[nb_rx++] = start;
                        rxq->start_seg = NULL;

                rxq->cmd_ring[ring_idx].next2comp = idx;
                VMXNET3_INC_RING_IDX_ONLY(rxq->cmd_ring[ring_idx].next2comp,
                                          rxq->cmd_ring[ring_idx].size);

                /* It's time to renew descriptors */
                vmxnet3_renew_desc(rxq, ring_idx, newm);
                if (unlikely(rxq->shared->ctrl.updateRxProd)) {
                        VMXNET3_WRITE_BAR0_REG(hw, rxprod_reg[ring_idx] + (rxq->queue_id * VMXNET3_REG_ALIGN),
                                               rxq->cmd_ring[ring_idx].next2fill);

                /* Advance to the next descriptor in comp_ring */
                vmxnet3_comp_ring_adv_next2proc(&rxq->comp_ring);

                rcd = &rxq->comp_ring.base[rxq->comp_ring.next2proc].rcd;
                if (nb_rxd > rxq->cmd_ring[0].size) {
                        PMD_RX_LOG(ERR, "Used up quota of receiving packets,"
                                   " relinquish control.");

        if (unlikely(nb_rxd == 0)) {
                for (ring_idx = 0; ring_idx < VMXNET3_RX_CMDRING_SIZE; ring_idx++) {
                        avail = vmxnet3_cmd_ring_desc_avail(&rxq->cmd_ring[ring_idx]);
                        if (unlikely(avail > 0)) {
                                /* try to alloc new buf and renew descriptors */
                                vmxnet3_post_rx_bufs(rxq, ring_idx);
                if (unlikely(rxq->shared->ctrl.updateRxProd)) {
                        for (ring_idx = 0; ring_idx < VMXNET3_RX_CMDRING_SIZE; ring_idx++) {
                                VMXNET3_WRITE_BAR0_REG(hw, rxprod_reg[ring_idx] + (rxq->queue_id * VMXNET3_REG_ALIGN),
                                                       rxq->cmd_ring[ring_idx].next2fill);
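
/* tx_queue_setup handler: allocate a Tx queue and its command, completion and data rings. */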
vmxnet3_dev_tx_queue_setup(struct rte_eth_dev *dev,
                           unsigned int socket_id,
                           const struct rte_eth_txconf *tx_conf)
        struct vmxnet3_hw *hw = dev->data->dev_private;
        const struct rte_memzone *mz;
        struct vmxnet3_tx_queue *txq;
        struct vmxnet3_cmd_ring *ring;
        struct vmxnet3_comp_ring *comp_ring;
        struct vmxnet3_data_ring *data_ring;

        PMD_INIT_FUNC_TRACE();

        if ((tx_conf->txq_flags & ETH_TXQ_FLAGS_NOXSUMSCTP) !=
            ETH_TXQ_FLAGS_NOXSUMSCTP) {
                PMD_INIT_LOG(ERR, "SCTP checksum offload not supported");

        txq = rte_zmalloc("ethdev_tx_queue", sizeof(struct vmxnet3_tx_queue),
                          RTE_CACHE_LINE_SIZE);
                PMD_INIT_LOG(ERR, "Can not allocate tx queue structure");

        txq->queue_id = queue_idx;
        txq->port_id = dev->data->port_id;
        txq->shared = &hw->tqd_start[queue_idx];
        txq->qid = queue_idx;
        txq->txdata_desc_size = hw->txdata_desc_size;

        ring = &txq->cmd_ring;
        comp_ring = &txq->comp_ring;
        data_ring = &txq->data_ring;

        /* Tx vmxnet ring length should be between 512-4096 */
        if (nb_desc < VMXNET3_DEF_TX_RING_SIZE) {
                PMD_INIT_LOG(ERR, "VMXNET3 Tx Ring Size Min: %u",
                             VMXNET3_DEF_TX_RING_SIZE);
        } else if (nb_desc > VMXNET3_TX_RING_MAX_SIZE) {
                PMD_INIT_LOG(ERR, "VMXNET3 Tx Ring Size Max: %u",
                             VMXNET3_TX_RING_MAX_SIZE);
                ring->size = nb_desc;
                ring->size &= ~VMXNET3_RING_SIZE_MASK;
        comp_ring->size = data_ring->size = ring->size;

        /* Tx vmxnet rings structure initialization */
        ring->gen = VMXNET3_INIT_GEN;
        comp_ring->next2proc = 0;
        comp_ring->gen = VMXNET3_INIT_GEN;

        size = sizeof(struct Vmxnet3_TxDesc) * ring->size;
        size += sizeof(struct Vmxnet3_TxCompDesc) * comp_ring->size;
        size += txq->txdata_desc_size * data_ring->size;

        mz = rte_eth_dma_zone_reserve(dev, "txdesc", queue_idx, size,
                                      VMXNET3_RING_BA_ALIGN, socket_id);
                PMD_INIT_LOG(ERR, "ERROR: Creating queue descriptors zone");
        memset(mz->addr, 0, mz->len);

        /* cmd_ring initialization */
        ring->base = mz->addr;
        ring->basePA = mz->phys_addr;

        /* comp_ring initialization */
        comp_ring->base = ring->base + ring->size;
        comp_ring->basePA = ring->basePA +
                (sizeof(struct Vmxnet3_TxDesc) * ring->size);

        /* data_ring initialization */
        data_ring->base = (Vmxnet3_TxDataDesc *)(comp_ring->base + comp_ring->size);
        data_ring->basePA = comp_ring->basePA +
                (sizeof(struct Vmxnet3_TxCompDesc) * comp_ring->size);

        /* cmd_ring0 buf_info allocation */
        ring->buf_info = rte_zmalloc("tx_ring_buf_info",
                                     ring->size * sizeof(vmxnet3_buf_info_t), RTE_CACHE_LINE_SIZE);
        if (ring->buf_info == NULL) {
                PMD_INIT_LOG(ERR, "ERROR: Creating tx_buf_info structure");

        /* Update the data portion with txq */
        dev->data->tx_queues[queue_idx] = txq;
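
/* rx_queue_setup handler: allocate an Rx queue, its two command rings, completion ring and data ring. */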
vmxnet3_dev_rx_queue_setup(struct rte_eth_dev *dev,
                           unsigned int socket_id,
                           __rte_unused const struct rte_eth_rxconf *rx_conf,
                           struct rte_mempool *mp)
        const struct rte_memzone *mz;
        struct vmxnet3_rx_queue *rxq;
        struct vmxnet3_hw *hw = dev->data->dev_private;
        struct vmxnet3_cmd_ring *ring0, *ring1, *ring;
        struct vmxnet3_comp_ring *comp_ring;
        struct vmxnet3_rx_data_ring *data_ring;

        PMD_INIT_FUNC_TRACE();

        rxq = rte_zmalloc("ethdev_rx_queue", sizeof(struct vmxnet3_rx_queue),
                          RTE_CACHE_LINE_SIZE);
                PMD_INIT_LOG(ERR, "Can not allocate rx queue structure");

        rxq->queue_id = queue_idx;
        rxq->port_id = dev->data->port_id;
        rxq->shared = &hw->rqd_start[queue_idx];
        rxq->qid1 = queue_idx;
        rxq->qid2 = queue_idx + hw->num_rx_queues;
        rxq->data_ring_qid = queue_idx + 2 * hw->num_rx_queues;
        rxq->data_desc_size = hw->rxdata_desc_size;
        rxq->stopped = TRUE;

        ring0 = &rxq->cmd_ring[0];
        ring1 = &rxq->cmd_ring[1];
        comp_ring = &rxq->comp_ring;
        data_ring = &rxq->data_ring;

        /* Rx vmxnet rings length should be between 256-4096 */
        if (nb_desc < VMXNET3_DEF_RX_RING_SIZE) {
                PMD_INIT_LOG(ERR, "VMXNET3 Rx Ring Size Min: 256");
        } else if (nb_desc > VMXNET3_RX_RING_MAX_SIZE) {
                PMD_INIT_LOG(ERR, "VMXNET3 Rx Ring Size Max: 4096");
                ring0->size = nb_desc;
                ring0->size &= ~VMXNET3_RING_SIZE_MASK;
                ring1->size = ring0->size;

        comp_ring->size = ring0->size + ring1->size;
        data_ring->size = ring0->size;

        /* Rx vmxnet rings structure initialization */
        ring0->next2fill = 0;
        ring1->next2fill = 0;
        ring0->next2comp = 0;
        ring1->next2comp = 0;
        ring0->gen = VMXNET3_INIT_GEN;
        ring1->gen = VMXNET3_INIT_GEN;
        comp_ring->next2proc = 0;
        comp_ring->gen = VMXNET3_INIT_GEN;

        size = sizeof(struct Vmxnet3_RxDesc) * (ring0->size + ring1->size);
        size += sizeof(struct Vmxnet3_RxCompDesc) * comp_ring->size;
        if (VMXNET3_VERSION_GE_3(hw) && rxq->data_desc_size)
                size += rxq->data_desc_size * data_ring->size;

        mz = rte_eth_dma_zone_reserve(dev, "rxdesc", queue_idx, size,
                                      VMXNET3_RING_BA_ALIGN, socket_id);
                PMD_INIT_LOG(ERR, "ERROR: Creating queue descriptors zone");
        memset(mz->addr, 0, mz->len);

        /* cmd_ring0 initialization */
        ring0->base = mz->addr;
        ring0->basePA = mz->phys_addr;

        /* cmd_ring1 initialization */
        ring1->base = ring0->base + ring0->size;
        ring1->basePA = ring0->basePA + sizeof(struct Vmxnet3_RxDesc) * ring0->size;

        /* comp_ring initialization */
        comp_ring->base = ring1->base + ring1->size;
        comp_ring->basePA = ring1->basePA + sizeof(struct Vmxnet3_RxDesc) *

        /* data_ring initialization */
        if (VMXNET3_VERSION_GE_3(hw) && rxq->data_desc_size) {
                        (uint8_t *)(comp_ring->base + comp_ring->size);
                data_ring->basePA = comp_ring->basePA +
                        sizeof(struct Vmxnet3_RxCompDesc) * comp_ring->size;

        /* cmd_ring0-cmd_ring1 buf_info allocation */
        for (i = 0; i < VMXNET3_RX_CMDRING_SIZE; i++) {
                ring = &rxq->cmd_ring[i];

                snprintf(mem_name, sizeof(mem_name), "rx_ring_%d_buf_info", i);

                ring->buf_info = rte_zmalloc(mem_name,
                                             ring->size * sizeof(vmxnet3_buf_info_t),
                                             RTE_CACHE_LINE_SIZE);
                if (ring->buf_info == NULL) {
                        PMD_INIT_LOG(ERR, "ERROR: Creating rx_buf_info structure");

        /* Update the data portion with rxq */
        dev->data->rx_queues[queue_idx] = rxq;

/*
 * Initializes Receive Unit
 * Load mbufs in rx queue in advance
 */
vmxnet3_dev_rxtx_init(struct rte_eth_dev *dev)
        struct vmxnet3_hw *hw = dev->data->dev_private;

        PMD_INIT_FUNC_TRACE();

        for (i = 0; i < hw->num_rx_queues; i++) {
                vmxnet3_rx_queue_t *rxq = dev->data->rx_queues[i];

                for (j = 0; j < VMXNET3_RX_CMDRING_SIZE; j++) {
                        /* Passing 0 as alloc_num will allocate full ring */
                        ret = vmxnet3_post_rx_bufs(rxq, j);
                                     "ERROR: Posting Rxq: %d buffers ring: %d",
                         * Update the device with the next2fill index so it
                         * can use the newly posted mbufs for incoming packets.
                        if (unlikely(rxq->shared->ctrl.updateRxProd)) {
                                VMXNET3_WRITE_BAR0_REG(hw, rxprod_reg[j] + (rxq->queue_id * VMXNET3_REG_ALIGN),
                                                       rxq->cmd_ring[j].next2fill);

                rxq->stopped = FALSE;
                rxq->start_seg = NULL;

        for (i = 0; i < dev->data->nb_tx_queues; i++) {
                struct vmxnet3_tx_queue *txq = dev->data->tx_queues[i];

                txq->stopped = FALSE;
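
/* Default Toeplitz RSS hash key, used when the application does not provide one. */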
static uint8_t rss_intel_key[40] = {
        0x6D, 0x5A, 0x56, 0xDA, 0x25, 0x5B, 0x0E, 0xC2,
        0x41, 0x67, 0x25, 0x3D, 0x43, 0xA3, 0x8F, 0xB0,
        0xD0, 0xCA, 0x2B, 0xCB, 0xAE, 0x7B, 0x30, 0xB4,
        0x77, 0xCB, 0x2D, 0xA3, 0x80, 0x30, 0xF2, 0x0C,
        0x6A, 0x42, 0xB7, 0x3B, 0xBE, 0xAC, 0x01, 0xFA,

/*
 * Configure RSS feature
 */
vmxnet3_rss_configure(struct rte_eth_dev *dev)
        struct vmxnet3_hw *hw = dev->data->dev_private;
        struct VMXNET3_RSSConf *dev_rss_conf;
        struct rte_eth_rss_conf *port_rss_conf;

        PMD_INIT_FUNC_TRACE();

        dev_rss_conf = hw->rss_conf;
        port_rss_conf = &dev->data->dev_conf.rx_adv_conf.rss_conf;

        /* loading hashFunc */
        dev_rss_conf->hashFunc = VMXNET3_RSS_HASH_FUNC_TOEPLITZ;
        /* loading hashKeySize */
        dev_rss_conf->hashKeySize = VMXNET3_RSS_MAX_KEY_SIZE;
        /* loading indTableSize: Must not exceed VMXNET3_RSS_MAX_IND_TABLE_SIZE (128) */
        dev_rss_conf->indTableSize = (uint16_t)(hw->num_rx_queues * 4);

        if (port_rss_conf->rss_key == NULL) {
                /* Default hash key */
                port_rss_conf->rss_key = rss_intel_key;

        /* loading hashKey */
        memcpy(&dev_rss_conf->hashKey[0], port_rss_conf->rss_key,
               dev_rss_conf->hashKeySize);

        /* loading indTable */
        for (i = 0, j = 0; i < dev_rss_conf->indTableSize; i++, j++) {
                if (j == dev->data->nb_rx_queues)
                dev_rss_conf->indTable[i] = j;

        /* loading hashType */
        dev_rss_conf->hashType = 0;
        rss_hf = port_rss_conf->rss_hf & VMXNET3_RSS_OFFLOAD_ALL;
        if (rss_hf & ETH_RSS_IPV4)
                dev_rss_conf->hashType |= VMXNET3_RSS_HASH_TYPE_IPV4;
        if (rss_hf & ETH_RSS_NONFRAG_IPV4_TCP)
                dev_rss_conf->hashType |= VMXNET3_RSS_HASH_TYPE_TCP_IPV4;
        if (rss_hf & ETH_RSS_IPV6)
                dev_rss_conf->hashType |= VMXNET3_RSS_HASH_TYPE_IPV6;
        if (rss_hf & ETH_RSS_NONFRAG_IPV6_TCP)
                dev_rss_conf->hashType |= VMXNET3_RSS_HASH_TYPE_TCP_IPV6;

        return VMXNET3_SUCCESS;