/*-
 *   BSD LICENSE
 *
 *   Copyright(c) 2010-2015 Intel Corporation. All rights reserved.
 *
 *   Redistribution and use in source and binary forms, with or without
 *   modification, are permitted provided that the following conditions
 *   are met:
 *
 *     * Redistributions of source code must retain the above copyright
 *       notice, this list of conditions and the following disclaimer.
 *     * Redistributions in binary form must reproduce the above copyright
 *       notice, this list of conditions and the following disclaimer in
 *       the documentation and/or other materials provided with the
 *       distribution.
 *     * Neither the name of Intel Corporation nor the names of its
 *       contributors may be used to endorse or promote products derived
 *       from this software without specific prior written permission.
 *
 *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */
#include <stdio.h>
#include <stdint.h>
#include <string.h>
#include <errno.h>
#include <sys/queue.h>

#include <rte_byteorder.h>
#include <rte_common.h>
#include <rte_cycles.h>
#include <rte_debug.h>
#include <rte_interrupts.h>
#include <rte_memory.h>
#include <rte_memzone.h>
#include <rte_launch.h>
#include <rte_per_lcore.h>
#include <rte_lcore.h>
#include <rte_atomic.h>
#include <rte_branch_prediction.h>
#include <rte_mempool.h>
#include <rte_malloc.h>
#include <rte_mbuf.h>
#include <rte_ether.h>
#include <rte_ethdev.h>
#include <rte_prefetch.h>
#include <rte_ip.h>
#include <rte_udp.h>
#include <rte_tcp.h>
#include <rte_string_fns.h>
#include <rte_errno.h>

#include "base/vmxnet3_defs.h"
#include "vmxnet3_ring.h"
#include "vmxnet3_logs.h"
#include "vmxnet3_ethdev.h"
static const uint32_t rxprod_reg[2] = {VMXNET3_REG_RXPROD, VMXNET3_REG_RXPROD2};
static int vmxnet3_post_rx_bufs(vmxnet3_rx_queue_t *, uint8_t);
static void vmxnet3_tq_tx_complete(vmxnet3_tx_queue_t *);
#ifdef RTE_LIBRTE_VMXNET3_DEBUG_DRIVER_NOT_USED
static void vmxnet3_rxq_dump(struct vmxnet3_rx_queue *);
static void vmxnet3_txq_dump(struct vmxnet3_tx_queue *);
#endif
#ifdef RTE_LIBRTE_VMXNET3_DEBUG_DRIVER_NOT_USED
static void
vmxnet3_rxq_dump(struct vmxnet3_rx_queue *rxq)
{
	uint32_t avail = 0;

	if (rxq == NULL)
		return;

	PMD_RX_LOG(DEBUG,
		   "RXQ: cmd0 base : %p cmd1 base : %p comp ring base : %p.",
		   rxq->cmd_ring[0].base, rxq->cmd_ring[1].base, rxq->comp_ring.base);
	PMD_RX_LOG(DEBUG,
		   "RXQ: cmd0 basePA : 0x%lx cmd1 basePA : 0x%lx comp ring basePA : 0x%lx.",
		   (unsigned long)rxq->cmd_ring[0].basePA,
		   (unsigned long)rxq->cmd_ring[1].basePA,
		   (unsigned long)rxq->comp_ring.basePA);

	avail = vmxnet3_cmd_ring_desc_avail(&rxq->cmd_ring[0]);
	PMD_RX_LOG(DEBUG,
		   "RXQ:cmd0: size=%u; free=%u; next2proc=%u; queued=%u",
		   (uint32_t)rxq->cmd_ring[0].size, avail,
		   rxq->comp_ring.next2proc,
		   rxq->cmd_ring[0].size - avail);

	avail = vmxnet3_cmd_ring_desc_avail(&rxq->cmd_ring[1]);
	PMD_RX_LOG(DEBUG, "RXQ:cmd1 size=%u; free=%u; next2proc=%u; queued=%u",
		   (uint32_t)rxq->cmd_ring[1].size, avail, rxq->comp_ring.next2proc,
		   rxq->cmd_ring[1].size - avail);
}
static void
vmxnet3_txq_dump(struct vmxnet3_tx_queue *txq)
{
	uint32_t avail = 0;

	if (txq == NULL)
		return;

	PMD_TX_LOG(DEBUG, "TXQ: cmd base : %p comp ring base : %p data ring base : %p.",
		   txq->cmd_ring.base, txq->comp_ring.base, txq->data_ring.base);
	PMD_TX_LOG(DEBUG, "TXQ: cmd basePA : 0x%lx comp ring basePA : 0x%lx data ring basePA : 0x%lx.",
		   (unsigned long)txq->cmd_ring.basePA,
		   (unsigned long)txq->comp_ring.basePA,
		   (unsigned long)txq->data_ring.basePA);

	avail = vmxnet3_cmd_ring_desc_avail(&txq->cmd_ring);
	PMD_TX_LOG(DEBUG, "TXQ: size=%u; free=%u; next2proc=%u; queued=%u",
		   (uint32_t)txq->cmd_ring.size, avail,
		   txq->comp_ring.next2proc, txq->cmd_ring.size - avail);
}
#endif
static void
vmxnet3_tx_cmd_ring_release_mbufs(vmxnet3_cmd_ring_t *ring)
{
	while (ring->next2comp != ring->next2fill) {
		/* No need to worry about desc ownership, device is quiesced by now. */
		vmxnet3_buf_info_t *buf_info = ring->buf_info + ring->next2comp;

		rte_pktmbuf_free(buf_info->m);
		vmxnet3_cmd_ring_adv_next2comp(ring);
	}
}

static void
vmxnet3_rx_cmd_ring_release_mbufs(vmxnet3_cmd_ring_t *ring)
{
	uint32_t i;

	for (i = 0; i < ring->size; i++) {
		/* No need to worry about desc ownership, device is quiesced by now. */
		vmxnet3_buf_info_t *buf_info = &ring->buf_info[i];
		rte_pktmbuf_free_seg(buf_info->m);
		vmxnet3_cmd_ring_adv_next2comp(ring);
	}
}

static void
vmxnet3_cmd_ring_release(vmxnet3_cmd_ring_t *ring)
{
	rte_free(ring->buf_info);
	ring->buf_info = NULL;
}
void
vmxnet3_dev_tx_queue_release(void *txq)
{
	vmxnet3_tx_queue_t *tq = txq;

	if (tq != NULL) {
		/* Release mbufs */
		vmxnet3_tx_cmd_ring_release_mbufs(&tq->cmd_ring);
		/* Release the cmd_ring */
		vmxnet3_cmd_ring_release(&tq->cmd_ring);
	}
}

void
vmxnet3_dev_rx_queue_release(void *rxq)
{
	int i;
	vmxnet3_rx_queue_t *rq = rxq;

	if (rq != NULL) {
		/* Release mbufs */
		for (i = 0; i < VMXNET3_RX_CMDRING_SIZE; i++)
			vmxnet3_rx_cmd_ring_release_mbufs(&rq->cmd_ring[i]);
		/* Release both the cmd_rings */
		for (i = 0; i < VMXNET3_RX_CMDRING_SIZE; i++)
			vmxnet3_cmd_ring_release(&rq->cmd_ring[i]);
	}
}

static void
vmxnet3_dev_tx_queue_reset(void *txq)
{
	vmxnet3_tx_queue_t *tq = txq;
	struct vmxnet3_cmd_ring *ring = &tq->cmd_ring;
	struct vmxnet3_comp_ring *comp_ring = &tq->comp_ring;
	struct vmxnet3_data_ring *data_ring = &tq->data_ring;
	int size;

	if (tq != NULL) {
		/* Release the cmd_ring mbufs */
		vmxnet3_tx_cmd_ring_release_mbufs(&tq->cmd_ring);
	}

	/* Tx vmxnet rings structure initialization */
	ring->next2fill = 0;
	ring->next2comp = 0;
	ring->gen = VMXNET3_INIT_GEN;
	comp_ring->next2proc = 0;
	comp_ring->gen = VMXNET3_INIT_GEN;

	size = sizeof(struct Vmxnet3_TxDesc) * ring->size;
	size += sizeof(struct Vmxnet3_TxCompDesc) * comp_ring->size;
	size += sizeof(struct Vmxnet3_TxDataDesc) * data_ring->size;

	memset(ring->base, 0, size);
}
static void
vmxnet3_dev_rx_queue_reset(void *rxq)
{
	int i;
	vmxnet3_rx_queue_t *rq = rxq;
	struct vmxnet3_cmd_ring *ring0, *ring1;
	struct vmxnet3_comp_ring *comp_ring;
	int size;

	if (rq != NULL) {
		/* Release both the cmd_rings mbufs */
		for (i = 0; i < VMXNET3_RX_CMDRING_SIZE; i++)
			vmxnet3_rx_cmd_ring_release_mbufs(&rq->cmd_ring[i]);
	}

	ring0 = &rq->cmd_ring[0];
	ring1 = &rq->cmd_ring[1];
	comp_ring = &rq->comp_ring;

	/* Rx vmxnet rings structure initialization */
	ring0->next2fill = 0;
	ring1->next2fill = 0;
	ring0->next2comp = 0;
	ring1->next2comp = 0;
	ring0->gen = VMXNET3_INIT_GEN;
	ring1->gen = VMXNET3_INIT_GEN;
	comp_ring->next2proc = 0;
	comp_ring->gen = VMXNET3_INIT_GEN;

	size = sizeof(struct Vmxnet3_RxDesc) * (ring0->size + ring1->size);
	size += sizeof(struct Vmxnet3_RxCompDesc) * comp_ring->size;

	memset(ring0->base, 0, size);
}

void
vmxnet3_dev_clear_queues(struct rte_eth_dev *dev)
{
	unsigned i;

	PMD_INIT_FUNC_TRACE();

	for (i = 0; i < dev->data->nb_tx_queues; i++) {
		struct vmxnet3_tx_queue *txq = dev->data->tx_queues[i];
		if (txq != NULL)
			vmxnet3_dev_tx_queue_reset(txq);
	}

	for (i = 0; i < dev->data->nb_rx_queues; i++) {
		struct vmxnet3_rx_queue *rxq = dev->data->rx_queues[i];
		if (rxq != NULL)
			vmxnet3_dev_rx_queue_reset(rxq);
	}
}
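
/*
 * Free the mbuf that completed transmission and advance next2comp past all
 * descriptors belonging to that packet (only the EOP descriptor generates a
 * completion). Returns the number of command-ring descriptors released.
 */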
static int
vmxnet3_unmap_pkt(uint16_t eop_idx, vmxnet3_tx_queue_t *txq)
{
	int completed = 0;
	struct rte_mbuf *mbuf;

	/* Release cmd_ring descriptor and free mbuf */
	RTE_ASSERT(txq->cmd_ring.base[eop_idx].txd.eop == 1);

	mbuf = txq->cmd_ring.buf_info[eop_idx].m;
	if (mbuf == NULL)
		rte_panic("EOP desc does not point to a valid mbuf");
	rte_pktmbuf_free(mbuf);

	txq->cmd_ring.buf_info[eop_idx].m = NULL;

	while (txq->cmd_ring.next2comp != eop_idx) {
		/* no out-of-order completion */
		RTE_ASSERT(txq->cmd_ring.base[txq->cmd_ring.next2comp].txd.cq == 0);
		vmxnet3_cmd_ring_adv_next2comp(&txq->cmd_ring);
		completed++;
	}

	/* Mark the txd for which tcd was generated as completed */
	vmxnet3_cmd_ring_adv_next2comp(&txq->cmd_ring);

	return completed + 1;
}
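
/* Walk the Tx completion ring and release every descriptor the device has
 * written back, until the generation bit no longer matches.
 */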
static void
vmxnet3_tq_tx_complete(vmxnet3_tx_queue_t *txq)
{
	int completed = 0;
	vmxnet3_comp_ring_t *comp_ring = &txq->comp_ring;
	struct Vmxnet3_TxCompDesc *tcd = (struct Vmxnet3_TxCompDesc *)
		(comp_ring->base + comp_ring->next2proc);

	while (tcd->gen == comp_ring->gen) {
		completed += vmxnet3_unmap_pkt(tcd->txdIdx, txq);

		vmxnet3_comp_ring_adv_next2proc(comp_ring);
		tcd = (struct Vmxnet3_TxCompDesc *)(comp_ring->base +
						    comp_ring->next2proc);
	}

	PMD_TX_LOG(DEBUG, "Processed %d tx comps & command descs.", completed);
}
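
/*
 * Transmit burst function. Small single-segment frames are copied through the
 * data ring; larger frames get one command descriptor per mbuf segment. The
 * SOP descriptor carries the VLAN/checksum/TSO offload fields, and the TXPROD
 * doorbell is rung once enough packets have been deferred.
 */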
uint16_t
vmxnet3_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts,
		  uint16_t nb_pkts)
{
	uint16_t nb_tx;
	vmxnet3_tx_queue_t *txq = tx_queue;
	struct vmxnet3_hw *hw = txq->hw;
	Vmxnet3_TxQueueCtrl *txq_ctrl = &txq->shared->ctrl;
	uint32_t deferred = rte_le_to_cpu_32(txq_ctrl->txNumDeferred);

	if (unlikely(txq->stopped)) {
		PMD_TX_LOG(DEBUG, "Tx queue is stopped.");
		return 0;
	}

	/* Free up the comp_descriptors aggressively */
	vmxnet3_tq_tx_complete(txq);

	nb_tx = 0;
	while (nb_tx < nb_pkts) {
		Vmxnet3_GenericDesc *gdesc;
		vmxnet3_buf_info_t *tbi;
		uint32_t first2fill, avail, dw2;
		struct rte_mbuf *txm = tx_pkts[nb_tx];
		struct rte_mbuf *m_seg = txm;
		int copy_size = 0;
		bool tso = (txm->ol_flags & PKT_TX_TCP_SEG) != 0;
		/* # of descriptors needed for a packet. */
		unsigned count = txm->nb_segs;

		avail = vmxnet3_cmd_ring_desc_avail(&txq->cmd_ring);
		if (count > avail) {
			/* Is command ring full? */
			if (unlikely(avail == 0)) {
				PMD_TX_LOG(DEBUG, "No free ring descriptors");
				txq->stats.tx_ring_full++;
				txq->stats.drop_total += (nb_pkts - nb_tx);
				break;
			}

			/* Command ring is not full but cannot handle the
			 * multi-segmented packet. Let's try the next packet
			 * in this case.
			 */
			PMD_TX_LOG(DEBUG, "Running out of ring descriptors "
				   "(avail %d needed %d)", avail, count);
			txq->stats.drop_total++;
			if (tso)
				txq->stats.drop_tso++;
			rte_pktmbuf_free(txm);
			nb_tx++;
			continue;
		}

		/* Drop non-TSO packet that is excessively fragmented */
		if (unlikely(!tso && count > VMXNET3_MAX_TXD_PER_PKT)) {
			PMD_TX_LOG(ERR, "Non-TSO packet cannot occupy more than %d tx "
				   "descriptors. Packet dropped.", VMXNET3_MAX_TXD_PER_PKT);
			txq->stats.drop_too_many_segs++;
			txq->stats.drop_total++;
			rte_pktmbuf_free(txm);
			nb_tx++;
			continue;
		}

		if (txm->nb_segs == 1 &&
		    rte_pktmbuf_pkt_len(txm) <= VMXNET3_HDR_COPY_SIZE) {
			struct Vmxnet3_TxDataDesc *tdd;

			tdd = txq->data_ring.base + txq->cmd_ring.next2fill;
			copy_size = rte_pktmbuf_pkt_len(txm);
			rte_memcpy(tdd->data, rte_pktmbuf_mtod(txm, char *), copy_size);
		}

		/* use the previous gen bit for the SOP desc */
		dw2 = (txq->cmd_ring.gen ^ 0x1) << VMXNET3_TXD_GEN_SHIFT;
		first2fill = txq->cmd_ring.next2fill;
		do {
			/* Remember the transmit buffer for cleanup */
			tbi = txq->cmd_ring.buf_info + txq->cmd_ring.next2fill;

			/* NB: the following assumes that VMXNET3 maximum
			 * transmit buffer size (16K) is greater than
			 * maximum size of mbuf segment size.
			 */
			gdesc = txq->cmd_ring.base + txq->cmd_ring.next2fill;
			if (copy_size)
				gdesc->txd.addr = rte_cpu_to_le_64(txq->data_ring.basePA +
								   txq->cmd_ring.next2fill *
								   sizeof(struct Vmxnet3_TxDataDesc));
			else
				gdesc->txd.addr = rte_mbuf_data_dma_addr(m_seg);

			gdesc->dword[2] = dw2 | m_seg->data_len;
			gdesc->dword[3] = 0;

			/* move to the next2fill descriptor */
			vmxnet3_cmd_ring_adv_next2fill(&txq->cmd_ring);

			/* use the right gen for non-SOP desc */
			dw2 = txq->cmd_ring.gen << VMXNET3_TXD_GEN_SHIFT;
		} while ((m_seg = m_seg->next) != NULL);

		/* set the last buf_info for the pkt */
		tbi->m = txm;
		/* Update the EOP descriptor */
		gdesc->dword[3] |= VMXNET3_TXD_EOP | VMXNET3_TXD_CQ;

		/* Add VLAN tag if present */
		gdesc = txq->cmd_ring.base + first2fill;
		if (txm->ol_flags & PKT_TX_VLAN_PKT) {
			gdesc->txd.ti = 1;
			gdesc->txd.tci = txm->vlan_tci;
		}

		if (tso) {
			uint16_t mss = txm->tso_segsz;

			RTE_ASSERT(mss > 0);

			gdesc->txd.hlen = txm->l2_len + txm->l3_len + txm->l4_len;
			gdesc->txd.om = VMXNET3_OM_TSO;
			gdesc->txd.msscof = mss;

			deferred += (rte_pktmbuf_pkt_len(txm) - gdesc->txd.hlen + mss - 1) / mss;
		} else if (txm->ol_flags & PKT_TX_L4_MASK) {
			gdesc->txd.om = VMXNET3_OM_CSUM;
			gdesc->txd.hlen = txm->l2_len + txm->l3_len;

			switch (txm->ol_flags & PKT_TX_L4_MASK) {
			case PKT_TX_TCP_CKSUM:
				gdesc->txd.msscof = gdesc->txd.hlen + offsetof(struct tcp_hdr, cksum);
				break;
			case PKT_TX_UDP_CKSUM:
				gdesc->txd.msscof = gdesc->txd.hlen + offsetof(struct udp_hdr, dgram_cksum);
				break;
			default:
				PMD_TX_LOG(WARNING, "requested cksum offload not supported %#llx",
					   txm->ol_flags & PKT_TX_L4_MASK);
				abort();
			}
			deferred++;
		} else {
			gdesc->txd.hlen = 0;
			gdesc->txd.om = VMXNET3_OM_NONE;
			gdesc->txd.msscof = 0;
			deferred++;
		}

		/* flip the GEN bit on the SOP */
		rte_compiler_barrier();
		gdesc->dword[2] ^= VMXNET3_TXD_GEN;

		txq_ctrl->txNumDeferred = rte_cpu_to_le_32(deferred);
		nb_tx++;
	}

	PMD_TX_LOG(DEBUG, "vmxnet3 txThreshold: %u", rte_le_to_cpu_32(txq_ctrl->txThreshold));

	if (deferred >= rte_le_to_cpu_32(txq_ctrl->txThreshold)) {
		txq_ctrl->txNumDeferred = 0;
		/* Notify vSwitch that packets are available. */
		VMXNET3_WRITE_BAR0_REG(hw, (VMXNET3_REG_TXPROD + txq->queue_id * VMXNET3_REG_ALIGN),
				       txq->cmd_ring.next2fill);
	}

	return nb_tx;
}
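
/*
 * Example (application side, not part of this file): vmxnet3_xmit_pkts is
 * installed as the PMD's tx_pkt_burst callback, so applications reach it
 * through the generic burst API, e.g.
 *
 *	nb_sent = rte_eth_tx_burst(port_id, queue_id, pkts, nb_pkts);
 *
 * Packets beyond the returned count remain owned by the caller.
 */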
/*
 * Allocate mbufs and post Rx descriptors with the buffer details so that
 * the device can receive packets into those buffers.
 *
 * Ring layout: of the two command rings, the 1st ring holds buffers of type 0
 * (HEAD) and type 1 (BODY). bufs_per_pkt is chosen so that, in the non-LRO
 * case, all buffers required by a frame fit in the 1st ring (the first buffer
 * of type 0 and the rest of type 1). The 2nd ring holds type 1 buffers only
 * and is mostly used for LRO.
 */
static int
vmxnet3_post_rx_bufs(vmxnet3_rx_queue_t *rxq, uint8_t ring_id)
{
	int err = 0;
	uint32_t i = 0, val = 0;
	struct vmxnet3_cmd_ring *ring = &rxq->cmd_ring[ring_id];

	if (ring_id == 0) {
		/* Usually: One HEAD type buf per packet
		 * val = (ring->next2fill % rxq->hw->bufs_per_pkt) ?
		 * VMXNET3_RXD_BTYPE_BODY : VMXNET3_RXD_BTYPE_HEAD;
		 */

		/* We use single packet buffer so all heads here */
		val = VMXNET3_RXD_BTYPE_HEAD;
	} else {
		/* All BODY type buffers for 2nd ring */
		val = VMXNET3_RXD_BTYPE_BODY;
	}

	while (vmxnet3_cmd_ring_desc_avail(ring) > 0) {
		struct Vmxnet3_RxDesc *rxd;
		struct rte_mbuf *mbuf;
		vmxnet3_buf_info_t *buf_info = &ring->buf_info[ring->next2fill];

		rxd = (struct Vmxnet3_RxDesc *)(ring->base + ring->next2fill);

		/* Allocate blank mbuf for the current Rx Descriptor */
		mbuf = rte_mbuf_raw_alloc(rxq->mp);
		if (unlikely(mbuf == NULL)) {
			PMD_RX_LOG(ERR, "Error allocating mbuf");
			rxq->stats.rx_buf_alloc_failure++;
			err = ENOMEM;
			break;
		}

		/*
		 * Load mbuf pointer into buf_info[ring_size]
		 * buf_info structure is equivalent to cookie for virtio-virtqueue
		 */
		buf_info->m = mbuf;
		buf_info->len = (uint16_t)(mbuf->buf_len -
					   RTE_PKTMBUF_HEADROOM);
		buf_info->bufPA = rte_mbuf_data_dma_addr_default(mbuf);

		/* Load Rx Descriptor with the buffer's GPA */
		rxd->addr = buf_info->bufPA;

		/* After this point rxd->addr MUST not be NULL */
		rxd->btype = val;
		rxd->len = buf_info->len;
		/* Flip gen bit at the end to change ownership */
		rxd->gen = ring->gen;

		vmxnet3_cmd_ring_adv_next2fill(ring);
		i++;
	}

	/* Return error only if no buffers are posted at present */
	if (vmxnet3_cmd_ring_desc_avail(ring) >= (ring->size - 1))
		return -err;
	else
		return i;
}
/* Receive side checksum and other offloads */
static void
vmxnet3_rx_offload(const Vmxnet3_RxCompDesc *rcd, struct rte_mbuf *rxm)
{
	/* Check for RSS */
	if (rcd->rssType != VMXNET3_RCD_RSS_TYPE_NONE) {
		rxm->ol_flags |= PKT_RX_RSS_HASH;
		rxm->hash.rss = rcd->rssHash;
	}

	/* Check packet type, checksum errors, etc. Only support IPv4 for now. */
	if (rcd->v4) {
		struct ether_hdr *eth = rte_pktmbuf_mtod(rxm, struct ether_hdr *);
		struct ipv4_hdr *ip = (struct ipv4_hdr *)(eth + 1);

		if (((ip->version_ihl & 0xf) << 2) > (int)sizeof(struct ipv4_hdr))
			rxm->packet_type = RTE_PTYPE_L3_IPV4_EXT;
		else
			rxm->packet_type = RTE_PTYPE_L3_IPV4;

		if (!rcd->cnc) {
			if (!rcd->ipc)
				rxm->ol_flags |= PKT_RX_IP_CKSUM_BAD;

			if ((rcd->tcp || rcd->udp) && !rcd->tuc)
				rxm->ol_flags |= PKT_RX_L4_CKSUM_BAD;
		}
	}
}
/*
 * Process the Rx Completion Ring of given vmxnet3_rx_queue
 * for nb_pkts burst and return the number of packets received
 */
uint16_t
vmxnet3_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t nb_pkts)
{
	uint16_t nb_rx;
	uint32_t nb_rxd, idx;
	uint8_t ring_idx;
	vmxnet3_rx_queue_t *rxq;
	Vmxnet3_RxCompDesc *rcd;
	vmxnet3_buf_info_t *rbi;
	Vmxnet3_RxDesc *rxd;
	struct rte_mbuf *rxm = NULL;
	struct vmxnet3_hw *hw;

	nb_rx = 0;
	ring_idx = 0;
	nb_rxd = 0;
	idx = 0;

	rxq = rx_queue;
	hw = rxq->hw;

	rcd = &rxq->comp_ring.base[rxq->comp_ring.next2proc].rcd;

	if (unlikely(rxq->stopped)) {
		PMD_RX_LOG(DEBUG, "Rx queue is stopped.");
		return 0;
	}

	while (rcd->gen == rxq->comp_ring.gen) {
		if (nb_rx >= nb_pkts)
			break;

		idx = rcd->rxdIdx;
		ring_idx = (uint8_t)((rcd->rqID == rxq->qid1) ? 0 : 1);
		rxd = (Vmxnet3_RxDesc *)rxq->cmd_ring[ring_idx].base + idx;
		RTE_SET_USED(rxd); /* used only for assert when enabled */
		rbi = rxq->cmd_ring[ring_idx].buf_info + idx;

		PMD_RX_LOG(DEBUG, "rxd idx: %d ring idx: %d.", idx, ring_idx);

		RTE_ASSERT(rcd->len <= rxd->len);
		RTE_ASSERT(rbi->m);

		/* Get the packet buffer pointer from buf_info */
		rxm = rbi->m;

		/* Clear descriptor associated buf_info to be reused */
		rbi->m = NULL;
		rbi->bufPA = 0;

		/* Update the index that we received a packet */
		rxq->cmd_ring[ring_idx].next2comp = idx;

		/* For RCD with EOP set, check if there is frame error */
		if (unlikely(rcd->eop && rcd->err)) {
			rxq->stats.drop_total++;
			rxq->stats.drop_err++;

			if (!rcd->fcs)
				rxq->stats.drop_fcs++;
			PMD_RX_LOG(ERR, "Recv packet dropped due to frame err.");
			PMD_RX_LOG(ERR, "Error in received packet rcd#:%d rxd:%d",
				   (int)(rcd - (struct Vmxnet3_RxCompDesc *)
					 rxq->comp_ring.base), rcd->rxdIdx);
			rte_pktmbuf_free_seg(rxm);
			goto rcd_done;
		}

		/* Initialize newly received packet buffer */
		rxm->port = rxq->port_id;
		rxm->nb_segs = 1;
		rxm->next = NULL;
		rxm->pkt_len = (uint16_t)rcd->len;
		rxm->data_len = (uint16_t)rcd->len;
		rxm->data_off = RTE_PKTMBUF_HEADROOM;
		rxm->ol_flags = 0;
		rxm->vlan_tci = 0;

		/*
		 * If this is the first buffer of the received packet,
		 * set the pointer to the first mbuf of the packet
		 * Otherwise, update the total length and the number of segments
		 * of the current scattered packet, and update the pointer to
		 * the last mbuf of the current packet.
		 */
		if (rcd->sop) {
			RTE_ASSERT(rxd->btype == VMXNET3_RXD_BTYPE_HEAD);

			if (unlikely(rcd->len == 0)) {
				RTE_ASSERT(rcd->eop);
				PMD_RX_LOG(DEBUG,
					   "Rx buf was skipped. rxring[%d][%d])",
					   ring_idx, idx);
				rte_pktmbuf_free_seg(rxm);
				goto rcd_done;
			}

			rxq->start_seg = rxm;
			vmxnet3_rx_offload(rcd, rxm);
		} else {
			struct rte_mbuf *start = rxq->start_seg;

			RTE_ASSERT(rxd->btype == VMXNET3_RXD_BTYPE_BODY);

			start->pkt_len += rxm->data_len;
			start->nb_segs++;

			rxq->last_seg->next = rxm;
		}
		rxq->last_seg = rxm;

		if (rcd->eop) {
			struct rte_mbuf *start = rxq->start_seg;

			/* Check for hardware stripped VLAN tag */
			if (rcd->ts) {
				start->ol_flags |= (PKT_RX_VLAN_PKT | PKT_RX_VLAN_STRIPPED);
				start->vlan_tci = rte_le_to_cpu_16((uint16_t)rcd->tci);
			}

			rx_pkts[nb_rx++] = start;
			rxq->start_seg = NULL;
		}

rcd_done:
		rxq->cmd_ring[ring_idx].next2comp = idx;
		VMXNET3_INC_RING_IDX_ONLY(rxq->cmd_ring[ring_idx].next2comp,
					  rxq->cmd_ring[ring_idx].size);

		/* It's time to allocate some new buf and renew descriptors */
		vmxnet3_post_rx_bufs(rxq, ring_idx);
		if (unlikely(rxq->shared->ctrl.updateRxProd)) {
			VMXNET3_WRITE_BAR0_REG(hw, rxprod_reg[ring_idx] + (rxq->queue_id * VMXNET3_REG_ALIGN),
					       rxq->cmd_ring[ring_idx].next2fill);
		}

		/* Advance to the next descriptor in comp_ring */
		vmxnet3_comp_ring_adv_next2proc(&rxq->comp_ring);

		rcd = &rxq->comp_ring.base[rxq->comp_ring.next2proc].rcd;
		nb_rxd++;
		if (nb_rxd > rxq->cmd_ring[0].size) {
			PMD_RX_LOG(ERR, "Used up quota of receiving packets,"
				   " relinquish control.");
			break;
		}
	}

	return nb_rx;
}
/*
 * Create memzone for device rings. malloc can't be used as the physical
 * address is needed. If the memzone is already created, then this function
 * returns a pointer to the old one.
 */
static const struct rte_memzone *
ring_dma_zone_reserve(struct rte_eth_dev *dev, const char *ring_name,
		      uint16_t queue_id, uint32_t ring_size, int socket_id)
{
	char z_name[RTE_MEMZONE_NAMESIZE];
	const struct rte_memzone *mz;

	snprintf(z_name, sizeof(z_name), "%s_%s_%d_%d",
		 dev->driver->pci_drv.driver.name, ring_name,
		 dev->data->port_id, queue_id);

	mz = rte_memzone_lookup(z_name);
	if (mz)
		return mz;

	return rte_memzone_reserve_aligned(z_name, ring_size,
					   socket_id, 0, VMXNET3_RING_BA_ALIGN);
}
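
/* Set up a Tx queue: allocate the queue structure, size and carve the
 * command/completion/data rings out of a single DMA memzone, and allocate the
 * per-descriptor buf_info array.
 */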
int
vmxnet3_dev_tx_queue_setup(struct rte_eth_dev *dev,
			   uint16_t queue_idx,
			   uint16_t nb_desc,
			   unsigned int socket_id,
			   __rte_unused const struct rte_eth_txconf *tx_conf)
{
	struct vmxnet3_hw *hw = dev->data->dev_private;
	const struct rte_memzone *mz;
	struct vmxnet3_tx_queue *txq;
	struct vmxnet3_cmd_ring *ring;
	struct vmxnet3_comp_ring *comp_ring;
	struct vmxnet3_data_ring *data_ring;
	int size;

	PMD_INIT_FUNC_TRACE();

	if ((tx_conf->txq_flags & ETH_TXQ_FLAGS_NOXSUMSCTP) !=
	    ETH_TXQ_FLAGS_NOXSUMSCTP) {
		PMD_INIT_LOG(ERR, "SCTP checksum offload not supported");
		return -EINVAL;
	}

	txq = rte_zmalloc("ethdev_tx_queue", sizeof(struct vmxnet3_tx_queue),
			  RTE_CACHE_LINE_SIZE);
	if (txq == NULL) {
		PMD_INIT_LOG(ERR, "Can not allocate tx queue structure");
		return -ENOMEM;
	}

	txq->queue_id = queue_idx;
	txq->port_id = dev->data->port_id;
	txq->shared = &hw->tqd_start[queue_idx];
	txq->hw = hw;
	txq->qid = queue_idx;
	txq->stopped = TRUE;

	ring = &txq->cmd_ring;
	comp_ring = &txq->comp_ring;
	data_ring = &txq->data_ring;

	/* Tx vmxnet ring length should be between 512-4096 */
	if (nb_desc < VMXNET3_DEF_TX_RING_SIZE) {
		PMD_INIT_LOG(ERR, "VMXNET3 Tx Ring Size Min: %u",
			     VMXNET3_DEF_TX_RING_SIZE);
		return -EINVAL;
	} else if (nb_desc > VMXNET3_TX_RING_MAX_SIZE) {
		PMD_INIT_LOG(ERR, "VMXNET3 Tx Ring Size Max: %u",
			     VMXNET3_TX_RING_MAX_SIZE);
		return -EINVAL;
	} else {
		ring->size = nb_desc;
		ring->size &= ~VMXNET3_RING_SIZE_MASK;
	}
	comp_ring->size = data_ring->size = ring->size;

	/* Tx vmxnet rings structure initialization */
	ring->next2fill = 0;
	ring->next2comp = 0;
	ring->gen = VMXNET3_INIT_GEN;
	comp_ring->next2proc = 0;
	comp_ring->gen = VMXNET3_INIT_GEN;

	size = sizeof(struct Vmxnet3_TxDesc) * ring->size;
	size += sizeof(struct Vmxnet3_TxCompDesc) * comp_ring->size;
	size += sizeof(struct Vmxnet3_TxDataDesc) * data_ring->size;

	mz = ring_dma_zone_reserve(dev, "txdesc", queue_idx, size, socket_id);
	if (mz == NULL) {
		PMD_INIT_LOG(ERR, "ERROR: Creating queue descriptors zone");
		return -ENOMEM;
	}
	memset(mz->addr, 0, mz->len);

	/* cmd_ring initialization */
	ring->base = mz->addr;
	ring->basePA = mz->phys_addr;

	/* comp_ring initialization */
	comp_ring->base = ring->base + ring->size;
	comp_ring->basePA = ring->basePA +
		(sizeof(struct Vmxnet3_TxDesc) * ring->size);

	/* data_ring initialization */
	data_ring->base = (Vmxnet3_TxDataDesc *)(comp_ring->base + comp_ring->size);
	data_ring->basePA = comp_ring->basePA +
		(sizeof(struct Vmxnet3_TxCompDesc) * comp_ring->size);

	/* cmd_ring0 buf_info allocation */
	ring->buf_info = rte_zmalloc("tx_ring_buf_info",
				     ring->size * sizeof(vmxnet3_buf_info_t), RTE_CACHE_LINE_SIZE);
	if (ring->buf_info == NULL) {
		PMD_INIT_LOG(ERR, "ERROR: Creating tx_buf_info structure");
		return -ENOMEM;
	}

	/* Update the data portion with txq */
	dev->data->tx_queues[queue_idx] = txq;

	return 0;
}
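
/* Set up an Rx queue: the two command rings and the completion ring share a
 * single DMA memzone; each command ring gets its own buf_info array.
 */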
int
vmxnet3_dev_rx_queue_setup(struct rte_eth_dev *dev,
			   uint16_t queue_idx,
			   uint16_t nb_desc,
			   unsigned int socket_id,
			   __rte_unused const struct rte_eth_rxconf *rx_conf,
			   struct rte_mempool *mp)
{
	const struct rte_memzone *mz;
	struct vmxnet3_rx_queue *rxq;
	struct vmxnet3_hw *hw = dev->data->dev_private;
	struct vmxnet3_cmd_ring *ring0, *ring1, *ring;
	struct vmxnet3_comp_ring *comp_ring;
	int size;
	uint8_t i;
	char mem_name[32];

	PMD_INIT_FUNC_TRACE();

	rxq = rte_zmalloc("ethdev_rx_queue", sizeof(struct vmxnet3_rx_queue),
			  RTE_CACHE_LINE_SIZE);
	if (rxq == NULL) {
		PMD_INIT_LOG(ERR, "Can not allocate rx queue structure");
		return -ENOMEM;
	}

	rxq->mp = mp;
	rxq->queue_id = queue_idx;
	rxq->port_id = dev->data->port_id;
	rxq->shared = &hw->rqd_start[queue_idx];
	rxq->hw = hw;
	rxq->qid1 = queue_idx;
	rxq->qid2 = queue_idx + hw->num_rx_queues;
	rxq->stopped = TRUE;

	ring0 = &rxq->cmd_ring[0];
	ring1 = &rxq->cmd_ring[1];
	comp_ring = &rxq->comp_ring;

	/* Rx vmxnet rings length should be between 256-4096 */
	if (nb_desc < VMXNET3_DEF_RX_RING_SIZE) {
		PMD_INIT_LOG(ERR, "VMXNET3 Rx Ring Size Min: 256");
		return -EINVAL;
	} else if (nb_desc > VMXNET3_RX_RING_MAX_SIZE) {
		PMD_INIT_LOG(ERR, "VMXNET3 Rx Ring Size Max: 4096");
		return -EINVAL;
	} else {
		ring0->size = nb_desc;
		ring0->size &= ~VMXNET3_RING_SIZE_MASK;
		ring1->size = ring0->size;
	}

	comp_ring->size = ring0->size + ring1->size;

	/* Rx vmxnet rings structure initialization */
	ring0->next2fill = 0;
	ring1->next2fill = 0;
	ring0->next2comp = 0;
	ring1->next2comp = 0;
	ring0->gen = VMXNET3_INIT_GEN;
	ring1->gen = VMXNET3_INIT_GEN;
	comp_ring->next2proc = 0;
	comp_ring->gen = VMXNET3_INIT_GEN;

	size = sizeof(struct Vmxnet3_RxDesc) * (ring0->size + ring1->size);
	size += sizeof(struct Vmxnet3_RxCompDesc) * comp_ring->size;

	mz = ring_dma_zone_reserve(dev, "rxdesc", queue_idx, size, socket_id);
	if (mz == NULL) {
		PMD_INIT_LOG(ERR, "ERROR: Creating queue descriptors zone");
		return -ENOMEM;
	}
	memset(mz->addr, 0, mz->len);

	/* cmd_ring0 initialization */
	ring0->base = mz->addr;
	ring0->basePA = mz->phys_addr;

	/* cmd_ring1 initialization */
	ring1->base = ring0->base + ring0->size;
	ring1->basePA = ring0->basePA + sizeof(struct Vmxnet3_RxDesc) * ring0->size;

	/* comp_ring initialization */
	comp_ring->base = ring1->base + ring1->size;
	comp_ring->basePA = ring1->basePA + sizeof(struct Vmxnet3_RxDesc) *
		ring1->size;

	/* cmd_ring0-cmd_ring1 buf_info allocation */
	for (i = 0; i < VMXNET3_RX_CMDRING_SIZE; i++) {

		ring = &rxq->cmd_ring[i];
		ring->rid = i;
		snprintf(mem_name, sizeof(mem_name), "rx_ring_%d_buf_info", i);

		ring->buf_info = rte_zmalloc(mem_name,
					     ring->size * sizeof(vmxnet3_buf_info_t),
					     RTE_CACHE_LINE_SIZE);
		if (ring->buf_info == NULL) {
			PMD_INIT_LOG(ERR, "ERROR: Creating rx_buf_info structure");
			return -ENOMEM;
		}
	}

	/* Update the data portion with rxq */
	dev->data->rx_queues[queue_idx] = rxq;

	return 0;
}
/*
 * Initializes Receive Unit
 * Load mbufs in rx queue in advance
 */
int
vmxnet3_dev_rxtx_init(struct rte_eth_dev *dev)
{
	struct vmxnet3_hw *hw = dev->data->dev_private;
	int i, ret;
	uint8_t j;

	PMD_INIT_FUNC_TRACE();

	for (i = 0; i < hw->num_rx_queues; i++) {
		vmxnet3_rx_queue_t *rxq = dev->data->rx_queues[i];

		for (j = 0; j < VMXNET3_RX_CMDRING_SIZE; j++) {
			/* Passing 0 as alloc_num will allocate full ring */
			ret = vmxnet3_post_rx_bufs(rxq, j);
			if (ret <= 0) {
				PMD_INIT_LOG(ERR,
					     "ERROR: Posting Rxq: %d buffers ring: %d",
					     i, j);
				return -ret;
			}
			/*
			 * Updating device with the index:next2fill to fill the
			 * mbufs for coming packets.
			 */
			if (unlikely(rxq->shared->ctrl.updateRxProd)) {
				VMXNET3_WRITE_BAR0_REG(hw, rxprod_reg[j] + (rxq->queue_id * VMXNET3_REG_ALIGN),
						       rxq->cmd_ring[j].next2fill);
			}
		}
		rxq->stopped = FALSE;
		rxq->start_seg = NULL;
	}

	for (i = 0; i < dev->data->nb_tx_queues; i++) {
		struct vmxnet3_tx_queue *txq = dev->data->tx_queues[i];

		txq->stopped = FALSE;
	}

	return 0;
}
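
/* Default 40-byte Toeplitz RSS key, used when the application does not supply
 * its own rss_key. This appears to be the same well-known default key used by
 * other Intel PMDs, hence the name.
 */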
static uint8_t rss_intel_key[40] = {
	0x6D, 0x5A, 0x56, 0xDA, 0x25, 0x5B, 0x0E, 0xC2,
	0x41, 0x67, 0x25, 0x3D, 0x43, 0xA3, 0x8F, 0xB0,
	0xD0, 0xCA, 0x2B, 0xCB, 0xAE, 0x7B, 0x30, 0xB4,
	0x77, 0xCB, 0x2D, 0xA3, 0x80, 0x30, 0xF2, 0x0C,
	0x6A, 0x42, 0xB7, 0x3B, 0xBE, 0xAC, 0x01, 0xFA,
};
/*
 * Configure RSS feature
 */
int
vmxnet3_rss_configure(struct rte_eth_dev *dev)
{
	struct vmxnet3_hw *hw = dev->data->dev_private;
	struct VMXNET3_RSSConf *dev_rss_conf;
	struct rte_eth_rss_conf *port_rss_conf;
	uint64_t rss_hf;
	uint8_t i, j;

	PMD_INIT_FUNC_TRACE();

	dev_rss_conf = hw->rss_conf;
	port_rss_conf = &dev->data->dev_conf.rx_adv_conf.rss_conf;

	/* loading hashFunc */
	dev_rss_conf->hashFunc = VMXNET3_RSS_HASH_FUNC_TOEPLITZ;
	/* loading hashKeySize */
	dev_rss_conf->hashKeySize = VMXNET3_RSS_MAX_KEY_SIZE;
	/* loading indTableSize: Must not exceed VMXNET3_RSS_MAX_IND_TABLE_SIZE (128) */
	dev_rss_conf->indTableSize = (uint16_t)(hw->num_rx_queues * 4);

	if (port_rss_conf->rss_key == NULL) {
		/* Default hash key */
		port_rss_conf->rss_key = rss_intel_key;
	}

	/* loading hashKey */
	memcpy(&dev_rss_conf->hashKey[0], port_rss_conf->rss_key,
	       dev_rss_conf->hashKeySize);

	/* loading indTable */
	for (i = 0, j = 0; i < dev_rss_conf->indTableSize; i++, j++) {
		if (j == dev->data->nb_rx_queues)
			j = 0;

		dev_rss_conf->indTable[i] = j;
	}
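
	/*
	 * Example: with 2 Rx queues the loop above yields an 8-entry
	 * indirection table of 0, 1, 0, 1, ..., i.e. RSS hash results are
	 * spread over the queues round-robin.
	 */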
	/* loading hashType */
	dev_rss_conf->hashType = 0;
	rss_hf = port_rss_conf->rss_hf & VMXNET3_RSS_OFFLOAD_ALL;
	if (rss_hf & ETH_RSS_IPV4)
		dev_rss_conf->hashType |= VMXNET3_RSS_HASH_TYPE_IPV4;
	if (rss_hf & ETH_RSS_NONFRAG_IPV4_TCP)
		dev_rss_conf->hashType |= VMXNET3_RSS_HASH_TYPE_TCP_IPV4;
	if (rss_hf & ETH_RSS_IPV6)
		dev_rss_conf->hashType |= VMXNET3_RSS_HASH_TYPE_IPV6;
	if (rss_hf & ETH_RSS_NONFRAG_IPV6_TCP)
		dev_rss_conf->hashType |= VMXNET3_RSS_HASH_TYPE_TCP_IPV6;

	return VMXNET3_SUCCESS;
}