/*-
 *   BSD LICENSE
 *
 *   Copyright(c) 2010-2015 Intel Corporation. All rights reserved.
 *   All rights reserved.
 *
 *   Redistribution and use in source and binary forms, with or without
 *   modification, are permitted provided that the following conditions
 *   are met:
 *
 *     * Redistributions of source code must retain the above copyright
 *       notice, this list of conditions and the following disclaimer.
 *     * Redistributions in binary form must reproduce the above copyright
 *       notice, this list of conditions and the following disclaimer in
 *       the documentation and/or other materials provided with the
 *       distribution.
 *     * Neither the name of Intel Corporation nor the names of its
 *       contributors may be used to endorse or promote products derived
 *       from this software without specific prior written permission.
 *
 *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

#include <sys/queue.h>

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <errno.h>
#include <stdint.h>

#include <rte_byteorder.h>
#include <rte_common.h>
#include <rte_cycles.h>
#include <rte_log.h>
#include <rte_debug.h>
#include <rte_interrupts.h>
#include <rte_pci.h>
#include <rte_memory.h>
#include <rte_memzone.h>
#include <rte_launch.h>
#include <rte_eal.h>
#include <rte_per_lcore.h>
#include <rte_lcore.h>
#include <rte_atomic.h>
#include <rte_branch_prediction.h>
#include <rte_mempool.h>
#include <rte_malloc.h>
#include <rte_mbuf.h>
#include <rte_ether.h>
#include <rte_ethdev.h>
#include <rte_prefetch.h>
#include <rte_ip.h>
#include <rte_udp.h>
#include <rte_tcp.h>
#include <rte_string_fns.h>
#include <rte_errno.h>

#include "base/vmxnet3_defs.h"
#include "vmxnet3_ring.h"

#include "vmxnet3_logs.h"
#include "vmxnet3_ethdev.h"
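
/* Rx producer ("doorbell") registers in BAR0, one per hardware Rx command ring. */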
static const uint32_t rxprod_reg[2] = {VMXNET3_REG_RXPROD, VMXNET3_REG_RXPROD2};

static int vmxnet3_post_rx_bufs(vmxnet3_rx_queue_t *, uint8_t);
static void vmxnet3_tq_tx_complete(vmxnet3_tx_queue_t *);
#ifdef RTE_LIBRTE_VMXNET3_DEBUG_DRIVER_NOT_USED
static void vmxnet3_rxq_dump(struct vmxnet3_rx_queue *);
static void vmxnet3_txq_dump(struct vmxnet3_tx_queue *);
#endif

#ifdef RTE_LIBRTE_VMXNET3_DEBUG_DRIVER_NOT_USED
static void
vmxnet3_rxq_dump(struct vmxnet3_rx_queue *rxq)
{
	uint32_t avail = 0;

	if (rxq == NULL)
		return;

	PMD_RX_LOG(DEBUG,
		   "RXQ: cmd0 base : %p cmd1 base : %p comp ring base : %p.",
		   rxq->cmd_ring[0].base, rxq->cmd_ring[1].base, rxq->comp_ring.base);
	PMD_RX_LOG(DEBUG,
		   "RXQ: cmd0 basePA : 0x%lx cmd1 basePA : 0x%lx comp ring basePA : 0x%lx.",
		   (unsigned long)rxq->cmd_ring[0].basePA,
		   (unsigned long)rxq->cmd_ring[1].basePA,
		   (unsigned long)rxq->comp_ring.basePA);

	avail = vmxnet3_cmd_ring_desc_avail(&rxq->cmd_ring[0]);
	PMD_RX_LOG(DEBUG,
		   "RXQ:cmd0: size=%u; free=%u; next2proc=%u; queued=%u",
		   (uint32_t)rxq->cmd_ring[0].size, avail,
		   rxq->comp_ring.next2proc,
		   rxq->cmd_ring[0].size - avail);

	avail = vmxnet3_cmd_ring_desc_avail(&rxq->cmd_ring[1]);
	PMD_RX_LOG(DEBUG, "RXQ:cmd1 size=%u; free=%u; next2proc=%u; queued=%u",
		   (uint32_t)rxq->cmd_ring[1].size, avail, rxq->comp_ring.next2proc,
		   rxq->cmd_ring[1].size - avail);
}

static void
vmxnet3_txq_dump(struct vmxnet3_tx_queue *txq)
{
	uint32_t avail = 0;

	if (txq == NULL)
		return;

	PMD_TX_LOG(DEBUG, "TXQ: cmd base : %p comp ring base : %p data ring base : %p.",
		   txq->cmd_ring.base, txq->comp_ring.base, txq->data_ring.base);
	PMD_TX_LOG(DEBUG, "TXQ: cmd basePA : 0x%lx comp ring basePA : 0x%lx data ring basePA : 0x%lx.",
		   (unsigned long)txq->cmd_ring.basePA,
		   (unsigned long)txq->comp_ring.basePA,
		   (unsigned long)txq->data_ring.basePA);

	avail = vmxnet3_cmd_ring_desc_avail(&txq->cmd_ring);
	PMD_TX_LOG(DEBUG, "TXQ: size=%u; free=%u; next2proc=%u; queued=%u",
		   (uint32_t)txq->cmd_ring.size, avail,
		   txq->comp_ring.next2proc, txq->cmd_ring.size - avail);
}
#endif

static void
vmxnet3_cmd_ring_release_mbufs(vmxnet3_cmd_ring_t *ring)
{
	while (ring->next2comp != ring->next2fill) {
		/* No need to worry about desc ownership, device is quiesced by now. */
		vmxnet3_buf_info_t *buf_info = ring->buf_info + ring->next2comp;

		if (buf_info->m) {
			rte_pktmbuf_free(buf_info->m);
			buf_info->m = NULL;
			buf_info->bufPA = 0;
			buf_info->len = 0;
		}
		vmxnet3_cmd_ring_adv_next2comp(ring);
	}
}

static void
vmxnet3_cmd_ring_release(vmxnet3_cmd_ring_t *ring)
{
	vmxnet3_cmd_ring_release_mbufs(ring);
	rte_free(ring->buf_info);
	ring->buf_info = NULL;
}

void
vmxnet3_dev_tx_queue_release(void *txq)
{
	vmxnet3_tx_queue_t *tq = txq;

	if (tq != NULL) {
		/* Release the cmd_ring */
		vmxnet3_cmd_ring_release(&tq->cmd_ring);
	}
}

void
vmxnet3_dev_rx_queue_release(void *rxq)
{
	int i;
	vmxnet3_rx_queue_t *rq = rxq;

	if (rq != NULL) {
		/* Release both the cmd_rings */
		for (i = 0; i < VMXNET3_RX_CMDRING_SIZE; i++)
			vmxnet3_cmd_ring_release(&rq->cmd_ring[i]);
	}
}

static void
vmxnet3_dev_tx_queue_reset(void *txq)
{
	vmxnet3_tx_queue_t *tq = txq;
	struct vmxnet3_cmd_ring *ring = &tq->cmd_ring;
	struct vmxnet3_comp_ring *comp_ring = &tq->comp_ring;
	struct vmxnet3_data_ring *data_ring = &tq->data_ring;
	int size;

	if (tq != NULL) {
		/* Release the cmd_ring mbufs */
		vmxnet3_cmd_ring_release_mbufs(&tq->cmd_ring);
	}

	/* Tx vmxnet rings structure initialization */
	ring->next2fill = 0;
	ring->next2comp = 0;
	ring->gen = VMXNET3_INIT_GEN;
	comp_ring->next2proc = 0;
	comp_ring->gen = VMXNET3_INIT_GEN;

	size = sizeof(struct Vmxnet3_TxDesc) * ring->size;
	size += sizeof(struct Vmxnet3_TxCompDesc) * comp_ring->size;
	size += sizeof(struct Vmxnet3_TxDataDesc) * data_ring->size;

	memset(ring->base, 0, size);
}

static void
vmxnet3_dev_rx_queue_reset(void *rxq)
{
	int i;
	vmxnet3_rx_queue_t *rq = rxq;
	struct vmxnet3_cmd_ring *ring0, *ring1;
	struct vmxnet3_comp_ring *comp_ring;
	int size;

	if (rq != NULL) {
		/* Release both the cmd_rings mbufs */
		for (i = 0; i < VMXNET3_RX_CMDRING_SIZE; i++)
			vmxnet3_cmd_ring_release_mbufs(&rq->cmd_ring[i]);
	}

	ring0 = &rq->cmd_ring[0];
	ring1 = &rq->cmd_ring[1];
	comp_ring = &rq->comp_ring;

	/* Rx vmxnet rings structure initialization */
	ring0->next2fill = 0;
	ring1->next2fill = 0;
	ring0->next2comp = 0;
	ring1->next2comp = 0;
	ring0->gen = VMXNET3_INIT_GEN;
	ring1->gen = VMXNET3_INIT_GEN;
	comp_ring->next2proc = 0;
	comp_ring->gen = VMXNET3_INIT_GEN;

	size = sizeof(struct Vmxnet3_RxDesc) * (ring0->size + ring1->size);
	size += sizeof(struct Vmxnet3_RxCompDesc) * comp_ring->size;

	memset(ring0->base, 0, size);
}

void
vmxnet3_dev_clear_queues(struct rte_eth_dev *dev)
{
	unsigned i;

	PMD_INIT_FUNC_TRACE();

	for (i = 0; i < dev->data->nb_tx_queues; i++) {
		struct vmxnet3_tx_queue *txq = dev->data->tx_queues[i];

		if (txq != NULL) {
			txq->stopped = TRUE;
			vmxnet3_dev_tx_queue_reset(txq);
		}
	}

	for (i = 0; i < dev->data->nb_rx_queues; i++) {
		struct vmxnet3_rx_queue *rxq = dev->data->rx_queues[i];

		if (rxq != NULL) {
			rxq->stopped = TRUE;
			vmxnet3_dev_rx_queue_reset(rxq);
		}
	}
}

static int
vmxnet3_unmap_pkt(uint16_t eop_idx, vmxnet3_tx_queue_t *txq)
{
	int completed = 0;
	struct rte_mbuf *mbuf;

	/* Release cmd_ring descriptor and free mbuf */
	RTE_ASSERT(txq->cmd_ring.base[eop_idx].txd.eop == 1);

	mbuf = txq->cmd_ring.buf_info[eop_idx].m;
	if (mbuf == NULL)
		rte_panic("EOP desc does not point to a valid mbuf");
	rte_pktmbuf_free(mbuf);

	txq->cmd_ring.buf_info[eop_idx].m = NULL;

	while (txq->cmd_ring.next2comp != eop_idx) {
		/* no out-of-order completion */
		RTE_ASSERT(txq->cmd_ring.base[txq->cmd_ring.next2comp].txd.cq == 0);
		vmxnet3_cmd_ring_adv_next2comp(&txq->cmd_ring);
		completed++;
	}

	/* Mark the txd for which tcd was generated as completed */
	vmxnet3_cmd_ring_adv_next2comp(&txq->cmd_ring);

	return completed + 1;
}
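
/*
 * Completion-ring ownership is tracked with a generation bit: an entry
 * belongs to the driver only while tcd->gen matches the ring's current
 * gen value; the device flips the bit whenever it writes a new entry.
 */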
static void
vmxnet3_tq_tx_complete(vmxnet3_tx_queue_t *txq)
{
	int completed = 0;
	vmxnet3_comp_ring_t *comp_ring = &txq->comp_ring;
	struct Vmxnet3_TxCompDesc *tcd = (struct Vmxnet3_TxCompDesc *)
		(comp_ring->base + comp_ring->next2proc);

	while (tcd->gen == comp_ring->gen) {
		completed += vmxnet3_unmap_pkt(tcd->txdIdx, txq);

		vmxnet3_comp_ring_adv_next2proc(comp_ring);
		tcd = (struct Vmxnet3_TxCompDesc *)(comp_ring->base +
						    comp_ring->next2proc);
	}

	PMD_TX_LOG(DEBUG, "Processed %d tx comps & command descs.", completed);
}

uint16_t
vmxnet3_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts,
		  uint16_t nb_pkts)
{
	uint16_t nb_tx;
	vmxnet3_tx_queue_t *txq = tx_queue;
	struct vmxnet3_hw *hw = txq->hw;
	Vmxnet3_TxQueueCtrl *txq_ctrl = &txq->shared->ctrl;
	uint32_t deferred = rte_le_to_cpu_32(txq_ctrl->txNumDeferred);

	if (unlikely(txq->stopped)) {
		PMD_TX_LOG(DEBUG, "Tx queue is stopped.");
		return 0;
	}

	/* Free up the comp_descriptors aggressively */
	vmxnet3_tq_tx_complete(txq);

	nb_tx = 0;
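	/*
	 * Each mbuf segment consumes one Tx descriptor; a packet is queued
	 * only when descriptors for all of its segments are available.
	 */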
	while (nb_tx < nb_pkts) {
		Vmxnet3_GenericDesc *gdesc;
		vmxnet3_buf_info_t *tbi;
		uint32_t first2fill, avail, dw2;
		struct rte_mbuf *txm = tx_pkts[nb_tx];
		struct rte_mbuf *m_seg = txm;
		int copy_size = 0;
		bool tso = (txm->ol_flags & PKT_TX_TCP_SEG) != 0;
		/* # of descriptors needed for a packet. */
		unsigned count = txm->nb_segs;

		avail = vmxnet3_cmd_ring_desc_avail(&txq->cmd_ring);
		if (count > avail) {
			/* Is command ring full? */
			if (unlikely(avail == 0)) {
				PMD_TX_LOG(DEBUG, "No free ring descriptors");
				txq->stats.tx_ring_full++;
				txq->stats.drop_total += (nb_pkts - nb_tx);
				break;
			}

			/* Command ring is not full but cannot handle the
			 * multi-segmented packet. Let's try the next packet
			 * in this case.
			 */
			PMD_TX_LOG(DEBUG, "Running out of ring descriptors "
				   "(avail %d needed %d)", avail, count);
			txq->stats.drop_total++;
			if (tso)
				txq->stats.drop_tso++;
			rte_pktmbuf_free(txm);
			nb_tx++;
			continue;
		}

		/* Drop non-TSO packet that is excessively fragmented */
		if (unlikely(!tso && count > VMXNET3_MAX_TXD_PER_PKT)) {
			PMD_TX_LOG(ERR, "Non-TSO packet cannot occupy more than %d tx "
				   "descriptors. Packet dropped.", VMXNET3_MAX_TXD_PER_PKT);
			txq->stats.drop_too_many_segs++;
			txq->stats.drop_total++;
			rte_pktmbuf_free(txm);
			nb_tx++;
			continue;
		}
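
		/*
		 * A small single-segment frame is copied into the Tx data
		 * ring below, so the device can fetch the payload alongside
		 * the descriptor instead of reading a separate guest buffer.
		 */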
		if (txm->nb_segs == 1 && rte_pktmbuf_pkt_len(txm) <= VMXNET3_HDR_COPY_SIZE) {
			struct Vmxnet3_TxDataDesc *tdd;

			tdd = txq->data_ring.base + txq->cmd_ring.next2fill;
			copy_size = rte_pktmbuf_pkt_len(txm);
			rte_memcpy(tdd->data, rte_pktmbuf_mtod(txm, char *), copy_size);
		}

		/* use the previous gen bit for the SOP desc */
		dw2 = (txq->cmd_ring.gen ^ 0x1) << VMXNET3_TXD_GEN_SHIFT;
		first2fill = txq->cmd_ring.next2fill;
		do {
			/* Remember the transmit buffer for cleanup */
			tbi = txq->cmd_ring.buf_info + txq->cmd_ring.next2fill;

			/* NB: the following assumes that VMXNET3 maximum
			 * transmit buffer size (16K) is greater than
			 * maximum size of mbuf segment size.
			 */
			gdesc = txq->cmd_ring.base + txq->cmd_ring.next2fill;
			if (copy_size)
				gdesc->txd.addr = rte_cpu_to_le_64(txq->data_ring.basePA +
								   txq->cmd_ring.next2fill *
								   sizeof(struct Vmxnet3_TxDataDesc));
			else
				gdesc->txd.addr = rte_mbuf_data_dma_addr(m_seg);

			gdesc->dword[2] = dw2 | m_seg->data_len;
			gdesc->dword[3] = 0;

			/* move to the next2fill descriptor */
			vmxnet3_cmd_ring_adv_next2fill(&txq->cmd_ring);

			/* use the right gen for non-SOP desc */
			dw2 = txq->cmd_ring.gen << VMXNET3_TXD_GEN_SHIFT;
		} while ((m_seg = m_seg->next) != NULL);

		/* set the last buf_info for the pkt */
		tbi->m = txm;
		/* Update the EOP descriptor */
		gdesc->dword[3] |= VMXNET3_TXD_EOP | VMXNET3_TXD_CQ;
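		/* EOP marks the last descriptor of the packet; CQ requests a
		 * Tx completion entry from the device once it has been sent.
		 */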

		/* Add VLAN tag if present */
		gdesc = txq->cmd_ring.base + first2fill;
		if (txm->ol_flags & PKT_TX_VLAN_PKT) {
			gdesc->txd.ti = 1;
			gdesc->txd.tci = txm->vlan_tci;
		}

		if (tso) {
			uint16_t mss = txm->tso_segsz;

			RTE_ASSERT(mss > 0);

			gdesc->txd.hlen = txm->l2_len + txm->l3_len + txm->l4_len;
			gdesc->txd.om = VMXNET3_OM_TSO;
			gdesc->txd.msscof = mss;

			deferred += (rte_pktmbuf_pkt_len(txm) - gdesc->txd.hlen + mss - 1) / mss;
		} else if (txm->ol_flags & PKT_TX_L4_MASK) {
			gdesc->txd.om = VMXNET3_OM_CSUM;
			gdesc->txd.hlen = txm->l2_len + txm->l3_len;

			switch (txm->ol_flags & PKT_TX_L4_MASK) {
			case PKT_TX_TCP_CKSUM:
				gdesc->txd.msscof = gdesc->txd.hlen + offsetof(struct tcp_hdr, cksum);
				break;
			case PKT_TX_UDP_CKSUM:
				gdesc->txd.msscof = gdesc->txd.hlen + offsetof(struct udp_hdr, dgram_cksum);
				break;
			default:
				PMD_TX_LOG(WARNING, "requested cksum offload not supported %#llx",
					   txm->ol_flags & PKT_TX_L4_MASK);
				abort();
			}
			deferred++;
		} else {
			gdesc->txd.hlen = 0;
			gdesc->txd.om = VMXNET3_OM_NONE;
			gdesc->txd.msscof = 0;
			deferred++;
		}

		/* flip the GEN bit on the SOP */
		rte_compiler_barrier();
		gdesc->dword[2] ^= VMXNET3_TXD_GEN;

		txq_ctrl->txNumDeferred = rte_cpu_to_le_32(deferred);
		nb_tx++;
	}
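
	/*
	 * Doorbell writes are batched: the device is poked only once at
	 * least txThreshold packets have been queued since the last write.
	 */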
	PMD_TX_LOG(DEBUG, "vmxnet3 txThreshold: %u", rte_le_to_cpu_32(txq_ctrl->txThreshold));

	if (deferred >= rte_le_to_cpu_32(txq_ctrl->txThreshold)) {
		txq_ctrl->txNumDeferred = 0;
		/* Notify vSwitch that packets are available. */
		VMXNET3_WRITE_BAR0_REG(hw, (VMXNET3_REG_TXPROD + txq->queue_id * VMXNET3_REG_ALIGN),
				       txq->cmd_ring.next2fill);
	}

	return nb_tx;
}

/*
 *  Allocates mbufs and clusters. Post rx descriptors with buffer details
 *  so that device can receive packets in those buffers.
 *  Ring layout:
 *      Among the two rings, the 1st ring contains buffers of type 0 and type 1.
 *      bufs_per_pkt is set such that for non-LRO cases all the buffers required
 *      by a frame will fit in the 1st ring (1st buf of type 0 and rest of type 1).
 *      The 2nd ring contains buffers of type 1 only and is mostly used for LRO,
 *      i.e. when a packet does not fit into the buffers of the 1st ring.
 */
static int
vmxnet3_post_rx_bufs(vmxnet3_rx_queue_t *rxq, uint8_t ring_id)
{
	int err = 0;
	uint32_t i = 0, val = 0;
	struct vmxnet3_cmd_ring *ring = &rxq->cmd_ring[ring_id];

	if (ring_id == 0) {
		/* Usually: One HEAD type buf per packet
		 * val = (ring->next2fill % rxq->hw->bufs_per_pkt) ?
		 * VMXNET3_RXD_BTYPE_BODY : VMXNET3_RXD_BTYPE_HEAD;
		 */

		/* We use single packet buffer so all heads here */
		val = VMXNET3_RXD_BTYPE_HEAD;
	} else {
		/* All BODY type buffers for 2nd ring */
		val = VMXNET3_RXD_BTYPE_BODY;
	}
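
	/*
	 * HEAD buffers receive the first bytes of a frame; BODY buffers take
	 * the remainder when a frame spans multiple buffers (e.g. with LRO).
	 */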
	while (vmxnet3_cmd_ring_desc_avail(ring) > 0) {
		struct Vmxnet3_RxDesc *rxd;
		struct rte_mbuf *mbuf;
		vmxnet3_buf_info_t *buf_info = &ring->buf_info[ring->next2fill];

		rxd = (struct Vmxnet3_RxDesc *)(ring->base + ring->next2fill);

		/* Allocate blank mbuf for the current Rx Descriptor */
		mbuf = rte_mbuf_raw_alloc(rxq->mp);
		if (unlikely(mbuf == NULL)) {
			PMD_RX_LOG(ERR, "Error allocating mbuf");
			rxq->stats.rx_buf_alloc_failure++;
			err = ENOMEM;
			break;
		}

		/*
		 * Load mbuf pointer into buf_info[ring->next2fill];
		 * the buf_info structure is the equivalent of a virtio-virtqueue cookie.
		 */
		buf_info->m = mbuf;
		buf_info->len = (uint16_t)(mbuf->buf_len -
					   RTE_PKTMBUF_HEADROOM);
		buf_info->bufPA =
			rte_mbuf_data_dma_addr_default(mbuf);

		/* Load Rx Descriptor with the buffer's GPA */
		rxd->addr = buf_info->bufPA;

		/* After this point rxd->addr MUST not be NULL */
		rxd->btype = val;
		rxd->len = buf_info->len;
		/* Flip gen bit at the end to change ownership */
		rxd->gen = ring->gen;

		vmxnet3_cmd_ring_adv_next2fill(ring);
		i++;
	}

	/* Return error only if no buffers are posted at present */
	if (vmxnet3_cmd_ring_desc_avail(ring) >= (ring->size - 1))
		return -err;
	else
		return i;
}

/* Receive side checksum and other offloads */
static void
vmxnet3_rx_offload(const Vmxnet3_RxCompDesc *rcd, struct rte_mbuf *rxm)
{
	/* Check for RSS */
	if (rcd->rssType != VMXNET3_RCD_RSS_TYPE_NONE) {
		rxm->ol_flags |= PKT_RX_RSS_HASH;
		rxm->hash.rss = rcd->rssHash;
	}

	/* Check packet type, checksum errors, etc. Only support IPv4 for now. */
	if (rcd->v4) {
		struct ether_hdr *eth = rte_pktmbuf_mtod(rxm, struct ether_hdr *);
		struct ipv4_hdr *ip = (struct ipv4_hdr *)(eth + 1);
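
		/* IHL is counted in 32-bit words; anything beyond the bare
		 * 20-byte header means IPv4 options are present.
		 */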
		if (((ip->version_ihl & 0xf) << 2) > (int)sizeof(struct ipv4_hdr))
			rxm->packet_type = RTE_PTYPE_L3_IPV4_EXT;
		else
			rxm->packet_type = RTE_PTYPE_L3_IPV4;

		if (!rcd->cnc) {
			if (!rcd->ipc)
				rxm->ol_flags |= PKT_RX_IP_CKSUM_BAD;

			if ((rcd->tcp || rcd->udp) && !rcd->tuc)
				rxm->ol_flags |= PKT_RX_L4_CKSUM_BAD;
		}
	}
}

/*
 * Process the Rx Completion Ring of given vmxnet3_rx_queue
 * for nb_pkts burst and return the number of packets received
 */
uint16_t
vmxnet3_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t nb_pkts)
{
	uint16_t nb_rx;
	uint32_t nb_rxd, idx;
	uint8_t ring_idx;
	vmxnet3_rx_queue_t *rxq;
	Vmxnet3_RxCompDesc *rcd;
	vmxnet3_buf_info_t *rbi;
	Vmxnet3_RxDesc *rxd;
	struct rte_mbuf *rxm = NULL;
	struct vmxnet3_hw *hw;

	nb_rx = 0;
	ring_idx = 0;
	nb_rxd = 0;
	idx = 0;

	rxq = rx_queue;
	hw = rxq->hw;

	rcd = &rxq->comp_ring.base[rxq->comp_ring.next2proc].rcd;

	if (unlikely(rxq->stopped)) {
		PMD_RX_LOG(DEBUG, "Rx queue is stopped.");
		return 0;
	}
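
	/*
	 * A completion entry whose gen bit matches the ring's current
	 * generation has been filled by the device and is ready to process.
	 */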
	while (rcd->gen == rxq->comp_ring.gen) {
		if (nb_rx >= nb_pkts)
			break;

		idx = rcd->rxdIdx;
		ring_idx = (uint8_t)((rcd->rqID == rxq->qid1) ? 0 : 1);
		rxd = (Vmxnet3_RxDesc *)rxq->cmd_ring[ring_idx].base + idx;
		RTE_SET_USED(rxd); /* used only for assert when enabled */
		rbi = rxq->cmd_ring[ring_idx].buf_info + idx;

		PMD_RX_LOG(DEBUG, "rxd idx: %d ring idx: %d.", idx, ring_idx);

		RTE_ASSERT(rcd->len <= rxd->len);
		RTE_ASSERT(rbi->m);

		/* Get the packet buffer pointer from buf_info */
		rxm = rbi->m;

		/* Clear descriptor associated buf_info to be reused */
		rbi->m = NULL;
		rbi->bufPA = 0;

		/* Update the index that we received a packet */
		rxq->cmd_ring[ring_idx].next2comp = idx;

		/* For RCD with EOP set, check if there is frame error */
		if (unlikely(rcd->eop && rcd->err)) {
			rxq->stats.drop_total++;
			rxq->stats.drop_err++;

			if (!rcd->fcs) {
				rxq->stats.drop_fcs++;
				PMD_RX_LOG(ERR, "Recv packet dropped due to frame err.");
			}
			PMD_RX_LOG(ERR, "Error in received packet rcd#:%d rxd:%d",
				   (int)(rcd - (struct Vmxnet3_RxCompDesc *)
					 rxq->comp_ring.base), rcd->rxdIdx);
			rte_pktmbuf_free_seg(rxm);
			goto rcd_done;
		}

		/* Initialize newly received packet buffer */
		rxm->port = rxq->port_id;
		rxm->nb_segs = 1;
		rxm->next = NULL;
		rxm->pkt_len = (uint16_t)rcd->len;
		rxm->data_len = (uint16_t)rcd->len;
		rxm->data_off = RTE_PKTMBUF_HEADROOM;
		rxm->ol_flags = 0;
		rxm->vlan_tci = 0;

		/*
		 * If this is the first buffer of the received packet,
		 * set the pointer to the first mbuf of the packet
		 * Otherwise, update the total length and the number of segments
		 * of the current scattered packet, and update the pointer to
		 * the last mbuf of the current packet.
		 */
		if (rcd->sop) {
			RTE_ASSERT(rxd->btype == VMXNET3_RXD_BTYPE_HEAD);

			if (unlikely(rcd->len == 0)) {
				RTE_ASSERT(rcd->eop);

				PMD_RX_LOG(DEBUG,
					   "Rx buf was skipped. rxring[%d][%d])",
					   ring_idx, idx);
				rte_pktmbuf_free_seg(rxm);
				goto rcd_done;
			}

			rxq->start_seg = rxm;
			vmxnet3_rx_offload(rcd, rxm);
		} else {
			struct rte_mbuf *start = rxq->start_seg;

			RTE_ASSERT(rxd->btype == VMXNET3_RXD_BTYPE_BODY);

			start->pkt_len += rxm->data_len;
			start->nb_segs++;

			rxq->last_seg->next = rxm;
		}
		rxq->last_seg = rxm;

		if (rcd->eop) {
			struct rte_mbuf *start = rxq->start_seg;

			/* Check for hardware stripped VLAN tag */
			if (rcd->ts) {
				start->ol_flags |= (PKT_RX_VLAN_PKT | PKT_RX_VLAN_STRIPPED);
				start->vlan_tci = rte_le_to_cpu_16((uint16_t)rcd->tci);
			}

			rx_pkts[nb_rx++] = start;
			rxq->start_seg = NULL;
		}

rcd_done:
		rxq->cmd_ring[ring_idx].next2comp = idx;
		VMXNET3_INC_RING_IDX_ONLY(rxq->cmd_ring[ring_idx].next2comp, rxq->cmd_ring[ring_idx].size);

		/* It's time to allocate some new buf and renew descriptors */
		vmxnet3_post_rx_bufs(rxq, ring_idx);
		if (unlikely(rxq->shared->ctrl.updateRxProd)) {
			VMXNET3_WRITE_BAR0_REG(hw, rxprod_reg[ring_idx] + (rxq->queue_id * VMXNET3_REG_ALIGN),
					       rxq->cmd_ring[ring_idx].next2fill);
		}

		/* Advance to the next descriptor in comp_ring */
		vmxnet3_comp_ring_adv_next2proc(&rxq->comp_ring);

		rcd = &rxq->comp_ring.base[rxq->comp_ring.next2proc].rcd;
		nb_rxd++;
		if (nb_rxd > rxq->cmd_ring[0].size) {
			PMD_RX_LOG(ERR, "Used up quota of receiving packets,"
				   " relinquish control.");
			break;
		}
	}

	return nb_rx;
}

/*
 * Create memzone for device rings. malloc can't be used as the physical address is
 * needed. If the memzone is already created, then this function returns a ptr
 * to the old one.
 */
static const struct rte_memzone *
ring_dma_zone_reserve(struct rte_eth_dev *dev, const char *ring_name,
		      uint16_t queue_id, uint32_t ring_size, int socket_id)
{
	char z_name[RTE_MEMZONE_NAMESIZE];
	const struct rte_memzone *mz;

	snprintf(z_name, sizeof(z_name), "%s_%s_%d_%d",
		 dev->driver->pci_drv.name, ring_name,
		 dev->data->port_id, queue_id);

	mz = rte_memzone_lookup(z_name);
	if (mz)
		return mz;

	return rte_memzone_reserve_aligned(z_name, ring_size,
					   socket_id, 0, VMXNET3_RING_BA_ALIGN);
}

int
vmxnet3_dev_tx_queue_setup(struct rte_eth_dev *dev,
			   uint16_t queue_idx,
			   uint16_t nb_desc,
			   unsigned int socket_id,
			   __attribute__((unused)) const struct rte_eth_txconf *tx_conf)
{
	struct vmxnet3_hw *hw = dev->data->dev_private;
	const struct rte_memzone *mz;
	struct vmxnet3_tx_queue *txq;
	struct vmxnet3_cmd_ring *ring;
	struct vmxnet3_comp_ring *comp_ring;
	struct vmxnet3_data_ring *data_ring;
	int size;

	PMD_INIT_FUNC_TRACE();

	if ((tx_conf->txq_flags & ETH_TXQ_FLAGS_NOXSUMSCTP) !=
	    ETH_TXQ_FLAGS_NOXSUMSCTP) {
		PMD_INIT_LOG(ERR, "SCTP checksum offload not supported");
		return -EINVAL;
	}

	txq = rte_zmalloc("ethdev_tx_queue", sizeof(struct vmxnet3_tx_queue), RTE_CACHE_LINE_SIZE);
	if (txq == NULL) {
		PMD_INIT_LOG(ERR, "Can not allocate tx queue structure");
		return -ENOMEM;
	}

	txq->queue_id = queue_idx;
	txq->port_id = dev->data->port_id;
	txq->shared = &hw->tqd_start[queue_idx];
	txq->hw = hw;
	txq->qid = queue_idx;
	txq->stopped = TRUE;

	ring = &txq->cmd_ring;
	comp_ring = &txq->comp_ring;
	data_ring = &txq->data_ring;

	/* Tx vmxnet ring length should be between 512-4096 */
	if (nb_desc < VMXNET3_DEF_TX_RING_SIZE) {
		PMD_INIT_LOG(ERR, "VMXNET3 Tx Ring Size Min: %u",
			     VMXNET3_DEF_TX_RING_SIZE);
		return -EINVAL;
	} else if (nb_desc > VMXNET3_TX_RING_MAX_SIZE) {
		PMD_INIT_LOG(ERR, "VMXNET3 Tx Ring Size Max: %u",
			     VMXNET3_TX_RING_MAX_SIZE);
		return -EINVAL;
	} else {
		ring->size = nb_desc;
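		/* The ring length must be a multiple of the device's ring
		 * alignment; the mask below rounds nb_desc down accordingly.
		 */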
		ring->size &= ~VMXNET3_RING_SIZE_MASK;
	}

	comp_ring->size = data_ring->size = ring->size;

	/* Tx vmxnet rings structure initialization */
	ring->next2fill = 0;
	ring->next2comp = 0;
	ring->gen = VMXNET3_INIT_GEN;
	comp_ring->next2proc = 0;
	comp_ring->gen = VMXNET3_INIT_GEN;

	size = sizeof(struct Vmxnet3_TxDesc) * ring->size;
	size += sizeof(struct Vmxnet3_TxCompDesc) * comp_ring->size;
	size += sizeof(struct Vmxnet3_TxDataDesc) * data_ring->size;

	mz = ring_dma_zone_reserve(dev, "txdesc", queue_idx, size, socket_id);
	if (mz == NULL) {
		PMD_INIT_LOG(ERR, "ERROR: Creating queue descriptors zone");
		return -ENOMEM;
	}
	memset(mz->addr, 0, mz->len);

	/* cmd_ring initialization */
	ring->base = mz->addr;
	ring->basePA = mz->phys_addr;

	/* comp_ring initialization */
	comp_ring->base = ring->base + ring->size;
	comp_ring->basePA = ring->basePA +
			    (sizeof(struct Vmxnet3_TxDesc) * ring->size);

	/* data_ring initialization */
	data_ring->base = (Vmxnet3_TxDataDesc *)(comp_ring->base + comp_ring->size);
	data_ring->basePA = comp_ring->basePA +
			    (sizeof(struct Vmxnet3_TxCompDesc) * comp_ring->size);

	/* cmd_ring0 buf_info allocation */
	ring->buf_info = rte_zmalloc("tx_ring_buf_info",
				     ring->size * sizeof(vmxnet3_buf_info_t), RTE_CACHE_LINE_SIZE);
	if (ring->buf_info == NULL) {
		PMD_INIT_LOG(ERR, "ERROR: Creating tx_buf_info structure");
		return -ENOMEM;
	}

	/* Update the data portion with txq */
	dev->data->tx_queues[queue_idx] = txq;

	return 0;
}

int
vmxnet3_dev_rx_queue_setup(struct rte_eth_dev *dev,
			   uint16_t queue_idx,
			   uint16_t nb_desc,
			   unsigned int socket_id,
			   __attribute__((unused)) const struct rte_eth_rxconf *rx_conf,
			   struct rte_mempool *mp)
{
	const struct rte_memzone *mz;
	struct vmxnet3_rx_queue *rxq;
	struct vmxnet3_hw *hw = dev->data->dev_private;
	struct vmxnet3_cmd_ring *ring0, *ring1, *ring;
	struct vmxnet3_comp_ring *comp_ring;
	int size;
	uint8_t i;
	char mem_name[32];

	PMD_INIT_FUNC_TRACE();

	rxq = rte_zmalloc("ethdev_rx_queue", sizeof(struct vmxnet3_rx_queue), RTE_CACHE_LINE_SIZE);
	if (rxq == NULL) {
		PMD_INIT_LOG(ERR, "Can not allocate rx queue structure");
		return -ENOMEM;
	}

	rxq->mp = mp;
	rxq->queue_id = queue_idx;
	rxq->port_id = dev->data->port_id;
	rxq->shared = &hw->rqd_start[queue_idx];
	rxq->hw = hw;
	rxq->qid1 = queue_idx;
	rxq->qid2 = queue_idx + hw->num_rx_queues;
	rxq->stopped = TRUE;

	ring0 = &rxq->cmd_ring[0];
	ring1 = &rxq->cmd_ring[1];
	comp_ring = &rxq->comp_ring;

	/* Rx vmxnet rings length should be between 256-4096 */
	if (nb_desc < VMXNET3_DEF_RX_RING_SIZE) {
		PMD_INIT_LOG(ERR, "VMXNET3 Rx Ring Size Min: 256");
		return -EINVAL;
	} else if (nb_desc > VMXNET3_RX_RING_MAX_SIZE) {
		PMD_INIT_LOG(ERR, "VMXNET3 Rx Ring Size Max: 4096");
		return -EINVAL;
	} else {
		ring0->size = nb_desc;
		ring0->size &= ~VMXNET3_RING_SIZE_MASK;
		ring1->size = ring0->size;
	}

	comp_ring->size = ring0->size + ring1->size;

	/* Rx vmxnet rings structure initialization */
	ring0->next2fill = 0;
	ring1->next2fill = 0;
	ring0->next2comp = 0;
	ring1->next2comp = 0;
	ring0->gen = VMXNET3_INIT_GEN;
	ring1->gen = VMXNET3_INIT_GEN;
	comp_ring->next2proc = 0;
	comp_ring->gen = VMXNET3_INIT_GEN;

	size = sizeof(struct Vmxnet3_RxDesc) * (ring0->size + ring1->size);
	size += sizeof(struct Vmxnet3_RxCompDesc) * comp_ring->size;

	mz = ring_dma_zone_reserve(dev, "rxdesc", queue_idx, size, socket_id);
	if (mz == NULL) {
		PMD_INIT_LOG(ERR, "ERROR: Creating queue descriptors zone");
		return -ENOMEM;
	}
	memset(mz->addr, 0, mz->len);

	/* cmd_ring0 initialization */
	ring0->base = mz->addr;
	ring0->basePA = mz->phys_addr;

	/* cmd_ring1 initialization */
	ring1->base = ring0->base + ring0->size;
	ring1->basePA = ring0->basePA + sizeof(struct Vmxnet3_RxDesc) * ring0->size;

	/* comp_ring initialization */
	comp_ring->base = ring1->base + ring1->size;
	comp_ring->basePA = ring1->basePA + sizeof(struct Vmxnet3_RxDesc) *
			    ring1->size;

	/* cmd_ring0-cmd_ring1 buf_info allocation */
	for (i = 0; i < VMXNET3_RX_CMDRING_SIZE; i++) {
		ring = &rxq->cmd_ring[i];
		ring->rid = i;
		snprintf(mem_name, sizeof(mem_name), "rx_ring_%d_buf_info", i);

		ring->buf_info = rte_zmalloc(mem_name, ring->size * sizeof(vmxnet3_buf_info_t), RTE_CACHE_LINE_SIZE);
		if (ring->buf_info == NULL) {
			PMD_INIT_LOG(ERR, "ERROR: Creating rx_buf_info structure");
			return -ENOMEM;
		}
	}

	/* Update the data portion with rxq */
	dev->data->rx_queues[queue_idx] = rxq;

	return 0;
}

/*
 * Initializes Receive Unit
 * Load mbufs in rx queue in advance
 */
int
vmxnet3_dev_rxtx_init(struct rte_eth_dev *dev)
{
	struct vmxnet3_hw *hw = dev->data->dev_private;

	int i, ret;
	uint8_t j;

	PMD_INIT_FUNC_TRACE();

	for (i = 0; i < hw->num_rx_queues; i++) {
		vmxnet3_rx_queue_t *rxq = dev->data->rx_queues[i];

		for (j = 0; j < VMXNET3_RX_CMDRING_SIZE; j++) {
			/* vmxnet3_post_rx_bufs() fills every free descriptor in the ring */
			ret = vmxnet3_post_rx_bufs(rxq, j);
			if (ret <= 0) {
				PMD_INIT_LOG(ERR, "ERROR: Posting Rxq: %d buffers ring: %d", i, j);
				return -ret;
			}
			/* Updating device with the index:next2fill to fill the mbufs for coming packets */
			if (unlikely(rxq->shared->ctrl.updateRxProd)) {
				VMXNET3_WRITE_BAR0_REG(hw, rxprod_reg[j] + (rxq->queue_id * VMXNET3_REG_ALIGN),
						       rxq->cmd_ring[j].next2fill);
			}
		}
		rxq->stopped = FALSE;
		rxq->start_seg = NULL;
	}

	for (i = 0; i < dev->data->nb_tx_queues; i++) {
		struct vmxnet3_tx_queue *txq = dev->data->tx_queues[i];

		txq->stopped = FALSE;
	}

	return VMXNET3_SUCCESS;
}
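
/*
 * Default 40-byte Toeplitz RSS key, used when the application does not
 * provide one; it is the same well-known default key Intel NICs use.
 */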
static uint8_t rss_intel_key[40] = {
	0x6D, 0x5A, 0x56, 0xDA, 0x25, 0x5B, 0x0E, 0xC2,
	0x41, 0x67, 0x25, 0x3D, 0x43, 0xA3, 0x8F, 0xB0,
	0xD0, 0xCA, 0x2B, 0xCB, 0xAE, 0x7B, 0x30, 0xB4,
	0x77, 0xCB, 0x2D, 0xA3, 0x80, 0x30, 0xF2, 0x0C,
	0x6A, 0x42, 0xB7, 0x3B, 0xBE, 0xAC, 0x01, 0xFA,
};

/*
 * Configure RSS feature
 */
int
vmxnet3_rss_configure(struct rte_eth_dev *dev)
{
	struct vmxnet3_hw *hw = dev->data->dev_private;
	struct VMXNET3_RSSConf *dev_rss_conf;
	struct rte_eth_rss_conf *port_rss_conf;
	uint64_t rss_hf;
	uint8_t i, j;

	PMD_INIT_FUNC_TRACE();

	dev_rss_conf = hw->rss_conf;
	port_rss_conf = &dev->data->dev_conf.rx_adv_conf.rss_conf;

	/* loading hashFunc */
	dev_rss_conf->hashFunc = VMXNET3_RSS_HASH_FUNC_TOEPLITZ;
	/* loading hashKeySize */
	dev_rss_conf->hashKeySize = VMXNET3_RSS_MAX_KEY_SIZE;
	/* loading indTableSize: must not exceed VMXNET3_RSS_MAX_IND_TABLE_SIZE (128) */
	dev_rss_conf->indTableSize = (uint16_t)(hw->num_rx_queues * 4);

	if (port_rss_conf->rss_key == NULL) {
		/* Default hash key */
		port_rss_conf->rss_key = rss_intel_key;
	}

	/* loading hashKey */
	memcpy(&dev_rss_conf->hashKey[0], port_rss_conf->rss_key, dev_rss_conf->hashKeySize);

	/* loading indTable */
	for (i = 0, j = 0; i < dev_rss_conf->indTableSize; i++, j++) {
		if (j == dev->data->nb_rx_queues)
			j = 0;
		dev_rss_conf->indTable[i] = j;
	}

	/* loading hashType */
	dev_rss_conf->hashType = 0;
	rss_hf = port_rss_conf->rss_hf & VMXNET3_RSS_OFFLOAD_ALL;
	if (rss_hf & ETH_RSS_IPV4)
		dev_rss_conf->hashType |= VMXNET3_RSS_HASH_TYPE_IPV4;
	if (rss_hf & ETH_RSS_NONFRAG_IPV4_TCP)
		dev_rss_conf->hashType |= VMXNET3_RSS_HASH_TYPE_TCP_IPV4;
	if (rss_hf & ETH_RSS_IPV6)
		dev_rss_conf->hashType |= VMXNET3_RSS_HASH_TYPE_IPV6;
	if (rss_hf & ETH_RSS_NONFRAG_IPV6_TCP)
		dev_rss_conf->hashType |= VMXNET3_RSS_HASH_TYPE_TCP_IPV6;

	return VMXNET3_SUCCESS;
}