/*-
 *   BSD LICENSE
 *
 *   Copyright(c) 2010-2015 Intel Corporation. All rights reserved.
 *   All rights reserved.
 *
 *   Redistribution and use in source and binary forms, with or without
 *   modification, are permitted provided that the following conditions
 *   are met:
 *
 *     * Redistributions of source code must retain the above copyright
 *       notice, this list of conditions and the following disclaimer.
 *     * Redistributions in binary form must reproduce the above copyright
 *       notice, this list of conditions and the following disclaimer in
 *       the documentation and/or other materials provided with the
 *       distribution.
 *     * Neither the name of Intel Corporation nor the names of its
 *       contributors may be used to endorse or promote products derived
 *       from this software without specific prior written permission.
 *
 *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */
#include <sys/queue.h>

#include <rte_byteorder.h>
#include <rte_common.h>
#include <rte_cycles.h>
#include <rte_debug.h>
#include <rte_interrupts.h>
#include <rte_memory.h>
#include <rte_memzone.h>
#include <rte_launch.h>
#include <rte_per_lcore.h>
#include <rte_lcore.h>
#include <rte_atomic.h>
#include <rte_branch_prediction.h>
#include <rte_mempool.h>
#include <rte_malloc.h>
#include <rte_mbuf.h>
#include <rte_ether.h>
#include <rte_ethdev.h>
#include <rte_prefetch.h>
#include <rte_ip.h>
#include <rte_udp.h>
#include <rte_tcp.h>
#include <rte_string_fns.h>
#include <rte_errno.h>

#include "base/vmxnet3_defs.h"
#include "vmxnet3_ring.h"

#include "vmxnet3_logs.h"
#include "vmxnet3_ethdev.h"
static const uint32_t rxprod_reg[2] = {VMXNET3_REG_RXPROD, VMXNET3_REG_RXPROD2};
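/*
 * rxprod_reg[] maps an Rx command ring index (0 or 1) to the BAR0 doorbell
 * register used to publish refilled descriptors: after posting new buffers
 * the driver writes the ring's next2fill index there (see the callers of
 * vmxnet3_post_rx_bufs() below).
 */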
static int vmxnet3_post_rx_bufs(vmxnet3_rx_queue_t*, uint8_t);
static void vmxnet3_tq_tx_complete(vmxnet3_tx_queue_t *);

#ifdef RTE_LIBRTE_VMXNET3_DEBUG_DRIVER_NOT_USED
static void vmxnet3_rxq_dump(struct vmxnet3_rx_queue *);
static void vmxnet3_txq_dump(struct vmxnet3_tx_queue *);
#endif

#ifdef RTE_LIBRTE_VMXNET3_DEBUG_DRIVER_NOT_USED
static void
vmxnet3_rxq_dump(struct vmxnet3_rx_queue *rxq)
{
	uint32_t avail = 0;

	if (rxq == NULL)
		return;

	PMD_RX_LOG(DEBUG,
		   "RXQ: cmd0 base : 0x%p cmd1 base : 0x%p comp ring base : 0x%p.",
		   rxq->cmd_ring[0].base, rxq->cmd_ring[1].base, rxq->comp_ring.base);
	PMD_RX_LOG(DEBUG,
		   "RXQ: cmd0 basePA : 0x%lx cmd1 basePA : 0x%lx comp ring basePA : 0x%lx.",
		   (unsigned long)rxq->cmd_ring[0].basePA,
		   (unsigned long)rxq->cmd_ring[1].basePA,
		   (unsigned long)rxq->comp_ring.basePA);

	avail = vmxnet3_cmd_ring_desc_avail(&rxq->cmd_ring[0]);
	PMD_RX_LOG(DEBUG,
		   "RXQ:cmd0: size=%u; free=%u; next2proc=%u; queued=%u",
		   (uint32_t)rxq->cmd_ring[0].size, avail,
		   rxq->comp_ring.next2proc,
		   rxq->cmd_ring[0].size - avail);

	avail = vmxnet3_cmd_ring_desc_avail(&rxq->cmd_ring[1]);
	PMD_RX_LOG(DEBUG, "RXQ:cmd1 size=%u; free=%u; next2proc=%u; queued=%u",
		   (uint32_t)rxq->cmd_ring[1].size, avail, rxq->comp_ring.next2proc,
		   rxq->cmd_ring[1].size - avail);
}
static void
vmxnet3_txq_dump(struct vmxnet3_tx_queue *txq)
{
	uint32_t avail = 0;

	if (txq == NULL)
		return;

	PMD_TX_LOG(DEBUG, "TXQ: cmd base : 0x%p comp ring base : 0x%p data ring base : 0x%p.",
		   txq->cmd_ring.base, txq->comp_ring.base, txq->data_ring.base);
	PMD_TX_LOG(DEBUG, "TXQ: cmd basePA : 0x%lx comp ring basePA : 0x%lx data ring basePA : 0x%lx.",
		   (unsigned long)txq->cmd_ring.basePA,
		   (unsigned long)txq->comp_ring.basePA,
		   (unsigned long)txq->data_ring.basePA);

	avail = vmxnet3_cmd_ring_desc_avail(&txq->cmd_ring);
	PMD_TX_LOG(DEBUG, "TXQ: size=%u; free=%u; next2proc=%u; queued=%u",
		   (uint32_t)txq->cmd_ring.size, avail,
		   txq->comp_ring.next2proc, txq->cmd_ring.size - avail);
}
#endif
static void
vmxnet3_cmd_ring_release_mbufs(vmxnet3_cmd_ring_t *ring)
{
	while (ring->next2comp != ring->next2fill) {
		/* No need to worry about tx desc ownership, device is quiesced by now. */
		vmxnet3_buf_info_t *buf_info = ring->buf_info + ring->next2comp;

		if (buf_info->m)
			rte_pktmbuf_free(buf_info->m);
		vmxnet3_cmd_ring_adv_next2comp(ring);
	}
}
static void
vmxnet3_cmd_ring_release(vmxnet3_cmd_ring_t *ring)
{
	vmxnet3_cmd_ring_release_mbufs(ring);
	rte_free(ring->buf_info);
	ring->buf_info = NULL;
}
void
vmxnet3_dev_tx_queue_release(void *txq)
{
	vmxnet3_tx_queue_t *tq = txq;

	/* Release the cmd_ring */
	vmxnet3_cmd_ring_release(&tq->cmd_ring);
}
void
vmxnet3_dev_rx_queue_release(void *rxq)
{
	int i;
	vmxnet3_rx_queue_t *rq = rxq;

	/* Release both the cmd_rings */
	for (i = 0; i < VMXNET3_RX_CMDRING_SIZE; i++)
		vmxnet3_cmd_ring_release(&rq->cmd_ring[i]);
}
static void
vmxnet3_dev_tx_queue_reset(void *txq)
{
	vmxnet3_tx_queue_t *tq = txq;
	struct vmxnet3_cmd_ring *ring = &tq->cmd_ring;
	struct vmxnet3_comp_ring *comp_ring = &tq->comp_ring;
	struct vmxnet3_data_ring *data_ring = &tq->data_ring;
	int size;

	/* Release the cmd_ring mbufs */
	vmxnet3_cmd_ring_release_mbufs(&tq->cmd_ring);

	/* Tx vmxnet rings structure initialization */
	ring->next2fill = 0;
	ring->next2comp = 0;
	ring->gen = VMXNET3_INIT_GEN;
	comp_ring->next2proc = 0;
	comp_ring->gen = VMXNET3_INIT_GEN;

	size = sizeof(struct Vmxnet3_TxDesc) * ring->size;
	size += sizeof(struct Vmxnet3_TxCompDesc) * comp_ring->size;
	size += sizeof(struct Vmxnet3_TxDataDesc) * data_ring->size;

	memset(ring->base, 0, size);
}
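/*
 * Note: the single memset() above relies on the Tx command ring, completion
 * ring and data ring being carved out of one contiguous memzone (see
 * vmxnet3_dev_tx_queue_setup() below), so clearing "size" bytes starting at
 * ring->base wipes all three rings at once.
 */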
static void
vmxnet3_dev_rx_queue_reset(void *rxq)
{
	int i;
	vmxnet3_rx_queue_t *rq = rxq;
	struct vmxnet3_cmd_ring *ring0, *ring1;
	struct vmxnet3_comp_ring *comp_ring;
	int size;

	/* Release both the cmd_rings mbufs */
	for (i = 0; i < VMXNET3_RX_CMDRING_SIZE; i++)
		vmxnet3_cmd_ring_release_mbufs(&rq->cmd_ring[i]);

	ring0 = &rq->cmd_ring[0];
	ring1 = &rq->cmd_ring[1];
	comp_ring = &rq->comp_ring;

	/* Rx vmxnet rings structure initialization */
	ring0->next2fill = 0;
	ring1->next2fill = 0;
	ring0->next2comp = 0;
	ring1->next2comp = 0;
	ring0->gen = VMXNET3_INIT_GEN;
	ring1->gen = VMXNET3_INIT_GEN;
	comp_ring->next2proc = 0;
	comp_ring->gen = VMXNET3_INIT_GEN;

	size = sizeof(struct Vmxnet3_RxDesc) * (ring0->size + ring1->size);
	size += sizeof(struct Vmxnet3_RxCompDesc) * comp_ring->size;

	memset(ring0->base, 0, size);
}
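/*
 * The Rx reset relies on the same layout: cmd_ring[0], cmd_ring[1] and the
 * completion ring share one contiguous memzone (see
 * vmxnet3_dev_rx_queue_setup() below), so a single memset from ring0->base
 * covers all of them.
 */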
void
vmxnet3_dev_clear_queues(struct rte_eth_dev *dev)
{
	unsigned i;
	PMD_INIT_FUNC_TRACE();

	for (i = 0; i < dev->data->nb_tx_queues; i++) {
		struct vmxnet3_tx_queue *txq = dev->data->tx_queues[i];
		vmxnet3_dev_tx_queue_reset(txq);
	}

	for (i = 0; i < dev->data->nb_rx_queues; i++) {
		struct vmxnet3_rx_queue *rxq = dev->data->rx_queues[i];
		vmxnet3_dev_rx_queue_reset(rxq);
	}
}
static int
vmxnet3_unmap_pkt(uint16_t eop_idx, vmxnet3_tx_queue_t *txq)
{
	int completed = 0;
	struct rte_mbuf *mbuf;

	/* Release cmd_ring descriptor and free mbuf */
	RTE_ASSERT(txq->cmd_ring.base[eop_idx].txd.eop == 1);

	mbuf = txq->cmd_ring.buf_info[eop_idx].m;
	if (mbuf == NULL)
		rte_panic("EOP desc does not point to a valid mbuf");
	rte_pktmbuf_free(mbuf);

	txq->cmd_ring.buf_info[eop_idx].m = NULL;

	while (txq->cmd_ring.next2comp != eop_idx) {
		/* no out-of-order completion */
		RTE_ASSERT(txq->cmd_ring.base[txq->cmd_ring.next2comp].txd.cq == 0);
		vmxnet3_cmd_ring_adv_next2comp(&txq->cmd_ring);
		completed++;
	}

	/* Mark the txd for which tcd was generated as completed */
	vmxnet3_cmd_ring_adv_next2comp(&txq->cmd_ring);

	return completed + 1;
}
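/*
 * Tx completion processing: each Vmxnet3_TxCompDesc identifies, via txdIdx,
 * the EOP command descriptor of a finished packet. vmxnet3_unmap_pkt() frees
 * the mbuf and returns how many command-ring descriptors were reclaimed, so
 * multi-segment packets are accounted for correctly by the caller below.
 */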
static void
vmxnet3_tq_tx_complete(vmxnet3_tx_queue_t *txq)
{
	int completed = 0;
	vmxnet3_comp_ring_t *comp_ring = &txq->comp_ring;
	struct Vmxnet3_TxCompDesc *tcd = (struct Vmxnet3_TxCompDesc *)
		(comp_ring->base + comp_ring->next2proc);

	while (tcd->gen == comp_ring->gen) {
		completed += vmxnet3_unmap_pkt(tcd->txdIdx, txq);

		vmxnet3_comp_ring_adv_next2proc(comp_ring);
		tcd = (struct Vmxnet3_TxCompDesc *)(comp_ring->base +
						    comp_ring->next2proc);
	}

	PMD_TX_LOG(DEBUG, "Processed %d tx comps & command descs.", completed);
}
uint16_t
vmxnet3_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts,
		  uint16_t nb_pkts)
{
	uint16_t nb_tx;
	vmxnet3_tx_queue_t *txq = tx_queue;
	struct vmxnet3_hw *hw = txq->hw;
	Vmxnet3_TxQueueCtrl *txq_ctrl = &txq->shared->ctrl;
	uint32_t deferred = rte_le_to_cpu_32(txq_ctrl->txNumDeferred);

	if (unlikely(txq->stopped)) {
		PMD_TX_LOG(DEBUG, "Tx queue is stopped.");
		return 0;
	}

	/* Free up the comp_descriptors aggressively */
	vmxnet3_tq_tx_complete(txq);
	nb_tx = 0;
	while (nb_tx < nb_pkts) {
		Vmxnet3_GenericDesc *gdesc;
		vmxnet3_buf_info_t *tbi;
		uint32_t first2fill, avail, dw2;
		struct rte_mbuf *txm = tx_pkts[nb_tx];
		struct rte_mbuf *m_seg = txm;
		int copy_size = 0;
		bool tso = (txm->ol_flags & PKT_TX_TCP_SEG) != 0;
		/* # of descriptors needed for a packet. */
		unsigned count = txm->nb_segs;

		avail = vmxnet3_cmd_ring_desc_avail(&txq->cmd_ring);
		if (count > avail) {
			/* Is command ring full? */
			if (unlikely(avail == 0)) {
				PMD_TX_LOG(DEBUG, "No free ring descriptors");
				txq->stats.tx_ring_full++;
				txq->stats.drop_total += (nb_pkts - nb_tx);
				break;
			}

			/* Command ring is not full but cannot handle the
			 * multi-segmented packet. Let's try the next packet
			 * in this case.
			 */
			PMD_TX_LOG(DEBUG, "Running out of ring descriptors "
				   "(avail %d needed %d)", avail, count);
			txq->stats.drop_total++;
			if (tso)
				txq->stats.drop_tso++;
			rte_pktmbuf_free(txm);
			nb_tx++;
			continue;
		}

		/* Drop non-TSO packet that is excessively fragmented */
		if (unlikely(!tso && count > VMXNET3_MAX_TXD_PER_PKT)) {
			PMD_TX_LOG(ERR, "Non-TSO packet cannot occupy more than %d tx "
				   "descriptors. Packet dropped.", VMXNET3_MAX_TXD_PER_PKT);
			txq->stats.drop_too_many_segs++;
			txq->stats.drop_total++;
			rte_pktmbuf_free(txm);
			nb_tx++;
			continue;
		}
		if (txm->nb_segs == 1 && rte_pktmbuf_pkt_len(txm) <= VMXNET3_HDR_COPY_SIZE) {
			struct Vmxnet3_TxDataDesc *tdd;

			tdd = txq->data_ring.base + txq->cmd_ring.next2fill;
			copy_size = rte_pktmbuf_pkt_len(txm);
			rte_memcpy(tdd->data, rte_pktmbuf_mtod(txm, char *), copy_size);
		}

		/* use the previous gen bit for the SOP desc */
		dw2 = (txq->cmd_ring.gen ^ 0x1) << VMXNET3_TXD_GEN_SHIFT;
		first2fill = txq->cmd_ring.next2fill;
		do {
			/* Remember the transmit buffer for cleanup */
			tbi = txq->cmd_ring.buf_info + txq->cmd_ring.next2fill;
			tbi->m = m_seg;

			/* NB: the following assumes that VMXNET3 maximum
			 * transmit buffer size (16K) is greater than
			 * maximum size of mbuf segment size.
			 */
			gdesc = txq->cmd_ring.base + txq->cmd_ring.next2fill;
			if (copy_size)
				gdesc->txd.addr = rte_cpu_to_le_64(txq->data_ring.basePA +
								   txq->cmd_ring.next2fill *
								   sizeof(struct Vmxnet3_TxDataDesc));
			else
				gdesc->txd.addr = rte_mbuf_data_dma_addr(m_seg);

			gdesc->dword[2] = dw2 | m_seg->data_len;
			gdesc->dword[3] = 0;

			/* move to the next2fill descriptor */
			vmxnet3_cmd_ring_adv_next2fill(&txq->cmd_ring);

			/* use the right gen for non-SOP desc */
			dw2 = txq->cmd_ring.gen << VMXNET3_TXD_GEN_SHIFT;
		} while ((m_seg = m_seg->next) != NULL);

		/* set the last buf_info for the pkt */
		/* Update the EOP descriptor */
		gdesc->dword[3] |= VMXNET3_TXD_EOP | VMXNET3_TXD_CQ;
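		/*
		 * Descriptor ownership is conveyed by the GEN bit: every
		 * descriptor of the chain is written with the current
		 * generation except the SOP, which keeps the previous value
		 * until the flip after the compiler barrier below, so the
		 * device never observes a partially built chain.
		 */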
		/* Add VLAN tag if present */
		gdesc = txq->cmd_ring.base + first2fill;
		if (txm->ol_flags & PKT_TX_VLAN_PKT) {
			gdesc->txd.ti = 1;
			gdesc->txd.tci = txm->vlan_tci;
		}

		if (tso) {
			uint16_t mss = txm->tso_segsz;

			RTE_ASSERT(mss > 0);

			gdesc->txd.hlen = txm->l2_len + txm->l3_len + txm->l4_len;
			gdesc->txd.om = VMXNET3_OM_TSO;
			gdesc->txd.msscof = mss;

			deferred += (rte_pktmbuf_pkt_len(txm) - gdesc->txd.hlen + mss - 1) / mss;
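			/*
			 * Illustration (hypothetical numbers): for pkt_len 9054,
			 * hlen 54 and mss 1460 the expression above yields
			 * (9000 + 1459) / 1460 = 7, i.e. the ceiling of
			 * payload / mss, which is the number of MSS-sized
			 * segments the device will emit for this packet.
			 */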
		} else if (txm->ol_flags & PKT_TX_L4_MASK) {
			gdesc->txd.om = VMXNET3_OM_CSUM;
			gdesc->txd.hlen = txm->l2_len + txm->l3_len;

			switch (txm->ol_flags & PKT_TX_L4_MASK) {
			case PKT_TX_TCP_CKSUM:
				gdesc->txd.msscof = gdesc->txd.hlen + offsetof(struct tcp_hdr, cksum);
				break;
			case PKT_TX_UDP_CKSUM:
				gdesc->txd.msscof = gdesc->txd.hlen + offsetof(struct udp_hdr, dgram_cksum);
				break;
			default:
				PMD_TX_LOG(WARNING, "requested cksum offload not supported %#llx",
					   txm->ol_flags & PKT_TX_L4_MASK);
			}
		} else {
			gdesc->txd.om = VMXNET3_OM_NONE;
			gdesc->txd.msscof = 0;
		}

		/* flip the GEN bit on the SOP */
		rte_compiler_barrier();
		gdesc->dword[2] ^= VMXNET3_TXD_GEN;

		txq_ctrl->txNumDeferred = rte_cpu_to_le_32(deferred);
		nb_tx++;
	}
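	/*
	 * Doorbell batching: txNumDeferred accumulates the packets queued
	 * since the last TXPROD write; the register is only touched once the
	 * count reaches txThreshold (below), so the relatively expensive
	 * emulated register access is amortized over a burst of packets.
	 */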
	PMD_TX_LOG(DEBUG, "vmxnet3 txThreshold: %u", rte_le_to_cpu_32(txq_ctrl->txThreshold));

	if (deferred >= rte_le_to_cpu_32(txq_ctrl->txThreshold)) {
		txq_ctrl->txNumDeferred = 0;
		/* Notify vSwitch that packets are available. */
		VMXNET3_WRITE_BAR0_REG(hw, (VMXNET3_REG_TXPROD + txq->queue_id * VMXNET3_REG_ALIGN),
				       txq->cmd_ring.next2fill);
	}

	return nb_tx;
}
/*
 *  Allocates mbufs and clusters. Post rx descriptors with buffer details
 *  so that device can receive packets in those buffers.
 *  Ring layout:
 *      Among the two rings, 1st ring contains buffers of type 0 and type 1.
 *      bufs_per_pkt is set such that for non-LRO cases all the buffers required
 *      by a frame will fit in 1st ring (1st buf of type 0 and rest of type 1).
 *      2nd ring contains buffers of type 1 alone. The second ring is mostly
 *      used for LRO, which needs more buffers per packet.
 */
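/*
 * For illustration only: with LRO a large merged frame would occupy one HEAD
 * (type 0) buffer followed by as many BODY (type 1) buffers as needed, drawn
 * from either ring; in the non-LRO, single-buffer-per-packet configuration
 * used here, every buffer posted to ring 0 is a HEAD buffer.
 */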
static int
vmxnet3_post_rx_bufs(vmxnet3_rx_queue_t *rxq, uint8_t ring_id)
{
	int err = 0;
	uint32_t i = 0, val = 0;
	struct vmxnet3_cmd_ring *ring = &rxq->cmd_ring[ring_id];

	if (ring_id == 0) {
		/* Usually: One HEAD type buf per packet
		 * val = (ring->next2fill % rxq->hw->bufs_per_pkt) ?
		 * VMXNET3_RXD_BTYPE_BODY : VMXNET3_RXD_BTYPE_HEAD;
		 */

		/* We use single packet buffer so all heads here */
		val = VMXNET3_RXD_BTYPE_HEAD;
	} else {
		/* All BODY type buffers for 2nd ring */
		val = VMXNET3_RXD_BTYPE_BODY;
	}

	while (vmxnet3_cmd_ring_desc_avail(ring) > 0) {
		struct Vmxnet3_RxDesc *rxd;
		struct rte_mbuf *mbuf;
		vmxnet3_buf_info_t *buf_info = &ring->buf_info[ring->next2fill];

		rxd = (struct Vmxnet3_RxDesc *)(ring->base + ring->next2fill);

		/* Allocate blank mbuf for the current Rx Descriptor */
		mbuf = rte_mbuf_raw_alloc(rxq->mp);
		if (unlikely(mbuf == NULL)) {
			PMD_RX_LOG(ERR, "Error allocating mbuf");
			rxq->stats.rx_buf_alloc_failure++;
			err = ENOMEM;
			break;
		}

		/*
		 * Load mbuf pointer into buf_info[ring_size]
		 * buf_info structure is equivalent to cookie for virtio-virtqueue
		 */
		buf_info->m = mbuf;
		buf_info->len = (uint16_t)(mbuf->buf_len -
					   RTE_PKTMBUF_HEADROOM);
		buf_info->bufPA = rte_mbuf_data_dma_addr_default(mbuf);

		/* Load Rx Descriptor with the buffer's GPA */
		rxd->addr = buf_info->bufPA;

		/* After this point rxd->addr MUST not be NULL */
		rxd->btype = val;
		rxd->len = buf_info->len;
		/* Flip gen bit at the end to change ownership */
		rxd->gen = ring->gen;

		vmxnet3_cmd_ring_adv_next2fill(ring);
		i++;
	}

	/* Return error only if no buffers are posted at present */
	if (vmxnet3_cmd_ring_desc_avail(ring) >= (ring->size - 1))
		return -err;
	else
		return i;
}
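/*
 * Note: vmxnet3_post_rx_bufs() returns the number of buffers it managed to
 * post; a negative errno is returned only when the ring is still completely
 * unpopulated (the desc_avail check above), so a partially refilled ring is
 * not treated as an error.
 */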
/* Receive side checksum and other offloads */
static void
vmxnet3_rx_offload(const Vmxnet3_RxCompDesc *rcd, struct rte_mbuf *rxm)
{
	/* Check for hardware stripped VLAN tag */
	if (rcd->ts) {
		rxm->ol_flags |= PKT_RX_VLAN_PKT;
		rxm->vlan_tci = rte_le_to_cpu_16((uint16_t)rcd->tci);
	}

	/* Check if the packet is RSS'ed */
	if (rcd->rssType != VMXNET3_RCD_RSS_TYPE_NONE) {
		rxm->ol_flags |= PKT_RX_RSS_HASH;
		rxm->hash.rss = rcd->rssHash;
	}

	/* Check packet type, checksum errors, etc. Only support IPv4 for now. */
	if (rcd->v4) {
		struct ether_hdr *eth = rte_pktmbuf_mtod(rxm, struct ether_hdr *);
		struct ipv4_hdr *ip = (struct ipv4_hdr *)(eth + 1);

		if (((ip->version_ihl & 0xf) << 2) > (int)sizeof(struct ipv4_hdr))
			rxm->packet_type = RTE_PTYPE_L3_IPV4_EXT;
		else
			rxm->packet_type = RTE_PTYPE_L3_IPV4;

		if (!rcd->cnc) {
			if (!rcd->ipc)
				rxm->ol_flags |= PKT_RX_IP_CKSUM_BAD;

			if ((rcd->tcp || rcd->udp) && !rcd->tuc)
				rxm->ol_flags |= PKT_RX_L4_CKSUM_BAD;
		}
	}
}
/*
 * Process the Rx Completion Ring of given vmxnet3_rx_queue
 * for nb_pkts burst and return the number of packets received
 */
uint16_t
vmxnet3_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t nb_pkts)
{
	uint16_t nb_rx = 0;
	uint32_t nb_rxd = 0, idx = 0;
	uint8_t ring_idx = 0;
	vmxnet3_rx_queue_t *rxq = rx_queue;
	Vmxnet3_RxCompDesc *rcd;
	vmxnet3_buf_info_t *rbi;
	Vmxnet3_RxDesc *rxd;
	struct rte_mbuf *rxm = NULL;
	struct vmxnet3_hw *hw = rxq->hw;

	rcd = &rxq->comp_ring.base[rxq->comp_ring.next2proc].rcd;
	if (unlikely(rxq->stopped)) {
		PMD_RX_LOG(DEBUG, "Rx queue is stopped.");
		return 0;
	}

	while (rcd->gen == rxq->comp_ring.gen) {
		if (nb_rx >= nb_pkts)
			break;

		idx = rcd->rxdIdx;
		ring_idx = (uint8_t)((rcd->rqID == rxq->qid1) ? 0 : 1);
		rxd = (Vmxnet3_RxDesc *)rxq->cmd_ring[ring_idx].base + idx;
		RTE_SET_USED(rxd); /* used only for assert when enabled */
		rbi = rxq->cmd_ring[ring_idx].buf_info + idx;

		PMD_RX_LOG(DEBUG, "rxd idx: %d ring idx: %d.", idx, ring_idx);

		RTE_ASSERT(rcd->len <= rxd->len);
		RTE_ASSERT(rbi->m);

		/* Get the packet buffer pointer from buf_info */
		rxm = rbi->m;

		/* Clear descriptor associated buf_info to be reused */
		rbi->m = NULL;
		rbi->bufPA = 0;

		/* Update the index that we received a packet */
		rxq->cmd_ring[ring_idx].next2comp = idx;

		/* For RCD with EOP set, check if there is frame error */
		if (unlikely(rcd->eop && rcd->err)) {
			rxq->stats.drop_total++;
			rxq->stats.drop_err++;

			if (!rcd->fcs) {
				rxq->stats.drop_fcs++;
				PMD_RX_LOG(ERR, "Recv packet dropped due to frame err.");
			}
			PMD_RX_LOG(ERR, "Error in received packet rcd#:%d rxd:%d",
				   (int)(rcd - (struct Vmxnet3_RxCompDesc *)
					 rxq->comp_ring.base), rcd->rxdIdx);
			rte_pktmbuf_free_seg(rxm);
			goto rcd_done;
		}
		/* Initialize newly received packet buffer */
		rxm->port = rxq->port_id;
		rxm->nb_segs = 1;
		rxm->next = NULL;
		rxm->pkt_len = (uint16_t)rcd->len;
		rxm->data_len = (uint16_t)rcd->len;
		rxm->data_off = RTE_PKTMBUF_HEADROOM;

		/*
		 * If this is the first buffer of the received packet,
		 * set the pointer to the first mbuf of the packet
		 * Otherwise, update the total length and the number of segments
		 * of the current scattered packet, and update the pointer to
		 * the last mbuf of the current packet.
		 */
		if (rcd->sop) {
			RTE_ASSERT(rxd->btype == VMXNET3_RXD_BTYPE_HEAD);

			if (unlikely(rcd->len == 0)) {
				RTE_ASSERT(rcd->eop);

				PMD_RX_LOG(DEBUG,
					   "Rx buf was skipped. rxring[%d][%d])",
					   ring_idx, idx);
				rte_pktmbuf_free_seg(rxm);
				goto rcd_done;
			}

			rxq->start_seg = rxm;
			vmxnet3_rx_offload(rcd, rxm);
		} else {
			struct rte_mbuf *start = rxq->start_seg;

			RTE_ASSERT(rxd->btype == VMXNET3_RXD_BTYPE_BODY);

			start->pkt_len += rxm->data_len;
			start->nb_segs++;

			rxq->last_seg->next = rxm;
		}
		rxq->last_seg = rxm;

		if (rcd->eop) {
			rx_pkts[nb_rx++] = rxq->start_seg;
			rxq->start_seg = NULL;
		}
rcd_done:
		rxq->cmd_ring[ring_idx].next2comp = idx;
		VMXNET3_INC_RING_IDX_ONLY(rxq->cmd_ring[ring_idx].next2comp, rxq->cmd_ring[ring_idx].size);

		/* It's time to allocate some new buf and renew descriptors */
		vmxnet3_post_rx_bufs(rxq, ring_idx);
		if (unlikely(rxq->shared->ctrl.updateRxProd)) {
			VMXNET3_WRITE_BAR0_REG(hw, rxprod_reg[ring_idx] + (rxq->queue_id * VMXNET3_REG_ALIGN),
					       rxq->cmd_ring[ring_idx].next2fill);
		}

		/* Advance to the next descriptor in comp_ring */
		vmxnet3_comp_ring_adv_next2proc(&rxq->comp_ring);

		rcd = &rxq->comp_ring.base[rxq->comp_ring.next2proc].rcd;
		nb_rxd++;
		if (nb_rxd > rxq->cmd_ring[0].size) {
			PMD_RX_LOG(ERR, "Used up quota of receiving packets,"
				   " relinquish control.");
			break;
		}
	}

	return nb_rx;
}
/*
 * Create memzone for device rings. malloc can't be used as the physical address is
 * needed. If the memzone is already created, then this function returns a ptr
 * to the old one.
 */
static const struct rte_memzone *
ring_dma_zone_reserve(struct rte_eth_dev *dev, const char *ring_name,
		      uint16_t queue_id, uint32_t ring_size, int socket_id)
{
	char z_name[RTE_MEMZONE_NAMESIZE];
	const struct rte_memzone *mz;

	snprintf(z_name, sizeof(z_name), "%s_%s_%d_%d",
		 dev->driver->pci_drv.name, ring_name,
		 dev->data->port_id, queue_id);

	mz = rte_memzone_lookup(z_name);
	if (mz)
		return mz;

	return rte_memzone_reserve_aligned(z_name, ring_size,
					   socket_id, 0, VMXNET3_RING_BA_ALIGN);
}
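/*
 * Example (illustrative only): for the Tx descriptor zone of port 0, queue 1
 * the name built above would be "<pci_drv.name>_txdesc_0_1", where the prefix
 * depends on how the PMD registered itself; because of the lookup-first
 * behaviour, a queue re-setup reuses the zone created earlier.
 */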
int
vmxnet3_dev_tx_queue_setup(struct rte_eth_dev *dev,
			   uint16_t queue_idx,
			   uint16_t nb_desc,
			   unsigned int socket_id,
			   __attribute__((unused)) const struct rte_eth_txconf *tx_conf)
{
	struct vmxnet3_hw *hw = dev->data->dev_private;
	const struct rte_memzone *mz;
	struct vmxnet3_tx_queue *txq;
	struct vmxnet3_cmd_ring *ring;
	struct vmxnet3_comp_ring *comp_ring;
	struct vmxnet3_data_ring *data_ring;
	int size;

	PMD_INIT_FUNC_TRACE();

	if ((tx_conf->txq_flags & ETH_TXQ_FLAGS_NOXSUMSCTP) !=
	    ETH_TXQ_FLAGS_NOXSUMSCTP) {
		PMD_INIT_LOG(ERR, "SCTP checksum offload not supported");
		return -EINVAL;
	}

	txq = rte_zmalloc("ethdev_tx_queue", sizeof(struct vmxnet3_tx_queue), RTE_CACHE_LINE_SIZE);
	if (txq == NULL) {
		PMD_INIT_LOG(ERR, "Can not allocate tx queue structure");
		return -ENOMEM;
	}

	txq->queue_id = queue_idx;
	txq->port_id = dev->data->port_id;
	txq->shared = &hw->tqd_start[queue_idx];
	txq->hw = hw;
	txq->qid = queue_idx;
	txq->stopped = TRUE;
	ring = &txq->cmd_ring;
	comp_ring = &txq->comp_ring;
	data_ring = &txq->data_ring;

	/* Tx vmxnet ring length should be between 512-4096 */
	if (nb_desc < VMXNET3_DEF_TX_RING_SIZE) {
		PMD_INIT_LOG(ERR, "VMXNET3 Tx Ring Size Min: %u",
			     VMXNET3_DEF_TX_RING_SIZE);
		return -EINVAL;
	} else if (nb_desc > VMXNET3_TX_RING_MAX_SIZE) {
		PMD_INIT_LOG(ERR, "VMXNET3 Tx Ring Size Max: %u",
			     VMXNET3_TX_RING_MAX_SIZE);
		return -EINVAL;
	} else {
		ring->size = nb_desc;
		/* round down to the ring-size alignment the device requires */
		ring->size &= ~VMXNET3_RING_SIZE_MASK;
	}
	comp_ring->size = data_ring->size = ring->size;

	/* Tx vmxnet rings structure initialization */
	ring->next2fill = 0;
	ring->next2comp = 0;
	ring->gen = VMXNET3_INIT_GEN;
	comp_ring->next2proc = 0;
	comp_ring->gen = VMXNET3_INIT_GEN;

	size = sizeof(struct Vmxnet3_TxDesc) * ring->size;
	size += sizeof(struct Vmxnet3_TxCompDesc) * comp_ring->size;
	size += sizeof(struct Vmxnet3_TxDataDesc) * data_ring->size;

	mz = ring_dma_zone_reserve(dev, "txdesc", queue_idx, size, socket_id);
	if (mz == NULL) {
		PMD_INIT_LOG(ERR, "ERROR: Creating queue descriptors zone");
		return -ENOMEM;
	}
	memset(mz->addr, 0, mz->len);
	/* cmd_ring initialization */
	ring->base = mz->addr;
	ring->basePA = mz->phys_addr;

	/* comp_ring initialization */
	comp_ring->base = ring->base + ring->size;
	comp_ring->basePA = ring->basePA +
		(sizeof(struct Vmxnet3_TxDesc) * ring->size);

	/* data_ring initialization */
	data_ring->base = (Vmxnet3_TxDataDesc *)(comp_ring->base + comp_ring->size);
	data_ring->basePA = comp_ring->basePA +
		(sizeof(struct Vmxnet3_TxCompDesc) * comp_ring->size);

	/* cmd_ring0 buf_info allocation */
	ring->buf_info = rte_zmalloc("tx_ring_buf_info",
				     ring->size * sizeof(vmxnet3_buf_info_t), RTE_CACHE_LINE_SIZE);
	if (ring->buf_info == NULL) {
		PMD_INIT_LOG(ERR, "ERROR: Creating tx_buf_info structure");
		return -ENOMEM;
	}

	/* Update the data portion with txq */
	dev->data->tx_queues[queue_idx] = txq;

	return 0;
}
int
vmxnet3_dev_rx_queue_setup(struct rte_eth_dev *dev,
			   uint16_t queue_idx,
			   uint16_t nb_desc,
			   unsigned int socket_id,
			   __attribute__((unused)) const struct rte_eth_rxconf *rx_conf,
			   struct rte_mempool *mp)
{
	const struct rte_memzone *mz;
	struct vmxnet3_rx_queue *rxq;
	struct vmxnet3_hw *hw = dev->data->dev_private;
	struct vmxnet3_cmd_ring *ring0, *ring1, *ring;
	struct vmxnet3_comp_ring *comp_ring;
	int size;
	uint8_t i;
	char mem_name[32];

	PMD_INIT_FUNC_TRACE();

	rxq = rte_zmalloc("ethdev_rx_queue", sizeof(struct vmxnet3_rx_queue), RTE_CACHE_LINE_SIZE);
	if (rxq == NULL) {
		PMD_INIT_LOG(ERR, "Can not allocate rx queue structure");
		return -ENOMEM;
	}

	rxq->mp = mp;
	rxq->queue_id = queue_idx;
	rxq->port_id = dev->data->port_id;
	rxq->shared = &hw->rqd_start[queue_idx];
	rxq->hw = hw;
	rxq->qid1 = queue_idx;
	rxq->qid2 = queue_idx + hw->num_rx_queues;
	rxq->stopped = TRUE;

	ring0 = &rxq->cmd_ring[0];
	ring1 = &rxq->cmd_ring[1];
	comp_ring = &rxq->comp_ring;
	/* Rx vmxnet rings length should be between 256-4096 */
	if (nb_desc < VMXNET3_DEF_RX_RING_SIZE) {
		PMD_INIT_LOG(ERR, "VMXNET3 Rx Ring Size Min: 256");
		return -EINVAL;
	} else if (nb_desc > VMXNET3_RX_RING_MAX_SIZE) {
		PMD_INIT_LOG(ERR, "VMXNET3 Rx Ring Size Max: 4096");
		return -EINVAL;
	} else {
		ring0->size = nb_desc;
		ring0->size &= ~VMXNET3_RING_SIZE_MASK;
		ring1->size = ring0->size;
	}

	comp_ring->size = ring0->size + ring1->size;

	/* Rx vmxnet rings structure initialization */
	ring0->next2fill = 0;
	ring1->next2fill = 0;
	ring0->next2comp = 0;
	ring1->next2comp = 0;
	ring0->gen = VMXNET3_INIT_GEN;
	ring1->gen = VMXNET3_INIT_GEN;
	comp_ring->next2proc = 0;
	comp_ring->gen = VMXNET3_INIT_GEN;

	size = sizeof(struct Vmxnet3_RxDesc) * (ring0->size + ring1->size);
	size += sizeof(struct Vmxnet3_RxCompDesc) * comp_ring->size;

	mz = ring_dma_zone_reserve(dev, "rxdesc", queue_idx, size, socket_id);
	if (mz == NULL) {
		PMD_INIT_LOG(ERR, "ERROR: Creating queue descriptors zone");
		return -ENOMEM;
	}
	memset(mz->addr, 0, mz->len);
	/* cmd_ring0 initialization */
	ring0->base = mz->addr;
	ring0->basePA = mz->phys_addr;

	/* cmd_ring1 initialization */
	ring1->base = ring0->base + ring0->size;
	ring1->basePA = ring0->basePA + sizeof(struct Vmxnet3_RxDesc) * ring0->size;

	/* comp_ring initialization */
	comp_ring->base = ring1->base + ring1->size;
	comp_ring->basePA = ring1->basePA + sizeof(struct Vmxnet3_RxDesc) *
		ring1->size;

	/* cmd_ring0-cmd_ring1 buf_info allocation */
	for (i = 0; i < VMXNET3_RX_CMDRING_SIZE; i++) {
		ring = &rxq->cmd_ring[i];
		snprintf(mem_name, sizeof(mem_name), "rx_ring_%d_buf_info", i);

		ring->buf_info = rte_zmalloc(mem_name, ring->size * sizeof(vmxnet3_buf_info_t), RTE_CACHE_LINE_SIZE);
		if (ring->buf_info == NULL) {
			PMD_INIT_LOG(ERR, "ERROR: Creating rx_buf_info structure");
			return -ENOMEM;
		}
	}

	/* Update the data portion with rxq */
	dev->data->rx_queues[queue_idx] = rxq;

	return 0;
}
/*
 * Initializes Receive Unit
 * Load mbufs in rx queue in advance
 */
int
vmxnet3_dev_rxtx_init(struct rte_eth_dev *dev)
{
	struct vmxnet3_hw *hw = dev->data->dev_private;
	int i, ret;
	uint8_t j;

	PMD_INIT_FUNC_TRACE();

	for (i = 0; i < hw->num_rx_queues; i++) {
		vmxnet3_rx_queue_t *rxq = dev->data->rx_queues[i];

		for (j = 0; j < VMXNET3_RX_CMDRING_SIZE; j++) {
			/* Passing 0 as alloc_num will allocate full ring */
			ret = vmxnet3_post_rx_bufs(rxq, j);
			if (ret <= 0) {
				PMD_INIT_LOG(ERR, "ERROR: Posting Rxq: %d buffers ring: %d", i, j);
				return -ret;
			}
			/* Updating device with the index:next2fill to fill the mbufs for coming packets */
			if (unlikely(rxq->shared->ctrl.updateRxProd)) {
				VMXNET3_WRITE_BAR0_REG(hw, rxprod_reg[j] + (rxq->queue_id * VMXNET3_REG_ALIGN),
						       rxq->cmd_ring[j].next2fill);
			}
		}
		rxq->stopped = FALSE;
		rxq->start_seg = NULL;
	}

	for (i = 0; i < dev->data->nb_tx_queues; i++) {
		struct vmxnet3_tx_queue *txq = dev->data->tx_queues[i];
		txq->stopped = FALSE;
	}

	return 0;
}
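/*
 * Default 40-byte Toeplitz RSS key, used when the application does not supply
 * one in rx_adv_conf.rss_conf (see vmxnet3_rss_configure() below); it appears
 * to be the same well-known key that other Intel PMDs ship as their default.
 */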
static uint8_t rss_intel_key[40] = {
	0x6D, 0x5A, 0x56, 0xDA, 0x25, 0x5B, 0x0E, 0xC2,
	0x41, 0x67, 0x25, 0x3D, 0x43, 0xA3, 0x8F, 0xB0,
	0xD0, 0xCA, 0x2B, 0xCB, 0xAE, 0x7B, 0x30, 0xB4,
	0x77, 0xCB, 0x2D, 0xA3, 0x80, 0x30, 0xF2, 0x0C,
	0x6A, 0x42, 0xB7, 0x3B, 0xBE, 0xAC, 0x01, 0xFA,
};
/*
 * Configure RSS feature
 */
int
vmxnet3_rss_configure(struct rte_eth_dev *dev)
{
	struct vmxnet3_hw *hw = dev->data->dev_private;
	struct VMXNET3_RSSConf *dev_rss_conf;
	struct rte_eth_rss_conf *port_rss_conf;
	uint64_t rss_hf;
	uint8_t i, j;

	PMD_INIT_FUNC_TRACE();

	dev_rss_conf = hw->rss_conf;
	port_rss_conf = &dev->data->dev_conf.rx_adv_conf.rss_conf;

	/* loading hashFunc */
	dev_rss_conf->hashFunc = VMXNET3_RSS_HASH_FUNC_TOEPLITZ;
	/* loading hashKeySize */
	dev_rss_conf->hashKeySize = VMXNET3_RSS_MAX_KEY_SIZE;
	/* loading indTableSize: Must not exceed VMXNET3_RSS_MAX_IND_TABLE_SIZE (128) */
	dev_rss_conf->indTableSize = (uint16_t)(hw->num_rx_queues * 4);

	if (port_rss_conf->rss_key == NULL) {
		/* Default hash key */
		port_rss_conf->rss_key = rss_intel_key;
	}

	/* loading hashKey */
	memcpy(&dev_rss_conf->hashKey[0], port_rss_conf->rss_key, dev_rss_conf->hashKeySize);

	/* loading indTable */
	for (i = 0, j = 0; i < dev_rss_conf->indTableSize; i++, j++) {
		if (j == dev->data->nb_rx_queues)
			j = 0;
		dev_rss_conf->indTable[i] = j;
	}
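	/*
	 * The loop above fills the indirection table round-robin over the
	 * configured Rx queues; e.g. with 4 queues and indTableSize = 16 the
	 * table becomes 0,1,2,3,0,1,2,3,... so flows are spread evenly.
	 */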
	/* loading hashType */
	dev_rss_conf->hashType = 0;
	rss_hf = port_rss_conf->rss_hf & VMXNET3_RSS_OFFLOAD_ALL;
	if (rss_hf & ETH_RSS_IPV4)
		dev_rss_conf->hashType |= VMXNET3_RSS_HASH_TYPE_IPV4;
	if (rss_hf & ETH_RSS_NONFRAG_IPV4_TCP)
		dev_rss_conf->hashType |= VMXNET3_RSS_HASH_TYPE_TCP_IPV4;
	if (rss_hf & ETH_RSS_IPV6)
		dev_rss_conf->hashType |= VMXNET3_RSS_HASH_TYPE_IPV6;
	if (rss_hf & ETH_RSS_NONFRAG_IPV6_TCP)
		dev_rss_conf->hashType |= VMXNET3_RSS_HASH_TYPE_TCP_IPV6;

	return VMXNET3_SUCCESS;
}