4 * Copyright(c) 2010-2015 Intel Corporation. All rights reserved.
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
11 * * Redistributions of source code must retain the above copyright
12 * notice, this list of conditions and the following disclaimer.
13 * * Redistributions in binary form must reproduce the above copyright
14 * notice, this list of conditions and the following disclaimer in
15 * the documentation and/or other materials provided with the
17 * * Neither the name of Intel Corporation nor the names of its
18 * contributors may be used to endorse or promote products derived
19 * from this software without specific prior written permission.
21 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
22 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
23 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
24 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
25 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
26 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
27 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
28 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
29 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
30 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
31 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
34 #include <sys/queue.h>
45 #include <rte_byteorder.h>
46 #include <rte_common.h>
47 #include <rte_cycles.h>
49 #include <rte_debug.h>
50 #include <rte_interrupts.h>
52 #include <rte_memory.h>
53 #include <rte_memzone.h>
54 #include <rte_launch.h>
56 #include <rte_per_lcore.h>
57 #include <rte_lcore.h>
58 #include <rte_atomic.h>
59 #include <rte_branch_prediction.h>
61 #include <rte_mempool.h>
62 #include <rte_malloc.h>
64 #include <rte_ether.h>
65 #include <rte_ethdev.h>
66 #include <rte_prefetch.h>
71 #include <rte_string_fns.h>
72 #include <rte_errno.h>
74 #include "base/vmxnet3_defs.h"
75 #include "vmxnet3_ring.h"
77 #include "vmxnet3_logs.h"
78 #include "vmxnet3_ethdev.h"
/*
 * BAR0 producer-index register for each of the two Rx command rings,
 * indexed by ring id (0 or 1). Used when the refilled next2fill index is
 * written back to the device.
 */
80 static const uint32_t rxprod_reg[2] = {VMXNET3_REG_RXPROD, VMXNET3_REG_RXPROD2};
/* Forward declarations of helpers that are defined further down in this file. */
82 static int vmxnet3_post_rx_bufs(vmxnet3_rx_queue_t*, uint8_t);
83 static void vmxnet3_tq_tx_complete(vmxnet3_tx_queue_t *);
/* The queue dump helpers are declared only when this debug macro is defined. */
84 #ifdef RTE_LIBRTE_VMXNET3_DEBUG_DRIVER_NOT_USED
85 static void vmxnet3_rxq_dump(struct vmxnet3_rx_queue *);
86 static void vmxnet3_txq_dump(struct vmxnet3_tx_queue *);
/*
 * Allocate one mbuf from mempool mp with __rte_mbuf_raw_alloc() and pass it
 * through __rte_mbuf_sanity_check_raw().
 *
 * NOTE(review): the sanity check is called without a visible NULL test on m.
 * The caller in this file does test the result for NULL, so confirm that the
 * check itself tolerates a failed allocation.
 */
89 static struct rte_mbuf *
90 rte_rxmbuf_alloc(struct rte_mempool *mp)
94 m = __rte_mbuf_raw_alloc(mp);
95 __rte_mbuf_sanity_check_raw(m, 0);
/*
 * Debug helper for an Rx queue. Logs, in order:
 *   - the virtual base address of command ring 0, command ring 1 and the
 *     completion ring;
 *   - the physical base address (basePA) of the same three rings;
 *   - for each command ring: its size, the free descriptor count reported by
 *     vmxnet3_cmd_ring_desc_avail(), the completion ring next2proc index and
 *     the in-use count computed as size - free.
 *
 * NOTE(review): the format strings write a literal 0x in front of %p; many C
 * libraries already emit that prefix for %p, so the output may show it twice.
 * Confirm on the target libc.
 */
99 #ifdef RTE_LIBRTE_VMXNET3_DEBUG_DRIVER_NOT_USED
101 vmxnet3_rxq_dump(struct vmxnet3_rx_queue *rxq)
109 "RXQ: cmd0 base : 0x%p cmd1 base : 0x%p comp ring base : 0x%p.",
110 rxq->cmd_ring[0].base, rxq->cmd_ring[1].base, rxq->comp_ring.base);
112 "RXQ: cmd0 basePA : 0x%lx cmd1 basePA : 0x%lx comp ring basePA : 0x%lx.",
113 (unsigned long)rxq->cmd_ring[0].basePA,
114 (unsigned long)rxq->cmd_ring[1].basePA,
115 (unsigned long)rxq->comp_ring.basePA);
/* Free descriptors in command ring 0; reused below for ring 1. */
117 avail = vmxnet3_cmd_ring_desc_avail(&rxq->cmd_ring[0]);
119 "RXQ:cmd0: size=%u; free=%u; next2proc=%u; queued=%u",
120 (uint32_t)rxq->cmd_ring[0].size, avail,
121 rxq->comp_ring.next2proc,
122 rxq->cmd_ring[0].size - avail);
124 avail = vmxnet3_cmd_ring_desc_avail(&rxq->cmd_ring[1]);
125 PMD_RX_LOG(DEBUG, "RXQ:cmd1 size=%u; free=%u; next2proc=%u; queued=%u",
126 (uint32_t)rxq->cmd_ring[1].size, avail, rxq->comp_ring.next2proc,
127 rxq->cmd_ring[1].size - avail);
/*
 * Debug helper for a Tx queue. Logs the virtual and physical base addresses
 * of the command, completion and data rings, followed by the command ring
 * size, its free descriptor count, the completion ring next2proc index and
 * the in-use count (size - free).
 *
 * NOTE(review): as in the Rx dump, 0x is written in front of %p, which may
 * duplicate the prefix the C library already prints. Confirm.
 */
132 vmxnet3_txq_dump(struct vmxnet3_tx_queue *txq)
139 PMD_TX_LOG(DEBUG, "TXQ: cmd base : 0x%p comp ring base : 0x%p data ring base : 0x%p.",
140 txq->cmd_ring.base, txq->comp_ring.base, txq->data_ring.base);
141 PMD_TX_LOG(DEBUG, "TXQ: cmd basePA : 0x%lx comp ring basePA : 0x%lx data ring basePA : 0x%lx.",
142 (unsigned long)txq->cmd_ring.basePA,
143 (unsigned long)txq->comp_ring.basePA,
144 (unsigned long)txq->data_ring.basePA);
/* Number of command descriptors that are currently free. */
146 avail = vmxnet3_cmd_ring_desc_avail(&txq->cmd_ring);
147 PMD_TX_LOG(DEBUG, "TXQ: size=%u; free=%u; next2proc=%u; queued=%u",
148 (uint32_t)txq->cmd_ring.size, avail,
149 txq->comp_ring.next2proc, txq->cmd_ring.size - avail);
/*
 * Walk the command ring from next2comp up to (not including) next2fill.
 * For each slot the mbuf recorded in its buf_info entry is handed to
 * rte_pktmbuf_free() and next2comp is advanced by one descriptor, so the
 * loop ends when the two indices meet.
 */
154 vmxnet3_cmd_ring_release_mbufs(vmxnet3_cmd_ring_t *ring)
156 while (ring->next2comp != ring->next2fill) {
157 /* No need to worry about tx desc ownership, device is quiesced by now. */
158 vmxnet3_buf_info_t *buf_info = ring->buf_info + ring->next2comp;
161 rte_pktmbuf_free(buf_info->m);
166 vmxnet3_cmd_ring_adv_next2comp(ring);
/*
 * Tear down a command ring: free the mbufs it still holds, release the
 * buf_info array with rte_free(), and clear the pointer so that a stale
 * address cannot be used or freed again.
 */
171 vmxnet3_cmd_ring_release(vmxnet3_cmd_ring_t *ring)
173 vmxnet3_cmd_ring_release_mbufs(ring);
174 rte_free(ring->buf_info);
175 ring->buf_info = NULL;
/*
 * Release a Tx queue. txq is an opaque pointer that is interpreted as a
 * vmxnet3_tx_queue_t; its single command ring is released.
 */
180 vmxnet3_dev_tx_queue_release(void *txq)
182 vmxnet3_tx_queue_t *tq = txq;
185 /* Release the cmd_ring */
186 vmxnet3_cmd_ring_release(&tq->cmd_ring);
/*
 * Release an Rx queue. rxq is an opaque pointer that is interpreted as a
 * vmxnet3_rx_queue_t; each of its VMXNET3_RX_CMDRING_SIZE command rings is
 * released in turn.
 */
191 vmxnet3_dev_rx_queue_release(void *rxq)
194 vmxnet3_rx_queue_t *rq = rxq;
197 /* Release both the cmd_rings */
198 for (i = 0; i < VMXNET3_RX_CMDRING_SIZE; i++)
199 vmxnet3_cmd_ring_release(&rq->cmd_ring[i]);
/*
 * Bring a Tx queue back to its initial state: free the mbufs still attached
 * to the command ring, restore the generation bit of the command and
 * completion rings to VMXNET3_INIT_GEN, rewind the completion ring
 * next2proc index to 0 and zero the descriptor memory.
 *
 * The memset starts at the command ring base and its length is the sum of
 * the Tx, Tx-completion and Tx-data descriptor areas, so it depends on the
 * three rings being laid out back to back in one allocation (which is how
 * vmxnet3_dev_tx_queue_setup() places them).
 */
204 vmxnet3_dev_tx_queue_reset(void *txq)
206 vmxnet3_tx_queue_t *tq = txq;
207 struct vmxnet3_cmd_ring *ring = &tq->cmd_ring;
208 struct vmxnet3_comp_ring *comp_ring = &tq->comp_ring;
209 struct vmxnet3_data_ring *data_ring = &tq->data_ring;
213 /* Release the cmd_ring mbufs */
214 vmxnet3_cmd_ring_release_mbufs(&tq->cmd_ring);
217 /* Tx vmxnet rings structure initialization*/
220 ring->gen = VMXNET3_INIT_GEN;
221 comp_ring->next2proc = 0;
222 comp_ring->gen = VMXNET3_INIT_GEN;
/* Total byte size of the three contiguous descriptor areas. */
224 size = sizeof(struct Vmxnet3_TxDesc) * ring->size;
225 size += sizeof(struct Vmxnet3_TxCompDesc) * comp_ring->size;
226 size += sizeof(struct Vmxnet3_TxDataDesc) * data_ring->size;
228 memset(ring->base, 0, size);
/*
 * Bring an Rx queue back to its initial state: free the mbufs still posted
 * on both command rings, rewind next2fill and next2comp of both rings and
 * next2proc of the completion ring to 0, restore all generation bits to
 * VMXNET3_INIT_GEN and zero the descriptor memory.
 *
 * The memset starts at the base of ring 0 and covers both Rx descriptor
 * rings plus the completion ring, so it depends on those areas being
 * contiguous (which is how vmxnet3_dev_rx_queue_setup() places them).
 */
232 vmxnet3_dev_rx_queue_reset(void *rxq)
235 vmxnet3_rx_queue_t *rq = rxq;
236 struct vmxnet3_cmd_ring *ring0, *ring1;
237 struct vmxnet3_comp_ring *comp_ring;
241 /* Release both the cmd_rings mbufs */
242 for (i = 0; i < VMXNET3_RX_CMDRING_SIZE; i++)
243 vmxnet3_cmd_ring_release_mbufs(&rq->cmd_ring[i]);
246 ring0 = &rq->cmd_ring[0];
247 ring1 = &rq->cmd_ring[1];
248 comp_ring = &rq->comp_ring;
250 /* Rx vmxnet rings structure initialization */
251 ring0->next2fill = 0;
252 ring1->next2fill = 0;
253 ring0->next2comp = 0;
254 ring1->next2comp = 0;
255 ring0->gen = VMXNET3_INIT_GEN;
256 ring1->gen = VMXNET3_INIT_GEN;
257 comp_ring->next2proc = 0;
258 comp_ring->gen = VMXNET3_INIT_GEN;
/* Total byte size of both Rx descriptor rings plus the completion ring. */
260 size = sizeof(struct Vmxnet3_RxDesc) * (ring0->size + ring1->size);
261 size += sizeof(struct Vmxnet3_RxCompDesc) * comp_ring->size;
263 memset(ring0->base, 0, size);
/*
 * Reset every queue of the device: first all nb_tx_queues Tx queues through
 * vmxnet3_dev_tx_queue_reset(), then all nb_rx_queues Rx queues through
 * vmxnet3_dev_rx_queue_reset().
 */
267 vmxnet3_dev_clear_queues(struct rte_eth_dev *dev)
271 PMD_INIT_FUNC_TRACE();
273 for (i = 0; i < dev->data->nb_tx_queues; i++) {
274 struct vmxnet3_tx_queue *txq = dev->data->tx_queues[i];
278 vmxnet3_dev_tx_queue_reset(txq);
282 for (i = 0; i < dev->data->nb_rx_queues; i++) {
283 struct vmxnet3_rx_queue *rxq = dev->data->rx_queues[i];
287 vmxnet3_dev_rx_queue_reset(rxq);
/*
 * Release the transmitted packet whose end-of-packet descriptor sits at
 * eop_idx in the Tx command ring of txq.
 *
 * The mbuf stored in the buf_info entry of the EOP slot is freed and the
 * entry is cleared. next2comp is then advanced over every descriptor in
 * front of eop_idx, and once more for the EOP descriptor itself.
 *
 * Returns completed + 1. NOTE(review): presumably this is the number of
 * descriptors released, with completed counting the loop iterations; the
 * statement that updates completed is not visible here, so confirm.
 *
 * The rte_panic() call reports an EOP descriptor that has no valid mbuf.
 */
293 vmxnet3_unmap_pkt(uint16_t eop_idx, vmxnet3_tx_queue_t *txq)
296 struct rte_mbuf *mbuf;
298 /* Release cmd_ring descriptor and free mbuf */
299 VMXNET3_ASSERT(txq->cmd_ring.base[eop_idx].txd.eop == 1);
301 mbuf = txq->cmd_ring.buf_info[eop_idx].m;
303 rte_panic("EOP desc does not point to a valid mbuf");
304 rte_pktmbuf_free(mbuf);
306 txq->cmd_ring.buf_info[eop_idx].m = NULL;
308 while (txq->cmd_ring.next2comp != eop_idx) {
309 /* no out-of-order completion */
310 VMXNET3_ASSERT(txq->cmd_ring.base[txq->cmd_ring.next2comp].txd.cq == 0);
311 vmxnet3_cmd_ring_adv_next2comp(&txq->cmd_ring);
315 /* Mark the txd for which tcd was generated as completed */
316 vmxnet3_cmd_ring_adv_next2comp(&txq->cmd_ring);
318 return completed + 1;
/*
 * Drain the Tx completion ring of txq.
 *
 * A completion descriptor is treated as ready while its gen field equals the
 * current generation of the completion ring. For each ready descriptor the
 * packet ending at tcd->txdIdx is released through vmxnet3_unmap_pkt(), the
 * ring is advanced, and the descriptor at the new next2proc is examined.
 * The accumulated count is written to the debug log.
 */
322 vmxnet3_tq_tx_complete(vmxnet3_tx_queue_t *txq)
325 vmxnet3_comp_ring_t *comp_ring = &txq->comp_ring;
326 struct Vmxnet3_TxCompDesc *tcd = (struct Vmxnet3_TxCompDesc *)
327 (comp_ring->base + comp_ring->next2proc);
329 while (tcd->gen == comp_ring->gen) {
330 completed += vmxnet3_unmap_pkt(tcd->txdIdx, txq);
332 vmxnet3_comp_ring_adv_next2proc(comp_ring);
333 tcd = (struct Vmxnet3_TxCompDesc *)(comp_ring->base +
334 comp_ring->next2proc);
337 PMD_TX_LOG(DEBUG, "Processed %d tx comps & command descs.", completed);
/*
 * Transmit burst for one Tx queue.
 *
 * tx_queue is the vmxnet3_tx_queue_t to send on and tx_pkts holds the mbufs
 * to transmit; the loop runs while nb_tx is below nb_pkts.
 *
 * Outline of the visible logic:
 *   - a stopped queue is only logged;
 *   - completed descriptors are reclaimed once, before the loop;
 *   - per packet, the number of free command descriptors is compared with
 *     the segment count; a full ring updates tx_ring_full and drop_total,
 *     a packet that does not fit or a non-TSO packet with more than
 *     VMXNET3_MAX_TXD_PER_PKT segments is counted as dropped and freed;
 *   - a single-segment packet of at most VMXNET3_HDR_COPY_SIZE bytes is
 *     copied into the data ring entry that matches the command ring slot;
 *   - one descriptor is filled per mbuf segment; the first (SOP) descriptor
 *     is written with the inverted generation bit so that the device does
 *     not own the chain until the final flip;
 *   - EOP and CQ are set on the last descriptor, VLAN and offload fields
 *     are set on the SOP descriptor, then the SOP generation bit is flipped;
 *   - the deferred count is stored in the shared control area, and once it
 *     reaches txThreshold the Tx producer register is written with
 *     next2fill.
 */
341 vmxnet3_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts,
345 vmxnet3_tx_queue_t *txq = tx_queue;
346 struct vmxnet3_hw *hw = txq->hw;
347 Vmxnet3_TxQueueCtrl *txq_ctrl = &txq->shared->ctrl;
/* Start from the deferred count already held in the shared control area. */
348 uint32_t deferred = rte_le_to_cpu_32(txq_ctrl->txNumDeferred);
350 if (unlikely(txq->stopped)) {
351 PMD_TX_LOG(DEBUG, "Tx queue is stopped.");
355 /* Free up the comp_descriptors aggressively */
356 vmxnet3_tq_tx_complete(txq);
359 while (nb_tx < nb_pkts) {
360 Vmxnet3_GenericDesc *gdesc;
361 vmxnet3_buf_info_t *tbi;
362 uint32_t first2fill, avail, dw2;
363 struct rte_mbuf *txm = tx_pkts[nb_tx];
364 struct rte_mbuf *m_seg = txm;
366 bool tso = (txm->ol_flags & PKT_TX_TCP_SEG) != 0;
367 /* # of descriptors needed for a packet. */
368 unsigned count = txm->nb_segs;
370 avail = vmxnet3_cmd_ring_desc_avail(&txq->cmd_ring);
372 /* Is command ring full? */
373 if (unlikely(avail == 0)) {
374 PMD_TX_LOG(DEBUG, "No free ring descriptors");
375 txq->stats.tx_ring_full++;
376 txq->stats.drop_total += (nb_pkts - nb_tx);
380 /* Command ring is not full but cannot handle the
381 * multi-segmented packet. Let's try the next packet
384 PMD_TX_LOG(DEBUG, "Running out of ring descriptors "
385 "(avail %d needed %d)", avail, count);
386 txq->stats.drop_total++;
388 txq->stats.drop_tso++;
389 rte_pktmbuf_free(txm);
394 /* Drop non-TSO packet that is excessively fragmented */
395 if (unlikely(!tso && count > VMXNET3_MAX_TXD_PER_PKT)) {
396 PMD_TX_LOG(ERR, "Non-TSO packet cannot occupy more than %d tx "
397 "descriptors. Packet dropped.", VMXNET3_MAX_TXD_PER_PKT);
398 txq->stats.drop_too_many_segs++;
399 txq->stats.drop_total++;
400 rte_pktmbuf_free(txm);
/* Small single-segment packet: copy its bytes into the data ring entry of this slot. */
405 if (txm->nb_segs == 1 && rte_pktmbuf_pkt_len(txm) <= VMXNET3_HDR_COPY_SIZE) {
406 struct Vmxnet3_TxDataDesc *tdd;
408 tdd = txq->data_ring.base + txq->cmd_ring.next2fill;
409 copy_size = rte_pktmbuf_pkt_len(txm);
410 rte_memcpy(tdd->data, rte_pktmbuf_mtod(txm, char *), copy_size);
413 /* use the previous gen bit for the SOP desc */
414 dw2 = (txq->cmd_ring.gen ^ 0x1) << VMXNET3_TXD_GEN_SHIFT;
415 first2fill = txq->cmd_ring.next2fill;
417 /* Remember the transmit buffer for cleanup */
418 tbi = txq->cmd_ring.buf_info + txq->cmd_ring.next2fill;
420 /* NB: the following assumes that VMXNET3 maximum
421 * transmit buffer size (16K) is greater than
422 * maximum size of mbuf segment size.
424 gdesc = txq->cmd_ring.base + txq->cmd_ring.next2fill;
426 gdesc->txd.addr = rte_cpu_to_le_64(txq->data_ring.basePA +
427 txq->cmd_ring.next2fill *
428 sizeof(struct Vmxnet3_TxDataDesc));
430 gdesc->txd.addr = rte_mbuf_data_dma_addr(m_seg);
432 gdesc->dword[2] = dw2 | m_seg->data_len;
435 /* move to the next2fill descriptor */
436 vmxnet3_cmd_ring_adv_next2fill(&txq->cmd_ring);
438 /* use the right gen for non-SOP desc */
439 dw2 = txq->cmd_ring.gen << VMXNET3_TXD_GEN_SHIFT;
440 } while ((m_seg = m_seg->next) != NULL);
442 /* set the last buf_info for the pkt */
444 /* Update the EOP descriptor */
445 gdesc->dword[3] |= VMXNET3_TXD_EOP | VMXNET3_TXD_CQ;
447 /* Add VLAN tag if present */
/* From here on gdesc points at the first (SOP) descriptor of the packet. */
448 gdesc = txq->cmd_ring.base + first2fill;
449 if (txm->ol_flags & PKT_TX_VLAN_PKT) {
451 gdesc->txd.tci = txm->vlan_tci;
455 uint16_t mss = txm->tso_segsz;
457 VMXNET3_ASSERT(mss > 0);
/* TSO: hlen spans the L2, L3 and L4 headers and msscof carries the MSS. */
459 gdesc->txd.hlen = txm->l2_len + txm->l3_len + txm->l4_len;
460 gdesc->txd.om = VMXNET3_OM_TSO;
461 gdesc->txd.msscof = mss;
/* Add the number of segments the payload splits into: ceil((pkt_len - hlen) / mss). */
463 deferred += (rte_pktmbuf_pkt_len(txm) - gdesc->txd.hlen + mss - 1) / mss;
464 } else if (txm->ol_flags & PKT_TX_L4_MASK) {
/* Checksum offload: hlen spans L2 and L3, msscof is hlen plus the offset of the checksum field. */
465 gdesc->txd.om = VMXNET3_OM_CSUM;
466 gdesc->txd.hlen = txm->l2_len + txm->l3_len;
468 switch (txm->ol_flags & PKT_TX_L4_MASK) {
469 case PKT_TX_TCP_CKSUM:
470 gdesc->txd.msscof = gdesc->txd.hlen + offsetof(struct tcp_hdr, cksum);
472 case PKT_TX_UDP_CKSUM:
473 gdesc->txd.msscof = gdesc->txd.hlen + offsetof(struct udp_hdr, dgram_cksum);
/*
 * NOTE(review): the argument is printed with %#llx. If ol_flags is a
 * uint64_t (presumably it is; confirm against rte_mbuf.h), that type is
 * unsigned long on LP64 targets and the specifier does not match; PRIx64
 * from inttypes.h would be the portable choice.
 */
476 PMD_TX_LOG(WARNING, "requested cksum offload not supported %#llx",
477 txm->ol_flags & PKT_TX_L4_MASK);
483 gdesc->txd.om = VMXNET3_OM_NONE;
484 gdesc->txd.msscof = 0;
488 /* flip the GEN bit on the SOP */
/* Compiler barrier: keeps the descriptor stores above from being reordered past the flip. */
489 rte_compiler_barrier();
490 gdesc->dword[2] ^= VMXNET3_TXD_GEN;
492 txq_ctrl->txNumDeferred = rte_cpu_to_le_32(deferred);
496 PMD_TX_LOG(DEBUG, "vmxnet3 txThreshold: %u", rte_le_to_cpu_32(txq_ctrl->txThreshold));
/* Write the producer register only after enough work has been deferred. */
498 if (deferred >= rte_le_to_cpu_32(txq_ctrl->txThreshold)) {
499 txq_ctrl->txNumDeferred = 0;
500 /* Notify vSwitch that packets are available. */
501 VMXNET3_WRITE_BAR0_REG(hw, (VMXNET3_REG_TXPROD + txq->queue_id * VMXNET3_REG_ALIGN),
502 txq->cmd_ring.next2fill);
509 * Allocates mbufs and clusters. Post rx descriptors with buffer details
510 * so that device can receive packets in those buffers.
512 * Among the two rings, 1st ring contains buffers of type 0 and type1.
513 * bufs_per_pkt is set such that for non-LRO cases all the buffers required
514 * by a frame will fit in 1st ring (1st buf of type0 and rest of type1).
515 * 2nd ring contains buffers of type 1 alone. The second ring is mostly used
/*
 * Refill Rx command ring ring_id of rxq.
 *
 * While the ring has free descriptors, a fresh mbuf is taken from rxq->mp,
 * its usable length and DMA address are recorded in the buf_info entry of
 * the slot, the descriptor is loaded with that address and length, and the
 * generation bit is written last so that ownership changes only when the
 * descriptor is complete. A failed allocation is logged and counted in
 * rx_buf_alloc_failure.
 *
 * According to the comments in the body, buffers are of HEAD type on the
 * first ring and of BODY type on the second ring.
 *
 * The final test is meant to report an error only when no buffers at all
 * are posted, that is when the free count is at least size - 1.
 */
520 vmxnet3_post_rx_bufs(vmxnet3_rx_queue_t *rxq, uint8_t ring_id)
523 uint32_t i = 0, val = 0;
524 struct vmxnet3_cmd_ring *ring = &rxq->cmd_ring[ring_id];
527 /* Usually: One HEAD type buf per packet
528 * val = (ring->next2fill % rxq->hw->bufs_per_pkt) ?
529 * VMXNET3_RXD_BTYPE_BODY : VMXNET3_RXD_BTYPE_HEAD;
532 /* We use single packet buffer so all heads here */
533 val = VMXNET3_RXD_BTYPE_HEAD;
535 /* All BODY type buffers for 2nd ring */
536 val = VMXNET3_RXD_BTYPE_BODY;
539 while (vmxnet3_cmd_ring_desc_avail(ring) > 0) {
540 struct Vmxnet3_RxDesc *rxd;
541 struct rte_mbuf *mbuf;
542 vmxnet3_buf_info_t *buf_info = &ring->buf_info[ring->next2fill];
544 rxd = (struct Vmxnet3_RxDesc *)(ring->base + ring->next2fill);
546 /* Allocate blank mbuf for the current Rx Descriptor */
547 mbuf = rte_rxmbuf_alloc(rxq->mp);
548 if (unlikely(mbuf == NULL)) {
549 PMD_RX_LOG(ERR, "Error allocating mbuf");
550 rxq->stats.rx_buf_alloc_failure++;
556 * Load mbuf pointer into buf_info[ring_size]
557 * buf_info structure is equivalent to cookie for virtio-virtqueue
/* The usable buffer length excludes the mbuf headroom. */
560 buf_info->len = (uint16_t)(mbuf->buf_len -
561 RTE_PKTMBUF_HEADROOM);
563 rte_mbuf_data_dma_addr_default(mbuf);
565 /* Load Rx Descriptor with the buffer's GPA */
566 rxd->addr = buf_info->bufPA;
568 /* After this point rxd->addr MUST not be NULL */
570 rxd->len = buf_info->len;
571 /* Flip gen bit at the end to change ownership */
572 rxd->gen = ring->gen;
574 vmxnet3_cmd_ring_adv_next2fill(ring);
578 /* Return error only if no buffers are posted at present */
579 if (vmxnet3_cmd_ring_desc_avail(ring) >= (ring->size - 1))
586 /* Receive side checksum and other offloads */
/*
 * Copy offload results from the Rx completion descriptor rcd into the
 * metadata of mbuf rxm:
 *   - VLAN: sets PKT_RX_VLAN_PKT and stores the tag in vlan_tci;
 *   - RSS: when rssType is not NONE, sets PKT_RX_RSS_HASH and stores the
 *     hash value;
 *   - packet type: IPv4 or IPv4 with options, decided from the header
 *     length field of the IP header;
 *   - checksum: may set PKT_RX_IP_CKSUM_BAD and PKT_RX_L4_CKSUM_BAD.
 */
588 vmxnet3_rx_offload(const Vmxnet3_RxCompDesc *rcd, struct rte_mbuf *rxm)
590 /* Check for hardware stripped VLAN tag */
592 rxm->ol_flags |= PKT_RX_VLAN_PKT;
593 rxm->vlan_tci = rte_le_to_cpu_16((uint16_t)rcd->tci);
597 if (rcd->rssType != VMXNET3_RCD_RSS_TYPE_NONE) {
598 rxm->ol_flags |= PKT_RX_RSS_HASH;
599 rxm->hash.rss = rcd->rssHash;
602 /* Check packet type, checksum errors, etc. Only support IPv4 for now. */
/* The IP header is taken to start directly after the Ethernet header. */
604 struct ether_hdr *eth = rte_pktmbuf_mtod(rxm, struct ether_hdr *);
605 struct ipv4_hdr *ip = (struct ipv4_hdr *)(eth + 1);
/* Low nibble of version_ihl counts 32-bit words; longer than struct ipv4_hdr means options are present. */
607 if (((ip->version_ihl & 0xf) << 2) > (int)sizeof(struct ipv4_hdr))
608 rxm->packet_type = RTE_PTYPE_L3_IPV4_EXT;
610 rxm->packet_type = RTE_PTYPE_L3_IPV4;
614 rxm->ol_flags |= PKT_RX_IP_CKSUM_BAD;
/* TCP or UDP packet with tuc clear (presumably tuc means the L4 checksum was verified; confirm). */
616 if ((rcd->tcp || rcd->udp) && !rcd->tuc)
617 rxm->ol_flags |= PKT_RX_L4_CKSUM_BAD;
623 * Process the Rx Completion Ring of given vmxnet3_rx_queue
624 * for nb_pkts burst and return the number of packets received
/*
 * Receive burst for one Rx queue.
 *
 * rx_queue is the queue to poll, rx_pkts receives the mbuf pointers and
 * nb_pkts is the capacity of rx_pkts.
 *
 * Completion descriptors are consumed while their gen field matches the
 * generation of the completion ring and nb_rx is below nb_pkts. For each
 * one the originating command ring is selected from rqID, and the buffer
 * is freed instead of delivered when the packet spans several buffers
 * (sop or eop not set), when the reported length is 0, when the buffer
 * type is not HEAD, or when the descriptor reports an error. Otherwise
 * the mbuf fields (port, lengths, data offset) are filled in, offload
 * results are applied and the mbuf is stored in rx_pkts.
 *
 * After each descriptor, next2comp of the command ring is moved past idx,
 * the ring is refilled through vmxnet3_post_rx_bufs(), the producer
 * register is written if the device requested it through updateRxProd,
 * and the completion ring is advanced. Processing also stops once nb_rxd
 * exceeds the size of command ring 0.
 */
627 vmxnet3_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t nb_pkts)
630 uint32_t nb_rxd, idx;
632 vmxnet3_rx_queue_t *rxq;
633 Vmxnet3_RxCompDesc *rcd;
634 vmxnet3_buf_info_t *rbi;
636 struct rte_mbuf *rxm = NULL;
637 struct vmxnet3_hw *hw;
647 rcd = &rxq->comp_ring.base[rxq->comp_ring.next2proc].rcd;
649 if (unlikely(rxq->stopped)) {
650 PMD_RX_LOG(DEBUG, "Rx queue is stopped.");
654 while (rcd->gen == rxq->comp_ring.gen) {
655 if (nb_rx >= nb_pkts)
/* A completion whose rqID equals qid1 belongs to command ring 0, any other to ring 1. */
659 ring_idx = (uint8_t)((rcd->rqID == rxq->qid1) ? 0 : 1);
660 rxd = (Vmxnet3_RxDesc *)rxq->cmd_ring[ring_idx].base + idx;
661 rbi = rxq->cmd_ring[ring_idx].buf_info + idx;
/*
 * NOTE(review): this debug message and the one for a skipped buffer below
 * both end with a newline escape followed by a closing parenthesis inside
 * the string literal; this looks like a misplaced parenthesis. Confirm.
 */
663 if (unlikely(rcd->sop != 1 || rcd->eop != 1)) {
664 rte_pktmbuf_free_seg(rbi->m);
665 PMD_RX_LOG(DEBUG, "Packet spread across multiple buffers\n)");
669 PMD_RX_LOG(DEBUG, "rxd idx: %d ring idx: %d.", idx, ring_idx);
671 VMXNET3_ASSERT(rcd->len <= rxd->len);
672 VMXNET3_ASSERT(rbi->m);
674 if (unlikely(rcd->len == 0)) {
675 PMD_RX_LOG(DEBUG, "Rx buf was skipped. rxring[%d][%d]\n)",
677 VMXNET3_ASSERT(rcd->sop && rcd->eop);
678 rte_pktmbuf_free_seg(rbi->m);
682 /* Assuming a packet is coming in a single packet buffer */
683 if (unlikely(rxd->btype != VMXNET3_RXD_BTYPE_HEAD)) {
685 "Alert : Misbehaving device, incorrect "
686 " buffer type used. Packet dropped.");
687 rte_pktmbuf_free_seg(rbi->m);
690 VMXNET3_ASSERT(rxd->btype == VMXNET3_RXD_BTYPE_HEAD);
692 /* Get the packet buffer pointer from buf_info */
695 /* Clear descriptor associated buf_info to be reused */
699 /* Update the index that we received a packet */
700 rxq->cmd_ring[ring_idx].next2comp = idx;
702 /* For RCD with EOP set, check if there is frame error */
703 if (unlikely(rcd->err)) {
704 rxq->stats.drop_total++;
705 rxq->stats.drop_err++;
708 rxq->stats.drop_fcs++;
709 PMD_RX_LOG(ERR, "Recv packet dropped due to frame err.");
711 PMD_RX_LOG(ERR, "Error in received packet rcd#:%d rxd:%d",
712 (int)(rcd - (struct Vmxnet3_RxCompDesc *)
713 rxq->comp_ring.base), rcd->rxdIdx);
714 rte_pktmbuf_free_seg(rxm);
719 /* Initialize newly received packet buffer */
720 rxm->port = rxq->port_id;
723 rxm->pkt_len = (uint16_t)rcd->len;
724 rxm->data_len = (uint16_t)rcd->len;
725 rxm->data_off = RTE_PKTMBUF_HEADROOM;
729 vmxnet3_rx_offload(rcd, rxm);
731 rx_pkts[nb_rx++] = rxm;
/*
 * NOTE(review): next2comp is set to idx here and also earlier in the loop
 * body; one of the two stores may be redundant. Confirm before removing.
 */
733 rxq->cmd_ring[ring_idx].next2comp = idx;
734 VMXNET3_INC_RING_IDX_ONLY(rxq->cmd_ring[ring_idx].next2comp, rxq->cmd_ring[ring_idx].size);
736 /* It's time to allocate some new buf and renew descriptors */
737 vmxnet3_post_rx_bufs(rxq, ring_idx);
738 if (unlikely(rxq->shared->ctrl.updateRxProd)) {
739 VMXNET3_WRITE_BAR0_REG(hw, rxprod_reg[ring_idx] + (rxq->queue_id * VMXNET3_REG_ALIGN),
740 rxq->cmd_ring[ring_idx].next2fill);
743 /* Advance to the next descriptor in comp_ring */
744 vmxnet3_comp_ring_adv_next2proc(&rxq->comp_ring);
746 rcd = &rxq->comp_ring.base[rxq->comp_ring.next2proc].rcd;
/* Bound the work done in one call by the size of command ring 0. */
748 if (nb_rxd > rxq->cmd_ring[0].size) {
750 "Used up quota of receiving packets,"
751 " relinquish control.");
760 * Create memzone for device rings. malloc can't be used as the physical address is
761 * needed. If the memzone is already created, then this function returns a ptr
/*
 * Obtain the memzone that backs the descriptor rings of one queue.
 *
 * The zone name is built as driver name, ring_name, port id and queue_id
 * joined by underscores. The name is looked up first; the final statement
 * reserves a new zone of ring_size bytes on socket_id, aligned to
 * VMXNET3_RING_BA_ALIGN, and returns the result of that reservation.
 */
764 static const struct rte_memzone *
765 ring_dma_zone_reserve(struct rte_eth_dev *dev, const char *ring_name,
766 uint16_t queue_id, uint32_t ring_size, int socket_id)
768 char z_name[RTE_MEMZONE_NAMESIZE];
769 const struct rte_memzone *mz;
771 snprintf(z_name, sizeof(z_name), "%s_%s_%d_%d",
772 dev->driver->pci_drv.name, ring_name,
773 dev->data->port_id, queue_id);
775 mz = rte_memzone_lookup(z_name);
779 return rte_memzone_reserve_aligned(z_name, ring_size,
780 socket_id, 0, VMXNET3_RING_BA_ALIGN);
/*
 * Set up Tx queue queue_idx of dev.
 *
 * Steps visible in the body:
 *   - check the checksum flags in tx_conf;
 *   - allocate a zeroed vmxnet3_tx_queue and fill in its ids and the
 *     pointer to the shared queue descriptor;
 *   - validate nb_desc against VMXNET3_DEF_TX_RING_SIZE and
 *     VMXNET3_TX_RING_MAX_SIZE, then clear the VMXNET3_RING_SIZE_MASK bits
 *     of the ring size; the completion and data rings use the same size;
 *   - reserve one memzone for the command, completion and data descriptors,
 *     zero it, and carve the three rings out of it back to back;
 *   - allocate the buf_info array of the command ring;
 *   - publish the queue in dev->data->tx_queues[queue_idx].
 *
 * NOTE(review): tx_conf carries __attribute__((unused)) but is dereferenced
 * for txq_flags, so the attribute is misleading.
 *
 * NOTE(review): no rte_free(txq) is visible on the failure paths that follow
 * the allocation of txq (ring size check, memzone reservation, buf_info
 * allocation). Confirm that the structure is not leaked on those paths.
 */
784 vmxnet3_dev_tx_queue_setup(struct rte_eth_dev *dev,
787 unsigned int socket_id,
788 __attribute__((unused)) const struct rte_eth_txconf *tx_conf)
790 struct vmxnet3_hw *hw = dev->data->dev_private;
791 const struct rte_memzone *mz;
792 struct vmxnet3_tx_queue *txq;
793 struct vmxnet3_cmd_ring *ring;
794 struct vmxnet3_comp_ring *comp_ring;
795 struct vmxnet3_data_ring *data_ring;
798 PMD_INIT_FUNC_TRACE();
/* Accepted only when, within the NOXSUMS flag group, exactly the NOXSUMSCTP bits are set. */
800 if ((tx_conf->txq_flags & ETH_TXQ_FLAGS_NOXSUMS) !=
801 ETH_TXQ_FLAGS_NOXSUMSCTP) {
802 PMD_INIT_LOG(ERR, "SCTP checksum offload not supported");
806 txq = rte_zmalloc("ethdev_tx_queue", sizeof(struct vmxnet3_tx_queue), RTE_CACHE_LINE_SIZE);
808 PMD_INIT_LOG(ERR, "Can not allocate tx queue structure");
812 txq->queue_id = queue_idx;
813 txq->port_id = dev->data->port_id;
814 txq->shared = &hw->tqd_start[queue_idx];
816 txq->qid = queue_idx;
819 ring = &txq->cmd_ring;
820 comp_ring = &txq->comp_ring;
821 data_ring = &txq->data_ring;
823 /* Tx vmxnet ring length should be between 512-4096 */
824 if (nb_desc < VMXNET3_DEF_TX_RING_SIZE) {
825 PMD_INIT_LOG(ERR, "VMXNET3 Tx Ring Size Min: %u",
826 VMXNET3_DEF_TX_RING_SIZE);
828 } else if (nb_desc > VMXNET3_TX_RING_MAX_SIZE) {
829 PMD_INIT_LOG(ERR, "VMXNET3 Tx Ring Size Max: %u",
830 VMXNET3_TX_RING_MAX_SIZE);
/* Clear the low mask bits so that the size is a multiple of the ring granularity. */
833 ring->size = nb_desc;
834 ring->size &= ~VMXNET3_RING_SIZE_MASK;
836 comp_ring->size = data_ring->size = ring->size;
838 /* Tx vmxnet rings structure initialization*/
841 ring->gen = VMXNET3_INIT_GEN;
842 comp_ring->next2proc = 0;
843 comp_ring->gen = VMXNET3_INIT_GEN;
/* One allocation holds all three descriptor areas. */
845 size = sizeof(struct Vmxnet3_TxDesc) * ring->size;
846 size += sizeof(struct Vmxnet3_TxCompDesc) * comp_ring->size;
847 size += sizeof(struct Vmxnet3_TxDataDesc) * data_ring->size;
849 mz = ring_dma_zone_reserve(dev, "txdesc", queue_idx, size, socket_id);
851 PMD_INIT_LOG(ERR, "ERROR: Creating queue descriptors zone");
854 memset(mz->addr, 0, mz->len);
856 /* cmd_ring initialization */
857 ring->base = mz->addr;
858 ring->basePA = mz->phys_addr;
860 /* comp_ring initialization */
/* The completion ring starts right after the last command descriptor. */
861 comp_ring->base = ring->base + ring->size;
862 comp_ring->basePA = ring->basePA +
863 (sizeof(struct Vmxnet3_TxDesc) * ring->size);
865 /* data_ring initialization */
/* The data ring starts right after the last completion descriptor. */
866 data_ring->base = (Vmxnet3_TxDataDesc *)(comp_ring->base + comp_ring->size);
867 data_ring->basePA = comp_ring->basePA +
868 (sizeof(struct Vmxnet3_TxCompDesc) * comp_ring->size);
870 /* cmd_ring0 buf_info allocation */
871 ring->buf_info = rte_zmalloc("tx_ring_buf_info",
872 ring->size * sizeof(vmxnet3_buf_info_t), RTE_CACHE_LINE_SIZE);
873 if (ring->buf_info == NULL) {
874 PMD_INIT_LOG(ERR, "ERROR: Creating tx_buf_info structure");
878 /* Update the data portion with txq */
879 dev->data->tx_queues[queue_idx] = txq;
/*
 * Set up Rx queue queue_idx of dev, drawing receive buffers from mempool mp.
 *
 * Steps visible in the body:
 *   - compute the usable buffer size of mp (data room minus headroom) and
 *     compare it with max_rx_pkt_len, since scattered receive is not
 *     supported;
 *   - allocate a zeroed vmxnet3_rx_queue and fill in its ids, the pointer to
 *     the shared queue descriptor and the two ring ids qid1 and qid2;
 *   - validate nb_desc against VMXNET3_DEF_RX_RING_SIZE and
 *     VMXNET3_RX_RING_MAX_SIZE, then clear the VMXNET3_RING_SIZE_MASK bits;
 *     both command rings get the same size and the completion ring gets
 *     the sum of the two;
 *   - reserve one memzone for both command rings and the completion ring,
 *     zero it, and carve the three rings out of it back to back;
 *   - allocate one buf_info array per command ring;
 *   - publish the queue in dev->data->rx_queues[queue_idx].
 *
 * NOTE(review): no rte_free(rxq) is visible on the failure paths that follow
 * the allocation of rxq. Confirm that the structure is not leaked there.
 *
 * NOTE(review): the two ring size error messages hard-code 256 and 4096
 * while the checks use the macros; the Tx setup prints the macro values.
 */
885 vmxnet3_dev_rx_queue_setup(struct rte_eth_dev *dev,
888 unsigned int socket_id,
889 __attribute__((unused)) const struct rte_eth_rxconf *rx_conf,
890 struct rte_mempool *mp)
892 const struct rte_memzone *mz;
893 struct vmxnet3_rx_queue *rxq;
894 struct vmxnet3_hw *hw = dev->data->dev_private;
895 struct vmxnet3_cmd_ring *ring0, *ring1, *ring;
896 struct vmxnet3_comp_ring *comp_ring;
902 PMD_INIT_FUNC_TRACE();
/* Bytes available for packet data in one mbuf of this pool. */
904 buf_size = rte_pktmbuf_data_room_size(mp) -
905 RTE_PKTMBUF_HEADROOM;
907 if (dev->data->dev_conf.rxmode.max_rx_pkt_len > buf_size) {
908 PMD_INIT_LOG(ERR, "buf_size = %u, max_pkt_len = %u, "
909 "VMXNET3 don't support scatter packets yet",
910 buf_size, dev->data->dev_conf.rxmode.max_rx_pkt_len);
914 rxq = rte_zmalloc("ethdev_rx_queue", sizeof(struct vmxnet3_rx_queue), RTE_CACHE_LINE_SIZE);
916 PMD_INIT_LOG(ERR, "Can not allocate rx queue structure");
921 rxq->queue_id = queue_idx;
922 rxq->port_id = dev->data->port_id;
923 rxq->shared = &hw->rqd_start[queue_idx];
/* Ids of the two command rings of this queue; the second is offset by the Rx queue count. */
925 rxq->qid1 = queue_idx;
926 rxq->qid2 = queue_idx + hw->num_rx_queues;
929 ring0 = &rxq->cmd_ring[0];
930 ring1 = &rxq->cmd_ring[1];
931 comp_ring = &rxq->comp_ring;
933 /* Rx vmxnet rings length should be between 256-4096 */
934 if (nb_desc < VMXNET3_DEF_RX_RING_SIZE) {
935 PMD_INIT_LOG(ERR, "VMXNET3 Rx Ring Size Min: 256");
937 } else if (nb_desc > VMXNET3_RX_RING_MAX_SIZE) {
938 PMD_INIT_LOG(ERR, "VMXNET3 Rx Ring Size Max: 4096");
941 ring0->size = nb_desc;
942 ring0->size &= ~VMXNET3_RING_SIZE_MASK;
943 ring1->size = ring0->size;
/* One completion entry for every descriptor of either command ring. */
946 comp_ring->size = ring0->size + ring1->size;
948 /* Rx vmxnet rings structure initialization */
949 ring0->next2fill = 0;
950 ring1->next2fill = 0;
951 ring0->next2comp = 0;
952 ring1->next2comp = 0;
953 ring0->gen = VMXNET3_INIT_GEN;
954 ring1->gen = VMXNET3_INIT_GEN;
955 comp_ring->next2proc = 0;
956 comp_ring->gen = VMXNET3_INIT_GEN;
958 size = sizeof(struct Vmxnet3_RxDesc) * (ring0->size + ring1->size);
959 size += sizeof(struct Vmxnet3_RxCompDesc) * comp_ring->size;
961 mz = ring_dma_zone_reserve(dev, "rxdesc", queue_idx, size, socket_id);
963 PMD_INIT_LOG(ERR, "ERROR: Creating queue descriptors zone");
966 memset(mz->addr, 0, mz->len);
968 /* cmd_ring0 initialization */
969 ring0->base = mz->addr;
970 ring0->basePA = mz->phys_addr;
972 /* cmd_ring1 initialization */
973 ring1->base = ring0->base + ring0->size;
974 ring1->basePA = ring0->basePA + sizeof(struct Vmxnet3_RxDesc) * ring0->size;
976 /* comp_ring initialization */
977 comp_ring->base = ring1->base + ring1->size;
978 comp_ring->basePA = ring1->basePA + sizeof(struct Vmxnet3_RxDesc) *
981 /* cmd_ring0-cmd_ring1 buf_info allocation */
982 for (i = 0; i < VMXNET3_RX_CMDRING_SIZE; i++) {
984 ring = &rxq->cmd_ring[i];
986 snprintf(mem_name, sizeof(mem_name), "rx_ring_%d_buf_info", i);
988 ring->buf_info = rte_zmalloc(mem_name, ring->size * sizeof(vmxnet3_buf_info_t), RTE_CACHE_LINE_SIZE);
989 if (ring->buf_info == NULL) {
990 PMD_INIT_LOG(ERR, "ERROR: Creating rx_buf_info structure");
995 /* Update the data portion with rxq */
996 dev->data->rx_queues[queue_idx] = rxq;
1002 * Initializes Receive Unit
1003 * Load mbufs in rx queue in advance
/*
 * Prepare all queues of dev for traffic.
 *
 * For every Rx queue, both command rings are filled with buffers through
 * vmxnet3_post_rx_bufs(); a failure is logged with the queue and ring
 * number. When the device requests it through updateRxProd, the producer
 * register of the ring is written with next2fill. The queue is then marked
 * as not stopped. Afterwards every Tx queue is marked as not stopped.
 *
 * NOTE(review): the Rx loop is bounded by hw->num_rx_queues while the Tx
 * loop is bounded by dev->data->nb_tx_queues. Confirm that the two sources
 * always agree with the number of queues that were set up.
 */
1006 vmxnet3_dev_rxtx_init(struct rte_eth_dev *dev)
1008 struct vmxnet3_hw *hw = dev->data->dev_private;
1013 PMD_INIT_FUNC_TRACE();
1015 for (i = 0; i < hw->num_rx_queues; i++) {
1016 vmxnet3_rx_queue_t *rxq = dev->data->rx_queues[i];
1018 for (j = 0; j < VMXNET3_RX_CMDRING_SIZE; j++) {
/*
 * NOTE(review): the comment on the next line looks stale. The second
 * argument of vmxnet3_post_rx_bufs() is the ring id, not an allocation
 * count.
 */
1019 /* Passing 0 as alloc_num will allocate full ring */
1020 ret = vmxnet3_post_rx_bufs(rxq, j);
1022 PMD_INIT_LOG(ERR, "ERROR: Posting Rxq: %d buffers ring: %d", i, j);
1025 /* Updating device with the index:next2fill to fill the mbufs for coming packets */
1026 if (unlikely(rxq->shared->ctrl.updateRxProd)) {
1027 VMXNET3_WRITE_BAR0_REG(hw, rxprod_reg[j] + (rxq->queue_id * VMXNET3_REG_ALIGN),
1028 rxq->cmd_ring[j].next2fill);
1031 rxq->stopped = FALSE;
1034 for (i = 0; i < dev->data->nb_tx_queues; i++) {
1035 struct vmxnet3_tx_queue *txq = dev->data->tx_queues[i];
1037 txq->stopped = FALSE;
/*
 * Default 40-byte RSS hash key. vmxnet3_rss_configure() installs it when the
 * port configuration does not provide a key of its own.
 */
1043 static uint8_t rss_intel_key[40] = {
1044 0x6D, 0x5A, 0x56, 0xDA, 0x25, 0x5B, 0x0E, 0xC2,
1045 0x41, 0x67, 0x25, 0x3D, 0x43, 0xA3, 0x8F, 0xB0,
1046 0xD0, 0xCA, 0x2B, 0xCB, 0xAE, 0x7B, 0x30, 0xB4,
1047 0x77, 0xCB, 0x2D, 0xA3, 0x80, 0x30, 0xF2, 0x0C,
1048 0x6A, 0x42, 0xB7, 0x3B, 0xBE, 0xAC, 0x01, 0xFA,
1052 * Configure RSS feature
/*
 * Fill the device RSS configuration (hw->rss_conf) from the RSS settings of
 * the port.
 *
 * Sets the hash function to Toeplitz, the key size to
 * VMXNET3_RSS_MAX_KEY_SIZE and the indirection table size to four entries
 * per Rx queue. The hash key is copied from the port configuration, with
 * rss_intel_key as the fallback when no key is given. The indirection table
 * is filled with queue indices, and the hash types are derived from rss_hf
 * masked with VMXNET3_RSS_OFFLOAD_ALL (IPv4, TCP over IPv4, IPv6, TCP over
 * IPv6). Returns VMXNET3_SUCCESS.
 *
 * NOTE(review): the indTableSize comment states a limit of
 * VMXNET3_RSS_MAX_IND_TABLE_SIZE, but no clamp is visible; with enough Rx
 * queues num_rx_queues * 4 could exceed it. Confirm the queue count bound.
 *
 * NOTE(review): the memcpy reads hashKeySize bytes from rss_key without
 * looking at the key length supplied by the user. Confirm that a shorter
 * user key cannot be passed in.
 *
 * NOTE(review): when no key is supplied, the pointer to the default key is
 * written back into the port configuration, so the user-visible settings
 * are modified.
 */
1055 vmxnet3_rss_configure(struct rte_eth_dev *dev)
1057 struct vmxnet3_hw *hw = dev->data->dev_private;
1058 struct VMXNET3_RSSConf *dev_rss_conf;
1059 struct rte_eth_rss_conf *port_rss_conf;
1063 PMD_INIT_FUNC_TRACE();
1065 dev_rss_conf = hw->rss_conf;
1066 port_rss_conf = &dev->data->dev_conf.rx_adv_conf.rss_conf;
1068 /* loading hashFunc */
1069 dev_rss_conf->hashFunc = VMXNET3_RSS_HASH_FUNC_TOEPLITZ;
1070 /* loading hashKeySize */
1071 dev_rss_conf->hashKeySize = VMXNET3_RSS_MAX_KEY_SIZE;
1072 /* loading indTableSize : Must not exceed VMXNET3_RSS_MAX_IND_TABLE_SIZE (128)*/
1073 dev_rss_conf->indTableSize = (uint16_t)(hw->num_rx_queues * 4);
1075 if (port_rss_conf->rss_key == NULL) {
1076 /* Default hash key */
1077 port_rss_conf->rss_key = rss_intel_key;
1080 /* loading hashKey */
1081 memcpy(&dev_rss_conf->hashKey[0], port_rss_conf->rss_key, dev_rss_conf->hashKeySize);
1083 /* loading indTable */
/* i walks the table entries; j is the queue index written into each entry. */
1084 for (i = 0, j = 0; i < dev_rss_conf->indTableSize; i++, j++) {
1085 if (j == dev->data->nb_rx_queues)
1087 dev_rss_conf->indTable[i] = j;
1090 /* loading hashType */
1091 dev_rss_conf->hashType = 0;
/* Only the hash functions listed in VMXNET3_RSS_OFFLOAD_ALL are considered. */
1092 rss_hf = port_rss_conf->rss_hf & VMXNET3_RSS_OFFLOAD_ALL;
1093 if (rss_hf & ETH_RSS_IPV4)
1094 dev_rss_conf->hashType |= VMXNET3_RSS_HASH_TYPE_IPV4;
1095 if (rss_hf & ETH_RSS_NONFRAG_IPV4_TCP)
1096 dev_rss_conf->hashType |= VMXNET3_RSS_HASH_TYPE_TCP_IPV4;
1097 if (rss_hf & ETH_RSS_IPV6)
1098 dev_rss_conf->hashType |= VMXNET3_RSS_HASH_TYPE_IPV6;
1099 if (rss_hf & ETH_RSS_NONFRAG_IPV6_TCP)
1100 dev_rss_conf->hashType |= VMXNET3_RSS_HASH_TYPE_TCP_IPV6;
1102 return VMXNET3_SUCCESS;