/*-
 *   BSD LICENSE
 *
 *   Copyright(c) 2010-2015 Intel Corporation. All rights reserved.
 *   All rights reserved.
 *
 *   Redistribution and use in source and binary forms, with or without
 *   modification, are permitted provided that the following conditions
 *   are met:
 *
 *     * Redistributions of source code must retain the above copyright
 *       notice, this list of conditions and the following disclaimer.
 *     * Redistributions in binary form must reproduce the above copyright
 *       notice, this list of conditions and the following disclaimer in
 *       the documentation and/or other materials provided with the
 *       distribution.
 *     * Neither the name of Intel Corporation nor the names of its
 *       contributors may be used to endorse or promote products derived
 *       from this software without specific prior written permission.
 *
 *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

#include <sys/queue.h>

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <errno.h>
#include <stdint.h>
#include <stdarg.h>
#include <unistd.h>
#include <inttypes.h>

#include <rte_byteorder.h>
#include <rte_common.h>
#include <rte_cycles.h>
#include <rte_log.h>
#include <rte_debug.h>
#include <rte_interrupts.h>
#include <rte_pci.h>
#include <rte_memory.h>
#include <rte_memzone.h>
#include <rte_launch.h>
#include <rte_eal.h>
#include <rte_per_lcore.h>
#include <rte_lcore.h>
#include <rte_atomic.h>
#include <rte_branch_prediction.h>
#include <rte_ring.h>
#include <rte_mempool.h>
#include <rte_malloc.h>
#include <rte_mbuf.h>
#include <rte_ether.h>
#include <rte_ethdev.h>
#include <rte_prefetch.h>
#include <rte_ip.h>
#include <rte_udp.h>
#include <rte_tcp.h>
#include <rte_sctp.h>
#include <rte_string_fns.h>
#include <rte_errno.h>

#include "base/vmxnet3_defs.h"
#include "vmxnet3_ring.h"

#include "vmxnet3_logs.h"
#include "vmxnet3_ethdev.h"

static const uint32_t rxprod_reg[2] = {VMXNET3_REG_RXPROD, VMXNET3_REG_RXPROD2};

static int vmxnet3_post_rx_bufs(vmxnet3_rx_queue_t*, uint8_t);
static void vmxnet3_tq_tx_complete(vmxnet3_tx_queue_t *);
#ifdef RTE_LIBRTE_VMXNET3_DEBUG_DRIVER_NOT_USED
static void vmxnet3_rxq_dump(struct vmxnet3_rx_queue *);
static void vmxnet3_txq_dump(struct vmxnet3_tx_queue *);
#endif

#ifdef RTE_LIBRTE_VMXNET3_DEBUG_DRIVER_NOT_USED
static void
vmxnet3_rxq_dump(struct vmxnet3_rx_queue *rxq)
{
	uint32_t avail = 0;

	if (rxq == NULL)
		return;

	PMD_RX_LOG(DEBUG,
		   "RXQ: cmd0 base : 0x%p cmd1 base : 0x%p comp ring base : 0x%p.",
		   rxq->cmd_ring[0].base, rxq->cmd_ring[1].base, rxq->comp_ring.base);
	PMD_RX_LOG(DEBUG,
		   "RXQ: cmd0 basePA : 0x%lx cmd1 basePA : 0x%lx comp ring basePA : 0x%lx.",
		   (unsigned long)rxq->cmd_ring[0].basePA,
		   (unsigned long)rxq->cmd_ring[1].basePA,
		   (unsigned long)rxq->comp_ring.basePA);

	avail = vmxnet3_cmd_ring_desc_avail(&rxq->cmd_ring[0]);
	PMD_RX_LOG(DEBUG,
		   "RXQ:cmd0: size=%u; free=%u; next2proc=%u; queued=%u",
		   (uint32_t)rxq->cmd_ring[0].size, avail,
		   rxq->comp_ring.next2proc,
		   rxq->cmd_ring[0].size - avail);

	avail = vmxnet3_cmd_ring_desc_avail(&rxq->cmd_ring[1]);
	PMD_RX_LOG(DEBUG, "RXQ:cmd1 size=%u; free=%u; next2proc=%u; queued=%u",
		   (uint32_t)rxq->cmd_ring[1].size, avail, rxq->comp_ring.next2proc,
		   rxq->cmd_ring[1].size - avail);
}

static void
vmxnet3_txq_dump(struct vmxnet3_tx_queue *txq)
{
	uint32_t avail = 0;

	if (txq == NULL)
		return;

	PMD_TX_LOG(DEBUG, "TXQ: cmd base : 0x%p comp ring base : 0x%p data ring base : 0x%p.",
		   txq->cmd_ring.base, txq->comp_ring.base, txq->data_ring.base);
	PMD_TX_LOG(DEBUG, "TXQ: cmd basePA : 0x%lx comp ring basePA : 0x%lx data ring basePA : 0x%lx.",
		   (unsigned long)txq->cmd_ring.basePA,
		   (unsigned long)txq->comp_ring.basePA,
		   (unsigned long)txq->data_ring.basePA);

	avail = vmxnet3_cmd_ring_desc_avail(&txq->cmd_ring);
	PMD_TX_LOG(DEBUG, "TXQ: size=%u; free=%u; next2proc=%u; queued=%u",
		   (uint32_t)txq->cmd_ring.size, avail,
		   txq->comp_ring.next2proc, txq->cmd_ring.size - avail);
}
#endif

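/*
 * Release the mbufs still referenced by a command ring's buf_info entries.
 * Safe only once the device has been quiesced; vmxnet3_cmd_ring_release()
 * additionally frees the buf_info array itself.
 */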
static void
vmxnet3_cmd_ring_release_mbufs(vmxnet3_cmd_ring_t *ring)
{
	while (ring->next2comp != ring->next2fill) {
		/* No need to worry about tx desc ownership, device is quiesced by now. */
		vmxnet3_buf_info_t *buf_info = ring->buf_info + ring->next2comp;

		rte_pktmbuf_free(buf_info->m);
		buf_info->m = NULL;
		vmxnet3_cmd_ring_adv_next2comp(ring);
	}
}

static void
vmxnet3_cmd_ring_release(vmxnet3_cmd_ring_t *ring)
{
	vmxnet3_cmd_ring_release_mbufs(ring);
	rte_free(ring->buf_info);
	ring->buf_info = NULL;
}

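/* ethdev Tx/Rx queue release callbacks: free the command ring state of a queue. */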
void
vmxnet3_dev_tx_queue_release(void *txq)
{
	vmxnet3_tx_queue_t *tq = txq;

	/* Release the cmd_ring */
	if (tq != NULL)
		vmxnet3_cmd_ring_release(&tq->cmd_ring);
}

void
vmxnet3_dev_rx_queue_release(void *rxq)
{
	int i;
	vmxnet3_rx_queue_t *rq = rxq;

	/* Release both the cmd_rings */
	if (rq != NULL)
		for (i = 0; i < VMXNET3_RX_CMDRING_SIZE; i++)
			vmxnet3_cmd_ring_release(&rq->cmd_ring[i]);
}

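/* Return a Tx queue's command, completion and data rings to their initial (empty) state. */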
static void
vmxnet3_dev_tx_queue_reset(void *txq)
{
	vmxnet3_tx_queue_t *tq = txq;
	struct vmxnet3_cmd_ring *ring = &tq->cmd_ring;
	struct vmxnet3_comp_ring *comp_ring = &tq->comp_ring;
	struct vmxnet3_data_ring *data_ring = &tq->data_ring;
	int size;

	/* Release the cmd_ring mbufs */
	vmxnet3_cmd_ring_release_mbufs(&tq->cmd_ring);

	/* Tx vmxnet rings structure initialization */
	ring->next2fill = 0;
	ring->next2comp = 0;
	ring->gen = VMXNET3_INIT_GEN;
	comp_ring->next2proc = 0;
	comp_ring->gen = VMXNET3_INIT_GEN;

	size = sizeof(struct Vmxnet3_TxDesc) * ring->size;
	size += sizeof(struct Vmxnet3_TxCompDesc) * comp_ring->size;
	size += sizeof(struct Vmxnet3_TxDataDesc) * data_ring->size;

	memset(ring->base, 0, size);
}

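/* Return an Rx queue's two command rings and its completion ring to their initial state. */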
static void
vmxnet3_dev_rx_queue_reset(void *rxq)
{
	int i;
	vmxnet3_rx_queue_t *rq = rxq;
	struct vmxnet3_cmd_ring *ring0, *ring1;
	struct vmxnet3_comp_ring *comp_ring;
	int size;

	/* Release both the cmd_rings mbufs */
	for (i = 0; i < VMXNET3_RX_CMDRING_SIZE; i++)
		vmxnet3_cmd_ring_release_mbufs(&rq->cmd_ring[i]);

	ring0 = &rq->cmd_ring[0];
	ring1 = &rq->cmd_ring[1];
	comp_ring = &rq->comp_ring;

	/* Rx vmxnet rings structure initialization */
	ring0->next2fill = 0;
	ring1->next2fill = 0;
	ring0->next2comp = 0;
	ring1->next2comp = 0;
	ring0->gen = VMXNET3_INIT_GEN;
	ring1->gen = VMXNET3_INIT_GEN;
	comp_ring->next2proc = 0;
	comp_ring->gen = VMXNET3_INIT_GEN;

	size = sizeof(struct Vmxnet3_RxDesc) * (ring0->size + ring1->size);
	size += sizeof(struct Vmxnet3_RxCompDesc) * comp_ring->size;

	memset(ring0->base, 0, size);
}

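/* Stop and reset every configured Tx and Rx queue of the device. */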
void
vmxnet3_dev_clear_queues(struct rte_eth_dev *dev)
{
	unsigned i;

	PMD_INIT_FUNC_TRACE();

	for (i = 0; i < dev->data->nb_tx_queues; i++) {
		struct vmxnet3_tx_queue *txq = dev->data->tx_queues[i];

		if (txq != NULL) {
			txq->stopped = TRUE;
			vmxnet3_dev_tx_queue_reset(txq);
		}
	}

	for (i = 0; i < dev->data->nb_rx_queues; i++) {
		struct vmxnet3_rx_queue *rxq = dev->data->rx_queues[i];

		if (rxq != NULL) {
			rxq->stopped = TRUE;
			vmxnet3_dev_rx_queue_reset(rxq);
		}
	}
}

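/*
 * Free the mbuf whose end-of-packet descriptor sits at eop_idx and advance
 * next2comp past every descriptor of that packet. Returns the number of
 * command ring descriptors released.
 */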
static int
vmxnet3_unmap_pkt(uint16_t eop_idx, vmxnet3_tx_queue_t *txq)
{
	int completed = 0;
	struct rte_mbuf *mbuf;

	/* Release cmd_ring descriptor and free mbuf */
	RTE_ASSERT(txq->cmd_ring.base[eop_idx].txd.eop == 1);

	mbuf = txq->cmd_ring.buf_info[eop_idx].m;
	if (mbuf == NULL)
		rte_panic("EOP desc does not point to a valid mbuf");
	rte_pktmbuf_free(mbuf);

	txq->cmd_ring.buf_info[eop_idx].m = NULL;

	while (txq->cmd_ring.next2comp != eop_idx) {
		/* no out-of-order completion */
		RTE_ASSERT(txq->cmd_ring.base[txq->cmd_ring.next2comp].txd.cq == 0);
		vmxnet3_cmd_ring_adv_next2comp(&txq->cmd_ring);
		completed++;
	}

	/* Mark the txd for which tcd was generated as completed */
	vmxnet3_cmd_ring_adv_next2comp(&txq->cmd_ring);

	return completed + 1;
}

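/* Walk the Tx completion ring and release descriptors for packets the device has finished sending. */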
static void
vmxnet3_tq_tx_complete(vmxnet3_tx_queue_t *txq)
{
	int completed = 0;
	vmxnet3_comp_ring_t *comp_ring = &txq->comp_ring;
	struct Vmxnet3_TxCompDesc *tcd = (struct Vmxnet3_TxCompDesc *)
		(comp_ring->base + comp_ring->next2proc);

	while (tcd->gen == comp_ring->gen) {
		completed += vmxnet3_unmap_pkt(tcd->txdIdx, txq);

		vmxnet3_comp_ring_adv_next2proc(comp_ring);
		tcd = (struct Vmxnet3_TxCompDesc *)(comp_ring->base +
						    comp_ring->next2proc);
	}

	PMD_TX_LOG(DEBUG, "Processed %d tx comps & command descs.", completed);
}

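/*
 * Burst transmit: reclaim completed descriptors, then build command ring
 * descriptors for each packet (small single-segment frames are copied into
 * the data ring) and notify the device once the number of deferred packets
 * reaches the threshold advertised in the shared queue control block.
 */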
uint16_t
vmxnet3_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts,
		  uint16_t nb_pkts)
{
	uint16_t nb_tx;
	vmxnet3_tx_queue_t *txq = tx_queue;
	struct vmxnet3_hw *hw = txq->hw;
	Vmxnet3_TxQueueCtrl *txq_ctrl = &txq->shared->ctrl;
	uint32_t deferred = rte_le_to_cpu_32(txq_ctrl->txNumDeferred);

	if (unlikely(txq->stopped)) {
		PMD_TX_LOG(DEBUG, "Tx queue is stopped.");
		return 0;
	}

	/* Free up the comp_descriptors aggressively */
	vmxnet3_tq_tx_complete(txq);

	nb_tx = 0;
	while (nb_tx < nb_pkts) {
		Vmxnet3_GenericDesc *gdesc;
		vmxnet3_buf_info_t *tbi;
		uint32_t first2fill, avail, dw2;
		struct rte_mbuf *txm = tx_pkts[nb_tx];
		struct rte_mbuf *m_seg = txm;
		int copy_size = 0;
		bool tso = (txm->ol_flags & PKT_TX_TCP_SEG) != 0;
		/* # of descriptors needed for a packet. */
		unsigned count = txm->nb_segs;

		avail = vmxnet3_cmd_ring_desc_avail(&txq->cmd_ring);
		if (count > avail) {
			/* Is command ring full? */
			if (unlikely(avail == 0)) {
				PMD_TX_LOG(DEBUG, "No free ring descriptors");
				txq->stats.tx_ring_full++;
				txq->stats.drop_total += (nb_pkts - nb_tx);
				break;
			}

			/* Command ring is not full but cannot handle the
			 * multi-segmented packet. Let's try the next packet
			 * in this case.
			 */
			PMD_TX_LOG(DEBUG, "Running out of ring descriptors "
				   "(avail %d needed %d)", avail, count);
			txq->stats.drop_total++;
			if (tso)
				txq->stats.drop_tso++;
			rte_pktmbuf_free(txm);
			nb_tx++;
			continue;
		}

		/* Drop non-TSO packet that is excessively fragmented */
		if (unlikely(!tso && count > VMXNET3_MAX_TXD_PER_PKT)) {
			PMD_TX_LOG(ERR, "Non-TSO packet cannot occupy more than %d tx "
				   "descriptors. Packet dropped.", VMXNET3_MAX_TXD_PER_PKT);
			txq->stats.drop_too_many_segs++;
			txq->stats.drop_total++;
			rte_pktmbuf_free(txm);
			nb_tx++;
			continue;
		}

		if (txm->nb_segs == 1 && rte_pktmbuf_pkt_len(txm) <= VMXNET3_HDR_COPY_SIZE) {
			struct Vmxnet3_TxDataDesc *tdd;

			tdd = txq->data_ring.base + txq->cmd_ring.next2fill;
			copy_size = rte_pktmbuf_pkt_len(txm);
			rte_memcpy(tdd->data, rte_pktmbuf_mtod(txm, char *), copy_size);
		}

		/* use the previous gen bit for the SOP desc */
		dw2 = (txq->cmd_ring.gen ^ 0x1) << VMXNET3_TXD_GEN_SHIFT;
		first2fill = txq->cmd_ring.next2fill;
		do {
			/* Remember the transmit buffer for cleanup */
			tbi = txq->cmd_ring.buf_info + txq->cmd_ring.next2fill;

			/* NB: the following assumes that VMXNET3 maximum
			 * transmit buffer size (16K) is greater than
			 * maximum size of mbuf segment size.
			 */
			gdesc = txq->cmd_ring.base + txq->cmd_ring.next2fill;
			if (copy_size)
				gdesc->txd.addr = rte_cpu_to_le_64(txq->data_ring.basePA +
								   txq->cmd_ring.next2fill *
								   sizeof(struct Vmxnet3_TxDataDesc));
			else
				gdesc->txd.addr = rte_mbuf_data_dma_addr(m_seg);

			gdesc->dword[2] = dw2 | m_seg->data_len;
			gdesc->dword[3] = 0;

			/* move to the next2fill descriptor */
			vmxnet3_cmd_ring_adv_next2fill(&txq->cmd_ring);

			/* use the right gen for non-SOP desc */
			dw2 = txq->cmd_ring.gen << VMXNET3_TXD_GEN_SHIFT;
		} while ((m_seg = m_seg->next) != NULL);

		/* set the last buf_info for the pkt */
		tbi->m = txm;
		/* Update the EOP descriptor */
		gdesc->dword[3] |= VMXNET3_TXD_EOP | VMXNET3_TXD_CQ;

		/* Add VLAN tag if present */
		gdesc = txq->cmd_ring.base + first2fill;
		if (txm->ol_flags & PKT_TX_VLAN_PKT) {
			gdesc->txd.ti = 1;
			gdesc->txd.tci = txm->vlan_tci;
		}

		if (tso) {
			uint16_t mss = txm->tso_segsz;

			RTE_ASSERT(mss > 0);

			gdesc->txd.hlen = txm->l2_len + txm->l3_len + txm->l4_len;
			gdesc->txd.om = VMXNET3_OM_TSO;
			gdesc->txd.msscof = mss;

			deferred += (rte_pktmbuf_pkt_len(txm) - gdesc->txd.hlen + mss - 1) / mss;
		} else if (txm->ol_flags & PKT_TX_L4_MASK) {
			gdesc->txd.om = VMXNET3_OM_CSUM;
			gdesc->txd.hlen = txm->l2_len + txm->l3_len;

			switch (txm->ol_flags & PKT_TX_L4_MASK) {
			case PKT_TX_TCP_CKSUM:
				gdesc->txd.msscof = gdesc->txd.hlen + offsetof(struct tcp_hdr, cksum);
				break;
			case PKT_TX_UDP_CKSUM:
				gdesc->txd.msscof = gdesc->txd.hlen + offsetof(struct udp_hdr, dgram_cksum);
				break;
			default:
				PMD_TX_LOG(WARNING, "requested cksum offload not supported %#llx",
					   txm->ol_flags & PKT_TX_L4_MASK);
				abort();
			}
			deferred++;
		} else {
			gdesc->txd.hlen = 0;
			gdesc->txd.om = VMXNET3_OM_NONE;
			gdesc->txd.msscof = 0;
			deferred++;
		}

		/* flip the GEN bit on the SOP */
		rte_compiler_barrier();
		gdesc->dword[2] ^= VMXNET3_TXD_GEN;

		txq_ctrl->txNumDeferred = rte_cpu_to_le_32(deferred);
		nb_tx++;
	}

	PMD_TX_LOG(DEBUG, "vmxnet3 txThreshold: %u", rte_le_to_cpu_32(txq_ctrl->txThreshold));

	if (deferred >= rte_le_to_cpu_32(txq_ctrl->txThreshold)) {
		txq_ctrl->txNumDeferred = 0;
		/* Notify vSwitch that packets are available. */
		VMXNET3_WRITE_BAR0_REG(hw, (VMXNET3_REG_TXPROD + txq->queue_id * VMXNET3_REG_ALIGN),
				       txq->cmd_ring.next2fill);
	}

	return nb_tx;
}

/*
 * Allocates mbufs and clusters. Post rx descriptors with buffer details
 * so that device can receive packets in those buffers.
 * Ring layout:
 *      Among the two rings, the 1st ring contains buffers of type 0 and type 1.
 *      bufs_per_pkt is set such that for non-LRO cases all the buffers required
 *      by a frame will fit in the 1st ring (1st buf of type 0 and rest of type 1).
 *      The 2nd ring contains buffers of type 1 alone and is mostly used for
 *      the LRO case.
 */
static int
vmxnet3_post_rx_bufs(vmxnet3_rx_queue_t *rxq, uint8_t ring_id)
{
	int err = 0;
	uint32_t i = 0, val = 0;
	struct vmxnet3_cmd_ring *ring = &rxq->cmd_ring[ring_id];

	if (ring_id == 0) {
		/* Usually: One HEAD type buf per packet
		 * val = (ring->next2fill % rxq->hw->bufs_per_pkt) ?
		 * VMXNET3_RXD_BTYPE_BODY : VMXNET3_RXD_BTYPE_HEAD;
		 */

		/* We use single packet buffer so all heads here */
		val = VMXNET3_RXD_BTYPE_HEAD;
	} else {
		/* All BODY type buffers for 2nd ring */
		val = VMXNET3_RXD_BTYPE_BODY;
	}

	while (vmxnet3_cmd_ring_desc_avail(ring) > 0) {
		struct Vmxnet3_RxDesc *rxd;
		struct rte_mbuf *mbuf;
		vmxnet3_buf_info_t *buf_info = &ring->buf_info[ring->next2fill];

		rxd = (struct Vmxnet3_RxDesc *)(ring->base + ring->next2fill);

		/* Allocate blank mbuf for the current Rx Descriptor */
		mbuf = rte_mbuf_raw_alloc(rxq->mp);
		if (unlikely(mbuf == NULL)) {
			PMD_RX_LOG(ERR, "Error allocating mbuf");
			rxq->stats.rx_buf_alloc_failure++;
			err = ENOMEM;
			break;
		}

		/*
		 * Load mbuf pointer into buf_info[ring_size]
		 * buf_info structure is equivalent to cookie for virtio-virtqueue
		 */
		buf_info->m = mbuf;
		buf_info->len = (uint16_t)(mbuf->buf_len -
					   RTE_PKTMBUF_HEADROOM);
		buf_info->bufPA = rte_mbuf_data_dma_addr_default(mbuf);

		/* Load Rx Descriptor with the buffer's GPA */
		rxd->addr = buf_info->bufPA;

		/* After this point rxd->addr MUST not be NULL */
		rxd->btype = val;
		rxd->len = buf_info->len;
		/* Flip gen bit at the end to change ownership */
		rxd->gen = ring->gen;

		vmxnet3_cmd_ring_adv_next2fill(ring);
		i++;
	}

	/* Return error only if no buffers are posted at present */
	if (vmxnet3_cmd_ring_desc_avail(ring) >= (ring->size - 1))
		return -err;
	else
		return i;
}

/* Receive side checksum and other offloads */
static void
vmxnet3_rx_offload(const Vmxnet3_RxCompDesc *rcd, struct rte_mbuf *rxm)
{
	/* Check for RSS */
	if (rcd->rssType != VMXNET3_RCD_RSS_TYPE_NONE) {
		rxm->ol_flags |= PKT_RX_RSS_HASH;
		rxm->hash.rss = rcd->rssHash;
	}

	/* Check packet type, checksum errors, etc. Only support IPv4 for now. */
	if (rcd->v4) {
		struct ether_hdr *eth = rte_pktmbuf_mtod(rxm, struct ether_hdr *);
		struct ipv4_hdr *ip = (struct ipv4_hdr *)(eth + 1);

		if (((ip->version_ihl & 0xf) << 2) > (int)sizeof(struct ipv4_hdr))
			rxm->packet_type = RTE_PTYPE_L3_IPV4_EXT;
		else
			rxm->packet_type = RTE_PTYPE_L3_IPV4;

		if (!rcd->cnc) {
			if (!rcd->ipc)
				rxm->ol_flags |= PKT_RX_IP_CKSUM_BAD;

			if ((rcd->tcp || rcd->udp) && !rcd->tuc)
				rxm->ol_flags |= PKT_RX_L4_CKSUM_BAD;
		}
	}
}

/*
 * Process the Rx Completion Ring of given vmxnet3_rx_queue
 * for nb_pkts burst and return the number of packets received
 */
uint16_t
vmxnet3_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t nb_pkts)
{
	uint16_t nb_rx;
	uint32_t nb_rxd, idx;
	uint8_t ring_idx;
	vmxnet3_rx_queue_t *rxq;
	Vmxnet3_RxCompDesc *rcd;
	vmxnet3_buf_info_t *rbi;
	Vmxnet3_RxDesc *rxd;
	struct rte_mbuf *rxm = NULL;
	struct vmxnet3_hw *hw;

	nb_rx = 0;
	ring_idx = 0;
	nb_rxd = 0;
	idx = 0;

	rxq = rx_queue;
	hw = rxq->hw;

	rcd = &rxq->comp_ring.base[rxq->comp_ring.next2proc].rcd;

	if (unlikely(rxq->stopped)) {
		PMD_RX_LOG(DEBUG, "Rx queue is stopped.");
		return 0;
	}

	while (rcd->gen == rxq->comp_ring.gen) {
		if (nb_rx >= nb_pkts)
			break;

		idx = rcd->rxdIdx;
		ring_idx = (uint8_t)((rcd->rqID == rxq->qid1) ? 0 : 1);
		rxd = (Vmxnet3_RxDesc *)rxq->cmd_ring[ring_idx].base + idx;
		RTE_SET_USED(rxd); /* used only for assert when enabled */
		rbi = rxq->cmd_ring[ring_idx].buf_info + idx;

		PMD_RX_LOG(DEBUG, "rxd idx: %d ring idx: %d.", idx, ring_idx);

		RTE_ASSERT(rcd->len <= rxd->len);
		RTE_ASSERT(rbi->m);

		/* Get the packet buffer pointer from buf_info */
		rxm = rbi->m;

		/* Clear descriptor associated buf_info to be reused */
		rbi->m = NULL;
		rbi->bufPA = 0;

		/* Update the index that we received a packet */
		rxq->cmd_ring[ring_idx].next2comp = idx;

		/* For RCD with EOP set, check if there is frame error */
		if (unlikely(rcd->eop && rcd->err)) {
			rxq->stats.drop_total++;
			rxq->stats.drop_err++;

			if (!rcd->fcs) {
				rxq->stats.drop_fcs++;
				PMD_RX_LOG(ERR, "Recv packet dropped due to frame err.");
			}
			PMD_RX_LOG(ERR, "Error in received packet rcd#:%d rxd:%d",
				   (int)(rcd - (struct Vmxnet3_RxCompDesc *)
					 rxq->comp_ring.base), rcd->rxdIdx);
			rte_pktmbuf_free_seg(rxm);
			goto rcd_done;
		}

		/* Initialize newly received packet buffer */
		rxm->port = rxq->port_id;
		rxm->nb_segs = 1;
		rxm->next = NULL;
		rxm->pkt_len = (uint16_t)rcd->len;
		rxm->data_len = (uint16_t)rcd->len;
		rxm->data_off = RTE_PKTMBUF_HEADROOM;
		rxm->ol_flags = 0;
		rxm->vlan_tci = 0;

		/*
		 * If this is the first buffer of the received packet,
		 * set the pointer to the first mbuf of the packet
		 * Otherwise, update the total length and the number of segments
		 * of the current scattered packet, and update the pointer to
		 * the last mbuf of the current packet.
		 */
		if (rcd->sop) {
			RTE_ASSERT(rxd->btype == VMXNET3_RXD_BTYPE_HEAD);

			if (unlikely(rcd->len == 0)) {
				RTE_ASSERT(rcd->eop);

				PMD_RX_LOG(DEBUG,
					   "Rx buf was skipped. rxring[%d][%d])",
					   ring_idx, idx);
				rte_pktmbuf_free_seg(rxm);
				goto rcd_done;
			}

			rxq->start_seg = rxm;
			vmxnet3_rx_offload(rcd, rxm);
		} else {
			struct rte_mbuf *start = rxq->start_seg;

			RTE_ASSERT(rxd->btype == VMXNET3_RXD_BTYPE_BODY);

			start->pkt_len += rxm->data_len;
			start->nb_segs++;

			rxq->last_seg->next = rxm;
		}
		rxq->last_seg = rxm;

		if (rcd->eop) {
			struct rte_mbuf *start = rxq->start_seg;

			/* Check for hardware stripped VLAN tag */
			if (rcd->ts) {
				start->ol_flags |= (PKT_RX_VLAN_PKT | PKT_RX_VLAN_STRIPPED);
				start->vlan_tci = rte_le_to_cpu_16((uint16_t)rcd->tci);
			}

			rx_pkts[nb_rx++] = start;
			rxq->start_seg = NULL;
		}

rcd_done:
		rxq->cmd_ring[ring_idx].next2comp = idx;
		VMXNET3_INC_RING_IDX_ONLY(rxq->cmd_ring[ring_idx].next2comp, rxq->cmd_ring[ring_idx].size);

		/* It's time to allocate some new buf and renew descriptors */
		vmxnet3_post_rx_bufs(rxq, ring_idx);
		if (unlikely(rxq->shared->ctrl.updateRxProd)) {
			VMXNET3_WRITE_BAR0_REG(hw, rxprod_reg[ring_idx] + (rxq->queue_id * VMXNET3_REG_ALIGN),
					       rxq->cmd_ring[ring_idx].next2fill);
		}

		/* Advance to the next descriptor in comp_ring */
		vmxnet3_comp_ring_adv_next2proc(&rxq->comp_ring);

		rcd = &rxq->comp_ring.base[rxq->comp_ring.next2proc].rcd;
		nb_rxd++;
		if (nb_rxd > rxq->cmd_ring[0].size) {
			PMD_RX_LOG(ERR, "Used up quota of receiving packets,"
				   " relinquish control.");
			break;
		}
	}

	return nb_rx;
}

/*
 * Create memzone for device rings. malloc can't be used as the physical address is
 * needed. If the memzone is already created, then this function returns a ptr
 * to the old one.
 */
static const struct rte_memzone *
ring_dma_zone_reserve(struct rte_eth_dev *dev, const char *ring_name,
		      uint16_t queue_id, uint32_t ring_size, int socket_id)
{
	char z_name[RTE_MEMZONE_NAMESIZE];
	const struct rte_memzone *mz;

	snprintf(z_name, sizeof(z_name), "%s_%s_%d_%d",
		 dev->driver->pci_drv.name, ring_name,
		 dev->data->port_id, queue_id);

	mz = rte_memzone_lookup(z_name);
	if (mz)
		return mz;

	return rte_memzone_reserve_aligned(z_name, ring_size,
					   socket_id, 0, VMXNET3_RING_BA_ALIGN);
}

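/*
 * Set up a Tx queue: validate the requested descriptor count, reserve one
 * DMA-able memzone holding the command, completion and data rings back to
 * back, and allocate the per-descriptor buf_info array.
 */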
int
vmxnet3_dev_tx_queue_setup(struct rte_eth_dev *dev,
			   uint16_t queue_idx,
			   uint16_t nb_desc,
			   unsigned int socket_id,
			   __attribute__((unused)) const struct rte_eth_txconf *tx_conf)
{
	struct vmxnet3_hw *hw = dev->data->dev_private;
	const struct rte_memzone *mz;
	struct vmxnet3_tx_queue *txq;
	struct vmxnet3_cmd_ring *ring;
	struct vmxnet3_comp_ring *comp_ring;
	struct vmxnet3_data_ring *data_ring;
	int size;

	PMD_INIT_FUNC_TRACE();

	if ((tx_conf->txq_flags & ETH_TXQ_FLAGS_NOXSUMSCTP) !=
	    ETH_TXQ_FLAGS_NOXSUMSCTP) {
		PMD_INIT_LOG(ERR, "SCTP checksum offload not supported");
		return -EINVAL;
	}

	txq = rte_zmalloc("ethdev_tx_queue", sizeof(struct vmxnet3_tx_queue), RTE_CACHE_LINE_SIZE);
	if (txq == NULL) {
		PMD_INIT_LOG(ERR, "Can not allocate tx queue structure");
		return -ENOMEM;
	}

	txq->queue_id = queue_idx;
	txq->port_id = dev->data->port_id;
	txq->shared = &hw->tqd_start[queue_idx];
	txq->hw = hw;
	txq->qid = queue_idx;
	txq->stopped = TRUE;

	ring = &txq->cmd_ring;
	comp_ring = &txq->comp_ring;
	data_ring = &txq->data_ring;

	/* Tx vmxnet ring length should be between 512-4096 */
	if (nb_desc < VMXNET3_DEF_TX_RING_SIZE) {
		PMD_INIT_LOG(ERR, "VMXNET3 Tx Ring Size Min: %u",
			     VMXNET3_DEF_TX_RING_SIZE);
		return -EINVAL;
	} else if (nb_desc > VMXNET3_TX_RING_MAX_SIZE) {
		PMD_INIT_LOG(ERR, "VMXNET3 Tx Ring Size Max: %u",
			     VMXNET3_TX_RING_MAX_SIZE);
		return -EINVAL;
	} else {
		ring->size = nb_desc;
		ring->size &= ~VMXNET3_RING_SIZE_MASK;
	}
	comp_ring->size = data_ring->size = ring->size;

	/* Tx vmxnet rings structure initialization */
	ring->next2fill = 0;
	ring->next2comp = 0;
	ring->gen = VMXNET3_INIT_GEN;
	comp_ring->next2proc = 0;
	comp_ring->gen = VMXNET3_INIT_GEN;

	size = sizeof(struct Vmxnet3_TxDesc) * ring->size;
	size += sizeof(struct Vmxnet3_TxCompDesc) * comp_ring->size;
	size += sizeof(struct Vmxnet3_TxDataDesc) * data_ring->size;

	mz = ring_dma_zone_reserve(dev, "txdesc", queue_idx, size, socket_id);
	if (mz == NULL) {
		PMD_INIT_LOG(ERR, "ERROR: Creating queue descriptors zone");
		return -ENOMEM;
	}
	memset(mz->addr, 0, mz->len);

	/* cmd_ring initialization */
	ring->base = mz->addr;
	ring->basePA = mz->phys_addr;

	/* comp_ring initialization */
	comp_ring->base = ring->base + ring->size;
	comp_ring->basePA = ring->basePA +
		(sizeof(struct Vmxnet3_TxDesc) * ring->size);

	/* data_ring initialization */
	data_ring->base = (Vmxnet3_TxDataDesc *)(comp_ring->base + comp_ring->size);
	data_ring->basePA = comp_ring->basePA +
		(sizeof(struct Vmxnet3_TxCompDesc) * comp_ring->size);

	/* cmd_ring0 buf_info allocation */
	ring->buf_info = rte_zmalloc("tx_ring_buf_info",
				     ring->size * sizeof(vmxnet3_buf_info_t), RTE_CACHE_LINE_SIZE);
	if (ring->buf_info == NULL) {
		PMD_INIT_LOG(ERR, "ERROR: Creating tx_buf_info structure");
		return -ENOMEM;
	}

	/* Update the data portion with txq */
	dev->data->tx_queues[queue_idx] = txq;

	return 0;
}

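/*
 * Set up an Rx queue: validate the requested descriptor count, reserve one
 * memzone holding both command rings plus the completion ring, and allocate
 * a buf_info array for each command ring.
 */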
int
vmxnet3_dev_rx_queue_setup(struct rte_eth_dev *dev,
			   uint16_t queue_idx,
			   uint16_t nb_desc,
			   unsigned int socket_id,
			   __attribute__((unused)) const struct rte_eth_rxconf *rx_conf,
			   struct rte_mempool *mp)
{
	const struct rte_memzone *mz;
	struct vmxnet3_rx_queue *rxq;
	struct vmxnet3_hw *hw = dev->data->dev_private;
	struct vmxnet3_cmd_ring *ring0, *ring1, *ring;
	struct vmxnet3_comp_ring *comp_ring;
	int size;
	uint8_t i;
	char mem_name[32];

	PMD_INIT_FUNC_TRACE();

	rxq = rte_zmalloc("ethdev_rx_queue", sizeof(struct vmxnet3_rx_queue), RTE_CACHE_LINE_SIZE);
	if (rxq == NULL) {
		PMD_INIT_LOG(ERR, "Can not allocate rx queue structure");
		return -ENOMEM;
	}

	rxq->mp = mp;
	rxq->queue_id = queue_idx;
	rxq->port_id = dev->data->port_id;
	rxq->shared = &hw->rqd_start[queue_idx];
	rxq->hw = hw;
	rxq->qid1 = queue_idx;
	rxq->qid2 = queue_idx + hw->num_rx_queues;
	rxq->stopped = TRUE;

	ring0 = &rxq->cmd_ring[0];
	ring1 = &rxq->cmd_ring[1];
	comp_ring = &rxq->comp_ring;

	/* Rx vmxnet rings length should be between 256-4096 */
	if (nb_desc < VMXNET3_DEF_RX_RING_SIZE) {
		PMD_INIT_LOG(ERR, "VMXNET3 Rx Ring Size Min: 256");
		return -EINVAL;
	} else if (nb_desc > VMXNET3_RX_RING_MAX_SIZE) {
		PMD_INIT_LOG(ERR, "VMXNET3 Rx Ring Size Max: 4096");
		return -EINVAL;
	} else {
		ring0->size = nb_desc;
		ring0->size &= ~VMXNET3_RING_SIZE_MASK;
		ring1->size = ring0->size;
	}

	comp_ring->size = ring0->size + ring1->size;

	/* Rx vmxnet rings structure initialization */
	ring0->next2fill = 0;
	ring1->next2fill = 0;
	ring0->next2comp = 0;
	ring1->next2comp = 0;
	ring0->gen = VMXNET3_INIT_GEN;
	ring1->gen = VMXNET3_INIT_GEN;
	comp_ring->next2proc = 0;
	comp_ring->gen = VMXNET3_INIT_GEN;

	size = sizeof(struct Vmxnet3_RxDesc) * (ring0->size + ring1->size);
	size += sizeof(struct Vmxnet3_RxCompDesc) * comp_ring->size;

	mz = ring_dma_zone_reserve(dev, "rxdesc", queue_idx, size, socket_id);
	if (mz == NULL) {
		PMD_INIT_LOG(ERR, "ERROR: Creating queue descriptors zone");
		return -ENOMEM;
	}
	memset(mz->addr, 0, mz->len);

	/* cmd_ring0 initialization */
	ring0->base = mz->addr;
	ring0->basePA = mz->phys_addr;

	/* cmd_ring1 initialization */
	ring1->base = ring0->base + ring0->size;
	ring1->basePA = ring0->basePA + sizeof(struct Vmxnet3_RxDesc) * ring0->size;

	/* comp_ring initialization */
	comp_ring->base = ring1->base + ring1->size;
	comp_ring->basePA = ring1->basePA + sizeof(struct Vmxnet3_RxDesc) *
		ring1->size;

	/* cmd_ring0-cmd_ring1 buf_info allocation */
	for (i = 0; i < VMXNET3_RX_CMDRING_SIZE; i++) {

		ring = &rxq->cmd_ring[i];
		snprintf(mem_name, sizeof(mem_name), "rx_ring_%d_buf_info", i);

		ring->buf_info = rte_zmalloc(mem_name, ring->size * sizeof(vmxnet3_buf_info_t), RTE_CACHE_LINE_SIZE);
		if (ring->buf_info == NULL) {
			PMD_INIT_LOG(ERR, "ERROR: Creating rx_buf_info structure");
			return -ENOMEM;
		}
	}

	/* Update the data portion with rxq */
	dev->data->rx_queues[queue_idx] = rxq;

	return 0;
}

/*
 * Initializes Receive Unit
 * Load mbufs in rx queue in advance
 */
int
vmxnet3_dev_rxtx_init(struct rte_eth_dev *dev)
{
	struct vmxnet3_hw *hw = dev->data->dev_private;
	int i, ret;
	uint8_t j;

	PMD_INIT_FUNC_TRACE();

	for (i = 0; i < hw->num_rx_queues; i++) {
		vmxnet3_rx_queue_t *rxq = dev->data->rx_queues[i];

		for (j = 0; j < VMXNET3_RX_CMDRING_SIZE; j++) {
			/* Passing 0 as alloc_num will allocate full ring */
			ret = vmxnet3_post_rx_bufs(rxq, j);
			if (ret <= 0) {
				PMD_INIT_LOG(ERR, "ERROR: Posting Rxq: %d buffers ring: %d", i, j);
				return -ret;
			}
			/* Updating device with the index:next2fill to fill the mbufs for coming packets */
			if (unlikely(rxq->shared->ctrl.updateRxProd)) {
				VMXNET3_WRITE_BAR0_REG(hw, rxprod_reg[j] + (rxq->queue_id * VMXNET3_REG_ALIGN),
						       rxq->cmd_ring[j].next2fill);
			}
		}
		rxq->stopped = FALSE;
		rxq->start_seg = NULL;
	}

	for (i = 0; i < dev->data->nb_tx_queues; i++) {
		struct vmxnet3_tx_queue *txq = dev->data->tx_queues[i];

		txq->stopped = FALSE;
	}

	return 0;
}

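/* Default RSS hash key, used when the application does not supply its own. */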
static uint8_t rss_intel_key[40] = {
	0x6D, 0x5A, 0x56, 0xDA, 0x25, 0x5B, 0x0E, 0xC2,
	0x41, 0x67, 0x25, 0x3D, 0x43, 0xA3, 0x8F, 0xB0,
	0xD0, 0xCA, 0x2B, 0xCB, 0xAE, 0x7B, 0x30, 0xB4,
	0x77, 0xCB, 0x2D, 0xA3, 0x80, 0x30, 0xF2, 0x0C,
	0x6A, 0x42, 0xB7, 0x3B, 0xBE, 0xAC, 0x01, 0xFA,
};

/*
 * Configure RSS feature
 */
int
vmxnet3_rss_configure(struct rte_eth_dev *dev)
{
	struct vmxnet3_hw *hw = dev->data->dev_private;
	struct VMXNET3_RSSConf *dev_rss_conf;
	struct rte_eth_rss_conf *port_rss_conf;
	uint64_t rss_hf;
	uint8_t i, j;

	PMD_INIT_FUNC_TRACE();

	dev_rss_conf = hw->rss_conf;
	port_rss_conf = &dev->data->dev_conf.rx_adv_conf.rss_conf;

	/* loading hashFunc */
	dev_rss_conf->hashFunc = VMXNET3_RSS_HASH_FUNC_TOEPLITZ;
	/* loading hashKeySize */
	dev_rss_conf->hashKeySize = VMXNET3_RSS_MAX_KEY_SIZE;
	/* loading indTableSize: Must not exceed VMXNET3_RSS_MAX_IND_TABLE_SIZE (128) */
	dev_rss_conf->indTableSize = (uint16_t)(hw->num_rx_queues * 4);

	if (port_rss_conf->rss_key == NULL) {
		/* Default hash key */
		port_rss_conf->rss_key = rss_intel_key;
	}

	/* loading hashKey */
	memcpy(&dev_rss_conf->hashKey[0], port_rss_conf->rss_key, dev_rss_conf->hashKeySize);

	/* loading indTable */
	for (i = 0, j = 0; i < dev_rss_conf->indTableSize; i++, j++) {
		if (j == dev->data->nb_rx_queues)
			j = 0;
		dev_rss_conf->indTable[i] = j;
	}

	/* loading hashType */
	dev_rss_conf->hashType = 0;
	rss_hf = port_rss_conf->rss_hf & VMXNET3_RSS_OFFLOAD_ALL;
	if (rss_hf & ETH_RSS_IPV4)
		dev_rss_conf->hashType |= VMXNET3_RSS_HASH_TYPE_IPV4;
	if (rss_hf & ETH_RSS_NONFRAG_IPV4_TCP)
		dev_rss_conf->hashType |= VMXNET3_RSS_HASH_TYPE_TCP_IPV4;
	if (rss_hf & ETH_RSS_IPV6)
		dev_rss_conf->hashType |= VMXNET3_RSS_HASH_TYPE_IPV6;
	if (rss_hf & ETH_RSS_NONFRAG_IPV6_TCP)
		dev_rss_conf->hashType |= VMXNET3_RSS_HASH_TYPE_TCP_IPV6;

	return VMXNET3_SUCCESS;
}