/*-
 *   BSD LICENSE
 *
 *   Copyright(c) 2010-2015 Intel Corporation. All rights reserved.
 *   All rights reserved.
 *
 *   Redistribution and use in source and binary forms, with or without
 *   modification, are permitted provided that the following conditions
 *   are met:
 *
 *     * Redistributions of source code must retain the above copyright
 *       notice, this list of conditions and the following disclaimer.
 *     * Redistributions in binary form must reproduce the above copyright
 *       notice, this list of conditions and the following disclaimer in
 *       the documentation and/or other materials provided with the
 *       distribution.
 *     * Neither the name of Intel Corporation nor the names of its
 *       contributors may be used to endorse or promote products derived
 *       from this software without specific prior written permission.
 *
 *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

#include <sys/queue.h>

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <errno.h>
#include <stdint.h>
#include <inttypes.h>

#include <rte_byteorder.h>
#include <rte_common.h>
#include <rte_cycles.h>
#include <rte_log.h>
#include <rte_debug.h>
#include <rte_interrupts.h>
#include <rte_pci.h>
#include <rte_memory.h>
#include <rte_memzone.h>
#include <rte_launch.h>
#include <rte_eal.h>
#include <rte_per_lcore.h>
#include <rte_lcore.h>
#include <rte_atomic.h>
#include <rte_branch_prediction.h>
#include <rte_mempool.h>
#include <rte_malloc.h>
#include <rte_mbuf.h>
#include <rte_ether.h>
#include <rte_ethdev.h>
#include <rte_prefetch.h>
#include <rte_ip.h>
#include <rte_udp.h>
#include <rte_tcp.h>
#include <rte_string_fns.h>
#include <rte_errno.h>

#include "base/vmxnet3_defs.h"
#include "vmxnet3_ring.h"

#include "vmxnet3_logs.h"
#include "vmxnet3_ethdev.h"

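/*
 * Rx producer ("doorbell") registers, indexed by command ring id:
 * ring 0 uses VMXNET3_REG_RXPROD, ring 1 uses VMXNET3_REG_RXPROD2.
 */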
static const uint32_t rxprod_reg[2] = {VMXNET3_REG_RXPROD, VMXNET3_REG_RXPROD2};

static int vmxnet3_post_rx_bufs(vmxnet3_rx_queue_t*, uint8_t);
static void vmxnet3_tq_tx_complete(vmxnet3_tx_queue_t *);
#ifdef RTE_LIBRTE_VMXNET3_DEBUG_DRIVER_NOT_USED
static void vmxnet3_rxq_dump(struct vmxnet3_rx_queue *);
static void vmxnet3_txq_dump(struct vmxnet3_tx_queue *);
#endif

#ifdef RTE_LIBRTE_VMXNET3_DEBUG_DRIVER_NOT_USED
static void
vmxnet3_rxq_dump(struct vmxnet3_rx_queue *rxq)
{
	uint32_t avail = 0;

	if (rxq == NULL)
		return;

	PMD_RX_LOG(DEBUG,
		   "RXQ: cmd0 base : %p cmd1 base : %p comp ring base : %p.",
		   rxq->cmd_ring[0].base, rxq->cmd_ring[1].base, rxq->comp_ring.base);
	PMD_RX_LOG(DEBUG,
		   "RXQ: cmd0 basePA : 0x%lx cmd1 basePA : 0x%lx comp ring basePA : 0x%lx.",
		   (unsigned long)rxq->cmd_ring[0].basePA,
		   (unsigned long)rxq->cmd_ring[1].basePA,
		   (unsigned long)rxq->comp_ring.basePA);

	avail = vmxnet3_cmd_ring_desc_avail(&rxq->cmd_ring[0]);
	PMD_RX_LOG(DEBUG,
		   "RXQ:cmd0: size=%u; free=%u; next2proc=%u; queued=%u",
		   (uint32_t)rxq->cmd_ring[0].size, avail,
		   rxq->comp_ring.next2proc,
		   rxq->cmd_ring[0].size - avail);

	avail = vmxnet3_cmd_ring_desc_avail(&rxq->cmd_ring[1]);
	PMD_RX_LOG(DEBUG, "RXQ:cmd1 size=%u; free=%u; next2proc=%u; queued=%u",
		   (uint32_t)rxq->cmd_ring[1].size, avail, rxq->comp_ring.next2proc,
		   rxq->cmd_ring[1].size - avail);
}

static void
vmxnet3_txq_dump(struct vmxnet3_tx_queue *txq)
{
	uint32_t avail = 0;

	if (txq == NULL)
		return;

	PMD_TX_LOG(DEBUG, "TXQ: cmd base : %p comp ring base : %p data ring base : %p.",
		   txq->cmd_ring.base, txq->comp_ring.base, txq->data_ring.base);
	PMD_TX_LOG(DEBUG, "TXQ: cmd basePA : 0x%lx comp ring basePA : 0x%lx data ring basePA : 0x%lx.",
		   (unsigned long)txq->cmd_ring.basePA,
		   (unsigned long)txq->comp_ring.basePA,
		   (unsigned long)txq->data_ring.basePA);

	avail = vmxnet3_cmd_ring_desc_avail(&txq->cmd_ring);
	PMD_TX_LOG(DEBUG, "TXQ: size=%u; free=%u; next2proc=%u; queued=%u",
		   (uint32_t)txq->cmd_ring.size, avail,
		   txq->comp_ring.next2proc, txq->cmd_ring.size - avail);
}
#endif

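/*
 * Free any mbufs still attached to a command ring. Only called once the
 * device has been quiesced, so descriptor ownership does not matter here.
 */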
static void
vmxnet3_cmd_ring_release_mbufs(vmxnet3_cmd_ring_t *ring)
{
	while (ring->next2comp != ring->next2fill) {
		/* No need to worry about tx desc ownership, device is quiesced by now. */
		vmxnet3_buf_info_t *buf_info = ring->buf_info + ring->next2comp;

		if (buf_info->m) {
			rte_pktmbuf_free(buf_info->m);
			buf_info->m = NULL;
			buf_info->bufPA = 0;
			buf_info->len = 0;
		}
		vmxnet3_cmd_ring_adv_next2comp(ring);
	}
}

static void
vmxnet3_cmd_ring_release(vmxnet3_cmd_ring_t *ring)
{
	vmxnet3_cmd_ring_release_mbufs(ring);
	rte_free(ring->buf_info);
	ring->buf_info = NULL;
}

void
vmxnet3_dev_tx_queue_release(void *txq)
{
	vmxnet3_tx_queue_t *tq = txq;

	/* Release the cmd_ring */
	if (tq != NULL)
		vmxnet3_cmd_ring_release(&tq->cmd_ring);
}

void
vmxnet3_dev_rx_queue_release(void *rxq)
{
	int i;
	vmxnet3_rx_queue_t *rq = rxq;

	/* Release both the cmd_rings */
	if (rq != NULL)
		for (i = 0; i < VMXNET3_RX_CMDRING_SIZE; i++)
			vmxnet3_cmd_ring_release(&rq->cmd_ring[i]);
}

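/*
 * Reset a Tx queue to its post-setup state: drop any mbufs still held by the
 * command ring and zero the cmd/comp/data rings.
 */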
static void
vmxnet3_dev_tx_queue_reset(void *txq)
{
	vmxnet3_tx_queue_t *tq = txq;
	struct vmxnet3_cmd_ring *ring = &tq->cmd_ring;
	struct vmxnet3_comp_ring *comp_ring = &tq->comp_ring;
	struct vmxnet3_data_ring *data_ring = &tq->data_ring;
	int size;

	/* Release the cmd_ring mbufs */
	vmxnet3_cmd_ring_release_mbufs(&tq->cmd_ring);

	/* Tx vmxnet rings structure initialization*/
	ring->next2fill = 0;
	ring->next2comp = 0;
	ring->gen = VMXNET3_INIT_GEN;
	comp_ring->next2proc = 0;
	comp_ring->gen = VMXNET3_INIT_GEN;

	size = sizeof(struct Vmxnet3_TxDesc) * ring->size;
	size += sizeof(struct Vmxnet3_TxCompDesc) * comp_ring->size;
	size += sizeof(struct Vmxnet3_TxDataDesc) * data_ring->size;

	memset(ring->base, 0, size);
}

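/*
 * Reset an Rx queue: drop the mbufs posted on both command rings and zero the
 * two command rings and the completion ring.
 */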
static void
vmxnet3_dev_rx_queue_reset(void *rxq)
{
	int i;
	vmxnet3_rx_queue_t *rq = rxq;
	struct vmxnet3_cmd_ring *ring0, *ring1;
	struct vmxnet3_comp_ring *comp_ring;
	int size;

	/* Release both the cmd_rings mbufs */
	for (i = 0; i < VMXNET3_RX_CMDRING_SIZE; i++)
		vmxnet3_cmd_ring_release_mbufs(&rq->cmd_ring[i]);

	ring0 = &rq->cmd_ring[0];
	ring1 = &rq->cmd_ring[1];
	comp_ring = &rq->comp_ring;

	/* Rx vmxnet rings structure initialization */
	ring0->next2fill = 0;
	ring1->next2fill = 0;
	ring0->next2comp = 0;
	ring1->next2comp = 0;
	ring0->gen = VMXNET3_INIT_GEN;
	ring1->gen = VMXNET3_INIT_GEN;
	comp_ring->next2proc = 0;
	comp_ring->gen = VMXNET3_INIT_GEN;

	size = sizeof(struct Vmxnet3_RxDesc) * (ring0->size + ring1->size);
	size += sizeof(struct Vmxnet3_RxCompDesc) * comp_ring->size;

	memset(ring0->base, 0, size);
}

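/* Stop and reset every Tx and Rx queue of the device. */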
void
vmxnet3_dev_clear_queues(struct rte_eth_dev *dev)
{
	unsigned i;

	PMD_INIT_FUNC_TRACE();

	for (i = 0; i < dev->data->nb_tx_queues; i++) {
		struct vmxnet3_tx_queue *txq = dev->data->tx_queues[i];

		if (txq != NULL) {
			txq->stopped = TRUE;
			vmxnet3_dev_tx_queue_reset(txq);
		}
	}

	for (i = 0; i < dev->data->nb_rx_queues; i++) {
		struct vmxnet3_rx_queue *rxq = dev->data->rx_queues[i];

		if (rxq != NULL) {
			rxq->stopped = TRUE;
			vmxnet3_dev_rx_queue_reset(rxq);
		}
	}
}

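/*
 * Reclaim the descriptors of one completed packet, identified by the index of
 * its EOP descriptor, and free the attached mbuf chain. Returns the number of
 * command ring descriptors released.
 */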
static int
vmxnet3_unmap_pkt(uint16_t eop_idx, vmxnet3_tx_queue_t *txq)
{
	int completed = 0;
	struct rte_mbuf *mbuf;

	/* Release cmd_ring descriptor and free mbuf */
	RTE_ASSERT(txq->cmd_ring.base[eop_idx].txd.eop == 1);

	mbuf = txq->cmd_ring.buf_info[eop_idx].m;
	if (mbuf == NULL)
		rte_panic("EOP desc does not point to a valid mbuf");
	rte_pktmbuf_free(mbuf);

	txq->cmd_ring.buf_info[eop_idx].m = NULL;

	while (txq->cmd_ring.next2comp != eop_idx) {
		/* no out-of-order completion */
		RTE_ASSERT(txq->cmd_ring.base[txq->cmd_ring.next2comp].txd.cq == 0);
		vmxnet3_cmd_ring_adv_next2comp(&txq->cmd_ring);
		completed++;
	}

	/* Mark the txd for which tcd was generated as completed */
	vmxnet3_cmd_ring_adv_next2comp(&txq->cmd_ring);

	return completed + 1;
}

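/* Walk the Tx completion ring and reclaim every descriptor the device has marked done. */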
static void
vmxnet3_tq_tx_complete(vmxnet3_tx_queue_t *txq)
{
	int completed = 0;
	vmxnet3_comp_ring_t *comp_ring = &txq->comp_ring;
	struct Vmxnet3_TxCompDesc *tcd = (struct Vmxnet3_TxCompDesc *)
		(comp_ring->base + comp_ring->next2proc);

	while (tcd->gen == comp_ring->gen) {
		completed += vmxnet3_unmap_pkt(tcd->txdIdx, txq);

		vmxnet3_comp_ring_adv_next2proc(comp_ring);
		tcd = (struct Vmxnet3_TxCompDesc *)(comp_ring->base +
						    comp_ring->next2proc);
	}

	PMD_TX_LOG(DEBUG, "Processed %d tx comps & command descs.", completed);
}

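/*
 * Burst transmit. For each mbuf: reserve command ring descriptors, copy very
 * small frames into the data ring, program VLAN/checksum/TSO offloads on the
 * SOP descriptor, then hand the packet to the device by flipping the SOP
 * generation bit. The TXPROD doorbell is written once the number of deferred
 * packets reaches the threshold advertised by the device.
 */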
uint16_t
vmxnet3_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts,
		  uint16_t nb_pkts)
{
	uint16_t nb_tx;
	vmxnet3_tx_queue_t *txq = tx_queue;
	struct vmxnet3_hw *hw = txq->hw;
	Vmxnet3_TxQueueCtrl *txq_ctrl = &txq->shared->ctrl;
	uint32_t deferred = rte_le_to_cpu_32(txq_ctrl->txNumDeferred);

	if (unlikely(txq->stopped)) {
		PMD_TX_LOG(DEBUG, "Tx queue is stopped.");
		return 0;
	}

	/* Free up the comp_descriptors aggressively */
	vmxnet3_tq_tx_complete(txq);

	nb_tx = 0;
	while (nb_tx < nb_pkts) {
		Vmxnet3_GenericDesc *gdesc;
		vmxnet3_buf_info_t *tbi;
		uint32_t first2fill, avail, dw2;
		struct rte_mbuf *txm = tx_pkts[nb_tx];
		struct rte_mbuf *m_seg = txm;
		int copy_size = 0;
		bool tso = (txm->ol_flags & PKT_TX_TCP_SEG) != 0;
		/* # of descriptors needed for a packet. */
		unsigned count = txm->nb_segs;

		avail = vmxnet3_cmd_ring_desc_avail(&txq->cmd_ring);
		if (count > avail) {
			/* Is command ring full? */
			if (unlikely(avail == 0)) {
				PMD_TX_LOG(DEBUG, "No free ring descriptors");
				txq->stats.tx_ring_full++;
				txq->stats.drop_total += (nb_pkts - nb_tx);
				break;
			}

			/* Command ring is not full but cannot handle the
			 * multi-segmented packet. Let's try the next packet
			 * in this case.
			 */
			PMD_TX_LOG(DEBUG, "Running out of ring descriptors "
				   "(avail %d needed %d)", avail, count);
			txq->stats.drop_total++;
			if (tso)
				txq->stats.drop_tso++;
			rte_pktmbuf_free(txm);
			nb_tx++;
			continue;
		}

		/* Drop non-TSO packet that is excessively fragmented */
		if (unlikely(!tso && count > VMXNET3_MAX_TXD_PER_PKT)) {
			PMD_TX_LOG(ERR, "Non-TSO packet cannot occupy more than %d tx "
				   "descriptors. Packet dropped.", VMXNET3_MAX_TXD_PER_PKT);
			txq->stats.drop_too_many_segs++;
			txq->stats.drop_total++;
			rte_pktmbuf_free(txm);
			nb_tx++;
			continue;
		}

		if (txm->nb_segs == 1 &&
		    rte_pktmbuf_pkt_len(txm) <= VMXNET3_HDR_COPY_SIZE) {
			struct Vmxnet3_TxDataDesc *tdd;

			tdd = txq->data_ring.base + txq->cmd_ring.next2fill;
			copy_size = rte_pktmbuf_pkt_len(txm);
			rte_memcpy(tdd->data, rte_pktmbuf_mtod(txm, char *), copy_size);
		}

		/* use the previous gen bit for the SOP desc */
		dw2 = (txq->cmd_ring.gen ^ 0x1) << VMXNET3_TXD_GEN_SHIFT;
		first2fill = txq->cmd_ring.next2fill;
		do {
			/* Remember the transmit buffer for cleanup */
			tbi = txq->cmd_ring.buf_info + txq->cmd_ring.next2fill;

			/* NB: the following assumes that VMXNET3 maximum
			 * transmit buffer size (16K) is greater than
			 * maximum size of mbuf segment size.
			 */
			gdesc = txq->cmd_ring.base + txq->cmd_ring.next2fill;
			if (copy_size)
				gdesc->txd.addr = rte_cpu_to_le_64(txq->data_ring.basePA +
								   txq->cmd_ring.next2fill *
								   sizeof(struct Vmxnet3_TxDataDesc));
			else
				gdesc->txd.addr = rte_mbuf_data_dma_addr(m_seg);

			gdesc->dword[2] = dw2 | m_seg->data_len;
			gdesc->dword[3] = 0;

			/* move to the next2fill descriptor */
			vmxnet3_cmd_ring_adv_next2fill(&txq->cmd_ring);

			/* use the right gen for non-SOP desc */
			dw2 = txq->cmd_ring.gen << VMXNET3_TXD_GEN_SHIFT;
		} while ((m_seg = m_seg->next) != NULL);

		/* set the last buf_info for the pkt */
		tbi->m = txm;
		/* Update the EOP descriptor */
		gdesc->dword[3] |= VMXNET3_TXD_EOP | VMXNET3_TXD_CQ;

		/* Add VLAN tag if present */
		gdesc = txq->cmd_ring.base + first2fill;
		if (txm->ol_flags & PKT_TX_VLAN_PKT) {
			gdesc->txd.ti = 1;
			gdesc->txd.tci = txm->vlan_tci;
		}

		if (tso) {
			uint16_t mss = txm->tso_segsz;

			RTE_ASSERT(mss > 0);

			gdesc->txd.hlen = txm->l2_len + txm->l3_len + txm->l4_len;
			gdesc->txd.om = VMXNET3_OM_TSO;
			gdesc->txd.msscof = mss;

			deferred += (rte_pktmbuf_pkt_len(txm) - gdesc->txd.hlen + mss - 1) / mss;
		} else if (txm->ol_flags & PKT_TX_L4_MASK) {
			gdesc->txd.om = VMXNET3_OM_CSUM;
			gdesc->txd.hlen = txm->l2_len + txm->l3_len;

			switch (txm->ol_flags & PKT_TX_L4_MASK) {
			case PKT_TX_TCP_CKSUM:
				gdesc->txd.msscof = gdesc->txd.hlen + offsetof(struct tcp_hdr, cksum);
				break;
			case PKT_TX_UDP_CKSUM:
				gdesc->txd.msscof = gdesc->txd.hlen + offsetof(struct udp_hdr, dgram_cksum);
				break;
			default:
				PMD_TX_LOG(WARNING, "requested cksum offload not supported %#llx",
					   txm->ol_flags & PKT_TX_L4_MASK);
				abort();
			}
			deferred++;
		} else {
			gdesc->txd.hlen = 0;
			gdesc->txd.om = VMXNET3_OM_NONE;
			gdesc->txd.msscof = 0;
			deferred++;
		}

		/* flip the GEN bit on the SOP */
		rte_compiler_barrier();
		gdesc->dword[2] ^= VMXNET3_TXD_GEN;

		txq_ctrl->txNumDeferred = rte_cpu_to_le_32(deferred);
		nb_tx++;
	}

	PMD_TX_LOG(DEBUG, "vmxnet3 txThreshold: %u", rte_le_to_cpu_32(txq_ctrl->txThreshold));

	if (deferred >= rte_le_to_cpu_32(txq_ctrl->txThreshold)) {
		txq_ctrl->txNumDeferred = 0;
		/* Notify vSwitch that packets are available. */
		VMXNET3_WRITE_BAR0_REG(hw, (VMXNET3_REG_TXPROD + txq->queue_id * VMXNET3_REG_ALIGN),
				       txq->cmd_ring.next2fill);
	}

	return nb_tx;
}

/*
 *  Allocates mbufs and clusters. Post rx descriptors with buffer details
 *  so that device can receive packets in those buffers.
 *  Ring layout:
 *      Among the two rings, 1st ring contains buffers of type 0 and type 1.
 *      bufs_per_pkt is set such that for non-LRO cases all the buffers required
 *      by a frame will fit in 1st ring (1st buf of type0 and rest of type1).
 *      2nd ring contains buffers of type 1 alone. Second ring mostly be used
 *      only for LRO.
 */
static int
vmxnet3_post_rx_bufs(vmxnet3_rx_queue_t *rxq, uint8_t ring_id)
{
	int err = 0;
	uint32_t i = 0, val = 0;
	struct vmxnet3_cmd_ring *ring = &rxq->cmd_ring[ring_id];

	if (ring_id == 0) {
		/* Usually: One HEAD type buf per packet
		 * val = (ring->next2fill % rxq->hw->bufs_per_pkt) ?
		 * VMXNET3_RXD_BTYPE_BODY : VMXNET3_RXD_BTYPE_HEAD;
		 */

		/* We use single packet buffer so all heads here */
		val = VMXNET3_RXD_BTYPE_HEAD;
	} else {
		/* All BODY type buffers for 2nd ring */
		val = VMXNET3_RXD_BTYPE_BODY;
	}

	while (vmxnet3_cmd_ring_desc_avail(ring) > 0) {
		struct Vmxnet3_RxDesc *rxd;
		struct rte_mbuf *mbuf;
		vmxnet3_buf_info_t *buf_info = &ring->buf_info[ring->next2fill];

		rxd = (struct Vmxnet3_RxDesc *)(ring->base + ring->next2fill);

		/* Allocate blank mbuf for the current Rx Descriptor */
		mbuf = rte_mbuf_raw_alloc(rxq->mp);
		if (unlikely(mbuf == NULL)) {
			PMD_RX_LOG(ERR, "Error allocating mbuf");
			rxq->stats.rx_buf_alloc_failure++;
			err = ENOMEM;
			break;
		}

		/*
		 * Load mbuf pointer into buf_info[ring_size]
		 * buf_info structure is equivalent to cookie for virtio-virtqueue
		 */
		buf_info->m = mbuf;
		buf_info->len = (uint16_t)(mbuf->buf_len -
					   RTE_PKTMBUF_HEADROOM);
		buf_info->bufPA = rte_mbuf_data_dma_addr_default(mbuf);

		/* Load Rx Descriptor with the buffer's GPA */
		rxd->addr = buf_info->bufPA;

		/* After this point rxd->addr MUST not be NULL */
		rxd->btype = val;
		rxd->len = buf_info->len;
		/* Flip gen bit at the end to change ownership */
		rxd->gen = ring->gen;

		vmxnet3_cmd_ring_adv_next2fill(ring);
		i++;
	}

	/* Return error only if no buffers are posted at present */
	if (vmxnet3_cmd_ring_desc_avail(ring) >= (ring->size - 1))
		return -err;
	else
		return i;
}

/* Receive side checksum and other offloads */
static void
vmxnet3_rx_offload(const Vmxnet3_RxCompDesc *rcd, struct rte_mbuf *rxm)
{
	/* Check for RSS */
	if (rcd->rssType != VMXNET3_RCD_RSS_TYPE_NONE) {
		rxm->ol_flags |= PKT_RX_RSS_HASH;
		rxm->hash.rss = rcd->rssHash;
	}

	/* Check packet type, checksum errors, etc. Only support IPv4 for now. */
	if (rcd->v4) {
		struct ether_hdr *eth = rte_pktmbuf_mtod(rxm, struct ether_hdr *);
		struct ipv4_hdr *ip = (struct ipv4_hdr *)(eth + 1);

		if (((ip->version_ihl & 0xf) << 2) > (int)sizeof(struct ipv4_hdr))
			rxm->packet_type = RTE_PTYPE_L3_IPV4_EXT;
		else
			rxm->packet_type = RTE_PTYPE_L3_IPV4;

		if (!rcd->cnc) {
			if (!rcd->ipc)
				rxm->ol_flags |= PKT_RX_IP_CKSUM_BAD;
			if ((rcd->tcp || rcd->udp) && !rcd->tuc)
				rxm->ol_flags |= PKT_RX_L4_CKSUM_BAD;
		}
	}
}

/*
 * Process the Rx Completion Ring of given vmxnet3_rx_queue
 * for nb_pkts burst and return the number of packets received
 */
uint16_t
vmxnet3_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t nb_pkts)
{
	uint16_t nb_rx = 0;
	uint32_t nb_rxd = 0, idx = 0;
	uint8_t ring_idx = 0;
	vmxnet3_rx_queue_t *rxq = rx_queue;
	Vmxnet3_RxCompDesc *rcd;
	vmxnet3_buf_info_t *rbi;
	Vmxnet3_RxDesc *rxd;
	struct rte_mbuf *rxm = NULL;
	struct vmxnet3_hw *hw = rxq->hw;

	rcd = &rxq->comp_ring.base[rxq->comp_ring.next2proc].rcd;

	if (unlikely(rxq->stopped)) {
		PMD_RX_LOG(DEBUG, "Rx queue is stopped.");
		return 0;
	}

	while (rcd->gen == rxq->comp_ring.gen) {
		if (nb_rx >= nb_pkts)
			break;

		idx = rcd->rxdIdx;
		ring_idx = (uint8_t)((rcd->rqID == rxq->qid1) ? 0 : 1);
		rxd = (Vmxnet3_RxDesc *)rxq->cmd_ring[ring_idx].base + idx;
		RTE_SET_USED(rxd); /* used only for assert when enabled */
		rbi = rxq->cmd_ring[ring_idx].buf_info + idx;

		PMD_RX_LOG(DEBUG, "rxd idx: %d ring idx: %d.", idx, ring_idx);

		RTE_ASSERT(rcd->len <= rxd->len);
		RTE_ASSERT(rbi->m);

		/* Get the packet buffer pointer from buf_info */
		rxm = rbi->m;

		/* Clear descriptor associated buf_info to be reused */
		rbi->m = NULL;
		rbi->bufPA = 0;

		/* Update the index that we received a packet */
		rxq->cmd_ring[ring_idx].next2comp = idx;

		/* For RCD with EOP set, check if there is frame error */
		if (unlikely(rcd->eop && rcd->err)) {
			rxq->stats.drop_total++;
			rxq->stats.drop_err++;

			if (!rcd->fcs) {
				rxq->stats.drop_fcs++;
				PMD_RX_LOG(ERR, "Recv packet dropped due to frame err.");
			}
			PMD_RX_LOG(ERR, "Error in received packet rcd#:%d rxd:%d",
				   (int)(rcd - (struct Vmxnet3_RxCompDesc *)
					 rxq->comp_ring.base), rcd->rxdIdx);
			rte_pktmbuf_free_seg(rxm);
			goto rcd_done;
		}

		/* Initialize newly received packet buffer */
		rxm->port = rxq->port_id;
		rxm->nb_segs = 1;
		rxm->next = NULL;
		rxm->pkt_len = (uint16_t)rcd->len;
		rxm->data_len = (uint16_t)rcd->len;
		rxm->data_off = RTE_PKTMBUF_HEADROOM;
		rxm->ol_flags = 0;
		rxm->vlan_tci = 0;

		/*
		 * If this is the first buffer of the received packet,
		 * set the pointer to the first mbuf of the packet
		 * Otherwise, update the total length and the number of segments
		 * of the current scattered packet, and update the pointer to
		 * the last mbuf of the current packet.
		 */
		if (rcd->sop) {
			RTE_ASSERT(rxd->btype == VMXNET3_RXD_BTYPE_HEAD);

			if (unlikely(rcd->len == 0)) {
				RTE_ASSERT(rcd->eop);
				PMD_RX_LOG(DEBUG,
					   "Rx buf was skipped. rxring[%d][%d])",
					   ring_idx, idx);
				rte_pktmbuf_free_seg(rxm);
				goto rcd_done;
			}

			rxq->start_seg = rxm;
			vmxnet3_rx_offload(rcd, rxm);
		} else {
			struct rte_mbuf *start = rxq->start_seg;

			RTE_ASSERT(rxd->btype == VMXNET3_RXD_BTYPE_BODY);

			start->pkt_len += rxm->data_len;
			start->nb_segs++;
			rxq->last_seg->next = rxm;
		}
		rxq->last_seg = rxm;

		if (rcd->eop) {
			struct rte_mbuf *start = rxq->start_seg;

			/* Check for hardware stripped VLAN tag */
			if (rcd->ts) {
				start->ol_flags |= (PKT_RX_VLAN_PKT | PKT_RX_VLAN_STRIPPED);
				start->vlan_tci = rte_le_to_cpu_16((uint16_t)rcd->tci);
			}

			rx_pkts[nb_rx++] = start;
			rxq->start_seg = NULL;
		}

rcd_done:
		rxq->cmd_ring[ring_idx].next2comp = idx;
		VMXNET3_INC_RING_IDX_ONLY(rxq->cmd_ring[ring_idx].next2comp,
					  rxq->cmd_ring[ring_idx].size);

		/* It's time to allocate some new buf and renew descriptors */
		vmxnet3_post_rx_bufs(rxq, ring_idx);
		if (unlikely(rxq->shared->ctrl.updateRxProd)) {
			VMXNET3_WRITE_BAR0_REG(hw, rxprod_reg[ring_idx] + (rxq->queue_id * VMXNET3_REG_ALIGN),
					       rxq->cmd_ring[ring_idx].next2fill);
		}

		/* Advance to the next descriptor in comp_ring */
		vmxnet3_comp_ring_adv_next2proc(&rxq->comp_ring);

		rcd = &rxq->comp_ring.base[rxq->comp_ring.next2proc].rcd;
		nb_rxd++;
		if (nb_rxd > rxq->cmd_ring[0].size) {
			PMD_RX_LOG(ERR, "Used up quota of receiving packets,"
				   " relinquish control.");
			break;
		}
	}

	return nb_rx;
}

/*
 * Create memzone for device rings. malloc can't be used as the physical address is
 * needed. If the memzone is already created, then this function returns a ptr
 * to the old one.
 */
static const struct rte_memzone *
ring_dma_zone_reserve(struct rte_eth_dev *dev, const char *ring_name,
		      uint16_t queue_id, uint32_t ring_size, int socket_id)
{
	char z_name[RTE_MEMZONE_NAMESIZE];
	const struct rte_memzone *mz;

	snprintf(z_name, sizeof(z_name), "%s_%s_%d_%d",
		 dev->driver->pci_drv.driver.name, ring_name,
		 dev->data->port_id, queue_id);

	mz = rte_memzone_lookup(z_name);
	if (mz)
		return mz;

	return rte_memzone_reserve_aligned(z_name, ring_size,
					   socket_id, 0, VMXNET3_RING_BA_ALIGN);
}

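/*
 * Set up a Tx queue: allocate the queue structure, carve the command,
 * completion and data rings out of a single DMA-able memzone, and allocate
 * the per-descriptor buf_info array used for completion bookkeeping.
 */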
int
vmxnet3_dev_tx_queue_setup(struct rte_eth_dev *dev,
			   uint16_t queue_idx,
			   uint16_t nb_desc,
			   unsigned int socket_id,
			   __rte_unused const struct rte_eth_txconf *tx_conf)
{
	struct vmxnet3_hw *hw = dev->data->dev_private;
	const struct rte_memzone *mz;
	struct vmxnet3_tx_queue *txq;
	struct vmxnet3_cmd_ring *ring;
	struct vmxnet3_comp_ring *comp_ring;
	struct vmxnet3_data_ring *data_ring;
	int size;

	PMD_INIT_FUNC_TRACE();

	if ((tx_conf->txq_flags & ETH_TXQ_FLAGS_NOXSUMSCTP) !=
	    ETH_TXQ_FLAGS_NOXSUMSCTP) {
		PMD_INIT_LOG(ERR, "SCTP checksum offload not supported");
		return -EINVAL;
	}

	txq = rte_zmalloc("ethdev_tx_queue", sizeof(struct vmxnet3_tx_queue),
			  RTE_CACHE_LINE_SIZE);
	if (txq == NULL) {
		PMD_INIT_LOG(ERR, "Can not allocate tx queue structure");
		return -ENOMEM;
	}

	txq->queue_id = queue_idx;
	txq->port_id = dev->data->port_id;
	txq->shared = &hw->tqd_start[queue_idx];
	txq->hw = hw;
	txq->qid = queue_idx;
	txq->stopped = TRUE;

	ring = &txq->cmd_ring;
	comp_ring = &txq->comp_ring;
	data_ring = &txq->data_ring;

	/* Tx vmxnet ring length should be between 512-4096 */
	if (nb_desc < VMXNET3_DEF_TX_RING_SIZE) {
		PMD_INIT_LOG(ERR, "VMXNET3 Tx Ring Size Min: %u",
			     VMXNET3_DEF_TX_RING_SIZE);
		return -EINVAL;
	} else if (nb_desc > VMXNET3_TX_RING_MAX_SIZE) {
		PMD_INIT_LOG(ERR, "VMXNET3 Tx Ring Size Max: %u",
			     VMXNET3_TX_RING_MAX_SIZE);
		return -EINVAL;
	} else {
		ring->size = nb_desc;
		ring->size &= ~VMXNET3_RING_SIZE_MASK;
	}
	comp_ring->size = data_ring->size = ring->size;

	/* Tx vmxnet rings structure initialization*/
	ring->next2fill = 0;
	ring->next2comp = 0;
	ring->gen = VMXNET3_INIT_GEN;
	comp_ring->next2proc = 0;
	comp_ring->gen = VMXNET3_INIT_GEN;

	size = sizeof(struct Vmxnet3_TxDesc) * ring->size;
	size += sizeof(struct Vmxnet3_TxCompDesc) * comp_ring->size;
	size += sizeof(struct Vmxnet3_TxDataDesc) * data_ring->size;

	mz = ring_dma_zone_reserve(dev, "txdesc", queue_idx, size, socket_id);
	if (mz == NULL) {
		PMD_INIT_LOG(ERR, "ERROR: Creating queue descriptors zone");
		return -ENOMEM;
	}
	memset(mz->addr, 0, mz->len);

	/* cmd_ring initialization */
	ring->base = mz->addr;
	ring->basePA = mz->phys_addr;

	/* comp_ring initialization */
	comp_ring->base = ring->base + ring->size;
	comp_ring->basePA = ring->basePA +
		(sizeof(struct Vmxnet3_TxDesc) * ring->size);

	/* data_ring initialization */
	data_ring->base = (Vmxnet3_TxDataDesc *)(comp_ring->base + comp_ring->size);
	data_ring->basePA = comp_ring->basePA +
			(sizeof(struct Vmxnet3_TxCompDesc) * comp_ring->size);

	/* cmd_ring0 buf_info allocation */
	ring->buf_info = rte_zmalloc("tx_ring_buf_info",
				     ring->size * sizeof(vmxnet3_buf_info_t), RTE_CACHE_LINE_SIZE);
	if (ring->buf_info == NULL) {
		PMD_INIT_LOG(ERR, "ERROR: Creating tx_buf_info structure");
		return -ENOMEM;
	}

	/* Update the data portion with txq */
	dev->data->tx_queues[queue_idx] = txq;

	return 0;
}

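/*
 * Set up an Rx queue: allocate the queue structure, carve the two command
 * rings and the completion ring out of a single DMA-able memzone, and
 * allocate one buf_info array per command ring.
 */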
int
vmxnet3_dev_rx_queue_setup(struct rte_eth_dev *dev,
			   uint16_t queue_idx,
			   uint16_t nb_desc,
			   unsigned int socket_id,
			   __rte_unused const struct rte_eth_rxconf *rx_conf,
			   struct rte_mempool *mp)
{
	const struct rte_memzone *mz;
	struct vmxnet3_rx_queue *rxq;
	struct vmxnet3_hw *hw = dev->data->dev_private;
	struct vmxnet3_cmd_ring *ring0, *ring1, *ring;
	struct vmxnet3_comp_ring *comp_ring;
	int size;
	uint8_t i;
	char mem_name[32];

	PMD_INIT_FUNC_TRACE();

	rxq = rte_zmalloc("ethdev_rx_queue", sizeof(struct vmxnet3_rx_queue),
			  RTE_CACHE_LINE_SIZE);
	if (rxq == NULL) {
		PMD_INIT_LOG(ERR, "Can not allocate rx queue structure");
		return -ENOMEM;
	}

	rxq->mp = mp;
	rxq->queue_id = queue_idx;
	rxq->port_id = dev->data->port_id;
	rxq->shared = &hw->rqd_start[queue_idx];
	rxq->hw = hw;
	rxq->qid1 = queue_idx;
	rxq->qid2 = queue_idx + hw->num_rx_queues;
	rxq->stopped = TRUE;

	ring0 = &rxq->cmd_ring[0];
	ring1 = &rxq->cmd_ring[1];
	comp_ring = &rxq->comp_ring;

	/* Rx vmxnet rings length should be between 256-4096 */
	if (nb_desc < VMXNET3_DEF_RX_RING_SIZE) {
		PMD_INIT_LOG(ERR, "VMXNET3 Rx Ring Size Min: 256");
		return -EINVAL;
	} else if (nb_desc > VMXNET3_RX_RING_MAX_SIZE) {
		PMD_INIT_LOG(ERR, "VMXNET3 Rx Ring Size Max: 4096");
		return -EINVAL;
	} else {
		ring0->size = nb_desc;
		ring0->size &= ~VMXNET3_RING_SIZE_MASK;
		ring1->size = ring0->size;
	}

	comp_ring->size = ring0->size + ring1->size;

	/* Rx vmxnet rings structure initialization */
	ring0->next2fill = 0;
	ring1->next2fill = 0;
	ring0->next2comp = 0;
	ring1->next2comp = 0;
	ring0->gen = VMXNET3_INIT_GEN;
	ring1->gen = VMXNET3_INIT_GEN;
	comp_ring->next2proc = 0;
	comp_ring->gen = VMXNET3_INIT_GEN;

	size = sizeof(struct Vmxnet3_RxDesc) * (ring0->size + ring1->size);
	size += sizeof(struct Vmxnet3_RxCompDesc) * comp_ring->size;

	mz = ring_dma_zone_reserve(dev, "rxdesc", queue_idx, size, socket_id);
	if (mz == NULL) {
		PMD_INIT_LOG(ERR, "ERROR: Creating queue descriptors zone");
		return -ENOMEM;
	}
	memset(mz->addr, 0, mz->len);

	/* cmd_ring0 initialization */
	ring0->base = mz->addr;
	ring0->basePA = mz->phys_addr;

	/* cmd_ring1 initialization */
	ring1->base = ring0->base + ring0->size;
	ring1->basePA = ring0->basePA + sizeof(struct Vmxnet3_RxDesc) * ring0->size;

	/* comp_ring initialization */
	comp_ring->base = ring1->base + ring1->size;
	comp_ring->basePA = ring1->basePA + sizeof(struct Vmxnet3_RxDesc) *
			    ring1->size;

	/* cmd_ring0-cmd_ring1 buf_info allocation */
	for (i = 0; i < VMXNET3_RX_CMDRING_SIZE; i++) {

		ring = &rxq->cmd_ring[i];
		ring->rid = i;
		snprintf(mem_name, sizeof(mem_name), "rx_ring_%d_buf_info", i);

		ring->buf_info = rte_zmalloc(mem_name,
					     ring->size * sizeof(vmxnet3_buf_info_t),
					     RTE_CACHE_LINE_SIZE);
		if (ring->buf_info == NULL) {
			PMD_INIT_LOG(ERR, "ERROR: Creating rx_buf_info structure");
			return -ENOMEM;
		}
	}

	/* Update the data portion with rxq */
	dev->data->rx_queues[queue_idx] = rxq;

	return 0;
}

/*
 * Initializes Receive Unit
 * Load mbufs in rx queue in advance
 */
int
vmxnet3_dev_rxtx_init(struct rte_eth_dev *dev)
{
	struct vmxnet3_hw *hw = dev->data->dev_private;
	int i, ret;
	uint8_t j;

	PMD_INIT_FUNC_TRACE();

	for (i = 0; i < hw->num_rx_queues; i++) {
		vmxnet3_rx_queue_t *rxq = dev->data->rx_queues[i];

		for (j = 0; j < VMXNET3_RX_CMDRING_SIZE; j++) {
			/* Post the full ring of buffers for this cmd ring */
			ret = vmxnet3_post_rx_bufs(rxq, j);
			if (ret <= 0) {
				PMD_INIT_LOG(ERR,
					     "ERROR: Posting Rxq: %d buffers ring: %d",
					     i, j);
				return -ret;
			}
			/*
			 * Updating device with the index:next2fill to fill the
			 * mbufs for coming packets.
			 */
			if (unlikely(rxq->shared->ctrl.updateRxProd)) {
				VMXNET3_WRITE_BAR0_REG(hw, rxprod_reg[j] + (rxq->queue_id * VMXNET3_REG_ALIGN),
						       rxq->cmd_ring[j].next2fill);
			}
		}
		rxq->stopped = FALSE;
		rxq->start_seg = NULL;
	}

	for (i = 0; i < dev->data->nb_tx_queues; i++) {
		struct vmxnet3_tx_queue *txq = dev->data->tx_queues[i];

		txq->stopped = FALSE;
	}

	return 0;
}

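/*
 * Default 40-byte Toeplitz RSS key, used when the application does not supply
 * one in rx_adv_conf.rss_conf.
 */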
static uint8_t rss_intel_key[40] = {
	0x6D, 0x5A, 0x56, 0xDA, 0x25, 0x5B, 0x0E, 0xC2,
	0x41, 0x67, 0x25, 0x3D, 0x43, 0xA3, 0x8F, 0xB0,
	0xD0, 0xCA, 0x2B, 0xCB, 0xAE, 0x7B, 0x30, 0xB4,
	0x77, 0xCB, 0x2D, 0xA3, 0x80, 0x30, 0xF2, 0x0C,
	0x6A, 0x42, 0xB7, 0x3B, 0xBE, 0xAC, 0x01, 0xFA,
};

/*
 * Configure RSS feature
 */
int
vmxnet3_rss_configure(struct rte_eth_dev *dev)
{
	struct vmxnet3_hw *hw = dev->data->dev_private;
	struct VMXNET3_RSSConf *dev_rss_conf;
	struct rte_eth_rss_conf *port_rss_conf;
	uint64_t rss_hf;
	uint8_t i, j;

	PMD_INIT_FUNC_TRACE();

	dev_rss_conf = hw->rss_conf;
	port_rss_conf = &dev->data->dev_conf.rx_adv_conf.rss_conf;

	/* loading hashFunc */
	dev_rss_conf->hashFunc = VMXNET3_RSS_HASH_FUNC_TOEPLITZ;
	/* loading hashKeySize */
	dev_rss_conf->hashKeySize = VMXNET3_RSS_MAX_KEY_SIZE;
	/* loading indTableSize: Must not exceed VMXNET3_RSS_MAX_IND_TABLE_SIZE (128)*/
	dev_rss_conf->indTableSize = (uint16_t)(hw->num_rx_queues * 4);

	if (port_rss_conf->rss_key == NULL) {
		/* Default hash key */
		port_rss_conf->rss_key = rss_intel_key;
	}

	/* loading hashKey */
	memcpy(&dev_rss_conf->hashKey[0], port_rss_conf->rss_key,
	       dev_rss_conf->hashKeySize);

	/* loading indTable */
	for (i = 0, j = 0; i < dev_rss_conf->indTableSize; i++, j++) {
		if (j == dev->data->nb_rx_queues)
			j = 0;

		dev_rss_conf->indTable[i] = j;
	}

	/* loading hashType */
	dev_rss_conf->hashType = 0;
	rss_hf = port_rss_conf->rss_hf & VMXNET3_RSS_OFFLOAD_ALL;
	if (rss_hf & ETH_RSS_IPV4)
		dev_rss_conf->hashType |= VMXNET3_RSS_HASH_TYPE_IPV4;
	if (rss_hf & ETH_RSS_NONFRAG_IPV4_TCP)
		dev_rss_conf->hashType |= VMXNET3_RSS_HASH_TYPE_TCP_IPV4;
	if (rss_hf & ETH_RSS_IPV6)
		dev_rss_conf->hashType |= VMXNET3_RSS_HASH_TYPE_IPV6;
	if (rss_hf & ETH_RSS_NONFRAG_IPV6_TCP)
		dev_rss_conf->hashType |= VMXNET3_RSS_HASH_TYPE_TCP_IPV6;

	return VMXNET3_SUCCESS;
}