/*-
 * Copyright(c) 2010-2015 Intel Corporation. All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 *   * Redistributions of source code must retain the above copyright
 *     notice, this list of conditions and the following disclaimer.
 *   * Redistributions in binary form must reproduce the above copyright
 *     notice, this list of conditions and the following disclaimer in
 *     the documentation and/or other materials provided with the
 *     distribution.
 *   * Neither the name of Intel Corporation nor the names of its
 *     contributors may be used to endorse or promote products derived
 *     from this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

#include <stdio.h>
#include <stdint.h>
#include <string.h>
#include <errno.h>
#include <sys/queue.h>

#include <rte_byteorder.h>
#include <rte_common.h>
#include <rte_cycles.h>
#include <rte_debug.h>
#include <rte_interrupts.h>
#include <rte_memory.h>
#include <rte_memzone.h>
#include <rte_launch.h>
#include <rte_per_lcore.h>
#include <rte_lcore.h>
#include <rte_atomic.h>
#include <rte_branch_prediction.h>
#include <rte_mempool.h>
#include <rte_malloc.h>
#include <rte_mbuf.h>
#include <rte_ether.h>
#include <rte_ethdev.h>
#include <rte_prefetch.h>
#include <rte_ip.h>
#include <rte_string_fns.h>
#include <rte_errno.h>

#include "base/vmxnet3_defs.h"
#include "vmxnet3_ring.h"
#include "vmxnet3_logs.h"
#include "vmxnet3_ethdev.h"
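
/*
 * DMA (physical) address of an mbuf's payload: the first macro uses the
 * mbuf's current data offset, the second assumes the default
 * RTE_PKTMBUF_HEADROOM and is used when posting fresh receive buffers.
 */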
#define RTE_MBUF_DATA_DMA_ADDR(mb) \
	(uint64_t) ((mb)->buf_physaddr + (mb)->data_off)

#define RTE_MBUF_DATA_DMA_ADDR_DEFAULT(mb) \
	(uint64_t) ((mb)->buf_physaddr + RTE_PKTMBUF_HEADROOM)
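
/* Rx producer index registers, one per hardware command ring. */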
static const uint32_t rxprod_reg[2] = {VMXNET3_REG_RXPROD, VMXNET3_REG_RXPROD2};

static int vmxnet3_post_rx_bufs(vmxnet3_rx_queue_t *, uint8_t);
static void vmxnet3_tq_tx_complete(vmxnet3_tx_queue_t *);
#ifdef RTE_LIBRTE_VMXNET3_DEBUG_DRIVER_NOT_USED
static void vmxnet3_rxq_dump(struct vmxnet3_rx_queue *);
static void vmxnet3_txq_dump(struct vmxnet3_tx_queue *);
#endif
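
/*
 * Allocate a raw mbuf from the queue's mempool to be posted as a receive
 * buffer; the caller fills in the descriptor fields it needs.
 */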
static struct rte_mbuf *
rte_rxmbuf_alloc(struct rte_mempool *mp)
{
	struct rte_mbuf *m;

	m = __rte_mbuf_raw_alloc(mp);
	__rte_mbuf_sanity_check_raw(m, 0);
	return m;
}

#ifdef RTE_LIBRTE_VMXNET3_DEBUG_DRIVER_NOT_USED
static void
vmxnet3_rxq_dump(struct vmxnet3_rx_queue *rxq)
{
	uint32_t avail = 0;

	if (rxq == NULL)
		return;

	PMD_RX_LOG(DEBUG,
		   "RXQ: cmd0 base : 0x%p cmd1 base : 0x%p comp ring base : 0x%p.",
		   rxq->cmd_ring[0].base, rxq->cmd_ring[1].base, rxq->comp_ring.base);
	PMD_RX_LOG(DEBUG,
		   "RXQ: cmd0 basePA : 0x%lx cmd1 basePA : 0x%lx comp ring basePA : 0x%lx.",
		   (unsigned long)rxq->cmd_ring[0].basePA,
		   (unsigned long)rxq->cmd_ring[1].basePA,
		   (unsigned long)rxq->comp_ring.basePA);

	avail = vmxnet3_cmd_ring_desc_avail(&rxq->cmd_ring[0]);
	PMD_RX_LOG(DEBUG,
		   "RXQ:cmd0: size=%u; free=%u; next2proc=%u; queued=%u",
		   (uint32_t)rxq->cmd_ring[0].size, avail,
		   rxq->comp_ring.next2proc,
		   rxq->cmd_ring[0].size - avail);

	avail = vmxnet3_cmd_ring_desc_avail(&rxq->cmd_ring[1]);
	PMD_RX_LOG(DEBUG, "RXQ:cmd1 size=%u; free=%u; next2proc=%u; queued=%u",
		   (uint32_t)rxq->cmd_ring[1].size, avail, rxq->comp_ring.next2proc,
		   rxq->cmd_ring[1].size - avail);
}

static void
vmxnet3_txq_dump(struct vmxnet3_tx_queue *txq)
{
	uint32_t avail = 0;

	if (txq == NULL)
		return;

	PMD_TX_LOG(DEBUG, "TXQ: cmd base : 0x%p comp ring base : 0x%p data ring base : 0x%p.",
		   txq->cmd_ring.base, txq->comp_ring.base, txq->data_ring.base);
	PMD_TX_LOG(DEBUG, "TXQ: cmd basePA : 0x%lx comp ring basePA : 0x%lx data ring basePA : 0x%lx.",
		   (unsigned long)txq->cmd_ring.basePA,
		   (unsigned long)txq->comp_ring.basePA,
		   (unsigned long)txq->data_ring.basePA);

	avail = vmxnet3_cmd_ring_desc_avail(&txq->cmd_ring);
	PMD_TX_LOG(DEBUG, "TXQ: size=%u; free=%u; next2proc=%u; queued=%u",
		   (uint32_t)txq->cmd_ring.size, avail,
		   txq->comp_ring.next2proc, txq->cmd_ring.size - avail);
}
#endif
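
/*
 * Free any mbufs still attached to a command ring. Safe only once the
 * device has been quiesced, since descriptor ownership is not checked.
 */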
static void
vmxnet3_cmd_ring_release_mbufs(vmxnet3_cmd_ring_t *ring)
{
	while (ring->next2comp != ring->next2fill) {
		/* No need to worry about tx desc ownership, device is quiesced by now. */
		vmxnet3_buf_info_t *buf_info = ring->buf_info + ring->next2comp;

		rte_pktmbuf_free(buf_info->m);
		buf_info->m = NULL;
		vmxnet3_cmd_ring_adv_next2comp(ring);
	}
}

static void
vmxnet3_cmd_ring_release(vmxnet3_cmd_ring_t *ring)
{
	vmxnet3_cmd_ring_release_mbufs(ring);
	rte_free(ring->buf_info);
	ring->buf_info = NULL;
}

void
vmxnet3_dev_tx_queue_release(void *txq)
{
	vmxnet3_tx_queue_t *tq = txq;

	/* Release the cmd_ring */
	if (tq != NULL)
		vmxnet3_cmd_ring_release(&tq->cmd_ring);
}

void
vmxnet3_dev_rx_queue_release(void *rxq)
{
	int i;
	vmxnet3_rx_queue_t *rq = rxq;

	/* Release both the cmd_rings */
	if (rq != NULL)
		for (i = 0; i < VMXNET3_RX_CMDRING_SIZE; i++)
			vmxnet3_cmd_ring_release(&rq->cmd_ring[i]);
}
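
/*
 * Queue reset helpers: drop any mbufs still attached to the rings and
 * return the rings to their initial empty state (VMXNET3_INIT_GEN),
 * clearing the descriptor memory.
 */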
static void
vmxnet3_dev_tx_queue_reset(void *txq)
{
	vmxnet3_tx_queue_t *tq = txq;
	struct vmxnet3_cmd_ring *ring = &tq->cmd_ring;
	struct vmxnet3_comp_ring *comp_ring = &tq->comp_ring;
	struct vmxnet3_data_ring *data_ring = &tq->data_ring;
	int size;

	if (tq != NULL) {
		/* Release the cmd_ring mbufs */
		vmxnet3_cmd_ring_release_mbufs(&tq->cmd_ring);
	}

	/* Tx vmxnet rings structure initialization */
	ring->next2fill = 0;
	ring->next2comp = 0;
	ring->gen = VMXNET3_INIT_GEN;
	comp_ring->next2proc = 0;
	comp_ring->gen = VMXNET3_INIT_GEN;

	size = sizeof(struct Vmxnet3_TxDesc) * ring->size;
	size += sizeof(struct Vmxnet3_TxCompDesc) * comp_ring->size;
	size += sizeof(struct Vmxnet3_TxDataDesc) * data_ring->size;

	memset(ring->base, 0, size);
}

static void
vmxnet3_dev_rx_queue_reset(void *rxq)
{
	int i;
	vmxnet3_rx_queue_t *rq = rxq;
	struct vmxnet3_cmd_ring *ring0, *ring1;
	struct vmxnet3_comp_ring *comp_ring;
	int size;

	if (rq != NULL) {
		/* Release both the cmd_rings mbufs */
		for (i = 0; i < VMXNET3_RX_CMDRING_SIZE; i++)
			vmxnet3_cmd_ring_release_mbufs(&rq->cmd_ring[i]);
	}

	ring0 = &rq->cmd_ring[0];
	ring1 = &rq->cmd_ring[1];
	comp_ring = &rq->comp_ring;

	/* Rx vmxnet rings structure initialization */
	ring0->next2fill = 0;
	ring1->next2fill = 0;
	ring0->next2comp = 0;
	ring1->next2comp = 0;
	ring0->gen = VMXNET3_INIT_GEN;
	ring1->gen = VMXNET3_INIT_GEN;
	comp_ring->next2proc = 0;
	comp_ring->gen = VMXNET3_INIT_GEN;

	size = sizeof(struct Vmxnet3_RxDesc) * (ring0->size + ring1->size);
	size += sizeof(struct Vmxnet3_RxCompDesc) * comp_ring->size;

	memset(ring0->base, 0, size);
}

void
vmxnet3_dev_clear_queues(struct rte_eth_dev *dev)
{
	unsigned int i;

	PMD_INIT_FUNC_TRACE();

	for (i = 0; i < dev->data->nb_tx_queues; i++) {
		struct vmxnet3_tx_queue *txq = dev->data->tx_queues[i];

		if (txq != NULL)
			vmxnet3_dev_tx_queue_reset(txq);
	}

	for (i = 0; i < dev->data->nb_rx_queues; i++) {
		struct vmxnet3_rx_queue *rxq = dev->data->rx_queues[i];

		if (rxq != NULL)
			vmxnet3_dev_rx_queue_reset(rxq);
	}
}
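
/*
 * Reclaim Tx command descriptors whose completion entries the device has
 * written back, freeing the mbuf segments recorded in buf_info.
 */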
static void
vmxnet3_tq_tx_complete(vmxnet3_tx_queue_t *txq)
{
	int completed = 0;
	struct rte_mbuf *mbuf;
	vmxnet3_comp_ring_t *comp_ring = &txq->comp_ring;
	struct Vmxnet3_TxCompDesc *tcd = (struct Vmxnet3_TxCompDesc *)
		(comp_ring->base + comp_ring->next2proc);

	while (tcd->gen == comp_ring->gen) {
		/* Release cmd_ring descriptor and free mbuf */
		VMXNET3_ASSERT(txq->cmd_ring.base[tcd->txdIdx].txd.eop == 1);
		while (txq->cmd_ring.next2comp != tcd->txdIdx) {
			mbuf = txq->cmd_ring.buf_info[txq->cmd_ring.next2comp].m;
			txq->cmd_ring.buf_info[txq->cmd_ring.next2comp].m = NULL;
			rte_pktmbuf_free_seg(mbuf);

			/* Mark the txd for which tcd was generated as completed */
			vmxnet3_cmd_ring_adv_next2comp(&txq->cmd_ring);
			completed++;
		}

		vmxnet3_comp_ring_adv_next2proc(comp_ring);
		tcd = (struct Vmxnet3_TxCompDesc *)(comp_ring->base +
						    comp_ring->next2proc);
	}

	PMD_TX_LOG(DEBUG, "Processed %d tx comps & command descs.", completed);
}
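
/*
 * Transmit a burst of packets on the given Tx queue. Each mbuf segment
 * consumes one command descriptor; the SOP descriptor's generation bit is
 * flipped last so the device never sees a partially written chain.
 */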
uint16_t
vmxnet3_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts,
		  uint16_t nb_pkts)
{
	uint16_t nb_tx;
	vmxnet3_tx_queue_t *txq = tx_queue;
	struct vmxnet3_hw *hw = txq->hw;

	if (unlikely(txq->stopped)) {
		PMD_TX_LOG(DEBUG, "Tx queue is stopped.");
		return 0;
	}

	/* Free up the comp_descriptors aggressively */
	vmxnet3_tq_tx_complete(txq);

	nb_tx = 0;
	while (nb_tx < nb_pkts) {
		Vmxnet3_GenericDesc *gdesc;
		vmxnet3_buf_info_t *tbi;
		uint32_t first2fill, avail, dw2;
		struct rte_mbuf *txm = tx_pkts[nb_tx];
		struct rte_mbuf *m_seg = txm;

		/* If the packet is excessively fragmented, drop it */
		if (unlikely(txm->nb_segs > VMXNET3_MAX_TXD_PER_PKT)) {
			++txq->stats.drop_too_many_segs;
			++txq->stats.drop_total;
			rte_pktmbuf_free(txm);
			++nb_tx;
			continue;
		}

		/* Is command ring full? */
		avail = vmxnet3_cmd_ring_desc_avail(&txq->cmd_ring);
		if (txm->nb_segs > avail) {
			++txq->stats.tx_ring_full;
			break;
		}

		/* Use the previous gen bit for the SOP desc */
		dw2 = (txq->cmd_ring.gen ^ 0x1) << VMXNET3_TXD_GEN_SHIFT;
		first2fill = txq->cmd_ring.next2fill;
		do {
			/* Remember the transmit buffer for cleanup */
			tbi = txq->cmd_ring.buf_info + txq->cmd_ring.next2fill;
			tbi->m = m_seg;

			/* NB: the following assumes that the VMXNET3 maximum
			 * transmit buffer size (16K) is greater than the
			 * maximum size of an mbuf segment.
			 */
			gdesc = txq->cmd_ring.base + txq->cmd_ring.next2fill;
			gdesc->txd.addr = RTE_MBUF_DATA_DMA_ADDR(m_seg);
			gdesc->dword[2] = dw2 | m_seg->data_len;
			gdesc->dword[3] = 0;

			/* Move to the next2fill descriptor */
			vmxnet3_cmd_ring_adv_next2fill(&txq->cmd_ring);

			/* Use the right gen for non-SOP desc */
			dw2 = txq->cmd_ring.gen << VMXNET3_TXD_GEN_SHIFT;
		} while ((m_seg = m_seg->next) != NULL);

		/* Update the EOP descriptor */
		gdesc->dword[3] |= VMXNET3_TXD_EOP | VMXNET3_TXD_CQ;

		/* Add VLAN tag if present */
		gdesc = txq->cmd_ring.base + first2fill;
		if (txm->ol_flags & PKT_TX_VLAN_PKT) {
			gdesc->txd.ti = 1;
			gdesc->txd.tci = txm->vlan_tci;
		}

		/* TODO: Add transmit checksum offload here */

		/* Flip the GEN bit on the SOP descriptor */
		rte_compiler_barrier();
		gdesc->dword[2] ^= VMXNET3_TXD_GEN;

		txq->shared->ctrl.txNumDeferred++;
		nb_tx++;
	}

	PMD_TX_LOG(DEBUG, "vmxnet3 txThreshold: %u", txq->shared->ctrl.txThreshold);

	if (txq->shared->ctrl.txNumDeferred >= txq->shared->ctrl.txThreshold) {
		txq->shared->ctrl.txNumDeferred = 0;
		/* Notify vSwitch that packets are available. */
		VMXNET3_WRITE_BAR0_REG(hw, (VMXNET3_REG_TXPROD + txq->queue_id * VMXNET3_REG_ALIGN),
				       txq->cmd_ring.next2fill);
	}

	return nb_tx;
}

/*
 * Allocate mbufs and post Rx descriptors with the buffer details so that
 * the device can receive packets into those buffers.
 *
 * Of the two command rings, the 1st ring holds buffers of type 0 (HEAD)
 * and type 1 (BODY); bufs_per_pkt is set so that for non-LRO cases all
 * buffers required by a frame fit in the 1st ring (1st buffer of type 0,
 * the rest of type 1). The 2nd ring holds type 1 buffers only and is
 * mostly used for packets that span multiple buffers (e.g. LRO).
 */
static int
vmxnet3_post_rx_bufs(vmxnet3_rx_queue_t *rxq, uint8_t ring_id)
{
	int err = 0;
	uint32_t i = 0, val = 0;
	struct vmxnet3_cmd_ring *ring = &rxq->cmd_ring[ring_id];

	if (ring_id == 0) {
		/* Usually: One HEAD type buf per packet
		 * val = (ring->next2fill % rxq->hw->bufs_per_pkt) ?
		 * VMXNET3_RXD_BTYPE_BODY : VMXNET3_RXD_BTYPE_HEAD;
		 */

		/* We use a single packet buffer, so all heads here */
		val = VMXNET3_RXD_BTYPE_HEAD;
	} else {
		/* All BODY type buffers for the 2nd ring */
		val = VMXNET3_RXD_BTYPE_BODY;
	}

	while (vmxnet3_cmd_ring_desc_avail(ring) > 0) {
		struct Vmxnet3_RxDesc *rxd;
		struct rte_mbuf *mbuf;
		vmxnet3_buf_info_t *buf_info = &ring->buf_info[ring->next2fill];

		rxd = (struct Vmxnet3_RxDesc *)(ring->base + ring->next2fill);

		/* Allocate blank mbuf for the current Rx Descriptor */
		mbuf = rte_rxmbuf_alloc(rxq->mp);
		if (unlikely(mbuf == NULL)) {
			PMD_RX_LOG(ERR, "Error allocating mbuf in %s", __func__);
			rxq->stats.rx_buf_alloc_failure++;
			err = ENOMEM;
			break;
		}

		/*
		 * Load the mbuf pointer into buf_info;
		 * buf_info is the equivalent of the virtio-virtqueue cookie.
		 */
		buf_info->m = mbuf;
		buf_info->len = (uint16_t)(mbuf->buf_len -
					   RTE_PKTMBUF_HEADROOM);
		buf_info->bufPA = RTE_MBUF_DATA_DMA_ADDR_DEFAULT(mbuf);

		/* Load Rx Descriptor with the buffer's GPA */
		rxd->addr = buf_info->bufPA;

		/* After this point rxd->addr MUST not be NULL */
		rxd->btype = val;
		rxd->len = buf_info->len;
		/* Flip gen bit at the end to change ownership */
		rxd->gen = ring->gen;

		vmxnet3_cmd_ring_adv_next2fill(ring);
		i++;
	}

	/* Return an error only if no buffers were posted at all */
	if (vmxnet3_cmd_ring_desc_avail(ring) >= (ring->size - 1))
		return -err;
	else
		return i;
}

/* Receive side checksum and other offloads */
static void
vmxnet3_rx_offload(const Vmxnet3_RxCompDesc *rcd, struct rte_mbuf *rxm)
{
	/* Check for hardware stripped VLAN tag */
	if (rcd->ts) {
		rxm->ol_flags |= PKT_RX_VLAN_PKT;
		rxm->vlan_tci = rte_le_to_cpu_16((uint16_t)rcd->tci);
	}

	/* Check if the packet was RSS hashed */
	if (rcd->rssType != VMXNET3_RCD_RSS_TYPE_NONE) {
		rxm->ol_flags |= PKT_RX_RSS_HASH;
		rxm->hash.rss = rcd->rssHash;
	}

	/* Check packet type, checksum errors, etc. Only support IPv4 for now. */
	if (rcd->v4) {
		struct ether_hdr *eth = rte_pktmbuf_mtod(rxm, struct ether_hdr *);
		struct ipv4_hdr *ip = (struct ipv4_hdr *)(eth + 1);

		if (((ip->version_ihl & 0xf) << 2) > (int)sizeof(struct ipv4_hdr))
			rxm->ol_flags |= PKT_RX_IPV4_HDR_EXT;
		else
			rxm->ol_flags |= PKT_RX_IPV4_HDR;

		if (!rcd->cnc) {
			if (!rcd->ipc)
				rxm->ol_flags |= PKT_RX_IP_CKSUM_BAD;
			if ((rcd->tcp || rcd->udp) && !rcd->tuc)
				rxm->ol_flags |= PKT_RX_L4_CKSUM_BAD;
		}
	}
}

/*
 * Process the Rx Completion Ring of the given vmxnet3_rx_queue for an
 * nb_pkts burst and return the number of packets received.
 */
uint16_t
vmxnet3_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t nb_pkts)
{
	uint16_t nb_rx;
	uint32_t nb_rxd, idx;
	uint8_t ring_idx;
	vmxnet3_rx_queue_t *rxq;
	Vmxnet3_RxCompDesc *rcd;
	vmxnet3_buf_info_t *rbi;
	Vmxnet3_RxDesc *rxd;
	struct rte_mbuf *rxm = NULL;
	struct vmxnet3_hw *hw;

	nb_rx = 0;
	ring_idx = 0;
	nb_rxd = 0;
	idx = 0;

	rxq = rx_queue;
	hw = rxq->hw;

	rcd = &rxq->comp_ring.base[rxq->comp_ring.next2proc].rcd;

	if (unlikely(rxq->stopped)) {
		PMD_RX_LOG(DEBUG, "Rx queue is stopped.");
		return 0;
	}

	while (rcd->gen == rxq->comp_ring.gen) {
		if (nb_rx >= nb_pkts)
			break;

		idx = rcd->rxdIdx;
		ring_idx = (uint8_t)((rcd->rqID == rxq->qid1) ? 0 : 1);
		rxd = (Vmxnet3_RxDesc *)rxq->cmd_ring[ring_idx].base + idx;
		rbi = rxq->cmd_ring[ring_idx].buf_info + idx;

		if (unlikely(rcd->sop != 1 || rcd->eop != 1)) {
			rte_pktmbuf_free_seg(rbi->m);
			PMD_RX_LOG(DEBUG, "Packet spread across multiple buffers");
			goto rcd_done;
		}

		PMD_RX_LOG(DEBUG, "rxd idx: %d ring idx: %d.", idx, ring_idx);

		VMXNET3_ASSERT(rcd->len <= rxd->len);
		VMXNET3_ASSERT(rbi->m);

		if (unlikely(rcd->len == 0)) {
			PMD_RX_LOG(DEBUG, "Rx buf was skipped. rxring[%d][%d]",
				   ring_idx, idx);
			VMXNET3_ASSERT(rcd->sop && rcd->eop);
			rte_pktmbuf_free_seg(rbi->m);
			goto rcd_done;
		}

		/* Assuming a packet comes in a single packet buffer */
		if (unlikely(rxd->btype != VMXNET3_RXD_BTYPE_HEAD)) {
			PMD_RX_LOG(ERR,
				   "Alert: Misbehaving device, incorrect "
				   "buffer type used. Packet dropped.");
			rte_pktmbuf_free_seg(rbi->m);
			goto rcd_done;
		}
		VMXNET3_ASSERT(rxd->btype == VMXNET3_RXD_BTYPE_HEAD);

		/* Get the packet buffer pointer from buf_info */
		rxm = rbi->m;

		/* Clear descriptor associated buf_info so it can be reused */
		rbi->m = NULL;
		rbi->bufPA = 0;

		/* Update the index at which we received a packet */
		rxq->cmd_ring[ring_idx].next2comp = idx;

		/* For an RCD with EOP set, check if there is a frame error */
		if (unlikely(rcd->err)) {
			rxq->stats.drop_total++;
			rxq->stats.drop_err++;

			if (!rcd->fcs) {
				rxq->stats.drop_fcs++;
				PMD_RX_LOG(ERR, "Recv packet dropped due to frame err.");
			}
			PMD_RX_LOG(ERR, "Error in received packet rcd#:%d rxd:%d",
				   (int)(rcd - (struct Vmxnet3_RxCompDesc *)
					 rxq->comp_ring.base), rcd->rxdIdx);
			rte_pktmbuf_free_seg(rxm);
			goto rcd_done;
		}

		/* Initialize newly received packet buffer */
		rxm->port = rxq->port_id;
		rxm->nb_segs = 1;
		rxm->next = NULL;
		rxm->pkt_len = (uint16_t)rcd->len;
		rxm->data_len = (uint16_t)rcd->len;
		rxm->data_off = RTE_PKTMBUF_HEADROOM;
		rxm->ol_flags = 0;
		rxm->vlan_tci = 0;

		vmxnet3_rx_offload(rcd, rxm);

		rx_pkts[nb_rx++] = rxm;
rcd_done:
		rxq->cmd_ring[ring_idx].next2comp = idx;
		VMXNET3_INC_RING_IDX_ONLY(rxq->cmd_ring[ring_idx].next2comp, rxq->cmd_ring[ring_idx].size);

		/* It's time to allocate some new buffers and renew descriptors */
		vmxnet3_post_rx_bufs(rxq, ring_idx);
		if (unlikely(rxq->shared->ctrl.updateRxProd)) {
			VMXNET3_WRITE_BAR0_REG(hw, rxprod_reg[ring_idx] + (rxq->queue_id * VMXNET3_REG_ALIGN),
					       rxq->cmd_ring[ring_idx].next2fill);
		}

		/* Advance to the next descriptor in comp_ring */
		vmxnet3_comp_ring_adv_next2proc(&rxq->comp_ring);

		rcd = &rxq->comp_ring.base[rxq->comp_ring.next2proc].rcd;
		nb_rxd++;
		if (nb_rxd > rxq->cmd_ring[0].size) {
			PMD_RX_LOG(ERR,
				   "Used up quota of receiving packets, relinquish control.");
			break;
		}
	}

	return nb_rx;
}

/*
 * Create a memzone for the device rings. malloc can't be used as the
 * physical address is needed. If the memzone is already created, then
 * this function returns a pointer to the existing one.
 */
static const struct rte_memzone *
ring_dma_zone_reserve(struct rte_eth_dev *dev, const char *ring_name,
		      uint16_t queue_id, uint32_t ring_size, int socket_id)
{
	char z_name[RTE_MEMZONE_NAMESIZE];
	const struct rte_memzone *mz;

	snprintf(z_name, sizeof(z_name), "%s_%s_%d_%d",
		 dev->driver->pci_drv.name, ring_name,
		 dev->data->port_id, queue_id);

	mz = rte_memzone_lookup(z_name);
	if (mz)
		return mz;

	return rte_memzone_reserve_aligned(z_name, ring_size,
					   socket_id, 0, VMXNET3_RING_BA_ALIGN);
}
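
/*
 * Set up a Tx queue: allocate the queue structure, carve the command,
 * completion and data rings out of one contiguous DMA memzone
 * (cmd ring | comp ring | data ring), and allocate the buf_info array
 * used to track mbufs until their descriptors complete.
 */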
int
vmxnet3_dev_tx_queue_setup(struct rte_eth_dev *dev,
			   uint16_t queue_idx,
			   uint16_t nb_desc,
			   unsigned int socket_id,
			   __attribute__((unused)) const struct rte_eth_txconf *tx_conf)
{
	struct vmxnet3_hw *hw = dev->data->dev_private;
	const struct rte_memzone *mz;
	struct vmxnet3_tx_queue *txq;
	struct vmxnet3_cmd_ring *ring;
	struct vmxnet3_comp_ring *comp_ring;
	struct vmxnet3_data_ring *data_ring;
	int size;

	PMD_INIT_FUNC_TRACE();

	if ((tx_conf->txq_flags & ETH_TXQ_FLAGS_NOXSUMS) !=
	    ETH_TXQ_FLAGS_NOXSUMS) {
		PMD_INIT_LOG(ERR, "TX no support for checksum offload yet");
		return -EINVAL;
	}

	txq = rte_zmalloc("ethdev_tx_queue", sizeof(struct vmxnet3_tx_queue), RTE_CACHE_LINE_SIZE);
	if (txq == NULL) {
		PMD_INIT_LOG(ERR, "Can not allocate tx queue structure");
		return -ENOMEM;
	}

	txq->queue_id = queue_idx;
	txq->port_id = dev->data->port_id;
	txq->shared = &hw->tqd_start[queue_idx];
	txq->hw = hw;
	txq->qid = queue_idx;

	ring = &txq->cmd_ring;
	comp_ring = &txq->comp_ring;
	data_ring = &txq->data_ring;

	/* Tx vmxnet ring length should be between 512 and 4096 */
	if (nb_desc < VMXNET3_DEF_TX_RING_SIZE) {
		PMD_INIT_LOG(ERR, "VMXNET3 Tx Ring Size Min: %u",
			     VMXNET3_DEF_TX_RING_SIZE);
		return -EINVAL;
	} else if (nb_desc > VMXNET3_TX_RING_MAX_SIZE) {
		PMD_INIT_LOG(ERR, "VMXNET3 Tx Ring Size Max: %u",
			     VMXNET3_TX_RING_MAX_SIZE);
		return -EINVAL;
	} else {
		ring->size = nb_desc;
		ring->size &= ~VMXNET3_RING_SIZE_MASK;
	}
	comp_ring->size = data_ring->size = ring->size;

	/* Tx vmxnet rings structure initialization */
	ring->next2fill = 0;
	ring->next2comp = 0;
	ring->gen = VMXNET3_INIT_GEN;
	comp_ring->next2proc = 0;
	comp_ring->gen = VMXNET3_INIT_GEN;

	size = sizeof(struct Vmxnet3_TxDesc) * ring->size;
	size += sizeof(struct Vmxnet3_TxCompDesc) * comp_ring->size;
	size += sizeof(struct Vmxnet3_TxDataDesc) * data_ring->size;

	mz = ring_dma_zone_reserve(dev, "txdesc", queue_idx, size, socket_id);
	if (mz == NULL) {
		PMD_INIT_LOG(ERR, "ERROR: Creating queue descriptors zone");
		return -ENOMEM;
	}
	memset(mz->addr, 0, mz->len);

	/* cmd_ring initialization */
	ring->base = mz->addr;
	ring->basePA = mz->phys_addr;

	/* comp_ring initialization */
	comp_ring->base = ring->base + ring->size;
	comp_ring->basePA = ring->basePA +
		(sizeof(struct Vmxnet3_TxDesc) * ring->size);

	/* data_ring initialization */
	data_ring->base = (Vmxnet3_TxDataDesc *)(comp_ring->base + comp_ring->size);
	data_ring->basePA = comp_ring->basePA +
		(sizeof(struct Vmxnet3_TxCompDesc) * comp_ring->size);

	/* cmd_ring buf_info allocation */
	ring->buf_info = rte_zmalloc("tx_ring_buf_info",
				     ring->size * sizeof(vmxnet3_buf_info_t), RTE_CACHE_LINE_SIZE);
	if (ring->buf_info == NULL) {
		PMD_INIT_LOG(ERR, "ERROR: Creating tx_buf_info structure");
		return -ENOMEM;
	}

	/* Update the data portion with txq */
	dev->data->tx_queues[queue_idx] = txq;

	return 0;
}
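
/*
 * Set up an Rx queue: two command rings and one completion ring share a
 * single DMA memzone laid out as (cmd ring 0 | cmd ring 1 | comp ring);
 * each command ring gets its own buf_info array for tracking posted mbufs.
 */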
int
vmxnet3_dev_rx_queue_setup(struct rte_eth_dev *dev,
			   uint16_t queue_idx,
			   uint16_t nb_desc,
			   unsigned int socket_id,
			   __attribute__((unused)) const struct rte_eth_rxconf *rx_conf,
			   struct rte_mempool *mp)
{
	const struct rte_memzone *mz;
	struct vmxnet3_rx_queue *rxq;
	struct vmxnet3_hw *hw = dev->data->dev_private;
	struct vmxnet3_cmd_ring *ring0, *ring1, *ring;
	struct vmxnet3_comp_ring *comp_ring;
	int size;
	uint8_t i;
	char mem_name[32];
	uint16_t buf_size;

	PMD_INIT_FUNC_TRACE();

	buf_size = rte_pktmbuf_data_room_size(mp) -
		RTE_PKTMBUF_HEADROOM;

	if (dev->data->dev_conf.rxmode.max_rx_pkt_len > buf_size) {
		PMD_INIT_LOG(ERR, "buf_size = %u, max_pkt_len = %u, "
			     "VMXNET3 doesn't support scatter packets yet",
			     buf_size, dev->data->dev_conf.rxmode.max_rx_pkt_len);
		return -EINVAL;
	}

	rxq = rte_zmalloc("ethdev_rx_queue", sizeof(struct vmxnet3_rx_queue), RTE_CACHE_LINE_SIZE);
	if (rxq == NULL) {
		PMD_INIT_LOG(ERR, "Can not allocate rx queue structure");
		return -ENOMEM;
	}

	rxq->mp = mp;
	rxq->queue_id = queue_idx;
	rxq->port_id = dev->data->port_id;
	rxq->shared = &hw->rqd_start[queue_idx];
	rxq->hw = hw;
	rxq->qid1 = queue_idx;
	rxq->qid2 = queue_idx + hw->num_rx_queues;

	ring0 = &rxq->cmd_ring[0];
	ring1 = &rxq->cmd_ring[1];
	comp_ring = &rxq->comp_ring;

	/* Rx vmxnet rings length should be between 256 and 4096 */
	if (nb_desc < VMXNET3_DEF_RX_RING_SIZE) {
		PMD_INIT_LOG(ERR, "VMXNET3 Rx Ring Size Min: 256");
		return -EINVAL;
	} else if (nb_desc > VMXNET3_RX_RING_MAX_SIZE) {
		PMD_INIT_LOG(ERR, "VMXNET3 Rx Ring Size Max: 4096");
		return -EINVAL;
	} else {
		ring0->size = nb_desc;
		ring0->size &= ~VMXNET3_RING_SIZE_MASK;
		ring1->size = ring0->size;
	}

	comp_ring->size = ring0->size + ring1->size;

	/* Rx vmxnet rings structure initialization */
	ring0->next2fill = 0;
	ring1->next2fill = 0;
	ring0->next2comp = 0;
	ring1->next2comp = 0;
	ring0->gen = VMXNET3_INIT_GEN;
	ring1->gen = VMXNET3_INIT_GEN;
	comp_ring->next2proc = 0;
	comp_ring->gen = VMXNET3_INIT_GEN;

	size = sizeof(struct Vmxnet3_RxDesc) * (ring0->size + ring1->size);
	size += sizeof(struct Vmxnet3_RxCompDesc) * comp_ring->size;

	mz = ring_dma_zone_reserve(dev, "rxdesc", queue_idx, size, socket_id);
	if (mz == NULL) {
		PMD_INIT_LOG(ERR, "ERROR: Creating queue descriptors zone");
		return -ENOMEM;
	}
	memset(mz->addr, 0, mz->len);

	/* cmd_ring0 initialization */
	ring0->base = mz->addr;
	ring0->basePA = mz->phys_addr;

	/* cmd_ring1 initialization */
	ring1->base = ring0->base + ring0->size;
	ring1->basePA = ring0->basePA + sizeof(struct Vmxnet3_RxDesc) * ring0->size;

	/* comp_ring initialization */
	comp_ring->base = ring1->base + ring1->size;
	comp_ring->basePA = ring1->basePA + sizeof(struct Vmxnet3_RxDesc) *
		ring1->size;

	/* cmd_ring0-cmd_ring1 buf_info allocation */
	for (i = 0; i < VMXNET3_RX_CMDRING_SIZE; i++) {
		ring = &rxq->cmd_ring[i];

		snprintf(mem_name, sizeof(mem_name), "rx_ring_%d_buf_info", i);
		ring->buf_info = rte_zmalloc(mem_name, ring->size * sizeof(vmxnet3_buf_info_t), RTE_CACHE_LINE_SIZE);
		if (ring->buf_info == NULL) {
			PMD_INIT_LOG(ERR, "ERROR: Creating rx_buf_info structure");
			return -ENOMEM;
		}
	}

	/* Update the data portion with rxq */
	dev->data->rx_queues[queue_idx] = rxq;

	return 0;
}

/*
 * Initializes the Receive Unit:
 * load mbufs into the rx queues in advance.
 */
int
vmxnet3_dev_rxtx_init(struct rte_eth_dev *dev)
{
	struct vmxnet3_hw *hw = dev->data->dev_private;
	int i, ret;
	uint8_t j;

	PMD_INIT_FUNC_TRACE();

	for (i = 0; i < hw->num_rx_queues; i++) {
		vmxnet3_rx_queue_t *rxq = dev->data->rx_queues[i];

		for (j = 0; j < VMXNET3_RX_CMDRING_SIZE; j++) {
			/* Post as many Rx buffers as the ring allows */
			ret = vmxnet3_post_rx_bufs(rxq, j);
			if (ret <= 0) {
				PMD_INIT_LOG(ERR, "ERROR: Posting Rxq: %d buffers ring: %d", i, j);
				return -ret;
			}
			/* Update the device with next2fill so it can fill mbufs for incoming packets */
			if (unlikely(rxq->shared->ctrl.updateRxProd)) {
				VMXNET3_WRITE_BAR0_REG(hw, rxprod_reg[j] + (rxq->queue_id * VMXNET3_REG_ALIGN),
						       rxq->cmd_ring[j].next2fill);
			}
		}
		rxq->stopped = FALSE;
	}

	for (i = 0; i < dev->data->nb_tx_queues; i++) {
		struct vmxnet3_tx_queue *txq = dev->data->tx_queues[i];

		txq->stopped = FALSE;
	}

	return 0;
}
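
/* Default 40-byte Toeplitz RSS hash key, used when the application does not provide one. */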
static uint8_t rss_intel_key[40] = {
	0x6D, 0x5A, 0x56, 0xDA, 0x25, 0x5B, 0x0E, 0xC2,
	0x41, 0x67, 0x25, 0x3D, 0x43, 0xA3, 0x8F, 0xB0,
	0xD0, 0xCA, 0x2B, 0xCB, 0xAE, 0x7B, 0x30, 0xB4,
	0x77, 0xCB, 0x2D, 0xA3, 0x80, 0x30, 0xF2, 0x0C,
	0x6A, 0x42, 0xB7, 0x3B, 0xBE, 0xAC, 0x01, 0xFA,
};

/*
 * Configure the RSS feature.
 */
int
vmxnet3_rss_configure(struct rte_eth_dev *dev)
{
	struct vmxnet3_hw *hw = dev->data->dev_private;
	struct VMXNET3_RSSConf *dev_rss_conf;
	struct rte_eth_rss_conf *port_rss_conf;
	uint64_t rss_hf;
	uint8_t i, j;

	PMD_INIT_FUNC_TRACE();

	dev_rss_conf = hw->rss_conf;
	port_rss_conf = &dev->data->dev_conf.rx_adv_conf.rss_conf;

	/* Load hashFunc */
	dev_rss_conf->hashFunc = VMXNET3_RSS_HASH_FUNC_TOEPLITZ;
	/* Load hashKeySize */
	dev_rss_conf->hashKeySize = VMXNET3_RSS_MAX_KEY_SIZE;
	/* Load indTableSize: must not exceed VMXNET3_RSS_MAX_IND_TABLE_SIZE (128) */
	dev_rss_conf->indTableSize = (uint16_t)(hw->num_rx_queues * 4);

	if (port_rss_conf->rss_key == NULL) {
		/* Default hash key */
		port_rss_conf->rss_key = rss_intel_key;
	}

	/* Load hashKey */
	memcpy(&dev_rss_conf->hashKey[0], port_rss_conf->rss_key, dev_rss_conf->hashKeySize);

	/* Load indTable */
	for (i = 0, j = 0; i < dev_rss_conf->indTableSize; i++, j++) {
		if (j == dev->data->nb_rx_queues)
			j = 0;
		dev_rss_conf->indTable[i] = j;
	}

	/* Load hashType */
	dev_rss_conf->hashType = 0;
	rss_hf = port_rss_conf->rss_hf & VMXNET3_RSS_OFFLOAD_ALL;
	if (rss_hf & ETH_RSS_IPV4)
		dev_rss_conf->hashType |= VMXNET3_RSS_HASH_TYPE_IPV4;
	if (rss_hf & ETH_RSS_NONFRAG_IPV4_TCP)
		dev_rss_conf->hashType |= VMXNET3_RSS_HASH_TYPE_TCP_IPV4;
	if (rss_hf & ETH_RSS_IPV6)
		dev_rss_conf->hashType |= VMXNET3_RSS_HASH_TYPE_IPV6;
	if (rss_hf & ETH_RSS_NONFRAG_IPV6_TCP)
		dev_rss_conf->hashType |= VMXNET3_RSS_HASH_TYPE_TCP_IPV6;

	return VMXNET3_SUCCESS;
}