 * Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 *   * Redistributions of source code must retain the above copyright
 *     notice, this list of conditions and the following disclaimer.
 *   * Redistributions in binary form must reproduce the above copyright
 *     notice, this list of conditions and the following disclaimer in
 *     the documentation and/or other materials provided with the
 *   * Neither the name of Intel Corporation nor the names of its
 *     contributors may be used to endorse or promote products derived
 *     from this software without specific prior written permission.
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#include <sys/queue.h>

#include <rte_byteorder.h>
#include <rte_common.h>
#include <rte_cycles.h>
#include <rte_debug.h>
#include <rte_interrupts.h>
#include <rte_memory.h>
#include <rte_memzone.h>
#include <rte_launch.h>
#include <rte_per_lcore.h>
#include <rte_lcore.h>
#include <rte_atomic.h>
#include <rte_branch_prediction.h>
#include <rte_mempool.h>
#include <rte_malloc.h>
#include <rte_ether.h>
#include <rte_ethdev.h>
#include <rte_prefetch.h>
#include <rte_string_fns.h>
#include <rte_errno.h>

#include "vmxnet3/vmxnet3_defs.h"
#include "vmxnet3_ring.h"
#include "vmxnet3_logs.h"
#include "vmxnet3_ethdev.h"
#define RTE_MBUF_DATA_DMA_ADDR(mb) \
	(uint64_t) ((mb)->buf_physaddr + (mb)->data_off)

#define RTE_MBUF_DATA_DMA_ADDR_DEFAULT(mb) \
	(uint64_t) ((mb)->buf_physaddr + RTE_PKTMBUF_HEADROOM)
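
/*
 * Clarifying note: both macros compute the bus/physical address the device
 * should DMA to or from. RTE_MBUF_DATA_DMA_ADDR uses the mbuf's current
 * data_off, while the _DEFAULT variant assumes the data starts right after
 * the standard RTE_PKTMBUF_HEADROOM, as is the case for a freshly allocated
 * mbuf. For a buffer at physical address P this yields P + data_off and
 * P + RTE_PKTMBUF_HEADROOM respectively.
 */
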
static uint32_t rxprod_reg[2] = {VMXNET3_REG_RXPROD, VMXNET3_REG_RXPROD2};

static inline int vmxnet3_post_rx_bufs(vmxnet3_rx_queue_t *, uint8_t);
static inline void vmxnet3_tq_tx_complete(vmxnet3_tx_queue_t *);

#ifdef RTE_LIBRTE_VMXNET3_DEBUG_DRIVER_NOT_USED
static void vmxnet3_rxq_dump(struct vmxnet3_rx_queue *);
static void vmxnet3_txq_dump(struct vmxnet3_tx_queue *);
static inline struct rte_mbuf *
rte_rxmbuf_alloc(struct rte_mempool *mp)
	m = __rte_mbuf_raw_alloc(mp);
	__rte_mbuf_sanity_check_raw(m, 0);
#ifdef RTE_LIBRTE_VMXNET3_DEBUG_DRIVER_NOT_USED
vmxnet3_rxq_dump(struct vmxnet3_rx_queue *rxq)
		   "RXQ: cmd0 base : 0x%p cmd1 base : 0x%p comp ring base : 0x%p.",
		   rxq->cmd_ring[0].base, rxq->cmd_ring[1].base, rxq->comp_ring.base);
		   "RXQ: cmd0 basePA : 0x%lx cmd1 basePA : 0x%lx comp ring basePA : 0x%lx.",
		   (unsigned long)rxq->cmd_ring[0].basePA,
		   (unsigned long)rxq->cmd_ring[1].basePA,
		   (unsigned long)rxq->comp_ring.basePA);

	avail = vmxnet3_cmd_ring_desc_avail(&rxq->cmd_ring[0]);
		   "RXQ:cmd0: size=%u; free=%u; next2proc=%u; queued=%u",
		   (uint32_t)rxq->cmd_ring[0].size, avail,
		   rxq->comp_ring.next2proc,
		   rxq->cmd_ring[0].size - avail);

	avail = vmxnet3_cmd_ring_desc_avail(&rxq->cmd_ring[1]);
	PMD_RX_LOG(DEBUG, "RXQ:cmd1 size=%u; free=%u; next2proc=%u; queued=%u",
		   (uint32_t)rxq->cmd_ring[1].size, avail, rxq->comp_ring.next2proc,
		   rxq->cmd_ring[1].size - avail);
vmxnet3_txq_dump(struct vmxnet3_tx_queue *txq)
	PMD_TX_LOG(DEBUG, "TXQ: cmd base : 0x%p comp ring base : 0x%p data ring base : 0x%p.",
		   txq->cmd_ring.base, txq->comp_ring.base, txq->data_ring.base);
	PMD_TX_LOG(DEBUG, "TXQ: cmd basePA : 0x%lx comp ring basePA : 0x%lx data ring basePA : 0x%lx.",
		   (unsigned long)txq->cmd_ring.basePA,
		   (unsigned long)txq->comp_ring.basePA,
		   (unsigned long)txq->data_ring.basePA);

	avail = vmxnet3_cmd_ring_desc_avail(&txq->cmd_ring);
	PMD_TX_LOG(DEBUG, "TXQ: size=%u; free=%u; next2proc=%u; queued=%u",
		   (uint32_t)txq->cmd_ring.size, avail,
		   txq->comp_ring.next2proc, txq->cmd_ring.size - avail);
vmxnet3_cmd_ring_release_mbufs(vmxnet3_cmd_ring_t *ring)
	while (ring->next2comp != ring->next2fill) {
		/* No need to worry about tx desc ownership, device is quiesced by now. */
		vmxnet3_buf_info_t *buf_info = ring->buf_info + ring->next2comp;

		rte_pktmbuf_free(buf_info->m);
		vmxnet3_cmd_ring_adv_next2comp(ring);
vmxnet3_cmd_ring_release(vmxnet3_cmd_ring_t *ring)
	vmxnet3_cmd_ring_release_mbufs(ring);
	rte_free(ring->buf_info);
	ring->buf_info = NULL;
vmxnet3_dev_tx_queue_release(void *txq)
	vmxnet3_tx_queue_t *tq = txq;

	/* Release the cmd_ring */
	vmxnet3_cmd_ring_release(&tq->cmd_ring);
vmxnet3_dev_rx_queue_release(void *rxq)
	vmxnet3_rx_queue_t *rq = rxq;

	/* Release both the cmd_rings */
	for (i = 0; i < VMXNET3_RX_CMDRING_SIZE; i++)
		vmxnet3_cmd_ring_release(&rq->cmd_ring[i]);
vmxnet3_dev_tx_queue_reset(void *txq)
	vmxnet3_tx_queue_t *tq = txq;
	struct vmxnet3_cmd_ring *ring = &tq->cmd_ring;
	struct vmxnet3_comp_ring *comp_ring = &tq->comp_ring;
	struct vmxnet3_data_ring *data_ring = &tq->data_ring;

	/* Release the cmd_ring mbufs */
	vmxnet3_cmd_ring_release_mbufs(&tq->cmd_ring);

	/* Tx vmxnet rings structure initialization */
	ring->gen = VMXNET3_INIT_GEN;
	comp_ring->next2proc = 0;
	comp_ring->gen = VMXNET3_INIT_GEN;

	size = sizeof(struct Vmxnet3_TxDesc) * ring->size;
	size += sizeof(struct Vmxnet3_TxCompDesc) * comp_ring->size;
	size += sizeof(struct Vmxnet3_TxDataDesc) * data_ring->size;

	memset(ring->base, 0, size);
vmxnet3_dev_rx_queue_reset(void *rxq)
	vmxnet3_rx_queue_t *rq = rxq;
	struct vmxnet3_cmd_ring *ring0, *ring1;
	struct vmxnet3_comp_ring *comp_ring;

	/* Release both the cmd_rings mbufs */
	for (i = 0; i < VMXNET3_RX_CMDRING_SIZE; i++)
		vmxnet3_cmd_ring_release_mbufs(&rq->cmd_ring[i]);

	ring0 = &rq->cmd_ring[0];
	ring1 = &rq->cmd_ring[1];
	comp_ring = &rq->comp_ring;

	/* Rx vmxnet rings structure initialization */
	ring0->next2fill = 0;
	ring1->next2fill = 0;
	ring0->next2comp = 0;
	ring1->next2comp = 0;
	ring0->gen = VMXNET3_INIT_GEN;
	ring1->gen = VMXNET3_INIT_GEN;
	comp_ring->next2proc = 0;
	comp_ring->gen = VMXNET3_INIT_GEN;

	size = sizeof(struct Vmxnet3_RxDesc) * (ring0->size + ring1->size);
	size += sizeof(struct Vmxnet3_RxCompDesc) * comp_ring->size;

	memset(ring0->base, 0, size);
vmxnet3_dev_clear_queues(struct rte_eth_dev *dev)
	PMD_INIT_FUNC_TRACE();

	for (i = 0; i < dev->data->nb_tx_queues; i++) {
		struct vmxnet3_tx_queue *txq = dev->data->tx_queues[i];

		vmxnet3_dev_tx_queue_reset(txq);

	for (i = 0; i < dev->data->nb_rx_queues; i++) {
		struct vmxnet3_rx_queue *rxq = dev->data->rx_queues[i];

		vmxnet3_dev_rx_queue_reset(rxq);
vmxnet3_tq_tx_complete(vmxnet3_tx_queue_t *txq)
	struct rte_mbuf *mbuf;
	vmxnet3_comp_ring_t *comp_ring = &txq->comp_ring;
	struct Vmxnet3_TxCompDesc *tcd = (struct Vmxnet3_TxCompDesc *)
		(comp_ring->base + comp_ring->next2proc);

	while (tcd->gen == comp_ring->gen) {
		/* Release cmd_ring descriptor and free mbuf */
#ifdef RTE_LIBRTE_VMXNET3_DEBUG_DRIVER
		VMXNET3_ASSERT(txq->cmd_ring.base[tcd->txdIdx].txd.eop == 1);
		mbuf = txq->cmd_ring.buf_info[tcd->txdIdx].m;
		if (unlikely(mbuf == NULL))
			rte_panic("EOP desc does not point to a valid mbuf");

		rte_pktmbuf_free(mbuf);

		txq->cmd_ring.buf_info[tcd->txdIdx].m = NULL;
		/* Mark the txd for which tcd was generated as completed */
		vmxnet3_cmd_ring_adv_next2comp(&txq->cmd_ring);

		vmxnet3_comp_ring_adv_next2proc(comp_ring);
		tcd = (struct Vmxnet3_TxCompDesc *)(comp_ring->base +
						    comp_ring->next2proc);

	PMD_TX_LOG(DEBUG, "Processed %d tx comps & command descs.", completed);
vmxnet3_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts,
	Vmxnet3_TxDesc *txd = NULL;
	vmxnet3_buf_info_t *tbi = NULL;
	struct vmxnet3_hw *hw;
	struct rte_mbuf *txm;
	vmxnet3_tx_queue_t *txq = tx_queue;

	if (unlikely(txq->stopped)) {
		PMD_TX_LOG(DEBUG, "Tx queue is stopped.");

	/* Free up the comp_descriptors aggressively */
	vmxnet3_tq_tx_complete(txq);

	while (nb_tx < nb_pkts) {
		if (vmxnet3_cmd_ring_desc_avail(&txq->cmd_ring)) {
			txm = tx_pkts[nb_tx];

			/* Multi-segment (scatter) packets are not supported yet; drop them. */
			if (txm->nb_segs != 1) {
				PMD_TX_LOG(DEBUG, "Don't support scatter packets yet, drop!");
				rte_pktmbuf_free(tx_pkts[nb_tx]);
				txq->stats.drop_total++;

			/* data_len includes the Ethernet header, which the MTU does not */
			if (txm->data_len > (hw->cur_mtu + ETHER_HDR_LEN)) {
				PMD_TX_LOG(DEBUG, "Packet data_len higher than MTU");
				rte_pktmbuf_free(tx_pkts[nb_tx]);
				txq->stats.drop_total++;

			txd = (Vmxnet3_TxDesc *)(txq->cmd_ring.base + txq->cmd_ring.next2fill);
			if (rte_pktmbuf_pkt_len(txm) <= VMXNET3_HDR_COPY_SIZE) {
				struct Vmxnet3_TxDataDesc *tdd;

				tdd = txq->data_ring.base + txq->cmd_ring.next2fill;
				copy_size = rte_pktmbuf_pkt_len(txm);
				rte_memcpy(tdd->data, rte_pktmbuf_mtod(txm, char *), copy_size);
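
				/*
				 * Note: small packets (up to VMXNET3_HDR_COPY_SIZE
				 * bytes) are copied into the per-queue data ring;
				 * the descriptor below then points at the data ring
				 * entry instead of the mbuf, so the device can fetch
				 * the whole frame from one small contiguous area.
				 */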
			/* Fill the tx descriptor */
			tbi = txq->cmd_ring.buf_info + txq->cmd_ring.next2fill;
			tbi->bufPA = RTE_MBUF_DATA_DMA_ADDR(txm);
				txd->addr = rte_cpu_to_le_64(txq->data_ring.basePA +
							     txq->cmd_ring.next2fill *
							     sizeof(struct Vmxnet3_TxDataDesc));
				txd->addr = tbi->bufPA;
			txd->len = txm->data_len;

			/* Mark the last descriptor as End of Packet. */

			/* Add VLAN tag if requested */
			if (txm->ol_flags & PKT_TX_VLAN_PKT) {
				txd->tci = rte_cpu_to_le_16(txm->vlan_tci);

			/* Record current mbuf for freeing it later in tx complete */
#ifdef RTE_LIBRTE_VMXNET3_DEBUG_DRIVER

			/* Set the offloading mode to default */
			txd->om = VMXNET3_OM_NONE;

			/* finally flip the GEN bit of the SOP desc */
			txd->gen = txq->cmd_ring.gen;
			txq->shared->ctrl.txNumDeferred++;

			/* move to the next2fill descriptor */
			vmxnet3_cmd_ring_adv_next2fill(&txq->cmd_ring);

			PMD_TX_LOG(DEBUG, "No free tx cmd desc(s)");
			txq->stats.drop_total += (nb_pkts - nb_tx);

	PMD_TX_LOG(DEBUG, "vmxnet3 txThreshold: %u", txq->shared->ctrl.txThreshold);
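
	/*
	 * Deferred doorbell: rather than writing the TXPROD register for every
	 * packet, the driver counts queued descriptors in txNumDeferred and only
	 * rings the doorbell once the txThreshold value from the shared control
	 * area is reached, reducing expensive register writes (VM exits).
	 */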
	if (txq->shared->ctrl.txNumDeferred >= txq->shared->ctrl.txThreshold) {
		txq->shared->ctrl.txNumDeferred = 0;
		/* Notify vSwitch that packets are available. */
		VMXNET3_WRITE_BAR0_REG(hw, (VMXNET3_REG_TXPROD + txq->queue_id * VMXNET3_REG_ALIGN),
				       txq->cmd_ring.next2fill);
 * Allocates mbufs and clusters. Post rx descriptors with buffer details
 * so that device can receive packets in those buffers.
 *
 * Among the two rings, the 1st ring contains buffers of type 0 and type 1.
 * bufs_per_pkt is set such that for non-LRO cases all the buffers required
 * by a frame will fit in the 1st ring (1st buf of type 0 and rest of type 1).
 * The 2nd ring contains buffers of type 1 alone and is mostly used
vmxnet3_post_rx_bufs(vmxnet3_rx_queue_t *rxq, uint8_t ring_id)
	uint32_t i = 0, val = 0;
	struct vmxnet3_cmd_ring *ring = &rxq->cmd_ring[ring_id];

	/* Usually: One HEAD type buf per packet
	 * val = (ring->next2fill % rxq->hw->bufs_per_pkt) ?
	 *       VMXNET3_RXD_BTYPE_BODY : VMXNET3_RXD_BTYPE_HEAD;

	/* We use single packet buffer so all heads here */
	val = VMXNET3_RXD_BTYPE_HEAD;

	/* All BODY type buffers for 2nd ring */
	val = VMXNET3_RXD_BTYPE_BODY;

	while (vmxnet3_cmd_ring_desc_avail(ring) > 0) {
		struct Vmxnet3_RxDesc *rxd;
		struct rte_mbuf *mbuf;
		vmxnet3_buf_info_t *buf_info = &ring->buf_info[ring->next2fill];

		rxd = (struct Vmxnet3_RxDesc *)(ring->base + ring->next2fill);

		/* Allocate blank mbuf for the current Rx Descriptor */
		mbuf = rte_rxmbuf_alloc(rxq->mp);
		if (unlikely(mbuf == NULL)) {
			PMD_RX_LOG(ERR, "Error allocating mbuf in %s", __func__);
			rxq->stats.rx_buf_alloc_failure++;

		 * Load mbuf pointer into buf_info[ring_size]
		 * buf_info structure is equivalent to cookie for virtio-virtqueue
		buf_info->len = (uint16_t)(mbuf->buf_len -
					   RTE_PKTMBUF_HEADROOM);
		buf_info->bufPA = RTE_MBUF_DATA_DMA_ADDR_DEFAULT(mbuf);

		/* Load Rx Descriptor with the buffer's GPA */
		rxd->addr = buf_info->bufPA;

		/* After this point rxd->addr MUST not be NULL */
		rxd->len = buf_info->len;
		/* Flip gen bit at the end to change ownership */
		rxd->gen = ring->gen;

		vmxnet3_cmd_ring_adv_next2fill(ring);

	/* Return error only if no buffers are posted at present */
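	/*
	 * (ring->size - 1) or more free descriptors at this point means the
	 * loop above could not post even a single buffer (typically because
	 * mbuf allocation failed); that is the only case reported as an error.
	 */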
	if (vmxnet3_cmd_ring_desc_avail(ring) >= (ring->size - 1))
 * Process the Rx Completion Ring of given vmxnet3_rx_queue
 * for nb_pkts burst and return the number of packets received
vmxnet3_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t nb_pkts)
	uint32_t nb_rxd, idx;
	vmxnet3_rx_queue_t *rxq;
	Vmxnet3_RxCompDesc *rcd;
	vmxnet3_buf_info_t *rbi;
	struct rte_mbuf *rxm = NULL;
	struct vmxnet3_hw *hw;

	rcd = &rxq->comp_ring.base[rxq->comp_ring.next2proc].rcd;

	if (unlikely(rxq->stopped)) {
		PMD_RX_LOG(DEBUG, "Rx queue is stopped.");

	while (rcd->gen == rxq->comp_ring.gen) {
		if (nb_rx >= nb_pkts)

		ring_idx = (uint8_t)((rcd->rqID == rxq->qid1) ? 0 : 1);
		rxd = (Vmxnet3_RxDesc *)rxq->cmd_ring[ring_idx].base + idx;
		rbi = rxq->cmd_ring[ring_idx].buf_info + idx;
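
		/*
		 * Both RX command rings feed the same completion ring; the
		 * completion's rqID matches either qid1 (first command ring)
		 * or qid2, so it tells us which command ring, and hence which
		 * buf_info array, the completed buffer belongs to.
		 */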
		if (unlikely(rcd->sop != 1 || rcd->eop != 1)) {
			rte_pktmbuf_free_seg(rbi->m);
			PMD_RX_LOG(DEBUG, "Packet spread across multiple buffers\n");

		PMD_RX_LOG(DEBUG, "rxd idx: %d ring idx: %d.", idx, ring_idx);

#ifdef RTE_LIBRTE_VMXNET3_DEBUG_DRIVER
		VMXNET3_ASSERT(rcd->len <= rxd->len);
		VMXNET3_ASSERT(rbi->m);

		if (unlikely(rcd->len == 0)) {
			PMD_RX_LOG(DEBUG, "Rx buf was skipped. rxring[%d][%d]\n",
#ifdef RTE_LIBRTE_VMXNET3_DEBUG_DRIVER
			VMXNET3_ASSERT(rcd->sop && rcd->eop);
			rte_pktmbuf_free_seg(rbi->m);

		/* Assuming a packet is coming in a single packet buffer */
		if (unlikely(rxd->btype != VMXNET3_RXD_BTYPE_HEAD)) {
				   "Alert : Misbehaving device, incorrect "
				   "buffer type used. Packet dropped.");
			rte_pktmbuf_free_seg(rbi->m);

#ifdef RTE_LIBRTE_VMXNET3_DEBUG_DRIVER
		VMXNET3_ASSERT(rxd->btype == VMXNET3_RXD_BTYPE_HEAD);

		/* Get the packet buffer pointer from buf_info */

		/* Clear descriptor associated buf_info to be reused */

		/* Update the index that we received a packet */
		rxq->cmd_ring[ring_idx].next2comp = idx;

		/* For RCD with EOP set, check if there is frame error */
		if (unlikely(rcd->err)) {
			rxq->stats.drop_total++;
			rxq->stats.drop_err++;
				rxq->stats.drop_fcs++;
				PMD_RX_LOG(ERR, "Recv packet dropped due to frame err.");
			PMD_RX_LOG(ERR, "Error in received packet rcd#:%d rxd:%d",
				   (int)(rcd - (struct Vmxnet3_RxCompDesc *)
					 rxq->comp_ring.base), rcd->rxdIdx);
			rte_pktmbuf_free_seg(rxm);

		/* Check for hardware stripped VLAN tag */
			PMD_RX_LOG(DEBUG, "Received packet with vlan ID: %d.",
			rxm->ol_flags = PKT_RX_VLAN_PKT;
			/* Copy vlan tag in packet buffer */
			rxm->vlan_tci = rte_le_to_cpu_16((uint16_t)rcd->tci);

		/* Initialize newly received packet buffer */
		rxm->port = rxq->port_id;
		rxm->pkt_len = (uint16_t)rcd->len;
		rxm->data_len = (uint16_t)rcd->len;
		rxm->data_off = RTE_PKTMBUF_HEADROOM;

		/* Check packet type, checksum errors, etc. Only support IPv4 for now. */
			struct ether_hdr *eth = rte_pktmbuf_mtod(rxm, struct ether_hdr *);
			struct ipv4_hdr *ip = (struct ipv4_hdr *)(eth + 1);

			if (((ip->version_ihl & 0xf) << 2) > (int)sizeof(struct ipv4_hdr))
				rxm->ol_flags |= PKT_RX_IPV4_HDR_EXT;
				rxm->ol_flags |= PKT_RX_IPV4_HDR;

				rxm->ol_flags |= PKT_RX_IP_CKSUM_BAD;
			if ((rcd->tcp || rcd->udp) && !rcd->tuc)
				rxm->ol_flags |= PKT_RX_L4_CKSUM_BAD;
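
		/*
		 * Completion descriptor flags used above: ipc and tuc are set
		 * by the device when the IPv4 and TCP/UDP checksums,
		 * respectively, were verified as correct, so their absence on
		 * a checked packet is reported as *_CKSUM_BAD in ol_flags.
		 */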
		rx_pkts[nb_rx++] = rxm;

		rxq->cmd_ring[ring_idx].next2comp = idx;
		VMXNET3_INC_RING_IDX_ONLY(rxq->cmd_ring[ring_idx].next2comp, rxq->cmd_ring[ring_idx].size);

		/* It's time to allocate some new buf and renew descriptors */
		vmxnet3_post_rx_bufs(rxq, ring_idx);
		if (unlikely(rxq->shared->ctrl.updateRxProd)) {
			VMXNET3_WRITE_BAR0_REG(hw, rxprod_reg[ring_idx] + (rxq->queue_id * VMXNET3_REG_ALIGN),
					       rxq->cmd_ring[ring_idx].next2fill);

		/* Advance to the next descriptor in comp_ring */
		vmxnet3_comp_ring_adv_next2proc(&rxq->comp_ring);

		rcd = &rxq->comp_ring.base[rxq->comp_ring.next2proc].rcd;

		if (nb_rxd > rxq->cmd_ring[0].size) {
				   "Used up quota of receiving packets,"
				   " relinquish control."
 * Create memzone for device rings. malloc can't be used as the physical address is
 * needed. If the memzone is already created, then this function returns a ptr
static const struct rte_memzone *
ring_dma_zone_reserve(struct rte_eth_dev *dev, const char *ring_name,
		      uint16_t queue_id, uint32_t ring_size, int socket_id)
	char z_name[RTE_MEMZONE_NAMESIZE];
	const struct rte_memzone *mz;

	snprintf(z_name, sizeof(z_name), "%s_%s_%d_%d",
		 dev->driver->pci_drv.name, ring_name,
		 dev->data->port_id, queue_id);

	mz = rte_memzone_lookup(z_name);

	return rte_memzone_reserve_aligned(z_name, ring_size,
					   socket_id, 0, VMXNET3_RING_BA_ALIGN);
vmxnet3_dev_tx_queue_setup(struct rte_eth_dev *dev,
			   unsigned int socket_id,
			   __attribute__((unused)) const struct rte_eth_txconf *tx_conf)
	struct vmxnet3_hw *hw = dev->data->dev_private;
	const struct rte_memzone *mz;
	struct vmxnet3_tx_queue *txq;
	struct vmxnet3_cmd_ring *ring;
	struct vmxnet3_comp_ring *comp_ring;
	struct vmxnet3_data_ring *data_ring;

	PMD_INIT_FUNC_TRACE();

	if ((tx_conf->txq_flags & ETH_TXQ_FLAGS_NOMULTSEGS) !=
	    ETH_TXQ_FLAGS_NOMULTSEGS) {
		PMD_INIT_LOG(ERR, "TX multi-segment packets are not supported yet");

	if ((tx_conf->txq_flags & ETH_TXQ_FLAGS_NOOFFLOADS) !=
	    ETH_TXQ_FLAGS_NOOFFLOADS) {
		PMD_INIT_LOG(ERR, "TX offloads are not supported yet");

	txq = rte_zmalloc("ethdev_tx_queue", sizeof(struct vmxnet3_tx_queue), RTE_CACHE_LINE_SIZE);
		PMD_INIT_LOG(ERR, "Cannot allocate tx queue structure");

	txq->queue_id = queue_idx;
	txq->port_id = dev->data->port_id;
	txq->shared = &hw->tqd_start[queue_idx];
	txq->qid = queue_idx;

	ring = &txq->cmd_ring;
	comp_ring = &txq->comp_ring;
	data_ring = &txq->data_ring;

	/* Tx vmxnet ring length should be between 512 and 4096 */
	if (nb_desc < VMXNET3_DEF_TX_RING_SIZE) {
		PMD_INIT_LOG(ERR, "VMXNET3 Tx Ring Size Min: %u",
			     VMXNET3_DEF_TX_RING_SIZE);
	} else if (nb_desc > VMXNET3_TX_RING_MAX_SIZE) {
		PMD_INIT_LOG(ERR, "VMXNET3 Tx Ring Size Max: %u",
			     VMXNET3_TX_RING_MAX_SIZE);

	ring->size = nb_desc;
	ring->size &= ~VMXNET3_RING_SIZE_MASK;
	comp_ring->size = data_ring->size = ring->size;

	/* Tx vmxnet rings structure initialization */
	ring->gen = VMXNET3_INIT_GEN;
	comp_ring->next2proc = 0;
	comp_ring->gen = VMXNET3_INIT_GEN;

	size = sizeof(struct Vmxnet3_TxDesc) * ring->size;
	size += sizeof(struct Vmxnet3_TxCompDesc) * comp_ring->size;
	size += sizeof(struct Vmxnet3_TxDataDesc) * data_ring->size;

	mz = ring_dma_zone_reserve(dev, "txdesc", queue_idx, size, socket_id);
		PMD_INIT_LOG(ERR, "ERROR: Creating queue descriptors zone");
	memset(mz->addr, 0, mz->len);

	/* cmd_ring initialization */
	ring->base = mz->addr;
	ring->basePA = mz->phys_addr;

	/* comp_ring initialization */
	comp_ring->base = ring->base + ring->size;
	comp_ring->basePA = ring->basePA +
			    (sizeof(struct Vmxnet3_TxDesc) * ring->size);

	/* data_ring initialization */
	data_ring->base = (Vmxnet3_TxDataDesc *)(comp_ring->base + comp_ring->size);
	data_ring->basePA = comp_ring->basePA +
			    (sizeof(struct Vmxnet3_TxCompDesc) * comp_ring->size);

	/* cmd_ring0 buf_info allocation */
	ring->buf_info = rte_zmalloc("tx_ring_buf_info",
				     ring->size * sizeof(vmxnet3_buf_info_t), RTE_CACHE_LINE_SIZE);
	if (ring->buf_info == NULL) {
		PMD_INIT_LOG(ERR, "ERROR: Creating tx_buf_info structure");

	/* Update the data portion with txq */
	dev->data->tx_queues[queue_idx] = txq;
vmxnet3_dev_rx_queue_setup(struct rte_eth_dev *dev,
			   unsigned int socket_id,
			   __attribute__((unused)) const struct rte_eth_rxconf *rx_conf,
			   struct rte_mempool *mp)
	const struct rte_memzone *mz;
	struct vmxnet3_rx_queue *rxq;
	struct vmxnet3_hw *hw = dev->data->dev_private;
	struct vmxnet3_cmd_ring *ring0, *ring1, *ring;
	struct vmxnet3_comp_ring *comp_ring;
	struct rte_pktmbuf_pool_private *mbp_priv;

	PMD_INIT_FUNC_TRACE();

	mbp_priv = (struct rte_pktmbuf_pool_private *)
		rte_mempool_get_priv(mp);
	buf_size = (uint16_t) (mbp_priv->mbuf_data_room_size -
			       RTE_PKTMBUF_HEADROOM);

	if (dev->data->dev_conf.rxmode.max_rx_pkt_len > buf_size) {
		PMD_INIT_LOG(ERR, "buf_size = %u, max_pkt_len = %u, "
			     "VMXNET3 doesn't support scatter packets yet",
			     buf_size, dev->data->dev_conf.rxmode.max_rx_pkt_len);

	rxq = rte_zmalloc("ethdev_rx_queue", sizeof(struct vmxnet3_rx_queue), RTE_CACHE_LINE_SIZE);
		PMD_INIT_LOG(ERR, "Cannot allocate rx queue structure");

	rxq->queue_id = queue_idx;
	rxq->port_id = dev->data->port_id;
	rxq->shared = &hw->rqd_start[queue_idx];
	rxq->qid1 = queue_idx;
	rxq->qid2 = queue_idx + hw->num_rx_queues;

	ring0 = &rxq->cmd_ring[0];
	ring1 = &rxq->cmd_ring[1];
	comp_ring = &rxq->comp_ring;

	/* Rx vmxnet ring lengths should be between 256 and 4096 */
	if (nb_desc < VMXNET3_DEF_RX_RING_SIZE) {
		PMD_INIT_LOG(ERR, "VMXNET3 Rx Ring Size Min: 256");
	} else if (nb_desc > VMXNET3_RX_RING_MAX_SIZE) {
		PMD_INIT_LOG(ERR, "VMXNET3 Rx Ring Size Max: 4096");

	ring0->size = nb_desc;
	ring0->size &= ~VMXNET3_RING_SIZE_MASK;
	ring1->size = ring0->size;

	comp_ring->size = ring0->size + ring1->size;
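
	/*
	 * The completion ring must be able to hold one completion entry for
	 * every descriptor in both command rings at once, hence its size is
	 * the sum of the two command ring sizes.
	 */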
	/* Rx vmxnet rings structure initialization */
	ring0->next2fill = 0;
	ring1->next2fill = 0;
	ring0->next2comp = 0;
	ring1->next2comp = 0;
	ring0->gen = VMXNET3_INIT_GEN;
	ring1->gen = VMXNET3_INIT_GEN;
	comp_ring->next2proc = 0;
	comp_ring->gen = VMXNET3_INIT_GEN;

	size = sizeof(struct Vmxnet3_RxDesc) * (ring0->size + ring1->size);
	size += sizeof(struct Vmxnet3_RxCompDesc) * comp_ring->size;

	mz = ring_dma_zone_reserve(dev, "rxdesc", queue_idx, size, socket_id);
		PMD_INIT_LOG(ERR, "ERROR: Creating queue descriptors zone");
	memset(mz->addr, 0, mz->len);

	/* cmd_ring0 initialization */
	ring0->base = mz->addr;
	ring0->basePA = mz->phys_addr;

	/* cmd_ring1 initialization */
	ring1->base = ring0->base + ring0->size;
	ring1->basePA = ring0->basePA + sizeof(struct Vmxnet3_RxDesc) * ring0->size;

	/* comp_ring initialization */
	comp_ring->base = ring1->base + ring1->size;
	comp_ring->basePA = ring1->basePA + sizeof(struct Vmxnet3_RxDesc) *

	/* cmd_ring0-cmd_ring1 buf_info allocation */
	for (i = 0; i < VMXNET3_RX_CMDRING_SIZE; i++) {
		ring = &rxq->cmd_ring[i];

		snprintf(mem_name, sizeof(mem_name), "rx_ring_%d_buf_info", i);

		ring->buf_info = rte_zmalloc(mem_name, ring->size * sizeof(vmxnet3_buf_info_t), RTE_CACHE_LINE_SIZE);
		if (ring->buf_info == NULL) {
			PMD_INIT_LOG(ERR, "ERROR: Creating rx_buf_info structure");

	/* Update the data portion with rxq */
	dev->data->rx_queues[queue_idx] = rxq;
 * Initializes Receive Unit
 * Load mbufs in rx queue in advance
vmxnet3_dev_rxtx_init(struct rte_eth_dev *dev)
	struct vmxnet3_hw *hw = dev->data->dev_private;

	PMD_INIT_FUNC_TRACE();

	for (i = 0; i < hw->num_rx_queues; i++) {
		vmxnet3_rx_queue_t *rxq = dev->data->rx_queues[i];

		for (j = 0; j < VMXNET3_RX_CMDRING_SIZE; j++) {
			/* Passing 0 as alloc_num will allocate full ring */
			ret = vmxnet3_post_rx_bufs(rxq, j);
				PMD_INIT_LOG(ERR, "ERROR: Posting Rxq: %d buffers ring: %d", i, j);

			/* Update the device with the next2fill index so it can use the posted mbufs for incoming packets */
			if (unlikely(rxq->shared->ctrl.updateRxProd)) {
				VMXNET3_WRITE_BAR0_REG(hw, rxprod_reg[j] + (rxq->queue_id * VMXNET3_REG_ALIGN),
						       rxq->cmd_ring[j].next2fill);

		rxq->stopped = FALSE;

	for (i = 0; i < dev->data->nb_tx_queues; i++) {
		struct vmxnet3_tx_queue *txq = dev->data->tx_queues[i];

		txq->stopped = FALSE;
static uint8_t rss_intel_key[40] = {
	0x6D, 0x5A, 0x56, 0xDA, 0x25, 0x5B, 0x0E, 0xC2,
	0x41, 0x67, 0x25, 0x3D, 0x43, 0xA3, 0x8F, 0xB0,
	0xD0, 0xCA, 0x2B, 0xCB, 0xAE, 0x7B, 0x30, 0xB4,
	0x77, 0xCB, 0x2D, 0xA3, 0x80, 0x30, 0xF2, 0x0C,
	0x6A, 0x42, 0xB7, 0x3B, 0xBE, 0xAC, 0x01, 0xFA,
 * Configure RSS feature
vmxnet3_rss_configure(struct rte_eth_dev *dev)
	struct vmxnet3_hw *hw = dev->data->dev_private;
	struct VMXNET3_RSSConf *dev_rss_conf;
	struct rte_eth_rss_conf *port_rss_conf;

	PMD_INIT_FUNC_TRACE();

	dev_rss_conf = hw->rss_conf;
	port_rss_conf = &dev->data->dev_conf.rx_adv_conf.rss_conf;

	/* loading hashFunc */
	dev_rss_conf->hashFunc = VMXNET3_RSS_HASH_FUNC_TOEPLITZ;
	/* loading hashKeySize */
	dev_rss_conf->hashKeySize = VMXNET3_RSS_MAX_KEY_SIZE;
	/* loading indTableSize: must not exceed VMXNET3_RSS_MAX_IND_TABLE_SIZE (128) */
	dev_rss_conf->indTableSize = (uint16_t)(hw->num_rx_queues * 4);

	if (port_rss_conf->rss_key == NULL) {
		/* Default hash key */
		port_rss_conf->rss_key = rss_intel_key;

	/* loading hashKey */
	memcpy(&dev_rss_conf->hashKey[0], port_rss_conf->rss_key, dev_rss_conf->hashKeySize);

	/* loading indTable */
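	/*
	 * The loop below spreads the indirection table entries round-robin
	 * over the configured RX queues, with j wrapping back to queue 0
	 * whenever it reaches nb_rx_queues; with 2 RX queues, for example,
	 * the table reads 0,1,0,1,... so hash buckets are balanced evenly.
	 */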
	for (i = 0, j = 0; i < dev_rss_conf->indTableSize; i++, j++) {
		if (j == dev->data->nb_rx_queues)
		dev_rss_conf->indTable[i] = j;

	/* loading hashType */
	dev_rss_conf->hashType = 0;
	rss_hf = port_rss_conf->rss_hf & VMXNET3_RSS_OFFLOAD_ALL;
	if (rss_hf & ETH_RSS_IPV4)
		dev_rss_conf->hashType |= VMXNET3_RSS_HASH_TYPE_IPV4;
	if (rss_hf & ETH_RSS_NONFRAG_IPV4_TCP)
		dev_rss_conf->hashType |= VMXNET3_RSS_HASH_TYPE_TCP_IPV4;
	if (rss_hf & ETH_RSS_IPV6)
		dev_rss_conf->hashType |= VMXNET3_RSS_HASH_TYPE_IPV6;
	if (rss_hf & ETH_RSS_NONFRAG_IPV6_TCP)
		dev_rss_conf->hashType |= VMXNET3_RSS_HASH_TYPE_TCP_IPV6;

	return VMXNET3_SUCCESS;
 * Configure VLAN Filter feature
vmxnet3_vlan_configure(struct rte_eth_dev *dev)
	struct vmxnet3_hw *hw = dev->data->dev_private;
	uint32_t *vf_table = hw->shared->devRead.rxFilterConf.vfTable;

	PMD_INIT_FUNC_TRACE();

	/* Verify if this tag is already set */
	for (i = 0; i < VMXNET3_VFT_SIZE; i++) {
		/* Filter all vlan tags out by default */
		/* To-Do: Provide another routine in dev_ops for user config */
		PMD_INIT_LOG(DEBUG, "Registering VLAN portid: %"PRIu8" tag %u",
			     dev->data->port_id, vf_table[i]);

	return VMXNET3_SUCCESS;