/*-
 *   BSD LICENSE
 *
 *   Copyright(c) 2010-2015 Intel Corporation. All rights reserved.
 *   All rights reserved.
 *
 *   Redistribution and use in source and binary forms, with or without
 *   modification, are permitted provided that the following conditions
 *   are met:
 *
 *     * Redistributions of source code must retain the above copyright
 *       notice, this list of conditions and the following disclaimer.
 *     * Redistributions in binary form must reproduce the above copyright
 *       notice, this list of conditions and the following disclaimer in
 *       the documentation and/or other materials provided with the
 *       distribution.
 *     * Neither the name of Intel Corporation nor the names of its
 *       contributors may be used to endorse or promote products derived
 *       from this software without specific prior written permission.
 *
 *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

#include <sys/queue.h>

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <errno.h>
#include <stdint.h>

#include <rte_byteorder.h>
#include <rte_common.h>
#include <rte_cycles.h>
#include <rte_log.h>
#include <rte_debug.h>
#include <rte_interrupts.h>
#include <rte_pci.h>
#include <rte_memory.h>
#include <rte_memzone.h>
#include <rte_launch.h>
#include <rte_eal.h>
#include <rte_per_lcore.h>
#include <rte_lcore.h>
#include <rte_atomic.h>
#include <rte_branch_prediction.h>
#include <rte_ring.h>
#include <rte_mempool.h>
#include <rte_malloc.h>
#include <rte_mbuf.h>
#include <rte_ether.h>
#include <rte_ethdev.h>
#include <rte_prefetch.h>
#include <rte_ip.h>
#include <rte_udp.h>
#include <rte_tcp.h>
#include <rte_sctp.h>
#include <rte_string_fns.h>
#include <rte_errno.h>

#include "base/vmxnet3_defs.h"
#include "vmxnet3_ring.h"

#include "vmxnet3_logs.h"
#include "vmxnet3_ethdev.h"

#define RTE_MBUF_DATA_DMA_ADDR(mb) \
	(uint64_t) ((mb)->buf_physaddr + (mb)->data_off)

#define RTE_MBUF_DATA_DMA_ADDR_DEFAULT(mb) \
	(uint64_t) ((mb)->buf_physaddr + RTE_PKTMBUF_HEADROOM)
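
/*
 * Both helpers translate an mbuf into a guest-physical DMA address.
 * RTE_MBUF_DATA_DMA_ADDR honours the mbuf's current data_off, so it suits
 * Tx segments whose data offset may vary per packet; the _DEFAULT variant
 * assumes data starts right after RTE_PKTMBUF_HEADROOM, which holds for
 * the freshly allocated mbufs posted to the Rx rings below.
 */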

static uint32_t rxprod_reg[2] = {VMXNET3_REG_RXPROD, VMXNET3_REG_RXPROD2};

static inline int vmxnet3_post_rx_bufs(vmxnet3_rx_queue_t *, uint8_t);
static inline void vmxnet3_tq_tx_complete(vmxnet3_tx_queue_t *);
#ifdef RTE_LIBRTE_VMXNET3_DEBUG_DRIVER_NOT_USED
static void vmxnet3_rxq_dump(struct vmxnet3_rx_queue *);
static void vmxnet3_txq_dump(struct vmxnet3_tx_queue *);
#endif

static inline struct rte_mbuf *
rte_rxmbuf_alloc(struct rte_mempool *mp)
{
	struct rte_mbuf *m;

	m = __rte_mbuf_raw_alloc(mp);
	__rte_mbuf_sanity_check_raw(m, 0);
	return m;
}

#ifdef RTE_LIBRTE_VMXNET3_DEBUG_DRIVER_NOT_USED
static void
vmxnet3_rxq_dump(struct vmxnet3_rx_queue *rxq)
{
	uint32_t avail = 0;

	if (rxq == NULL)
		return;

	PMD_RX_LOG(DEBUG,
		   "RXQ: cmd0 base : 0x%p cmd1 base : 0x%p comp ring base : 0x%p.",
		   rxq->cmd_ring[0].base, rxq->cmd_ring[1].base, rxq->comp_ring.base);
	PMD_RX_LOG(DEBUG,
		   "RXQ: cmd0 basePA : 0x%lx cmd1 basePA : 0x%lx comp ring basePA : 0x%lx.",
		   (unsigned long)rxq->cmd_ring[0].basePA,
		   (unsigned long)rxq->cmd_ring[1].basePA,
		   (unsigned long)rxq->comp_ring.basePA);

	avail = vmxnet3_cmd_ring_desc_avail(&rxq->cmd_ring[0]);
	PMD_RX_LOG(DEBUG,
		   "RXQ:cmd0: size=%u; free=%u; next2proc=%u; queued=%u",
		   (uint32_t)rxq->cmd_ring[0].size, avail,
		   rxq->comp_ring.next2proc,
		   rxq->cmd_ring[0].size - avail);

	avail = vmxnet3_cmd_ring_desc_avail(&rxq->cmd_ring[1]);
	PMD_RX_LOG(DEBUG, "RXQ:cmd1 size=%u; free=%u; next2proc=%u; queued=%u",
		   (uint32_t)rxq->cmd_ring[1].size, avail, rxq->comp_ring.next2proc,
		   rxq->cmd_ring[1].size - avail);
}

static void
vmxnet3_txq_dump(struct vmxnet3_tx_queue *txq)
{
	uint32_t avail = 0;

	if (txq == NULL)
		return;

	PMD_TX_LOG(DEBUG, "TXQ: cmd base : 0x%p comp ring base : 0x%p data ring base : 0x%p.",
		   txq->cmd_ring.base, txq->comp_ring.base, txq->data_ring.base);
	PMD_TX_LOG(DEBUG, "TXQ: cmd basePA : 0x%lx comp ring basePA : 0x%lx data ring basePA : 0x%lx.",
		   (unsigned long)txq->cmd_ring.basePA,
		   (unsigned long)txq->comp_ring.basePA,
		   (unsigned long)txq->data_ring.basePA);

	avail = vmxnet3_cmd_ring_desc_avail(&txq->cmd_ring);
	PMD_TX_LOG(DEBUG, "TXQ: size=%u; free=%u; next2proc=%u; queued=%u",
		   (uint32_t)txq->cmd_ring.size, avail,
		   txq->comp_ring.next2proc, txq->cmd_ring.size - avail);
}
#endif

static void
vmxnet3_cmd_ring_release_mbufs(vmxnet3_cmd_ring_t *ring)
{
	while (ring->next2comp != ring->next2fill) {
		/* No need to worry about tx desc ownership, device is quiesced by now. */
		vmxnet3_buf_info_t *buf_info = ring->buf_info + ring->next2comp;

		if (buf_info->m) {
			rte_pktmbuf_free(buf_info->m);
			buf_info->m = NULL;
			buf_info->bufPA = 0;
			buf_info->len = 0;
		}
		vmxnet3_cmd_ring_adv_next2comp(ring);
	}
}

static void
vmxnet3_cmd_ring_release(vmxnet3_cmd_ring_t *ring)
{
	vmxnet3_cmd_ring_release_mbufs(ring);
	rte_free(ring->buf_info);
	ring->buf_info = NULL;
}

void
vmxnet3_dev_tx_queue_release(void *txq)
{
	vmxnet3_tx_queue_t *tq = txq;

	if (tq != NULL) {
		/* Release the cmd_ring */
		vmxnet3_cmd_ring_release(&tq->cmd_ring);
	}
}

void
vmxnet3_dev_rx_queue_release(void *rxq)
{
	int i;
	vmxnet3_rx_queue_t *rq = rxq;

	if (rq != NULL) {
		/* Release both the cmd_rings */
		for (i = 0; i < VMXNET3_RX_CMDRING_SIZE; i++)
			vmxnet3_cmd_ring_release(&rq->cmd_ring[i]);
	}
}

static void
vmxnet3_dev_tx_queue_reset(void *txq)
{
	vmxnet3_tx_queue_t *tq = txq;
	struct vmxnet3_cmd_ring *ring = &tq->cmd_ring;
	struct vmxnet3_comp_ring *comp_ring = &tq->comp_ring;
	struct vmxnet3_data_ring *data_ring = &tq->data_ring;
	int size;

	if (tq != NULL) {
		/* Release the cmd_ring mbufs */
		vmxnet3_cmd_ring_release_mbufs(&tq->cmd_ring);
	}

	/* Tx vmxnet rings structure initialization */
	ring->next2fill = 0;
	ring->next2comp = 0;
	ring->gen = VMXNET3_INIT_GEN;
	comp_ring->next2proc = 0;
	comp_ring->gen = VMXNET3_INIT_GEN;

	size = sizeof(struct Vmxnet3_TxDesc) * ring->size;
	size += sizeof(struct Vmxnet3_TxCompDesc) * comp_ring->size;
	size += sizeof(struct Vmxnet3_TxDataDesc) * data_ring->size;

	memset(ring->base, 0, size);
}

static void
vmxnet3_dev_rx_queue_reset(void *rxq)
{
	int i;
	vmxnet3_rx_queue_t *rq = rxq;
	struct vmxnet3_cmd_ring *ring0, *ring1;
	struct vmxnet3_comp_ring *comp_ring;
	int size;

	if (rq != NULL) {
		/* Release both the cmd_rings mbufs */
		for (i = 0; i < VMXNET3_RX_CMDRING_SIZE; i++)
			vmxnet3_cmd_ring_release_mbufs(&rq->cmd_ring[i]);
	}

	ring0 = &rq->cmd_ring[0];
	ring1 = &rq->cmd_ring[1];
	comp_ring = &rq->comp_ring;

	/* Rx vmxnet rings structure initialization */
	ring0->next2fill = 0;
	ring1->next2fill = 0;
	ring0->next2comp = 0;
	ring1->next2comp = 0;
	ring0->gen = VMXNET3_INIT_GEN;
	ring1->gen = VMXNET3_INIT_GEN;
	comp_ring->next2proc = 0;
	comp_ring->gen = VMXNET3_INIT_GEN;

	size = sizeof(struct Vmxnet3_RxDesc) * (ring0->size + ring1->size);
	size += sizeof(struct Vmxnet3_RxCompDesc) * comp_ring->size;

	memset(ring0->base, 0, size);
}

void
vmxnet3_dev_clear_queues(struct rte_eth_dev *dev)
{
	unsigned i;

	PMD_INIT_FUNC_TRACE();

	for (i = 0; i < dev->data->nb_tx_queues; i++) {
		struct vmxnet3_tx_queue *txq = dev->data->tx_queues[i];

		if (txq != NULL) {
			txq->stopped = TRUE;
			vmxnet3_dev_tx_queue_reset(txq);
		}
	}

	for (i = 0; i < dev->data->nb_rx_queues; i++) {
		struct vmxnet3_rx_queue *rxq = dev->data->rx_queues[i];

		if (rxq != NULL) {
			rxq->stopped = TRUE;
			vmxnet3_dev_rx_queue_reset(rxq);
		}
	}
}

static inline void
vmxnet3_tq_tx_complete(vmxnet3_tx_queue_t *txq)
{
	int completed = 0;
	struct rte_mbuf *mbuf;
	vmxnet3_comp_ring_t *comp_ring = &txq->comp_ring;
	struct Vmxnet3_TxCompDesc *tcd = (struct Vmxnet3_TxCompDesc *)
		(comp_ring->base + comp_ring->next2proc);

	while (tcd->gen == comp_ring->gen) {
		/* Release cmd_ring descriptor and free mbuf */
#ifdef RTE_LIBRTE_VMXNET3_DEBUG_DRIVER
		VMXNET3_ASSERT(txq->cmd_ring.base[tcd->txdIdx].txd.eop == 1);
#endif
		while (txq->cmd_ring.next2comp != tcd->txdIdx) {
			mbuf = txq->cmd_ring.buf_info[txq->cmd_ring.next2comp].m;
			txq->cmd_ring.buf_info[txq->cmd_ring.next2comp].m = NULL;
			rte_pktmbuf_free_seg(mbuf);

			/* Mark the txd for which tcd was generated as completed */
			vmxnet3_cmd_ring_adv_next2comp(&txq->cmd_ring);
			completed++;
		}

		vmxnet3_comp_ring_adv_next2proc(comp_ring);
		tcd = (struct Vmxnet3_TxCompDesc *)(comp_ring->base +
						    comp_ring->next2proc);
	}

	PMD_TX_LOG(DEBUG, "Processed %d tx comps & command descs.", completed);
}
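
/*
 * Transmit path overview: descriptor ownership is handed to the device via
 * the generation (gen) bit. The SOP descriptor of a packet is written with
 * the stale gen value (so the device ignores it), while later segments
 * carry the live gen; after a compiler barrier the SOP gen bit is flipped
 * last, atomically publishing the whole chain. The TXPROD doorbell is rung
 * only once txNumDeferred reaches txThreshold, batching MMIO writes across
 * packets.
 */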
uint16_t
vmxnet3_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts,
		  uint16_t nb_pkts)
{
	uint16_t nb_tx;
	vmxnet3_tx_queue_t *txq = tx_queue;
	struct vmxnet3_hw *hw = txq->hw;

	if (unlikely(txq->stopped)) {
		PMD_TX_LOG(DEBUG, "Tx queue is stopped.");
		return 0;
	}

	/* Free up the comp_descriptors aggressively */
	vmxnet3_tq_tx_complete(txq);

	nb_tx = 0;
	while (nb_tx < nb_pkts) {
		Vmxnet3_GenericDesc *gdesc;
		vmxnet3_buf_info_t *tbi;
		uint32_t first2fill, avail, dw2;
		struct rte_mbuf *txm = tx_pkts[nb_tx];
		struct rte_mbuf *m_seg = txm;

		/* Is this packet excessively fragmented? Then drop it. */
		if (unlikely(txm->nb_segs > VMXNET3_MAX_TXD_PER_PKT)) {
			++txq->stats.drop_too_many_segs;
			++txq->stats.drop_total;
			rte_pktmbuf_free(txm);
			++nb_tx;
			continue;
		}

		/* Is command ring full? */
		avail = vmxnet3_cmd_ring_desc_avail(&txq->cmd_ring);
		if (txm->nb_segs > avail) {
			++txq->stats.tx_ring_full;
			break;
		}

		/* use the previous gen bit for the SOP desc */
		dw2 = (txq->cmd_ring.gen ^ 0x1) << VMXNET3_TXD_GEN_SHIFT;
		first2fill = txq->cmd_ring.next2fill;
		do {
			/* Remember the transmit buffer for cleanup */
			tbi = txq->cmd_ring.buf_info + txq->cmd_ring.next2fill;
			tbi->m = m_seg;

			/* NB: the following assumes that VMXNET3 maximum
			   transmit buffer size (16K) is greater than
			   maximum size of an mbuf segment. */
			gdesc = txq->cmd_ring.base + txq->cmd_ring.next2fill;
			gdesc->txd.addr = RTE_MBUF_DATA_DMA_ADDR(m_seg);
			gdesc->dword[2] = dw2 | m_seg->data_len;
			gdesc->dword[3] = 0;

			/* move to the next2fill descriptor */
			vmxnet3_cmd_ring_adv_next2fill(&txq->cmd_ring);

			/* use the right gen for non-SOP desc */
			dw2 = txq->cmd_ring.gen << VMXNET3_TXD_GEN_SHIFT;
		} while ((m_seg = m_seg->next) != NULL);

		/* Update the EOP descriptor */
		gdesc->dword[3] |= VMXNET3_TXD_EOP | VMXNET3_TXD_CQ;

		/* Add VLAN tag if present */
		gdesc = txq->cmd_ring.base + first2fill;
		if (txm->ol_flags & PKT_TX_VLAN_PKT) {
			gdesc->txd.ti = 1;
			gdesc->txd.tci = txm->vlan_tci;
		}

		/* TODO: Add transmit checksum offload here */

		/* flip the GEN bit on the SOP */
		rte_compiler_barrier();
		gdesc->dword[2] ^= VMXNET3_TXD_GEN;

		txq->shared->ctrl.txNumDeferred++;
		nb_tx++;
	}

	PMD_TX_LOG(DEBUG, "vmxnet3 txThreshold: %u", txq->shared->ctrl.txThreshold);

	if (txq->shared->ctrl.txNumDeferred >= txq->shared->ctrl.txThreshold) {
		txq->shared->ctrl.txNumDeferred = 0;
		/* Notify vSwitch that packets are available. */
		VMXNET3_WRITE_BAR0_REG(hw, (VMXNET3_REG_TXPROD + txq->queue_id * VMXNET3_REG_ALIGN),
				       txq->cmd_ring.next2fill);
	}

	return nb_tx;
}

/*
 * Allocates mbufs and clusters. Posts Rx descriptors with buffer details
 * so that the device can receive packets in those buffers.
 *
 * Ring layout: among the two rings, the 1st ring contains buffers of
 * type 0 and type 1. bufs_per_pkt is set such that for non-LRO cases all
 * the buffers required by a frame fit in the 1st ring (1st buf of type 0
 * and the rest of type 1). The 2nd ring contains buffers of type 1 alone
 * and is mostly used for LRO, which needs many more type 1 buffers.
 */
static int
vmxnet3_post_rx_bufs(vmxnet3_rx_queue_t *rxq, uint8_t ring_id)
{
	int err = 0;
	uint32_t i = 0, val = 0;
	struct vmxnet3_cmd_ring *ring = &rxq->cmd_ring[ring_id];

	if (ring_id == 0) {
		/* Usually: One HEAD type buf per packet
		 * val = (ring->next2fill % rxq->hw->bufs_per_pkt) ?
		 * VMXNET3_RXD_BTYPE_BODY : VMXNET3_RXD_BTYPE_HEAD;
		 */

		/* We use single packet buffer so all heads here */
		val = VMXNET3_RXD_BTYPE_HEAD;
	} else {
		/* All BODY type buffers for 2nd ring */
		val = VMXNET3_RXD_BTYPE_BODY;
	}

	while (vmxnet3_cmd_ring_desc_avail(ring) > 0) {
		struct Vmxnet3_RxDesc *rxd;
		struct rte_mbuf *mbuf;
		vmxnet3_buf_info_t *buf_info = &ring->buf_info[ring->next2fill];

		rxd = (struct Vmxnet3_RxDesc *)(ring->base + ring->next2fill);

		/* Allocate blank mbuf for the current Rx Descriptor */
		mbuf = rte_rxmbuf_alloc(rxq->mp);
		if (unlikely(mbuf == NULL)) {
			PMD_RX_LOG(ERR, "Error allocating mbuf in %s", __func__);
			rxq->stats.rx_buf_alloc_failure++;
			err = ENOMEM;
			break;
		}

		/*
		 * Load mbuf pointer into buf_info[ring_size]
		 * buf_info structure is equivalent to cookie for virtio-virtqueue
		 */
		buf_info->m = mbuf;
		buf_info->len = (uint16_t)(mbuf->buf_len -
					   RTE_PKTMBUF_HEADROOM);
		buf_info->bufPA = RTE_MBUF_DATA_DMA_ADDR_DEFAULT(mbuf);

		/* Load Rx Descriptor with the buffer's GPA */
		rxd->addr = buf_info->bufPA;

		/* After this point rxd->addr MUST not be NULL */
		rxd->btype = val;
		rxd->len = buf_info->len;
		/* Flip gen bit at the end to change ownership */
		rxd->gen = ring->gen;

		vmxnet3_cmd_ring_adv_next2fill(ring);
		i++;
	}

	/* Return error only if no buffers are posted at present */
	if (vmxnet3_cmd_ring_desc_avail(ring) >= (ring->size - 1))
		return -err;
	else
		return i;
}
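
/*
 * Note on the return convention above: the function returns the number of
 * descriptors it managed to post (i > 0); only when the ring is still
 * effectively empty does it return -err (e.g. after a failed mbuf
 * allocation with nothing posted). Callers such as vmxnet3_dev_rxtx_init()
 * treat ret <= 0 as fatal.
 */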

/* Receive side checksum and other offloads */
static void
vmxnet3_rx_offload(const Vmxnet3_RxCompDesc *rcd, struct rte_mbuf *rxm)
{
	/* Check for hardware stripped VLAN tag */
	if (rcd->ts) {
		rxm->ol_flags |= PKT_RX_VLAN_PKT;
		rxm->vlan_tci = rte_le_to_cpu_16((uint16_t)rcd->tci);
	}

	/* Check for RSS */
	if (rcd->rssType != VMXNET3_RCD_RSS_TYPE_NONE) {
		rxm->ol_flags |= PKT_RX_RSS_HASH;
		rxm->hash.rss = rcd->rssHash;
	}

	/* Check packet type, checksum errors, etc. Only support IPv4 for now. */
	if (rcd->v4) {
		struct ether_hdr *eth = rte_pktmbuf_mtod(rxm, struct ether_hdr *);
		struct ipv4_hdr *ip = (struct ipv4_hdr *)(eth + 1);

		if (((ip->version_ihl & 0xf) << 2) > (int)sizeof(struct ipv4_hdr))
			rxm->ol_flags |= PKT_RX_IPV4_HDR_EXT;
		else
			rxm->ol_flags |= PKT_RX_IPV4_HDR;

		if (!rcd->cnc) {
			if (!rcd->ipc)
				rxm->ol_flags |= PKT_RX_IP_CKSUM_BAD;

			if ((rcd->tcp || rcd->udp) && !rcd->tuc)
				rxm->ol_flags |= PKT_RX_L4_CKSUM_BAD;
		}
	}
}
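
/*
 * Worked example for the IHL test above: for a plain IPv4 header,
 * version_ihl is 0x45, so (0x45 & 0xf) << 2 = 5 * 4 = 20 bytes, which
 * equals sizeof(struct ipv4_hdr) and yields PKT_RX_IPV4_HDR. Any IHL
 * greater than 5 means IPv4 options are present, hence PKT_RX_IPV4_HDR_EXT.
 */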

/*
 * Process the Rx Completion Ring of given vmxnet3_rx_queue
 * for nb_pkts burst and return the number of packets received
 */
uint16_t
vmxnet3_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t nb_pkts)
{
	uint16_t nb_rx;
	uint32_t nb_rxd, idx;
	uint8_t ring_idx;
	vmxnet3_rx_queue_t *rxq;
	Vmxnet3_RxCompDesc *rcd;
	vmxnet3_buf_info_t *rbi;
	Vmxnet3_RxDesc *rxd;
	struct rte_mbuf *rxm = NULL;
	struct vmxnet3_hw *hw;

	nb_rx = 0;
	ring_idx = 0;
	nb_rxd = 0;
	idx = 0;

	rxq = rx_queue;
	hw = rxq->hw;

	rcd = &rxq->comp_ring.base[rxq->comp_ring.next2proc].rcd;

	if (unlikely(rxq->stopped)) {
		PMD_RX_LOG(DEBUG, "Rx queue is stopped.");
		return 0;
	}

	while (rcd->gen == rxq->comp_ring.gen) {
		if (nb_rx >= nb_pkts)
			break;

		idx = rcd->rxdIdx;
		ring_idx = (uint8_t)((rcd->rqID == rxq->qid1) ? 0 : 1);
		rxd = (Vmxnet3_RxDesc *)rxq->cmd_ring[ring_idx].base + idx;
		rbi = rxq->cmd_ring[ring_idx].buf_info + idx;

		if (unlikely(rcd->sop != 1 || rcd->eop != 1)) {
			rte_pktmbuf_free_seg(rbi->m);
			PMD_RX_LOG(DEBUG, "Packet spread across multiple buffers.");
			goto rcd_done;
		}

		PMD_RX_LOG(DEBUG, "rxd idx: %d ring idx: %d.", idx, ring_idx);

#ifdef RTE_LIBRTE_VMXNET3_DEBUG_DRIVER
		VMXNET3_ASSERT(rcd->len <= rxd->len);
		VMXNET3_ASSERT(rbi->m);
#endif
		if (unlikely(rcd->len == 0)) {
			PMD_RX_LOG(DEBUG, "Rx buf was skipped. rxring[%d][%d]",
				   ring_idx, idx);
#ifdef RTE_LIBRTE_VMXNET3_DEBUG_DRIVER
			VMXNET3_ASSERT(rcd->sop && rcd->eop);
#endif
			rte_pktmbuf_free_seg(rbi->m);
			goto rcd_done;
		}

		/* Assuming a packet is coming in a single packet buffer */
		if (unlikely(rxd->btype != VMXNET3_RXD_BTYPE_HEAD)) {
			PMD_RX_LOG(FATAL,
				   "Alert : Misbehaving device, incorrect "
				   "buffer type used. Packet dropped.");
			rte_pktmbuf_free_seg(rbi->m);
			goto rcd_done;
		}
#ifdef RTE_LIBRTE_VMXNET3_DEBUG_DRIVER
		VMXNET3_ASSERT(rxd->btype == VMXNET3_RXD_BTYPE_HEAD);
#endif
		/* Get the packet buffer pointer from buf_info */
		rxm = rbi->m;

		/* Clear descriptor associated buf_info to be reused */
		rbi->m = NULL;
		rbi->bufPA = 0;

		/* Update the index that we received a packet */
		rxq->cmd_ring[ring_idx].next2comp = idx;

		/* For RCD with EOP set, check if there is frame error */
		if (unlikely(rcd->err)) {
			rxq->stats.drop_total++;
			rxq->stats.drop_err++;

			if (!rcd->fcs) {
				rxq->stats.drop_fcs++;
				PMD_RX_LOG(ERR, "Recv packet dropped due to frame err.");
			}
			PMD_RX_LOG(ERR, "Error in received packet rcd#:%d rxd:%d",
				   (int)(rcd - (struct Vmxnet3_RxCompDesc *)
					 rxq->comp_ring.base), rcd->rxdIdx);
			rte_pktmbuf_free_seg(rxm);
			goto rcd_done;
		}

		/* Initialize newly received packet buffer */
		rxm->port = rxq->port_id;
		rxm->nb_segs = 1;
		rxm->next = NULL;
		rxm->pkt_len = (uint16_t)rcd->len;
		rxm->data_len = (uint16_t)rcd->len;
		rxm->data_off = RTE_PKTMBUF_HEADROOM;
		rxm->ol_flags = 0;
		rxm->vlan_tci = 0;

		vmxnet3_rx_offload(rcd, rxm);

		rx_pkts[nb_rx++] = rxm;
rcd_done:
		rxq->cmd_ring[ring_idx].next2comp = idx;
		VMXNET3_INC_RING_IDX_ONLY(rxq->cmd_ring[ring_idx].next2comp, rxq->cmd_ring[ring_idx].size);

		/* It's time to allocate some new buf and renew descriptors */
		vmxnet3_post_rx_bufs(rxq, ring_idx);
		if (unlikely(rxq->shared->ctrl.updateRxProd)) {
			VMXNET3_WRITE_BAR0_REG(hw, rxprod_reg[ring_idx] + (rxq->queue_id * VMXNET3_REG_ALIGN),
					       rxq->cmd_ring[ring_idx].next2fill);
		}

		/* Advance to the next descriptor in comp_ring */
		vmxnet3_comp_ring_adv_next2proc(&rxq->comp_ring);

		rcd = &rxq->comp_ring.base[rxq->comp_ring.next2proc].rcd;
		nb_rxd++;
		if (nb_rxd > rxq->cmd_ring[0].size) {
			PMD_RX_LOG(ERR, "Used up quota of receiving packets,"
				   " relinquish control.");
			break;
		}
	}

	return nb_rx;
}

/*
 * Create memzone for device rings. malloc can't be used as the physical
 * address is needed. If the memzone is already created, then this function
 * returns a pointer to the old one.
 */
static const struct rte_memzone *
ring_dma_zone_reserve(struct rte_eth_dev *dev, const char *ring_name,
		      uint16_t queue_id, uint32_t ring_size, int socket_id)
{
	char z_name[RTE_MEMZONE_NAMESIZE];
	const struct rte_memzone *mz;

	snprintf(z_name, sizeof(z_name), "%s_%s_%d_%d",
		 dev->driver->pci_drv.name, ring_name,
		 dev->data->port_id, queue_id);

	mz = rte_memzone_lookup(z_name);
	if (mz)
		return mz;

	return rte_memzone_reserve_aligned(z_name, ring_size,
					   socket_id, 0, VMXNET3_RING_BA_ALIGN);
}
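
/*
 * The lookup-before-reserve pattern above makes queue setup idempotent:
 * re-configuring a port reuses the zone named after the driver, ring and
 * queue (for port 0, Tx queue 1, a name of the form
 * "<pci_drv.name>_txdesc_0_1") instead of leaking hugepage memory.
 */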
int
vmxnet3_dev_tx_queue_setup(struct rte_eth_dev *dev,
			   uint16_t queue_idx,
			   uint16_t nb_desc,
			   unsigned int socket_id,
			   __attribute__((unused)) const struct rte_eth_txconf *tx_conf)
{
	struct vmxnet3_hw *hw = dev->data->dev_private;
	const struct rte_memzone *mz;
	struct vmxnet3_tx_queue *txq;
	struct vmxnet3_cmd_ring *ring;
	struct vmxnet3_comp_ring *comp_ring;
	struct vmxnet3_data_ring *data_ring;
	int size;

	PMD_INIT_FUNC_TRACE();

	if ((tx_conf->txq_flags & ETH_TXQ_FLAGS_NOXSUMS) !=
	    ETH_TXQ_FLAGS_NOXSUMS) {
		PMD_INIT_LOG(ERR, "TX no support for checksum offload yet");
		return -EINVAL;
	}

	txq = rte_zmalloc("ethdev_tx_queue", sizeof(struct vmxnet3_tx_queue), RTE_CACHE_LINE_SIZE);
	if (txq == NULL) {
		PMD_INIT_LOG(ERR, "Can not allocate tx queue structure");
		return -ENOMEM;
	}

	txq->queue_id = queue_idx;
	txq->port_id = dev->data->port_id;
	txq->shared = &hw->tqd_start[queue_idx];
	txq->hw = hw;
	txq->qid = queue_idx;
	txq->stopped = TRUE;

	ring = &txq->cmd_ring;
	comp_ring = &txq->comp_ring;
	data_ring = &txq->data_ring;

	/* Tx vmxnet ring length should be between 512-4096 */
	if (nb_desc < VMXNET3_DEF_TX_RING_SIZE) {
		PMD_INIT_LOG(ERR, "VMXNET3 Tx Ring Size Min: %u",
			     VMXNET3_DEF_TX_RING_SIZE);
		return -EINVAL;
	} else if (nb_desc > VMXNET3_TX_RING_MAX_SIZE) {
		PMD_INIT_LOG(ERR, "VMXNET3 Tx Ring Size Max: %u",
			     VMXNET3_TX_RING_MAX_SIZE);
		return -EINVAL;
	} else {
		ring->size = nb_desc;
		ring->size &= ~VMXNET3_RING_SIZE_MASK;
	}
	comp_ring->size = data_ring->size = ring->size;

	/* Tx vmxnet rings structure initialization */
	ring->next2fill = 0;
	ring->next2comp = 0;
	ring->gen = VMXNET3_INIT_GEN;
	comp_ring->next2proc = 0;
	comp_ring->gen = VMXNET3_INIT_GEN;

	size = sizeof(struct Vmxnet3_TxDesc) * ring->size;
	size += sizeof(struct Vmxnet3_TxCompDesc) * comp_ring->size;
	size += sizeof(struct Vmxnet3_TxDataDesc) * data_ring->size;

	mz = ring_dma_zone_reserve(dev, "txdesc", queue_idx, size, socket_id);
	if (mz == NULL) {
		PMD_INIT_LOG(ERR, "ERROR: Creating queue descriptors zone");
		return -ENOMEM;
	}
	memset(mz->addr, 0, mz->len);

	/* cmd_ring initialization */
	ring->base = mz->addr;
	ring->basePA = mz->phys_addr;

	/* comp_ring initialization */
	comp_ring->base = ring->base + ring->size;
	comp_ring->basePA = ring->basePA +
		(sizeof(struct Vmxnet3_TxDesc) * ring->size);

	/* data_ring initialization */
	data_ring->base = (Vmxnet3_TxDataDesc *)(comp_ring->base + comp_ring->size);
	data_ring->basePA = comp_ring->basePA +
		(sizeof(struct Vmxnet3_TxCompDesc) * comp_ring->size);

	/* cmd_ring0 buf_info allocation */
	ring->buf_info = rte_zmalloc("tx_ring_buf_info",
				     ring->size * sizeof(vmxnet3_buf_info_t), RTE_CACHE_LINE_SIZE);
	if (ring->buf_info == NULL) {
		PMD_INIT_LOG(ERR, "ERROR: Creating tx_buf_info structure");
		return -ENOMEM;
	}

	/* Update the data portion with txq */
	dev->data->tx_queues[queue_idx] = txq;

	return 0;
}
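
/*
 * Memzone layout produced by the setup above, back to back in one
 * physically contiguous allocation (all three rings share ring->size
 * entries):
 *
 *   basePA: [Vmxnet3_TxDesc * size][Vmxnet3_TxCompDesc * size][Vmxnet3_TxDataDesc * size]
 *            cmd_ring               comp_ring                  data_ring
 */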
int
vmxnet3_dev_rx_queue_setup(struct rte_eth_dev *dev,
			   uint16_t queue_idx,
			   uint16_t nb_desc,
			   unsigned int socket_id,
			   __attribute__((unused)) const struct rte_eth_rxconf *rx_conf,
			   struct rte_mempool *mp)
{
	const struct rte_memzone *mz;
	struct vmxnet3_rx_queue *rxq;
	struct vmxnet3_hw *hw = dev->data->dev_private;
	struct vmxnet3_cmd_ring *ring0, *ring1, *ring;
	struct vmxnet3_comp_ring *comp_ring;
	int size;
	uint8_t i;
	char mem_name[32];
	uint16_t buf_size;

	PMD_INIT_FUNC_TRACE();

	buf_size = rte_pktmbuf_data_room_size(mp) -
		RTE_PKTMBUF_HEADROOM;

	if (dev->data->dev_conf.rxmode.max_rx_pkt_len > buf_size) {
		PMD_INIT_LOG(ERR, "buf_size = %u, max_pkt_len = %u, "
			     "VMXNET3 doesn't support scatter packets yet",
			     buf_size, dev->data->dev_conf.rxmode.max_rx_pkt_len);
		return -EINVAL;
	}

	rxq = rte_zmalloc("ethdev_rx_queue", sizeof(struct vmxnet3_rx_queue), RTE_CACHE_LINE_SIZE);
	if (rxq == NULL) {
		PMD_INIT_LOG(ERR, "Can not allocate rx queue structure");
		return -ENOMEM;
	}

	rxq->mp = mp;
	rxq->queue_id = queue_idx;
	rxq->port_id = dev->data->port_id;
	rxq->shared = &hw->rqd_start[queue_idx];
	rxq->hw = hw;
	rxq->qid1 = queue_idx;
	rxq->qid2 = queue_idx + hw->num_rx_queues;
	rxq->stopped = TRUE;

	ring0 = &rxq->cmd_ring[0];
	ring1 = &rxq->cmd_ring[1];
	comp_ring = &rxq->comp_ring;

	/* Rx vmxnet rings length should be between 256-4096 */
	if (nb_desc < VMXNET3_DEF_RX_RING_SIZE) {
		PMD_INIT_LOG(ERR, "VMXNET3 Rx Ring Size Min: 256");
		return -EINVAL;
	} else if (nb_desc > VMXNET3_RX_RING_MAX_SIZE) {
		PMD_INIT_LOG(ERR, "VMXNET3 Rx Ring Size Max: 4096");
		return -EINVAL;
	} else {
		ring0->size = nb_desc;
		ring0->size &= ~VMXNET3_RING_SIZE_MASK;
		ring1->size = ring0->size;
	}

	comp_ring->size = ring0->size + ring1->size;

	/* Rx vmxnet rings structure initialization */
	ring0->next2fill = 0;
	ring1->next2fill = 0;
	ring0->next2comp = 0;
	ring1->next2comp = 0;
	ring0->gen = VMXNET3_INIT_GEN;
	ring1->gen = VMXNET3_INIT_GEN;
	comp_ring->next2proc = 0;
	comp_ring->gen = VMXNET3_INIT_GEN;

	size = sizeof(struct Vmxnet3_RxDesc) * (ring0->size + ring1->size);
	size += sizeof(struct Vmxnet3_RxCompDesc) * comp_ring->size;

	mz = ring_dma_zone_reserve(dev, "rxdesc", queue_idx, size, socket_id);
	if (mz == NULL) {
		PMD_INIT_LOG(ERR, "ERROR: Creating queue descriptors zone");
		return -ENOMEM;
	}
	memset(mz->addr, 0, mz->len);

	/* cmd_ring0 initialization */
	ring0->base = mz->addr;
	ring0->basePA = mz->phys_addr;

	/* cmd_ring1 initialization */
	ring1->base = ring0->base + ring0->size;
	ring1->basePA = ring0->basePA + sizeof(struct Vmxnet3_RxDesc) * ring0->size;

	/* comp_ring initialization */
	comp_ring->base = ring1->base + ring1->size;
	comp_ring->basePA = ring1->basePA + sizeof(struct Vmxnet3_RxDesc) *
		ring1->size;

	/* cmd_ring0-cmd_ring1 buf_info allocation */
	for (i = 0; i < VMXNET3_RX_CMDRING_SIZE; i++) {

		ring = &rxq->cmd_ring[i];
		ring->rid = i;
		snprintf(mem_name, sizeof(mem_name), "rx_ring_%d_buf_info", i);

		ring->buf_info = rte_zmalloc(mem_name, ring->size * sizeof(vmxnet3_buf_info_t), RTE_CACHE_LINE_SIZE);
		if (ring->buf_info == NULL) {
			PMD_INIT_LOG(ERR, "ERROR: Creating rx_buf_info structure");
			return -ENOMEM;
		}
	}

	/* Update the data portion with rxq */
	dev->data->rx_queues[queue_idx] = rxq;

	return 0;
}
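
/*
 * Resulting Rx memzone layout, mirroring the Tx side: two command rings of
 * equal size followed by a completion ring sized for both
 * (comp_ring->size == ring0->size + ring1->size):
 *
 *   basePA: [Vmxnet3_RxDesc * ring0][Vmxnet3_RxDesc * ring1][Vmxnet3_RxCompDesc * (ring0 + ring1)]
 *            cmd_ring[0]             cmd_ring[1]             comp_ring
 */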

/*
 * Initializes Receive Unit
 * Load mbufs in rx queue in advance
 */
int
vmxnet3_dev_rxtx_init(struct rte_eth_dev *dev)
{
	struct vmxnet3_hw *hw = dev->data->dev_private;
	int i, ret;
	uint8_t j;

	PMD_INIT_FUNC_TRACE();

	for (i = 0; i < hw->num_rx_queues; i++) {
		vmxnet3_rx_queue_t *rxq = dev->data->rx_queues[i];

		for (j = 0; j < VMXNET3_RX_CMDRING_SIZE; j++) {
			/* Passing 0 as alloc_num will allocate full ring */
			ret = vmxnet3_post_rx_bufs(rxq, j);
			if (ret <= 0) {
				PMD_INIT_LOG(ERR, "ERROR: Posting Rxq: %d buffers ring: %d", i, j);
				return -ret;
			}
			/* Updating device with the index:next2fill to fill the mbufs for coming packets */
			if (unlikely(rxq->shared->ctrl.updateRxProd)) {
				VMXNET3_WRITE_BAR0_REG(hw, rxprod_reg[j] + (rxq->queue_id * VMXNET3_REG_ALIGN),
						       rxq->cmd_ring[j].next2fill);
			}
		}
		rxq->stopped = FALSE;
	}

	for (i = 0; i < dev->data->nb_tx_queues; i++) {
		struct vmxnet3_tx_queue *txq = dev->data->tx_queues[i];

		txq->stopped = FALSE;
	}

	return 0;
}

static uint8_t rss_intel_key[40] = {
	0x6D, 0x5A, 0x56, 0xDA, 0x25, 0x5B, 0x0E, 0xC2,
	0x41, 0x67, 0x25, 0x3D, 0x43, 0xA3, 0x8F, 0xB0,
	0xD0, 0xCA, 0x2B, 0xCB, 0xAE, 0x7B, 0x30, 0xB4,
	0x77, 0xCB, 0x2D, 0xA3, 0x80, 0x30, 0xF2, 0x0C,
	0x6A, 0x42, 0xB7, 0x3B, 0xBE, 0xAC, 0x01, 0xFA,
};

/*
 * Configure RSS feature
 */
int
vmxnet3_rss_configure(struct rte_eth_dev *dev)
{
	struct vmxnet3_hw *hw = dev->data->dev_private;
	struct VMXNET3_RSSConf *dev_rss_conf;
	struct rte_eth_rss_conf *port_rss_conf;
	uint64_t rss_hf;
	uint8_t i, j;

	PMD_INIT_FUNC_TRACE();

	dev_rss_conf = hw->rss_conf;
	port_rss_conf = &dev->data->dev_conf.rx_adv_conf.rss_conf;

	/* loading hashFunc */
	dev_rss_conf->hashFunc = VMXNET3_RSS_HASH_FUNC_TOEPLITZ;
	/* loading hashKeySize */
	dev_rss_conf->hashKeySize = VMXNET3_RSS_MAX_KEY_SIZE;
	/* loading indTableSize: must not exceed VMXNET3_RSS_MAX_IND_TABLE_SIZE (128) */
	dev_rss_conf->indTableSize = (uint16_t)(hw->num_rx_queues * 4);

	if (port_rss_conf->rss_key == NULL) {
		/* Default hash key */
		port_rss_conf->rss_key = rss_intel_key;
	}

	/* loading hashKey */
	memcpy(&dev_rss_conf->hashKey[0], port_rss_conf->rss_key, dev_rss_conf->hashKeySize);

	/* loading indTable */
	for (i = 0, j = 0; i < dev_rss_conf->indTableSize; i++, j++) {
		if (j == dev->data->nb_rx_queues)
			j = 0;

		dev_rss_conf->indTable[i] = j;
	}

	/* loading hashType */
	dev_rss_conf->hashType = 0;
	rss_hf = port_rss_conf->rss_hf & VMXNET3_RSS_OFFLOAD_ALL;
	if (rss_hf & ETH_RSS_IPV4)
		dev_rss_conf->hashType |= VMXNET3_RSS_HASH_TYPE_IPV4;
	if (rss_hf & ETH_RSS_NONFRAG_IPV4_TCP)
		dev_rss_conf->hashType |= VMXNET3_RSS_HASH_TYPE_TCP_IPV4;
	if (rss_hf & ETH_RSS_IPV6)
		dev_rss_conf->hashType |= VMXNET3_RSS_HASH_TYPE_IPV6;
	if (rss_hf & ETH_RSS_NONFRAG_IPV6_TCP)
		dev_rss_conf->hashType |= VMXNET3_RSS_HASH_TYPE_TCP_IPV6;

	return VMXNET3_SUCCESS;
}
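
/*
 * Worked example of the indirection table fill above: with 4 Rx queues,
 * indTableSize is 16 and the loop writes 0,1,2,3,0,1,2,3,... so flows are
 * spread round-robin across all configured queues; the device selects the
 * destination queue by indexing this table with (roughly) the low-order
 * bits of the Toeplitz hash computed over the packet with the 40-byte key.
 */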