 *   Copyright(c) 2010-2015 Intel Corporation. All rights reserved.
 *   Redistribution and use in source and binary forms, with or without
 *   modification, are permitted provided that the following conditions
 *     * Redistributions of source code must retain the above copyright
 *       notice, this list of conditions and the following disclaimer.
 *     * Redistributions in binary form must reproduce the above copyright
 *       notice, this list of conditions and the following disclaimer in
 *       the documentation and/or other materials provided with the
 *     * Neither the name of Intel Corporation nor the names of its
 *       contributors may be used to endorse or promote products derived
 *       from this software without specific prior written permission.
 *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#include <sys/queue.h>

#include <rte_byteorder.h>
#include <rte_common.h>
#include <rte_cycles.h>
#include <rte_debug.h>
#include <rte_interrupts.h>
#include <rte_memory.h>
#include <rte_memcpy.h>
#include <rte_memzone.h>
#include <rte_launch.h>
#include <rte_per_lcore.h>
#include <rte_lcore.h>
#include <rte_atomic.h>
#include <rte_branch_prediction.h>
#include <rte_mempool.h>
#include <rte_malloc.h>
#include <rte_mbuf.h>
#include <rte_ether.h>
#include <rte_ethdev.h>
#include <rte_prefetch.h>
#include <rte_string_fns.h>
#include <rte_errno.h>

#include "base/vmxnet3_defs.h"
#include "vmxnet3_ring.h"
#include "vmxnet3_logs.h"
#include "vmxnet3_ethdev.h"
static const uint32_t rxprod_reg[2] = {VMXNET3_REG_RXPROD, VMXNET3_REG_RXPROD2};

static int vmxnet3_post_rx_bufs(vmxnet3_rx_queue_t *, uint8_t);
static void vmxnet3_tq_tx_complete(vmxnet3_tx_queue_t *);

#ifdef RTE_LIBRTE_VMXNET3_DEBUG_DRIVER_NOT_USED
static void vmxnet3_rxq_dump(struct vmxnet3_rx_queue *);
static void vmxnet3_txq_dump(struct vmxnet3_tx_queue *);
static struct rte_mbuf *
rte_rxmbuf_alloc(struct rte_mempool *mp)
	m = __rte_mbuf_raw_alloc(mp);
	__rte_mbuf_sanity_check_raw(m, 0);
#ifdef RTE_LIBRTE_VMXNET3_DEBUG_DRIVER_NOT_USED
vmxnet3_rxq_dump(struct vmxnet3_rx_queue *rxq)
		   "RXQ: cmd0 base : 0x%p cmd1 base : 0x%p comp ring base : 0x%p.",
		   rxq->cmd_ring[0].base, rxq->cmd_ring[1].base, rxq->comp_ring.base);
		   "RXQ: cmd0 basePA : 0x%lx cmd1 basePA : 0x%lx comp ring basePA : 0x%lx.",
		   (unsigned long)rxq->cmd_ring[0].basePA,
		   (unsigned long)rxq->cmd_ring[1].basePA,
		   (unsigned long)rxq->comp_ring.basePA);

	avail = vmxnet3_cmd_ring_desc_avail(&rxq->cmd_ring[0]);
		   "RXQ:cmd0: size=%u; free=%u; next2proc=%u; queued=%u",
		   (uint32_t)rxq->cmd_ring[0].size, avail,
		   rxq->comp_ring.next2proc,
		   rxq->cmd_ring[0].size - avail);

	avail = vmxnet3_cmd_ring_desc_avail(&rxq->cmd_ring[1]);
	PMD_RX_LOG(DEBUG, "RXQ:cmd1 size=%u; free=%u; next2proc=%u; queued=%u",
		   (uint32_t)rxq->cmd_ring[1].size, avail, rxq->comp_ring.next2proc,
		   rxq->cmd_ring[1].size - avail);

vmxnet3_txq_dump(struct vmxnet3_tx_queue *txq)
	PMD_TX_LOG(DEBUG, "TXQ: cmd base : 0x%p comp ring base : 0x%p data ring base : 0x%p.",
		   txq->cmd_ring.base, txq->comp_ring.base, txq->data_ring.base);
	PMD_TX_LOG(DEBUG, "TXQ: cmd basePA : 0x%lx comp ring basePA : 0x%lx data ring basePA : 0x%lx.",
		   (unsigned long)txq->cmd_ring.basePA,
		   (unsigned long)txq->comp_ring.basePA,
		   (unsigned long)txq->data_ring.basePA);

	avail = vmxnet3_cmd_ring_desc_avail(&txq->cmd_ring);
	PMD_TX_LOG(DEBUG, "TXQ: size=%u; free=%u; next2proc=%u; queued=%u",
		   (uint32_t)txq->cmd_ring.size, avail,
		   txq->comp_ring.next2proc, txq->cmd_ring.size - avail);
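
/*
 * Free any mbufs still attached to a command ring, from next2comp up to
 * next2fill. Only called once the device has been quiesced, so descriptor
 * ownership no longer matters.
 */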
vmxnet3_cmd_ring_release_mbufs(vmxnet3_cmd_ring_t *ring)
	while (ring->next2comp != ring->next2fill) {
		/* No need to worry about tx desc ownership, device is quiesced by now. */
		vmxnet3_buf_info_t *buf_info = ring->buf_info + ring->next2comp;

		rte_pktmbuf_free(buf_info->m);
		vmxnet3_cmd_ring_adv_next2comp(ring);

vmxnet3_cmd_ring_release(vmxnet3_cmd_ring_t *ring)
	vmxnet3_cmd_ring_release_mbufs(ring);
	rte_free(ring->buf_info);
	ring->buf_info = NULL;

vmxnet3_dev_tx_queue_release(void *txq)
	vmxnet3_tx_queue_t *tq = txq;

	/* Release the cmd_ring */
	vmxnet3_cmd_ring_release(&tq->cmd_ring);

vmxnet3_dev_rx_queue_release(void *rxq)
	vmxnet3_rx_queue_t *rq = rxq;

	/* Release both the cmd_rings */
	for (i = 0; i < VMXNET3_RX_CMDRING_SIZE; i++)
		vmxnet3_cmd_ring_release(&rq->cmd_ring[i]);
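
/*
 * Reset a Tx queue: release any mbufs left on the command ring, then
 * reinitialize the ring indices and generation bits and zero the descriptor
 * memory shared with the device.
 */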
vmxnet3_dev_tx_queue_reset(void *txq)
	vmxnet3_tx_queue_t *tq = txq;
	struct vmxnet3_cmd_ring *ring = &tq->cmd_ring;
	struct vmxnet3_comp_ring *comp_ring = &tq->comp_ring;
	struct vmxnet3_data_ring *data_ring = &tq->data_ring;

	/* Release the cmd_ring mbufs */
	vmxnet3_cmd_ring_release_mbufs(&tq->cmd_ring);

	/* Tx vmxnet rings structure initialization */
	ring->gen = VMXNET3_INIT_GEN;
	comp_ring->next2proc = 0;
	comp_ring->gen = VMXNET3_INIT_GEN;

	size = sizeof(struct Vmxnet3_TxDesc) * ring->size;
	size += sizeof(struct Vmxnet3_TxCompDesc) * comp_ring->size;
	size += sizeof(struct Vmxnet3_TxDataDesc) * data_ring->size;

	memset(ring->base, 0, size);
vmxnet3_dev_rx_queue_reset(void *rxq)
	vmxnet3_rx_queue_t *rq = rxq;
	struct vmxnet3_cmd_ring *ring0, *ring1;
	struct vmxnet3_comp_ring *comp_ring;

	/* Release both the cmd_rings mbufs */
	for (i = 0; i < VMXNET3_RX_CMDRING_SIZE; i++)
		vmxnet3_cmd_ring_release_mbufs(&rq->cmd_ring[i]);

	ring0 = &rq->cmd_ring[0];
	ring1 = &rq->cmd_ring[1];
	comp_ring = &rq->comp_ring;

	/* Rx vmxnet rings structure initialization */
	ring0->next2fill = 0;
	ring1->next2fill = 0;
	ring0->next2comp = 0;
	ring1->next2comp = 0;
	ring0->gen = VMXNET3_INIT_GEN;
	ring1->gen = VMXNET3_INIT_GEN;
	comp_ring->next2proc = 0;
	comp_ring->gen = VMXNET3_INIT_GEN;

	size = sizeof(struct Vmxnet3_RxDesc) * (ring0->size + ring1->size);
	size += sizeof(struct Vmxnet3_RxCompDesc) * comp_ring->size;

	memset(ring0->base, 0, size);
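
/*
 * Reset all Tx and Rx queues of the device back to their initial state.
 */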
vmxnet3_dev_clear_queues(struct rte_eth_dev *dev)
	PMD_INIT_FUNC_TRACE();

	for (i = 0; i < dev->data->nb_tx_queues; i++) {
		struct vmxnet3_tx_queue *txq = dev->data->tx_queues[i];

		vmxnet3_dev_tx_queue_reset(txq);

	for (i = 0; i < dev->data->nb_rx_queues; i++) {
		struct vmxnet3_rx_queue *rxq = dev->data->rx_queues[i];

		vmxnet3_dev_rx_queue_reset(rxq);
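
/*
 * Walk the Tx completion ring and, for every completion descriptor the
 * device has written back (matching generation bit), free the mbuf segments
 * of the transmitted packet and reclaim its command ring slots.
 */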
vmxnet3_tq_tx_complete(vmxnet3_tx_queue_t *txq)
	struct rte_mbuf *mbuf;
	vmxnet3_comp_ring_t *comp_ring = &txq->comp_ring;
	struct Vmxnet3_TxCompDesc *tcd = (struct Vmxnet3_TxCompDesc *)
		(comp_ring->base + comp_ring->next2proc);

	while (tcd->gen == comp_ring->gen) {
		/* Release cmd_ring descriptor and free mbuf */
		VMXNET3_ASSERT(txq->cmd_ring.base[tcd->txdIdx].txd.eop == 1);
		while (txq->cmd_ring.next2comp != tcd->txdIdx) {
			mbuf = txq->cmd_ring.buf_info[txq->cmd_ring.next2comp].m;
			txq->cmd_ring.buf_info[txq->cmd_ring.next2comp].m = NULL;
			rte_pktmbuf_free_seg(mbuf);

			/* Mark the txd for which tcd was generated as completed */
			vmxnet3_cmd_ring_adv_next2comp(&txq->cmd_ring);

		vmxnet3_comp_ring_adv_next2proc(comp_ring);
		tcd = (struct Vmxnet3_TxCompDesc *)(comp_ring->base +
						    comp_ring->next2proc);

	PMD_TX_LOG(DEBUG, "Processed %d tx comps & command descs.", completed);
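
/*
 * Burst transmit: reclaim completed descriptors first, then post each packet,
 * copying very small single-segment frames through the data ring. The SOP
 * generation bit is flipped last to hand ownership to the device, and the
 * TXPROD doorbell is rung once txNumDeferred reaches txThreshold.
 */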
vmxnet3_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts,
	vmxnet3_tx_queue_t *txq = tx_queue;
	struct vmxnet3_hw *hw = txq->hw;

	if (unlikely(txq->stopped)) {
		PMD_TX_LOG(DEBUG, "Tx queue is stopped.");

	/* Free up the comp_descriptors aggressively */
	vmxnet3_tq_tx_complete(txq);

	while (nb_tx < nb_pkts) {
		Vmxnet3_GenericDesc *gdesc;
		vmxnet3_buf_info_t *tbi;
		uint32_t first2fill, avail, dw2;
		struct rte_mbuf *txm = tx_pkts[nb_tx];
		struct rte_mbuf *m_seg = txm;

		/* If this packet is excessively fragmented, then drop it */
		if (unlikely(txm->nb_segs > VMXNET3_MAX_TXD_PER_PKT)) {
			++txq->stats.drop_too_many_segs;
			++txq->stats.drop_total;
			rte_pktmbuf_free(txm);

		/* Is the command ring full? */
		avail = vmxnet3_cmd_ring_desc_avail(&txq->cmd_ring);
		if (txm->nb_segs > avail) {
			++txq->stats.tx_ring_full;

		if (txm->nb_segs == 1 && rte_pktmbuf_pkt_len(txm) <= VMXNET3_HDR_COPY_SIZE) {
			struct Vmxnet3_TxDataDesc *tdd;

			tdd = txq->data_ring.base + txq->cmd_ring.next2fill;
			copy_size = rte_pktmbuf_pkt_len(txm);
			rte_memcpy(tdd->data, rte_pktmbuf_mtod(txm, char *), copy_size);

		/* Use the previous gen bit for the SOP desc */
		dw2 = (txq->cmd_ring.gen ^ 0x1) << VMXNET3_TXD_GEN_SHIFT;
		first2fill = txq->cmd_ring.next2fill;

			/* Remember the transmit buffer for cleanup */
			tbi = txq->cmd_ring.buf_info + txq->cmd_ring.next2fill;

			/* NB: the following assumes that the VMXNET3 maximum
			 * transmit buffer size (16K) is greater than the
			 * maximum size of an mbuf segment.
			 */
			gdesc = txq->cmd_ring.base + txq->cmd_ring.next2fill;
				gdesc->txd.addr = rte_cpu_to_le_64(txq->data_ring.basePA +
								   txq->cmd_ring.next2fill *
								   sizeof(struct Vmxnet3_TxDataDesc));
				gdesc->txd.addr = rte_mbuf_data_dma_addr(m_seg);

			gdesc->dword[2] = dw2 | m_seg->data_len;

			/* Move to the next2fill descriptor */
			vmxnet3_cmd_ring_adv_next2fill(&txq->cmd_ring);

			/* Use the right gen for non-SOP desc */
			dw2 = txq->cmd_ring.gen << VMXNET3_TXD_GEN_SHIFT;
		} while ((m_seg = m_seg->next) != NULL);

		/* Update the EOP descriptor */
		gdesc->dword[3] |= VMXNET3_TXD_EOP | VMXNET3_TXD_CQ;

		/* Add VLAN tag if present */
		gdesc = txq->cmd_ring.base + first2fill;
		if (txm->ol_flags & PKT_TX_VLAN_PKT) {
			gdesc->txd.tci = txm->vlan_tci;

		/* TODO: Add transmit checksum offload here */

		/* Flip the GEN bit on the SOP descriptor */
		rte_compiler_barrier();
		gdesc->dword[2] ^= VMXNET3_TXD_GEN;

		txq->shared->ctrl.txNumDeferred++;

	PMD_TX_LOG(DEBUG, "vmxnet3 txThreshold: %u", txq->shared->ctrl.txThreshold);

	if (txq->shared->ctrl.txNumDeferred >= txq->shared->ctrl.txThreshold) {
		txq->shared->ctrl.txNumDeferred = 0;
		/* Notify vSwitch that packets are available. */
		VMXNET3_WRITE_BAR0_REG(hw, (VMXNET3_REG_TXPROD + txq->queue_id * VMXNET3_REG_ALIGN),
				       txq->cmd_ring.next2fill);
 * Allocate mbufs and clusters, and post Rx descriptors with the buffer
 * details so that the device can receive packets into those buffers.
 *
 * Of the two rings, the 1st ring contains buffers of type 0 and type 1;
 * bufs_per_pkt is set such that for non-LRO cases all the buffers required
 * by a frame fit in the 1st ring (the 1st buffer of type 0 and the rest of
 * type 1). The 2nd ring contains buffers of type 1 alone and will mostly be used
vmxnet3_post_rx_bufs(vmxnet3_rx_queue_t *rxq, uint8_t ring_id)
	uint32_t i = 0, val = 0;
	struct vmxnet3_cmd_ring *ring = &rxq->cmd_ring[ring_id];

		/* Usually: one HEAD type buffer per packet
		 * val = (ring->next2fill % rxq->hw->bufs_per_pkt) ?
		 *	VMXNET3_RXD_BTYPE_BODY : VMXNET3_RXD_BTYPE_HEAD;
		 */

		/* We use a single buffer per packet, so all buffers here are HEAD type */
		val = VMXNET3_RXD_BTYPE_HEAD;
		/* All BODY type buffers for the 2nd ring */
		val = VMXNET3_RXD_BTYPE_BODY;

	while (vmxnet3_cmd_ring_desc_avail(ring) > 0) {
		struct Vmxnet3_RxDesc *rxd;
		struct rte_mbuf *mbuf;
		vmxnet3_buf_info_t *buf_info = &ring->buf_info[ring->next2fill];

		rxd = (struct Vmxnet3_RxDesc *)(ring->base + ring->next2fill);

		/* Allocate a blank mbuf for the current Rx descriptor */
		mbuf = rte_rxmbuf_alloc(rxq->mp);
		if (unlikely(mbuf == NULL)) {
			PMD_RX_LOG(ERR, "Error allocating mbuf");
			rxq->stats.rx_buf_alloc_failure++;

		/* Load the mbuf pointer into buf_info[next2fill];
		 * the buf_info structure is the equivalent of the virtio-virtqueue cookie.
		 */
		buf_info->len = (uint16_t)(mbuf->buf_len -
					   RTE_PKTMBUF_HEADROOM);
			rte_mbuf_data_dma_addr_default(mbuf);

		/* Load the Rx descriptor with the buffer's GPA */
		rxd->addr = buf_info->bufPA;

		/* After this point rxd->addr MUST not be NULL */
		rxd->len = buf_info->len;
		/* Flip the gen bit at the end to change ownership */
		rxd->gen = ring->gen;

		vmxnet3_cmd_ring_adv_next2fill(ring);

	/* Return an error only if no buffers are posted at present */
	if (vmxnet3_cmd_ring_desc_avail(ring) >= (ring->size - 1))
/* Receive side checksum and other offloads */
vmxnet3_rx_offload(const Vmxnet3_RxCompDesc *rcd, struct rte_mbuf *rxm)
	/* Check for hardware stripped VLAN tag */
		rxm->ol_flags |= PKT_RX_VLAN_PKT;
		rxm->vlan_tci = rte_le_to_cpu_16((uint16_t)rcd->tci);

	if (rcd->rssType != VMXNET3_RCD_RSS_TYPE_NONE) {
		rxm->ol_flags |= PKT_RX_RSS_HASH;
		rxm->hash.rss = rcd->rssHash;

	/* Check packet type, checksum errors, etc. Only IPv4 is supported for now. */
		struct ether_hdr *eth = rte_pktmbuf_mtod(rxm, struct ether_hdr *);
		struct ipv4_hdr *ip = (struct ipv4_hdr *)(eth + 1);

		if (((ip->version_ihl & 0xf) << 2) > (int)sizeof(struct ipv4_hdr))
			rxm->packet_type = RTE_PTYPE_L3_IPV4_EXT;
			rxm->packet_type = RTE_PTYPE_L3_IPV4;

			rxm->ol_flags |= PKT_RX_IP_CKSUM_BAD;

		if ((rcd->tcp || rcd->udp) && !rcd->tuc)
			rxm->ol_flags |= PKT_RX_L4_CKSUM_BAD;
 * Process the Rx completion ring of the given vmxnet3_rx_queue
 * for an nb_pkts burst and return the number of packets received.
vmxnet3_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t nb_pkts)
	uint32_t nb_rxd, idx;
	vmxnet3_rx_queue_t *rxq;
	Vmxnet3_RxCompDesc *rcd;
	vmxnet3_buf_info_t *rbi;
	struct rte_mbuf *rxm = NULL;
	struct vmxnet3_hw *hw;

	rcd = &rxq->comp_ring.base[rxq->comp_ring.next2proc].rcd;

	if (unlikely(rxq->stopped)) {
		PMD_RX_LOG(DEBUG, "Rx queue is stopped.");

	while (rcd->gen == rxq->comp_ring.gen) {
		if (nb_rx >= nb_pkts)

		ring_idx = (uint8_t)((rcd->rqID == rxq->qid1) ? 0 : 1);
		rxd = (Vmxnet3_RxDesc *)rxq->cmd_ring[ring_idx].base + idx;
		rbi = rxq->cmd_ring[ring_idx].buf_info + idx;

		if (unlikely(rcd->sop != 1 || rcd->eop != 1)) {
			rte_pktmbuf_free_seg(rbi->m);
			PMD_RX_LOG(DEBUG, "Packet spread across multiple buffers\n");

		PMD_RX_LOG(DEBUG, "rxd idx: %d ring idx: %d.", idx, ring_idx);

		VMXNET3_ASSERT(rcd->len <= rxd->len);
		VMXNET3_ASSERT(rbi->m);

		if (unlikely(rcd->len == 0)) {
			PMD_RX_LOG(DEBUG, "Rx buf was skipped. rxring[%d][%d]\n",
			VMXNET3_ASSERT(rcd->sop && rcd->eop);
			rte_pktmbuf_free_seg(rbi->m);

		/* Assume the packet arrives in a single packet buffer */
		if (unlikely(rxd->btype != VMXNET3_RXD_BTYPE_HEAD)) {
				   "Alert: misbehaving device, incorrect "
				   "buffer type used. Packet dropped.");
			rte_pktmbuf_free_seg(rbi->m);

		VMXNET3_ASSERT(rxd->btype == VMXNET3_RXD_BTYPE_HEAD);

		/* Get the packet buffer pointer from buf_info */

		/* Clear the descriptor's associated buf_info so it can be reused */

		/* Update the index at which we received a packet */
		rxq->cmd_ring[ring_idx].next2comp = idx;

		/* For an RCD with EOP set, check if there is a frame error */
		if (unlikely(rcd->err)) {
			rxq->stats.drop_total++;
			rxq->stats.drop_err++;
				rxq->stats.drop_fcs++;
				PMD_RX_LOG(ERR, "Recv packet dropped due to frame err.");
			PMD_RX_LOG(ERR, "Error in received packet rcd#:%d rxd:%d",
				   (int)(rcd - (struct Vmxnet3_RxCompDesc *)
				   rxq->comp_ring.base), rcd->rxdIdx);
			rte_pktmbuf_free_seg(rxm);

		/* Initialize the newly received packet buffer */
		rxm->port = rxq->port_id;
		rxm->pkt_len = (uint16_t)rcd->len;
		rxm->data_len = (uint16_t)rcd->len;
		rxm->data_off = RTE_PKTMBUF_HEADROOM;

		vmxnet3_rx_offload(rcd, rxm);

		rx_pkts[nb_rx++] = rxm;

		rxq->cmd_ring[ring_idx].next2comp = idx;
		VMXNET3_INC_RING_IDX_ONLY(rxq->cmd_ring[ring_idx].next2comp,
					  rxq->cmd_ring[ring_idx].size);

		/* It is time to allocate new buffers and repost descriptors */
		vmxnet3_post_rx_bufs(rxq, ring_idx);
		if (unlikely(rxq->shared->ctrl.updateRxProd)) {
			VMXNET3_WRITE_BAR0_REG(hw, rxprod_reg[ring_idx] + (rxq->queue_id * VMXNET3_REG_ALIGN),
					       rxq->cmd_ring[ring_idx].next2fill);

		/* Advance to the next descriptor in comp_ring */
		vmxnet3_comp_ring_adv_next2proc(&rxq->comp_ring);

		rcd = &rxq->comp_ring.base[rxq->comp_ring.next2proc].rcd;

	if (nb_rxd > rxq->cmd_ring[0].size) {
			   "Used up quota of receiving packets, "
			   "relinquish control.");
 * Create a memzone for the device rings. malloc can't be used as the physical address is
 * needed. If the memzone is already created, then this function returns a pointer
static const struct rte_memzone *
ring_dma_zone_reserve(struct rte_eth_dev *dev, const char *ring_name,
		      uint16_t queue_id, uint32_t ring_size, int socket_id)
	char z_name[RTE_MEMZONE_NAMESIZE];
	const struct rte_memzone *mz;

	snprintf(z_name, sizeof(z_name), "%s_%s_%d_%d",
		 dev->driver->pci_drv.name, ring_name,
		 dev->data->port_id, queue_id);

	mz = rte_memzone_lookup(z_name);

	return rte_memzone_reserve_aligned(z_name, ring_size,
					   socket_id, 0, VMXNET3_RING_BA_ALIGN);
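
/*
 * Set up a Tx queue: validate the requested ring size, reserve a single
 * physically contiguous memzone holding the command, completion and data
 * rings, and allocate the per-descriptor buf_info array.
 */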
vmxnet3_dev_tx_queue_setup(struct rte_eth_dev *dev,
			   unsigned int socket_id,
			   __attribute__((unused)) const struct rte_eth_txconf *tx_conf)
	struct vmxnet3_hw *hw = dev->data->dev_private;
	const struct rte_memzone *mz;
	struct vmxnet3_tx_queue *txq;
	struct vmxnet3_cmd_ring *ring;
	struct vmxnet3_comp_ring *comp_ring;
	struct vmxnet3_data_ring *data_ring;

	PMD_INIT_FUNC_TRACE();

	if ((tx_conf->txq_flags & ETH_TXQ_FLAGS_NOXSUMS) !=
	    ETH_TXQ_FLAGS_NOXSUMS) {
		PMD_INIT_LOG(ERR, "Tx checksum offload is not supported yet");

	txq = rte_zmalloc("ethdev_tx_queue", sizeof(struct vmxnet3_tx_queue), RTE_CACHE_LINE_SIZE);
		PMD_INIT_LOG(ERR, "Cannot allocate Tx queue structure");

	txq->queue_id = queue_idx;
	txq->port_id = dev->data->port_id;
	txq->shared = &hw->tqd_start[queue_idx];
	txq->qid = queue_idx;

	ring = &txq->cmd_ring;
	comp_ring = &txq->comp_ring;
	data_ring = &txq->data_ring;

	/* Tx vmxnet ring length should be between 512 and 4096 */
	if (nb_desc < VMXNET3_DEF_TX_RING_SIZE) {
		PMD_INIT_LOG(ERR, "VMXNET3 Tx Ring Size Min: %u",
			     VMXNET3_DEF_TX_RING_SIZE);
	} else if (nb_desc > VMXNET3_TX_RING_MAX_SIZE) {
		PMD_INIT_LOG(ERR, "VMXNET3 Tx Ring Size Max: %u",
			     VMXNET3_TX_RING_MAX_SIZE);
		ring->size = nb_desc;
		ring->size &= ~VMXNET3_RING_SIZE_MASK;
	comp_ring->size = data_ring->size = ring->size;

	/* Tx vmxnet rings structure initialization */
	ring->gen = VMXNET3_INIT_GEN;
	comp_ring->next2proc = 0;
	comp_ring->gen = VMXNET3_INIT_GEN;

	size = sizeof(struct Vmxnet3_TxDesc) * ring->size;
	size += sizeof(struct Vmxnet3_TxCompDesc) * comp_ring->size;
	size += sizeof(struct Vmxnet3_TxDataDesc) * data_ring->size;

	mz = ring_dma_zone_reserve(dev, "txdesc", queue_idx, size, socket_id);
		PMD_INIT_LOG(ERR, "ERROR: Creating queue descriptors zone");
	memset(mz->addr, 0, mz->len);

	/* cmd_ring initialization */
	ring->base = mz->addr;
	ring->basePA = mz->phys_addr;

	/* comp_ring initialization */
	comp_ring->base = ring->base + ring->size;
	comp_ring->basePA = ring->basePA +
		(sizeof(struct Vmxnet3_TxDesc) * ring->size);

	/* data_ring initialization */
	data_ring->base = (Vmxnet3_TxDataDesc *)(comp_ring->base + comp_ring->size);
	data_ring->basePA = comp_ring->basePA +
		(sizeof(struct Vmxnet3_TxCompDesc) * comp_ring->size);

	/* cmd_ring0 buf_info allocation */
	ring->buf_info = rte_zmalloc("tx_ring_buf_info",
				     ring->size * sizeof(vmxnet3_buf_info_t), RTE_CACHE_LINE_SIZE);
	if (ring->buf_info == NULL) {
		PMD_INIT_LOG(ERR, "ERROR: Creating tx_buf_info structure");

	/* Update the data portion with txq */
	dev->data->tx_queues[queue_idx] = txq;
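
/*
 * Set up an Rx queue: validate the mbuf data room against the maximum packet
 * length, reserve a single memzone holding both command rings and the
 * completion ring, and allocate one buf_info array per command ring.
 */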
vmxnet3_dev_rx_queue_setup(struct rte_eth_dev *dev,
			   unsigned int socket_id,
			   __attribute__((unused)) const struct rte_eth_rxconf *rx_conf,
			   struct rte_mempool *mp)
	const struct rte_memzone *mz;
	struct vmxnet3_rx_queue *rxq;
	struct vmxnet3_hw *hw = dev->data->dev_private;
	struct vmxnet3_cmd_ring *ring0, *ring1, *ring;
	struct vmxnet3_comp_ring *comp_ring;

	PMD_INIT_FUNC_TRACE();

	buf_size = rte_pktmbuf_data_room_size(mp) -
		   RTE_PKTMBUF_HEADROOM;

	if (dev->data->dev_conf.rxmode.max_rx_pkt_len > buf_size) {
		PMD_INIT_LOG(ERR, "buf_size = %u, max_pkt_len = %u, "
			     "VMXNET3 doesn't support scattered packets yet",
			     buf_size, dev->data->dev_conf.rxmode.max_rx_pkt_len);

	rxq = rte_zmalloc("ethdev_rx_queue", sizeof(struct vmxnet3_rx_queue), RTE_CACHE_LINE_SIZE);
		PMD_INIT_LOG(ERR, "Cannot allocate Rx queue structure");

	rxq->queue_id = queue_idx;
	rxq->port_id = dev->data->port_id;
	rxq->shared = &hw->rqd_start[queue_idx];
	rxq->qid1 = queue_idx;
	rxq->qid2 = queue_idx + hw->num_rx_queues;

	ring0 = &rxq->cmd_ring[0];
	ring1 = &rxq->cmd_ring[1];
	comp_ring = &rxq->comp_ring;

	/* Rx vmxnet ring lengths should be between 256 and 4096 */
	if (nb_desc < VMXNET3_DEF_RX_RING_SIZE) {
		PMD_INIT_LOG(ERR, "VMXNET3 Rx Ring Size Min: 256");
	} else if (nb_desc > VMXNET3_RX_RING_MAX_SIZE) {
		PMD_INIT_LOG(ERR, "VMXNET3 Rx Ring Size Max: 4096");
		ring0->size = nb_desc;
		ring0->size &= ~VMXNET3_RING_SIZE_MASK;
		ring1->size = ring0->size;

	comp_ring->size = ring0->size + ring1->size;

	/* Rx vmxnet rings structure initialization */
	ring0->next2fill = 0;
	ring1->next2fill = 0;
	ring0->next2comp = 0;
	ring1->next2comp = 0;
	ring0->gen = VMXNET3_INIT_GEN;
	ring1->gen = VMXNET3_INIT_GEN;
	comp_ring->next2proc = 0;
	comp_ring->gen = VMXNET3_INIT_GEN;

	size = sizeof(struct Vmxnet3_RxDesc) * (ring0->size + ring1->size);
	size += sizeof(struct Vmxnet3_RxCompDesc) * comp_ring->size;

	mz = ring_dma_zone_reserve(dev, "rxdesc", queue_idx, size, socket_id);
		PMD_INIT_LOG(ERR, "ERROR: Creating queue descriptors zone");
	memset(mz->addr, 0, mz->len);

	/* cmd_ring0 initialization */
	ring0->base = mz->addr;
	ring0->basePA = mz->phys_addr;

	/* cmd_ring1 initialization */
	ring1->base = ring0->base + ring0->size;
	ring1->basePA = ring0->basePA + sizeof(struct Vmxnet3_RxDesc) * ring0->size;

	/* comp_ring initialization */
	comp_ring->base = ring1->base + ring1->size;
	comp_ring->basePA = ring1->basePA + sizeof(struct Vmxnet3_RxDesc) *

	/* cmd_ring0-cmd_ring1 buf_info allocation */
	for (i = 0; i < VMXNET3_RX_CMDRING_SIZE; i++) {
		ring = &rxq->cmd_ring[i];

		snprintf(mem_name, sizeof(mem_name), "rx_ring_%d_buf_info", i);

		ring->buf_info = rte_zmalloc(mem_name, ring->size * sizeof(vmxnet3_buf_info_t),
					     RTE_CACHE_LINE_SIZE);
		if (ring->buf_info == NULL) {
			PMD_INIT_LOG(ERR, "ERROR: Creating rx_buf_info structure");

	/* Update the data portion with rxq */
	dev->data->rx_queues[queue_idx] = rxq;
 * Initializes the receive unit.
 * Loads mbufs into the Rx queues in advance.
vmxnet3_dev_rxtx_init(struct rte_eth_dev *dev)
	struct vmxnet3_hw *hw = dev->data->dev_private;

	PMD_INIT_FUNC_TRACE();

	for (i = 0; i < hw->num_rx_queues; i++) {
		vmxnet3_rx_queue_t *rxq = dev->data->rx_queues[i];

		for (j = 0; j < VMXNET3_RX_CMDRING_SIZE; j++) {
			/* Post as many buffers as the ring will hold */
			ret = vmxnet3_post_rx_bufs(rxq, j);
				PMD_INIT_LOG(ERR, "ERROR: Posting Rxq: %d buffers ring: %d", i, j);

			/* Update the device with next2fill so it knows which
			 * buffers to fill for incoming packets.
			 */
			if (unlikely(rxq->shared->ctrl.updateRxProd)) {
				VMXNET3_WRITE_BAR0_REG(hw, rxprod_reg[j] + (rxq->queue_id * VMXNET3_REG_ALIGN),
						       rxq->cmd_ring[j].next2fill);

		rxq->stopped = FALSE;

	for (i = 0; i < dev->data->nb_tx_queues; i++) {
		struct vmxnet3_tx_queue *txq = dev->data->tx_queues[i];

		txq->stopped = FALSE;
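
/*
 * Default 40-byte Toeplitz RSS hash key, used when the application does not
 * supply its own key in the port RSS configuration.
 */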
static uint8_t rss_intel_key[40] = {
	0x6D, 0x5A, 0x56, 0xDA, 0x25, 0x5B, 0x0E, 0xC2,
	0x41, 0x67, 0x25, 0x3D, 0x43, 0xA3, 0x8F, 0xB0,
	0xD0, 0xCA, 0x2B, 0xCB, 0xAE, 0x7B, 0x30, 0xB4,
	0x77, 0xCB, 0x2D, 0xA3, 0x80, 0x30, 0xF2, 0x0C,
	0x6A, 0x42, 0xB7, 0x3B, 0xBE, 0xAC, 0x01, 0xFA,

 * Configure RSS feature
vmxnet3_rss_configure(struct rte_eth_dev *dev)
	struct vmxnet3_hw *hw = dev->data->dev_private;
	struct VMXNET3_RSSConf *dev_rss_conf;
	struct rte_eth_rss_conf *port_rss_conf;

	PMD_INIT_FUNC_TRACE();

	dev_rss_conf = hw->rss_conf;
	port_rss_conf = &dev->data->dev_conf.rx_adv_conf.rss_conf;

	/* Loading hashFunc */
	dev_rss_conf->hashFunc = VMXNET3_RSS_HASH_FUNC_TOEPLITZ;
	/* Loading hashKeySize */
	dev_rss_conf->hashKeySize = VMXNET3_RSS_MAX_KEY_SIZE;
	/* Loading indTableSize: must not exceed VMXNET3_RSS_MAX_IND_TABLE_SIZE (128) */
	dev_rss_conf->indTableSize = (uint16_t)(hw->num_rx_queues * 4);

	if (port_rss_conf->rss_key == NULL) {
		/* Default hash key */
		port_rss_conf->rss_key = rss_intel_key;

	/* Loading hashKey */
	memcpy(&dev_rss_conf->hashKey[0], port_rss_conf->rss_key,
	       dev_rss_conf->hashKeySize);

	/* Loading indTable */
	for (i = 0, j = 0; i < dev_rss_conf->indTableSize; i++, j++) {
		if (j == dev->data->nb_rx_queues)
		dev_rss_conf->indTable[i] = j;

	/* Loading hashType */
	dev_rss_conf->hashType = 0;
	rss_hf = port_rss_conf->rss_hf & VMXNET3_RSS_OFFLOAD_ALL;
	if (rss_hf & ETH_RSS_IPV4)
		dev_rss_conf->hashType |= VMXNET3_RSS_HASH_TYPE_IPV4;
	if (rss_hf & ETH_RSS_NONFRAG_IPV4_TCP)
		dev_rss_conf->hashType |= VMXNET3_RSS_HASH_TYPE_TCP_IPV4;
	if (rss_hf & ETH_RSS_IPV6)
		dev_rss_conf->hashType |= VMXNET3_RSS_HASH_TYPE_IPV6;
	if (rss_hf & ETH_RSS_NONFRAG_IPV6_TCP)
		dev_rss_conf->hashType |= VMXNET3_RSS_HASH_TYPE_TCP_IPV6;

	return VMXNET3_SUCCESS;