 * Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 *   * Redistributions of source code must retain the above copyright
 *     notice, this list of conditions and the following disclaimer.
 *   * Redistributions in binary form must reproduce the above copyright
 *     notice, this list of conditions and the following disclaimer in
 *     the documentation and/or other materials provided with the
 *     distribution.
 *   * Neither the name of Intel Corporation nor the names of its
 *     contributors may be used to endorse or promote products derived
 *     from this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#include <sys/queue.h>

#include <rte_byteorder.h>
#include <rte_common.h>
#include <rte_cycles.h>
#include <rte_debug.h>
#include <rte_interrupts.h>
#include <rte_memory.h>
#include <rte_memzone.h>
#include <rte_launch.h>
#include <rte_tailq.h>
#include <rte_per_lcore.h>
#include <rte_lcore.h>
#include <rte_atomic.h>
#include <rte_branch_prediction.h>
#include <rte_mempool.h>
#include <rte_malloc.h>
#include <rte_ether.h>
#include <rte_ethdev.h>
#include <rte_prefetch.h>
#include <rte_string_fns.h>
#include <rte_errno.h>

#include "vmxnet3/vmxnet3_defs.h"
#include "vmxnet3_ring.h"
#include "vmxnet3_logs.h"
#include "vmxnet3_ethdev.h"
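
/*
 * Helper macros: DMA (physical) address of an mbuf's data buffer, either at
 * the mbuf's current data offset or at the default headroom offset.
 */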
#define RTE_MBUF_DATA_DMA_ADDR(mb) \
	(uint64_t) ((mb)->buf_physaddr + (mb)->data_off)

#define RTE_MBUF_DATA_DMA_ADDR_DEFAULT(mb) \
	(uint64_t) ((mb)->buf_physaddr + RTE_PKTMBUF_HEADROOM)
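
/* RX producer-index doorbell registers, one per RX command ring. */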
static uint32_t rxprod_reg[2] = {VMXNET3_REG_RXPROD, VMXNET3_REG_RXPROD2};

static inline int vmxnet3_post_rx_bufs(vmxnet3_rx_queue_t *, uint8_t);
static inline void vmxnet3_tq_tx_complete(vmxnet3_tx_queue_t *);

#ifdef RTE_LIBRTE_VMXNET3_DEBUG_DRIVER_NOT_USED
static void vmxnet3_rxq_dump(struct vmxnet3_rx_queue *);
static void vmxnet3_txq_dump(struct vmxnet3_tx_queue *);

static inline struct rte_mbuf *
rte_rxmbuf_alloc(struct rte_mempool *mp)
	m = __rte_mbuf_raw_alloc(mp);
	__rte_mbuf_sanity_check_raw(m, 0);

#ifdef RTE_LIBRTE_VMXNET3_DEBUG_DRIVER_NOT_USED
vmxnet3_rxq_dump(struct vmxnet3_rx_queue *rxq)
		   "RXQ: cmd0 base : 0x%p cmd1 base : 0x%p comp ring base : 0x%p.",
		   rxq->cmd_ring[0].base, rxq->cmd_ring[1].base, rxq->comp_ring.base);
		   "RXQ: cmd0 basePA : 0x%lx cmd1 basePA : 0x%lx comp ring basePA : 0x%lx.",
		   (unsigned long)rxq->cmd_ring[0].basePA,
		   (unsigned long)rxq->cmd_ring[1].basePA,
		   (unsigned long)rxq->comp_ring.basePA);

	avail = vmxnet3_cmd_ring_desc_avail(&rxq->cmd_ring[0]);
		   "RXQ:cmd0: size=%u; free=%u; next2proc=%u; queued=%u",
		   (uint32_t)rxq->cmd_ring[0].size, avail,
		   rxq->comp_ring.next2proc,
		   rxq->cmd_ring[0].size - avail);

	avail = vmxnet3_cmd_ring_desc_avail(&rxq->cmd_ring[1]);
	PMD_RX_LOG(DEBUG, "RXQ:cmd1 size=%u; free=%u; next2proc=%u; queued=%u",
		   (uint32_t)rxq->cmd_ring[1].size, avail, rxq->comp_ring.next2proc,
		   rxq->cmd_ring[1].size - avail);

vmxnet3_txq_dump(struct vmxnet3_tx_queue *txq)
	PMD_TX_LOG(DEBUG, "TXQ: cmd base : 0x%p comp ring base : 0x%p data ring base : 0x%p.",
		   txq->cmd_ring.base, txq->comp_ring.base, txq->data_ring.base);
	PMD_TX_LOG(DEBUG, "TXQ: cmd basePA : 0x%lx comp ring basePA : 0x%lx data ring basePA : 0x%lx.",
		   (unsigned long)txq->cmd_ring.basePA,
		   (unsigned long)txq->comp_ring.basePA,
		   (unsigned long)txq->data_ring.basePA);

	avail = vmxnet3_cmd_ring_desc_avail(&txq->cmd_ring);
	PMD_TX_LOG(DEBUG, "TXQ: size=%u; free=%u; next2proc=%u; queued=%u",
		   (uint32_t)txq->cmd_ring.size, avail,
		   txq->comp_ring.next2proc, txq->cmd_ring.size - avail);

vmxnet3_cmd_ring_release_mbufs(vmxnet3_cmd_ring_t *ring)
	while (ring->next2comp != ring->next2fill) {
		/* No need to worry about tx desc ownership, device is quiesced by now. */
		vmxnet3_buf_info_t *buf_info = ring->buf_info + ring->next2comp;

		rte_pktmbuf_free(buf_info->m);
		vmxnet3_cmd_ring_adv_next2comp(ring);

vmxnet3_cmd_ring_release(vmxnet3_cmd_ring_t *ring)
	vmxnet3_cmd_ring_release_mbufs(ring);
	rte_free(ring->buf_info);
	ring->buf_info = NULL;

vmxnet3_dev_tx_queue_release(void *txq)
	vmxnet3_tx_queue_t *tq = txq;

	/* Release the cmd_ring */
	vmxnet3_cmd_ring_release(&tq->cmd_ring);

vmxnet3_dev_rx_queue_release(void *rxq)
	vmxnet3_rx_queue_t *rq = rxq;

	/* Release both the cmd_rings */
	for (i = 0; i < VMXNET3_RX_CMDRING_SIZE; i++)
		vmxnet3_cmd_ring_release(&rq->cmd_ring[i]);

vmxnet3_dev_tx_queue_reset(void *txq)
	vmxnet3_tx_queue_t *tq = txq;
	struct vmxnet3_cmd_ring *ring = &tq->cmd_ring;
	struct vmxnet3_comp_ring *comp_ring = &tq->comp_ring;
	struct vmxnet3_data_ring *data_ring = &tq->data_ring;

	/* Release the cmd_ring mbufs */
	vmxnet3_cmd_ring_release_mbufs(&tq->cmd_ring);

	/* Tx vmxnet rings structure initialization */
	ring->gen = VMXNET3_INIT_GEN;
	comp_ring->next2proc = 0;
	comp_ring->gen = VMXNET3_INIT_GEN;

	size = sizeof(struct Vmxnet3_TxDesc) * ring->size;
	size += sizeof(struct Vmxnet3_TxCompDesc) * comp_ring->size;
	size += sizeof(struct Vmxnet3_TxDataDesc) * data_ring->size;
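
	/*
	 * The command, completion and data rings live in one contiguous
	 * memzone (see vmxnet3_dev_tx_queue_setup), so a single memset over
	 * the summed size clears all three.
	 */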
	memset(ring->base, 0, size);

vmxnet3_dev_rx_queue_reset(void *rxq)
	vmxnet3_rx_queue_t *rq = rxq;
	struct vmxnet3_cmd_ring *ring0, *ring1;
	struct vmxnet3_comp_ring *comp_ring;

	/* Release both the cmd_rings mbufs */
	for (i = 0; i < VMXNET3_RX_CMDRING_SIZE; i++)
		vmxnet3_cmd_ring_release_mbufs(&rq->cmd_ring[i]);

	ring0 = &rq->cmd_ring[0];
	ring1 = &rq->cmd_ring[1];
	comp_ring = &rq->comp_ring;

	/* Rx vmxnet rings structure initialization */
	ring0->next2fill = 0;
	ring1->next2fill = 0;
	ring0->next2comp = 0;
	ring1->next2comp = 0;
	ring0->gen = VMXNET3_INIT_GEN;
	ring1->gen = VMXNET3_INIT_GEN;
	comp_ring->next2proc = 0;
	comp_ring->gen = VMXNET3_INIT_GEN;

	size = sizeof(struct Vmxnet3_RxDesc) * (ring0->size + ring1->size);
	size += sizeof(struct Vmxnet3_RxCompDesc) * comp_ring->size;

	memset(ring0->base, 0, size);

vmxnet3_dev_clear_queues(struct rte_eth_dev *dev)
	PMD_INIT_FUNC_TRACE();

	for (i = 0; i < dev->data->nb_tx_queues; i++) {
		struct vmxnet3_tx_queue *txq = dev->data->tx_queues[i];

		vmxnet3_dev_tx_queue_reset(txq);

	for (i = 0; i < dev->data->nb_rx_queues; i++) {
		struct vmxnet3_rx_queue *rxq = dev->data->rx_queues[i];

		vmxnet3_dev_rx_queue_reset(rxq);

vmxnet3_tq_tx_complete(vmxnet3_tx_queue_t *txq)
	struct rte_mbuf *mbuf;
	vmxnet3_comp_ring_t *comp_ring = &txq->comp_ring;
	struct Vmxnet3_TxCompDesc *tcd = (struct Vmxnet3_TxCompDesc *)
		(comp_ring->base + comp_ring->next2proc);
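
	/*
	 * A completion descriptor is valid only while its gen bit matches the
	 * ring's current generation; the bit flips each time the ring wraps,
	 * so stale entries from the previous pass are skipped automatically.
	 */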
	while (tcd->gen == comp_ring->gen) {
		/* Release cmd_ring descriptor and free mbuf */
#ifdef RTE_LIBRTE_VMXNET3_DEBUG_DRIVER
		VMXNET3_ASSERT(txq->cmd_ring.base[tcd->txdIdx].txd.eop == 1);
		mbuf = txq->cmd_ring.buf_info[tcd->txdIdx].m;
		if (unlikely(mbuf == NULL))
			rte_panic("EOP desc does not point to a valid mbuf");

		rte_pktmbuf_free(mbuf);

		txq->cmd_ring.buf_info[tcd->txdIdx].m = NULL;
		/* Mark the txd for which tcd was generated as completed */
		vmxnet3_cmd_ring_adv_next2comp(&txq->cmd_ring);

		vmxnet3_comp_ring_adv_next2proc(comp_ring);
		tcd = (struct Vmxnet3_TxCompDesc *)(comp_ring->base +
						    comp_ring->next2proc);

	PMD_TX_LOG(DEBUG, "Processed %d tx comps & command descs.", completed);

vmxnet3_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts,
	Vmxnet3_TxDesc *txd = NULL;
	vmxnet3_buf_info_t *tbi = NULL;
	struct vmxnet3_hw *hw;
	struct rte_mbuf *txm;
	vmxnet3_tx_queue_t *txq = tx_queue;

	if (unlikely(txq->stopped)) {
		PMD_TX_LOG(DEBUG, "Tx queue is stopped.");

	/* Free up the comp_descriptors aggressively */
	vmxnet3_tq_tx_complete(txq);

	while (nb_tx < nb_pkts) {
		if (vmxnet3_cmd_ring_desc_avail(&txq->cmd_ring)) {

			txm = tx_pkts[nb_tx];
			/* Multi-segment (scatter) packets are not supported yet; drop them */
			if (txm->nb_segs != 1) {
				PMD_TX_LOG(DEBUG, "Don't support scatter packets yet, drop!");
				rte_pktmbuf_free(tx_pkts[nb_tx]);
				txq->stats.drop_total++;

			/* The MTU excludes the Ethernet header, so add it back for the length check */
			if (txm->data_len > (hw->cur_mtu + ETHER_HDR_LEN)) {
				PMD_TX_LOG(DEBUG, "Packet data_len higher than MTU");
				rte_pktmbuf_free(tx_pkts[nb_tx]);
				txq->stats.drop_total++;

			txd = (Vmxnet3_TxDesc *)(txq->cmd_ring.base + txq->cmd_ring.next2fill);
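
			/*
			 * Small packets (up to VMXNET3_HDR_COPY_SIZE bytes) are
			 * copied into the per-queue data ring; the descriptor
			 * then points at the data-ring slot instead of the mbuf,
			 * which avoids a separate DMA read of a very small buffer.
			 */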
			if (rte_pktmbuf_pkt_len(txm) <= VMXNET3_HDR_COPY_SIZE) {
				struct Vmxnet3_TxDataDesc *tdd;

				tdd = txq->data_ring.base + txq->cmd_ring.next2fill;
				copy_size = rte_pktmbuf_pkt_len(txm);
				rte_memcpy(tdd->data, rte_pktmbuf_mtod(txm, char *), copy_size);

			/* Fill the tx descriptor */
			tbi = txq->cmd_ring.buf_info + txq->cmd_ring.next2fill;
			tbi->bufPA = RTE_MBUF_DATA_DMA_ADDR(txm);

				txd->addr = rte_cpu_to_le_64(txq->data_ring.basePA +
							     txq->cmd_ring.next2fill *
							     sizeof(struct Vmxnet3_TxDataDesc));

				txd->addr = tbi->bufPA;
			txd->len = txm->data_len;

			/* Mark the last descriptor as End of Packet. */

			/* Add VLAN tag if requested */
			if (txm->ol_flags & PKT_TX_VLAN_PKT) {
				txd->tci = rte_cpu_to_le_16(txm->vlan_tci);

			/* Record current mbuf for freeing it later in tx complete */
#ifdef RTE_LIBRTE_VMXNET3_DEBUG_DRIVER

			/* Set the offloading mode to default */
			txd->om = VMXNET3_OM_NONE;

			/* finally flip the GEN bit of the SOP desc */
			txd->gen = txq->cmd_ring.gen;
			txq->shared->ctrl.txNumDeferred++;

			/* move to the next2fill descriptor */
			vmxnet3_cmd_ring_adv_next2fill(&txq->cmd_ring);

			PMD_TX_LOG(DEBUG, "No free tx cmd desc(s)");
			txq->stats.drop_total += (nb_pkts - nb_tx);

	PMD_TX_LOG(DEBUG, "vmxnet3 txThreshold: %u", txq->shared->ctrl.txThreshold);
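
	/*
	 * Doorbell writes are batched: the TXPROD register is only touched once
	 * the number of deferred descriptors crosses the threshold advertised in
	 * the shared queue control area, keeping register accesses to a minimum.
	 */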
	if (txq->shared->ctrl.txNumDeferred >= txq->shared->ctrl.txThreshold) {
		txq->shared->ctrl.txNumDeferred = 0;
		/* Notify vSwitch that packets are available. */
		VMXNET3_WRITE_BAR0_REG(hw, (VMXNET3_REG_TXPROD + txq->queue_id * VMXNET3_REG_ALIGN),
				       txq->cmd_ring.next2fill);

 *  Allocates mbufs and clusters, and posts RX descriptors with the buffer
 *  details so that the device can receive packets into those buffers.
 *
 *  Of the two rings, the 1st ring holds buffers of type 0 and type 1.
 *  bufs_per_pkt is set such that for non-LRO cases all the buffers required
 *  by a frame fit in the 1st ring (the 1st buf of type 0 and the rest of type 1).
 *  The 2nd ring holds type 1 buffers only and is mostly intended for LRO.

vmxnet3_post_rx_bufs(vmxnet3_rx_queue_t *rxq, uint8_t ring_id)
	uint32_t i = 0, val = 0;
	struct vmxnet3_cmd_ring *ring = &rxq->cmd_ring[ring_id];

	/* Usually: One HEAD type buf per packet
	 * val = (ring->next2fill % rxq->hw->bufs_per_pkt) ?
	 *	 VMXNET3_RXD_BTYPE_BODY : VMXNET3_RXD_BTYPE_HEAD;

	/* We use a single buffer per packet, so all descriptors here are HEADs */
	val = VMXNET3_RXD_BTYPE_HEAD;

	/* All BODY type buffers for the 2nd ring */
	val = VMXNET3_RXD_BTYPE_BODY;

	while (vmxnet3_cmd_ring_desc_avail(ring) > 0) {
		struct Vmxnet3_RxDesc *rxd;
		struct rte_mbuf *mbuf;
		vmxnet3_buf_info_t *buf_info = &ring->buf_info[ring->next2fill];

		rxd = (struct Vmxnet3_RxDesc *)(ring->base + ring->next2fill);

		/* Allocate a blank mbuf for the current Rx descriptor */
		mbuf = rte_rxmbuf_alloc(rxq->mp);
		if (unlikely(mbuf == NULL)) {
			PMD_RX_LOG(ERR, "Error allocating mbuf in %s", __func__);
			rxq->stats.rx_buf_alloc_failure++;

		 * Load the mbuf pointer into buf_info;
		 * the buf_info structure plays the same role as the cookie in a virtio virtqueue.
		buf_info->len = (uint16_t)(mbuf->buf_len -
					   RTE_PKTMBUF_HEADROOM);
		buf_info->bufPA = RTE_MBUF_DATA_DMA_ADDR_DEFAULT(mbuf);

		/* Load the Rx descriptor with the buffer's GPA (guest-physical address) */
		rxd->addr = buf_info->bufPA;

		/* After this point rxd->addr MUST not be NULL */
		rxd->len = buf_info->len;
		/* Flip the gen bit last to hand the descriptor over to the device */
		rxd->gen = ring->gen;

		vmxnet3_cmd_ring_adv_next2fill(ring);

	/* Return an error only if no buffers were posted at all */
	if (vmxnet3_cmd_ring_desc_avail(ring) >= (ring->size - 1))

 * Process the Rx Completion Ring of the given vmxnet3_rx_queue
 * for a burst of nb_pkts and return the number of packets received.

vmxnet3_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t nb_pkts)
	uint32_t nb_rxd, idx;
	vmxnet3_rx_queue_t *rxq;
	Vmxnet3_RxCompDesc *rcd;
	vmxnet3_buf_info_t *rbi;
	struct rte_mbuf *rxm = NULL;
	struct vmxnet3_hw *hw;

	rcd = &rxq->comp_ring.base[rxq->comp_ring.next2proc].rcd;
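
	/*
	 * Each RX queue owns two command rings that feed a single completion
	 * ring; rcd->rqID identifies which command ring a completion belongs
	 * to, and an entry is valid only while its gen bit matches the
	 * completion ring's current generation.
	 */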
	if (unlikely(rxq->stopped)) {
		PMD_RX_LOG(DEBUG, "Rx queue is stopped.");

	while (rcd->gen == rxq->comp_ring.gen) {
		if (nb_rx >= nb_pkts)

		ring_idx = (uint8_t)((rcd->rqID == rxq->qid1) ? 0 : 1);
		rxd = (Vmxnet3_RxDesc *)rxq->cmd_ring[ring_idx].base + idx;
		rbi = rxq->cmd_ring[ring_idx].buf_info + idx;

		if (unlikely(rcd->sop != 1 || rcd->eop != 1)) {
			rte_pktmbuf_free_seg(rbi->m);
			PMD_RX_LOG(DEBUG, "Packet spread across multiple buffers\n");

		PMD_RX_LOG(DEBUG, "rxd idx: %d ring idx: %d.", idx, ring_idx);

#ifdef RTE_LIBRTE_VMXNET3_DEBUG_DRIVER
		VMXNET3_ASSERT(rcd->len <= rxd->len);
		VMXNET3_ASSERT(rbi->m);

		if (unlikely(rcd->len == 0)) {
			PMD_RX_LOG(DEBUG, "Rx buf was skipped. rxring[%d][%d]\n",
#ifdef RTE_LIBRTE_VMXNET3_DEBUG_DRIVER
			VMXNET3_ASSERT(rcd->sop && rcd->eop);
			rte_pktmbuf_free_seg(rbi->m);

		/* Assuming a packet arrives in a single packet buffer */
		if (unlikely(rxd->btype != VMXNET3_RXD_BTYPE_HEAD)) {
				   "Alert : Misbehaving device, incorrect "
				   "buffer type used. Packet dropped.");
			rte_pktmbuf_free_seg(rbi->m);

#ifdef RTE_LIBRTE_VMXNET3_DEBUG_DRIVER
		VMXNET3_ASSERT(rxd->btype == VMXNET3_RXD_BTYPE_HEAD);

		/* Get the packet buffer pointer from buf_info */

		/* Clear the descriptor's buf_info so it can be reused */

		/* Update the index at which we received a packet */
		rxq->cmd_ring[ring_idx].next2comp = idx;

		/* For an RCD with EOP set, check if there is a frame error */
		if (unlikely(rcd->err)) {
			rxq->stats.drop_total++;
			rxq->stats.drop_err++;

				rxq->stats.drop_fcs++;
				PMD_RX_LOG(ERR, "Recv packet dropped due to frame err.");

			PMD_RX_LOG(ERR, "Error in received packet rcd#:%d rxd:%d",
				   (int)(rcd - (struct Vmxnet3_RxCompDesc *)
					 rxq->comp_ring.base), rcd->rxdIdx);
			rte_pktmbuf_free_seg(rxm);

		/* Check for a hardware-stripped VLAN tag */
			PMD_RX_LOG(DEBUG, "Received packet with vlan ID: %d.",
			rxm->ol_flags = PKT_RX_VLAN_PKT;
			/* Copy the vlan tag into the packet buffer */
			rxm->vlan_tci = rte_le_to_cpu_16((uint16_t)rcd->tci);

		/* Initialize the newly received packet buffer */
		rxm->port = rxq->port_id;
		rxm->pkt_len = (uint16_t)rcd->len;
		rxm->data_len = (uint16_t)rcd->len;
		rxm->data_off = RTE_PKTMBUF_HEADROOM;

		/* Check packet type, checksum errors, etc. Only IPv4 is supported for now. */
			struct ether_hdr *eth = rte_pktmbuf_mtod(rxm, struct ether_hdr *);
			struct ipv4_hdr *ip = (struct ipv4_hdr *)(eth + 1);

			if (((ip->version_ihl & 0xf) << 2) > (int)sizeof(struct ipv4_hdr))
				rxm->ol_flags |= PKT_RX_IPV4_HDR_EXT;
				rxm->ol_flags |= PKT_RX_IPV4_HDR;

				rxm->ol_flags |= PKT_RX_IP_CKSUM_BAD;

			if ((rcd->tcp || rcd->udp) && !rcd->tuc)
				rxm->ol_flags |= PKT_RX_L4_CKSUM_BAD;

		rx_pkts[nb_rx++] = rxm;

		rxq->cmd_ring[ring_idx].next2comp = idx;
		VMXNET3_INC_RING_IDX_ONLY(rxq->cmd_ring[ring_idx].next2comp, rxq->cmd_ring[ring_idx].size);

		/* Time to allocate new mbufs and repost descriptors */
		vmxnet3_post_rx_bufs(rxq, ring_idx);
		if (unlikely(rxq->shared->ctrl.updateRxProd)) {
			VMXNET3_WRITE_BAR0_REG(hw, rxprod_reg[ring_idx] + (rxq->queue_id * VMXNET3_REG_ALIGN),
					       rxq->cmd_ring[ring_idx].next2fill);

		/* Advance to the next descriptor in comp_ring */
		vmxnet3_comp_ring_adv_next2proc(&rxq->comp_ring);

		rcd = &rxq->comp_ring.base[rxq->comp_ring.next2proc].rcd;
		if (nb_rxd > rxq->cmd_ring[0].size) {
			PMD_RX_LOG(ERR, "Used up quota of receiving packets,"
				   " relinquish control.");

 * Create a memzone for the device rings. malloc can't be used because the
 * physical address is needed. If the memzone has already been created, this
 * function returns a pointer to it.

static const struct rte_memzone *
ring_dma_zone_reserve(struct rte_eth_dev *dev, const char *ring_name,
		      uint16_t queue_id, uint32_t ring_size, int socket_id)
	char z_name[RTE_MEMZONE_NAMESIZE];
	const struct rte_memzone *mz;

	snprintf(z_name, sizeof(z_name), "%s_%s_%d_%d",
		 dev->driver->pci_drv.name, ring_name,
		 dev->data->port_id, queue_id);

	mz = rte_memzone_lookup(z_name);

	return rte_memzone_reserve_aligned(z_name, ring_size,
					   socket_id, 0, VMXNET3_RING_BA_ALIGN);

vmxnet3_dev_tx_queue_setup(struct rte_eth_dev *dev,
			   unsigned int socket_id,
			   __attribute__((unused)) const struct rte_eth_txconf *tx_conf)
	struct vmxnet3_hw *hw = dev->data->dev_private;
	const struct rte_memzone *mz;
	struct vmxnet3_tx_queue *txq;
	struct vmxnet3_cmd_ring *ring;
	struct vmxnet3_comp_ring *comp_ring;
	struct vmxnet3_data_ring *data_ring;

	PMD_INIT_FUNC_TRACE();

	if ((tx_conf->txq_flags & ETH_TXQ_FLAGS_NOMULTSEGS) !=
	    ETH_TXQ_FLAGS_NOMULTSEGS) {
		PMD_INIT_LOG(ERR, "TX multi-segment packets are not supported yet");

	if ((tx_conf->txq_flags & ETH_TXQ_FLAGS_NOOFFLOADS) !=
	    ETH_TXQ_FLAGS_NOOFFLOADS) {
		PMD_INIT_LOG(ERR, "TX offloads are not supported yet");

	txq = rte_zmalloc("ethdev_tx_queue", sizeof(struct vmxnet3_tx_queue), RTE_CACHE_LINE_SIZE);
		PMD_INIT_LOG(ERR, "Cannot allocate tx queue structure");

	txq->queue_id = queue_idx;
	txq->port_id = dev->data->port_id;
	txq->shared = &hw->tqd_start[queue_idx];
	txq->qid = queue_idx;

	ring = &txq->cmd_ring;
	comp_ring = &txq->comp_ring;
	data_ring = &txq->data_ring;

	/* Tx vmxnet ring size should be between 512 and 4096 */
	if (nb_desc < VMXNET3_DEF_TX_RING_SIZE) {
		PMD_INIT_LOG(ERR, "VMXNET3 Tx Ring Size Min: %u",
			     VMXNET3_DEF_TX_RING_SIZE);
	} else if (nb_desc > VMXNET3_TX_RING_MAX_SIZE) {
		PMD_INIT_LOG(ERR, "VMXNET3 Tx Ring Size Max: %u",
			     VMXNET3_TX_RING_MAX_SIZE);

	ring->size = nb_desc;
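	/* Round the requested size down to the ring-size alignment the device requires */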
	ring->size &= ~VMXNET3_RING_SIZE_MASK;
	comp_ring->size = data_ring->size = ring->size;

	/* Tx vmxnet rings structure initialization */
	ring->gen = VMXNET3_INIT_GEN;
	comp_ring->next2proc = 0;
	comp_ring->gen = VMXNET3_INIT_GEN;

	size = sizeof(struct Vmxnet3_TxDesc) * ring->size;
	size += sizeof(struct Vmxnet3_TxCompDesc) * comp_ring->size;
	size += sizeof(struct Vmxnet3_TxDataDesc) * data_ring->size;

	mz = ring_dma_zone_reserve(dev, "txdesc", queue_idx, size, socket_id);
		PMD_INIT_LOG(ERR, "ERROR: Creating queue descriptors zone");
	memset(mz->addr, 0, mz->len);

	/* cmd_ring initialization */
	ring->base = mz->addr;
	ring->basePA = mz->phys_addr;

	/* comp_ring initialization */
	comp_ring->base = ring->base + ring->size;
	comp_ring->basePA = ring->basePA +
			    (sizeof(struct Vmxnet3_TxDesc) * ring->size);

	/* data_ring initialization */
	data_ring->base = (Vmxnet3_TxDataDesc *)(comp_ring->base + comp_ring->size);
	data_ring->basePA = comp_ring->basePA +
			    (sizeof(struct Vmxnet3_TxCompDesc) * comp_ring->size);

	/* cmd_ring0 buf_info allocation */
	ring->buf_info = rte_zmalloc("tx_ring_buf_info",
				     ring->size * sizeof(vmxnet3_buf_info_t), RTE_CACHE_LINE_SIZE);
	if (ring->buf_info == NULL) {
		PMD_INIT_LOG(ERR, "ERROR: Creating tx_buf_info structure");

	/* Update the data portion with txq */
	dev->data->tx_queues[queue_idx] = txq;

vmxnet3_dev_rx_queue_setup(struct rte_eth_dev *dev,
			   unsigned int socket_id,
			   __attribute__((unused)) const struct rte_eth_rxconf *rx_conf,
			   struct rte_mempool *mp)
	const struct rte_memzone *mz;
	struct vmxnet3_rx_queue *rxq;
	struct vmxnet3_hw *hw = dev->data->dev_private;
	struct vmxnet3_cmd_ring *ring0, *ring1, *ring;
	struct vmxnet3_comp_ring *comp_ring;
	struct rte_pktmbuf_pool_private *mbp_priv;

	PMD_INIT_FUNC_TRACE();

	mbp_priv = (struct rte_pktmbuf_pool_private *)
		rte_mempool_get_priv(mp);
	buf_size = (uint16_t) (mbp_priv->mbuf_data_room_size -
			       RTE_PKTMBUF_HEADROOM);

	if (dev->data->dev_conf.rxmode.max_rx_pkt_len > buf_size) {
		PMD_INIT_LOG(ERR, "buf_size = %u, max_pkt_len = %u, "
			     "VMXNET3 doesn't support scatter packets yet",
			     buf_size, dev->data->dev_conf.rxmode.max_rx_pkt_len);

	rxq = rte_zmalloc("ethdev_rx_queue", sizeof(struct vmxnet3_rx_queue), RTE_CACHE_LINE_SIZE);
		PMD_INIT_LOG(ERR, "Cannot allocate rx queue structure");

	rxq->queue_id = queue_idx;
	rxq->port_id = dev->data->port_id;
	rxq->shared = &hw->rqd_start[queue_idx];
	rxq->qid1 = queue_idx;
	rxq->qid2 = queue_idx + hw->num_rx_queues;

	ring0 = &rxq->cmd_ring[0];
	ring1 = &rxq->cmd_ring[1];
	comp_ring = &rxq->comp_ring;

	/* Rx vmxnet rings size should be between 256 and 4096 */
	if (nb_desc < VMXNET3_DEF_RX_RING_SIZE) {
		PMD_INIT_LOG(ERR, "VMXNET3 Rx Ring Size Min: 256");
	} else if (nb_desc > VMXNET3_RX_RING_MAX_SIZE) {
		PMD_INIT_LOG(ERR, "VMXNET3 Rx Ring Size Max: 4096");

	ring0->size = nb_desc;
	ring0->size &= ~VMXNET3_RING_SIZE_MASK;
	ring1->size = ring0->size;
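
	/* The completion ring must have room for entries from both command rings */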
	comp_ring->size = ring0->size + ring1->size;

	/* Rx vmxnet rings structure initialization */
	ring0->next2fill = 0;
	ring1->next2fill = 0;
	ring0->next2comp = 0;
	ring1->next2comp = 0;
	ring0->gen = VMXNET3_INIT_GEN;
	ring1->gen = VMXNET3_INIT_GEN;
	comp_ring->next2proc = 0;
	comp_ring->gen = VMXNET3_INIT_GEN;

	size = sizeof(struct Vmxnet3_RxDesc) * (ring0->size + ring1->size);
	size += sizeof(struct Vmxnet3_RxCompDesc) * comp_ring->size;

	mz = ring_dma_zone_reserve(dev, "rxdesc", queue_idx, size, socket_id);
		PMD_INIT_LOG(ERR, "ERROR: Creating queue descriptors zone");
	memset(mz->addr, 0, mz->len);

	/* cmd_ring0 initialization */
	ring0->base = mz->addr;
	ring0->basePA = mz->phys_addr;

	/* cmd_ring1 initialization */
	ring1->base = ring0->base + ring0->size;
	ring1->basePA = ring0->basePA + sizeof(struct Vmxnet3_RxDesc) * ring0->size;

	/* comp_ring initialization */
	comp_ring->base = ring1->base + ring1->size;
	comp_ring->basePA = ring1->basePA + sizeof(struct Vmxnet3_RxDesc) *

	/* cmd_ring0-cmd_ring1 buf_info allocation */
	for (i = 0; i < VMXNET3_RX_CMDRING_SIZE; i++) {
		ring = &rxq->cmd_ring[i];

		snprintf(mem_name, sizeof(mem_name), "rx_ring_%d_buf_info", i);

		ring->buf_info = rte_zmalloc(mem_name, ring->size * sizeof(vmxnet3_buf_info_t), RTE_CACHE_LINE_SIZE);
		if (ring->buf_info == NULL) {
			PMD_INIT_LOG(ERR, "ERROR: Creating rx_buf_info structure");

	/* Update the data portion with rxq */
	dev->data->rx_queues[queue_idx] = rxq;

 * Initializes the Receive Unit.
 * Loads mbufs into the rx queues in advance.

vmxnet3_dev_rxtx_init(struct rte_eth_dev *dev)
	struct vmxnet3_hw *hw = dev->data->dev_private;

	PMD_INIT_FUNC_TRACE();

	for (i = 0; i < hw->num_rx_queues; i++) {
		vmxnet3_rx_queue_t *rxq = dev->data->rx_queues[i];

		for (j = 0; j < VMXNET3_RX_CMDRING_SIZE; j++) {
			/* Post buffers until command ring j is full */
			ret = vmxnet3_post_rx_bufs(rxq, j);
				PMD_INIT_LOG(ERR, "ERROR: Posting Rxq: %d buffers ring: %d", i, j);

			/* Publish next2fill so the device knows which descriptors hold fresh mbufs */
			if (unlikely(rxq->shared->ctrl.updateRxProd)) {
				VMXNET3_WRITE_BAR0_REG(hw, rxprod_reg[j] + (rxq->queue_id * VMXNET3_REG_ALIGN),
						       rxq->cmd_ring[j].next2fill);

		rxq->stopped = FALSE;

	for (i = 0; i < dev->data->nb_tx_queues; i++) {
		struct vmxnet3_tx_queue *txq = dev->data->tx_queues[i];

		txq->stopped = FALSE;
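
/*
 * Default 40-byte Toeplitz RSS hash key, the same well-known key used by
 * other Intel PMDs; applied when the application does not supply its own.
 */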
static uint8_t rss_intel_key[40] = {
	0x6D, 0x5A, 0x56, 0xDA, 0x25, 0x5B, 0x0E, 0xC2,
	0x41, 0x67, 0x25, 0x3D, 0x43, 0xA3, 0x8F, 0xB0,
	0xD0, 0xCA, 0x2B, 0xCB, 0xAE, 0x7B, 0x30, 0xB4,
	0x77, 0xCB, 0x2D, 0xA3, 0x80, 0x30, 0xF2, 0x0C,
	0x6A, 0x42, 0xB7, 0x3B, 0xBE, 0xAC, 0x01, 0xFA,

 * Configure the RSS feature

vmxnet3_rss_configure(struct rte_eth_dev *dev)
	struct vmxnet3_hw *hw = dev->data->dev_private;
	struct VMXNET3_RSSConf *dev_rss_conf;
	struct rte_eth_rss_conf *port_rss_conf;

	PMD_INIT_FUNC_TRACE();

	dev_rss_conf = hw->rss_conf;
	port_rss_conf = &dev->data->dev_conf.rx_adv_conf.rss_conf;

	/* loading hashFunc */
	dev_rss_conf->hashFunc = VMXNET3_RSS_HASH_FUNC_TOEPLITZ;
	/* loading hashKeySize */
	dev_rss_conf->hashKeySize = VMXNET3_RSS_MAX_KEY_SIZE;
	/* loading indTableSize: must not exceed VMXNET3_RSS_MAX_IND_TABLE_SIZE (128) */
	dev_rss_conf->indTableSize = (uint16_t)(hw->num_rx_queues * 4);

	if (port_rss_conf->rss_key == NULL) {
		/* Default hash key */
		port_rss_conf->rss_key = rss_intel_key;

	/* loading hashKey */
	memcpy(&dev_rss_conf->hashKey[0], port_rss_conf->rss_key, dev_rss_conf->hashKeySize);

	/* Load the indirection table, spreading entries round-robin across the RX queues */
	for (i = 0, j = 0; i < dev_rss_conf->indTableSize; i++, j++) {
		if (j == dev->data->nb_rx_queues)
		dev_rss_conf->indTable[i] = j;

	/* loading hashType */
	dev_rss_conf->hashType = 0;
	rss_hf = port_rss_conf->rss_hf & VMXNET3_RSS_OFFLOAD_ALL;
	if (rss_hf & ETH_RSS_IPV4)
		dev_rss_conf->hashType |= VMXNET3_RSS_HASH_TYPE_IPV4;
	if (rss_hf & ETH_RSS_NONFRAG_IPV4_TCP)
		dev_rss_conf->hashType |= VMXNET3_RSS_HASH_TYPE_TCP_IPV4;
	if (rss_hf & ETH_RSS_IPV6)
		dev_rss_conf->hashType |= VMXNET3_RSS_HASH_TYPE_IPV6;
	if (rss_hf & ETH_RSS_NONFRAG_IPV6_TCP)
		dev_rss_conf->hashType |= VMXNET3_RSS_HASH_TYPE_TCP_IPV6;

	return VMXNET3_SUCCESS;

 * Configure the VLAN Filter feature

vmxnet3_vlan_configure(struct rte_eth_dev *dev)
	struct vmxnet3_hw *hw = dev->data->dev_private;
	uint32_t *vf_table = hw->shared->devRead.rxFilterConf.vfTable;

	PMD_INIT_FUNC_TRACE();

	/* Set up the VLAN filter table */
	for (i = 0; i < VMXNET3_VFT_SIZE; i++) {
		/* Filter all vlan tags out by default */
		/* To-Do: Provide another routine in dev_ops for user config */

		PMD_INIT_LOG(DEBUG, "Registering VLAN portid: %"PRIu8" tag %u",
			     dev->data->port_id, vf_table[i]);

	return VMXNET3_SUCCESS;