/*-
 *   Copyright(c) 2010-2015 Intel Corporation. All rights reserved.
 *
 *   Redistribution and use in source and binary forms, with or without
 *   modification, are permitted provided that the following conditions
 *   are met:
 *
 *     * Redistributions of source code must retain the above copyright
 *       notice, this list of conditions and the following disclaimer.
 *     * Redistributions in binary form must reproduce the above copyright
 *       notice, this list of conditions and the following disclaimer in
 *       the documentation and/or other materials provided with the
 *       distribution.
 *     * Neither the name of Intel Corporation nor the names of its
 *       contributors may be used to endorse or promote products derived
 *       from this software without specific prior written permission.
 *
 *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */
#include <sys/queue.h>

#include <rte_byteorder.h>
#include <rte_common.h>
#include <rte_cycles.h>
#include <rte_debug.h>
#include <rte_interrupts.h>
#include <rte_memory.h>
#include <rte_memzone.h>
#include <rte_launch.h>
#include <rte_per_lcore.h>
#include <rte_lcore.h>
#include <rte_atomic.h>
#include <rte_branch_prediction.h>
#include <rte_mempool.h>
#include <rte_malloc.h>
#include <rte_ether.h>
#include <rte_ethdev.h>
#include <rte_prefetch.h>
#include <rte_string_fns.h>
#include <rte_errno.h>

#include "base/vmxnet3_defs.h"
#include "vmxnet3_ring.h"
#include "vmxnet3_logs.h"
#include "vmxnet3_ethdev.h"
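/* Rx producer ("doorbell") registers in BAR0, one per Rx command ring, indexed by ring_id. */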
static const uint32_t rxprod_reg[2] = {VMXNET3_REG_RXPROD, VMXNET3_REG_RXPROD2};

static int vmxnet3_post_rx_bufs(vmxnet3_rx_queue_t *, uint8_t);
static void vmxnet3_tq_tx_complete(vmxnet3_tx_queue_t *);
#ifdef RTE_LIBRTE_VMXNET3_DEBUG_DRIVER_NOT_USED
static void vmxnet3_rxq_dump(struct vmxnet3_rx_queue *);
static void vmxnet3_txq_dump(struct vmxnet3_tx_queue *);
static struct rte_mbuf *
rte_rxmbuf_alloc(struct rte_mempool *mp)
{
	struct rte_mbuf *m;
	m = __rte_mbuf_raw_alloc(mp);
	__rte_mbuf_sanity_check_raw(m, 0);
	return m;
}
#ifdef RTE_LIBRTE_VMXNET3_DEBUG_DRIVER_NOT_USED
vmxnet3_rxq_dump(struct vmxnet3_rx_queue *rxq)
	PMD_RX_LOG(DEBUG,
		   "RXQ: cmd0 base : 0x%p cmd1 base : 0x%p comp ring base : 0x%p.",
		   rxq->cmd_ring[0].base, rxq->cmd_ring[1].base, rxq->comp_ring.base);
	PMD_RX_LOG(DEBUG,
		   "RXQ: cmd0 basePA : 0x%lx cmd1 basePA : 0x%lx comp ring basePA : 0x%lx.",
		   (unsigned long)rxq->cmd_ring[0].basePA,
		   (unsigned long)rxq->cmd_ring[1].basePA,
		   (unsigned long)rxq->comp_ring.basePA);

	avail = vmxnet3_cmd_ring_desc_avail(&rxq->cmd_ring[0]);
	PMD_RX_LOG(DEBUG,
		   "RXQ: cmd0 size=%u; free=%u; next2proc=%u; queued=%u",
		   (uint32_t)rxq->cmd_ring[0].size, avail,
		   rxq->comp_ring.next2proc,
		   rxq->cmd_ring[0].size - avail);

	avail = vmxnet3_cmd_ring_desc_avail(&rxq->cmd_ring[1]);
	PMD_RX_LOG(DEBUG, "RXQ: cmd1 size=%u; free=%u; next2proc=%u; queued=%u",
		   (uint32_t)rxq->cmd_ring[1].size, avail, rxq->comp_ring.next2proc,
		   rxq->cmd_ring[1].size - avail);
vmxnet3_txq_dump(struct vmxnet3_tx_queue *txq)
	PMD_TX_LOG(DEBUG, "TXQ: cmd base : 0x%p comp ring base : 0x%p data ring base : 0x%p.",
		   txq->cmd_ring.base, txq->comp_ring.base, txq->data_ring.base);
	PMD_TX_LOG(DEBUG, "TXQ: cmd basePA : 0x%lx comp ring basePA : 0x%lx data ring basePA : 0x%lx.",
		   (unsigned long)txq->cmd_ring.basePA,
		   (unsigned long)txq->comp_ring.basePA,
		   (unsigned long)txq->data_ring.basePA);

	avail = vmxnet3_cmd_ring_desc_avail(&txq->cmd_ring);
	PMD_TX_LOG(DEBUG, "TXQ: size=%u; free=%u; next2proc=%u; queued=%u",
		   (uint32_t)txq->cmd_ring.size, avail,
		   txq->comp_ring.next2proc, txq->cmd_ring.size - avail);
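/* Free any mbufs still attached to a command ring, i.e. those between next2comp and next2fill. */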
vmxnet3_cmd_ring_release_mbufs(vmxnet3_cmd_ring_t *ring)
	while (ring->next2comp != ring->next2fill) {
		/* No need to worry about tx desc ownership, device is quiesced by now. */
		vmxnet3_buf_info_t *buf_info = ring->buf_info + ring->next2comp;

		rte_pktmbuf_free(buf_info->m);
		vmxnet3_cmd_ring_adv_next2comp(ring);

vmxnet3_cmd_ring_release(vmxnet3_cmd_ring_t *ring)
	vmxnet3_cmd_ring_release_mbufs(ring);
	rte_free(ring->buf_info);
	ring->buf_info = NULL;
vmxnet3_dev_tx_queue_release(void *txq)
	vmxnet3_tx_queue_t *tq = txq;

	/* Release the cmd_ring */
	vmxnet3_cmd_ring_release(&tq->cmd_ring);

vmxnet3_dev_rx_queue_release(void *rxq)
	vmxnet3_rx_queue_t *rq = rxq;

	/* Release both the cmd_rings */
	for (i = 0; i < VMXNET3_RX_CMDRING_SIZE; i++)
		vmxnet3_cmd_ring_release(&rq->cmd_ring[i]);
vmxnet3_dev_tx_queue_reset(void *txq)
	vmxnet3_tx_queue_t *tq = txq;
	struct vmxnet3_cmd_ring *ring = &tq->cmd_ring;
	struct vmxnet3_comp_ring *comp_ring = &tq->comp_ring;
	struct vmxnet3_data_ring *data_ring = &tq->data_ring;

	/* Release the cmd_ring mbufs */
	vmxnet3_cmd_ring_release_mbufs(&tq->cmd_ring);

	/* Tx vmxnet rings structure initialization */
	ring->gen = VMXNET3_INIT_GEN;
	comp_ring->next2proc = 0;
	comp_ring->gen = VMXNET3_INIT_GEN;

	size = sizeof(struct Vmxnet3_TxDesc) * ring->size;
	size += sizeof(struct Vmxnet3_TxCompDesc) * comp_ring->size;
	size += sizeof(struct Vmxnet3_TxDataDesc) * data_ring->size;
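	/* The cmd, comp and data rings live in one contiguous allocation, so a single memset clears all three. */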
	memset(ring->base, 0, size);
vmxnet3_dev_rx_queue_reset(void *rxq)
	vmxnet3_rx_queue_t *rq = rxq;
	struct vmxnet3_cmd_ring *ring0, *ring1;
	struct vmxnet3_comp_ring *comp_ring;

	/* Release both the cmd_rings mbufs */
	for (i = 0; i < VMXNET3_RX_CMDRING_SIZE; i++)
		vmxnet3_cmd_ring_release_mbufs(&rq->cmd_ring[i]);

	ring0 = &rq->cmd_ring[0];
	ring1 = &rq->cmd_ring[1];
	comp_ring = &rq->comp_ring;

	/* Rx vmxnet rings structure initialization */
	ring0->next2fill = 0;
	ring1->next2fill = 0;
	ring0->next2comp = 0;
	ring1->next2comp = 0;
	ring0->gen = VMXNET3_INIT_GEN;
	ring1->gen = VMXNET3_INIT_GEN;
	comp_ring->next2proc = 0;
	comp_ring->gen = VMXNET3_INIT_GEN;

	size = sizeof(struct Vmxnet3_RxDesc) * (ring0->size + ring1->size);
	size += sizeof(struct Vmxnet3_RxCompDesc) * comp_ring->size;

	memset(ring0->base, 0, size);
vmxnet3_dev_clear_queues(struct rte_eth_dev *dev)
	PMD_INIT_FUNC_TRACE();

	for (i = 0; i < dev->data->nb_tx_queues; i++) {
		struct vmxnet3_tx_queue *txq = dev->data->tx_queues[i];

		vmxnet3_dev_tx_queue_reset(txq);

	for (i = 0; i < dev->data->nb_rx_queues; i++) {
		struct vmxnet3_rx_queue *rxq = dev->data->rx_queues[i];

		vmxnet3_dev_rx_queue_reset(rxq);
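/*
 * Reclaim completed Tx descriptors: walk the completion ring while the
 * descriptor gen bit matches the ring's current generation, and free every
 * mbuf segment belonging to each completed packet.
 */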
vmxnet3_tq_tx_complete(vmxnet3_tx_queue_t *txq)
	struct rte_mbuf *mbuf;
	vmxnet3_comp_ring_t *comp_ring = &txq->comp_ring;
	struct Vmxnet3_TxCompDesc *tcd = (struct Vmxnet3_TxCompDesc *)
		(comp_ring->base + comp_ring->next2proc);

	while (tcd->gen == comp_ring->gen) {
		/* Release cmd_ring descriptor and free mbuf */
		VMXNET3_ASSERT(txq->cmd_ring.base[tcd->txdIdx].txd.eop == 1);
		while (txq->cmd_ring.next2comp != tcd->txdIdx) {
			mbuf = txq->cmd_ring.buf_info[txq->cmd_ring.next2comp].m;
			txq->cmd_ring.buf_info[txq->cmd_ring.next2comp].m = NULL;
			rte_pktmbuf_free_seg(mbuf);

			/* Mark the txd for which tcd was generated as completed */
			vmxnet3_cmd_ring_adv_next2comp(&txq->cmd_ring);

		vmxnet3_comp_ring_adv_next2proc(comp_ring);
		tcd = (struct Vmxnet3_TxCompDesc *)(comp_ring->base +
						    comp_ring->next2proc);

	PMD_TX_LOG(DEBUG, "Processed %d tx comps & command descs.", completed);
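/*
 * Burst transmit: reclaim completed descriptors first, then map each mbuf
 * chain onto Tx descriptors. Small single-segment packets are copied into the
 * data ring; the SOP gen bit is flipped last to hand the packet to the device,
 * and the TXPROD doorbell is written once enough packets have been deferred.
 */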
vmxnet3_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts,
		  uint16_t nb_pkts)
	vmxnet3_tx_queue_t *txq = tx_queue;
	struct vmxnet3_hw *hw = txq->hw;
	Vmxnet3_TxQueueCtrl *txq_ctrl = &txq->shared->ctrl;
	uint32_t deferred = rte_le_to_cpu_32(txq_ctrl->txNumDeferred);

	if (unlikely(txq->stopped)) {
		PMD_TX_LOG(DEBUG, "Tx queue is stopped.");

	/* Free up the comp_descriptors aggressively */
	vmxnet3_tq_tx_complete(txq);

	while (nb_tx < nb_pkts) {
		Vmxnet3_GenericDesc *gdesc;
		vmxnet3_buf_info_t *tbi;
		uint32_t first2fill, avail, dw2;
		struct rte_mbuf *txm = tx_pkts[nb_tx];
		struct rte_mbuf *m_seg = txm;

		/* If the packet is excessively fragmented, then drop it */
		if (unlikely(txm->nb_segs > VMXNET3_MAX_TXD_PER_PKT)) {
			++txq->stats.drop_too_many_segs;
			++txq->stats.drop_total;
			rte_pktmbuf_free(txm);

		/* Is the command ring full? */
		avail = vmxnet3_cmd_ring_desc_avail(&txq->cmd_ring);
		if (txm->nb_segs > avail) {
			++txq->stats.tx_ring_full;

		if (txm->nb_segs == 1 && rte_pktmbuf_pkt_len(txm) <= VMXNET3_HDR_COPY_SIZE) {
			struct Vmxnet3_TxDataDesc *tdd;

			tdd = txq->data_ring.base + txq->cmd_ring.next2fill;
			copy_size = rte_pktmbuf_pkt_len(txm);
			rte_memcpy(tdd->data, rte_pktmbuf_mtod(txm, char *), copy_size);

		/* use the previous gen bit for the SOP desc */
		dw2 = (txq->cmd_ring.gen ^ 0x1) << VMXNET3_TXD_GEN_SHIFT;
		first2fill = txq->cmd_ring.next2fill;
		do {
			/* Remember the transmit buffer for cleanup */
			tbi = txq->cmd_ring.buf_info + txq->cmd_ring.next2fill;

			/* NB: the following assumes that the VMXNET3 maximum
			 * transmit buffer size (16K) is greater than the
			 * maximum size of an mbuf segment.
			 */
			gdesc = txq->cmd_ring.base + txq->cmd_ring.next2fill;
			if (copy_size)
				gdesc->txd.addr = rte_cpu_to_le_64(txq->data_ring.basePA +
								   txq->cmd_ring.next2fill *
								   sizeof(struct Vmxnet3_TxDataDesc));
			else
				gdesc->txd.addr = rte_mbuf_data_dma_addr(m_seg);

			gdesc->dword[2] = dw2 | m_seg->data_len;

			/* move to the next2fill descriptor */
			vmxnet3_cmd_ring_adv_next2fill(&txq->cmd_ring);

			/* use the right gen for non-SOP desc */
			dw2 = txq->cmd_ring.gen << VMXNET3_TXD_GEN_SHIFT;
		} while ((m_seg = m_seg->next) != NULL);
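		/* gdesc now points to the last descriptor written for this packet. */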
		/* Update the EOP descriptor */
		gdesc->dword[3] |= VMXNET3_TXD_EOP | VMXNET3_TXD_CQ;

		/* Add VLAN tag if present */
		gdesc = txq->cmd_ring.base + first2fill;
		if (txm->ol_flags & PKT_TX_VLAN_PKT) {
			gdesc->txd.tci = txm->vlan_tci;

		if (txm->ol_flags & PKT_TX_L4_MASK) {
			gdesc->txd.om = VMXNET3_OM_CSUM;
			gdesc->txd.hlen = txm->l2_len + txm->l3_len;

			switch (txm->ol_flags & PKT_TX_L4_MASK) {
			case PKT_TX_TCP_CKSUM:
				gdesc->txd.msscof = gdesc->txd.hlen + offsetof(struct tcp_hdr, cksum);
				break;
			case PKT_TX_UDP_CKSUM:
				gdesc->txd.msscof = gdesc->txd.hlen + offsetof(struct udp_hdr, dgram_cksum);
				break;
			default:
				PMD_TX_LOG(WARNING, "requested cksum offload not supported %#llx",
					   txm->ol_flags & PKT_TX_L4_MASK);

			gdesc->txd.om = VMXNET3_OM_NONE;
			gdesc->txd.msscof = 0;
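		/* The barrier keeps all descriptor writes ordered ahead of the SOP gen-bit flip that hands the packet to the device. */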
		/* flip the GEN bit on the SOP */
		rte_compiler_barrier();
		gdesc->dword[2] ^= VMXNET3_TXD_GEN;

		txq_ctrl->txNumDeferred = rte_cpu_to_le_32(++deferred);

	PMD_TX_LOG(DEBUG, "vmxnet3 txThreshold: %u", rte_le_to_cpu_32(txq_ctrl->txThreshold));
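	/* Doorbell writes are batched: TXPROD is only written once txNumDeferred reaches the txThreshold advertised in the shared queue control area. */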
	if (deferred >= rte_le_to_cpu_32(txq_ctrl->txThreshold)) {
		txq_ctrl->txNumDeferred = 0;
		/* Notify the vSwitch that packets are available. */
		VMXNET3_WRITE_BAR0_REG(hw, (VMXNET3_REG_TXPROD + txq->queue_id * VMXNET3_REG_ALIGN),
				       txq->cmd_ring.next2fill);
/*
 * Allocates mbufs and clusters, and posts Rx descriptors with the buffer
 * details so that the device can receive packets into those buffers.
 *
 * Among the two rings, the 1st ring contains buffers of type 0 and type 1;
 * bufs_per_pkt is set such that for non-LRO cases all the buffers required
 * by a frame fit in the 1st ring (1st buf of type 0 and the rest of type 1).
 * The 2nd ring contains only type 1 buffers and is mostly used for LRO.
 */
vmxnet3_post_rx_bufs(vmxnet3_rx_queue_t *rxq, uint8_t ring_id)
	uint32_t i = 0, val = 0;
	struct vmxnet3_cmd_ring *ring = &rxq->cmd_ring[ring_id];

	if (ring_id == 0) {
		/* Usually: One HEAD type buf per packet
		 * val = (ring->next2fill % rxq->hw->bufs_per_pkt) ?
		 *       VMXNET3_RXD_BTYPE_BODY : VMXNET3_RXD_BTYPE_HEAD;
		 */

		/* We use a single packet buffer, so all heads here */
		val = VMXNET3_RXD_BTYPE_HEAD;
	} else {
		/* All BODY type buffers for the 2nd ring */
		val = VMXNET3_RXD_BTYPE_BODY;
	}

	while (vmxnet3_cmd_ring_desc_avail(ring) > 0) {
		struct Vmxnet3_RxDesc *rxd;
		struct rte_mbuf *mbuf;
		vmxnet3_buf_info_t *buf_info = &ring->buf_info[ring->next2fill];

		rxd = (struct Vmxnet3_RxDesc *)(ring->base + ring->next2fill);

		/* Allocate a blank mbuf for the current Rx descriptor */
		mbuf = rte_rxmbuf_alloc(rxq->mp);
		if (unlikely(mbuf == NULL)) {
			PMD_RX_LOG(ERR, "Error allocating mbuf");
			rxq->stats.rx_buf_alloc_failure++;

		/*
		 * Load the mbuf pointer into buf_info[ring_size];
		 * the buf_info structure is equivalent to a cookie for virtio-virtqueue.
		 */
		buf_info->m = mbuf;
		buf_info->len = (uint16_t)(mbuf->buf_len -
					   RTE_PKTMBUF_HEADROOM);
		buf_info->bufPA =
			rte_mbuf_data_dma_addr_default(mbuf);

		/* Load the Rx descriptor with the buffer's GPA */
		rxd->addr = buf_info->bufPA;

		/* After this point rxd->addr MUST not be NULL */
		rxd->len = buf_info->len;
		/* Flip the gen bit at the end to change ownership */
		rxd->gen = ring->gen;

		vmxnet3_cmd_ring_adv_next2fill(ring);

	/* Return an error only if no buffers are currently posted */
	if (vmxnet3_cmd_ring_desc_avail(ring) >= (ring->size - 1))
/* Receive side checksum and other offloads */
vmxnet3_rx_offload(const Vmxnet3_RxCompDesc *rcd, struct rte_mbuf *rxm)
	/* Check for hardware stripped VLAN tag */
	rxm->ol_flags |= PKT_RX_VLAN_PKT;
	rxm->vlan_tci = rte_le_to_cpu_16((uint16_t)rcd->tci);

	if (rcd->rssType != VMXNET3_RCD_RSS_TYPE_NONE) {
		rxm->ol_flags |= PKT_RX_RSS_HASH;
		rxm->hash.rss = rcd->rssHash;

	/* Check packet type, checksum errors, etc. Only IPv4 is supported for now. */
		struct ether_hdr *eth = rte_pktmbuf_mtod(rxm, struct ether_hdr *);
		struct ipv4_hdr *ip = (struct ipv4_hdr *)(eth + 1);

		if (((ip->version_ihl & 0xf) << 2) > (int)sizeof(struct ipv4_hdr))
			rxm->packet_type = RTE_PTYPE_L3_IPV4_EXT;
		else
			rxm->packet_type = RTE_PTYPE_L3_IPV4;

		if (!rcd->ipc)
			rxm->ol_flags |= PKT_RX_IP_CKSUM_BAD;

		if ((rcd->tcp || rcd->udp) && !rcd->tuc)
			rxm->ol_flags |= PKT_RX_L4_CKSUM_BAD;
/*
 * Process the Rx completion ring of the given vmxnet3_rx_queue
 * for up to nb_pkts packets and return the number of packets received.
 */
vmxnet3_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t nb_pkts)
	uint32_t nb_rxd, idx;
	vmxnet3_rx_queue_t *rxq;
	Vmxnet3_RxCompDesc *rcd;
	vmxnet3_buf_info_t *rbi;
	struct rte_mbuf *rxm = NULL;
	struct vmxnet3_hw *hw;

	rcd = &rxq->comp_ring.base[rxq->comp_ring.next2proc].rcd;

	if (unlikely(rxq->stopped)) {
		PMD_RX_LOG(DEBUG, "Rx queue is stopped.");
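	/* A completion descriptor belongs to the driver only while its gen bit matches the ring's current generation. */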
	while (rcd->gen == rxq->comp_ring.gen) {
		if (nb_rx >= nb_pkts)
			break;

		idx = rcd->rxdIdx;
		ring_idx = (uint8_t)((rcd->rqID == rxq->qid1) ? 0 : 1);
		rxd = (Vmxnet3_RxDesc *)rxq->cmd_ring[ring_idx].base + idx;
		rbi = rxq->cmd_ring[ring_idx].buf_info + idx;

		if (unlikely(rcd->sop != 1 || rcd->eop != 1)) {
			rte_pktmbuf_free_seg(rbi->m);
			PMD_RX_LOG(DEBUG, "Packet spread across multiple buffers");

		PMD_RX_LOG(DEBUG, "rxd idx: %d ring idx: %d.", idx, ring_idx);

		VMXNET3_ASSERT(rcd->len <= rxd->len);
		VMXNET3_ASSERT(rbi->m);

		if (unlikely(rcd->len == 0)) {
			PMD_RX_LOG(DEBUG, "Rx buf was skipped. rxring[%d][%d]",
				   ring_idx, idx);
			VMXNET3_ASSERT(rcd->sop && rcd->eop);
			rte_pktmbuf_free_seg(rbi->m);

		/* Assuming the packet arrives in a single packet buffer */
		if (unlikely(rxd->btype != VMXNET3_RXD_BTYPE_HEAD)) {
			PMD_RX_LOG(ERR,
				   "Alert: misbehaving device, incorrect buffer type used. Packet dropped.");
			rte_pktmbuf_free_seg(rbi->m);

		VMXNET3_ASSERT(rxd->btype == VMXNET3_RXD_BTYPE_HEAD);

		/* Get the packet buffer pointer from buf_info */
		rxm = rbi->m;

		/* Clear the descriptor's buf_info so it can be reused */
		rbi->m = NULL;

		/* Update the index at which we received a packet */
		rxq->cmd_ring[ring_idx].next2comp = idx;

		/* For an RCD with EOP set, check whether there is a frame error */
		if (unlikely(rcd->err)) {
			rxq->stats.drop_total++;
			rxq->stats.drop_err++;
			rxq->stats.drop_fcs++;
			PMD_RX_LOG(ERR, "Recv packet dropped due to frame err.");
			PMD_RX_LOG(ERR, "Error in received packet rcd#:%d rxd:%d",
				   (int)(rcd - (struct Vmxnet3_RxCompDesc *)
					 rxq->comp_ring.base), rcd->rxdIdx);
			rte_pktmbuf_free_seg(rxm);
		/* Initialize the newly received packet buffer */
		rxm->port = rxq->port_id;
		rxm->pkt_len = (uint16_t)rcd->len;
		rxm->data_len = (uint16_t)rcd->len;
		rxm->data_off = RTE_PKTMBUF_HEADROOM;

		vmxnet3_rx_offload(rcd, rxm);

		rx_pkts[nb_rx++] = rxm;

		rxq->cmd_ring[ring_idx].next2comp = idx;
		VMXNET3_INC_RING_IDX_ONLY(rxq->cmd_ring[ring_idx].next2comp, rxq->cmd_ring[ring_idx].size);

		/* Time to allocate new buffers and post fresh descriptors */
		vmxnet3_post_rx_bufs(rxq, ring_idx);
		if (unlikely(rxq->shared->ctrl.updateRxProd)) {
			VMXNET3_WRITE_BAR0_REG(hw, rxprod_reg[ring_idx] + (rxq->queue_id * VMXNET3_REG_ALIGN),
					       rxq->cmd_ring[ring_idx].next2fill);

		/* Advance to the next descriptor in comp_ring */
		vmxnet3_comp_ring_adv_next2proc(&rxq->comp_ring);

		rcd = &rxq->comp_ring.base[rxq->comp_ring.next2proc].rcd;
		if (nb_rxd > rxq->cmd_ring[0].size) {
			PMD_RX_LOG(ERR, "Used up quota of receiving packets, relinquishing control.");
/*
 * Create a memzone for the device rings. malloc can't be used as the physical
 * address is needed. If the memzone is already created, this function returns
 * a pointer to the existing one.
 */
static const struct rte_memzone *
ring_dma_zone_reserve(struct rte_eth_dev *dev, const char *ring_name,
		      uint16_t queue_id, uint32_t ring_size, int socket_id)
	char z_name[RTE_MEMZONE_NAMESIZE];
	const struct rte_memzone *mz;

	snprintf(z_name, sizeof(z_name), "%s_%s_%d_%d",
		 dev->driver->pci_drv.name, ring_name,
		 dev->data->port_id, queue_id);

	mz = rte_memzone_lookup(z_name);
	if (mz)
		return mz;

	return rte_memzone_reserve_aligned(z_name, ring_size,
					   socket_id, 0, VMXNET3_RING_BA_ALIGN);
vmxnet3_dev_tx_queue_setup(struct rte_eth_dev *dev,
			   uint16_t queue_idx,
			   uint16_t nb_desc,
			   unsigned int socket_id,
			   __attribute__((unused)) const struct rte_eth_txconf *tx_conf)
	struct vmxnet3_hw *hw = dev->data->dev_private;
	const struct rte_memzone *mz;
	struct vmxnet3_tx_queue *txq;
	struct vmxnet3_cmd_ring *ring;
	struct vmxnet3_comp_ring *comp_ring;
	struct vmxnet3_data_ring *data_ring;

	PMD_INIT_FUNC_TRACE();

	if ((tx_conf->txq_flags & ETH_TXQ_FLAGS_NOXSUMS) !=
	    ETH_TXQ_FLAGS_NOXSUMSCTP) {
		PMD_INIT_LOG(ERR, "SCTP checksum offload not supported");

	txq = rte_zmalloc("ethdev_tx_queue", sizeof(struct vmxnet3_tx_queue), RTE_CACHE_LINE_SIZE);
	if (txq == NULL) {
		PMD_INIT_LOG(ERR, "Cannot allocate tx queue structure");

	txq->queue_id = queue_idx;
	txq->port_id = dev->data->port_id;
	txq->shared = &hw->tqd_start[queue_idx];
	txq->qid = queue_idx;

	ring = &txq->cmd_ring;
	comp_ring = &txq->comp_ring;
	data_ring = &txq->data_ring;

	/* Tx vmxnet ring size must be between 512 and 4096 */
	if (nb_desc < VMXNET3_DEF_TX_RING_SIZE) {
		PMD_INIT_LOG(ERR, "VMXNET3 Tx Ring Size Min: %u",
			     VMXNET3_DEF_TX_RING_SIZE);
	} else if (nb_desc > VMXNET3_TX_RING_MAX_SIZE) {
		PMD_INIT_LOG(ERR, "VMXNET3 Tx Ring Size Max: %u",
			     VMXNET3_TX_RING_MAX_SIZE);

	ring->size = nb_desc;
	ring->size &= ~VMXNET3_RING_SIZE_MASK;
	comp_ring->size = data_ring->size = ring->size;

	/* Tx vmxnet rings structure initialization */
	ring->gen = VMXNET3_INIT_GEN;
	comp_ring->next2proc = 0;
	comp_ring->gen = VMXNET3_INIT_GEN;

	size = sizeof(struct Vmxnet3_TxDesc) * ring->size;
	size += sizeof(struct Vmxnet3_TxCompDesc) * comp_ring->size;
	size += sizeof(struct Vmxnet3_TxDataDesc) * data_ring->size;

	mz = ring_dma_zone_reserve(dev, "txdesc", queue_idx, size, socket_id);
	if (mz == NULL) {
		PMD_INIT_LOG(ERR, "ERROR: Creating queue descriptors zone");

	memset(mz->addr, 0, mz->len);
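	/* Carve the memzone up in order: cmd ring, then comp ring, then data ring. */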
	/* cmd_ring initialization */
	ring->base = mz->addr;
	ring->basePA = mz->phys_addr;

	/* comp_ring initialization */
	comp_ring->base = ring->base + ring->size;
	comp_ring->basePA = ring->basePA +
		(sizeof(struct Vmxnet3_TxDesc) * ring->size);

	/* data_ring initialization */
	data_ring->base = (Vmxnet3_TxDataDesc *)(comp_ring->base + comp_ring->size);
	data_ring->basePA = comp_ring->basePA +
		(sizeof(struct Vmxnet3_TxCompDesc) * comp_ring->size);

	/* cmd_ring buf_info allocation */
	ring->buf_info = rte_zmalloc("tx_ring_buf_info",
				     ring->size * sizeof(vmxnet3_buf_info_t), RTE_CACHE_LINE_SIZE);
	if (ring->buf_info == NULL) {
		PMD_INIT_LOG(ERR, "ERROR: Creating tx_buf_info structure");

	/* Update the data portion with txq */
	dev->data->tx_queues[queue_idx] = txq;
vmxnet3_dev_rx_queue_setup(struct rte_eth_dev *dev,
			   uint16_t queue_idx,
			   uint16_t nb_desc,
			   unsigned int socket_id,
			   __attribute__((unused)) const struct rte_eth_rxconf *rx_conf,
			   struct rte_mempool *mp)
	const struct rte_memzone *mz;
	struct vmxnet3_rx_queue *rxq;
	struct vmxnet3_hw *hw = dev->data->dev_private;
	struct vmxnet3_cmd_ring *ring0, *ring1, *ring;
	struct vmxnet3_comp_ring *comp_ring;

	PMD_INIT_FUNC_TRACE();

	buf_size = rte_pktmbuf_data_room_size(mp) -
		   RTE_PKTMBUF_HEADROOM;

	if (dev->data->dev_conf.rxmode.max_rx_pkt_len > buf_size) {
		PMD_INIT_LOG(ERR, "buf_size = %u, max_pkt_len = %u, "
			     "VMXNET3 doesn't support scattered packets yet",
			     buf_size, dev->data->dev_conf.rxmode.max_rx_pkt_len);

	rxq = rte_zmalloc("ethdev_rx_queue", sizeof(struct vmxnet3_rx_queue), RTE_CACHE_LINE_SIZE);
	if (rxq == NULL) {
		PMD_INIT_LOG(ERR, "Cannot allocate rx queue structure");

	rxq->queue_id = queue_idx;
	rxq->port_id = dev->data->port_id;
	rxq->shared = &hw->rqd_start[queue_idx];
	rxq->qid1 = queue_idx;
	rxq->qid2 = queue_idx + hw->num_rx_queues;

	ring0 = &rxq->cmd_ring[0];
	ring1 = &rxq->cmd_ring[1];
	comp_ring = &rxq->comp_ring;

	/* Rx vmxnet ring sizes must be between 256 and 4096 */
	if (nb_desc < VMXNET3_DEF_RX_RING_SIZE) {
		PMD_INIT_LOG(ERR, "VMXNET3 Rx Ring Size Min: 256");
	} else if (nb_desc > VMXNET3_RX_RING_MAX_SIZE) {
		PMD_INIT_LOG(ERR, "VMXNET3 Rx Ring Size Max: 4096");

	ring0->size = nb_desc;
	ring0->size &= ~VMXNET3_RING_SIZE_MASK;
	ring1->size = ring0->size;

	comp_ring->size = ring0->size + ring1->size;

	/* Rx vmxnet rings structure initialization */
	ring0->next2fill = 0;
	ring1->next2fill = 0;
	ring0->next2comp = 0;
	ring1->next2comp = 0;
	ring0->gen = VMXNET3_INIT_GEN;
	ring1->gen = VMXNET3_INIT_GEN;
	comp_ring->next2proc = 0;
	comp_ring->gen = VMXNET3_INIT_GEN;

	size = sizeof(struct Vmxnet3_RxDesc) * (ring0->size + ring1->size);
	size += sizeof(struct Vmxnet3_RxCompDesc) * comp_ring->size;

	mz = ring_dma_zone_reserve(dev, "rxdesc", queue_idx, size, socket_id);
	if (mz == NULL) {
		PMD_INIT_LOG(ERR, "ERROR: Creating queue descriptors zone");

	memset(mz->addr, 0, mz->len);
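	/* Carve the memzone up in order: cmd ring 0, cmd ring 1, then the completion ring. */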
	/* cmd_ring0 initialization */
	ring0->base = mz->addr;
	ring0->basePA = mz->phys_addr;

	/* cmd_ring1 initialization */
	ring1->base = ring0->base + ring0->size;
	ring1->basePA = ring0->basePA + sizeof(struct Vmxnet3_RxDesc) * ring0->size;

	/* comp_ring initialization */
	comp_ring->base = ring1->base + ring1->size;
	comp_ring->basePA = ring1->basePA + sizeof(struct Vmxnet3_RxDesc) *
			    ring1->size;

	/* cmd_ring0-cmd_ring1 buf_info allocation */
	for (i = 0; i < VMXNET3_RX_CMDRING_SIZE; i++) {
		ring = &rxq->cmd_ring[i];

		snprintf(mem_name, sizeof(mem_name), "rx_ring_%d_buf_info", i);

		ring->buf_info = rte_zmalloc(mem_name, ring->size * sizeof(vmxnet3_buf_info_t), RTE_CACHE_LINE_SIZE);
		if (ring->buf_info == NULL) {
			PMD_INIT_LOG(ERR, "ERROR: Creating rx_buf_info structure");

	/* Update the data portion with rxq */
	dev->data->rx_queues[queue_idx] = rxq;
/*
 * Initializes the receive unit and
 * loads mbufs into the Rx queues in advance.
 */
vmxnet3_dev_rxtx_init(struct rte_eth_dev *dev)
	struct vmxnet3_hw *hw = dev->data->dev_private;

	PMD_INIT_FUNC_TRACE();

	for (i = 0; i < hw->num_rx_queues; i++) {
		vmxnet3_rx_queue_t *rxq = dev->data->rx_queues[i];

		for (j = 0; j < VMXNET3_RX_CMDRING_SIZE; j++) {
			/* Post the full ring of buffers to the device */
			ret = vmxnet3_post_rx_bufs(rxq, j);
			if (ret <= 0) {
				PMD_INIT_LOG(ERR, "ERROR: Posting Rxq: %d buffers ring: %d", i, j);

			/* Update the device with next2fill so it can fill the posted mbufs with incoming packets */
			if (unlikely(rxq->shared->ctrl.updateRxProd)) {
				VMXNET3_WRITE_BAR0_REG(hw, rxprod_reg[j] + (rxq->queue_id * VMXNET3_REG_ALIGN),
						       rxq->cmd_ring[j].next2fill);

		rxq->stopped = FALSE;

	for (i = 0; i < dev->data->nb_tx_queues; i++) {
		struct vmxnet3_tx_queue *txq = dev->data->tx_queues[i];

		txq->stopped = FALSE;
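/* Default Toeplitz RSS hash key, used when the application does not supply one. */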
static uint8_t rss_intel_key[40] = {
	0x6D, 0x5A, 0x56, 0xDA, 0x25, 0x5B, 0x0E, 0xC2,
	0x41, 0x67, 0x25, 0x3D, 0x43, 0xA3, 0x8F, 0xB0,
	0xD0, 0xCA, 0x2B, 0xCB, 0xAE, 0x7B, 0x30, 0xB4,
	0x77, 0xCB, 0x2D, 0xA3, 0x80, 0x30, 0xF2, 0x0C,
	0x6A, 0x42, 0xB7, 0x3B, 0xBE, 0xAC, 0x01, 0xFA,
};
/*
 * Configure the RSS feature
 */
vmxnet3_rss_configure(struct rte_eth_dev *dev)
	struct vmxnet3_hw *hw = dev->data->dev_private;
	struct VMXNET3_RSSConf *dev_rss_conf;
	struct rte_eth_rss_conf *port_rss_conf;

	PMD_INIT_FUNC_TRACE();

	dev_rss_conf = hw->rss_conf;
	port_rss_conf = &dev->data->dev_conf.rx_adv_conf.rss_conf;

	/* loading hashFunc */
	dev_rss_conf->hashFunc = VMXNET3_RSS_HASH_FUNC_TOEPLITZ;
	/* loading hashKeySize */
	dev_rss_conf->hashKeySize = VMXNET3_RSS_MAX_KEY_SIZE;
	/* loading indTableSize: must not exceed VMXNET3_RSS_MAX_IND_TABLE_SIZE (128) */
	dev_rss_conf->indTableSize = (uint16_t)(hw->num_rx_queues * 4);

	if (port_rss_conf->rss_key == NULL) {
		/* Default hash key */
		port_rss_conf->rss_key = rss_intel_key;

	/* loading hashKey */
	memcpy(&dev_rss_conf->hashKey[0], port_rss_conf->rss_key, dev_rss_conf->hashKeySize);
	/* loading indTable */
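	/* Entries are assigned round-robin across the configured Rx queues. */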
	for (i = 0, j = 0; i < dev_rss_conf->indTableSize; i++, j++) {
		if (j == dev->data->nb_rx_queues)
			j = 0;
		dev_rss_conf->indTable[i] = j;

	/* loading hashType */
	dev_rss_conf->hashType = 0;
	rss_hf = port_rss_conf->rss_hf & VMXNET3_RSS_OFFLOAD_ALL;
	if (rss_hf & ETH_RSS_IPV4)
		dev_rss_conf->hashType |= VMXNET3_RSS_HASH_TYPE_IPV4;
	if (rss_hf & ETH_RSS_NONFRAG_IPV4_TCP)
		dev_rss_conf->hashType |= VMXNET3_RSS_HASH_TYPE_TCP_IPV4;
	if (rss_hf & ETH_RSS_IPV6)
		dev_rss_conf->hashType |= VMXNET3_RSS_HASH_TYPE_IPV6;
	if (rss_hf & ETH_RSS_NONFRAG_IPV6_TCP)
		dev_rss_conf->hashType |= VMXNET3_RSS_HASH_TYPE_TCP_IPV6;

	return VMXNET3_SUCCESS;