1 /* SPDX-License-Identifier: BSD-3-Clause
2  * Copyright(c) 2010-2015 Intel Corporation
3  */
4
5 #include <sys/queue.h>
6
7 #include <stdio.h>
8 #include <stdlib.h>
9 #include <string.h>
10 #include <errno.h>
11 #include <stdint.h>
12 #include <stdarg.h>
13 #include <unistd.h>
14 #include <inttypes.h>
15
16 #include <rte_byteorder.h>
17 #include <rte_common.h>
18 #include <rte_cycles.h>
19 #include <rte_log.h>
20 #include <rte_debug.h>
21 #include <rte_interrupts.h>
22 #include <rte_pci.h>
23 #include <rte_memory.h>
24 #include <rte_memzone.h>
25 #include <rte_launch.h>
26 #include <rte_eal.h>
27 #include <rte_per_lcore.h>
28 #include <rte_lcore.h>
29 #include <rte_atomic.h>
30 #include <rte_branch_prediction.h>
31 #include <rte_mempool.h>
32 #include <rte_malloc.h>
33 #include <rte_mbuf.h>
34 #include <rte_ether.h>
35 #include <rte_ethdev_driver.h>
36 #include <rte_prefetch.h>
37 #include <rte_ip.h>
38 #include <rte_udp.h>
39 #include <rte_tcp.h>
40 #include <rte_sctp.h>
41 #include <rte_string_fns.h>
42 #include <rte_errno.h>
43 #include <rte_net.h>
44
45 #include "base/vmxnet3_defs.h"
46 #include "vmxnet3_ring.h"
47
48 #include "vmxnet3_logs.h"
49 #include "vmxnet3_ethdev.h"
50
51 #define VMXNET3_TX_OFFLOAD_MASK ( \
52                 PKT_TX_VLAN_PKT | \
53                 PKT_TX_IPV6 |     \
54                 PKT_TX_IPV4 |     \
55                 PKT_TX_L4_MASK |  \
56                 PKT_TX_TCP_SEG)
57
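/*
 * Offload flags the device cannot handle: everything present in the generic
 * PKT_TX_OFFLOAD_MASK that is not in VMXNET3_TX_OFFLOAD_MASK. XOR is enough
 * here because VMXNET3_TX_OFFLOAD_MASK is a strict subset of
 * PKT_TX_OFFLOAD_MASK.
 */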
58 #define VMXNET3_TX_OFFLOAD_NOTSUP_MASK  \
59         (PKT_TX_OFFLOAD_MASK ^ VMXNET3_TX_OFFLOAD_MASK)
60
61 static const uint32_t rxprod_reg[2] = {VMXNET3_REG_RXPROD, VMXNET3_REG_RXPROD2};
62
63 static int vmxnet3_post_rx_bufs(vmxnet3_rx_queue_t*, uint8_t);
64 static void vmxnet3_tq_tx_complete(vmxnet3_tx_queue_t *);
65 #ifdef RTE_LIBRTE_VMXNET3_DEBUG_DRIVER_NOT_USED
66 static void vmxnet3_rxq_dump(struct vmxnet3_rx_queue *);
67 static void vmxnet3_txq_dump(struct vmxnet3_tx_queue *);
68 #endif
69
70 #ifdef RTE_LIBRTE_VMXNET3_DEBUG_DRIVER_NOT_USED
71 static void
72 vmxnet3_rxq_dump(struct vmxnet3_rx_queue *rxq)
73 {
74         uint32_t avail = 0;
75
76         if (rxq == NULL)
77                 return;
78
79         PMD_RX_LOG(DEBUG,
80                    "RXQ: cmd0 base : %p cmd1 base : %p comp ring base : %p.",
81                    rxq->cmd_ring[0].base, rxq->cmd_ring[1].base, rxq->comp_ring.base);
82         PMD_RX_LOG(DEBUG,
83                    "RXQ: cmd0 basePA : 0x%lx cmd1 basePA : 0x%lx comp ring basePA : 0x%lx.",
84                    (unsigned long)rxq->cmd_ring[0].basePA,
85                    (unsigned long)rxq->cmd_ring[1].basePA,
86                    (unsigned long)rxq->comp_ring.basePA);
87
88         avail = vmxnet3_cmd_ring_desc_avail(&rxq->cmd_ring[0]);
89         PMD_RX_LOG(DEBUG,
90                    "RXQ:cmd0: size=%u; free=%u; next2proc=%u; queued=%u",
91                    (uint32_t)rxq->cmd_ring[0].size, avail,
92                    rxq->comp_ring.next2proc,
93                    rxq->cmd_ring[0].size - avail);
94
95         avail = vmxnet3_cmd_ring_desc_avail(&rxq->cmd_ring[1]);
96         PMD_RX_LOG(DEBUG, "RXQ:cmd1 size=%u; free=%u; next2proc=%u; queued=%u",
97                    (uint32_t)rxq->cmd_ring[1].size, avail, rxq->comp_ring.next2proc,
98                    rxq->cmd_ring[1].size - avail);
99
100 }
101
102 static void
103 vmxnet3_txq_dump(struct vmxnet3_tx_queue *txq)
104 {
105         uint32_t avail = 0;
106
107         if (txq == NULL)
108                 return;
109
110         PMD_TX_LOG(DEBUG, "TXQ: cmd base : %p comp ring base : %p data ring base : %p.",
111                    txq->cmd_ring.base, txq->comp_ring.base, txq->data_ring.base);
112         PMD_TX_LOG(DEBUG, "TXQ: cmd basePA : 0x%lx comp ring basePA : 0x%lx data ring basePA : 0x%lx.",
113                    (unsigned long)txq->cmd_ring.basePA,
114                    (unsigned long)txq->comp_ring.basePA,
115                    (unsigned long)txq->data_ring.basePA);
116
117         avail = vmxnet3_cmd_ring_desc_avail(&txq->cmd_ring);
118         PMD_TX_LOG(DEBUG, "TXQ: size=%u; free=%u; next2proc=%u; queued=%u",
119                    (uint32_t)txq->cmd_ring.size, avail,
120                    txq->comp_ring.next2proc, txq->cmd_ring.size - avail);
121 }
122 #endif
123
124 static void
125 vmxnet3_tx_cmd_ring_release_mbufs(vmxnet3_cmd_ring_t *ring)
126 {
127         while (ring->next2comp != ring->next2fill) {
128                 /* No need to worry about desc ownership, device is quiesced by now. */
129                 vmxnet3_buf_info_t *buf_info = ring->buf_info + ring->next2comp;
130
131                 if (buf_info->m) {
132                         rte_pktmbuf_free(buf_info->m);
133                         buf_info->m = NULL;
134                         buf_info->bufPA = 0;
135                         buf_info->len = 0;
136                 }
137                 vmxnet3_cmd_ring_adv_next2comp(ring);
138         }
139 }
140
141 static void
142 vmxnet3_rx_cmd_ring_release_mbufs(vmxnet3_cmd_ring_t *ring)
143 {
144         uint32_t i;
145
146         for (i = 0; i < ring->size; i++) {
147                 /* No need to worry about desc ownership, device is quiesced by now. */
148                 vmxnet3_buf_info_t *buf_info = &ring->buf_info[i];
149
150                 if (buf_info->m) {
151                         rte_pktmbuf_free_seg(buf_info->m);
152                         buf_info->m = NULL;
153                         buf_info->bufPA = 0;
154                         buf_info->len = 0;
155                 }
156                 vmxnet3_cmd_ring_adv_next2comp(ring);
157         }
158 }
159
160 static void
161 vmxnet3_cmd_ring_release(vmxnet3_cmd_ring_t *ring)
162 {
163         rte_free(ring->buf_info);
164         ring->buf_info = NULL;
165 }
166
167 void
168 vmxnet3_dev_tx_queue_release(void *txq)
169 {
170         vmxnet3_tx_queue_t *tq = txq;
171
172         if (tq != NULL) {
173                 /* Release mbufs */
174                 vmxnet3_tx_cmd_ring_release_mbufs(&tq->cmd_ring);
175                 /* Release the cmd_ring */
176                 vmxnet3_cmd_ring_release(&tq->cmd_ring);
177                 /* Release the memzone */
178                 rte_memzone_free(tq->mz);
179                 /* Release the queue */
180                 rte_free(tq);
181         }
182 }
183
184 void
185 vmxnet3_dev_rx_queue_release(void *rxq)
186 {
187         int i;
188         vmxnet3_rx_queue_t *rq = rxq;
189
190         if (rq != NULL) {
191                 /* Release mbufs */
192                 for (i = 0; i < VMXNET3_RX_CMDRING_SIZE; i++)
193                         vmxnet3_rx_cmd_ring_release_mbufs(&rq->cmd_ring[i]);
194
195                 /* Release both the cmd_rings */
196                 for (i = 0; i < VMXNET3_RX_CMDRING_SIZE; i++)
197                         vmxnet3_cmd_ring_release(&rq->cmd_ring[i]);
198
199                 /* Release the memzone */
200                 rte_memzone_free(rq->mz);
201
202                 /* Release the queue */
203                 rte_free(rq);
204         }
205 }
206
207 static void
208 vmxnet3_dev_tx_queue_reset(void *txq)
209 {
210         vmxnet3_tx_queue_t *tq = txq;
211         struct vmxnet3_cmd_ring *ring = &tq->cmd_ring;
212         struct vmxnet3_comp_ring *comp_ring = &tq->comp_ring;
213         struct vmxnet3_data_ring *data_ring = &tq->data_ring;
214         int size;
215
216         if (tq != NULL) {
217                 /* Release the cmd_ring mbufs */
218                 vmxnet3_tx_cmd_ring_release_mbufs(&tq->cmd_ring);
219         }
220
221         /* Tx vmxnet rings structure initialization */
222         ring->next2fill = 0;
223         ring->next2comp = 0;
224         ring->gen = VMXNET3_INIT_GEN;
225         comp_ring->next2proc = 0;
226         comp_ring->gen = VMXNET3_INIT_GEN;
227
228         size = sizeof(struct Vmxnet3_TxDesc) * ring->size;
229         size += sizeof(struct Vmxnet3_TxCompDesc) * comp_ring->size;
230         size += tq->txdata_desc_size * data_ring->size;
231
232         memset(ring->base, 0, size);
233 }
234
235 static void
236 vmxnet3_dev_rx_queue_reset(void *rxq)
237 {
238         int i;
239         vmxnet3_rx_queue_t *rq = rxq;
240         struct vmxnet3_hw *hw = rq->hw;
241         struct vmxnet3_cmd_ring *ring0, *ring1;
242         struct vmxnet3_comp_ring *comp_ring;
243         struct vmxnet3_rx_data_ring *data_ring = &rq->data_ring;
244         int size;
245
246         /* Release both the cmd_rings mbufs */
247         for (i = 0; i < VMXNET3_RX_CMDRING_SIZE; i++)
248                 vmxnet3_rx_cmd_ring_release_mbufs(&rq->cmd_ring[i]);
249
250         ring0 = &rq->cmd_ring[0];
251         ring1 = &rq->cmd_ring[1];
252         comp_ring = &rq->comp_ring;
253
254         /* Rx vmxnet rings structure initialization */
255         ring0->next2fill = 0;
256         ring1->next2fill = 0;
257         ring0->next2comp = 0;
258         ring1->next2comp = 0;
259         ring0->gen = VMXNET3_INIT_GEN;
260         ring1->gen = VMXNET3_INIT_GEN;
261         comp_ring->next2proc = 0;
262         comp_ring->gen = VMXNET3_INIT_GEN;
263
264         size = sizeof(struct Vmxnet3_RxDesc) * (ring0->size + ring1->size);
265         size += sizeof(struct Vmxnet3_RxCompDesc) * comp_ring->size;
266         if (VMXNET3_VERSION_GE_3(hw) && rq->data_desc_size)
267                 size += rq->data_desc_size * data_ring->size;
268
269         memset(ring0->base, 0, size);
270 }
271
272 void
273 vmxnet3_dev_clear_queues(struct rte_eth_dev *dev)
274 {
275         unsigned i;
276
277         PMD_INIT_FUNC_TRACE();
278
279         for (i = 0; i < dev->data->nb_tx_queues; i++) {
280                 struct vmxnet3_tx_queue *txq = dev->data->tx_queues[i];
281
282                 if (txq != NULL) {
283                         txq->stopped = TRUE;
284                         vmxnet3_dev_tx_queue_reset(txq);
285                 }
286         }
287
288         for (i = 0; i < dev->data->nb_rx_queues; i++) {
289                 struct vmxnet3_rx_queue *rxq = dev->data->rx_queues[i];
290
291                 if (rxq != NULL) {
292                         rxq->stopped = TRUE;
293                         vmxnet3_dev_rx_queue_reset(rxq);
294                 }
295         }
296 }
297
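/*
 * Reclaim the command-ring descriptors of one completed packet. The Tx
 * completion descriptor points at the EOP command descriptor; free the mbuf
 * chain stored there, then advance next2comp over every descriptor belonging
 * to that packet. Returns the number of command descriptors released.
 */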
298 static int
299 vmxnet3_unmap_pkt(uint16_t eop_idx, vmxnet3_tx_queue_t *txq)
300 {
301         int completed = 0;
302         struct rte_mbuf *mbuf;
303
304         /* Release cmd_ring descriptor and free mbuf */
305         RTE_ASSERT(txq->cmd_ring.base[eop_idx].txd.eop == 1);
306
307         mbuf = txq->cmd_ring.buf_info[eop_idx].m;
308         if (mbuf == NULL)
309                 rte_panic("EOP desc does not point to a valid mbuf");
310         rte_pktmbuf_free(mbuf);
311
312         txq->cmd_ring.buf_info[eop_idx].m = NULL;
313
314         while (txq->cmd_ring.next2comp != eop_idx) {
315                 /* no out-of-order completion */
316                 RTE_ASSERT(txq->cmd_ring.base[txq->cmd_ring.next2comp].txd.cq == 0);
317                 vmxnet3_cmd_ring_adv_next2comp(&txq->cmd_ring);
318                 completed++;
319         }
320
321         /* Mark the txd for which tcd was generated as completed */
322         vmxnet3_cmd_ring_adv_next2comp(&txq->cmd_ring);
323
324         return completed + 1;
325 }
326
327 static void
328 vmxnet3_tq_tx_complete(vmxnet3_tx_queue_t *txq)
329 {
330         int completed = 0;
331         vmxnet3_comp_ring_t *comp_ring = &txq->comp_ring;
332         struct Vmxnet3_TxCompDesc *tcd = (struct Vmxnet3_TxCompDesc *)
333                 (comp_ring->base + comp_ring->next2proc);
334
335         while (tcd->gen == comp_ring->gen) {
336                 completed += vmxnet3_unmap_pkt(tcd->txdIdx, txq);
337
338                 vmxnet3_comp_ring_adv_next2proc(comp_ring);
339                 tcd = (struct Vmxnet3_TxCompDesc *)(comp_ring->base +
340                                                     comp_ring->next2proc);
341         }
342
343         PMD_TX_LOG(DEBUG, "Processed %d tx comps & command descs.", completed);
344 }
345
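/*
 * Tx prepare callback: reject packets the device cannot transmit (non-TSO
 * packets spanning more than VMXNET3_MAX_TXD_PER_PKT descriptors, offload
 * flags outside VMXNET3_TX_OFFLOAD_MASK, SCTP checksum offload) and let
 * rte_net_intel_cksum_prepare() fill in the pseudo-header checksums expected
 * by the checksum/TSO offload engine.
 */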
346 uint16_t
347 vmxnet3_prep_pkts(__rte_unused void *tx_queue, struct rte_mbuf **tx_pkts,
348         uint16_t nb_pkts)
349 {
350         int32_t ret;
351         uint32_t i;
352         uint64_t ol_flags;
353         struct rte_mbuf *m;
354
355         for (i = 0; i != nb_pkts; i++) {
356                 m = tx_pkts[i];
357                 ol_flags = m->ol_flags;
358
359                 /* Non-TSO packet cannot occupy more than
360                  * VMXNET3_MAX_TXD_PER_PKT TX descriptors.
361                  */
362                 if ((ol_flags & PKT_TX_TCP_SEG) == 0 &&
363                                 m->nb_segs > VMXNET3_MAX_TXD_PER_PKT) {
364                         rte_errno = EINVAL;
365                         return i;
366                 }
367
368                 /* check that only supported TX offloads are requested. */
369                 if ((ol_flags & VMXNET3_TX_OFFLOAD_NOTSUP_MASK) != 0 ||
370                                 (ol_flags & PKT_TX_L4_MASK) ==
371                                 PKT_TX_SCTP_CKSUM) {
372                         rte_errno = ENOTSUP;
373                         return i;
374                 }
375
376 #ifdef RTE_LIBRTE_ETHDEV_DEBUG
377                 ret = rte_validate_tx_offload(m);
378                 if (ret != 0) {
379                         rte_errno = -ret;
380                         return i;
381                 }
382 #endif
383                 ret = rte_net_intel_cksum_prepare(m);
384                 if (ret != 0) {
385                         rte_errno = -ret;
386                         return i;
387                 }
388         }
389
390         return i;
391 }
392
393 uint16_t
394 vmxnet3_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts,
395                   uint16_t nb_pkts)
396 {
397         uint16_t nb_tx;
398         vmxnet3_tx_queue_t *txq = tx_queue;
399         struct vmxnet3_hw *hw = txq->hw;
400         Vmxnet3_TxQueueCtrl *txq_ctrl = &txq->shared->ctrl;
401         uint32_t deferred = rte_le_to_cpu_32(txq_ctrl->txNumDeferred);
402
403         if (unlikely(txq->stopped)) {
404                 PMD_TX_LOG(DEBUG, "Tx queue is stopped.");
405                 return 0;
406         }
407
408         /* Free up the comp_descriptors aggressively */
409         vmxnet3_tq_tx_complete(txq);
410
411         nb_tx = 0;
412         while (nb_tx < nb_pkts) {
413                 Vmxnet3_GenericDesc *gdesc;
414                 vmxnet3_buf_info_t *tbi;
415                 uint32_t first2fill, avail, dw2;
416                 struct rte_mbuf *txm = tx_pkts[nb_tx];
417                 struct rte_mbuf *m_seg = txm;
418                 int copy_size = 0;
419                 bool tso = (txm->ol_flags & PKT_TX_TCP_SEG) != 0;
420                 /* # of descriptors needed for a packet. */
421                 unsigned count = txm->nb_segs;
422
423                 avail = vmxnet3_cmd_ring_desc_avail(&txq->cmd_ring);
424                 if (count > avail) {
425                         /* Is command ring full? */
426                         if (unlikely(avail == 0)) {
427                                 PMD_TX_LOG(DEBUG, "No free ring descriptors");
428                                 txq->stats.tx_ring_full++;
429                                 txq->stats.drop_total += (nb_pkts - nb_tx);
430                                 break;
431                         }
432
433                         /* Command ring is not full but cannot handle the
434                          * multi-segmented packet. Let's try the next packet
435                          * in this case.
436                          */
437                         PMD_TX_LOG(DEBUG, "Running out of ring descriptors "
438                                    "(avail %d needed %d)", avail, count);
439                         txq->stats.drop_total++;
440                         if (tso)
441                                 txq->stats.drop_tso++;
442                         rte_pktmbuf_free(txm);
443                         nb_tx++;
444                         continue;
445                 }
446
447                 /* Drop non-TSO packet that is excessively fragmented */
448                 if (unlikely(!tso && count > VMXNET3_MAX_TXD_PER_PKT)) {
449                         PMD_TX_LOG(ERR, "Non-TSO packet cannot occupy more than %d tx "
450                                    "descriptors. Packet dropped.", VMXNET3_MAX_TXD_PER_PKT);
451                         txq->stats.drop_too_many_segs++;
452                         txq->stats.drop_total++;
453                         rte_pktmbuf_free(txm);
454                         nb_tx++;
455                         continue;
456                 }
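                /*
                 * Small single-segment packets that fit in one data-ring
                 * entry are copied into the per-queue Tx data ring; the
                 * descriptor filled below then points into the data ring
                 * instead of the mbuf, so no separate buffer address has to
                 * be handed to the device for tiny frames.
                 */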
457
458                 if (txm->nb_segs == 1 &&
459                     rte_pktmbuf_pkt_len(txm) <= txq->txdata_desc_size) {
460                         struct Vmxnet3_TxDataDesc *tdd;
461
462                         /* Skip empty packets */
463                         if (unlikely(rte_pktmbuf_pkt_len(txm) == 0)) {
464                                 txq->stats.drop_total++;
465                                 rte_pktmbuf_free(txm);
466                                 nb_tx++;
467                                 continue;
468                         }
469
470                         tdd = (struct Vmxnet3_TxDataDesc *)
471                                 ((uint8 *)txq->data_ring.base +
472                                  txq->cmd_ring.next2fill *
473                                  txq->txdata_desc_size);
474                         copy_size = rte_pktmbuf_pkt_len(txm);
475                         rte_memcpy(tdd->data, rte_pktmbuf_mtod(txm, char *), copy_size);
476                 }
477
478                 /* use the previous gen bit for the SOP desc */
479                 dw2 = (txq->cmd_ring.gen ^ 0x1) << VMXNET3_TXD_GEN_SHIFT;
480                 first2fill = txq->cmd_ring.next2fill;
481                 do {
482                         /* Remember the transmit buffer for cleanup */
483                         tbi = txq->cmd_ring.buf_info + txq->cmd_ring.next2fill;
484
485                         /* NB: the following assumes that VMXNET3 maximum
486                          * transmit buffer size (16K) is greater than
487                          * maximum size of mbuf segment size.
488                          */
489                         gdesc = txq->cmd_ring.base + txq->cmd_ring.next2fill;
490
491                         /* Skip empty segments */
492                         if (unlikely(m_seg->data_len == 0))
493                                 continue;
494
495                         if (copy_size) {
496                                 uint64 offset =
497                                         (uint64)txq->cmd_ring.next2fill *
498                                                         txq->txdata_desc_size;
499                                 gdesc->txd.addr =
500                                         rte_cpu_to_le_64(txq->data_ring.basePA +
501                                                          offset);
502                         } else {
503                                 gdesc->txd.addr = rte_mbuf_data_iova(m_seg);
504                         }
505
506                         gdesc->dword[2] = dw2 | m_seg->data_len;
507                         gdesc->dword[3] = 0;
508
509                         /* move to the next2fill descriptor */
510                         vmxnet3_cmd_ring_adv_next2fill(&txq->cmd_ring);
511
512                         /* use the right gen for non-SOP desc */
513                         dw2 = txq->cmd_ring.gen << VMXNET3_TXD_GEN_SHIFT;
514                 } while ((m_seg = m_seg->next) != NULL);
515
516                 /* set the last buf_info for the pkt */
517                 tbi->m = txm;
518                 /* Update the EOP descriptor */
519                 gdesc->dword[3] |= VMXNET3_TXD_EOP | VMXNET3_TXD_CQ;
520
521                 /* Add VLAN tag if present */
522                 gdesc = txq->cmd_ring.base + first2fill;
523                 if (txm->ol_flags & PKT_TX_VLAN_PKT) {
524                         gdesc->txd.ti = 1;
525                         gdesc->txd.tci = txm->vlan_tci;
526                 }
527
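                /*
                 * Offload fields on the SOP descriptor:
                 *   TSO:      om = VMXNET3_OM_TSO, hlen covers L2+L3+L4
                 *             headers, msscof carries the MSS.
                 *   L4 cksum: om = VMXNET3_OM_CSUM, hlen covers L2+L3
                 *             headers, msscof is the byte offset of the
                 *             checksum field from the start of the packet.
                 *   none:     om = VMXNET3_OM_NONE.
                 * txNumDeferred counts the packets (or TSO segments) queued
                 * since the last doorbell; once it reaches txThreshold the
                 * device is notified below.
                 */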
528                 if (tso) {
529                         uint16_t mss = txm->tso_segsz;
530
531                         RTE_ASSERT(mss > 0);
532
533                         gdesc->txd.hlen = txm->l2_len + txm->l3_len + txm->l4_len;
534                         gdesc->txd.om = VMXNET3_OM_TSO;
535                         gdesc->txd.msscof = mss;
536
537                         deferred += (rte_pktmbuf_pkt_len(txm) - gdesc->txd.hlen + mss - 1) / mss;
538                 } else if (txm->ol_flags & PKT_TX_L4_MASK) {
539                         gdesc->txd.om = VMXNET3_OM_CSUM;
540                         gdesc->txd.hlen = txm->l2_len + txm->l3_len;
541
542                         switch (txm->ol_flags & PKT_TX_L4_MASK) {
543                         case PKT_TX_TCP_CKSUM:
544                                 gdesc->txd.msscof = gdesc->txd.hlen +
545                                         offsetof(struct rte_tcp_hdr, cksum);
546                                 break;
547                         case PKT_TX_UDP_CKSUM:
548                                 gdesc->txd.msscof = gdesc->txd.hlen + offsetof(struct rte_udp_hdr, dgram_cksum);
549                                 break;
550                         default:
551                                 PMD_TX_LOG(WARNING, "requested cksum offload not supported %#llx",
552                                            txm->ol_flags & PKT_TX_L4_MASK);
553                                 abort();
554                         }
555                         deferred++;
556                 } else {
557                         gdesc->txd.hlen = 0;
558                         gdesc->txd.om = VMXNET3_OM_NONE;
559                         gdesc->txd.msscof = 0;
560                         deferred++;
561                 }
562
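                /*
                 * The SOP descriptor was written with the inverted gen bit,
                 * so the device ignores the packet while its descriptors are
                 * being filled. The barrier orders those writes before the
                 * final gen flip, which hands the whole packet to the device
                 * in one step.
                 */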
563                 /* flip the GEN bit on the SOP */
564                 rte_compiler_barrier();
565                 gdesc->dword[2] ^= VMXNET3_TXD_GEN;
566
567                 txq_ctrl->txNumDeferred = rte_cpu_to_le_32(deferred);
568                 nb_tx++;
569         }
570
571         PMD_TX_LOG(DEBUG, "vmxnet3 txThreshold: %u", rte_le_to_cpu_32(txq_ctrl->txThreshold));
572
573         if (deferred >= rte_le_to_cpu_32(txq_ctrl->txThreshold)) {
574                 txq_ctrl->txNumDeferred = 0;
575                 /* Notify vSwitch that packets are available. */
576                 VMXNET3_WRITE_BAR0_REG(hw, (VMXNET3_REG_TXPROD + txq->queue_id * VMXNET3_REG_ALIGN),
577                                        txq->cmd_ring.next2fill);
578         }
579
580         return nb_tx;
581 }
582
583 static inline void
584 vmxnet3_renew_desc(vmxnet3_rx_queue_t *rxq, uint8_t ring_id,
585                    struct rte_mbuf *mbuf)
586 {
587         uint32_t val;
588         struct vmxnet3_cmd_ring *ring = &rxq->cmd_ring[ring_id];
589         struct Vmxnet3_RxDesc *rxd =
590                 (struct Vmxnet3_RxDesc *)(ring->base + ring->next2fill);
591         vmxnet3_buf_info_t *buf_info = &ring->buf_info[ring->next2fill];
592
593         if (ring_id == 0) {
594                 /* Usually: One HEAD type buf per packet
595                  * val = (ring->next2fill % rxq->hw->bufs_per_pkt) ?
596                  * VMXNET3_RXD_BTYPE_BODY : VMXNET3_RXD_BTYPE_HEAD;
597                  */
598
599                 /* We use single packet buffer so all heads here */
600                 val = VMXNET3_RXD_BTYPE_HEAD;
601         } else {
602                 /* All BODY type buffers for 2nd ring */
603                 val = VMXNET3_RXD_BTYPE_BODY;
604         }
605
606         /*
607          * Load mbuf pointer into buf_info[ring_size]
608          * buf_info structure is equivalent to cookie for virtio-virtqueue
609          */
610         buf_info->m = mbuf;
611         buf_info->len = (uint16_t)(mbuf->buf_len - RTE_PKTMBUF_HEADROOM);
612         buf_info->bufPA = rte_mbuf_data_iova_default(mbuf);
613
614         /* Load Rx Descriptor with the buffer's GPA */
615         rxd->addr = buf_info->bufPA;
616
617         /* After this point rxd->addr MUST not be NULL */
618         rxd->btype = val;
619         rxd->len = buf_info->len;
620         /* Flip gen bit at the end to change ownership */
621         rxd->gen = ring->gen;
622
623         vmxnet3_cmd_ring_adv_next2fill(ring);
624 }
625 /*
626  *  Allocates mbufs and clusters. Post rx descriptors with buffer details
627  *  so that device can receive packets in those buffers.
628  *  Ring layout:
629  *      Among the two rings, 1st ring contains buffers of type 0 and type 1.
630  *      bufs_per_pkt is set such that for non-LRO cases all the buffers required
631  *      by a frame will fit in 1st ring (1st buf of type0 and rest of type1).
632  *      2nd ring contains buffers of type 1 alone. The 2nd ring is mostly
633  *      used only for LRO.
634  */
635 static int
636 vmxnet3_post_rx_bufs(vmxnet3_rx_queue_t *rxq, uint8_t ring_id)
637 {
638         int err = 0;
639         uint32_t i = 0;
640         struct vmxnet3_cmd_ring *ring = &rxq->cmd_ring[ring_id];
641
642         while (vmxnet3_cmd_ring_desc_avail(ring) > 0) {
643                 struct rte_mbuf *mbuf;
644
645                 /* Allocate blank mbuf for the current Rx Descriptor */
646                 mbuf = rte_mbuf_raw_alloc(rxq->mp);
647                 if (unlikely(mbuf == NULL)) {
648                         PMD_RX_LOG(ERR, "Error allocating mbuf");
649                         rxq->stats.rx_buf_alloc_failure++;
650                         err = ENOMEM;
651                         break;
652                 }
653
654                 vmxnet3_renew_desc(rxq, ring_id, mbuf);
655                 i++;
656         }
657
658         /* Return error only if no buffers are posted at present */
659         if (vmxnet3_cmd_ring_desc_avail(ring) >= (ring->size - 1))
660                 return -err;
661         else
662                 return i;
663 }
664
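/*
 * Walk the Ethernet/IP/TCP headers of the aggregated LRO frame to find the
 * payload offset, then divide the payload length by the segment count the
 * device reported (stashed in rxm->udata64). If the headers cannot be
 * parsed, fall back to an MTU-based estimate.
 */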
665 /* MSS not provided by vmxnet3, guess one with available information */
666 static uint16_t
667 vmxnet3_guess_mss(struct vmxnet3_hw *hw, const Vmxnet3_RxCompDesc *rcd,
668                 struct rte_mbuf *rxm)
669 {
670         uint32_t hlen, slen;
671         struct rte_ipv4_hdr *ipv4_hdr;
672         struct rte_ipv6_hdr *ipv6_hdr;
673         struct rte_tcp_hdr *tcp_hdr;
674         char *ptr;
675
676         RTE_ASSERT(rcd->tcp);
677
678         ptr = rte_pktmbuf_mtod(rxm, char *);
679         slen = rte_pktmbuf_data_len(rxm);
680         hlen = sizeof(struct rte_ether_hdr);
681
682         if (rcd->v4) {
683                 if (unlikely(slen < hlen + sizeof(struct rte_ipv4_hdr)))
684                         return hw->mtu - sizeof(struct rte_ipv4_hdr)
685                                         - sizeof(struct rte_tcp_hdr);
686
687                 ipv4_hdr = (struct rte_ipv4_hdr *)(ptr + hlen);
688                 hlen += (ipv4_hdr->version_ihl & RTE_IPV4_HDR_IHL_MASK) *
689                                 RTE_IPV4_IHL_MULTIPLIER;
690         } else if (rcd->v6) {
691                 if (unlikely(slen < hlen + sizeof(struct rte_ipv6_hdr)))
692                         return hw->mtu - sizeof(struct rte_ipv6_hdr) -
693                                         sizeof(struct rte_tcp_hdr);
694
695                 ipv6_hdr = (struct rte_ipv6_hdr *)(ptr + hlen);
696                 hlen += sizeof(struct rte_ipv6_hdr);
697                 if (unlikely(ipv6_hdr->proto != IPPROTO_TCP)) {
698                         int frag;
699
700                         rte_net_skip_ip6_ext(ipv6_hdr->proto, rxm,
701                                         &hlen, &frag);
702                 }
703         }
704
705         if (unlikely(slen < hlen + sizeof(struct rte_tcp_hdr)))
706                 return hw->mtu - hlen - sizeof(struct rte_tcp_hdr) +
707                                 sizeof(struct rte_ether_hdr);
708
709         tcp_hdr = (struct rte_tcp_hdr *)(ptr + hlen);
710         hlen += (tcp_hdr->data_off & 0xf0) >> 2;
711
712         if (rxm->udata64 > 1)
713                 return (rte_pktmbuf_pkt_len(rxm) - hlen +
714                                 rxm->udata64 - 1) / rxm->udata64;
715         else
716                 return hw->mtu - hlen + sizeof(struct rte_ether_hdr);
717 }
718
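/*
 * SOP completions carry the LRO information (MSS and segment count on v2+
 * devices); EOP completions carry the RSS hash, the stripped VLAN tag and
 * the checksum results. rcd->cnc set means the checksum was not computed,
 * so the L4 status is left unknown.
 */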
719 /* Receive side checksum and other offloads */
720 static inline void
721 vmxnet3_rx_offload(struct vmxnet3_hw *hw, const Vmxnet3_RxCompDesc *rcd,
722                 struct rte_mbuf *rxm, const uint8_t sop)
723 {
724         uint64_t ol_flags = rxm->ol_flags;
725         uint32_t packet_type = rxm->packet_type;
726
727         /* Offloads set in sop */
728         if (sop) {
729                 /* Set packet type */
730                 packet_type |= RTE_PTYPE_L2_ETHER;
731
732                 /* Check large packet receive */
733                 if (VMXNET3_VERSION_GE_2(hw) &&
734                     rcd->type == VMXNET3_CDTYPE_RXCOMP_LRO) {
735                         const Vmxnet3_RxCompDescExt *rcde =
736                                         (const Vmxnet3_RxCompDescExt *)rcd;
737
738                         rxm->tso_segsz = rcde->mss;
739                         rxm->udata64 = rcde->segCnt;
740                         ol_flags |= PKT_RX_LRO;
741                 }
742         } else { /* Offloads set in eop */
743                 /* Check for RSS */
744                 if (rcd->rssType != VMXNET3_RCD_RSS_TYPE_NONE) {
745                         ol_flags |= PKT_RX_RSS_HASH;
746                         rxm->hash.rss = rcd->rssHash;
747                 }
748
749                 /* Check for hardware stripped VLAN tag */
750                 if (rcd->ts) {
751                         ol_flags |= (PKT_RX_VLAN | PKT_RX_VLAN_STRIPPED);
752                         rxm->vlan_tci = rte_le_to_cpu_16((uint16_t)rcd->tci);
753                 }
754
755                 /* Check packet type, checksum errors, etc. */
756                 if (rcd->cnc) {
757                         ol_flags |= PKT_RX_L4_CKSUM_UNKNOWN;
758                 } else {
759                         if (rcd->v4) {
760                                 packet_type |= RTE_PTYPE_L3_IPV4_EXT_UNKNOWN;
761
762                                 if (rcd->ipc)
763                                         ol_flags |= PKT_RX_IP_CKSUM_GOOD;
764                                 else
765                                         ol_flags |= PKT_RX_IP_CKSUM_BAD;
766
767                                 if (rcd->tuc) {
768                                         ol_flags |= PKT_RX_L4_CKSUM_GOOD;
769                                         if (rcd->tcp)
770                                                 packet_type |= RTE_PTYPE_L4_TCP;
771                                         else
772                                                 packet_type |= RTE_PTYPE_L4_UDP;
773                                 } else {
774                                         if (rcd->tcp) {
775                                                 packet_type |= RTE_PTYPE_L4_TCP;
776                                                 ol_flags |= PKT_RX_L4_CKSUM_BAD;
777                                         } else if (rcd->udp) {
778                                                 packet_type |= RTE_PTYPE_L4_UDP;
779                                                 ol_flags |= PKT_RX_L4_CKSUM_BAD;
780                                         }
781                                 }
782                         } else if (rcd->v6) {
783                                 packet_type |= RTE_PTYPE_L3_IPV6_EXT_UNKNOWN;
784
785                                 if (rcd->tuc) {
786                                         ol_flags |= PKT_RX_L4_CKSUM_GOOD;
787                                         if (rcd->tcp)
788                                                 packet_type |= RTE_PTYPE_L4_TCP;
789                                         else
790                                                 packet_type |= RTE_PTYPE_L4_UDP;
791                                 } else {
792                                         if (rcd->tcp) {
793                                                 packet_type |= RTE_PTYPE_L4_TCP;
794                                                 ol_flags |= PKT_RX_L4_CKSUM_BAD;
795                                         } else if (rcd->udp) {
796                                                 packet_type |= RTE_PTYPE_L4_UDP;
797                                                 ol_flags |= PKT_RX_L4_CKSUM_BAD;
798                                         }
799                                 }
800                         } else {
801                                 packet_type |= RTE_PTYPE_UNKNOWN;
802                         }
803
804                         /* Old variants of vmxnet3 do not provide MSS */
805                         if ((ol_flags & PKT_RX_LRO) && rxm->tso_segsz == 0)
806                                 rxm->tso_segsz = vmxnet3_guess_mss(hw,
807                                                 rcd, rxm);
808                 }
809         }
810
811         rxm->ol_flags = ol_flags;
812         rxm->packet_type = packet_type;
813 }
814
815 /*
816  * Process the Rx Completion Ring of given vmxnet3_rx_queue
817  * for nb_pkts burst and return the number of packets received
818  */
819 uint16_t
820 vmxnet3_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t nb_pkts)
821 {
822         uint16_t nb_rx;
823         uint32_t nb_rxd, idx;
824         uint8_t ring_idx;
825         vmxnet3_rx_queue_t *rxq;
826         Vmxnet3_RxCompDesc *rcd;
827         vmxnet3_buf_info_t *rbi;
828         Vmxnet3_RxDesc *rxd;
829         struct rte_mbuf *rxm = NULL;
830         struct vmxnet3_hw *hw;
831
832         nb_rx = 0;
833         ring_idx = 0;
834         nb_rxd = 0;
835         idx = 0;
836
837         rxq = rx_queue;
838         hw = rxq->hw;
839
840         rcd = &rxq->comp_ring.base[rxq->comp_ring.next2proc].rcd;
841
842         if (unlikely(rxq->stopped)) {
843                 PMD_RX_LOG(DEBUG, "Rx queue is stopped.");
844                 return 0;
845         }
846
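        /*
         * A completion descriptor belongs to the driver while its gen bit
         * matches the ring's gen; the device flips the bit when it writes a
         * new completion, and the ring's gen toggles whenever next2proc
         * wraps (see the ring helpers in vmxnet3_ring.h).
         */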
847         while (rcd->gen == rxq->comp_ring.gen) {
848                 struct rte_mbuf *newm;
849
850                 if (nb_rx >= nb_pkts)
851                         break;
852
853                 newm = rte_mbuf_raw_alloc(rxq->mp);
854                 if (unlikely(newm == NULL)) {
855                         PMD_RX_LOG(ERR, "Error allocating mbuf");
856                         rxq->stats.rx_buf_alloc_failure++;
857                         break;
858                 }
859
860                 idx = rcd->rxdIdx;
861                 ring_idx = vmxnet3_get_ring_idx(hw, rcd->rqID);
862                 rxd = (Vmxnet3_RxDesc *)rxq->cmd_ring[ring_idx].base + idx;
863                 RTE_SET_USED(rxd); /* used only for assert when enabled */
864                 rbi = rxq->cmd_ring[ring_idx].buf_info + idx;
865
866                 PMD_RX_LOG(DEBUG, "rxd idx: %d ring idx: %d.", idx, ring_idx);
867
868                 RTE_ASSERT(rcd->len <= rxd->len);
869                 RTE_ASSERT(rbi->m);
870
871                 /* Get the packet buffer pointer from buf_info */
872                 rxm = rbi->m;
873
874                 /* Clear descriptor associated buf_info to be reused */
875                 rbi->m = NULL;
876                 rbi->bufPA = 0;
877
878                 /* Update the index that we received a packet */
879                 rxq->cmd_ring[ring_idx].next2comp = idx;
880
881                 /* For RCD with EOP set, check if there is frame error */
882                 if (unlikely(rcd->eop && rcd->err)) {
883                         rxq->stats.drop_total++;
884                         rxq->stats.drop_err++;
885
886                         if (!rcd->fcs) {
887                                 rxq->stats.drop_fcs++;
888                                 PMD_RX_LOG(ERR, "Recv packet dropped due to frame err.");
889                         }
890                         PMD_RX_LOG(ERR, "Error in received packet rcd#:%d rxd:%d",
891                                    (int)(rcd - (struct Vmxnet3_RxCompDesc *)
892                                          rxq->comp_ring.base), rcd->rxdIdx);
893                         rte_pktmbuf_free_seg(rxm);
894                         if (rxq->start_seg) {
895                                 struct rte_mbuf *start = rxq->start_seg;
896
897                                 rxq->start_seg = NULL;
898                                 rte_pktmbuf_free(start);
899                         }
900                         goto rcd_done;
901                 }
902
903                 /* Initialize newly received packet buffer */
904                 rxm->port = rxq->port_id;
905                 rxm->nb_segs = 1;
906                 rxm->next = NULL;
907                 rxm->pkt_len = (uint16_t)rcd->len;
908                 rxm->data_len = (uint16_t)rcd->len;
909                 rxm->data_off = RTE_PKTMBUF_HEADROOM;
910                 rxm->ol_flags = 0;
911                 rxm->vlan_tci = 0;
912                 rxm->packet_type = 0;
913
914                 /*
915                  * If this is the first buffer of the received packet,
916                  * set the pointer to the first mbuf of the packet
917                  * Otherwise, update the total length and the number of segments
918                  * of the current scattered packet, and update the pointer to
919                  * the last mbuf of the current packet.
920                  */
921                 if (rcd->sop) {
922                         RTE_ASSERT(rxd->btype == VMXNET3_RXD_BTYPE_HEAD);
923
924                         if (unlikely(rcd->len == 0)) {
925                                 RTE_ASSERT(rcd->eop);
926
927                                 PMD_RX_LOG(DEBUG,
928                                            "Rx buf was skipped. rxring[%d][%d])",
929                                            ring_idx, idx);
930                                 rte_pktmbuf_free_seg(rxm);
931                                 goto rcd_done;
932                         }
933
934                         if (vmxnet3_rx_data_ring(hw, rcd->rqID)) {
935                                 uint8_t *rdd = rxq->data_ring.base +
936                                         idx * rxq->data_desc_size;
937
938                                 RTE_ASSERT(VMXNET3_VERSION_GE_3(hw));
939                                 rte_memcpy(rte_pktmbuf_mtod(rxm, char *),
940                                            rdd, rcd->len);
941                         }
942
943                         rxq->start_seg = rxm;
944                         rxq->last_seg = rxm;
945                         vmxnet3_rx_offload(hw, rcd, rxm, 1);
946                 } else {
947                         struct rte_mbuf *start = rxq->start_seg;
948
949                         RTE_ASSERT(rxd->btype == VMXNET3_RXD_BTYPE_BODY);
950
951                         if (rxm->data_len) {
952                                 start->pkt_len += rxm->data_len;
953                                 start->nb_segs++;
954
955                                 rxq->last_seg->next = rxm;
956                                 rxq->last_seg = rxm;
957                         } else {
958                                 rte_pktmbuf_free_seg(rxm);
959                         }
960                 }
961
962                 if (rcd->eop) {
963                         struct rte_mbuf *start = rxq->start_seg;
964
965                         vmxnet3_rx_offload(hw, rcd, start, 0);
966                         rx_pkts[nb_rx++] = start;
967                         rxq->start_seg = NULL;
968                 }
969
970 rcd_done:
971                 rxq->cmd_ring[ring_idx].next2comp = idx;
972                 VMXNET3_INC_RING_IDX_ONLY(rxq->cmd_ring[ring_idx].next2comp,
973                                           rxq->cmd_ring[ring_idx].size);
974
975                 /* It's time to renew descriptors */
976                 vmxnet3_renew_desc(rxq, ring_idx, newm);
977                 if (unlikely(rxq->shared->ctrl.updateRxProd)) {
978                         VMXNET3_WRITE_BAR0_REG(hw, rxprod_reg[ring_idx] + (rxq->queue_id * VMXNET3_REG_ALIGN),
979                                                rxq->cmd_ring[ring_idx].next2fill);
980                 }
981
982                 /* Advance to the next descriptor in comp_ring */
983                 vmxnet3_comp_ring_adv_next2proc(&rxq->comp_ring);
984
985                 rcd = &rxq->comp_ring.base[rxq->comp_ring.next2proc].rcd;
986                 nb_rxd++;
987                 if (nb_rxd > rxq->cmd_ring[0].size) {
988                         PMD_RX_LOG(ERR, "Used up quota of receiving packets,"
989                                    " relinquish control.");
990                         break;
991                 }
992         }
993
994         if (unlikely(nb_rxd == 0)) {
995                 uint32_t avail;
996                 for (ring_idx = 0; ring_idx < VMXNET3_RX_CMDRING_SIZE; ring_idx++) {
997                         avail = vmxnet3_cmd_ring_desc_avail(&rxq->cmd_ring[ring_idx]);
998                         if (unlikely(avail > 0)) {
999                                 /* try to alloc new buf and renew descriptors */
1000                                 vmxnet3_post_rx_bufs(rxq, ring_idx);
1001                         }
1002                 }
1003                 if (unlikely(rxq->shared->ctrl.updateRxProd)) {
1004                         for (ring_idx = 0; ring_idx < VMXNET3_RX_CMDRING_SIZE; ring_idx++) {
1005                                 VMXNET3_WRITE_BAR0_REG(hw, rxprod_reg[ring_idx] + (rxq->queue_id * VMXNET3_REG_ALIGN),
1006                                                        rxq->cmd_ring[ring_idx].next2fill);
1007                         }
1008                 }
1009         }
1010
1011         return nb_rx;
1012 }
1013
1014 int
1015 vmxnet3_dev_tx_queue_setup(struct rte_eth_dev *dev,
1016                            uint16_t queue_idx,
1017                            uint16_t nb_desc,
1018                            unsigned int socket_id,
1019                            const struct rte_eth_txconf *tx_conf __rte_unused)
1020 {
1021         struct vmxnet3_hw *hw = dev->data->dev_private;
1022         const struct rte_memzone *mz;
1023         struct vmxnet3_tx_queue *txq;
1024         struct vmxnet3_cmd_ring *ring;
1025         struct vmxnet3_comp_ring *comp_ring;
1026         struct vmxnet3_data_ring *data_ring;
1027         int size;
1028
1029         PMD_INIT_FUNC_TRACE();
1030
1031         txq = rte_zmalloc("ethdev_tx_queue", sizeof(struct vmxnet3_tx_queue),
1032                           RTE_CACHE_LINE_SIZE);
1033         if (txq == NULL) {
1034                 PMD_INIT_LOG(ERR, "Can not allocate tx queue structure");
1035                 return -ENOMEM;
1036         }
1037
1038         txq->queue_id = queue_idx;
1039         txq->port_id = dev->data->port_id;
1040         txq->shared = NULL; /* set in vmxnet3_setup_driver_shared() */
1041         txq->hw = hw;
1042         txq->qid = queue_idx;
1043         txq->stopped = TRUE;
1044         txq->txdata_desc_size = hw->txdata_desc_size;
1045
1046         ring = &txq->cmd_ring;
1047         comp_ring = &txq->comp_ring;
1048         data_ring = &txq->data_ring;
1049
1050         /* Tx vmxnet ring length should be between 512-4096 */
1051         if (nb_desc < VMXNET3_DEF_TX_RING_SIZE) {
1052                 PMD_INIT_LOG(ERR, "VMXNET3 Tx Ring Size Min: %u",
1053                              VMXNET3_DEF_TX_RING_SIZE);
1054                 return -EINVAL;
1055         } else if (nb_desc > VMXNET3_TX_RING_MAX_SIZE) {
1056                 PMD_INIT_LOG(ERR, "VMXNET3 Tx Ring Size Max: %u",
1057                              VMXNET3_TX_RING_MAX_SIZE);
1058                 return -EINVAL;
1059         } else {
1060                 ring->size = nb_desc;
1061                 ring->size &= ~VMXNET3_RING_SIZE_MASK;
1062         }
1063         comp_ring->size = data_ring->size = ring->size;
1064
1065         /* Tx vmxnet rings structure initialization */
1066         ring->next2fill = 0;
1067         ring->next2comp = 0;
1068         ring->gen = VMXNET3_INIT_GEN;
1069         comp_ring->next2proc = 0;
1070         comp_ring->gen = VMXNET3_INIT_GEN;
1071
1072         size = sizeof(struct Vmxnet3_TxDesc) * ring->size;
1073         size += sizeof(struct Vmxnet3_TxCompDesc) * comp_ring->size;
1074         size += txq->txdata_desc_size * data_ring->size;
1075
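        /*
         * All three rings live in one contiguous memzone, laid out as:
         * Tx descriptors | Tx completion descriptors | data ring.
         * The base/basePA assignments below slice this zone accordingly.
         */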
1076         mz = rte_eth_dma_zone_reserve(dev, "txdesc", queue_idx, size,
1077                                       VMXNET3_RING_BA_ALIGN, socket_id);
1078         if (mz == NULL) {
1079                 PMD_INIT_LOG(ERR, "ERROR: Creating queue descriptors zone");
1080                 return -ENOMEM;
1081         }
1082         txq->mz = mz;
1083         memset(mz->addr, 0, mz->len);
1084
1085         /* cmd_ring initialization */
1086         ring->base = mz->addr;
1087         ring->basePA = mz->iova;
1088
1089         /* comp_ring initialization */
1090         comp_ring->base = ring->base + ring->size;
1091         comp_ring->basePA = ring->basePA +
1092                 (sizeof(struct Vmxnet3_TxDesc) * ring->size);
1093
1094         /* data_ring initialization */
1095         data_ring->base = (Vmxnet3_TxDataDesc *)(comp_ring->base + comp_ring->size);
1096         data_ring->basePA = comp_ring->basePA +
1097                         (sizeof(struct Vmxnet3_TxCompDesc) * comp_ring->size);
1098
1099         /* cmd_ring0 buf_info allocation */
1100         ring->buf_info = rte_zmalloc("tx_ring_buf_info",
1101                                      ring->size * sizeof(vmxnet3_buf_info_t), RTE_CACHE_LINE_SIZE);
1102         if (ring->buf_info == NULL) {
1103                 PMD_INIT_LOG(ERR, "ERROR: Creating tx_buf_info structure");
1104                 return -ENOMEM;
1105         }
1106
1107         /* Update the data portion with txq */
1108         dev->data->tx_queues[queue_idx] = txq;
1109
1110         return 0;
1111 }
1112
1113 int
1114 vmxnet3_dev_rx_queue_setup(struct rte_eth_dev *dev,
1115                            uint16_t queue_idx,
1116                            uint16_t nb_desc,
1117                            unsigned int socket_id,
1118                            __rte_unused const struct rte_eth_rxconf *rx_conf,
1119                            struct rte_mempool *mp)
1120 {
1121         const struct rte_memzone *mz;
1122         struct vmxnet3_rx_queue *rxq;
1123         struct vmxnet3_hw *hw = dev->data->dev_private;
1124         struct vmxnet3_cmd_ring *ring0, *ring1, *ring;
1125         struct vmxnet3_comp_ring *comp_ring;
1126         struct vmxnet3_rx_data_ring *data_ring;
1127         int size;
1128         uint8_t i;
1129         char mem_name[32];
1130
1131         PMD_INIT_FUNC_TRACE();
1132
1133         rxq = rte_zmalloc("ethdev_rx_queue", sizeof(struct vmxnet3_rx_queue),
1134                           RTE_CACHE_LINE_SIZE);
1135         if (rxq == NULL) {
1136                 PMD_INIT_LOG(ERR, "Can not allocate rx queue structure");
1137                 return -ENOMEM;
1138         }
1139
1140         rxq->mp = mp;
1141         rxq->queue_id = queue_idx;
1142         rxq->port_id = dev->data->port_id;
1143         rxq->shared = NULL; /* set in vmxnet3_setup_driver_shared() */
1144         rxq->hw = hw;
1145         rxq->qid1 = queue_idx;
1146         rxq->qid2 = queue_idx + hw->num_rx_queues;
1147         rxq->data_ring_qid = queue_idx + 2 * hw->num_rx_queues;
1148         rxq->data_desc_size = hw->rxdata_desc_size;
1149         rxq->stopped = TRUE;
1150
1151         ring0 = &rxq->cmd_ring[0];
1152         ring1 = &rxq->cmd_ring[1];
1153         comp_ring = &rxq->comp_ring;
1154         data_ring = &rxq->data_ring;
1155
1156         /* Rx vmxnet rings length should be between 256-4096 */
1157         if (nb_desc < VMXNET3_DEF_RX_RING_SIZE) {
1158                 PMD_INIT_LOG(ERR, "VMXNET3 Rx Ring Size Min: 256");
1159                 return -EINVAL;
1160         } else if (nb_desc > VMXNET3_RX_RING_MAX_SIZE) {
1161                 PMD_INIT_LOG(ERR, "VMXNET3 Rx Ring Size Max: 4096");
1162                 return -EINVAL;
1163         } else {
1164                 ring0->size = nb_desc;
1165                 ring0->size &= ~VMXNET3_RING_SIZE_MASK;
1166                 ring1->size = ring0->size;
1167         }
1168
1169         comp_ring->size = ring0->size + ring1->size;
1170         data_ring->size = ring0->size;
1171
1172         /* Rx vmxnet rings structure initialization */
1173         ring0->next2fill = 0;
1174         ring1->next2fill = 0;
1175         ring0->next2comp = 0;
1176         ring1->next2comp = 0;
1177         ring0->gen = VMXNET3_INIT_GEN;
1178         ring1->gen = VMXNET3_INIT_GEN;
1179         comp_ring->next2proc = 0;
1180         comp_ring->gen = VMXNET3_INIT_GEN;
1181
1182         size = sizeof(struct Vmxnet3_RxDesc) * (ring0->size + ring1->size);
1183         size += sizeof(struct Vmxnet3_RxCompDesc) * comp_ring->size;
1184         if (VMXNET3_VERSION_GE_3(hw) && rxq->data_desc_size)
1185                 size += rxq->data_desc_size * data_ring->size;
1186
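        /*
         * Single contiguous memzone holding: cmd ring 0 | cmd ring 1 |
         * completion ring | (optional, v3+) Rx data ring, sliced up by the
         * base/basePA assignments below.
         */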
1187         mz = rte_eth_dma_zone_reserve(dev, "rxdesc", queue_idx, size,
1188                                       VMXNET3_RING_BA_ALIGN, socket_id);
1189         if (mz == NULL) {
1190                 PMD_INIT_LOG(ERR, "ERROR: Creating queue descriptors zone");
1191                 return -ENOMEM;
1192         }
1193         rxq->mz = mz;
1194         memset(mz->addr, 0, mz->len);
1195
1196         /* cmd_ring0 initialization */
1197         ring0->base = mz->addr;
1198         ring0->basePA = mz->iova;
1199
1200         /* cmd_ring1 initialization */
1201         ring1->base = ring0->base + ring0->size;
1202         ring1->basePA = ring0->basePA + sizeof(struct Vmxnet3_RxDesc) * ring0->size;
1203
1204         /* comp_ring initialization */
1205         comp_ring->base = ring1->base + ring1->size;
1206         comp_ring->basePA = ring1->basePA + sizeof(struct Vmxnet3_RxDesc) *
1207                 ring1->size;
1208
1209         /* data_ring initialization */
1210         if (VMXNET3_VERSION_GE_3(hw) && rxq->data_desc_size) {
1211                 data_ring->base =
1212                         (uint8_t *)(comp_ring->base + comp_ring->size);
1213                 data_ring->basePA = comp_ring->basePA +
1214                         sizeof(struct Vmxnet3_RxCompDesc) * comp_ring->size;
1215         }
1216
1217         /* cmd_ring0-cmd_ring1 buf_info allocation */
1218         for (i = 0; i < VMXNET3_RX_CMDRING_SIZE; i++) {
1219
1220                 ring = &rxq->cmd_ring[i];
1221                 ring->rid = i;
1222                 snprintf(mem_name, sizeof(mem_name), "rx_ring_%d_buf_info", i);
1223
1224                 ring->buf_info = rte_zmalloc(mem_name,
1225                                              ring->size * sizeof(vmxnet3_buf_info_t),
1226                                              RTE_CACHE_LINE_SIZE);
1227                 if (ring->buf_info == NULL) {
1228                         PMD_INIT_LOG(ERR, "ERROR: Creating rx_buf_info structure");
1229                         return -ENOMEM;
1230                 }
1231         }
1232
1233         /* Update the data portion with rxq */
1234         dev->data->rx_queues[queue_idx] = rxq;
1235
1236         return 0;
1237 }
1238
1239 /*
1240  * Initializes Receive Unit
1241  * Load mbufs in rx queue in advance
1242  */
1243 int
1244 vmxnet3_dev_rxtx_init(struct rte_eth_dev *dev)
1245 {
1246         struct vmxnet3_hw *hw = dev->data->dev_private;
1247
1248         int i, ret;
1249         uint8_t j;
1250
1251         PMD_INIT_FUNC_TRACE();
1252
1253         for (i = 0; i < hw->num_rx_queues; i++) {
1254                 vmxnet3_rx_queue_t *rxq = dev->data->rx_queues[i];
1255
1256                 for (j = 0; j < VMXNET3_RX_CMDRING_SIZE; j++) {
1257                         /* Fill the whole ring with newly allocated mbufs */
1258                         ret = vmxnet3_post_rx_bufs(rxq, j);
1259                         if (ret <= 0) {
1260                                 PMD_INIT_LOG(ERR,
1261                                              "ERROR: Posting Rxq: %d buffers ring: %d",
1262                                              i, j);
1263                                 return -ret;
1264                         }
1265                         /*
1266                          * Updating device with the index:next2fill to fill the
1267                          * mbufs for coming packets.
1268                          */
1269                         if (unlikely(rxq->shared->ctrl.updateRxProd)) {
1270                                 VMXNET3_WRITE_BAR0_REG(hw, rxprod_reg[j] + (rxq->queue_id * VMXNET3_REG_ALIGN),
1271                                                        rxq->cmd_ring[j].next2fill);
1272                         }
1273                 }
1274                 rxq->stopped = FALSE;
1275                 rxq->start_seg = NULL;
1276         }
1277
1278         for (i = 0; i < dev->data->nb_tx_queues; i++) {
1279                 struct vmxnet3_tx_queue *txq = dev->data->tx_queues[i];
1280
1281                 txq->stopped = FALSE;
1282         }
1283
1284         return 0;
1285 }
1286
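/* Default 40-byte Toeplitz hash key, used when the application does not
 * supply its own RSS key in the port configuration.
 */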
1287 static uint8_t rss_intel_key[40] = {
1288         0x6D, 0x5A, 0x56, 0xDA, 0x25, 0x5B, 0x0E, 0xC2,
1289         0x41, 0x67, 0x25, 0x3D, 0x43, 0xA3, 0x8F, 0xB0,
1290         0xD0, 0xCA, 0x2B, 0xCB, 0xAE, 0x7B, 0x30, 0xB4,
1291         0x77, 0xCB, 0x2D, 0xA3, 0x80, 0x30, 0xF2, 0x0C,
1292         0x6A, 0x42, 0xB7, 0x3B, 0xBE, 0xAC, 0x01, 0xFA,
1293 };
1294
1295 /*
1296  * Additional RSS configurations based on vmxnet v4+ APIs
1297  */
1298 int
1299 vmxnet3_v4_rss_configure(struct rte_eth_dev *dev)
1300 {
1301         struct vmxnet3_hw *hw = dev->data->dev_private;
1302         Vmxnet3_DriverShared *shared = hw->shared;
1303         Vmxnet3_CmdInfo *cmdInfo = &shared->cu.cmdInfo;
1304         struct rte_eth_rss_conf *port_rss_conf;
1305         uint64_t rss_hf;
1306         uint32_t ret;
1307
1308         PMD_INIT_FUNC_TRACE();
1309
1310         cmdInfo->setRSSFields = 0;
1311         port_rss_conf = &dev->data->dev_conf.rx_adv_conf.rss_conf;
1312         rss_hf = port_rss_conf->rss_hf &
1313                 (VMXNET3_V4_RSS_MASK | VMXNET3_RSS_OFFLOAD_ALL);
1314
1315         if (rss_hf & ETH_RSS_NONFRAG_IPV4_TCP)
1316                 cmdInfo->setRSSFields |= VMXNET3_RSS_FIELDS_TCPIP4;
1317         if (rss_hf & ETH_RSS_NONFRAG_IPV6_TCP)
1318                 cmdInfo->setRSSFields |= VMXNET3_RSS_FIELDS_TCPIP6;
1319         if (rss_hf & ETH_RSS_NONFRAG_IPV4_UDP)
1320                 cmdInfo->setRSSFields |= VMXNET3_RSS_FIELDS_UDPIP4;
1321         if (rss_hf & ETH_RSS_NONFRAG_IPV6_UDP)
1322                 cmdInfo->setRSSFields |= VMXNET3_RSS_FIELDS_UDPIP6;
1323
1324         VMXNET3_WRITE_BAR1_REG(hw, VMXNET3_REG_CMD,
1325                                VMXNET3_CMD_SET_RSS_FIELDS);
1326         ret = VMXNET3_READ_BAR1_REG(hw, VMXNET3_REG_CMD);
1327
1328         if (ret != VMXNET3_SUCCESS) {
1329                 PMD_DRV_LOG(ERR, "Set RSS fields (v4) failed: %d", ret);
1330         }
1331
1332         return ret;
1333 }
1334
1335 /*
1336  * Configure RSS feature
1337  */
1338 int
1339 vmxnet3_rss_configure(struct rte_eth_dev *dev)
1340 {
1341         struct vmxnet3_hw *hw = dev->data->dev_private;
1342         struct VMXNET3_RSSConf *dev_rss_conf;
1343         struct rte_eth_rss_conf *port_rss_conf;
1344         uint64_t rss_hf;
1345         uint8_t i, j;
1346
1347         PMD_INIT_FUNC_TRACE();
1348
1349         dev_rss_conf = hw->rss_conf;
1350         port_rss_conf = &dev->data->dev_conf.rx_adv_conf.rss_conf;
1351
1352         /* loading hashFunc */
1353         dev_rss_conf->hashFunc = VMXNET3_RSS_HASH_FUNC_TOEPLITZ;
1354         /* loading hashKeySize */
1355         dev_rss_conf->hashKeySize = VMXNET3_RSS_MAX_KEY_SIZE;
1356         /* loading indTableSize: Must not exceed VMXNET3_RSS_MAX_IND_TABLE_SIZE (128)*/
1357         dev_rss_conf->indTableSize = (uint16_t)(hw->num_rx_queues * 4);
1358
1359         if (port_rss_conf->rss_key == NULL) {
1360                 /* Default hash key */
1361                 port_rss_conf->rss_key = rss_intel_key;
1362         }
1363
1364         /* loading hashKey */
1365         memcpy(&dev_rss_conf->hashKey[0], port_rss_conf->rss_key,
1366                dev_rss_conf->hashKeySize);
1367
1368         /* loading indTable */
1369         for (i = 0, j = 0; i < dev_rss_conf->indTableSize; i++, j++) {
1370                 if (j == dev->data->nb_rx_queues)
1371                         j = 0;
1372                 dev_rss_conf->indTable[i] = j;
1373         }
1374
1375         /* loading hashType */
1376         dev_rss_conf->hashType = 0;
1377         rss_hf = port_rss_conf->rss_hf & VMXNET3_RSS_OFFLOAD_ALL;
1378         if (rss_hf & ETH_RSS_IPV4)
1379                 dev_rss_conf->hashType |= VMXNET3_RSS_HASH_TYPE_IPV4;
1380         if (rss_hf & ETH_RSS_NONFRAG_IPV4_TCP)
1381                 dev_rss_conf->hashType |= VMXNET3_RSS_HASH_TYPE_TCP_IPV4;
1382         if (rss_hf & ETH_RSS_IPV6)
1383                 dev_rss_conf->hashType |= VMXNET3_RSS_HASH_TYPE_IPV6;
1384         if (rss_hf & ETH_RSS_NONFRAG_IPV6_TCP)
1385                 dev_rss_conf->hashType |= VMXNET3_RSS_HASH_TYPE_TCP_IPV6;
1386
1387         return VMXNET3_SUCCESS;
1388 }