drivers/net/vmxnet3/vmxnet3_rxtx.c
1 /* SPDX-License-Identifier: BSD-3-Clause
2  * Copyright(c) 2010-2015 Intel Corporation
3  */
4
5 #include <sys/queue.h>
6
7 #include <stdio.h>
8 #include <stdlib.h>
9 #include <string.h>
10 #include <errno.h>
11 #include <stdint.h>
12 #include <stdarg.h>
13 #include <unistd.h>
14 #include <inttypes.h>
15
16 #include <rte_byteorder.h>
17 #include <rte_common.h>
18 #include <rte_cycles.h>
19 #include <rte_log.h>
20 #include <rte_debug.h>
21 #include <rte_interrupts.h>
22 #include <rte_pci.h>
23 #include <rte_memory.h>
24 #include <rte_memzone.h>
25 #include <rte_launch.h>
26 #include <rte_eal.h>
27 #include <rte_per_lcore.h>
28 #include <rte_lcore.h>
29 #include <rte_atomic.h>
30 #include <rte_branch_prediction.h>
31 #include <rte_mempool.h>
32 #include <rte_malloc.h>
33 #include <rte_mbuf.h>
34 #include <rte_ether.h>
35 #include <ethdev_driver.h>
36 #include <rte_prefetch.h>
37 #include <rte_ip.h>
38 #include <rte_udp.h>
39 #include <rte_tcp.h>
40 #include <rte_sctp.h>
41 #include <rte_string_fns.h>
42 #include <rte_errno.h>
43 #include <rte_net.h>
44
45 #include "base/vmxnet3_defs.h"
46 #include "vmxnet3_ring.h"
47
48 #include "vmxnet3_logs.h"
49 #include "vmxnet3_ethdev.h"
50
51 #define VMXNET3_TX_OFFLOAD_MASK (RTE_MBUF_F_TX_VLAN | \
52                 RTE_MBUF_F_TX_IPV6 |     \
53                 RTE_MBUF_F_TX_IPV4 |     \
54                 RTE_MBUF_F_TX_L4_MASK |  \
55                 RTE_MBUF_F_TX_TCP_SEG)
56
57 #define VMXNET3_TX_OFFLOAD_NOTSUP_MASK  \
58         (RTE_MBUF_F_TX_OFFLOAD_MASK ^ VMXNET3_TX_OFFLOAD_MASK)
59
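/* BAR0 registers used to post the Rx producer index (next2fill) of command ring 0 and ring 1 */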
60 static const uint32_t rxprod_reg[2] = {VMXNET3_REG_RXPROD, VMXNET3_REG_RXPROD2};
61
62 static int vmxnet3_post_rx_bufs(vmxnet3_rx_queue_t*, uint8_t);
63 static void vmxnet3_tq_tx_complete(vmxnet3_tx_queue_t *);
64 #ifdef RTE_LIBRTE_VMXNET3_DEBUG_DRIVER_NOT_USED
65 static void vmxnet3_rxq_dump(struct vmxnet3_rx_queue *);
66 static void vmxnet3_txq_dump(struct vmxnet3_tx_queue *);
67 #endif
68
69 #ifdef RTE_LIBRTE_VMXNET3_DEBUG_DRIVER_NOT_USED
70 static void
71 vmxnet3_rxq_dump(struct vmxnet3_rx_queue *rxq)
72 {
73         uint32_t avail = 0;
74
75         if (rxq == NULL)
76                 return;
77
78         PMD_RX_LOG(DEBUG,
79                    "RXQ: cmd0 base : %p cmd1 base : %p comp ring base : %p.",
80                    rxq->cmd_ring[0].base, rxq->cmd_ring[1].base, rxq->comp_ring.base);
81         PMD_RX_LOG(DEBUG,
82                    "RXQ: cmd0 basePA : 0x%lx cmd1 basePA : 0x%lx comp ring basePA : 0x%lx.",
83                    (unsigned long)rxq->cmd_ring[0].basePA,
84                    (unsigned long)rxq->cmd_ring[1].basePA,
85                    (unsigned long)rxq->comp_ring.basePA);
86
87         avail = vmxnet3_cmd_ring_desc_avail(&rxq->cmd_ring[0]);
88         PMD_RX_LOG(DEBUG,
89                    "RXQ:cmd0: size=%u; free=%u; next2proc=%u; queued=%u",
90                    (uint32_t)rxq->cmd_ring[0].size, avail,
91                    rxq->comp_ring.next2proc,
92                    rxq->cmd_ring[0].size - avail);
93
94         avail = vmxnet3_cmd_ring_desc_avail(&rxq->cmd_ring[1]);
95         PMD_RX_LOG(DEBUG, "RXQ:cmd1 size=%u; free=%u; next2proc=%u; queued=%u",
96                    (uint32_t)rxq->cmd_ring[1].size, avail, rxq->comp_ring.next2proc,
97                    rxq->cmd_ring[1].size - avail);
98
99 }
100
101 static void
102 vmxnet3_txq_dump(struct vmxnet3_tx_queue *txq)
103 {
104         uint32_t avail = 0;
105
106         if (txq == NULL)
107                 return;
108
109         PMD_TX_LOG(DEBUG, "TXQ: cmd base : %p comp ring base : %p data ring base : %p.",
110                    txq->cmd_ring.base, txq->comp_ring.base, txq->data_ring.base);
111         PMD_TX_LOG(DEBUG, "TXQ: cmd basePA : 0x%lx comp ring basePA : 0x%lx data ring basePA : 0x%lx.",
112                    (unsigned long)txq->cmd_ring.basePA,
113                    (unsigned long)txq->comp_ring.basePA,
114                    (unsigned long)txq->data_ring.basePA);
115
116         avail = vmxnet3_cmd_ring_desc_avail(&txq->cmd_ring);
117         PMD_TX_LOG(DEBUG, "TXQ: size=%u; free=%u; next2proc=%u; queued=%u",
118                    (uint32_t)txq->cmd_ring.size, avail,
119                    txq->comp_ring.next2proc, txq->cmd_ring.size - avail);
120 }
121 #endif
122
123 static void
124 vmxnet3_tx_cmd_ring_release_mbufs(vmxnet3_cmd_ring_t *ring)
125 {
126         while (ring->next2comp != ring->next2fill) {
127                 /* No need to worry about desc ownership, device is quiesced by now. */
128                 vmxnet3_buf_info_t *buf_info = ring->buf_info + ring->next2comp;
129
130                 if (buf_info->m) {
131                         rte_pktmbuf_free(buf_info->m);
132                         buf_info->m = NULL;
133                         buf_info->bufPA = 0;
134                         buf_info->len = 0;
135                 }
136                 vmxnet3_cmd_ring_adv_next2comp(ring);
137         }
138 }
139
140 static void
141 vmxnet3_rx_cmd_ring_release_mbufs(vmxnet3_cmd_ring_t *ring)
142 {
143         uint32_t i;
144
145         for (i = 0; i < ring->size; i++) {
146                 /* No need to worry about desc ownership, device is quiesced by now. */
147                 vmxnet3_buf_info_t *buf_info = &ring->buf_info[i];
148
149                 if (buf_info->m) {
150                         rte_pktmbuf_free_seg(buf_info->m);
151                         buf_info->m = NULL;
152                         buf_info->bufPA = 0;
153                         buf_info->len = 0;
154                 }
155                 vmxnet3_cmd_ring_adv_next2comp(ring);
156         }
157 }
158
159 static void
160 vmxnet3_cmd_ring_release(vmxnet3_cmd_ring_t *ring)
161 {
162         rte_free(ring->buf_info);
163         ring->buf_info = NULL;
164 }
165
166 void
167 vmxnet3_dev_tx_queue_release(struct rte_eth_dev *dev, uint16_t qid)
168 {
169         vmxnet3_tx_queue_t *tq = dev->data->tx_queues[qid];
170
171         if (tq != NULL) {
172                 /* Release mbufs */
173                 vmxnet3_tx_cmd_ring_release_mbufs(&tq->cmd_ring);
174                 /* Release the cmd_ring */
175                 vmxnet3_cmd_ring_release(&tq->cmd_ring);
176                 /* Release the memzone */
177                 rte_memzone_free(tq->mz);
178                 /* Release the queue */
179                 rte_free(tq);
180         }
181 }
182
183 void
184 vmxnet3_dev_rx_queue_release(struct rte_eth_dev *dev, uint16_t qid)
185 {
186         int i;
187         vmxnet3_rx_queue_t *rq = dev->data->rx_queues[qid];
188
189         if (rq != NULL) {
190                 /* Release mbufs */
191                 for (i = 0; i < VMXNET3_RX_CMDRING_SIZE; i++)
192                         vmxnet3_rx_cmd_ring_release_mbufs(&rq->cmd_ring[i]);
193
194                 /* Release both the cmd_rings */
195                 for (i = 0; i < VMXNET3_RX_CMDRING_SIZE; i++)
196                         vmxnet3_cmd_ring_release(&rq->cmd_ring[i]);
197
198                 /* Release the memzone */
199                 rte_memzone_free(rq->mz);
200
201                 /* Release the queue */
202                 rte_free(rq);
203         }
204 }
205
206 static void
207 vmxnet3_dev_tx_queue_reset(void *txq)
208 {
209         vmxnet3_tx_queue_t *tq = txq;
210         struct vmxnet3_cmd_ring *ring = &tq->cmd_ring;
211         struct vmxnet3_comp_ring *comp_ring = &tq->comp_ring;
212         struct vmxnet3_data_ring *data_ring = &tq->data_ring;
213         int size;
214
215         if (tq != NULL) {
216                 /* Release the cmd_ring mbufs */
217                 vmxnet3_tx_cmd_ring_release_mbufs(&tq->cmd_ring);
218         }
219
220         /* Tx vmxnet rings structure initialization */
221         ring->next2fill = 0;
222         ring->next2comp = 0;
223         ring->gen = VMXNET3_INIT_GEN;
224         comp_ring->next2proc = 0;
225         comp_ring->gen = VMXNET3_INIT_GEN;
226
227         size = sizeof(struct Vmxnet3_TxDesc) * ring->size;
228         size += sizeof(struct Vmxnet3_TxCompDesc) * comp_ring->size;
229         size += tq->txdata_desc_size * data_ring->size;
230
231         memset(ring->base, 0, size);
232 }
233
234 static void
235 vmxnet3_dev_rx_queue_reset(void *rxq)
236 {
237         int i;
238         vmxnet3_rx_queue_t *rq = rxq;
239         struct vmxnet3_hw *hw = rq->hw;
240         struct vmxnet3_cmd_ring *ring0, *ring1;
241         struct vmxnet3_comp_ring *comp_ring;
242         struct vmxnet3_rx_data_ring *data_ring = &rq->data_ring;
243         int size;
244
245         /* Release both the cmd_rings mbufs */
246         for (i = 0; i < VMXNET3_RX_CMDRING_SIZE; i++)
247                 vmxnet3_rx_cmd_ring_release_mbufs(&rq->cmd_ring[i]);
248
249         ring0 = &rq->cmd_ring[0];
250         ring1 = &rq->cmd_ring[1];
251         comp_ring = &rq->comp_ring;
252
253         /* Rx vmxnet rings structure initialization */
254         ring0->next2fill = 0;
255         ring1->next2fill = 0;
256         ring0->next2comp = 0;
257         ring1->next2comp = 0;
258         ring0->gen = VMXNET3_INIT_GEN;
259         ring1->gen = VMXNET3_INIT_GEN;
260         comp_ring->next2proc = 0;
261         comp_ring->gen = VMXNET3_INIT_GEN;
262
263         size = sizeof(struct Vmxnet3_RxDesc) * (ring0->size + ring1->size);
264         size += sizeof(struct Vmxnet3_RxCompDesc) * comp_ring->size;
265         if (VMXNET3_VERSION_GE_3(hw) && rq->data_desc_size)
266                 size += rq->data_desc_size * data_ring->size;
267
268         memset(ring0->base, 0, size);
269 }
270
271 void
272 vmxnet3_dev_clear_queues(struct rte_eth_dev *dev)
273 {
274         unsigned i;
275
276         PMD_INIT_FUNC_TRACE();
277
278         for (i = 0; i < dev->data->nb_tx_queues; i++) {
279                 struct vmxnet3_tx_queue *txq = dev->data->tx_queues[i];
280
281                 if (txq != NULL) {
282                         txq->stopped = TRUE;
283                         vmxnet3_dev_tx_queue_reset(txq);
284                 }
285         }
286
287         for (i = 0; i < dev->data->nb_rx_queues; i++) {
288                 struct vmxnet3_rx_queue *rxq = dev->data->rx_queues[i];
289
290                 if (rxq != NULL) {
291                         rxq->stopped = TRUE;
292                         vmxnet3_dev_rx_queue_reset(rxq);
293                 }
294         }
295 }
296
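/*
 * Free the mbuf attached to the EOP descriptor eop_idx and reclaim all
 * command ring descriptors of that packet; returns the number of
 * descriptors reclaimed.
 */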
297 static int
298 vmxnet3_unmap_pkt(uint16_t eop_idx, vmxnet3_tx_queue_t *txq)
299 {
300         int completed = 0;
301         struct rte_mbuf *mbuf;
302
303         /* Release cmd_ring descriptor and free mbuf */
304         RTE_ASSERT(txq->cmd_ring.base[eop_idx].txd.eop == 1);
305
306         mbuf = txq->cmd_ring.buf_info[eop_idx].m;
307         if (mbuf == NULL)
308                 rte_panic("EOP desc does not point to a valid mbuf");
309         rte_pktmbuf_free(mbuf);
310
311         txq->cmd_ring.buf_info[eop_idx].m = NULL;
312
313         while (txq->cmd_ring.next2comp != eop_idx) {
314                 /* no out-of-order completion */
315                 RTE_ASSERT(txq->cmd_ring.base[txq->cmd_ring.next2comp].txd.cq == 0);
316                 vmxnet3_cmd_ring_adv_next2comp(&txq->cmd_ring);
317                 completed++;
318         }
319
320         /* Mark the txd for which tcd was generated as completed */
321         vmxnet3_cmd_ring_adv_next2comp(&txq->cmd_ring);
322
323         return completed + 1;
324 }
325
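/*
 * Walk the Tx completion ring and release the command ring descriptors
 * and mbufs of all completed packets.
 */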
326 static void
327 vmxnet3_tq_tx_complete(vmxnet3_tx_queue_t *txq)
328 {
329         int completed = 0;
330         vmxnet3_comp_ring_t *comp_ring = &txq->comp_ring;
331         struct Vmxnet3_TxCompDesc *tcd = (struct Vmxnet3_TxCompDesc *)
332                 (comp_ring->base + comp_ring->next2proc);
333
334         while (tcd->gen == comp_ring->gen) {
335                 completed += vmxnet3_unmap_pkt(tcd->txdIdx, txq);
336
337                 vmxnet3_comp_ring_adv_next2proc(comp_ring);
338                 tcd = (struct Vmxnet3_TxCompDesc *)(comp_ring->base +
339                                                     comp_ring->next2proc);
340         }
341
342         PMD_TX_LOG(DEBUG, "Processed %d tx comps & command descs.", completed);
343
344         /* To avoid compiler warnings when not in DEBUG mode. */
345         RTE_SET_USED(completed);
346 }
347
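/*
 * Tx prepare callback: reject packets with unsupported offloads or too many
 * segments and run rte_net_intel_cksum_prepare() to set up the pseudo-header
 * checksums expected for checksum offload and TSO.
 */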
348 uint16_t
349 vmxnet3_prep_pkts(__rte_unused void *tx_queue, struct rte_mbuf **tx_pkts,
350         uint16_t nb_pkts)
351 {
352         int32_t ret;
353         uint32_t i;
354         uint64_t ol_flags;
355         struct rte_mbuf *m;
356
357         for (i = 0; i != nb_pkts; i++) {
358                 m = tx_pkts[i];
359                 ol_flags = m->ol_flags;
360
361                 /* Non-TSO packet cannot occupy more than
362                  * VMXNET3_MAX_TXD_PER_PKT TX descriptors.
363                  */
364                 if ((ol_flags & RTE_MBUF_F_TX_TCP_SEG) == 0 &&
365                                 m->nb_segs > VMXNET3_MAX_TXD_PER_PKT) {
366                         rte_errno = EINVAL;
367                         return i;
368                 }
369
370                 /* check that only supported TX offloads are requested. */
371                 if ((ol_flags & VMXNET3_TX_OFFLOAD_NOTSUP_MASK) != 0 ||
372                                 (ol_flags & RTE_MBUF_F_TX_L4_MASK) ==
373                                 RTE_MBUF_F_TX_SCTP_CKSUM) {
374                         rte_errno = ENOTSUP;
375                         return i;
376                 }
377
378 #ifdef RTE_LIBRTE_ETHDEV_DEBUG
379                 ret = rte_validate_tx_offload(m);
380                 if (ret != 0) {
381                         rte_errno = -ret;
382                         return i;
383                 }
384 #endif
385                 ret = rte_net_intel_cksum_prepare(m);
386                 if (ret != 0) {
387                         rte_errno = -ret;
388                         return i;
389                 }
390         }
391
392         return i;
393 }
394
395 uint16_t
396 vmxnet3_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts,
397                   uint16_t nb_pkts)
398 {
399         uint16_t nb_tx;
400         vmxnet3_tx_queue_t *txq = tx_queue;
401         struct vmxnet3_hw *hw = txq->hw;
402         Vmxnet3_TxQueueCtrl *txq_ctrl = &txq->shared->ctrl;
403         uint32_t deferred = rte_le_to_cpu_32(txq_ctrl->txNumDeferred);
404
405         if (unlikely(txq->stopped)) {
406                 PMD_TX_LOG(DEBUG, "Tx queue is stopped.");
407                 return 0;
408         }
409
410         /* Free up the comp_descriptors aggressively */
411         vmxnet3_tq_tx_complete(txq);
412
413         nb_tx = 0;
414         while (nb_tx < nb_pkts) {
415                 Vmxnet3_GenericDesc *gdesc;
416                 vmxnet3_buf_info_t *tbi;
417                 uint32_t first2fill, avail, dw2;
418                 struct rte_mbuf *txm = tx_pkts[nb_tx];
419                 struct rte_mbuf *m_seg = txm;
420                 int copy_size = 0;
421                 bool tso = (txm->ol_flags & RTE_MBUF_F_TX_TCP_SEG) != 0;
422                 /* # of descriptors needed for a packet. */
423                 unsigned count = txm->nb_segs;
424
425                 avail = vmxnet3_cmd_ring_desc_avail(&txq->cmd_ring);
426                 if (count > avail) {
427                         /* Is command ring full? */
428                         if (unlikely(avail == 0)) {
429                                 PMD_TX_LOG(DEBUG, "No free ring descriptors");
430                                 txq->stats.tx_ring_full++;
431                                 txq->stats.drop_total += (nb_pkts - nb_tx);
432                                 break;
433                         }
434
435                         /* Command ring is not full but cannot handle the
436                          * multi-segmented packet. Let's try the next packet
437                          * in this case.
438                          */
439                         PMD_TX_LOG(DEBUG, "Running out of ring descriptors "
440                                    "(avail %d needed %d)", avail, count);
441                         txq->stats.drop_total++;
442                         if (tso)
443                                 txq->stats.drop_tso++;
444                         rte_pktmbuf_free(txm);
445                         nb_tx++;
446                         continue;
447                 }
448
449                 /* Drop non-TSO packet that is excessively fragmented */
450                 if (unlikely(!tso && count > VMXNET3_MAX_TXD_PER_PKT)) {
451                         PMD_TX_LOG(ERR, "Non-TSO packet cannot occupy more than %d tx "
452                                    "descriptors. Packet dropped.", VMXNET3_MAX_TXD_PER_PKT);
453                         txq->stats.drop_too_many_segs++;
454                         txq->stats.drop_total++;
455                         rte_pktmbuf_free(txm);
456                         nb_tx++;
457                         continue;
458                 }
459
460                 if (txm->nb_segs == 1 &&
461                     rte_pktmbuf_pkt_len(txm) <= txq->txdata_desc_size) {
462                         struct Vmxnet3_TxDataDesc *tdd;
463
464                         /* Skip empty packets */
465                         if (unlikely(rte_pktmbuf_pkt_len(txm) == 0)) {
466                                 txq->stats.drop_total++;
467                                 rte_pktmbuf_free(txm);
468                                 nb_tx++;
469                                 continue;
470                         }
471
472                         tdd = (struct Vmxnet3_TxDataDesc *)
473                                 ((uint8 *)txq->data_ring.base +
474                                  txq->cmd_ring.next2fill *
475                                  txq->txdata_desc_size);
476                         copy_size = rte_pktmbuf_pkt_len(txm);
477                         rte_memcpy(tdd->data, rte_pktmbuf_mtod(txm, char *), copy_size);
478                 }
479
480                 /* use the previous gen bit for the SOP desc */
481                 dw2 = (txq->cmd_ring.gen ^ 0x1) << VMXNET3_TXD_GEN_SHIFT;
482                 first2fill = txq->cmd_ring.next2fill;
483                 do {
484                         /* Remember the transmit buffer for cleanup */
485                         tbi = txq->cmd_ring.buf_info + txq->cmd_ring.next2fill;
486
487                         /* NB: the following assumes that the VMXNET3 maximum
488                          * transmit buffer size (16K) is greater than the
489                          * maximum mbuf segment size.
490                          */
491                         gdesc = txq->cmd_ring.base + txq->cmd_ring.next2fill;
492
493                         /* Skip empty segments */
494                         if (unlikely(m_seg->data_len == 0))
495                                 continue;
496
497                         if (copy_size) {
498                                 uint64 offset =
499                                         (uint64)txq->cmd_ring.next2fill *
500                                                         txq->txdata_desc_size;
501                                 gdesc->txd.addr =
502                                         rte_cpu_to_le_64(txq->data_ring.basePA +
503                                                          offset);
504                         } else {
505                                 gdesc->txd.addr = rte_mbuf_data_iova(m_seg);
506                         }
507
508                         gdesc->dword[2] = dw2 | m_seg->data_len;
509                         gdesc->dword[3] = 0;
510
511                         /* move to the next2fill descriptor */
512                         vmxnet3_cmd_ring_adv_next2fill(&txq->cmd_ring);
513
514                         /* use the right gen for non-SOP desc */
515                         dw2 = txq->cmd_ring.gen << VMXNET3_TXD_GEN_SHIFT;
516                 } while ((m_seg = m_seg->next) != NULL);
517
518                 /* set the last buf_info for the pkt */
519                 tbi->m = txm;
520                 /* Update the EOP descriptor */
521                 gdesc->dword[3] |= VMXNET3_TXD_EOP | VMXNET3_TXD_CQ;
522
523                 /* Add VLAN tag if present */
524                 gdesc = txq->cmd_ring.base + first2fill;
525                 if (txm->ol_flags & RTE_MBUF_F_TX_VLAN) {
526                         gdesc->txd.ti = 1;
527                         gdesc->txd.tci = txm->vlan_tci;
528                 }
529
530                 if (tso) {
531                         uint16_t mss = txm->tso_segsz;
532
533                         RTE_ASSERT(mss > 0);
534
535                         gdesc->txd.hlen = txm->l2_len + txm->l3_len + txm->l4_len;
536                         gdesc->txd.om = VMXNET3_OM_TSO;
537                         gdesc->txd.msscof = mss;
538
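                        /* Count one deferred packet per resulting TSO segment: ceil((pkt_len - hlen) / mss) */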
539                         deferred += (rte_pktmbuf_pkt_len(txm) - gdesc->txd.hlen + mss - 1) / mss;
540                 } else if (txm->ol_flags & RTE_MBUF_F_TX_L4_MASK) {
541                         gdesc->txd.om = VMXNET3_OM_CSUM;
542                         gdesc->txd.hlen = txm->l2_len + txm->l3_len;
543
544                         switch (txm->ol_flags & RTE_MBUF_F_TX_L4_MASK) {
545                         case RTE_MBUF_F_TX_TCP_CKSUM:
546                                 gdesc->txd.msscof = gdesc->txd.hlen +
547                                         offsetof(struct rte_tcp_hdr, cksum);
548                                 break;
549                         case RTE_MBUF_F_TX_UDP_CKSUM:
550                                 gdesc->txd.msscof = gdesc->txd.hlen +
551                                         offsetof(struct rte_udp_hdr,
552                                                 dgram_cksum);
553                                 break;
554                         default:
555                                 PMD_TX_LOG(WARNING, "requested cksum offload not supported %#" PRIx64,
556                                            txm->ol_flags & RTE_MBUF_F_TX_L4_MASK);
557                                 abort();
558                         }
559                         deferred++;
560                 } else {
561                         gdesc->txd.hlen = 0;
562                         gdesc->txd.om = VMXNET3_OM_NONE;
563                         gdesc->txd.msscof = 0;
564                         deferred++;
565                 }
566
567                 /* flip the GEN bit on the SOP */
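                /* The compiler barrier keeps all descriptor stores ordered before the gen flip below */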
568                 rte_compiler_barrier();
569                 gdesc->dword[2] ^= VMXNET3_TXD_GEN;
570
571                 txq_ctrl->txNumDeferred = rte_cpu_to_le_32(deferred);
572                 nb_tx++;
573         }
574
575         PMD_TX_LOG(DEBUG, "vmxnet3 txThreshold: %u", rte_le_to_cpu_32(txq_ctrl->txThreshold));
576
577         if (deferred >= rte_le_to_cpu_32(txq_ctrl->txThreshold)) {
578                 txq_ctrl->txNumDeferred = 0;
579                 /* Notify vSwitch that packets are available. */
580                 VMXNET3_WRITE_BAR0_REG(hw, (VMXNET3_REG_TXPROD + txq->queue_id * VMXNET3_REG_ALIGN),
581                                        txq->cmd_ring.next2fill);
582         }
583
584         return nb_tx;
585 }
586
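/*
 * Refill one descriptor of the given Rx command ring with a fresh mbuf and
 * hand it to the device by setting its gen bit.
 */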
587 static inline void
588 vmxnet3_renew_desc(vmxnet3_rx_queue_t *rxq, uint8_t ring_id,
589                    struct rte_mbuf *mbuf)
590 {
591         uint32_t val;
592         struct vmxnet3_cmd_ring *ring = &rxq->cmd_ring[ring_id];
593         struct Vmxnet3_RxDesc *rxd =
594                 (struct Vmxnet3_RxDesc *)(ring->base + ring->next2fill);
595         vmxnet3_buf_info_t *buf_info = &ring->buf_info[ring->next2fill];
596
597         if (ring_id == 0) {
598                 /* Usually: One HEAD type buf per packet
599                  * val = (ring->next2fill % rxq->hw->bufs_per_pkt) ?
600                  * VMXNET3_RXD_BTYPE_BODY : VMXNET3_RXD_BTYPE_HEAD;
601                  */
602
603                 /* We use a single buffer per packet, so all descriptors here are HEAD type */
604                 val = VMXNET3_RXD_BTYPE_HEAD;
605         } else {
606                 /* All BODY type buffers for 2nd ring */
607                 val = VMXNET3_RXD_BTYPE_BODY;
608         }
609
610         /*
611          * Load the mbuf pointer into buf_info[next2fill];
612          * the buf_info structure is the equivalent of the cookie in a virtio virtqueue
613          */
614         buf_info->m = mbuf;
615         buf_info->len = (uint16_t)(mbuf->buf_len - RTE_PKTMBUF_HEADROOM);
616         buf_info->bufPA = rte_mbuf_data_iova_default(mbuf);
617
618         /* Load Rx Descriptor with the buffer's GPA */
619         rxd->addr = buf_info->bufPA;
620
621         /* After this point rxd->addr MUST not be NULL */
622         rxd->btype = val;
623         rxd->len = buf_info->len;
624         /* Flip gen bit at the end to change ownership */
625         rxd->gen = ring->gen;
626
627         vmxnet3_cmd_ring_adv_next2fill(ring);
628 }
629 /*
630  *  Allocates mbufs and posts Rx descriptors with the buffer details
631  *  so that the device can receive packets into those buffers.
632  *  Ring layout:
633  *      Of the two rings, the 1st ring contains buffers of type 0 and type 1.
634  *      bufs_per_pkt is set such that for non-LRO cases all the buffers required
635  *      by a frame fit in the 1st ring (1st buf of type 0, the rest of type 1).
636  *      The 2nd ring contains buffers of type 1 only and is mostly used
637  *      for LRO.
638  */
639 static int
640 vmxnet3_post_rx_bufs(vmxnet3_rx_queue_t *rxq, uint8_t ring_id)
641 {
642         int err = 0;
643         uint32_t i = 0;
644         struct vmxnet3_cmd_ring *ring = &rxq->cmd_ring[ring_id];
645
646         while (vmxnet3_cmd_ring_desc_avail(ring) > 0) {
647                 struct rte_mbuf *mbuf;
648
649                 /* Allocate blank mbuf for the current Rx Descriptor */
650                 mbuf = rte_mbuf_raw_alloc(rxq->mp);
651                 if (unlikely(mbuf == NULL)) {
652                         PMD_RX_LOG(ERR, "Error allocating mbuf");
653                         rxq->stats.rx_buf_alloc_failure++;
654                         err = ENOMEM;
655                         break;
656                 }
657
658                 vmxnet3_renew_desc(rxq, ring_id, mbuf);
659                 i++;
660         }
661
662         /* Return error only if no buffers are posted at present */
663         if (vmxnet3_cmd_ring_desc_avail(ring) >= (ring->size - 1))
664                 return -err;
665         else
666                 return i;
667 }
668
669 /* MSS is not provided by vmxnet3; guess one from the available information */
670 static uint16_t
671 vmxnet3_guess_mss(struct vmxnet3_hw *hw, const Vmxnet3_RxCompDesc *rcd,
672                 struct rte_mbuf *rxm)
673 {
674         uint32_t hlen, slen;
675         struct rte_ipv4_hdr *ipv4_hdr;
676         struct rte_ipv6_hdr *ipv6_hdr;
677         struct rte_tcp_hdr *tcp_hdr;
678         char *ptr;
679         uint8_t segs;
680
681         RTE_ASSERT(rcd->tcp);
682
683         ptr = rte_pktmbuf_mtod(rxm, char *);
684         slen = rte_pktmbuf_data_len(rxm);
685         hlen = sizeof(struct rte_ether_hdr);
686
687         if (rcd->v4) {
688                 if (unlikely(slen < hlen + sizeof(struct rte_ipv4_hdr)))
689                         return hw->mtu - sizeof(struct rte_ipv4_hdr)
690                                         - sizeof(struct rte_tcp_hdr);
691
692                 ipv4_hdr = (struct rte_ipv4_hdr *)(ptr + hlen);
693                 hlen += rte_ipv4_hdr_len(ipv4_hdr);
694         } else if (rcd->v6) {
695                 if (unlikely(slen < hlen + sizeof(struct rte_ipv6_hdr)))
696                         return hw->mtu - sizeof(struct rte_ipv6_hdr) -
697                                         sizeof(struct rte_tcp_hdr);
698
699                 ipv6_hdr = (struct rte_ipv6_hdr *)(ptr + hlen);
700                 hlen += sizeof(struct rte_ipv6_hdr);
701                 if (unlikely(ipv6_hdr->proto != IPPROTO_TCP)) {
702                         int frag;
703
704                         rte_net_skip_ip6_ext(ipv6_hdr->proto, rxm,
705                                         &hlen, &frag);
706                 }
707         }
708
709         if (unlikely(slen < hlen + sizeof(struct rte_tcp_hdr)))
710                 return hw->mtu - hlen - sizeof(struct rte_tcp_hdr) +
711                                 sizeof(struct rte_ether_hdr);
712
713         tcp_hdr = (struct rte_tcp_hdr *)(ptr + hlen);
714         hlen += (tcp_hdr->data_off & 0xf0) >> 2;
715
716         segs = *vmxnet3_segs_dynfield(rxm);
717         if (segs > 1)
718                 return (rte_pktmbuf_pkt_len(rxm) - hlen + segs - 1) / segs;
719         else
720                 return hw->mtu - hlen + sizeof(struct rte_ether_hdr);
721 }
722
723 /* Receive side checksum and other offloads */
724 static inline void
725 vmxnet3_rx_offload(struct vmxnet3_hw *hw, const Vmxnet3_RxCompDesc *rcd,
726                 struct rte_mbuf *rxm, const uint8_t sop)
727 {
728         uint64_t ol_flags = rxm->ol_flags;
729         uint32_t packet_type = rxm->packet_type;
730
731         /* Offloads set in sop */
732         if (sop) {
733                 /* Set packet type */
734                 packet_type |= RTE_PTYPE_L2_ETHER;
735
736                 /* Check large packet receive */
737                 if (VMXNET3_VERSION_GE_2(hw) &&
738                     rcd->type == VMXNET3_CDTYPE_RXCOMP_LRO) {
739                         const Vmxnet3_RxCompDescExt *rcde =
740                                         (const Vmxnet3_RxCompDescExt *)rcd;
741
742                         rxm->tso_segsz = rcde->mss;
743                         *vmxnet3_segs_dynfield(rxm) = rcde->segCnt;
744                         ol_flags |= RTE_MBUF_F_RX_LRO;
745                 }
746         } else { /* Offloads set in eop */
747                 /* Check for RSS */
748                 if (rcd->rssType != VMXNET3_RCD_RSS_TYPE_NONE) {
749                         ol_flags |= RTE_MBUF_F_RX_RSS_HASH;
750                         rxm->hash.rss = rcd->rssHash;
751                 }
752
753                 /* Check for hardware stripped VLAN tag */
754                 if (rcd->ts) {
755                         ol_flags |= (RTE_MBUF_F_RX_VLAN | RTE_MBUF_F_RX_VLAN_STRIPPED);
756                         rxm->vlan_tci = rte_le_to_cpu_16((uint16_t)rcd->tci);
757                 }
758
759                 /* Check packet type, checksum errors, etc. */
760                 if (rcd->cnc) {
761                         ol_flags |= RTE_MBUF_F_RX_L4_CKSUM_UNKNOWN;
762
763                         if (rcd->v4) {
764                                 packet_type |= RTE_PTYPE_L3_IPV4_EXT_UNKNOWN;
765                                 if (rcd->tcp)
766                                         packet_type |= RTE_PTYPE_L4_TCP;
767                                 else if (rcd->udp)
768                                         packet_type |= RTE_PTYPE_L4_UDP;
769                         } else if (rcd->v6) {
770                                 packet_type |= RTE_PTYPE_L3_IPV6_EXT_UNKNOWN;
771                                 if (rcd->tcp)
772                                         packet_type |= RTE_PTYPE_L4_TCP;
773                                 else if (rcd->udp)
774                                         packet_type |= RTE_PTYPE_L4_UDP;
775                         } else {
776                                 packet_type |= RTE_PTYPE_UNKNOWN;
777                         }
778
779                 } else {
780                         if (rcd->v4) {
781                                 packet_type |= RTE_PTYPE_L3_IPV4_EXT_UNKNOWN;
782
783                                 if (rcd->ipc)
784                                         ol_flags |= RTE_MBUF_F_RX_IP_CKSUM_GOOD;
785                                 else
786                                         ol_flags |= RTE_MBUF_F_RX_IP_CKSUM_BAD;
787
788                                 if (rcd->tuc) {
789                                         ol_flags |= RTE_MBUF_F_RX_L4_CKSUM_GOOD;
790                                         if (rcd->tcp)
791                                                 packet_type |= RTE_PTYPE_L4_TCP;
792                                         else
793                                                 packet_type |= RTE_PTYPE_L4_UDP;
794                                 } else {
795                                         if (rcd->tcp) {
796                                                 packet_type |= RTE_PTYPE_L4_TCP;
797                                                 ol_flags |= RTE_MBUF_F_RX_L4_CKSUM_BAD;
798                                         } else if (rcd->udp) {
799                                                 packet_type |= RTE_PTYPE_L4_UDP;
800                                                 ol_flags |= RTE_MBUF_F_RX_L4_CKSUM_BAD;
801                                         }
802                                 }
803                         } else if (rcd->v6) {
804                                 packet_type |= RTE_PTYPE_L3_IPV6_EXT_UNKNOWN;
805
806                                 if (rcd->tuc) {
807                                         ol_flags |= RTE_MBUF_F_RX_L4_CKSUM_GOOD;
808                                         if (rcd->tcp)
809                                                 packet_type |= RTE_PTYPE_L4_TCP;
810                                         else
811                                                 packet_type |= RTE_PTYPE_L4_UDP;
812                                 } else {
813                                         if (rcd->tcp) {
814                                                 packet_type |= RTE_PTYPE_L4_TCP;
815                                                 ol_flags |= RTE_MBUF_F_RX_L4_CKSUM_BAD;
816                                         } else if (rcd->udp) {
817                                                 packet_type |= RTE_PTYPE_L4_UDP;
818                                                 ol_flags |= RTE_MBUF_F_RX_L4_CKSUM_BAD;
819                                         }
820                                 }
821                         } else {
822                                 packet_type |= RTE_PTYPE_UNKNOWN;
823                         }
824
825                         /* Old variants of vmxnet3 do not provide MSS */
826                         if ((ol_flags & RTE_MBUF_F_RX_LRO) && rxm->tso_segsz == 0)
827                                 rxm->tso_segsz = vmxnet3_guess_mss(hw,
828                                                 rcd, rxm);
829                 }
830         }
831
832         rxm->ol_flags = ol_flags;
833         rxm->packet_type = packet_type;
834 }
835
836 /*
837  * Process the Rx completion ring of the given vmxnet3_rx_queue
838  * for a burst of up to nb_pkts packets and return the number of packets received
839  */
840 uint16_t
841 vmxnet3_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t nb_pkts)
842 {
843         uint16_t nb_rx;
844         uint32_t nb_rxd, idx;
845         uint8_t ring_idx;
846         vmxnet3_rx_queue_t *rxq;
847         Vmxnet3_RxCompDesc *rcd;
848         vmxnet3_buf_info_t *rbi;
849         Vmxnet3_RxDesc *rxd;
850         struct rte_mbuf *rxm = NULL;
851         struct vmxnet3_hw *hw;
852
853         nb_rx = 0;
854         ring_idx = 0;
855         nb_rxd = 0;
856         idx = 0;
857
858         rxq = rx_queue;
859         hw = rxq->hw;
860
861         rcd = &rxq->comp_ring.base[rxq->comp_ring.next2proc].rcd;
862
863         if (unlikely(rxq->stopped)) {
864                 PMD_RX_LOG(DEBUG, "Rx queue is stopped.");
865                 return 0;
866         }
867
868         while (rcd->gen == rxq->comp_ring.gen) {
869                 struct rte_mbuf *newm;
870
871                 if (nb_rx >= nb_pkts)
872                         break;
873
874                 newm = rte_mbuf_raw_alloc(rxq->mp);
875                 if (unlikely(newm == NULL)) {
876                         PMD_RX_LOG(ERR, "Error allocating mbuf");
877                         rxq->stats.rx_buf_alloc_failure++;
878                         break;
879                 }
880
881                 idx = rcd->rxdIdx;
882                 ring_idx = vmxnet3_get_ring_idx(hw, rcd->rqID);
883                 rxd = (Vmxnet3_RxDesc *)rxq->cmd_ring[ring_idx].base + idx;
884                 RTE_SET_USED(rxd); /* used only for assert when enabled */
885                 rbi = rxq->cmd_ring[ring_idx].buf_info + idx;
886
887                 PMD_RX_LOG(DEBUG, "rxd idx: %d ring idx: %d.", idx, ring_idx);
888
889                 RTE_ASSERT(rcd->len <= rxd->len);
890                 RTE_ASSERT(rbi->m);
891
892                 /* Get the packet buffer pointer from buf_info */
893                 rxm = rbi->m;
894
895                 /* Clear descriptor associated buf_info to be reused */
896                 rbi->m = NULL;
897                 rbi->bufPA = 0;
898
899                 /* Update next2comp to the index of the descriptor just received */
900                 rxq->cmd_ring[ring_idx].next2comp = idx;
901
902                 /* For RCD with EOP set, check if there is frame error */
903                 if (unlikely(rcd->eop && rcd->err)) {
904                         rxq->stats.drop_total++;
905                         rxq->stats.drop_err++;
906
907                         if (!rcd->fcs) {
908                                 rxq->stats.drop_fcs++;
909                                 PMD_RX_LOG(ERR, "Recv packet dropped due to frame err.");
910                         }
911                         PMD_RX_LOG(ERR, "Error in received packet rcd#:%d rxd:%d",
912                                    (int)(rcd - (struct Vmxnet3_RxCompDesc *)
913                                          rxq->comp_ring.base), rcd->rxdIdx);
914                         rte_pktmbuf_free_seg(rxm);
915                         if (rxq->start_seg) {
916                                 struct rte_mbuf *start = rxq->start_seg;
917
918                                 rxq->start_seg = NULL;
919                                 rte_pktmbuf_free(start);
920                         }
921                         goto rcd_done;
922                 }
923
924                 /* Initialize newly received packet buffer */
925                 rxm->port = rxq->port_id;
926                 rxm->nb_segs = 1;
927                 rxm->next = NULL;
928                 rxm->pkt_len = (uint16_t)rcd->len;
929                 rxm->data_len = (uint16_t)rcd->len;
930                 rxm->data_off = RTE_PKTMBUF_HEADROOM;
931                 rxm->ol_flags = 0;
932                 rxm->vlan_tci = 0;
933                 rxm->packet_type = 0;
934
935                 /*
936                  * If this is the first buffer of the received packet,
937                  * set the pointer to the first mbuf of the packet
938                  * Otherwise, update the total length and the number of segments
939                  * of the current scattered packet, and update the pointer to
940                  * the last mbuf of the current packet.
941                  */
942                 if (rcd->sop) {
943                         RTE_ASSERT(rxd->btype == VMXNET3_RXD_BTYPE_HEAD);
944
945                         if (unlikely(rcd->len == 0)) {
946                                 RTE_ASSERT(rcd->eop);
947
948                                 PMD_RX_LOG(DEBUG,
949                                            "Rx buf was skipped. rxring[%d][%d]",
950                                            ring_idx, idx);
951                                 rte_pktmbuf_free_seg(rxm);
952                                 goto rcd_done;
953                         }
954
955                         if (vmxnet3_rx_data_ring(hw, rcd->rqID)) {
956                                 uint8_t *rdd = rxq->data_ring.base +
957                                         idx * rxq->data_desc_size;
958
959                                 RTE_ASSERT(VMXNET3_VERSION_GE_3(hw));
960                                 rte_memcpy(rte_pktmbuf_mtod(rxm, char *),
961                                            rdd, rcd->len);
962                         }
963
964                         rxq->start_seg = rxm;
965                         rxq->last_seg = rxm;
966                         vmxnet3_rx_offload(hw, rcd, rxm, 1);
967                 } else {
968                         struct rte_mbuf *start = rxq->start_seg;
969
970                         RTE_ASSERT(rxd->btype == VMXNET3_RXD_BTYPE_BODY);
971
972                         if (likely(start && rxm->data_len > 0)) {
973                                 start->pkt_len += rxm->data_len;
974                                 start->nb_segs++;
975
976                                 rxq->last_seg->next = rxm;
977                                 rxq->last_seg = rxm;
978                         } else {
979                                 PMD_RX_LOG(ERR, "Error received empty or out of order frame.");
980                                 rxq->stats.drop_total++;
981                                 rxq->stats.drop_err++;
982
983                                 rte_pktmbuf_free_seg(rxm);
984                         }
985                 }
986
987                 if (rcd->eop) {
988                         struct rte_mbuf *start = rxq->start_seg;
989
990                         vmxnet3_rx_offload(hw, rcd, start, 0);
991                         rx_pkts[nb_rx++] = start;
992                         rxq->start_seg = NULL;
993                 }
994
995 rcd_done:
996                 rxq->cmd_ring[ring_idx].next2comp = idx;
997                 VMXNET3_INC_RING_IDX_ONLY(rxq->cmd_ring[ring_idx].next2comp,
998                                           rxq->cmd_ring[ring_idx].size);
999
1000                 /* It's time to renew descriptors */
1001                 vmxnet3_renew_desc(rxq, ring_idx, newm);
1002                 if (unlikely(rxq->shared->ctrl.updateRxProd)) {
1003                         VMXNET3_WRITE_BAR0_REG(hw, rxprod_reg[ring_idx] + (rxq->queue_id * VMXNET3_REG_ALIGN),
1004                                                rxq->cmd_ring[ring_idx].next2fill);
1005                 }
1006
1007                 /* Advance to the next descriptor in comp_ring */
1008                 vmxnet3_comp_ring_adv_next2proc(&rxq->comp_ring);
1009
1010                 rcd = &rxq->comp_ring.base[rxq->comp_ring.next2proc].rcd;
1011                 nb_rxd++;
1012                 if (nb_rxd > rxq->cmd_ring[0].size) {
1013                         PMD_RX_LOG(ERR, "Used up quota of receiving packets,"
1014                                    " relinquish control.");
1015                         break;
1016                 }
1017         }
1018
1019         if (unlikely(nb_rxd == 0)) {
1020                 uint32_t avail;
1021                 for (ring_idx = 0; ring_idx < VMXNET3_RX_CMDRING_SIZE; ring_idx++) {
1022                         avail = vmxnet3_cmd_ring_desc_avail(&rxq->cmd_ring[ring_idx]);
1023                         if (unlikely(avail > 0)) {
1024                                 /* try to alloc new buf and renew descriptors */
1025                                 vmxnet3_post_rx_bufs(rxq, ring_idx);
1026                         }
1027                 }
1028                 if (unlikely(rxq->shared->ctrl.updateRxProd)) {
1029                         for (ring_idx = 0; ring_idx < VMXNET3_RX_CMDRING_SIZE; ring_idx++) {
1030                                 VMXNET3_WRITE_BAR0_REG(hw, rxprod_reg[ring_idx] + (rxq->queue_id * VMXNET3_REG_ALIGN),
1031                                                        rxq->cmd_ring[ring_idx].next2fill);
1032                         }
1033                 }
1034         }
1035
1036         return nb_rx;
1037 }
1038
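/* Count the completed packets (EOP descriptors) pending in the completion ring without consuming them. */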
1039 uint32_t
1040 vmxnet3_dev_rx_queue_count(void *rx_queue)
1041 {
1042         const vmxnet3_rx_queue_t *rxq;
1043         const Vmxnet3_RxCompDesc *rcd;
1044         uint32_t idx, nb_rxd = 0;
1045         uint8_t gen;
1046
1047         rxq = rx_queue;
1048         if (unlikely(rxq->stopped)) {
1049                 PMD_RX_LOG(DEBUG, "Rx queue is stopped.");
1050                 return 0;
1051         }
1052
1053         gen = rxq->comp_ring.gen;
1054         idx = rxq->comp_ring.next2proc;
1055         rcd = &rxq->comp_ring.base[idx].rcd;
1056         while (rcd->gen == gen) {
1057                 if (rcd->eop)
1058                         ++nb_rxd;
1059                 if (++idx == rxq->comp_ring.size) {
1060                         idx = 0;
1061                         gen ^= 1;
1062                 }
1063                 rcd = &rxq->comp_ring.base[idx].rcd;
1064         }
1065
1066         return nb_rxd;
1067 }
1068
1069 int
1070 vmxnet3_dev_tx_queue_setup(struct rte_eth_dev *dev,
1071                            uint16_t queue_idx,
1072                            uint16_t nb_desc,
1073                            unsigned int socket_id,
1074                            const struct rte_eth_txconf *tx_conf __rte_unused)
1075 {
1076         struct vmxnet3_hw *hw = dev->data->dev_private;
1077         const struct rte_memzone *mz;
1078         struct vmxnet3_tx_queue *txq;
1079         struct vmxnet3_cmd_ring *ring;
1080         struct vmxnet3_comp_ring *comp_ring;
1081         struct vmxnet3_data_ring *data_ring;
1082         int size;
1083
1084         PMD_INIT_FUNC_TRACE();
1085
1086         txq = rte_zmalloc("ethdev_tx_queue", sizeof(struct vmxnet3_tx_queue),
1087                           RTE_CACHE_LINE_SIZE);
1088         if (txq == NULL) {
1089                 PMD_INIT_LOG(ERR, "Can not allocate tx queue structure");
1090                 return -ENOMEM;
1091         }
1092
1093         txq->queue_id = queue_idx;
1094         txq->port_id = dev->data->port_id;
1095         txq->shared = NULL; /* set in vmxnet3_setup_driver_shared() */
1096         txq->hw = hw;
1097         txq->qid = queue_idx;
1098         txq->stopped = TRUE;
1099         txq->txdata_desc_size = hw->txdata_desc_size;
1100
1101         ring = &txq->cmd_ring;
1102         comp_ring = &txq->comp_ring;
1103         data_ring = &txq->data_ring;
1104
1105         /* Tx vmxnet ring length should be between 512 and 4096 */
1106         if (nb_desc < VMXNET3_DEF_TX_RING_SIZE) {
1107                 PMD_INIT_LOG(ERR, "VMXNET3 Tx Ring Size Min: %u",
1108                              VMXNET3_DEF_TX_RING_SIZE);
1109                 return -EINVAL;
1110         } else if (nb_desc > VMXNET3_TX_RING_MAX_SIZE) {
1111                 PMD_INIT_LOG(ERR, "VMXNET3 Tx Ring Size Max: %u",
1112                              VMXNET3_TX_RING_MAX_SIZE);
1113                 return -EINVAL;
1114         } else {
1115                 ring->size = nb_desc;
1116                 ring->size &= ~VMXNET3_RING_SIZE_MASK;
1117         }
1118         comp_ring->size = data_ring->size = ring->size;
1119
1120         /* Tx vmxnet rings structure initialization */
1121         ring->next2fill = 0;
1122         ring->next2comp = 0;
1123         ring->gen = VMXNET3_INIT_GEN;
1124         comp_ring->next2proc = 0;
1125         comp_ring->gen = VMXNET3_INIT_GEN;
1126
1127         size = sizeof(struct Vmxnet3_TxDesc) * ring->size;
1128         size += sizeof(struct Vmxnet3_TxCompDesc) * comp_ring->size;
1129         size += txq->txdata_desc_size * data_ring->size;
1130
1131         mz = rte_eth_dma_zone_reserve(dev, "txdesc", queue_idx, size,
1132                                       VMXNET3_RING_BA_ALIGN, socket_id);
1133         if (mz == NULL) {
1134                 PMD_INIT_LOG(ERR, "ERROR: Creating queue descriptors zone");
1135                 return -ENOMEM;
1136         }
1137         txq->mz = mz;
1138         memset(mz->addr, 0, mz->len);
1139
1140         /* cmd_ring initialization */
1141         ring->base = mz->addr;
1142         ring->basePA = mz->iova;
1143
1144         /* comp_ring initialization */
1145         comp_ring->base = ring->base + ring->size;
1146         comp_ring->basePA = ring->basePA +
1147                 (sizeof(struct Vmxnet3_TxDesc) * ring->size);
1148
1149         /* data_ring initialization */
1150         data_ring->base = (Vmxnet3_TxDataDesc *)(comp_ring->base + comp_ring->size);
1151         data_ring->basePA = comp_ring->basePA +
1152                         (sizeof(struct Vmxnet3_TxCompDesc) * comp_ring->size);
1153
1154         /* cmd_ring0 buf_info allocation */
1155         ring->buf_info = rte_zmalloc("tx_ring_buf_info",
1156                                      ring->size * sizeof(vmxnet3_buf_info_t), RTE_CACHE_LINE_SIZE);
1157         if (ring->buf_info == NULL) {
1158                 PMD_INIT_LOG(ERR, "ERROR: Creating tx_buf_info structure");
1159                 return -ENOMEM;
1160         }
1161
1162         /* Update the data portion with txq */
1163         dev->data->tx_queues[queue_idx] = txq;
1164
1165         return 0;
1166 }
1167
1168 int
1169 vmxnet3_dev_rx_queue_setup(struct rte_eth_dev *dev,
1170                            uint16_t queue_idx,
1171                            uint16_t nb_desc,
1172                            unsigned int socket_id,
1173                            __rte_unused const struct rte_eth_rxconf *rx_conf,
1174                            struct rte_mempool *mp)
1175 {
1176         const struct rte_memzone *mz;
1177         struct vmxnet3_rx_queue *rxq;
1178         struct vmxnet3_hw *hw = dev->data->dev_private;
1179         struct vmxnet3_cmd_ring *ring0, *ring1, *ring;
1180         struct vmxnet3_comp_ring *comp_ring;
1181         struct vmxnet3_rx_data_ring *data_ring;
1182         int size;
1183         uint8_t i;
1184         char mem_name[32];
1185
1186         PMD_INIT_FUNC_TRACE();
1187
1188         rxq = rte_zmalloc("ethdev_rx_queue", sizeof(struct vmxnet3_rx_queue),
1189                           RTE_CACHE_LINE_SIZE);
1190         if (rxq == NULL) {
1191                 PMD_INIT_LOG(ERR, "Can not allocate rx queue structure");
1192                 return -ENOMEM;
1193         }
1194
1195         rxq->mp = mp;
1196         rxq->queue_id = queue_idx;
1197         rxq->port_id = dev->data->port_id;
1198         rxq->shared = NULL; /* set in vmxnet3_setup_driver_shared() */
1199         rxq->hw = hw;
1200         rxq->qid1 = queue_idx;
1201         rxq->qid2 = queue_idx + hw->num_rx_queues;
1202         rxq->data_ring_qid = queue_idx + 2 * hw->num_rx_queues;
1203         rxq->data_desc_size = hw->rxdata_desc_size;
1204         rxq->stopped = TRUE;
1205
1206         ring0 = &rxq->cmd_ring[0];
1207         ring1 = &rxq->cmd_ring[1];
1208         comp_ring = &rxq->comp_ring;
1209         data_ring = &rxq->data_ring;
1210
1211         /* Rx vmxnet ring length should be between 256 and 4096 */
1212         if (nb_desc < VMXNET3_DEF_RX_RING_SIZE) {
1213                 PMD_INIT_LOG(ERR, "VMXNET3 Rx Ring Size Min: 256");
1214                 return -EINVAL;
1215         } else if (nb_desc > VMXNET3_RX_RING_MAX_SIZE) {
1216                 PMD_INIT_LOG(ERR, "VMXNET3 Rx Ring Size Max: 4096");
1217                 return -EINVAL;
1218         } else {
1219                 ring0->size = nb_desc;
1220                 ring0->size &= ~VMXNET3_RING_SIZE_MASK;
1221                 ring1->size = ring0->size;
1222         }
1223
1224         comp_ring->size = ring0->size + ring1->size;
1225         data_ring->size = ring0->size;
1226
1227         /* Rx vmxnet rings structure initialization */
1228         ring0->next2fill = 0;
1229         ring1->next2fill = 0;
1230         ring0->next2comp = 0;
1231         ring1->next2comp = 0;
1232         ring0->gen = VMXNET3_INIT_GEN;
1233         ring1->gen = VMXNET3_INIT_GEN;
1234         comp_ring->next2proc = 0;
1235         comp_ring->gen = VMXNET3_INIT_GEN;
1236
1237         size = sizeof(struct Vmxnet3_RxDesc) * (ring0->size + ring1->size);
1238         size += sizeof(struct Vmxnet3_RxCompDesc) * comp_ring->size;
1239         if (VMXNET3_VERSION_GE_3(hw) && rxq->data_desc_size)
1240                 size += rxq->data_desc_size * data_ring->size;
1241
1242         mz = rte_eth_dma_zone_reserve(dev, "rxdesc", queue_idx, size,
1243                                       VMXNET3_RING_BA_ALIGN, socket_id);
1244         if (mz == NULL) {
1245                 PMD_INIT_LOG(ERR, "ERROR: Creating queue descriptors zone");
1246                 return -ENOMEM;
1247         }
1248         rxq->mz = mz;
1249         memset(mz->addr, 0, mz->len);
1250
1251         /* cmd_ring0 initialization */
1252         ring0->base = mz->addr;
1253         ring0->basePA = mz->iova;
1254
1255         /* cmd_ring1 initialization */
1256         ring1->base = ring0->base + ring0->size;
1257         ring1->basePA = ring0->basePA + sizeof(struct Vmxnet3_RxDesc) * ring0->size;
1258
1259         /* comp_ring initialization */
1260         comp_ring->base = ring1->base + ring1->size;
1261         comp_ring->basePA = ring1->basePA + sizeof(struct Vmxnet3_RxDesc) *
1262                 ring1->size;
1263
1264         /* data_ring initialization */
1265         if (VMXNET3_VERSION_GE_3(hw) && rxq->data_desc_size) {
1266                 data_ring->base =
1267                         (uint8_t *)(comp_ring->base + comp_ring->size);
1268                 data_ring->basePA = comp_ring->basePA +
1269                         sizeof(struct Vmxnet3_RxCompDesc) * comp_ring->size;
1270         }
1271
1272         /* cmd_ring0-cmd_ring1 buf_info allocation */
1273         for (i = 0; i < VMXNET3_RX_CMDRING_SIZE; i++) {
1274
1275                 ring = &rxq->cmd_ring[i];
1276                 ring->rid = i;
1277                 snprintf(mem_name, sizeof(mem_name), "rx_ring_%d_buf_info", i);
1278
1279                 ring->buf_info = rte_zmalloc(mem_name,
1280                                              ring->size * sizeof(vmxnet3_buf_info_t),
1281                                              RTE_CACHE_LINE_SIZE);
1282                 if (ring->buf_info == NULL) {
1283                         PMD_INIT_LOG(ERR, "ERROR: Creating rx_buf_info structure");
1284                         return -ENOMEM;
1285                 }
1286         }
1287
1288         /* Update the data portion with rxq */
1289         dev->data->rx_queues[queue_idx] = rxq;
1290
1291         return 0;
1292 }
1293
1294 /*
1295  * Initializes the Receive Unit.
1296  * Loads mbufs into the Rx queues in advance.
1297  */
1298 int
1299 vmxnet3_dev_rxtx_init(struct rte_eth_dev *dev)
1300 {
1301         struct vmxnet3_hw *hw = dev->data->dev_private;
1302
1303         int i, ret;
1304         uint8_t j;
1305
1306         PMD_INIT_FUNC_TRACE();
1307
1308         for (i = 0; i < hw->num_rx_queues; i++) {
1309                 vmxnet3_rx_queue_t *rxq = dev->data->rx_queues[i];
1310
1311                 for (j = 0; j < VMXNET3_RX_CMDRING_SIZE; j++) {
1312                         /* vmxnet3_post_rx_bufs() posts buffers until the ring is full */
1313                         ret = vmxnet3_post_rx_bufs(rxq, j);
1314                         if (ret <= 0) {
1315                                 PMD_INIT_LOG(ERR,
1316                                              "ERROR: Posting Rxq: %d buffers ring: %d",
1317                                              i, j);
1318                                 return -ret;
1319                         }
1320                         /*
1321                          * Update the device with the next2fill index so it can
1322                          * use the posted mbufs for incoming packets.
1323                          */
1324                         if (unlikely(rxq->shared->ctrl.updateRxProd)) {
1325                                 VMXNET3_WRITE_BAR0_REG(hw, rxprod_reg[j] + (rxq->queue_id * VMXNET3_REG_ALIGN),
1326                                                        rxq->cmd_ring[j].next2fill);
1327                         }
1328                 }
1329                 rxq->stopped = FALSE;
1330                 rxq->start_seg = NULL;
1331         }
1332
1333         for (i = 0; i < dev->data->nb_tx_queues; i++) {
1334                 struct vmxnet3_tx_queue *txq = dev->data->tx_queues[i];
1335
1336                 txq->stopped = FALSE;
1337         }
1338
1339         return 0;
1340 }
1341
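/* Default 40-byte Toeplitz hash key, used when the application does not supply one */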
1342 static uint8_t rss_intel_key[40] = {
1343         0x6D, 0x5A, 0x56, 0xDA, 0x25, 0x5B, 0x0E, 0xC2,
1344         0x41, 0x67, 0x25, 0x3D, 0x43, 0xA3, 0x8F, 0xB0,
1345         0xD0, 0xCA, 0x2B, 0xCB, 0xAE, 0x7B, 0x30, 0xB4,
1346         0x77, 0xCB, 0x2D, 0xA3, 0x80, 0x30, 0xF2, 0x0C,
1347         0x6A, 0x42, 0xB7, 0x3B, 0xBE, 0xAC, 0x01, 0xFA,
1348 };
1349
1350 /*
1351  * Additional RSS configuration based on the vmxnet3 v4+ API
1352  */
1353 int
1354 vmxnet3_v4_rss_configure(struct rte_eth_dev *dev)
1355 {
1356         struct vmxnet3_hw *hw = dev->data->dev_private;
1357         Vmxnet3_DriverShared *shared = hw->shared;
1358         Vmxnet3_CmdInfo *cmdInfo = &shared->cu.cmdInfo;
1359         struct rte_eth_rss_conf *port_rss_conf;
1360         uint64_t rss_hf;
1361         uint32_t ret;
1362
1363         PMD_INIT_FUNC_TRACE();
1364
1365         cmdInfo->setRSSFields = 0;
1366         port_rss_conf = &dev->data->dev_conf.rx_adv_conf.rss_conf;
1367
1368         if ((port_rss_conf->rss_hf & VMXNET3_MANDATORY_V4_RSS) !=
1369             VMXNET3_MANDATORY_V4_RSS) {
1370                 PMD_INIT_LOG(WARNING, "RSS: IPv4/6 TCP is required for vmxnet3 v4 RSS, "
1371                              "automatically setting it");
1372                 port_rss_conf->rss_hf |= VMXNET3_MANDATORY_V4_RSS;
1373         }
1374
1375         rss_hf = port_rss_conf->rss_hf &
1376                 (VMXNET3_V4_RSS_MASK | VMXNET3_RSS_OFFLOAD_ALL);
1377
1378         if (rss_hf & RTE_ETH_RSS_NONFRAG_IPV4_TCP)
1379                 cmdInfo->setRSSFields |= VMXNET3_RSS_FIELDS_TCPIP4;
1380         if (rss_hf & RTE_ETH_RSS_NONFRAG_IPV6_TCP)
1381                 cmdInfo->setRSSFields |= VMXNET3_RSS_FIELDS_TCPIP6;
1382         if (rss_hf & RTE_ETH_RSS_NONFRAG_IPV4_UDP)
1383                 cmdInfo->setRSSFields |= VMXNET3_RSS_FIELDS_UDPIP4;
1384         if (rss_hf & RTE_ETH_RSS_NONFRAG_IPV6_UDP)
1385                 cmdInfo->setRSSFields |= VMXNET3_RSS_FIELDS_UDPIP6;
1386
1387         VMXNET3_WRITE_BAR1_REG(hw, VMXNET3_REG_CMD,
1388                                VMXNET3_CMD_SET_RSS_FIELDS);
1389         ret = VMXNET3_READ_BAR1_REG(hw, VMXNET3_REG_CMD);
1390
1391         if (ret != VMXNET3_SUCCESS) {
1392                 PMD_DRV_LOG(ERR, "Set RSS fields (v4) failed: %d", ret);
1393         }
1394
1395         return ret;
1396 }
1397
1398 /*
1399  * Configure RSS feature
1400  */
1401 int
1402 vmxnet3_rss_configure(struct rte_eth_dev *dev)
1403 {
1404         struct vmxnet3_hw *hw = dev->data->dev_private;
1405         struct VMXNET3_RSSConf *dev_rss_conf;
1406         struct rte_eth_rss_conf *port_rss_conf;
1407         uint64_t rss_hf;
1408         uint8_t i, j;
1409
1410         PMD_INIT_FUNC_TRACE();
1411
1412         dev_rss_conf = hw->rss_conf;
1413         port_rss_conf = &dev->data->dev_conf.rx_adv_conf.rss_conf;
1414
1415         /* loading hashFunc */
1416         dev_rss_conf->hashFunc = VMXNET3_RSS_HASH_FUNC_TOEPLITZ;
1417         /* loading hashKeySize */
1418         dev_rss_conf->hashKeySize = VMXNET3_RSS_MAX_KEY_SIZE;
1419         /* loading indTableSize: must not exceed VMXNET3_RSS_MAX_IND_TABLE_SIZE (128) */
1420         dev_rss_conf->indTableSize = (uint16_t)((MAX_RX_QUEUES(hw)) * 4);
1421
1422         if (port_rss_conf->rss_key == NULL) {
1423                 /* Default hash key */
1424                 port_rss_conf->rss_key = rss_intel_key;
1425         }
1426
1427         /* loading hashKey */
1428         memcpy(&dev_rss_conf->hashKey[0], port_rss_conf->rss_key,
1429                dev_rss_conf->hashKeySize);
1430
1431         /* loading indTable */
1432         for (i = 0, j = 0; i < dev_rss_conf->indTableSize; i++, j++) {
1433                 if (j == dev->data->nb_rx_queues)
1434                         j = 0;
1435                 dev_rss_conf->indTable[i] = j;
1436         }
1437
1438         /* loading hashType */
1439         dev_rss_conf->hashType = 0;
1440         rss_hf = port_rss_conf->rss_hf & VMXNET3_RSS_OFFLOAD_ALL;
1441         if (rss_hf & RTE_ETH_RSS_IPV4)
1442                 dev_rss_conf->hashType |= VMXNET3_RSS_HASH_TYPE_IPV4;
1443         if (rss_hf & RTE_ETH_RSS_NONFRAG_IPV4_TCP)
1444                 dev_rss_conf->hashType |= VMXNET3_RSS_HASH_TYPE_TCP_IPV4;
1445         if (rss_hf & RTE_ETH_RSS_IPV6)
1446                 dev_rss_conf->hashType |= VMXNET3_RSS_HASH_TYPE_IPV6;
1447         if (rss_hf & RTE_ETH_RSS_NONFRAG_IPV6_TCP)
1448                 dev_rss_conf->hashType |= VMXNET3_RSS_HASH_TYPE_TCP_IPV6;
1449
1450         return VMXNET3_SUCCESS;
1451 }