c94e3762e659f56d8b57a27eba1435428805358e
[dpdk.git] / drivers / net / vmxnet3 / vmxnet3_rxtx.c
1 /* SPDX-License-Identifier: BSD-3-Clause
2  * Copyright(c) 2010-2015 Intel Corporation
3  */
4
5 #include <sys/queue.h>
6
7 #include <stdio.h>
8 #include <stdlib.h>
9 #include <string.h>
10 #include <errno.h>
11 #include <stdint.h>
12 #include <stdarg.h>
13 #include <unistd.h>
14 #include <inttypes.h>
15
16 #include <rte_byteorder.h>
17 #include <rte_common.h>
18 #include <rte_cycles.h>
19 #include <rte_log.h>
20 #include <rte_debug.h>
21 #include <rte_interrupts.h>
22 #include <rte_pci.h>
23 #include <rte_memory.h>
24 #include <rte_memzone.h>
25 #include <rte_launch.h>
26 #include <rte_eal.h>
27 #include <rte_per_lcore.h>
28 #include <rte_lcore.h>
29 #include <rte_atomic.h>
30 #include <rte_branch_prediction.h>
31 #include <rte_mempool.h>
32 #include <rte_malloc.h>
33 #include <rte_mbuf.h>
34 #include <rte_ether.h>
35 #include <ethdev_driver.h>
36 #include <rte_prefetch.h>
37 #include <rte_ip.h>
38 #include <rte_udp.h>
39 #include <rte_tcp.h>
40 #include <rte_sctp.h>
41 #include <rte_string_fns.h>
42 #include <rte_errno.h>
43 #include <rte_net.h>
44
45 #include "base/vmxnet3_defs.h"
46 #include "vmxnet3_ring.h"
47
48 #include "vmxnet3_logs.h"
49 #include "vmxnet3_ethdev.h"
50
51 #define VMXNET3_TX_OFFLOAD_MASK (RTE_MBUF_F_TX_VLAN | \
52                 RTE_MBUF_F_TX_IPV6 |     \
53                 RTE_MBUF_F_TX_IPV4 |     \
54                 RTE_MBUF_F_TX_L4_MASK |  \
55                 RTE_MBUF_F_TX_TCP_SEG)
56
57 #define VMXNET3_TX_OFFLOAD_NOTSUP_MASK  \
58         (RTE_MBUF_F_TX_OFFLOAD_MASK ^ VMXNET3_TX_OFFLOAD_MASK)
59
60 static const uint32_t rxprod_reg[2] = {VMXNET3_REG_RXPROD, VMXNET3_REG_RXPROD2};
61
62 static int vmxnet3_post_rx_bufs(vmxnet3_rx_queue_t*, uint8_t);
63 static void vmxnet3_tq_tx_complete(vmxnet3_tx_queue_t *);
64 #ifdef RTE_LIBRTE_VMXNET3_DEBUG_DRIVER_NOT_USED
65 static void vmxnet3_rxq_dump(struct vmxnet3_rx_queue *);
66 static void vmxnet3_txq_dump(struct vmxnet3_tx_queue *);
67 #endif
68
69 #ifdef RTE_LIBRTE_VMXNET3_DEBUG_DRIVER_NOT_USED
/* Dump an Rx queue's ring base addresses and fill levels (debug aid). */
static void
vmxnet3_rxq_dump(struct vmxnet3_rx_queue *rxq)
{
	uint32_t avail = 0;

	if (rxq == NULL)
		return;

	/* Virtual and physical base addresses of both command rings
	 * and the completion ring.
	 */
	PMD_RX_LOG(DEBUG,
		   "RXQ: cmd0 base : %p cmd1 base : %p comp ring base : %p.",
		   rxq->cmd_ring[0].base, rxq->cmd_ring[1].base, rxq->comp_ring.base);
	PMD_RX_LOG(DEBUG,
		   "RXQ: cmd0 basePA : 0x%lx cmd1 basePA : 0x%lx comp ring basePA : 0x%lx.",
		   (unsigned long)rxq->cmd_ring[0].basePA,
		   (unsigned long)rxq->cmd_ring[1].basePA,
		   (unsigned long)rxq->comp_ring.basePA);

	/* Free vs. queued descriptor counts for command ring 0 ... */
	avail = vmxnet3_cmd_ring_desc_avail(&rxq->cmd_ring[0]);
	PMD_RX_LOG(DEBUG,
		   "RXQ:cmd0: size=%u; free=%u; next2proc=%u; queued=%u",
		   (uint32_t)rxq->cmd_ring[0].size, avail,
		   rxq->comp_ring.next2proc,
		   rxq->cmd_ring[0].size - avail);

	/* ... and for command ring 1. */
	avail = vmxnet3_cmd_ring_desc_avail(&rxq->cmd_ring[1]);
	PMD_RX_LOG(DEBUG, "RXQ:cmd1 size=%u; free=%u; next2proc=%u; queued=%u",
		   (uint32_t)rxq->cmd_ring[1].size, avail, rxq->comp_ring.next2proc,
		   rxq->cmd_ring[1].size - avail);

}
100
/* Dump a Tx queue's ring base addresses and fill level (debug aid). */
static void
vmxnet3_txq_dump(struct vmxnet3_tx_queue *txq)
{
	uint32_t avail = 0;

	if (txq == NULL)
		return;

	/* Virtual and physical base addresses of the command, completion
	 * and data rings.
	 */
	PMD_TX_LOG(DEBUG, "TXQ: cmd base : %p comp ring base : %p data ring base : %p.",
		   txq->cmd_ring.base, txq->comp_ring.base, txq->data_ring.base);
	PMD_TX_LOG(DEBUG, "TXQ: cmd basePA : 0x%lx comp ring basePA : 0x%lx data ring basePA : 0x%lx.",
		   (unsigned long)txq->cmd_ring.basePA,
		   (unsigned long)txq->comp_ring.basePA,
		   (unsigned long)txq->data_ring.basePA);

	/* Free vs. queued descriptor counts for the command ring. */
	avail = vmxnet3_cmd_ring_desc_avail(&txq->cmd_ring);
	PMD_TX_LOG(DEBUG, "TXQ: size=%u; free=%u; next2proc=%u; queued=%u",
		   (uint32_t)txq->cmd_ring.size, avail,
		   txq->comp_ring.next2proc, txq->cmd_ring.size - avail);
}
121 #endif
122
123 static void
124 vmxnet3_tx_cmd_ring_release_mbufs(vmxnet3_cmd_ring_t *ring)
125 {
126         while (ring->next2comp != ring->next2fill) {
127                 /* No need to worry about desc ownership, device is quiesced by now. */
128                 vmxnet3_buf_info_t *buf_info = ring->buf_info + ring->next2comp;
129
130                 if (buf_info->m) {
131                         rte_pktmbuf_free(buf_info->m);
132                         buf_info->m = NULL;
133                         buf_info->bufPA = 0;
134                         buf_info->len = 0;
135                 }
136                 vmxnet3_cmd_ring_adv_next2comp(ring);
137         }
138 }
139
140 static void
141 vmxnet3_rx_cmd_ring_release_mbufs(vmxnet3_cmd_ring_t *ring)
142 {
143         uint32_t i;
144
145         for (i = 0; i < ring->size; i++) {
146                 /* No need to worry about desc ownership, device is quiesced by now. */
147                 vmxnet3_buf_info_t *buf_info = &ring->buf_info[i];
148
149                 if (buf_info->m) {
150                         rte_pktmbuf_free_seg(buf_info->m);
151                         buf_info->m = NULL;
152                         buf_info->bufPA = 0;
153                         buf_info->len = 0;
154                 }
155                 vmxnet3_cmd_ring_adv_next2comp(ring);
156         }
157 }
158
159 static void
160 vmxnet3_cmd_ring_release(vmxnet3_cmd_ring_t *ring)
161 {
162         rte_free(ring->buf_info);
163         ring->buf_info = NULL;
164 }
165
166 void
167 vmxnet3_dev_tx_queue_release(struct rte_eth_dev *dev, uint16_t qid)
168 {
169         vmxnet3_tx_queue_t *tq = dev->data->tx_queues[qid];
170
171         if (tq != NULL) {
172                 /* Release mbufs */
173                 vmxnet3_tx_cmd_ring_release_mbufs(&tq->cmd_ring);
174                 /* Release the cmd_ring */
175                 vmxnet3_cmd_ring_release(&tq->cmd_ring);
176                 /* Release the memzone */
177                 rte_memzone_free(tq->mz);
178                 /* Release the queue */
179                 rte_free(tq);
180         }
181 }
182
183 void
184 vmxnet3_dev_rx_queue_release(struct rte_eth_dev *dev, uint16_t qid)
185 {
186         int i;
187         vmxnet3_rx_queue_t *rq = dev->data->rx_queues[qid];
188
189         if (rq != NULL) {
190                 /* Release mbufs */
191                 for (i = 0; i < VMXNET3_RX_CMDRING_SIZE; i++)
192                         vmxnet3_rx_cmd_ring_release_mbufs(&rq->cmd_ring[i]);
193
194                 /* Release both the cmd_rings */
195                 for (i = 0; i < VMXNET3_RX_CMDRING_SIZE; i++)
196                         vmxnet3_cmd_ring_release(&rq->cmd_ring[i]);
197
198                 /* Release the memzone */
199                 rte_memzone_free(rq->mz);
200
201                 /* Release the queue */
202                 rte_free(rq);
203         }
204 }
205
206 static void
207 vmxnet3_dev_tx_queue_reset(void *txq)
208 {
209         vmxnet3_tx_queue_t *tq = txq;
210         struct vmxnet3_cmd_ring *ring = &tq->cmd_ring;
211         struct vmxnet3_comp_ring *comp_ring = &tq->comp_ring;
212         struct vmxnet3_data_ring *data_ring = &tq->data_ring;
213         int size;
214
215         if (tq != NULL) {
216                 /* Release the cmd_ring mbufs */
217                 vmxnet3_tx_cmd_ring_release_mbufs(&tq->cmd_ring);
218         }
219
220         /* Tx vmxnet rings structure initialization*/
221         ring->next2fill = 0;
222         ring->next2comp = 0;
223         ring->gen = VMXNET3_INIT_GEN;
224         comp_ring->next2proc = 0;
225         comp_ring->gen = VMXNET3_INIT_GEN;
226
227         size = sizeof(struct Vmxnet3_TxDesc) * ring->size;
228         size += sizeof(struct Vmxnet3_TxCompDesc) * comp_ring->size;
229         size += tq->txdata_desc_size * data_ring->size;
230
231         memset(ring->base, 0, size);
232 }
233
234 static void
235 vmxnet3_dev_rx_queue_reset(void *rxq)
236 {
237         int i;
238         vmxnet3_rx_queue_t *rq = rxq;
239         struct vmxnet3_hw *hw = rq->hw;
240         struct vmxnet3_cmd_ring *ring0, *ring1;
241         struct vmxnet3_comp_ring *comp_ring;
242         struct vmxnet3_rx_data_ring *data_ring = &rq->data_ring;
243         int size;
244
245         /* Release both the cmd_rings mbufs */
246         for (i = 0; i < VMXNET3_RX_CMDRING_SIZE; i++)
247                 vmxnet3_rx_cmd_ring_release_mbufs(&rq->cmd_ring[i]);
248
249         ring0 = &rq->cmd_ring[0];
250         ring1 = &rq->cmd_ring[1];
251         comp_ring = &rq->comp_ring;
252
253         /* Rx vmxnet rings structure initialization */
254         ring0->next2fill = 0;
255         ring1->next2fill = 0;
256         ring0->next2comp = 0;
257         ring1->next2comp = 0;
258         ring0->gen = VMXNET3_INIT_GEN;
259         ring1->gen = VMXNET3_INIT_GEN;
260         comp_ring->next2proc = 0;
261         comp_ring->gen = VMXNET3_INIT_GEN;
262
263         size = sizeof(struct Vmxnet3_RxDesc) * (ring0->size + ring1->size);
264         size += sizeof(struct Vmxnet3_RxCompDesc) * comp_ring->size;
265         if (VMXNET3_VERSION_GE_3(hw) && rq->data_desc_size)
266                 size += rq->data_desc_size * data_ring->size;
267
268         memset(ring0->base, 0, size);
269 }
270
271 void
272 vmxnet3_dev_clear_queues(struct rte_eth_dev *dev)
273 {
274         unsigned i;
275
276         PMD_INIT_FUNC_TRACE();
277
278         for (i = 0; i < dev->data->nb_tx_queues; i++) {
279                 struct vmxnet3_tx_queue *txq = dev->data->tx_queues[i];
280
281                 if (txq != NULL) {
282                         txq->stopped = TRUE;
283                         vmxnet3_dev_tx_queue_reset(txq);
284                 }
285         }
286
287         for (i = 0; i < dev->data->nb_rx_queues; i++) {
288                 struct vmxnet3_rx_queue *rxq = dev->data->rx_queues[i];
289
290                 if (rxq != NULL) {
291                         rxq->stopped = TRUE;
292                         vmxnet3_dev_rx_queue_reset(rxq);
293                 }
294         }
295 }
296
/*
 * Reclaim the chain of Tx command descriptors that ends at eop_idx and
 * free the packet's mbuf chain.
 *
 * Returns the number of command-ring descriptors reclaimed (at least 1).
 */
static int
vmxnet3_unmap_pkt(uint16_t eop_idx, vmxnet3_tx_queue_t *txq)
{
	int completed = 0;
	struct rte_mbuf *mbuf;

	/* Release cmd_ring descriptor and free mbuf */
	RTE_ASSERT(txq->cmd_ring.base[eop_idx].txd.eop == 1);

	/* Only the EOP descriptor's buf_info carries the mbuf pointer for
	 * the whole packet (see xmit: tbi->m is set once per packet).
	 */
	mbuf = txq->cmd_ring.buf_info[eop_idx].m;
	if (mbuf == NULL)
		rte_panic("EOP desc does not point to a valid mbuf");
	rte_pktmbuf_free(mbuf);

	txq->cmd_ring.buf_info[eop_idx].m = NULL;

	/* Advance next2comp over every descriptor of the packet that
	 * precedes the EOP one.
	 */
	while (txq->cmd_ring.next2comp != eop_idx) {
		/* no out-of-order completion */
		RTE_ASSERT(txq->cmd_ring.base[txq->cmd_ring.next2comp].txd.cq == 0);
		vmxnet3_cmd_ring_adv_next2comp(&txq->cmd_ring);
		completed++;
	}

	/* Mark the txd for which tcd was generated as completed */
	vmxnet3_cmd_ring_adv_next2comp(&txq->cmd_ring);

	return completed + 1;
}
325
326 static void
327 vmxnet3_tq_tx_complete(vmxnet3_tx_queue_t *txq)
328 {
329         int completed = 0;
330         vmxnet3_comp_ring_t *comp_ring = &txq->comp_ring;
331         struct Vmxnet3_TxCompDesc *tcd = (struct Vmxnet3_TxCompDesc *)
332                 (comp_ring->base + comp_ring->next2proc);
333
334         while (tcd->gen == comp_ring->gen) {
335                 completed += vmxnet3_unmap_pkt(tcd->txdIdx, txq);
336
337                 vmxnet3_comp_ring_adv_next2proc(comp_ring);
338                 tcd = (struct Vmxnet3_TxCompDesc *)(comp_ring->base +
339                                                     comp_ring->next2proc);
340         }
341
342         PMD_TX_LOG(DEBUG, "Processed %d tx comps & command descs.", completed);
343
344         /* To avoid compiler warnings when not in DEBUG mode. */
345         RTE_SET_USED(completed);
346 }
347
348 uint16_t
349 vmxnet3_prep_pkts(__rte_unused void *tx_queue, struct rte_mbuf **tx_pkts,
350         uint16_t nb_pkts)
351 {
352         int32_t ret;
353         uint32_t i;
354         uint64_t ol_flags;
355         struct rte_mbuf *m;
356
357         for (i = 0; i != nb_pkts; i++) {
358                 m = tx_pkts[i];
359                 ol_flags = m->ol_flags;
360
361                 /* Non-TSO packet cannot occupy more than
362                  * VMXNET3_MAX_TXD_PER_PKT TX descriptors.
363                  */
364                 if ((ol_flags & RTE_MBUF_F_TX_TCP_SEG) == 0 &&
365                                 m->nb_segs > VMXNET3_MAX_TXD_PER_PKT) {
366                         rte_errno = EINVAL;
367                         return i;
368                 }
369
370                 /* check that only supported TX offloads are requested. */
371                 if ((ol_flags & VMXNET3_TX_OFFLOAD_NOTSUP_MASK) != 0 ||
372                                 (ol_flags & RTE_MBUF_F_TX_L4_MASK) ==
373                                 RTE_MBUF_F_TX_SCTP_CKSUM) {
374                         rte_errno = ENOTSUP;
375                         return i;
376                 }
377
378 #ifdef RTE_LIBRTE_ETHDEV_DEBUG
379                 ret = rte_validate_tx_offload(m);
380                 if (ret != 0) {
381                         rte_errno = -ret;
382                         return i;
383                 }
384 #endif
385                 ret = rte_net_intel_cksum_prepare(m);
386                 if (ret != 0) {
387                         rte_errno = -ret;
388                         return i;
389                 }
390         }
391
392         return i;
393 }
394
/*
 * Transmit a burst of packets on the given Tx queue.
 *
 * Returns the number of packets consumed from tx_pkts; dropped packets
 * (too fragmented, empty, etc.) are counted as consumed since their
 * mbufs are freed here. Stops early only when the command ring is full.
 */
uint16_t
vmxnet3_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts,
		  uint16_t nb_pkts)
{
	uint16_t nb_tx;
	vmxnet3_tx_queue_t *txq = tx_queue;
	struct vmxnet3_hw *hw = txq->hw;
	Vmxnet3_TxQueueCtrl *txq_ctrl = &txq->shared->ctrl;
	/* Running count of work queued since the device was last
	 * notified; exchanged with the device via txq_ctrl (LE format).
	 */
	uint32_t deferred = rte_le_to_cpu_32(txq_ctrl->txNumDeferred);

	if (unlikely(txq->stopped)) {
		PMD_TX_LOG(DEBUG, "Tx queue is stopped.");
		return 0;
	}

	/* Free up the comp_descriptors aggressively */
	vmxnet3_tq_tx_complete(txq);

	nb_tx = 0;
	while (nb_tx < nb_pkts) {
		Vmxnet3_GenericDesc *gdesc;
		vmxnet3_buf_info_t *tbi;
		uint32_t first2fill, avail, dw2;
		struct rte_mbuf *txm = tx_pkts[nb_tx];
		struct rte_mbuf *m_seg = txm;
		int copy_size = 0;
		bool tso = (txm->ol_flags & RTE_MBUF_F_TX_TCP_SEG) != 0;
		/* # of descriptors needed for a packet. */
		unsigned count = txm->nb_segs;

		avail = vmxnet3_cmd_ring_desc_avail(&txq->cmd_ring);
		if (count > avail) {
			/* Is command ring full? */
			if (unlikely(avail == 0)) {
				PMD_TX_LOG(DEBUG, "No free ring descriptors");
				txq->stats.tx_ring_full++;
				txq->stats.drop_total += (nb_pkts - nb_tx);
				break;
			}

			/* Command ring is not full but cannot handle the
			 * multi-segmented packet. Let's try the next packet
			 * in this case.
			 */
			PMD_TX_LOG(DEBUG, "Running out of ring descriptors "
				   "(avail %d needed %d)", avail, count);
			txq->stats.drop_total++;
			if (tso)
				txq->stats.drop_tso++;
			rte_pktmbuf_free(txm);
			nb_tx++;
			continue;
		}

		/* Drop non-TSO packet that is excessively fragmented */
		if (unlikely(!tso && count > VMXNET3_MAX_TXD_PER_PKT)) {
			PMD_TX_LOG(ERR, "Non-TSO packet cannot occupy more than %d tx "
				   "descriptors. Packet dropped.", VMXNET3_MAX_TXD_PER_PKT);
			txq->stats.drop_too_many_segs++;
			txq->stats.drop_total++;
			rte_pktmbuf_free(txm);
			nb_tx++;
			continue;
		}

		/* Small single-segment packets are copied into the Tx data
		 * ring slot matching next2fill instead of being DMA'd from
		 * the mbuf directly.
		 */
		if (txm->nb_segs == 1 &&
		    rte_pktmbuf_pkt_len(txm) <= txq->txdata_desc_size) {
			struct Vmxnet3_TxDataDesc *tdd;

			/* Skip empty packets */
			if (unlikely(rte_pktmbuf_pkt_len(txm) == 0)) {
				txq->stats.drop_total++;
				rte_pktmbuf_free(txm);
				nb_tx++;
				continue;
			}

			tdd = (struct Vmxnet3_TxDataDesc *)
				((uint8 *)txq->data_ring.base +
				 txq->cmd_ring.next2fill *
				 txq->txdata_desc_size);
			copy_size = rte_pktmbuf_pkt_len(txm);
			rte_memcpy(tdd->data, rte_pktmbuf_mtod(txm, char *), copy_size);
		}

		/* use the previous gen bit for the SOP desc */
		dw2 = (txq->cmd_ring.gen ^ 0x1) << VMXNET3_TXD_GEN_SHIFT;
		first2fill = txq->cmd_ring.next2fill;
		do {
			/* Remember the transmit buffer for cleanup */
			tbi = txq->cmd_ring.buf_info + txq->cmd_ring.next2fill;

			/* NB: the following assumes that VMXNET3 maximum
			 * transmit buffer size (16K) is greater than
			 * maximum size of mbuf segment size.
			 */
			gdesc = txq->cmd_ring.base + txq->cmd_ring.next2fill;

			/* Skip empty segments (continue re-evaluates the
			 * do/while condition, which advances m_seg).
			 */
			if (unlikely(m_seg->data_len == 0))
				continue;

			if (copy_size) {
				/* Point the descriptor at the copy made in
				 * the data ring rather than at the mbuf.
				 */
				uint64 offset =
					(uint64)txq->cmd_ring.next2fill *
							txq->txdata_desc_size;
				gdesc->txd.addr =
					rte_cpu_to_le_64(txq->data_ring.basePA +
							 offset);
			} else {
				gdesc->txd.addr = rte_mbuf_data_iova(m_seg);
			}

			gdesc->dword[2] = dw2 | m_seg->data_len;
			gdesc->dword[3] = 0;

			/* move to the next2fill descriptor */
			vmxnet3_cmd_ring_adv_next2fill(&txq->cmd_ring);

			/* use the right gen for non-SOP desc */
			dw2 = txq->cmd_ring.gen << VMXNET3_TXD_GEN_SHIFT;
		} while ((m_seg = m_seg->next) != NULL);

		/* set the last buf_info for the pkt */
		tbi->m = txm;
		/* Update the EOP descriptor */
		gdesc->dword[3] |= VMXNET3_TXD_EOP | VMXNET3_TXD_CQ;

		/* Add VLAN tag if present */
		gdesc = txq->cmd_ring.base + first2fill;
		if (txm->ol_flags & RTE_MBUF_F_TX_VLAN) {
			gdesc->txd.ti = 1;
			gdesc->txd.tci = txm->vlan_tci;
		}

		if (tso) {
			uint16_t mss = txm->tso_segsz;

			RTE_ASSERT(mss > 0);

			gdesc->txd.hlen = txm->l2_len + txm->l3_len + txm->l4_len;
			gdesc->txd.om = VMXNET3_OM_TSO;
			gdesc->txd.msscof = mss;

			/* Count one deferred entry per resulting TSO segment. */
			deferred += (rte_pktmbuf_pkt_len(txm) - gdesc->txd.hlen + mss - 1) / mss;
		} else if (txm->ol_flags & RTE_MBUF_F_TX_L4_MASK) {
			gdesc->txd.om = VMXNET3_OM_CSUM;
			gdesc->txd.hlen = txm->l2_len + txm->l3_len;

			/* In checksum-offload mode msscof carries the byte
			 * offset of the L4 checksum field instead of an MSS.
			 */
			switch (txm->ol_flags & RTE_MBUF_F_TX_L4_MASK) {
			case RTE_MBUF_F_TX_TCP_CKSUM:
				gdesc->txd.msscof = gdesc->txd.hlen +
					offsetof(struct rte_tcp_hdr, cksum);
				break;
			case RTE_MBUF_F_TX_UDP_CKSUM:
				gdesc->txd.msscof = gdesc->txd.hlen +
					offsetof(struct rte_udp_hdr,
						dgram_cksum);
				break;
			default:
				/* vmxnet3_prep_pkts() rejects these, so this
				 * is unreachable for conforming callers.
				 */
				PMD_TX_LOG(WARNING, "requested cksum offload not supported %#llx",
					   txm->ol_flags & RTE_MBUF_F_TX_L4_MASK);
				abort();
			}
			deferred++;
		} else {
			gdesc->txd.hlen = 0;
			gdesc->txd.om = VMXNET3_OM_NONE;
			gdesc->txd.msscof = 0;
			deferred++;
		}

		/* flip the GEN bit on the SOP */
		rte_compiler_barrier();
		gdesc->dword[2] ^= VMXNET3_TXD_GEN;

		txq_ctrl->txNumDeferred = rte_cpu_to_le_32(deferred);
		nb_tx++;
	}

	PMD_TX_LOG(DEBUG, "vmxnet3 txThreshold: %u", rte_le_to_cpu_32(txq_ctrl->txThreshold));

	/* Ring the doorbell only once the deferred work crosses the
	 * device-provided threshold.
	 */
	if (deferred >= rte_le_to_cpu_32(txq_ctrl->txThreshold)) {
		txq_ctrl->txNumDeferred = 0;
		/* Notify vSwitch that packets are available. */
		VMXNET3_WRITE_BAR0_REG(hw, (VMXNET3_REG_TXPROD + txq->queue_id * VMXNET3_REG_ALIGN),
				       txq->cmd_ring.next2fill);
	}

	return nb_tx;
}
586
/*
 * Post one mbuf to the next free slot of the given Rx command ring:
 * record it in buf_info and publish its address/length to the device
 * via the Rx descriptor, writing the generation bit last.
 */
static inline void
vmxnet3_renew_desc(vmxnet3_rx_queue_t *rxq, uint8_t ring_id,
		   struct rte_mbuf *mbuf)
{
	uint32_t val;
	struct vmxnet3_cmd_ring *ring = &rxq->cmd_ring[ring_id];
	struct Vmxnet3_RxDesc *rxd =
		(struct Vmxnet3_RxDesc *)(ring->base + ring->next2fill);
	vmxnet3_buf_info_t *buf_info = &ring->buf_info[ring->next2fill];

	if (ring_id == 0) {
		/* Usually: One HEAD type buf per packet
		 * val = (ring->next2fill % rxq->hw->bufs_per_pkt) ?
		 * VMXNET3_RXD_BTYPE_BODY : VMXNET3_RXD_BTYPE_HEAD;
		 */

		/* We use single packet buffer so all heads here */
		val = VMXNET3_RXD_BTYPE_HEAD;
	} else {
		/* All BODY type buffers for 2nd ring */
		val = VMXNET3_RXD_BTYPE_BODY;
	}

	/*
	 * Load mbuf pointer into buf_info[ring_size]
	 * buf_info structure is equivalent to cookie for virtio-virtqueue
	 */
	buf_info->m = mbuf;
	buf_info->len = (uint16_t)(mbuf->buf_len - RTE_PKTMBUF_HEADROOM);
	buf_info->bufPA = rte_mbuf_data_iova_default(mbuf);

	/* Load Rx Descriptor with the buffer's GPA */
	rxd->addr = buf_info->bufPA;

	/* After this point rxd->addr MUST not be NULL */
	rxd->btype = val;
	rxd->len = buf_info->len;
	/* Flip gen bit at the end to change ownership */
	rxd->gen = ring->gen;

	vmxnet3_cmd_ring_adv_next2fill(ring);
}
629 /*
630  *  Allocates mbufs and clusters. Post rx descriptors with buffer details
631  *  so that device can receive packets in those buffers.
632  *  Ring layout:
633  *      Among the two rings, 1st ring contains buffers of type 0 and type 1.
634  *      bufs_per_pkt is set such that for non-LRO cases all the buffers required
635  *      by a frame will fit in 1st ring (1st buf of type0 and rest of type1).
636  *      2nd ring contains buffers of type 1 alone. Second ring mostly be used
637  *      only for LRO.
638  */
639 static int
640 vmxnet3_post_rx_bufs(vmxnet3_rx_queue_t *rxq, uint8_t ring_id)
641 {
642         int err = 0;
643         uint32_t i = 0;
644         struct vmxnet3_cmd_ring *ring = &rxq->cmd_ring[ring_id];
645
646         while (vmxnet3_cmd_ring_desc_avail(ring) > 0) {
647                 struct rte_mbuf *mbuf;
648
649                 /* Allocate blank mbuf for the current Rx Descriptor */
650                 mbuf = rte_mbuf_raw_alloc(rxq->mp);
651                 if (unlikely(mbuf == NULL)) {
652                         PMD_RX_LOG(ERR, "Error allocating mbuf");
653                         rxq->stats.rx_buf_alloc_failure++;
654                         err = ENOMEM;
655                         break;
656                 }
657
658                 vmxnet3_renew_desc(rxq, ring_id, mbuf);
659                 i++;
660         }
661
662         /* Return error only if no buffers are posted at present */
663         if (vmxnet3_cmd_ring_desc_avail(ring) >= (ring->size - 1))
664                 return -err;
665         else
666                 return i;
667 }
668
/* MSS not provided by vmxnet3, guess one with available information.
 *
 * Parses the Ethernet/IP/TCP headers of the first segment to find the
 * total header length, then estimates the MSS from the aggregated
 * segment count when available, or from the device MTU otherwise. When
 * the first segment is too short to hold the expected headers, falls
 * back to an MTU-based estimate immediately.
 */
static uint16_t
vmxnet3_guess_mss(struct vmxnet3_hw *hw, const Vmxnet3_RxCompDesc *rcd,
		struct rte_mbuf *rxm)
{
	uint32_t hlen, slen;
	struct rte_ipv4_hdr *ipv4_hdr;
	struct rte_ipv6_hdr *ipv6_hdr;
	struct rte_tcp_hdr *tcp_hdr;
	char *ptr;
	uint8_t segs;

	RTE_ASSERT(rcd->tcp);

	ptr = rte_pktmbuf_mtod(rxm, char *);
	slen = rte_pktmbuf_data_len(rxm);
	hlen = sizeof(struct rte_ether_hdr);

	if (rcd->v4) {
		/* Too short for an IPv4 header: fall back to MTU estimate. */
		if (unlikely(slen < hlen + sizeof(struct rte_ipv4_hdr)))
			return hw->mtu - sizeof(struct rte_ipv4_hdr)
					- sizeof(struct rte_tcp_hdr);

		ipv4_hdr = (struct rte_ipv4_hdr *)(ptr + hlen);
		hlen += rte_ipv4_hdr_len(ipv4_hdr);
	} else if (rcd->v6) {
		/* Too short for an IPv6 header: fall back to MTU estimate. */
		if (unlikely(slen < hlen + sizeof(struct rte_ipv6_hdr)))
			return hw->mtu - sizeof(struct rte_ipv6_hdr) -
					sizeof(struct rte_tcp_hdr);

		ipv6_hdr = (struct rte_ipv6_hdr *)(ptr + hlen);
		hlen += sizeof(struct rte_ipv6_hdr);
		/* Walk any IPv6 extension headers to reach the TCP header. */
		if (unlikely(ipv6_hdr->proto != IPPROTO_TCP)) {
			int frag;

			rte_net_skip_ip6_ext(ipv6_hdr->proto, rxm,
					&hlen, &frag);
		}
	}

	if (unlikely(slen < hlen + sizeof(struct rte_tcp_hdr)))
		return hw->mtu - hlen - sizeof(struct rte_tcp_hdr) +
				sizeof(struct rte_ether_hdr);

	tcp_hdr = (struct rte_tcp_hdr *)(ptr + hlen);
	/* data_off upper nibble = TCP header length in 32-bit words;
	 * (x & 0xf0) >> 2 converts it straight to bytes.
	 */
	hlen += (tcp_hdr->data_off & 0xf0) >> 2;

	/* Prefer payload / segment-count when the LRO segment count is
	 * known; otherwise estimate from the MTU.
	 */
	segs = *vmxnet3_segs_dynfield(rxm);
	if (segs > 1)
		return (rte_pktmbuf_pkt_len(rxm) - hlen + segs - 1) / segs;
	else
		return hw->mtu - hlen + sizeof(struct rte_ether_hdr);
}
722
/* Receive side checksum and other offloads.
 *
 * Translates the completion descriptor's flags into mbuf ol_flags and
 * packet_type. SOP (start-of-packet) completions carry the LRO info;
 * EOP completions carry RSS, VLAN and checksum results.
 */
static inline void
vmxnet3_rx_offload(struct vmxnet3_hw *hw, const Vmxnet3_RxCompDesc *rcd,
		struct rte_mbuf *rxm, const uint8_t sop)
{
	uint64_t ol_flags = rxm->ol_flags;
	uint32_t packet_type = rxm->packet_type;

	/* Offloads set in sop */
	if (sop) {
		/* Set packet type */
		packet_type |= RTE_PTYPE_L2_ETHER;

		/* Check large packet receive */
		if (VMXNET3_VERSION_GE_2(hw) &&
		    rcd->type == VMXNET3_CDTYPE_RXCOMP_LRO) {
			/* LRO completions use the extended descriptor layout
			 * which carries the MSS and segment count.
			 */
			const Vmxnet3_RxCompDescExt *rcde =
					(const Vmxnet3_RxCompDescExt *)rcd;

			rxm->tso_segsz = rcde->mss;
			*vmxnet3_segs_dynfield(rxm) = rcde->segCnt;
			ol_flags |= RTE_MBUF_F_RX_LRO;
		}
	} else { /* Offloads set in eop */
		/* Check for RSS */
		if (rcd->rssType != VMXNET3_RCD_RSS_TYPE_NONE) {
			ol_flags |= RTE_MBUF_F_RX_RSS_HASH;
			rxm->hash.rss = rcd->rssHash;
		}

		/* Check for hardware stripped VLAN tag */
		if (rcd->ts) {
			ol_flags |= (RTE_MBUF_F_RX_VLAN | RTE_MBUF_F_RX_VLAN_STRIPPED);
			rxm->vlan_tci = rte_le_to_cpu_16((uint16_t)rcd->tci);
		}

		/* Check packet type, checksum errors, etc. */
		if (rcd->cnc) {
			/* Checksum not calculated by the device. */
			ol_flags |= RTE_MBUF_F_RX_L4_CKSUM_UNKNOWN;
		} else {
			if (rcd->v4) {
				packet_type |= RTE_PTYPE_L3_IPV4_EXT_UNKNOWN;

				/* ipc: IP checksum verdict */
				if (rcd->ipc)
					ol_flags |= RTE_MBUF_F_RX_IP_CKSUM_GOOD;
				else
					ol_flags |= RTE_MBUF_F_RX_IP_CKSUM_BAD;

				/* tuc: TCP/UDP checksum verdict; tcp/udp
				 * select the L4 packet type.
				 */
				if (rcd->tuc) {
					ol_flags |= RTE_MBUF_F_RX_L4_CKSUM_GOOD;
					if (rcd->tcp)
						packet_type |= RTE_PTYPE_L4_TCP;
					else
						packet_type |= RTE_PTYPE_L4_UDP;
				} else {
					if (rcd->tcp) {
						packet_type |= RTE_PTYPE_L4_TCP;
						ol_flags |= RTE_MBUF_F_RX_L4_CKSUM_BAD;
					} else if (rcd->udp) {
						packet_type |= RTE_PTYPE_L4_UDP;
						ol_flags |= RTE_MBUF_F_RX_L4_CKSUM_BAD;
					}
				}
			} else if (rcd->v6) {
				packet_type |= RTE_PTYPE_L3_IPV6_EXT_UNKNOWN;

				/* No IP checksum for IPv6; only the L4 verdict. */
				if (rcd->tuc) {
					ol_flags |= RTE_MBUF_F_RX_L4_CKSUM_GOOD;
					if (rcd->tcp)
						packet_type |= RTE_PTYPE_L4_TCP;
					else
						packet_type |= RTE_PTYPE_L4_UDP;
				} else {
					if (rcd->tcp) {
						packet_type |= RTE_PTYPE_L4_TCP;
						ol_flags |= RTE_MBUF_F_RX_L4_CKSUM_BAD;
					} else if (rcd->udp) {
						packet_type |= RTE_PTYPE_L4_UDP;
						ol_flags |= RTE_MBUF_F_RX_L4_CKSUM_BAD;
					}
				}
			} else {
				packet_type |= RTE_PTYPE_UNKNOWN;
			}

			/* Old variants of vmxnet3 do not provide MSS */
			if ((ol_flags & RTE_MBUF_F_RX_LRO) && rxm->tso_segsz == 0)
				rxm->tso_segsz = vmxnet3_guess_mss(hw,
						rcd, rxm);
		}
	}

	rxm->ol_flags = ol_flags;
	rxm->packet_type = packet_type;
}
818
/*
 * Process the Rx Completion Ring of given vmxnet3_rx_queue
 * for nb_pkts burst and return the number of packets received.
 *
 * Walks the completion ring while the descriptor generation bit matches
 * the ring's current generation, reassembling multi-segment (SOP..EOP)
 * packets, recycling command-ring descriptors with freshly allocated
 * mbufs, and kicking the device producer register when required.
 */
uint16_t
vmxnet3_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t nb_pkts)
{
	uint16_t nb_rx;
	uint32_t nb_rxd, idx;
	uint8_t ring_idx;
	vmxnet3_rx_queue_t *rxq;
	Vmxnet3_RxCompDesc *rcd;
	vmxnet3_buf_info_t *rbi;
	Vmxnet3_RxDesc *rxd;
	struct rte_mbuf *rxm = NULL;
	struct vmxnet3_hw *hw;

	nb_rx = 0;
	ring_idx = 0;
	nb_rxd = 0;
	idx = 0;

	rxq = rx_queue;
	hw = rxq->hw;

	/* Next completion descriptor to inspect */
	rcd = &rxq->comp_ring.base[rxq->comp_ring.next2proc].rcd;

	if (unlikely(rxq->stopped)) {
		PMD_RX_LOG(DEBUG, "Rx queue is stopped.");
		return 0;
	}

	/*
	 * A matching generation bit means the device has written this
	 * completion entry since we last wrapped the ring.
	 */
	while (rcd->gen == rxq->comp_ring.gen) {
		struct rte_mbuf *newm;

		if (nb_rx >= nb_pkts)
			break;

		/*
		 * Allocate the replacement mbuf up front so the command-ring
		 * descriptor can always be refilled below; on failure stop
		 * the burst rather than leaving a hole in the ring.
		 */
		newm = rte_mbuf_raw_alloc(rxq->mp);
		if (unlikely(newm == NULL)) {
			PMD_RX_LOG(ERR, "Error allocating mbuf");
			rxq->stats.rx_buf_alloc_failure++;
			break;
		}

		idx = rcd->rxdIdx;
		ring_idx = vmxnet3_get_ring_idx(hw, rcd->rqID);
		rxd = (Vmxnet3_RxDesc *)rxq->cmd_ring[ring_idx].base + idx;
		RTE_SET_USED(rxd); /* used only for assert when enabled */
		rbi = rxq->cmd_ring[ring_idx].buf_info + idx;

		PMD_RX_LOG(DEBUG, "rxd idx: %d ring idx: %d.", idx, ring_idx);

		RTE_ASSERT(rcd->len <= rxd->len);
		RTE_ASSERT(rbi->m);

		/* Get the packet buffer pointer from buf_info */
		rxm = rbi->m;

		/* Clear descriptor associated buf_info to be reused */
		rbi->m = NULL;
		rbi->bufPA = 0;

		/* Update the index that we received a packet */
		rxq->cmd_ring[ring_idx].next2comp = idx;

		/* For RCD with EOP set, check if there is frame error */
		if (unlikely(rcd->eop && rcd->err)) {
			rxq->stats.drop_total++;
			rxq->stats.drop_err++;

			if (!rcd->fcs) {
				rxq->stats.drop_fcs++;
				PMD_RX_LOG(ERR, "Recv packet dropped due to frame err.");
			}
			PMD_RX_LOG(ERR, "Error in received packet rcd#:%d rxd:%d",
				   (int)(rcd - (struct Vmxnet3_RxCompDesc *)
					 rxq->comp_ring.base), rcd->rxdIdx);
			rte_pktmbuf_free_seg(rxm);
			/* Discard any partially assembled scattered packet too */
			if (rxq->start_seg) {
				struct rte_mbuf *start = rxq->start_seg;

				rxq->start_seg = NULL;
				rte_pktmbuf_free(start);
			}
			goto rcd_done;
		}

		/* Initialize newly received packet buffer */
		rxm->port = rxq->port_id;
		rxm->nb_segs = 1;
		rxm->next = NULL;
		rxm->pkt_len = (uint16_t)rcd->len;
		rxm->data_len = (uint16_t)rcd->len;
		rxm->data_off = RTE_PKTMBUF_HEADROOM;
		rxm->ol_flags = 0;
		rxm->vlan_tci = 0;
		rxm->packet_type = 0;

		/*
		 * If this is the first buffer of the received packet,
		 * set the pointer to the first mbuf of the packet
		 * Otherwise, update the total length and the number of segments
		 * of the current scattered packet, and update the pointer to
		 * the last mbuf of the current packet.
		 */
		if (rcd->sop) {
			RTE_ASSERT(rxd->btype == VMXNET3_RXD_BTYPE_HEAD);

			/* A zero-length SOP means the device skipped this buffer */
			if (unlikely(rcd->len == 0)) {
				RTE_ASSERT(rcd->eop);

				PMD_RX_LOG(DEBUG,
					   "Rx buf was skipped. rxring[%d][%d])",
					   ring_idx, idx);
				rte_pktmbuf_free_seg(rxm);
				goto rcd_done;
			}

			/*
			 * Small packets may be delivered through the Rx data
			 * ring (vmxnet3 v3+); copy the payload out of it.
			 */
			if (vmxnet3_rx_data_ring(hw, rcd->rqID)) {
				uint8_t *rdd = rxq->data_ring.base +
					idx * rxq->data_desc_size;

				RTE_ASSERT(VMXNET3_VERSION_GE_3(hw));
				rte_memcpy(rte_pktmbuf_mtod(rxm, char *),
					   rdd, rcd->len);
			}

			rxq->start_seg = rxm;
			rxq->last_seg = rxm;
			vmxnet3_rx_offload(hw, rcd, rxm, 1);
		} else {
			struct rte_mbuf *start = rxq->start_seg;

			RTE_ASSERT(rxd->btype == VMXNET3_RXD_BTYPE_BODY);

			/* Chain this body segment onto the packet in progress */
			if (likely(start && rxm->data_len > 0)) {
				start->pkt_len += rxm->data_len;
				start->nb_segs++;

				rxq->last_seg->next = rxm;
				rxq->last_seg = rxm;
			} else {
				PMD_RX_LOG(ERR, "Error received empty or out of order frame.");
				rxq->stats.drop_total++;
				rxq->stats.drop_err++;

				rte_pktmbuf_free_seg(rxm);
			}
		}

		/* EOP: the packet is complete; run offload parsing and hand it out */
		if (rcd->eop) {
			struct rte_mbuf *start = rxq->start_seg;

			vmxnet3_rx_offload(hw, rcd, start, 0);
			rx_pkts[nb_rx++] = start;
			rxq->start_seg = NULL;
		}

rcd_done:
		rxq->cmd_ring[ring_idx].next2comp = idx;
		VMXNET3_INC_RING_IDX_ONLY(rxq->cmd_ring[ring_idx].next2comp,
					  rxq->cmd_ring[ring_idx].size);

		/* It's time to renew descriptors */
		vmxnet3_renew_desc(rxq, ring_idx, newm);
		if (unlikely(rxq->shared->ctrl.updateRxProd)) {
			VMXNET3_WRITE_BAR0_REG(hw, rxprod_reg[ring_idx] + (rxq->queue_id * VMXNET3_REG_ALIGN),
					       rxq->cmd_ring[ring_idx].next2fill);
		}

		/* Advance to the next descriptor in comp_ring */
		vmxnet3_comp_ring_adv_next2proc(&rxq->comp_ring);

		rcd = &rxq->comp_ring.base[rxq->comp_ring.next2proc].rcd;
		nb_rxd++;
		/* Bound one burst to a full pass over cmd ring 0 */
		if (nb_rxd > rxq->cmd_ring[0].size) {
			PMD_RX_LOG(ERR, "Used up quota of receiving packets,"
				   " relinquish control.");
			break;
		}
	}

	/*
	 * Nothing was processed: opportunistically refill any available
	 * command-ring slots (e.g. after earlier mbuf allocation failures)
	 * and notify the device so reception can resume.
	 */
	if (unlikely(nb_rxd == 0)) {
		uint32_t avail;
		for (ring_idx = 0; ring_idx < VMXNET3_RX_CMDRING_SIZE; ring_idx++) {
			avail = vmxnet3_cmd_ring_desc_avail(&rxq->cmd_ring[ring_idx]);
			if (unlikely(avail > 0)) {
				/* try to alloc new buf and renew descriptors */
				vmxnet3_post_rx_bufs(rxq, ring_idx);
			}
		}
		if (unlikely(rxq->shared->ctrl.updateRxProd)) {
			for (ring_idx = 0; ring_idx < VMXNET3_RX_CMDRING_SIZE; ring_idx++) {
				VMXNET3_WRITE_BAR0_REG(hw, rxprod_reg[ring_idx] + (rxq->queue_id * VMXNET3_REG_ALIGN),
						       rxq->cmd_ring[ring_idx].next2fill);
			}
		}
	}

	return nb_rx;
}
1021
1022 uint32_t
1023 vmxnet3_dev_rx_queue_count(void *rx_queue)
1024 {
1025         const vmxnet3_rx_queue_t *rxq;
1026         const Vmxnet3_RxCompDesc *rcd;
1027         uint32_t idx, nb_rxd = 0;
1028         uint8_t gen;
1029
1030         rxq = rx_queue;
1031         if (unlikely(rxq->stopped)) {
1032                 PMD_RX_LOG(DEBUG, "Rx queue is stopped.");
1033                 return 0;
1034         }
1035
1036         gen = rxq->comp_ring.gen;
1037         idx = rxq->comp_ring.next2proc;
1038         rcd = &rxq->comp_ring.base[idx].rcd;
1039         while (rcd->gen == gen) {
1040                 if (rcd->eop)
1041                         ++nb_rxd;
1042                 if (++idx == rxq->comp_ring.size) {
1043                         idx = 0;
1044                         gen ^= 1;
1045                 }
1046                 rcd = &rxq->comp_ring.base[idx].rcd;
1047         }
1048
1049         return nb_rxd;
1050 }
1051
1052 int
1053 vmxnet3_dev_tx_queue_setup(struct rte_eth_dev *dev,
1054                            uint16_t queue_idx,
1055                            uint16_t nb_desc,
1056                            unsigned int socket_id,
1057                            const struct rte_eth_txconf *tx_conf __rte_unused)
1058 {
1059         struct vmxnet3_hw *hw = dev->data->dev_private;
1060         const struct rte_memzone *mz;
1061         struct vmxnet3_tx_queue *txq;
1062         struct vmxnet3_cmd_ring *ring;
1063         struct vmxnet3_comp_ring *comp_ring;
1064         struct vmxnet3_data_ring *data_ring;
1065         int size;
1066
1067         PMD_INIT_FUNC_TRACE();
1068
1069         txq = rte_zmalloc("ethdev_tx_queue", sizeof(struct vmxnet3_tx_queue),
1070                           RTE_CACHE_LINE_SIZE);
1071         if (txq == NULL) {
1072                 PMD_INIT_LOG(ERR, "Can not allocate tx queue structure");
1073                 return -ENOMEM;
1074         }
1075
1076         txq->queue_id = queue_idx;
1077         txq->port_id = dev->data->port_id;
1078         txq->shared = NULL; /* set in vmxnet3_setup_driver_shared() */
1079         txq->hw = hw;
1080         txq->qid = queue_idx;
1081         txq->stopped = TRUE;
1082         txq->txdata_desc_size = hw->txdata_desc_size;
1083
1084         ring = &txq->cmd_ring;
1085         comp_ring = &txq->comp_ring;
1086         data_ring = &txq->data_ring;
1087
1088         /* Tx vmxnet ring length should be between 512-4096 */
1089         if (nb_desc < VMXNET3_DEF_TX_RING_SIZE) {
1090                 PMD_INIT_LOG(ERR, "VMXNET3 Tx Ring Size Min: %u",
1091                              VMXNET3_DEF_TX_RING_SIZE);
1092                 return -EINVAL;
1093         } else if (nb_desc > VMXNET3_TX_RING_MAX_SIZE) {
1094                 PMD_INIT_LOG(ERR, "VMXNET3 Tx Ring Size Max: %u",
1095                              VMXNET3_TX_RING_MAX_SIZE);
1096                 return -EINVAL;
1097         } else {
1098                 ring->size = nb_desc;
1099                 ring->size &= ~VMXNET3_RING_SIZE_MASK;
1100         }
1101         comp_ring->size = data_ring->size = ring->size;
1102
1103         /* Tx vmxnet rings structure initialization*/
1104         ring->next2fill = 0;
1105         ring->next2comp = 0;
1106         ring->gen = VMXNET3_INIT_GEN;
1107         comp_ring->next2proc = 0;
1108         comp_ring->gen = VMXNET3_INIT_GEN;
1109
1110         size = sizeof(struct Vmxnet3_TxDesc) * ring->size;
1111         size += sizeof(struct Vmxnet3_TxCompDesc) * comp_ring->size;
1112         size += txq->txdata_desc_size * data_ring->size;
1113
1114         mz = rte_eth_dma_zone_reserve(dev, "txdesc", queue_idx, size,
1115                                       VMXNET3_RING_BA_ALIGN, socket_id);
1116         if (mz == NULL) {
1117                 PMD_INIT_LOG(ERR, "ERROR: Creating queue descriptors zone");
1118                 return -ENOMEM;
1119         }
1120         txq->mz = mz;
1121         memset(mz->addr, 0, mz->len);
1122
1123         /* cmd_ring initialization */
1124         ring->base = mz->addr;
1125         ring->basePA = mz->iova;
1126
1127         /* comp_ring initialization */
1128         comp_ring->base = ring->base + ring->size;
1129         comp_ring->basePA = ring->basePA +
1130                 (sizeof(struct Vmxnet3_TxDesc) * ring->size);
1131
1132         /* data_ring initialization */
1133         data_ring->base = (Vmxnet3_TxDataDesc *)(comp_ring->base + comp_ring->size);
1134         data_ring->basePA = comp_ring->basePA +
1135                         (sizeof(struct Vmxnet3_TxCompDesc) * comp_ring->size);
1136
1137         /* cmd_ring0 buf_info allocation */
1138         ring->buf_info = rte_zmalloc("tx_ring_buf_info",
1139                                      ring->size * sizeof(vmxnet3_buf_info_t), RTE_CACHE_LINE_SIZE);
1140         if (ring->buf_info == NULL) {
1141                 PMD_INIT_LOG(ERR, "ERROR: Creating tx_buf_info structure");
1142                 return -ENOMEM;
1143         }
1144
1145         /* Update the data portion with txq */
1146         dev->data->tx_queues[queue_idx] = txq;
1147
1148         return 0;
1149 }
1150
1151 int
1152 vmxnet3_dev_rx_queue_setup(struct rte_eth_dev *dev,
1153                            uint16_t queue_idx,
1154                            uint16_t nb_desc,
1155                            unsigned int socket_id,
1156                            __rte_unused const struct rte_eth_rxconf *rx_conf,
1157                            struct rte_mempool *mp)
1158 {
1159         const struct rte_memzone *mz;
1160         struct vmxnet3_rx_queue *rxq;
1161         struct vmxnet3_hw *hw = dev->data->dev_private;
1162         struct vmxnet3_cmd_ring *ring0, *ring1, *ring;
1163         struct vmxnet3_comp_ring *comp_ring;
1164         struct vmxnet3_rx_data_ring *data_ring;
1165         int size;
1166         uint8_t i;
1167         char mem_name[32];
1168
1169         PMD_INIT_FUNC_TRACE();
1170
1171         rxq = rte_zmalloc("ethdev_rx_queue", sizeof(struct vmxnet3_rx_queue),
1172                           RTE_CACHE_LINE_SIZE);
1173         if (rxq == NULL) {
1174                 PMD_INIT_LOG(ERR, "Can not allocate rx queue structure");
1175                 return -ENOMEM;
1176         }
1177
1178         rxq->mp = mp;
1179         rxq->queue_id = queue_idx;
1180         rxq->port_id = dev->data->port_id;
1181         rxq->shared = NULL; /* set in vmxnet3_setup_driver_shared() */
1182         rxq->hw = hw;
1183         rxq->qid1 = queue_idx;
1184         rxq->qid2 = queue_idx + hw->num_rx_queues;
1185         rxq->data_ring_qid = queue_idx + 2 * hw->num_rx_queues;
1186         rxq->data_desc_size = hw->rxdata_desc_size;
1187         rxq->stopped = TRUE;
1188
1189         ring0 = &rxq->cmd_ring[0];
1190         ring1 = &rxq->cmd_ring[1];
1191         comp_ring = &rxq->comp_ring;
1192         data_ring = &rxq->data_ring;
1193
1194         /* Rx vmxnet rings length should be between 256-4096 */
1195         if (nb_desc < VMXNET3_DEF_RX_RING_SIZE) {
1196                 PMD_INIT_LOG(ERR, "VMXNET3 Rx Ring Size Min: 256");
1197                 return -EINVAL;
1198         } else if (nb_desc > VMXNET3_RX_RING_MAX_SIZE) {
1199                 PMD_INIT_LOG(ERR, "VMXNET3 Rx Ring Size Max: 4096");
1200                 return -EINVAL;
1201         } else {
1202                 ring0->size = nb_desc;
1203                 ring0->size &= ~VMXNET3_RING_SIZE_MASK;
1204                 ring1->size = ring0->size;
1205         }
1206
1207         comp_ring->size = ring0->size + ring1->size;
1208         data_ring->size = ring0->size;
1209
1210         /* Rx vmxnet rings structure initialization */
1211         ring0->next2fill = 0;
1212         ring1->next2fill = 0;
1213         ring0->next2comp = 0;
1214         ring1->next2comp = 0;
1215         ring0->gen = VMXNET3_INIT_GEN;
1216         ring1->gen = VMXNET3_INIT_GEN;
1217         comp_ring->next2proc = 0;
1218         comp_ring->gen = VMXNET3_INIT_GEN;
1219
1220         size = sizeof(struct Vmxnet3_RxDesc) * (ring0->size + ring1->size);
1221         size += sizeof(struct Vmxnet3_RxCompDesc) * comp_ring->size;
1222         if (VMXNET3_VERSION_GE_3(hw) && rxq->data_desc_size)
1223                 size += rxq->data_desc_size * data_ring->size;
1224
1225         mz = rte_eth_dma_zone_reserve(dev, "rxdesc", queue_idx, size,
1226                                       VMXNET3_RING_BA_ALIGN, socket_id);
1227         if (mz == NULL) {
1228                 PMD_INIT_LOG(ERR, "ERROR: Creating queue descriptors zone");
1229                 return -ENOMEM;
1230         }
1231         rxq->mz = mz;
1232         memset(mz->addr, 0, mz->len);
1233
1234         /* cmd_ring0 initialization */
1235         ring0->base = mz->addr;
1236         ring0->basePA = mz->iova;
1237
1238         /* cmd_ring1 initialization */
1239         ring1->base = ring0->base + ring0->size;
1240         ring1->basePA = ring0->basePA + sizeof(struct Vmxnet3_RxDesc) * ring0->size;
1241
1242         /* comp_ring initialization */
1243         comp_ring->base = ring1->base + ring1->size;
1244         comp_ring->basePA = ring1->basePA + sizeof(struct Vmxnet3_RxDesc) *
1245                 ring1->size;
1246
1247         /* data_ring initialization */
1248         if (VMXNET3_VERSION_GE_3(hw) && rxq->data_desc_size) {
1249                 data_ring->base =
1250                         (uint8_t *)(comp_ring->base + comp_ring->size);
1251                 data_ring->basePA = comp_ring->basePA +
1252                         sizeof(struct Vmxnet3_RxCompDesc) * comp_ring->size;
1253         }
1254
1255         /* cmd_ring0-cmd_ring1 buf_info allocation */
1256         for (i = 0; i < VMXNET3_RX_CMDRING_SIZE; i++) {
1257
1258                 ring = &rxq->cmd_ring[i];
1259                 ring->rid = i;
1260                 snprintf(mem_name, sizeof(mem_name), "rx_ring_%d_buf_info", i);
1261
1262                 ring->buf_info = rte_zmalloc(mem_name,
1263                                              ring->size * sizeof(vmxnet3_buf_info_t),
1264                                              RTE_CACHE_LINE_SIZE);
1265                 if (ring->buf_info == NULL) {
1266                         PMD_INIT_LOG(ERR, "ERROR: Creating rx_buf_info structure");
1267                         return -ENOMEM;
1268                 }
1269         }
1270
1271         /* Update the data portion with rxq */
1272         dev->data->rx_queues[queue_idx] = rxq;
1273
1274         return 0;
1275 }
1276
1277 /*
1278  * Initializes Receive Unit
1279  * Load mbufs in rx queue in advance
1280  */
1281 int
1282 vmxnet3_dev_rxtx_init(struct rte_eth_dev *dev)
1283 {
1284         struct vmxnet3_hw *hw = dev->data->dev_private;
1285
1286         int i, ret;
1287         uint8_t j;
1288
1289         PMD_INIT_FUNC_TRACE();
1290
1291         for (i = 0; i < hw->num_rx_queues; i++) {
1292                 vmxnet3_rx_queue_t *rxq = dev->data->rx_queues[i];
1293
1294                 for (j = 0; j < VMXNET3_RX_CMDRING_SIZE; j++) {
1295                         /* Passing 0 as alloc_num will allocate full ring */
1296                         ret = vmxnet3_post_rx_bufs(rxq, j);
1297                         if (ret <= 0) {
1298                                 PMD_INIT_LOG(ERR,
1299                                              "ERROR: Posting Rxq: %d buffers ring: %d",
1300                                              i, j);
1301                                 return -ret;
1302                         }
1303                         /*
1304                          * Updating device with the index:next2fill to fill the
1305                          * mbufs for coming packets.
1306                          */
1307                         if (unlikely(rxq->shared->ctrl.updateRxProd)) {
1308                                 VMXNET3_WRITE_BAR0_REG(hw, rxprod_reg[j] + (rxq->queue_id * VMXNET3_REG_ALIGN),
1309                                                        rxq->cmd_ring[j].next2fill);
1310                         }
1311                 }
1312                 rxq->stopped = FALSE;
1313                 rxq->start_seg = NULL;
1314         }
1315
1316         for (i = 0; i < dev->data->nb_tx_queues; i++) {
1317                 struct vmxnet3_tx_queue *txq = dev->data->tx_queues[i];
1318
1319                 txq->stopped = FALSE;
1320         }
1321
1322         return 0;
1323 }
1324
/*
 * Default 40-byte Toeplitz RSS hash key, used when the application does
 * not supply its own key in the port RSS configuration.
 */
static uint8_t rss_intel_key[40] = {
	0x6D, 0x5A, 0x56, 0xDA, 0x25, 0x5B, 0x0E, 0xC2,
	0x41, 0x67, 0x25, 0x3D, 0x43, 0xA3, 0x8F, 0xB0,
	0xD0, 0xCA, 0x2B, 0xCB, 0xAE, 0x7B, 0x30, 0xB4,
	0x77, 0xCB, 0x2D, 0xA3, 0x80, 0x30, 0xF2, 0x0C,
	0x6A, 0x42, 0xB7, 0x3B, 0xBE, 0xAC, 0x01, 0xFA,
};
1332
1333 /*
1334  * Additional RSS configurations based on vmxnet v4+ APIs
1335  */
1336 int
1337 vmxnet3_v4_rss_configure(struct rte_eth_dev *dev)
1338 {
1339         struct vmxnet3_hw *hw = dev->data->dev_private;
1340         Vmxnet3_DriverShared *shared = hw->shared;
1341         Vmxnet3_CmdInfo *cmdInfo = &shared->cu.cmdInfo;
1342         struct rte_eth_rss_conf *port_rss_conf;
1343         uint64_t rss_hf;
1344         uint32_t ret;
1345
1346         PMD_INIT_FUNC_TRACE();
1347
1348         cmdInfo->setRSSFields = 0;
1349         port_rss_conf = &dev->data->dev_conf.rx_adv_conf.rss_conf;
1350
1351         if ((port_rss_conf->rss_hf & VMXNET3_MANDATORY_V4_RSS) !=
1352             VMXNET3_MANDATORY_V4_RSS) {
1353                 PMD_INIT_LOG(WARNING, "RSS: IPv4/6 TCP is required for vmxnet3 v4 RSS,"
1354                              "automatically setting it");
1355                 port_rss_conf->rss_hf |= VMXNET3_MANDATORY_V4_RSS;
1356         }
1357
1358         rss_hf = port_rss_conf->rss_hf &
1359                 (VMXNET3_V4_RSS_MASK | VMXNET3_RSS_OFFLOAD_ALL);
1360
1361         if (rss_hf & RTE_ETH_RSS_NONFRAG_IPV4_TCP)
1362                 cmdInfo->setRSSFields |= VMXNET3_RSS_FIELDS_TCPIP4;
1363         if (rss_hf & RTE_ETH_RSS_NONFRAG_IPV6_TCP)
1364                 cmdInfo->setRSSFields |= VMXNET3_RSS_FIELDS_TCPIP6;
1365         if (rss_hf & RTE_ETH_RSS_NONFRAG_IPV4_UDP)
1366                 cmdInfo->setRSSFields |= VMXNET3_RSS_FIELDS_UDPIP4;
1367         if (rss_hf & RTE_ETH_RSS_NONFRAG_IPV6_UDP)
1368                 cmdInfo->setRSSFields |= VMXNET3_RSS_FIELDS_UDPIP6;
1369
1370         VMXNET3_WRITE_BAR1_REG(hw, VMXNET3_REG_CMD,
1371                                VMXNET3_CMD_SET_RSS_FIELDS);
1372         ret = VMXNET3_READ_BAR1_REG(hw, VMXNET3_REG_CMD);
1373
1374         if (ret != VMXNET3_SUCCESS) {
1375                 PMD_DRV_LOG(ERR, "Set RSS fields (v4) failed: %d", ret);
1376         }
1377
1378         return ret;
1379 }
1380
1381 /*
1382  * Configure RSS feature
1383  */
1384 int
1385 vmxnet3_rss_configure(struct rte_eth_dev *dev)
1386 {
1387         struct vmxnet3_hw *hw = dev->data->dev_private;
1388         struct VMXNET3_RSSConf *dev_rss_conf;
1389         struct rte_eth_rss_conf *port_rss_conf;
1390         uint64_t rss_hf;
1391         uint8_t i, j;
1392
1393         PMD_INIT_FUNC_TRACE();
1394
1395         dev_rss_conf = hw->rss_conf;
1396         port_rss_conf = &dev->data->dev_conf.rx_adv_conf.rss_conf;
1397
1398         /* loading hashFunc */
1399         dev_rss_conf->hashFunc = VMXNET3_RSS_HASH_FUNC_TOEPLITZ;
1400         /* loading hashKeySize */
1401         dev_rss_conf->hashKeySize = VMXNET3_RSS_MAX_KEY_SIZE;
1402         /* loading indTableSize: Must not exceed VMXNET3_RSS_MAX_IND_TABLE_SIZE (128)*/
1403         dev_rss_conf->indTableSize = (uint16_t)((MAX_RX_QUEUES(hw)) * 4);
1404
1405         if (port_rss_conf->rss_key == NULL) {
1406                 /* Default hash key */
1407                 port_rss_conf->rss_key = rss_intel_key;
1408         }
1409
1410         /* loading hashKey */
1411         memcpy(&dev_rss_conf->hashKey[0], port_rss_conf->rss_key,
1412                dev_rss_conf->hashKeySize);
1413
1414         /* loading indTable */
1415         for (i = 0, j = 0; i < dev_rss_conf->indTableSize; i++, j++) {
1416                 if (j == dev->data->nb_rx_queues)
1417                         j = 0;
1418                 dev_rss_conf->indTable[i] = j;
1419         }
1420
1421         /* loading hashType */
1422         dev_rss_conf->hashType = 0;
1423         rss_hf = port_rss_conf->rss_hf & VMXNET3_RSS_OFFLOAD_ALL;
1424         if (rss_hf & RTE_ETH_RSS_IPV4)
1425                 dev_rss_conf->hashType |= VMXNET3_RSS_HASH_TYPE_IPV4;
1426         if (rss_hf & RTE_ETH_RSS_NONFRAG_IPV4_TCP)
1427                 dev_rss_conf->hashType |= VMXNET3_RSS_HASH_TYPE_TCP_IPV4;
1428         if (rss_hf & RTE_ETH_RSS_IPV6)
1429                 dev_rss_conf->hashType |= VMXNET3_RSS_HASH_TYPE_IPV6;
1430         if (rss_hf & RTE_ETH_RSS_NONFRAG_IPV6_TCP)
1431                 dev_rss_conf->hashType |= VMXNET3_RSS_HASH_TYPE_TCP_IPV6;
1432
1433         return VMXNET3_SUCCESS;
1434 }