drivers/net/vmxnet3/vmxnet3_rxtx.c (dpdk.git)
1 /* SPDX-License-Identifier: BSD-3-Clause
2  * Copyright(c) 2010-2015 Intel Corporation
3  */
4
5 #include <sys/queue.h>
6
7 #include <stdio.h>
8 #include <stdlib.h>
9 #include <string.h>
10 #include <errno.h>
11 #include <stdint.h>
12 #include <stdarg.h>
13 #include <unistd.h>
14 #include <inttypes.h>
15
16 #include <rte_byteorder.h>
17 #include <rte_common.h>
18 #include <rte_cycles.h>
19 #include <rte_log.h>
20 #include <rte_debug.h>
21 #include <rte_interrupts.h>
22 #include <rte_pci.h>
23 #include <rte_memory.h>
24 #include <rte_memzone.h>
25 #include <rte_launch.h>
26 #include <rte_eal.h>
27 #include <rte_per_lcore.h>
28 #include <rte_lcore.h>
29 #include <rte_atomic.h>
30 #include <rte_branch_prediction.h>
31 #include <rte_mempool.h>
32 #include <rte_malloc.h>
33 #include <rte_mbuf.h>
34 #include <rte_ether.h>
35 #include <ethdev_driver.h>
36 #include <rte_prefetch.h>
37 #include <rte_ip.h>
38 #include <rte_udp.h>
39 #include <rte_tcp.h>
40 #include <rte_sctp.h>
41 #include <rte_string_fns.h>
42 #include <rte_errno.h>
43 #include <rte_net.h>
44
45 #include "base/vmxnet3_defs.h"
46 #include "vmxnet3_ring.h"
47
48 #include "vmxnet3_logs.h"
49 #include "vmxnet3_ethdev.h"
50
51 #define VMXNET3_TX_OFFLOAD_MASK (RTE_MBUF_F_TX_VLAN | \
52                 RTE_MBUF_F_TX_IPV6 |     \
53                 RTE_MBUF_F_TX_IPV4 |     \
54                 RTE_MBUF_F_TX_L4_MASK |  \
55                 RTE_MBUF_F_TX_TCP_SEG)
56
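/*
 * Mbuf Tx offload flags this PMD cannot handle: everything in
 * RTE_MBUF_F_TX_OFFLOAD_MASK that is not part of VMXNET3_TX_OFFLOAD_MASK.
 * vmxnet3_prep_pkts() rejects packets requesting any of these and sets
 * rte_errno to ENOTSUP.
 */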
57 #define VMXNET3_TX_OFFLOAD_NOTSUP_MASK  \
58         (RTE_MBUF_F_TX_OFFLOAD_MASK ^ VMXNET3_TX_OFFLOAD_MASK)
59
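/* Rx producer ("doorbell") registers, one per hardware Rx command ring */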
60 static const uint32_t rxprod_reg[2] = {VMXNET3_REG_RXPROD, VMXNET3_REG_RXPROD2};
61
62 static int vmxnet3_post_rx_bufs(vmxnet3_rx_queue_t*, uint8_t);
63 static void vmxnet3_tq_tx_complete(vmxnet3_tx_queue_t *);
64 #ifdef RTE_LIBRTE_VMXNET3_DEBUG_DRIVER_NOT_USED
65 static void vmxnet3_rxq_dump(struct vmxnet3_rx_queue *);
66 static void vmxnet3_txq_dump(struct vmxnet3_tx_queue *);
67 #endif
68
69 #ifdef RTE_LIBRTE_VMXNET3_DEBUG_DRIVER_NOT_USED
70 static void
71 vmxnet3_rxq_dump(struct vmxnet3_rx_queue *rxq)
72 {
73         uint32_t avail = 0;
74
75         if (rxq == NULL)
76                 return;
77
78         PMD_RX_LOG(DEBUG,
79                    "RXQ: cmd0 base : %p cmd1 base : %p comp ring base : %p.",
80                    rxq->cmd_ring[0].base, rxq->cmd_ring[1].base, rxq->comp_ring.base);
81         PMD_RX_LOG(DEBUG,
82                    "RXQ: cmd0 basePA : 0x%lx cmd1 basePA : 0x%lx comp ring basePA : 0x%lx.",
83                    (unsigned long)rxq->cmd_ring[0].basePA,
84                    (unsigned long)rxq->cmd_ring[1].basePA,
85                    (unsigned long)rxq->comp_ring.basePA);
86
87         avail = vmxnet3_cmd_ring_desc_avail(&rxq->cmd_ring[0]);
88         PMD_RX_LOG(DEBUG,
89                    "RXQ:cmd0: size=%u; free=%u; next2proc=%u; queued=%u",
90                    (uint32_t)rxq->cmd_ring[0].size, avail,
91                    rxq->comp_ring.next2proc,
92                    rxq->cmd_ring[0].size - avail);
93
94         avail = vmxnet3_cmd_ring_desc_avail(&rxq->cmd_ring[1]);
95         PMD_RX_LOG(DEBUG, "RXQ:cmd1 size=%u; free=%u; next2proc=%u; queued=%u",
96                    (uint32_t)rxq->cmd_ring[1].size, avail, rxq->comp_ring.next2proc,
97                    rxq->cmd_ring[1].size - avail);
98
99 }
100
101 static void
102 vmxnet3_txq_dump(struct vmxnet3_tx_queue *txq)
103 {
104         uint32_t avail = 0;
105
106         if (txq == NULL)
107                 return;
108
109         PMD_TX_LOG(DEBUG, "TXQ: cmd base : %p comp ring base : %p data ring base : %p.",
110                    txq->cmd_ring.base, txq->comp_ring.base, txq->data_ring.base);
111         PMD_TX_LOG(DEBUG, "TXQ: cmd basePA : 0x%lx comp ring basePA : 0x%lx data ring basePA : 0x%lx.",
112                    (unsigned long)txq->cmd_ring.basePA,
113                    (unsigned long)txq->comp_ring.basePA,
114                    (unsigned long)txq->data_ring.basePA);
115
116         avail = vmxnet3_cmd_ring_desc_avail(&txq->cmd_ring);
117         PMD_TX_LOG(DEBUG, "TXQ: size=%u; free=%u; next2proc=%u; queued=%u",
118                    (uint32_t)txq->cmd_ring.size, avail,
119                    txq->comp_ring.next2proc, txq->cmd_ring.size - avail);
120 }
121 #endif
122
123 static void
124 vmxnet3_tx_cmd_ring_release_mbufs(vmxnet3_cmd_ring_t *ring)
125 {
126         while (ring->next2comp != ring->next2fill) {
127                 /* No need to worry about desc ownership, device is quiesced by now. */
128                 vmxnet3_buf_info_t *buf_info = ring->buf_info + ring->next2comp;
129
130                 if (buf_info->m) {
131                         rte_pktmbuf_free(buf_info->m);
132                         buf_info->m = NULL;
133                         buf_info->bufPA = 0;
134                         buf_info->len = 0;
135                 }
136                 vmxnet3_cmd_ring_adv_next2comp(ring);
137         }
138 }
139
140 static void
141 vmxnet3_rx_cmd_ring_release_mbufs(vmxnet3_cmd_ring_t *ring)
142 {
143         uint32_t i;
144
145         for (i = 0; i < ring->size; i++) {
146                 /* No need to worry about desc ownership, device is quiesced by now. */
147                 vmxnet3_buf_info_t *buf_info = &ring->buf_info[i];
148
149                 if (buf_info->m) {
150                         rte_pktmbuf_free_seg(buf_info->m);
151                         buf_info->m = NULL;
152                         buf_info->bufPA = 0;
153                         buf_info->len = 0;
154                 }
155                 vmxnet3_cmd_ring_adv_next2comp(ring);
156         }
157 }
158
159 static void
160 vmxnet3_cmd_ring_release(vmxnet3_cmd_ring_t *ring)
161 {
162         rte_free(ring->buf_info);
163         ring->buf_info = NULL;
164 }
165
166 void
167 vmxnet3_dev_tx_queue_release(struct rte_eth_dev *dev, uint16_t qid)
168 {
169         vmxnet3_tx_queue_t *tq = dev->data->tx_queues[qid];
170
171         if (tq != NULL) {
172                 /* Release mbufs */
173                 vmxnet3_tx_cmd_ring_release_mbufs(&tq->cmd_ring);
174                 /* Release the cmd_ring */
175                 vmxnet3_cmd_ring_release(&tq->cmd_ring);
176                 /* Release the memzone */
177                 rte_memzone_free(tq->mz);
178                 /* Release the queue */
179                 rte_free(tq);
180         }
181 }
182
183 void
184 vmxnet3_dev_rx_queue_release(struct rte_eth_dev *dev, uint16_t qid)
185 {
186         int i;
187         vmxnet3_rx_queue_t *rq = dev->data->rx_queues[qid];
188
189         if (rq != NULL) {
190                 /* Release mbufs */
191                 for (i = 0; i < VMXNET3_RX_CMDRING_SIZE; i++)
192                         vmxnet3_rx_cmd_ring_release_mbufs(&rq->cmd_ring[i]);
193
194                 /* Release both the cmd_rings */
195                 for (i = 0; i < VMXNET3_RX_CMDRING_SIZE; i++)
196                         vmxnet3_cmd_ring_release(&rq->cmd_ring[i]);
197
198                 /* Release the memzone */
199                 rte_memzone_free(rq->mz);
200
201                 /* Release the queue */
202                 rte_free(rq);
203         }
204 }
205
206 static void
207 vmxnet3_dev_tx_queue_reset(void *txq)
208 {
209         vmxnet3_tx_queue_t *tq = txq;
210         struct vmxnet3_cmd_ring *ring = &tq->cmd_ring;
211         struct vmxnet3_comp_ring *comp_ring = &tq->comp_ring;
212         struct vmxnet3_data_ring *data_ring = &tq->data_ring;
213         int size;
214
215         if (tq != NULL) {
216                 /* Release the cmd_ring mbufs */
217                 vmxnet3_tx_cmd_ring_release_mbufs(&tq->cmd_ring);
218         }
219
220         /* Tx vmxnet rings structure initialization */
221         ring->next2fill = 0;
222         ring->next2comp = 0;
223         ring->gen = VMXNET3_INIT_GEN;
224         comp_ring->next2proc = 0;
225         comp_ring->gen = VMXNET3_INIT_GEN;
226
227         size = sizeof(struct Vmxnet3_TxDesc) * ring->size;
228         size += sizeof(struct Vmxnet3_TxCompDesc) * comp_ring->size;
229         size += tq->txdata_desc_size * data_ring->size;
230
231         memset(ring->base, 0, size);
232 }
233
234 static void
235 vmxnet3_dev_rx_queue_reset(void *rxq)
236 {
237         int i;
238         vmxnet3_rx_queue_t *rq = rxq;
239         struct vmxnet3_hw *hw = rq->hw;
240         struct vmxnet3_cmd_ring *ring0, *ring1;
241         struct vmxnet3_comp_ring *comp_ring;
242         struct vmxnet3_rx_data_ring *data_ring = &rq->data_ring;
243         int size;
244
245         /* Release both the cmd_rings mbufs */
246         for (i = 0; i < VMXNET3_RX_CMDRING_SIZE; i++)
247                 vmxnet3_rx_cmd_ring_release_mbufs(&rq->cmd_ring[i]);
248
249         ring0 = &rq->cmd_ring[0];
250         ring1 = &rq->cmd_ring[1];
251         comp_ring = &rq->comp_ring;
252
253         /* Rx vmxnet rings structure initialization */
254         ring0->next2fill = 0;
255         ring1->next2fill = 0;
256         ring0->next2comp = 0;
257         ring1->next2comp = 0;
258         ring0->gen = VMXNET3_INIT_GEN;
259         ring1->gen = VMXNET3_INIT_GEN;
260         comp_ring->next2proc = 0;
261         comp_ring->gen = VMXNET3_INIT_GEN;
262
263         size = sizeof(struct Vmxnet3_RxDesc) * (ring0->size + ring1->size);
264         size += sizeof(struct Vmxnet3_RxCompDesc) * comp_ring->size;
265         if (VMXNET3_VERSION_GE_3(hw) && rq->data_desc_size)
266                 size += rq->data_desc_size * data_ring->size;
267
268         memset(ring0->base, 0, size);
269 }
270
271 void
272 vmxnet3_dev_clear_queues(struct rte_eth_dev *dev)
273 {
274         unsigned i;
275
276         PMD_INIT_FUNC_TRACE();
277
278         for (i = 0; i < dev->data->nb_tx_queues; i++) {
279                 struct vmxnet3_tx_queue *txq = dev->data->tx_queues[i];
280
281                 if (txq != NULL) {
282                         txq->stopped = TRUE;
283                         vmxnet3_dev_tx_queue_reset(txq);
284                 }
285         }
286
287         for (i = 0; i < dev->data->nb_rx_queues; i++) {
288                 struct vmxnet3_rx_queue *rxq = dev->data->rx_queues[i];
289
290                 if (rxq != NULL) {
291                         rxq->stopped = TRUE;
292                         vmxnet3_dev_rx_queue_reset(rxq);
293                 }
294         }
295 }
296
297 static int
298 vmxnet3_unmap_pkt(uint16_t eop_idx, vmxnet3_tx_queue_t *txq)
299 {
300         int completed = 0;
301         struct rte_mbuf *mbuf;
302
303         /* Release cmd_ring descriptor and free mbuf */
304         RTE_ASSERT(txq->cmd_ring.base[eop_idx].txd.eop == 1);
305
306         mbuf = txq->cmd_ring.buf_info[eop_idx].m;
307         if (mbuf == NULL)
308                 rte_panic("EOP desc does not point to a valid mbuf");
309         rte_pktmbuf_free(mbuf);
310
311         txq->cmd_ring.buf_info[eop_idx].m = NULL;
312
313         while (txq->cmd_ring.next2comp != eop_idx) {
314                 /* no out-of-order completion */
315                 RTE_ASSERT(txq->cmd_ring.base[txq->cmd_ring.next2comp].txd.cq == 0);
316                 vmxnet3_cmd_ring_adv_next2comp(&txq->cmd_ring);
317                 completed++;
318         }
319
320         /* Mark the txd for which tcd was generated as completed */
321         vmxnet3_cmd_ring_adv_next2comp(&txq->cmd_ring);
322
323         return completed + 1;
324 }
325
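/*
 * Reap Tx completion descriptors. A completion entry is valid only while its
 * generation bit matches the completion ring's current generation; the
 * generation is toggled each time the ring wraps, so stale entries from the
 * previous pass are ignored.
 */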
326 static void
327 vmxnet3_tq_tx_complete(vmxnet3_tx_queue_t *txq)
328 {
329         int completed = 0;
330         vmxnet3_comp_ring_t *comp_ring = &txq->comp_ring;
331         struct Vmxnet3_TxCompDesc *tcd = (struct Vmxnet3_TxCompDesc *)
332                 (comp_ring->base + comp_ring->next2proc);
333
334         while (tcd->gen == comp_ring->gen) {
335                 completed += vmxnet3_unmap_pkt(tcd->txdIdx, txq);
336
337                 vmxnet3_comp_ring_adv_next2proc(comp_ring);
338                 tcd = (struct Vmxnet3_TxCompDesc *)(comp_ring->base +
339                                                     comp_ring->next2proc);
340         }
341
342         PMD_TX_LOG(DEBUG, "Processed %d tx comps & command descs.", completed);
343
344         /* To avoid compiler warnings when not in DEBUG mode. */
345         RTE_SET_USED(completed);
346 }
347
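/*
 * tx_pkt_prepare callback: validates offload requests and lets
 * rte_net_intel_cksum_prepare() fill in the pseudo-header checksums expected
 * by the device. A rough usage sketch from an application's point of view
 * (port_id/queue_id/pkts/n are placeholders):
 *
 *     uint16_t nb = rte_eth_tx_prepare(port_id, queue_id, pkts, n);
 *     nb = rte_eth_tx_burst(port_id, queue_id, pkts, nb);
 *
 * The return value is the number of packets that passed validation; rte_errno
 * describes why the first rejected packet failed.
 */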
348 uint16_t
349 vmxnet3_prep_pkts(__rte_unused void *tx_queue, struct rte_mbuf **tx_pkts,
350         uint16_t nb_pkts)
351 {
352         int32_t ret;
353         uint32_t i;
354         uint64_t ol_flags;
355         struct rte_mbuf *m;
356
357         for (i = 0; i != nb_pkts; i++) {
358                 m = tx_pkts[i];
359                 ol_flags = m->ol_flags;
360
361                 /* Non-TSO packet cannot occupy more than
362                  * VMXNET3_MAX_TXD_PER_PKT TX descriptors.
363                  */
364                 if ((ol_flags & RTE_MBUF_F_TX_TCP_SEG) == 0 &&
365                                 m->nb_segs > VMXNET3_MAX_TXD_PER_PKT) {
366                         rte_errno = EINVAL;
367                         return i;
368                 }
369
370                 /* check that only supported TX offloads are requested. */
371                 if ((ol_flags & VMXNET3_TX_OFFLOAD_NOTSUP_MASK) != 0 ||
372                                 (ol_flags & RTE_MBUF_F_TX_L4_MASK) ==
373                                 RTE_MBUF_F_TX_SCTP_CKSUM) {
374                         rte_errno = ENOTSUP;
375                         return i;
376                 }
377
378 #ifdef RTE_LIBRTE_ETHDEV_DEBUG
379                 ret = rte_validate_tx_offload(m);
380                 if (ret != 0) {
381                         rte_errno = -ret;
382                         return i;
383                 }
384 #endif
385                 ret = rte_net_intel_cksum_prepare(m);
386                 if (ret != 0) {
387                         rte_errno = -ret;
388                         return i;
389                 }
390         }
391
392         return i;
393 }
394
395 uint16_t
396 vmxnet3_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts,
397                   uint16_t nb_pkts)
398 {
399         uint16_t nb_tx;
400         vmxnet3_tx_queue_t *txq = tx_queue;
401         struct vmxnet3_hw *hw = txq->hw;
402         Vmxnet3_TxQueueCtrl *txq_ctrl = &txq->shared->ctrl;
403         uint32_t deferred = rte_le_to_cpu_32(txq_ctrl->txNumDeferred);
404
405         if (unlikely(txq->stopped)) {
406                 PMD_TX_LOG(DEBUG, "Tx queue is stopped.");
407                 return 0;
408         }
409
410         /* Free up the comp_descriptors aggressively */
411         vmxnet3_tq_tx_complete(txq);
412
413         nb_tx = 0;
414         while (nb_tx < nb_pkts) {
415                 Vmxnet3_GenericDesc *gdesc;
416                 vmxnet3_buf_info_t *tbi;
417                 uint32_t first2fill, avail, dw2;
418                 struct rte_mbuf *txm = tx_pkts[nb_tx];
419                 struct rte_mbuf *m_seg = txm;
420                 int copy_size = 0;
421                 bool tso = (txm->ol_flags & RTE_MBUF_F_TX_TCP_SEG) != 0;
422                 /* # of descriptors needed for a packet. */
423                 unsigned count = txm->nb_segs;
424
425                 avail = vmxnet3_cmd_ring_desc_avail(&txq->cmd_ring);
426                 if (count > avail) {
427                         /* Is command ring full? */
428                         if (unlikely(avail == 0)) {
429                                 PMD_TX_LOG(DEBUG, "No free ring descriptors");
430                                 txq->stats.tx_ring_full++;
431                                 txq->stats.drop_total += (nb_pkts - nb_tx);
432                                 break;
433                         }
434
435                         /* Command ring is not full but cannot handle the
436                          * multi-segmented packet. Let's try the next packet
437                          * in this case.
438                          */
439                         PMD_TX_LOG(DEBUG, "Running out of ring descriptors "
440                                    "(avail %d needed %d)", avail, count);
441                         txq->stats.drop_total++;
442                         if (tso)
443                                 txq->stats.drop_tso++;
444                         rte_pktmbuf_free(txm);
445                         nb_tx++;
446                         continue;
447                 }
448
449                 /* Drop non-TSO packet that is excessively fragmented */
450                 if (unlikely(!tso && count > VMXNET3_MAX_TXD_PER_PKT)) {
451                         PMD_TX_LOG(ERR, "Non-TSO packet cannot occupy more than %d tx "
452                                    "descriptors. Packet dropped.", VMXNET3_MAX_TXD_PER_PKT);
453                         txq->stats.drop_too_many_segs++;
454                         txq->stats.drop_total++;
455                         rte_pktmbuf_free(txm);
456                         nb_tx++;
457                         continue;
458                 }
459
460                 if (txm->nb_segs == 1 &&
461                     rte_pktmbuf_pkt_len(txm) <= txq->txdata_desc_size) {
462                         struct Vmxnet3_TxDataDesc *tdd;
463
464                         /* Skip empty packets */
465                         if (unlikely(rte_pktmbuf_pkt_len(txm) == 0)) {
466                                 txq->stats.drop_total++;
467                                 rte_pktmbuf_free(txm);
468                                 nb_tx++;
469                                 continue;
470                         }
471
472                         tdd = (struct Vmxnet3_TxDataDesc *)
473                                 ((uint8 *)txq->data_ring.base +
474                                  txq->cmd_ring.next2fill *
475                                  txq->txdata_desc_size);
476                         copy_size = rte_pktmbuf_pkt_len(txm);
477                         rte_memcpy(tdd->data, rte_pktmbuf_mtod(txm, char *), copy_size);
478                 }
479
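                /*
                 * The SOP descriptor is first written with the inverted
                 * generation bit so the device ignores it; only after every
                 * segment descriptor has been filled is the SOP generation bit
                 * flipped (after the compiler barrier below), handing the
                 * whole chain to the device in one step.
                 */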
480                 /* use the previous gen bit for the SOP desc */
481                 dw2 = (txq->cmd_ring.gen ^ 0x1) << VMXNET3_TXD_GEN_SHIFT;
482                 first2fill = txq->cmd_ring.next2fill;
483                 do {
484                         /* Remember the transmit buffer for cleanup */
485                         tbi = txq->cmd_ring.buf_info + txq->cmd_ring.next2fill;
486
487                         /* NB: the following assumes that VMXNET3 maximum
488                          * transmit buffer size (16K) is greater than
489                          * the maximum mbuf segment size.
490                          */
491                         gdesc = txq->cmd_ring.base + txq->cmd_ring.next2fill;
492
493                         /* Skip empty segments */
494                         if (unlikely(m_seg->data_len == 0))
495                                 continue;
496
497                         if (copy_size) {
498                                 uint64 offset =
499                                         (uint64)txq->cmd_ring.next2fill *
500                                                         txq->txdata_desc_size;
501                                 gdesc->txd.addr =
502                                         rte_cpu_to_le_64(txq->data_ring.basePA +
503                                                          offset);
504                         } else {
505                                 gdesc->txd.addr = rte_mbuf_data_iova(m_seg);
506                         }
507
508                         gdesc->dword[2] = dw2 | m_seg->data_len;
509                         gdesc->dword[3] = 0;
510
511                         /* move to the next2fill descriptor */
512                         vmxnet3_cmd_ring_adv_next2fill(&txq->cmd_ring);
513
514                         /* use the right gen for non-SOP desc */
515                         dw2 = txq->cmd_ring.gen << VMXNET3_TXD_GEN_SHIFT;
516                 } while ((m_seg = m_seg->next) != NULL);
517
518                 /* set the last buf_info for the pkt */
519                 tbi->m = txm;
520                 /* Update the EOP descriptor */
521                 gdesc->dword[3] |= VMXNET3_TXD_EOP | VMXNET3_TXD_CQ;
522
523                 /* Add VLAN tag if present */
524                 gdesc = txq->cmd_ring.base + first2fill;
525                 if (txm->ol_flags & RTE_MBUF_F_TX_VLAN) {
526                         gdesc->txd.ti = 1;
527                         gdesc->txd.tci = txm->vlan_tci;
528                 }
529
530                 if (tso) {
531                         uint16_t mss = txm->tso_segsz;
532
533                         RTE_ASSERT(mss > 0);
534
535                         gdesc->txd.hlen = txm->l2_len + txm->l3_len + txm->l4_len;
536                         gdesc->txd.om = VMXNET3_OM_TSO;
537                         gdesc->txd.msscof = mss;
538
539                         deferred += (rte_pktmbuf_pkt_len(txm) - gdesc->txd.hlen + mss - 1) / mss;
540                 } else if (txm->ol_flags & RTE_MBUF_F_TX_L4_MASK) {
541                         gdesc->txd.om = VMXNET3_OM_CSUM;
542                         gdesc->txd.hlen = txm->l2_len + txm->l3_len;
543
544                         switch (txm->ol_flags & RTE_MBUF_F_TX_L4_MASK) {
545                         case RTE_MBUF_F_TX_TCP_CKSUM:
546                                 gdesc->txd.msscof = gdesc->txd.hlen +
547                                         offsetof(struct rte_tcp_hdr, cksum);
548                                 break;
549                         case RTE_MBUF_F_TX_UDP_CKSUM:
550                                 gdesc->txd.msscof = gdesc->txd.hlen +
551                                         offsetof(struct rte_udp_hdr,
552                                                 dgram_cksum);
553                                 break;
554                         default:
555                                 PMD_TX_LOG(WARNING, "requested cksum offload not supported %#llx",
556                                            txm->ol_flags & RTE_MBUF_F_TX_L4_MASK);
557                                 abort();
558                         }
559                         deferred++;
560                 } else {
561                         gdesc->txd.hlen = 0;
562                         gdesc->txd.om = VMXNET3_OM_NONE;
563                         gdesc->txd.msscof = 0;
564                         deferred++;
565                 }
566
567                 /* flip the GEN bit on the SOP */
568                 rte_compiler_barrier();
569                 gdesc->dword[2] ^= VMXNET3_TXD_GEN;
570
571                 txq_ctrl->txNumDeferred = rte_cpu_to_le_32(deferred);
572                 nb_tx++;
573         }
574
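        /*
         * txNumDeferred counts work queued since the last doorbell (a TSO
         * packet counts as its number of resulting segments). Once it reaches
         * the txThreshold advertised by the device in the shared queue control
         * area, TXPROD is written to notify the device.
         */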
575         PMD_TX_LOG(DEBUG, "vmxnet3 txThreshold: %u", rte_le_to_cpu_32(txq_ctrl->txThreshold));
576
577         if (deferred >= rte_le_to_cpu_32(txq_ctrl->txThreshold)) {
578                 txq_ctrl->txNumDeferred = 0;
579                 /* Notify vSwitch that packets are available. */
580                 VMXNET3_WRITE_BAR0_REG(hw, (VMXNET3_REG_TXPROD + txq->queue_id * VMXNET3_REG_ALIGN),
581                                        txq->cmd_ring.next2fill);
582         }
583
584         return nb_tx;
585 }
586
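/*
 * Refill one descriptor of the given Rx command ring with a fresh mbuf.
 * The descriptor's generation bit is written last so that the device only
 * ever sees a fully initialized descriptor.
 */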
587 static inline void
588 vmxnet3_renew_desc(vmxnet3_rx_queue_t *rxq, uint8_t ring_id,
589                    struct rte_mbuf *mbuf)
590 {
591         uint32_t val;
592         struct vmxnet3_cmd_ring *ring = &rxq->cmd_ring[ring_id];
593         struct Vmxnet3_RxDesc *rxd =
594                 (struct Vmxnet3_RxDesc *)(ring->base + ring->next2fill);
595         vmxnet3_buf_info_t *buf_info = &ring->buf_info[ring->next2fill];
596
597         if (ring_id == 0) {
598                 /* Usually: One HEAD type buf per packet
599                  * val = (ring->next2fill % rxq->hw->bufs_per_pkt) ?
600                  * VMXNET3_RXD_BTYPE_BODY : VMXNET3_RXD_BTYPE_HEAD;
601                  */
602
603                 /* We use single packet buffer so all heads here */
604                 val = VMXNET3_RXD_BTYPE_HEAD;
605         } else {
606                 /* All BODY type buffers for 2nd ring */
607                 val = VMXNET3_RXD_BTYPE_BODY;
608         }
609
610         /*
611          * Load the mbuf pointer into buf_info[next2fill];
612          * the buf_info entry is the equivalent of a cookie in a virtio virtqueue.
613          */
614         buf_info->m = mbuf;
615         buf_info->len = (uint16_t)(mbuf->buf_len - RTE_PKTMBUF_HEADROOM);
616         buf_info->bufPA = rte_mbuf_data_iova_default(mbuf);
617
618         /* Load Rx Descriptor with the buffer's GPA */
619         rxd->addr = buf_info->bufPA;
620
621         /* After this point rxd->addr MUST not be NULL */
622         rxd->btype = val;
623         rxd->len = buf_info->len;
624         /* Flip gen bit at the end to change ownership */
625         rxd->gen = ring->gen;
626
627         vmxnet3_cmd_ring_adv_next2fill(ring);
628 }
629 /*
630  *  Allocates mbufs and posts Rx descriptors with the buffer details
631  *  so that the device can receive packets into those buffers.
632  *  Ring layout:
633  *      Of the two rings, the 1st ring contains buffers of type 0 and type 1.
634  *      bufs_per_pkt is set such that, for non-LRO cases, all the buffers required
635  *      by a frame fit in the 1st ring (the 1st buf of type 0, the rest of type 1).
636  *      The 2nd ring contains buffers of type 1 only and is mostly used
637  *      for LRO.
638  */
639 static int
640 vmxnet3_post_rx_bufs(vmxnet3_rx_queue_t *rxq, uint8_t ring_id)
641 {
642         int err = 0;
643         uint32_t i = 0;
644         struct vmxnet3_cmd_ring *ring = &rxq->cmd_ring[ring_id];
645
646         while (vmxnet3_cmd_ring_desc_avail(ring) > 0) {
647                 struct rte_mbuf *mbuf;
648
649                 /* Allocate blank mbuf for the current Rx Descriptor */
650                 mbuf = rte_mbuf_raw_alloc(rxq->mp);
651                 if (unlikely(mbuf == NULL)) {
652                         PMD_RX_LOG(ERR, "Error allocating mbuf");
653                         rxq->stats.rx_buf_alloc_failure++;
654                         err = ENOMEM;
655                         break;
656                 }
657
658                 vmxnet3_renew_desc(rxq, ring_id, mbuf);
659                 i++;
660         }
661
662         /* Return error only if no buffers are posted at present */
663         if (vmxnet3_cmd_ring_desc_avail(ring) >= (ring->size - 1))
664                 return -err;
665         else
666                 return i;
667 }
668
669 /* MSS not provided by vmxnet3, guess one with available information */
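/*
 * The heuristic: parse the Ethernet/IP/TCP headers in the first segment to
 * find the payload offset, then divide the aggregated LRO length by the
 * segment count reported by the device; if the headers do not fit in the
 * first segment, fall back to an MTU-based estimate.
 */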
670 static uint16_t
671 vmxnet3_guess_mss(struct vmxnet3_hw *hw, const Vmxnet3_RxCompDesc *rcd,
672                 struct rte_mbuf *rxm)
673 {
674         uint32_t hlen, slen;
675         struct rte_ipv4_hdr *ipv4_hdr;
676         struct rte_ipv6_hdr *ipv6_hdr;
677         struct rte_tcp_hdr *tcp_hdr;
678         char *ptr;
679         uint8_t segs;
680
681         RTE_ASSERT(rcd->tcp);
682
683         ptr = rte_pktmbuf_mtod(rxm, char *);
684         slen = rte_pktmbuf_data_len(rxm);
685         hlen = sizeof(struct rte_ether_hdr);
686
687         if (rcd->v4) {
688                 if (unlikely(slen < hlen + sizeof(struct rte_ipv4_hdr)))
689                         return hw->mtu - sizeof(struct rte_ipv4_hdr)
690                                         - sizeof(struct rte_tcp_hdr);
691
692                 ipv4_hdr = (struct rte_ipv4_hdr *)(ptr + hlen);
693                 hlen += rte_ipv4_hdr_len(ipv4_hdr);
694         } else if (rcd->v6) {
695                 if (unlikely(slen < hlen + sizeof(struct rte_ipv6_hdr)))
696                         return hw->mtu - sizeof(struct rte_ipv6_hdr) -
697                                         sizeof(struct rte_tcp_hdr);
698
699                 ipv6_hdr = (struct rte_ipv6_hdr *)(ptr + hlen);
700                 hlen += sizeof(struct rte_ipv6_hdr);
701                 if (unlikely(ipv6_hdr->proto != IPPROTO_TCP)) {
702                         int frag;
703
704                         rte_net_skip_ip6_ext(ipv6_hdr->proto, rxm,
705                                         &hlen, &frag);
706                 }
707         }
708
709         if (unlikely(slen < hlen + sizeof(struct rte_tcp_hdr)))
710                 return hw->mtu - hlen - sizeof(struct rte_tcp_hdr) +
711                                 sizeof(struct rte_ether_hdr);
712
713         tcp_hdr = (struct rte_tcp_hdr *)(ptr + hlen);
714         hlen += (tcp_hdr->data_off & 0xf0) >> 2;
715
716         segs = *vmxnet3_segs_dynfield(rxm);
717         if (segs > 1)
718                 return (rte_pktmbuf_pkt_len(rxm) - hlen + segs - 1) / segs;
719         else
720                 return hw->mtu - hlen + sizeof(struct rte_ether_hdr);
721 }
722
723 /* Receive side checksum and other offloads */
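/*
 * The relevant fields are split across completion descriptors: LRO/MSS
 * information arrives with the SOP descriptor, while RSS hash, stripped VLAN
 * tag and checksum status are reported with the EOP descriptor.
 */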
724 static inline void
725 vmxnet3_rx_offload(struct vmxnet3_hw *hw, const Vmxnet3_RxCompDesc *rcd,
726                 struct rte_mbuf *rxm, const uint8_t sop)
727 {
728         uint64_t ol_flags = rxm->ol_flags;
729         uint32_t packet_type = rxm->packet_type;
730
731         /* Offloads set in sop */
732         if (sop) {
733                 /* Set packet type */
734                 packet_type |= RTE_PTYPE_L2_ETHER;
735
736                 /* Check large packet receive */
737                 if (VMXNET3_VERSION_GE_2(hw) &&
738                     rcd->type == VMXNET3_CDTYPE_RXCOMP_LRO) {
739                         const Vmxnet3_RxCompDescExt *rcde =
740                                         (const Vmxnet3_RxCompDescExt *)rcd;
741
742                         rxm->tso_segsz = rcde->mss;
743                         *vmxnet3_segs_dynfield(rxm) = rcde->segCnt;
744                         ol_flags |= RTE_MBUF_F_RX_LRO;
745                 }
746         } else { /* Offloads set in eop */
747                 /* Check for RSS */
748                 if (rcd->rssType != VMXNET3_RCD_RSS_TYPE_NONE) {
749                         ol_flags |= RTE_MBUF_F_RX_RSS_HASH;
750                         rxm->hash.rss = rcd->rssHash;
751                 }
752
753                 /* Check for hardware stripped VLAN tag */
754                 if (rcd->ts) {
755                         ol_flags |= (RTE_MBUF_F_RX_VLAN | RTE_MBUF_F_RX_VLAN_STRIPPED);
756                         rxm->vlan_tci = rte_le_to_cpu_16((uint16_t)rcd->tci);
757                 }
758
759                 /* Check packet type, checksum errors, etc. */
760                 if (rcd->cnc) {
761                         ol_flags |= RTE_MBUF_F_RX_L4_CKSUM_UNKNOWN;
762                 } else {
763                         if (rcd->v4) {
764                                 packet_type |= RTE_PTYPE_L3_IPV4_EXT_UNKNOWN;
765
766                                 if (rcd->ipc)
767                                         ol_flags |= RTE_MBUF_F_RX_IP_CKSUM_GOOD;
768                                 else
769                                         ol_flags |= RTE_MBUF_F_RX_IP_CKSUM_BAD;
770
771                                 if (rcd->tuc) {
772                                         ol_flags |= RTE_MBUF_F_RX_L4_CKSUM_GOOD;
773                                         if (rcd->tcp)
774                                                 packet_type |= RTE_PTYPE_L4_TCP;
775                                         else
776                                                 packet_type |= RTE_PTYPE_L4_UDP;
777                                 } else {
778                                         if (rcd->tcp) {
779                                                 packet_type |= RTE_PTYPE_L4_TCP;
780                                                 ol_flags |= RTE_MBUF_F_RX_L4_CKSUM_BAD;
781                                         } else if (rcd->udp) {
782                                                 packet_type |= RTE_PTYPE_L4_UDP;
783                                                 ol_flags |= RTE_MBUF_F_RX_L4_CKSUM_BAD;
784                                         }
785                                 }
786                         } else if (rcd->v6) {
787                                 packet_type |= RTE_PTYPE_L3_IPV6_EXT_UNKNOWN;
788
789                                 if (rcd->tuc) {
790                                         ol_flags |= RTE_MBUF_F_RX_L4_CKSUM_GOOD;
791                                         if (rcd->tcp)
792                                                 packet_type |= RTE_PTYPE_L4_TCP;
793                                         else
794                                                 packet_type |= RTE_PTYPE_L4_UDP;
795                                 } else {
796                                         if (rcd->tcp) {
797                                                 packet_type |= RTE_PTYPE_L4_TCP;
798                                                 ol_flags |= RTE_MBUF_F_RX_L4_CKSUM_BAD;
799                                         } else if (rcd->udp) {
800                                                 packet_type |= RTE_PTYPE_L4_UDP;
801                                                 ol_flags |= RTE_MBUF_F_RX_L4_CKSUM_BAD;
802                                         }
803                                 }
804                         } else {
805                                 packet_type |= RTE_PTYPE_UNKNOWN;
806                         }
807
808                         /* Old variants of vmxnet3 do not provide MSS */
809                         if ((ol_flags & RTE_MBUF_F_RX_LRO) && rxm->tso_segsz == 0)
810                                 rxm->tso_segsz = vmxnet3_guess_mss(hw,
811                                                 rcd, rxm);
812                 }
813         }
814
815         rxm->ol_flags = ol_flags;
816         rxm->packet_type = packet_type;
817 }
818
819 /*
820  * Process the Rx Completion Ring of given vmxnet3_rx_queue
821  * for nb_pkts burst and return the number of packets received
822  */
823 uint16_t
824 vmxnet3_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t nb_pkts)
825 {
826         uint16_t nb_rx;
827         uint32_t nb_rxd, idx;
828         uint8_t ring_idx;
829         vmxnet3_rx_queue_t *rxq;
830         Vmxnet3_RxCompDesc *rcd;
831         vmxnet3_buf_info_t *rbi;
832         Vmxnet3_RxDesc *rxd;
833         struct rte_mbuf *rxm = NULL;
834         struct vmxnet3_hw *hw;
835
836         nb_rx = 0;
837         ring_idx = 0;
838         nb_rxd = 0;
839         idx = 0;
840
841         rxq = rx_queue;
842         hw = rxq->hw;
843
844         rcd = &rxq->comp_ring.base[rxq->comp_ring.next2proc].rcd;
845
846         if (unlikely(rxq->stopped)) {
847                 PMD_RX_LOG(DEBUG, "Rx queue is stopped.");
848                 return 0;
849         }
850
851         while (rcd->gen == rxq->comp_ring.gen) {
852                 struct rte_mbuf *newm;
853
854                 if (nb_rx >= nb_pkts)
855                         break;
856
857                 newm = rte_mbuf_raw_alloc(rxq->mp);
858                 if (unlikely(newm == NULL)) {
859                         PMD_RX_LOG(ERR, "Error allocating mbuf");
860                         rxq->stats.rx_buf_alloc_failure++;
861                         break;
862                 }
863
864                 idx = rcd->rxdIdx;
865                 ring_idx = vmxnet3_get_ring_idx(hw, rcd->rqID);
866                 rxd = (Vmxnet3_RxDesc *)rxq->cmd_ring[ring_idx].base + idx;
867                 RTE_SET_USED(rxd); /* used only for assert when enabled */
868                 rbi = rxq->cmd_ring[ring_idx].buf_info + idx;
869
870                 PMD_RX_LOG(DEBUG, "rxd idx: %d ring idx: %d.", idx, ring_idx);
871
872                 RTE_ASSERT(rcd->len <= rxd->len);
873                 RTE_ASSERT(rbi->m);
874
875                 /* Get the packet buffer pointer from buf_info */
876                 rxm = rbi->m;
877
878                 /* Clear descriptor associated buf_info to be reused */
879                 rbi->m = NULL;
880                 rbi->bufPA = 0;
881
882                 /* Record the command ring index at which this packet was received */
883                 rxq->cmd_ring[ring_idx].next2comp = idx;
884
885                 /* For RCD with EOP set, check if there is frame error */
886                 if (unlikely(rcd->eop && rcd->err)) {
887                         rxq->stats.drop_total++;
888                         rxq->stats.drop_err++;
889
890                         if (!rcd->fcs) {
891                                 rxq->stats.drop_fcs++;
892                                 PMD_RX_LOG(ERR, "Recv packet dropped due to frame err.");
893                         }
894                         PMD_RX_LOG(ERR, "Error in received packet rcd#:%d rxd:%d",
895                                    (int)(rcd - (struct Vmxnet3_RxCompDesc *)
896                                          rxq->comp_ring.base), rcd->rxdIdx);
897                         rte_pktmbuf_free_seg(rxm);
898                         if (rxq->start_seg) {
899                                 struct rte_mbuf *start = rxq->start_seg;
900
901                                 rxq->start_seg = NULL;
902                                 rte_pktmbuf_free(start);
903                         }
904                         goto rcd_done;
905                 }
906
907                 /* Initialize newly received packet buffer */
908                 rxm->port = rxq->port_id;
909                 rxm->nb_segs = 1;
910                 rxm->next = NULL;
911                 rxm->pkt_len = (uint16_t)rcd->len;
912                 rxm->data_len = (uint16_t)rcd->len;
913                 rxm->data_off = RTE_PKTMBUF_HEADROOM;
914                 rxm->ol_flags = 0;
915                 rxm->vlan_tci = 0;
916                 rxm->packet_type = 0;
917
918                 /*
919                  * If this is the first buffer of the received packet,
920                  * set the pointer to the first mbuf of the packet
921                  * Otherwise, update the total length and the number of segments
922                  * of the current scattered packet, and update the pointer to
923                  * the last mbuf of the current packet.
924                  */
925                 if (rcd->sop) {
926                         RTE_ASSERT(rxd->btype == VMXNET3_RXD_BTYPE_HEAD);
927
928                         if (unlikely(rcd->len == 0)) {
929                                 RTE_ASSERT(rcd->eop);
930
931                                 PMD_RX_LOG(DEBUG,
932                                            "Rx buf was skipped. rxring[%d][%d]",
933                                            ring_idx, idx);
934                                 rte_pktmbuf_free_seg(rxm);
935                                 goto rcd_done;
936                         }
937
938                         if (vmxnet3_rx_data_ring(hw, rcd->rqID)) {
939                                 uint8_t *rdd = rxq->data_ring.base +
940                                         idx * rxq->data_desc_size;
941
942                                 RTE_ASSERT(VMXNET3_VERSION_GE_3(hw));
943                                 rte_memcpy(rte_pktmbuf_mtod(rxm, char *),
944                                            rdd, rcd->len);
945                         }
946
947                         rxq->start_seg = rxm;
948                         rxq->last_seg = rxm;
949                         vmxnet3_rx_offload(hw, rcd, rxm, 1);
950                 } else {
951                         struct rte_mbuf *start = rxq->start_seg;
952
953                         RTE_ASSERT(rxd->btype == VMXNET3_RXD_BTYPE_BODY);
954
955                         if (likely(start && rxm->data_len > 0)) {
956                                 start->pkt_len += rxm->data_len;
957                                 start->nb_segs++;
958
959                                 rxq->last_seg->next = rxm;
960                                 rxq->last_seg = rxm;
961                         } else {
962                                 PMD_RX_LOG(ERR, "Received an empty or out-of-order frame.");
963                                 rxq->stats.drop_total++;
964                                 rxq->stats.drop_err++;
965
966                                 rte_pktmbuf_free_seg(rxm);
967                         }
968                 }
969
970                 if (rcd->eop) {
971                         struct rte_mbuf *start = rxq->start_seg;
972
973                         vmxnet3_rx_offload(hw, rcd, start, 0);
974                         rx_pkts[nb_rx++] = start;
975                         rxq->start_seg = NULL;
976                 }
977
978 rcd_done:
979                 rxq->cmd_ring[ring_idx].next2comp = idx;
980                 VMXNET3_INC_RING_IDX_ONLY(rxq->cmd_ring[ring_idx].next2comp,
981                                           rxq->cmd_ring[ring_idx].size);
982
983                 /* It's time to renew descriptors */
984                 vmxnet3_renew_desc(rxq, ring_idx, newm);
985                 if (unlikely(rxq->shared->ctrl.updateRxProd)) {
986                         VMXNET3_WRITE_BAR0_REG(hw, rxprod_reg[ring_idx] + (rxq->queue_id * VMXNET3_REG_ALIGN),
987                                                rxq->cmd_ring[ring_idx].next2fill);
988                 }
989
990                 /* Advance to the next descriptor in comp_ring */
991                 vmxnet3_comp_ring_adv_next2proc(&rxq->comp_ring);
992
993                 rcd = &rxq->comp_ring.base[rxq->comp_ring.next2proc].rcd;
994                 nb_rxd++;
995                 if (nb_rxd > rxq->cmd_ring[0].size) {
996                         PMD_RX_LOG(ERR, "Used up quota of receiving packets,"
997                                    " relinquish control.");
998                         break;
999                 }
1000         }
1001
1002         if (unlikely(nb_rxd == 0)) {
1003                 uint32_t avail;
1004                 for (ring_idx = 0; ring_idx < VMXNET3_RX_CMDRING_SIZE; ring_idx++) {
1005                         avail = vmxnet3_cmd_ring_desc_avail(&rxq->cmd_ring[ring_idx]);
1006                         if (unlikely(avail > 0)) {
1007                                 /* try to alloc new buf and renew descriptors */
1008                                 vmxnet3_post_rx_bufs(rxq, ring_idx);
1009                         }
1010                 }
1011                 if (unlikely(rxq->shared->ctrl.updateRxProd)) {
1012                         for (ring_idx = 0; ring_idx < VMXNET3_RX_CMDRING_SIZE; ring_idx++) {
1013                                 VMXNET3_WRITE_BAR0_REG(hw, rxprod_reg[ring_idx] + (rxq->queue_id * VMXNET3_REG_ALIGN),
1014                                                        rxq->cmd_ring[ring_idx].next2fill);
1015                         }
1016                 }
1017         }
1018
1019         return nb_rx;
1020 }
1021
1022 int
1023 vmxnet3_dev_tx_queue_setup(struct rte_eth_dev *dev,
1024                            uint16_t queue_idx,
1025                            uint16_t nb_desc,
1026                            unsigned int socket_id,
1027                            const struct rte_eth_txconf *tx_conf __rte_unused)
1028 {
1029         struct vmxnet3_hw *hw = dev->data->dev_private;
1030         const struct rte_memzone *mz;
1031         struct vmxnet3_tx_queue *txq;
1032         struct vmxnet3_cmd_ring *ring;
1033         struct vmxnet3_comp_ring *comp_ring;
1034         struct vmxnet3_data_ring *data_ring;
1035         int size;
1036
1037         PMD_INIT_FUNC_TRACE();
1038
1039         txq = rte_zmalloc("ethdev_tx_queue", sizeof(struct vmxnet3_tx_queue),
1040                           RTE_CACHE_LINE_SIZE);
1041         if (txq == NULL) {
1042                 PMD_INIT_LOG(ERR, "Can not allocate tx queue structure");
1043                 return -ENOMEM;
1044         }
1045
1046         txq->queue_id = queue_idx;
1047         txq->port_id = dev->data->port_id;
1048         txq->shared = NULL; /* set in vmxnet3_setup_driver_shared() */
1049         txq->hw = hw;
1050         txq->qid = queue_idx;
1051         txq->stopped = TRUE;
1052         txq->txdata_desc_size = hw->txdata_desc_size;
1053
1054         ring = &txq->cmd_ring;
1055         comp_ring = &txq->comp_ring;
1056         data_ring = &txq->data_ring;
1057
1058         /* Tx vmxnet ring length must be between 512 and 4096 */
1059         if (nb_desc < VMXNET3_DEF_TX_RING_SIZE) {
1060                 PMD_INIT_LOG(ERR, "VMXNET3 Tx Ring Size Min: %u",
1061                              VMXNET3_DEF_TX_RING_SIZE);
1062                 return -EINVAL;
1063         } else if (nb_desc > VMXNET3_TX_RING_MAX_SIZE) {
1064                 PMD_INIT_LOG(ERR, "VMXNET3 Tx Ring Size Max: %u",
1065                              VMXNET3_TX_RING_MAX_SIZE);
1066                 return -EINVAL;
1067         } else {
1068                 ring->size = nb_desc;
1069                 ring->size &= ~VMXNET3_RING_SIZE_MASK;
1070         }
1071         comp_ring->size = data_ring->size = ring->size;
1072
1073         /* Tx vmxnet rings structure initialization */
1074         ring->next2fill = 0;
1075         ring->next2comp = 0;
1076         ring->gen = VMXNET3_INIT_GEN;
1077         comp_ring->next2proc = 0;
1078         comp_ring->gen = VMXNET3_INIT_GEN;
1079
1080         size = sizeof(struct Vmxnet3_TxDesc) * ring->size;
1081         size += sizeof(struct Vmxnet3_TxCompDesc) * comp_ring->size;
1082         size += txq->txdata_desc_size * data_ring->size;
1083
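        /*
         * A single DMA memzone holds the Tx command ring, completion ring and
         * data ring back to back; the base/basePA pointers below are carved
         * out of it.
         */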
1084         mz = rte_eth_dma_zone_reserve(dev, "txdesc", queue_idx, size,
1085                                       VMXNET3_RING_BA_ALIGN, socket_id);
1086         if (mz == NULL) {
1087                 PMD_INIT_LOG(ERR, "ERROR: Creating queue descriptors zone");
1088                 return -ENOMEM;
1089         }
1090         txq->mz = mz;
1091         memset(mz->addr, 0, mz->len);
1092
1093         /* cmd_ring initialization */
1094         ring->base = mz->addr;
1095         ring->basePA = mz->iova;
1096
1097         /* comp_ring initialization */
1098         comp_ring->base = ring->base + ring->size;
1099         comp_ring->basePA = ring->basePA +
1100                 (sizeof(struct Vmxnet3_TxDesc) * ring->size);
1101
1102         /* data_ring initialization */
1103         data_ring->base = (Vmxnet3_TxDataDesc *)(comp_ring->base + comp_ring->size);
1104         data_ring->basePA = comp_ring->basePA +
1105                         (sizeof(struct Vmxnet3_TxCompDesc) * comp_ring->size);
1106
1107         /* cmd_ring0 buf_info allocation */
1108         ring->buf_info = rte_zmalloc("tx_ring_buf_info",
1109                                      ring->size * sizeof(vmxnet3_buf_info_t), RTE_CACHE_LINE_SIZE);
1110         if (ring->buf_info == NULL) {
1111                 PMD_INIT_LOG(ERR, "ERROR: Creating tx_buf_info structure");
1112                 return -ENOMEM;
1113         }
1114
1115         /* Update the data portion with txq */
1116         dev->data->tx_queues[queue_idx] = txq;
1117
1118         return 0;
1119 }
1120
1121 int
1122 vmxnet3_dev_rx_queue_setup(struct rte_eth_dev *dev,
1123                            uint16_t queue_idx,
1124                            uint16_t nb_desc,
1125                            unsigned int socket_id,
1126                            __rte_unused const struct rte_eth_rxconf *rx_conf,
1127                            struct rte_mempool *mp)
1128 {
1129         const struct rte_memzone *mz;
1130         struct vmxnet3_rx_queue *rxq;
1131         struct vmxnet3_hw *hw = dev->data->dev_private;
1132         struct vmxnet3_cmd_ring *ring0, *ring1, *ring;
1133         struct vmxnet3_comp_ring *comp_ring;
1134         struct vmxnet3_rx_data_ring *data_ring;
1135         int size;
1136         uint8_t i;
1137         char mem_name[32];
1138
1139         PMD_INIT_FUNC_TRACE();
1140
1141         rxq = rte_zmalloc("ethdev_rx_queue", sizeof(struct vmxnet3_rx_queue),
1142                           RTE_CACHE_LINE_SIZE);
1143         if (rxq == NULL) {
1144                 PMD_INIT_LOG(ERR, "Can not allocate rx queue structure");
1145                 return -ENOMEM;
1146         }
1147
1148         rxq->mp = mp;
1149         rxq->queue_id = queue_idx;
1150         rxq->port_id = dev->data->port_id;
1151         rxq->shared = NULL; /* set in vmxnet3_setup_driver_shared() */
1152         rxq->hw = hw;
1153         rxq->qid1 = queue_idx;
1154         rxq->qid2 = queue_idx + hw->num_rx_queues;
1155         rxq->data_ring_qid = queue_idx + 2 * hw->num_rx_queues;
1156         rxq->data_desc_size = hw->rxdata_desc_size;
1157         rxq->stopped = TRUE;
1158
1159         ring0 = &rxq->cmd_ring[0];
1160         ring1 = &rxq->cmd_ring[1];
1161         comp_ring = &rxq->comp_ring;
1162         data_ring = &rxq->data_ring;
1163
1164         /* Rx vmxnet ring lengths must be between 256 and 4096 */
1165         if (nb_desc < VMXNET3_DEF_RX_RING_SIZE) {
1166                 PMD_INIT_LOG(ERR, "VMXNET3 Rx Ring Size Min: 256");
1167                 return -EINVAL;
1168         } else if (nb_desc > VMXNET3_RX_RING_MAX_SIZE) {
1169                 PMD_INIT_LOG(ERR, "VMXNET3 Rx Ring Size Max: 4096");
1170                 return -EINVAL;
1171         } else {
1172                 ring0->size = nb_desc;
1173                 ring0->size &= ~VMXNET3_RING_SIZE_MASK;
1174                 ring1->size = ring0->size;
1175         }
1176
1177         comp_ring->size = ring0->size + ring1->size;
1178         data_ring->size = ring0->size;
1179
1180         /* Rx vmxnet rings structure initialization */
1181         ring0->next2fill = 0;
1182         ring1->next2fill = 0;
1183         ring0->next2comp = 0;
1184         ring1->next2comp = 0;
1185         ring0->gen = VMXNET3_INIT_GEN;
1186         ring1->gen = VMXNET3_INIT_GEN;
1187         comp_ring->next2proc = 0;
1188         comp_ring->gen = VMXNET3_INIT_GEN;
1189
1190         size = sizeof(struct Vmxnet3_RxDesc) * (ring0->size + ring1->size);
1191         size += sizeof(struct Vmxnet3_RxCompDesc) * comp_ring->size;
1192         if (VMXNET3_VERSION_GE_3(hw) && rxq->data_desc_size)
1193                 size += rxq->data_desc_size * data_ring->size;
1194
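        /*
         * As on the Tx side, one DMA memzone carries cmd_ring0, cmd_ring1,
         * the completion ring and, when configured (vmxnet3 v3+), the Rx data
         * ring back to back.
         */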
1195         mz = rte_eth_dma_zone_reserve(dev, "rxdesc", queue_idx, size,
1196                                       VMXNET3_RING_BA_ALIGN, socket_id);
1197         if (mz == NULL) {
1198                 PMD_INIT_LOG(ERR, "ERROR: Creating queue descriptors zone");
1199                 return -ENOMEM;
1200         }
1201         rxq->mz = mz;
1202         memset(mz->addr, 0, mz->len);
1203
1204         /* cmd_ring0 initialization */
1205         ring0->base = mz->addr;
1206         ring0->basePA = mz->iova;
1207
1208         /* cmd_ring1 initialization */
1209         ring1->base = ring0->base + ring0->size;
1210         ring1->basePA = ring0->basePA + sizeof(struct Vmxnet3_RxDesc) * ring0->size;
1211
1212         /* comp_ring initialization */
1213         comp_ring->base = ring1->base + ring1->size;
1214         comp_ring->basePA = ring1->basePA + sizeof(struct Vmxnet3_RxDesc) *
1215                 ring1->size;
1216
1217         /* data_ring initialization */
1218         if (VMXNET3_VERSION_GE_3(hw) && rxq->data_desc_size) {
1219                 data_ring->base =
1220                         (uint8_t *)(comp_ring->base + comp_ring->size);
1221                 data_ring->basePA = comp_ring->basePA +
1222                         sizeof(struct Vmxnet3_RxCompDesc) * comp_ring->size;
1223         }
1224
1225         /* cmd_ring0-cmd_ring1 buf_info allocation */
1226         for (i = 0; i < VMXNET3_RX_CMDRING_SIZE; i++) {
1227
1228                 ring = &rxq->cmd_ring[i];
1229                 ring->rid = i;
1230                 snprintf(mem_name, sizeof(mem_name), "rx_ring_%d_buf_info", i);
1231
1232                 ring->buf_info = rte_zmalloc(mem_name,
1233                                              ring->size * sizeof(vmxnet3_buf_info_t),
1234                                              RTE_CACHE_LINE_SIZE);
1235                 if (ring->buf_info == NULL) {
1236                         PMD_INIT_LOG(ERR, "ERROR: Creating rx_buf_info structure");
1237                         return -ENOMEM;
1238                 }
1239         }
1240
1241         /* Update the data portion with rxq */
1242         dev->data->rx_queues[queue_idx] = rxq;
1243
1244         return 0;
1245 }
1246
1247 /*
1248  * Initializes Receive Unit
1249  * Load mbufs in rx queue in advance
1250  */
1251 int
1252 vmxnet3_dev_rxtx_init(struct rte_eth_dev *dev)
1253 {
1254         struct vmxnet3_hw *hw = dev->data->dev_private;
1255
1256         int i, ret;
1257         uint8_t j;
1258
1259         PMD_INIT_FUNC_TRACE();
1260
1261         for (i = 0; i < hw->num_rx_queues; i++) {
1262                 vmxnet3_rx_queue_t *rxq = dev->data->rx_queues[i];
1263
1264                 for (j = 0; j < VMXNET3_RX_CMDRING_SIZE; j++) {
1265                         /* Post as many buffers as the ring can hold */
1266                         ret = vmxnet3_post_rx_bufs(rxq, j);
1267                         if (ret <= 0) {
1268                                 PMD_INIT_LOG(ERR,
1269                                              "ERROR: Posting Rxq: %d buffers ring: %d",
1270                                              i, j);
1271                                 return -ret;
1272                         }
1273                         /*
1274                          * Updating device with the index:next2fill to fill the
1275                          * mbufs for coming packets.
1276                          */
1277                         if (unlikely(rxq->shared->ctrl.updateRxProd)) {
1278                                 VMXNET3_WRITE_BAR0_REG(hw, rxprod_reg[j] + (rxq->queue_id * VMXNET3_REG_ALIGN),
1279                                                        rxq->cmd_ring[j].next2fill);
1280                         }
1281                 }
1282                 rxq->stopped = FALSE;
1283                 rxq->start_seg = NULL;
1284         }
1285
1286         for (i = 0; i < dev->data->nb_tx_queues; i++) {
1287                 struct vmxnet3_tx_queue *txq = dev->data->tx_queues[i];
1288
1289                 txq->stopped = FALSE;
1290         }
1291
1292         return 0;
1293 }
1294
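/* Default Toeplitz RSS key, used when the application does not supply one */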
1295 static uint8_t rss_intel_key[40] = {
1296         0x6D, 0x5A, 0x56, 0xDA, 0x25, 0x5B, 0x0E, 0xC2,
1297         0x41, 0x67, 0x25, 0x3D, 0x43, 0xA3, 0x8F, 0xB0,
1298         0xD0, 0xCA, 0x2B, 0xCB, 0xAE, 0x7B, 0x30, 0xB4,
1299         0x77, 0xCB, 0x2D, 0xA3, 0x80, 0x30, 0xF2, 0x0C,
1300         0x6A, 0x42, 0xB7, 0x3B, 0xBE, 0xAC, 0x01, 0xFA,
1301 };
1302
1303 /*
1304  * Additional RSS configurations based on vmxnet v4+ APIs
1305  */
1306 int
1307 vmxnet3_v4_rss_configure(struct rte_eth_dev *dev)
1308 {
1309         struct vmxnet3_hw *hw = dev->data->dev_private;
1310         Vmxnet3_DriverShared *shared = hw->shared;
1311         Vmxnet3_CmdInfo *cmdInfo = &shared->cu.cmdInfo;
1312         struct rte_eth_rss_conf *port_rss_conf;
1313         uint64_t rss_hf;
1314         uint32_t ret;
1315
1316         PMD_INIT_FUNC_TRACE();
1317
1318         cmdInfo->setRSSFields = 0;
1319         port_rss_conf = &dev->data->dev_conf.rx_adv_conf.rss_conf;
1320
1321         if ((port_rss_conf->rss_hf & VMXNET3_MANDATORY_V4_RSS) !=
1322             VMXNET3_MANDATORY_V4_RSS) {
1323                 PMD_INIT_LOG(WARNING, "RSS: IPv4/6 TCP is required for vmxnet3 v4 RSS, "
1324                              "automatically setting it");
1325                 port_rss_conf->rss_hf |= VMXNET3_MANDATORY_V4_RSS;
1326         }
1327
1328         rss_hf = port_rss_conf->rss_hf &
1329                 (VMXNET3_V4_RSS_MASK | VMXNET3_RSS_OFFLOAD_ALL);
1330
1331         if (rss_hf & RTE_ETH_RSS_NONFRAG_IPV4_TCP)
1332                 cmdInfo->setRSSFields |= VMXNET3_RSS_FIELDS_TCPIP4;
1333         if (rss_hf & RTE_ETH_RSS_NONFRAG_IPV6_TCP)
1334                 cmdInfo->setRSSFields |= VMXNET3_RSS_FIELDS_TCPIP6;
1335         if (rss_hf & RTE_ETH_RSS_NONFRAG_IPV4_UDP)
1336                 cmdInfo->setRSSFields |= VMXNET3_RSS_FIELDS_UDPIP4;
1337         if (rss_hf & RTE_ETH_RSS_NONFRAG_IPV6_UDP)
1338                 cmdInfo->setRSSFields |= VMXNET3_RSS_FIELDS_UDPIP6;
1339
1340         VMXNET3_WRITE_BAR1_REG(hw, VMXNET3_REG_CMD,
1341                                VMXNET3_CMD_SET_RSS_FIELDS);
1342         ret = VMXNET3_READ_BAR1_REG(hw, VMXNET3_REG_CMD);
1343
1344         if (ret != VMXNET3_SUCCESS) {
1345                 PMD_DRV_LOG(ERR, "Set RSS fields (v4) failed: %d", ret);
1346         }
1347
1348         return ret;
1349 }
1350
1351 /*
1352  * Configure RSS feature
1353  */
1354 int
1355 vmxnet3_rss_configure(struct rte_eth_dev *dev)
1356 {
1357         struct vmxnet3_hw *hw = dev->data->dev_private;
1358         struct VMXNET3_RSSConf *dev_rss_conf;
1359         struct rte_eth_rss_conf *port_rss_conf;
1360         uint64_t rss_hf;
1361         uint8_t i, j;
1362
1363         PMD_INIT_FUNC_TRACE();
1364
1365         dev_rss_conf = hw->rss_conf;
1366         port_rss_conf = &dev->data->dev_conf.rx_adv_conf.rss_conf;
1367
1368         /* loading hashFunc */
1369         dev_rss_conf->hashFunc = VMXNET3_RSS_HASH_FUNC_TOEPLITZ;
1370         /* loading hashKeySize */
1371         dev_rss_conf->hashKeySize = VMXNET3_RSS_MAX_KEY_SIZE;
1372         /* loading indTableSize: Must not exceed VMXNET3_RSS_MAX_IND_TABLE_SIZE (128) */
1373         dev_rss_conf->indTableSize = (uint16_t)(hw->num_rx_queues * 4);
1374
1375         if (port_rss_conf->rss_key == NULL) {
1376                 /* Default hash key */
1377                 port_rss_conf->rss_key = rss_intel_key;
1378         }
1379
1380         /* loading hashKey */
1381         memcpy(&dev_rss_conf->hashKey[0], port_rss_conf->rss_key,
1382                dev_rss_conf->hashKeySize);
1383
1384         /* loading indTable */
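        /*
         * The indirection table spreads hash values round-robin across the
         * configured Rx queues: entry i maps to queue (i % nb_rx_queues).
         */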
1385         for (i = 0, j = 0; i < dev_rss_conf->indTableSize; i++, j++) {
1386                 if (j == dev->data->nb_rx_queues)
1387                         j = 0;
1388                 dev_rss_conf->indTable[i] = j;
1389         }
1390
1391         /* loading hashType */
1392         dev_rss_conf->hashType = 0;
1393         rss_hf = port_rss_conf->rss_hf & VMXNET3_RSS_OFFLOAD_ALL;
1394         if (rss_hf & RTE_ETH_RSS_IPV4)
1395                 dev_rss_conf->hashType |= VMXNET3_RSS_HASH_TYPE_IPV4;
1396         if (rss_hf & RTE_ETH_RSS_NONFRAG_IPV4_TCP)
1397                 dev_rss_conf->hashType |= VMXNET3_RSS_HASH_TYPE_TCP_IPV4;
1398         if (rss_hf & RTE_ETH_RSS_IPV6)
1399                 dev_rss_conf->hashType |= VMXNET3_RSS_HASH_TYPE_IPV6;
1400         if (rss_hf & RTE_ETH_RSS_NONFRAG_IPV6_TCP)
1401                 dev_rss_conf->hashType |= VMXNET3_RSS_HASH_TYPE_TCP_IPV6;
1402
1403         return VMXNET3_SUCCESS;
1404 }