mbuf: add namespace to offload flags
[dpdk.git] / drivers/net/vmxnet3/vmxnet3_rxtx.c
1 /* SPDX-License-Identifier: BSD-3-Clause
2  * Copyright(c) 2010-2015 Intel Corporation
3  */
4
5 #include <sys/queue.h>
6
7 #include <stdio.h>
8 #include <stdlib.h>
9 #include <string.h>
10 #include <errno.h>
11 #include <stdint.h>
12 #include <stdarg.h>
13 #include <unistd.h>
14 #include <inttypes.h>
15
16 #include <rte_byteorder.h>
17 #include <rte_common.h>
18 #include <rte_cycles.h>
19 #include <rte_log.h>
20 #include <rte_debug.h>
21 #include <rte_interrupts.h>
22 #include <rte_pci.h>
23 #include <rte_memory.h>
24 #include <rte_memzone.h>
25 #include <rte_launch.h>
26 #include <rte_eal.h>
27 #include <rte_per_lcore.h>
28 #include <rte_lcore.h>
29 #include <rte_atomic.h>
30 #include <rte_branch_prediction.h>
31 #include <rte_mempool.h>
32 #include <rte_malloc.h>
33 #include <rte_mbuf.h>
34 #include <rte_ether.h>
35 #include <ethdev_driver.h>
36 #include <rte_prefetch.h>
37 #include <rte_ip.h>
38 #include <rte_udp.h>
39 #include <rte_tcp.h>
40 #include <rte_sctp.h>
41 #include <rte_string_fns.h>
42 #include <rte_errno.h>
43 #include <rte_net.h>
44
45 #include "base/vmxnet3_defs.h"
46 #include "vmxnet3_ring.h"
47
48 #include "vmxnet3_logs.h"
49 #include "vmxnet3_ethdev.h"
50
51 #define VMXNET3_TX_OFFLOAD_MASK (RTE_MBUF_F_TX_VLAN | \
52                 RTE_MBUF_F_TX_IPV6 |     \
53                 RTE_MBUF_F_TX_IPV4 |     \
54                 RTE_MBUF_F_TX_L4_MASK |  \
55                 RTE_MBUF_F_TX_TCP_SEG)
56
57 #define VMXNET3_TX_OFFLOAD_NOTSUP_MASK  \
58         (RTE_MBUF_F_TX_OFFLOAD_MASK ^ VMXNET3_TX_OFFLOAD_MASK)
59
60 static const uint32_t rxprod_reg[2] = {VMXNET3_REG_RXPROD, VMXNET3_REG_RXPROD2};
61
62 static int vmxnet3_post_rx_bufs(vmxnet3_rx_queue_t*, uint8_t);
63 static void vmxnet3_tq_tx_complete(vmxnet3_tx_queue_t *);
64 #ifdef RTE_LIBRTE_VMXNET3_DEBUG_DRIVER_NOT_USED
65 static void vmxnet3_rxq_dump(struct vmxnet3_rx_queue *);
66 static void vmxnet3_txq_dump(struct vmxnet3_tx_queue *);
67 #endif
68
69 #ifdef RTE_LIBRTE_VMXNET3_DEBUG_DRIVER_NOT_USED
70 static void
71 vmxnet3_rxq_dump(struct vmxnet3_rx_queue *rxq)
72 {
73         uint32_t avail = 0;
74
75         if (rxq == NULL)
76                 return;
77
78         PMD_RX_LOG(DEBUG,
79                    "RXQ: cmd0 base : %p cmd1 base : %p comp ring base : %p.",
80                    rxq->cmd_ring[0].base, rxq->cmd_ring[1].base, rxq->comp_ring.base);
81         PMD_RX_LOG(DEBUG,
82                    "RXQ: cmd0 basePA : 0x%lx cmd1 basePA : 0x%lx comp ring basePA : 0x%lx.",
83                    (unsigned long)rxq->cmd_ring[0].basePA,
84                    (unsigned long)rxq->cmd_ring[1].basePA,
85                    (unsigned long)rxq->comp_ring.basePA);
86
87         avail = vmxnet3_cmd_ring_desc_avail(&rxq->cmd_ring[0]);
88         PMD_RX_LOG(DEBUG,
89                    "RXQ:cmd0: size=%u; free=%u; next2proc=%u; queued=%u",
90                    (uint32_t)rxq->cmd_ring[0].size, avail,
91                    rxq->comp_ring.next2proc,
92                    rxq->cmd_ring[0].size - avail);
93
94         avail = vmxnet3_cmd_ring_desc_avail(&rxq->cmd_ring[1]);
95         PMD_RX_LOG(DEBUG, "RXQ:cmd1 size=%u; free=%u; next2proc=%u; queued=%u",
96                    (uint32_t)rxq->cmd_ring[1].size, avail, rxq->comp_ring.next2proc,
97                    rxq->cmd_ring[1].size - avail);
98
99 }
100
101 static void
102 vmxnet3_txq_dump(struct vmxnet3_tx_queue *txq)
103 {
104         uint32_t avail = 0;
105
106         if (txq == NULL)
107                 return;
108
109         PMD_TX_LOG(DEBUG, "TXQ: cmd base : %p comp ring base : %p data ring base : %p.",
110                    txq->cmd_ring.base, txq->comp_ring.base, txq->data_ring.base);
111         PMD_TX_LOG(DEBUG, "TXQ: cmd basePA : 0x%lx comp ring basePA : 0x%lx data ring basePA : 0x%lx.",
112                    (unsigned long)txq->cmd_ring.basePA,
113                    (unsigned long)txq->comp_ring.basePA,
114                    (unsigned long)txq->data_ring.basePA);
115
116         avail = vmxnet3_cmd_ring_desc_avail(&txq->cmd_ring);
117         PMD_TX_LOG(DEBUG, "TXQ: size=%u; free=%u; next2proc=%u; queued=%u",
118                    (uint32_t)txq->cmd_ring.size, avail,
119                    txq->comp_ring.next2proc, txq->cmd_ring.size - avail);
120 }
121 #endif
122
123 static void
124 vmxnet3_tx_cmd_ring_release_mbufs(vmxnet3_cmd_ring_t *ring)
125 {
126         while (ring->next2comp != ring->next2fill) {
127                 /* No need to worry about desc ownership, device is quiesced by now. */
128                 vmxnet3_buf_info_t *buf_info = ring->buf_info + ring->next2comp;
129
130                 if (buf_info->m) {
131                         rte_pktmbuf_free(buf_info->m);
132                         buf_info->m = NULL;
133                         buf_info->bufPA = 0;
134                         buf_info->len = 0;
135                 }
136                 vmxnet3_cmd_ring_adv_next2comp(ring);
137         }
138 }
139
140 static void
141 vmxnet3_rx_cmd_ring_release_mbufs(vmxnet3_cmd_ring_t *ring)
142 {
143         uint32_t i;
144
145         for (i = 0; i < ring->size; i++) {
146                 /* No need to worry about desc ownership, device is quiesced by now. */
147                 vmxnet3_buf_info_t *buf_info = &ring->buf_info[i];
148
149                 if (buf_info->m) {
150                         rte_pktmbuf_free_seg(buf_info->m);
151                         buf_info->m = NULL;
152                         buf_info->bufPA = 0;
153                         buf_info->len = 0;
154                 }
155                 vmxnet3_cmd_ring_adv_next2comp(ring);
156         }
157 }
158
159 static void
160 vmxnet3_cmd_ring_release(vmxnet3_cmd_ring_t *ring)
161 {
162         rte_free(ring->buf_info);
163         ring->buf_info = NULL;
164 }
165
166 void
167 vmxnet3_dev_tx_queue_release(struct rte_eth_dev *dev, uint16_t qid)
168 {
169         vmxnet3_tx_queue_t *tq = dev->data->tx_queues[qid];
170
171         if (tq != NULL) {
172                 /* Release mbufs */
173                 vmxnet3_tx_cmd_ring_release_mbufs(&tq->cmd_ring);
174                 /* Release the cmd_ring */
175                 vmxnet3_cmd_ring_release(&tq->cmd_ring);
176                 /* Release the memzone */
177                 rte_memzone_free(tq->mz);
178                 /* Release the queue */
179                 rte_free(tq);
180         }
181 }
182
183 void
184 vmxnet3_dev_rx_queue_release(struct rte_eth_dev *dev, uint16_t qid)
185 {
186         int i;
187         vmxnet3_rx_queue_t *rq = dev->data->rx_queues[qid];
188
189         if (rq != NULL) {
190                 /* Release mbufs */
191                 for (i = 0; i < VMXNET3_RX_CMDRING_SIZE; i++)
192                         vmxnet3_rx_cmd_ring_release_mbufs(&rq->cmd_ring[i]);
193
194                 /* Release both the cmd_rings */
195                 for (i = 0; i < VMXNET3_RX_CMDRING_SIZE; i++)
196                         vmxnet3_cmd_ring_release(&rq->cmd_ring[i]);
197
198                 /* Release the memzone */
199                 rte_memzone_free(rq->mz);
200
201                 /* Release the queue */
202                 rte_free(rq);
203         }
204 }
205
206 static void
207 vmxnet3_dev_tx_queue_reset(void *txq)
208 {
209         vmxnet3_tx_queue_t *tq = txq;
210         struct vmxnet3_cmd_ring *ring = &tq->cmd_ring;
211         struct vmxnet3_comp_ring *comp_ring = &tq->comp_ring;
212         struct vmxnet3_data_ring *data_ring = &tq->data_ring;
213         int size;
214
215         if (tq != NULL) {
216                 /* Release the cmd_ring mbufs */
217                 vmxnet3_tx_cmd_ring_release_mbufs(&tq->cmd_ring);
218         }
219
220         /* Tx vmxnet rings structure initialization*/
221         ring->next2fill = 0;
222         ring->next2comp = 0;
223         ring->gen = VMXNET3_INIT_GEN;
224         comp_ring->next2proc = 0;
225         comp_ring->gen = VMXNET3_INIT_GEN;
226
227         size = sizeof(struct Vmxnet3_TxDesc) * ring->size;
228         size += sizeof(struct Vmxnet3_TxCompDesc) * comp_ring->size;
229         size += tq->txdata_desc_size * data_ring->size;
230
231         memset(ring->base, 0, size);
232 }
233
234 static void
235 vmxnet3_dev_rx_queue_reset(void *rxq)
236 {
237         int i;
238         vmxnet3_rx_queue_t *rq = rxq;
239         struct vmxnet3_hw *hw = rq->hw;
240         struct vmxnet3_cmd_ring *ring0, *ring1;
241         struct vmxnet3_comp_ring *comp_ring;
242         struct vmxnet3_rx_data_ring *data_ring = &rq->data_ring;
243         int size;
244
245         /* Release both the cmd_rings mbufs */
246         for (i = 0; i < VMXNET3_RX_CMDRING_SIZE; i++)
247                 vmxnet3_rx_cmd_ring_release_mbufs(&rq->cmd_ring[i]);
248
249         ring0 = &rq->cmd_ring[0];
250         ring1 = &rq->cmd_ring[1];
251         comp_ring = &rq->comp_ring;
252
253         /* Rx vmxnet rings structure initialization */
254         ring0->next2fill = 0;
255         ring1->next2fill = 0;
256         ring0->next2comp = 0;
257         ring1->next2comp = 0;
258         ring0->gen = VMXNET3_INIT_GEN;
259         ring1->gen = VMXNET3_INIT_GEN;
260         comp_ring->next2proc = 0;
261         comp_ring->gen = VMXNET3_INIT_GEN;
262
263         size = sizeof(struct Vmxnet3_RxDesc) * (ring0->size + ring1->size);
264         size += sizeof(struct Vmxnet3_RxCompDesc) * comp_ring->size;
265         if (VMXNET3_VERSION_GE_3(hw) && rq->data_desc_size)
266                 size += rq->data_desc_size * data_ring->size;
267
268         memset(ring0->base, 0, size);
269 }
270
271 void
272 vmxnet3_dev_clear_queues(struct rte_eth_dev *dev)
273 {
274         unsigned i;
275
276         PMD_INIT_FUNC_TRACE();
277
278         for (i = 0; i < dev->data->nb_tx_queues; i++) {
279                 struct vmxnet3_tx_queue *txq = dev->data->tx_queues[i];
280
281                 if (txq != NULL) {
282                         txq->stopped = TRUE;
283                         vmxnet3_dev_tx_queue_reset(txq);
284                 }
285         }
286
287         for (i = 0; i < dev->data->nb_rx_queues; i++) {
288                 struct vmxnet3_rx_queue *rxq = dev->data->rx_queues[i];
289
290                 if (rxq != NULL) {
291                         rxq->stopped = TRUE;
292                         vmxnet3_dev_rx_queue_reset(rxq);
293                 }
294         }
295 }
296
297 static int
298 vmxnet3_unmap_pkt(uint16_t eop_idx, vmxnet3_tx_queue_t *txq)
299 {
300         int completed = 0;
301         struct rte_mbuf *mbuf;
302
303         /* Release cmd_ring descriptor and free mbuf */
304         RTE_ASSERT(txq->cmd_ring.base[eop_idx].txd.eop == 1);
305
306         mbuf = txq->cmd_ring.buf_info[eop_idx].m;
307         if (mbuf == NULL)
308                 rte_panic("EOP desc does not point to a valid mbuf");
309         rte_pktmbuf_free(mbuf);
310
311         txq->cmd_ring.buf_info[eop_idx].m = NULL;
312
313         while (txq->cmd_ring.next2comp != eop_idx) {
314                 /* no out-of-order completion */
315                 RTE_ASSERT(txq->cmd_ring.base[txq->cmd_ring.next2comp].txd.cq == 0);
316                 vmxnet3_cmd_ring_adv_next2comp(&txq->cmd_ring);
317                 completed++;
318         }
319
320         /* Mark the txd for which tcd was generated as completed */
321         vmxnet3_cmd_ring_adv_next2comp(&txq->cmd_ring);
322
323         return completed + 1;
324 }
325
326 static void
327 vmxnet3_tq_tx_complete(vmxnet3_tx_queue_t *txq)
328 {
329         int completed = 0;
330         vmxnet3_comp_ring_t *comp_ring = &txq->comp_ring;
331         struct Vmxnet3_TxCompDesc *tcd = (struct Vmxnet3_TxCompDesc *)
332                 (comp_ring->base + comp_ring->next2proc);
333
334         while (tcd->gen == comp_ring->gen) {
335                 completed += vmxnet3_unmap_pkt(tcd->txdIdx, txq);
336
337                 vmxnet3_comp_ring_adv_next2proc(comp_ring);
338                 tcd = (struct Vmxnet3_TxCompDesc *)(comp_ring->base +
339                                                     comp_ring->next2proc);
340         }
341
342         PMD_TX_LOG(DEBUG, "Processed %d tx comps & command descs.", completed);
343 }
344
345 uint16_t
346 vmxnet3_prep_pkts(__rte_unused void *tx_queue, struct rte_mbuf **tx_pkts,
347         uint16_t nb_pkts)
348 {
349         int32_t ret;
350         uint32_t i;
351         uint64_t ol_flags;
352         struct rte_mbuf *m;
353
354         for (i = 0; i != nb_pkts; i++) {
355                 m = tx_pkts[i];
356                 ol_flags = m->ol_flags;
357
358                 /* Non-TSO packet cannot occupy more than
359                  * VMXNET3_MAX_TXD_PER_PKT TX descriptors.
360                  */
361                 if ((ol_flags & RTE_MBUF_F_TX_TCP_SEG) == 0 &&
362                                 m->nb_segs > VMXNET3_MAX_TXD_PER_PKT) {
363                         rte_errno = EINVAL;
364                         return i;
365                 }
366
367                 /* check that only supported TX offloads are requested. */
368                 if ((ol_flags & VMXNET3_TX_OFFLOAD_NOTSUP_MASK) != 0 ||
369                                 (ol_flags & RTE_MBUF_F_TX_L4_MASK) ==
370                                 RTE_MBUF_F_TX_SCTP_CKSUM) {
371                         rte_errno = ENOTSUP;
372                         return i;
373                 }
374
375 #ifdef RTE_LIBRTE_ETHDEV_DEBUG
376                 ret = rte_validate_tx_offload(m);
377                 if (ret != 0) {
378                         rte_errno = -ret;
379                         return i;
380                 }
381 #endif
382                 ret = rte_net_intel_cksum_prepare(m);
383                 if (ret != 0) {
384                         rte_errno = -ret;
385                         return i;
386                 }
387         }
388
389         return i;
390 }
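
/*
 * Illustrative sketch, not part of the driver: vmxnet3_prep_pkts() is normally
 * reached through rte_eth_tx_prepare(), which applications call before
 * rte_eth_tx_burst() whenever they request Tx offloads.  port_id, queue_id,
 * pkts and nb below are hypothetical application-side variables.
 *
 *	uint16_t nb_prep, nb_sent;
 *
 *	nb_prep = rte_eth_tx_prepare(port_id, queue_id, pkts, nb);
 *	if (nb_prep < nb)
 *		printf("pkt %u rejected: %s\n", nb_prep,
 *		       rte_strerror(rte_errno));
 *	nb_sent = rte_eth_tx_burst(port_id, queue_id, pkts, nb_prep);
 */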
391
392 uint16_t
393 vmxnet3_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts,
394                   uint16_t nb_pkts)
395 {
396         uint16_t nb_tx;
397         vmxnet3_tx_queue_t *txq = tx_queue;
398         struct vmxnet3_hw *hw = txq->hw;
399         Vmxnet3_TxQueueCtrl *txq_ctrl = &txq->shared->ctrl;
400         uint32_t deferred = rte_le_to_cpu_32(txq_ctrl->txNumDeferred);
401
402         if (unlikely(txq->stopped)) {
403                 PMD_TX_LOG(DEBUG, "Tx queue is stopped.");
404                 return 0;
405         }
406
407         /* Free up the comp_descriptors aggressively */
408         vmxnet3_tq_tx_complete(txq);
409
410         nb_tx = 0;
411         while (nb_tx < nb_pkts) {
412                 Vmxnet3_GenericDesc *gdesc;
413                 vmxnet3_buf_info_t *tbi;
414                 uint32_t first2fill, avail, dw2;
415                 struct rte_mbuf *txm = tx_pkts[nb_tx];
416                 struct rte_mbuf *m_seg = txm;
417                 int copy_size = 0;
418                 bool tso = (txm->ol_flags & RTE_MBUF_F_TX_TCP_SEG) != 0;
419                 /* # of descriptors needed for a packet. */
420                 unsigned count = txm->nb_segs;
421
422                 avail = vmxnet3_cmd_ring_desc_avail(&txq->cmd_ring);
423                 if (count > avail) {
424                         /* Is command ring full? */
425                         if (unlikely(avail == 0)) {
426                                 PMD_TX_LOG(DEBUG, "No free ring descriptors");
427                                 txq->stats.tx_ring_full++;
428                                 txq->stats.drop_total += (nb_pkts - nb_tx);
429                                 break;
430                         }
431
432                         /* Command ring is not full but cannot handle the
433                          * multi-segmented packet. Let's try the next packet
434                          * in this case.
435                          */
436                         PMD_TX_LOG(DEBUG, "Running out of ring descriptors "
437                                    "(avail %d needed %d)", avail, count);
438                         txq->stats.drop_total++;
439                         if (tso)
440                                 txq->stats.drop_tso++;
441                         rte_pktmbuf_free(txm);
442                         nb_tx++;
443                         continue;
444                 }
445
446                 /* Drop non-TSO packet that is excessively fragmented */
447                 if (unlikely(!tso && count > VMXNET3_MAX_TXD_PER_PKT)) {
448                         PMD_TX_LOG(ERR, "Non-TSO packet cannot occupy more than %d tx "
449                                    "descriptors. Packet dropped.", VMXNET3_MAX_TXD_PER_PKT);
450                         txq->stats.drop_too_many_segs++;
451                         txq->stats.drop_total++;
452                         rte_pktmbuf_free(txm);
453                         nb_tx++;
454                         continue;
455                 }
456
457                 if (txm->nb_segs == 1 &&
458                     rte_pktmbuf_pkt_len(txm) <= txq->txdata_desc_size) {
459                         struct Vmxnet3_TxDataDesc *tdd;
460
461                         /* Skip empty packets */
462                         if (unlikely(rte_pktmbuf_pkt_len(txm) == 0)) {
463                                 txq->stats.drop_total++;
464                                 rte_pktmbuf_free(txm);
465                                 nb_tx++;
466                                 continue;
467                         }
468
469                         tdd = (struct Vmxnet3_TxDataDesc *)
470                                 ((uint8 *)txq->data_ring.base +
471                                  txq->cmd_ring.next2fill *
472                                  txq->txdata_desc_size);
473                         copy_size = rte_pktmbuf_pkt_len(txm);
474                         rte_memcpy(tdd->data, rte_pktmbuf_mtod(txm, char *), copy_size);
475                 }
476
477                 /* use the previous gen bit for the SOP desc */
478                 dw2 = (txq->cmd_ring.gen ^ 0x1) << VMXNET3_TXD_GEN_SHIFT;
479                 first2fill = txq->cmd_ring.next2fill;
480                 do {
481                         /* Remember the transmit buffer for cleanup */
482                         tbi = txq->cmd_ring.buf_info + txq->cmd_ring.next2fill;
483
484                         /* NB: the following assumes that VMXNET3 maximum
485                          * transmit buffer size (16K) is greater than
486                          * the maximum mbuf segment size.
487                          */
488                         gdesc = txq->cmd_ring.base + txq->cmd_ring.next2fill;
489
490                         /* Skip empty segments */
491                         if (unlikely(m_seg->data_len == 0))
492                                 continue;
493
494                         if (copy_size) {
495                                 uint64 offset =
496                                         (uint64)txq->cmd_ring.next2fill *
497                                                         txq->txdata_desc_size;
498                                 gdesc->txd.addr =
499                                         rte_cpu_to_le_64(txq->data_ring.basePA +
500                                                          offset);
501                         } else {
502                                 gdesc->txd.addr = rte_mbuf_data_iova(m_seg);
503                         }
504
505                         gdesc->dword[2] = dw2 | m_seg->data_len;
506                         gdesc->dword[3] = 0;
507
508                         /* move to the next2fill descriptor */
509                         vmxnet3_cmd_ring_adv_next2fill(&txq->cmd_ring);
510
511                         /* use the right gen for non-SOP desc */
512                         dw2 = txq->cmd_ring.gen << VMXNET3_TXD_GEN_SHIFT;
513                 } while ((m_seg = m_seg->next) != NULL);
514
515                 /* set the last buf_info for the pkt */
516                 tbi->m = txm;
517                 /* Update the EOP descriptor */
518                 gdesc->dword[3] |= VMXNET3_TXD_EOP | VMXNET3_TXD_CQ;
519
520                 /* Add VLAN tag if present */
521                 gdesc = txq->cmd_ring.base + first2fill;
522                 if (txm->ol_flags & RTE_MBUF_F_TX_VLAN) {
523                         gdesc->txd.ti = 1;
524                         gdesc->txd.tci = txm->vlan_tci;
525                 }
526
527                 if (tso) {
528                         uint16_t mss = txm->tso_segsz;
529
530                         RTE_ASSERT(mss > 0);
531
532                         gdesc->txd.hlen = txm->l2_len + txm->l3_len + txm->l4_len;
533                         gdesc->txd.om = VMXNET3_OM_TSO;
534                         gdesc->txd.msscof = mss;
535
536                         deferred += (rte_pktmbuf_pkt_len(txm) - gdesc->txd.hlen + mss - 1) / mss;
537                 } else if (txm->ol_flags & RTE_MBUF_F_TX_L4_MASK) {
538                         gdesc->txd.om = VMXNET3_OM_CSUM;
539                         gdesc->txd.hlen = txm->l2_len + txm->l3_len;
540
541                         switch (txm->ol_flags & RTE_MBUF_F_TX_L4_MASK) {
542                         case RTE_MBUF_F_TX_TCP_CKSUM:
543                                 gdesc->txd.msscof = gdesc->txd.hlen +
544                                         offsetof(struct rte_tcp_hdr, cksum);
545                                 break;
546                         case RTE_MBUF_F_TX_UDP_CKSUM:
547                                 gdesc->txd.msscof = gdesc->txd.hlen +
548                                         offsetof(struct rte_udp_hdr,
549                                                 dgram_cksum);
550                                 break;
551                         default:
552                                 PMD_TX_LOG(WARNING, "requested cksum offload not supported %#llx",
553                                            txm->ol_flags & RTE_MBUF_F_TX_L4_MASK);
554                                 abort();
555                         }
556                         deferred++;
557                 } else {
558                         gdesc->txd.hlen = 0;
559                         gdesc->txd.om = VMXNET3_OM_NONE;
560                         gdesc->txd.msscof = 0;
561                         deferred++;
562                 }
563
564                 /* flip the GEN bit on the SOP */
565                 rte_compiler_barrier();
566                 gdesc->dword[2] ^= VMXNET3_TXD_GEN;
567
568                 txq_ctrl->txNumDeferred = rte_cpu_to_le_32(deferred);
569                 nb_tx++;
570         }
571
572         PMD_TX_LOG(DEBUG, "vmxnet3 txThreshold: %u", rte_le_to_cpu_32(txq_ctrl->txThreshold));
573
574         if (deferred >= rte_le_to_cpu_32(txq_ctrl->txThreshold)) {
575                 txq_ctrl->txNumDeferred = 0;
576                 /* Notify vSwitch that packets are available. */
577                 VMXNET3_WRITE_BAR0_REG(hw, (VMXNET3_REG_TXPROD + txq->queue_id * VMXNET3_REG_ALIGN),
578                                        txq->cmd_ring.next2fill);
579         }
580
581         return nb_tx;
582 }
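
/*
 * Illustrative sketch, not part of the driver: the Tx mbuf fields consumed by
 * vmxnet3_xmit_pkts() above.  "m" is a hypothetical mbuf carrying a TCP/IPv4
 * frame; the header lengths are examples only.  Note that
 * RTE_MBUF_F_TX_IP_CKSUM is not in VMXNET3_TX_OFFLOAD_MASK, so requesting it
 * would be rejected by vmxnet3_prep_pkts().
 *
 *	m->l2_len = sizeof(struct rte_ether_hdr);
 *	m->l3_len = sizeof(struct rte_ipv4_hdr);
 *
 *	For plain L4 checksum offload (VMXNET3_OM_CSUM path):
 *	m->ol_flags |= RTE_MBUF_F_TX_IPV4 | RTE_MBUF_F_TX_TCP_CKSUM;
 *
 *	For TSO (VMXNET3_OM_TSO path), l4_len and tso_segsz are also required:
 *	m->l4_len = sizeof(struct rte_tcp_hdr);
 *	m->tso_segsz = 1448;
 *	m->ol_flags |= RTE_MBUF_F_TX_IPV4 | RTE_MBUF_F_TX_TCP_SEG;
 */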
583
584 static inline void
585 vmxnet3_renew_desc(vmxnet3_rx_queue_t *rxq, uint8_t ring_id,
586                    struct rte_mbuf *mbuf)
587 {
588         uint32_t val;
589         struct vmxnet3_cmd_ring *ring = &rxq->cmd_ring[ring_id];
590         struct Vmxnet3_RxDesc *rxd =
591                 (struct Vmxnet3_RxDesc *)(ring->base + ring->next2fill);
592         vmxnet3_buf_info_t *buf_info = &ring->buf_info[ring->next2fill];
593
594         if (ring_id == 0) {
595                 /* Usually: One HEAD type buf per packet
596                  * val = (ring->next2fill % rxq->hw->bufs_per_pkt) ?
597                  * VMXNET3_RXD_BTYPE_BODY : VMXNET3_RXD_BTYPE_HEAD;
598                  */
599
600                 /* We use a single buffer per packet, so all are HEAD type here */
601                 val = VMXNET3_RXD_BTYPE_HEAD;
602         } else {
603                 /* All BODY type buffers for 2nd ring */
604                 val = VMXNET3_RXD_BTYPE_BODY;
605         }
606
607         /*
608          * Load the mbuf pointer into buf_info[next2fill];
609          * the buf_info structure is equivalent to a cookie for virtio-virtqueue
610          */
611         buf_info->m = mbuf;
612         buf_info->len = (uint16_t)(mbuf->buf_len - RTE_PKTMBUF_HEADROOM);
613         buf_info->bufPA = rte_mbuf_data_iova_default(mbuf);
614
615         /* Load Rx Descriptor with the buffer's GPA */
616         rxd->addr = buf_info->bufPA;
617
618         /* After this point rxd->addr MUST not be NULL */
619         rxd->btype = val;
620         rxd->len = buf_info->len;
621         /* Flip gen bit at the end to change ownership */
622         rxd->gen = ring->gen;
623
624         vmxnet3_cmd_ring_adv_next2fill(ring);
625 }
626 /*
627  *  Allocates mbufs and clusters, and posts Rx descriptors with the buffer
628  *  details so that the device can receive packets into those buffers.
629  *  Ring layout:
630  *      Among the two rings, 1st ring contains buffers of type 0 and type 1.
631  *      bufs_per_pkt is set such that for non-LRO cases all the buffers required
632  *      by a frame will fit in 1st ring (1st buf of type0 and rest of type1).
633  *      by a frame will fit in the 1st ring (1st buf of type 0, rest of type 1).
634  *      The 2nd ring contains buffers of type 1 alone and is mostly used
635  *      only for LRO.
636 static int
637 vmxnet3_post_rx_bufs(vmxnet3_rx_queue_t *rxq, uint8_t ring_id)
638 {
639         int err = 0;
640         uint32_t i = 0;
641         struct vmxnet3_cmd_ring *ring = &rxq->cmd_ring[ring_id];
642
643         while (vmxnet3_cmd_ring_desc_avail(ring) > 0) {
644                 struct rte_mbuf *mbuf;
645
646                 /* Allocate blank mbuf for the current Rx Descriptor */
647                 mbuf = rte_mbuf_raw_alloc(rxq->mp);
648                 if (unlikely(mbuf == NULL)) {
649                         PMD_RX_LOG(ERR, "Error allocating mbuf");
650                         rxq->stats.rx_buf_alloc_failure++;
651                         err = ENOMEM;
652                         break;
653                 }
654
655                 vmxnet3_renew_desc(rxq, ring_id, mbuf);
656                 i++;
657         }
658
659         /* Return error only if no buffers are posted at present */
660         if (vmxnet3_cmd_ring_desc_avail(ring) >= (ring->size - 1))
661                 return -err;
662         else
663                 return i;
664 }
665
666 /* MSS not provided by vmxnet3, guess one with available information */
667 static uint16_t
668 vmxnet3_guess_mss(struct vmxnet3_hw *hw, const Vmxnet3_RxCompDesc *rcd,
669                 struct rte_mbuf *rxm)
670 {
671         uint32_t hlen, slen;
672         struct rte_ipv4_hdr *ipv4_hdr;
673         struct rte_ipv6_hdr *ipv6_hdr;
674         struct rte_tcp_hdr *tcp_hdr;
675         char *ptr;
676         uint8_t segs;
677
678         RTE_ASSERT(rcd->tcp);
679
680         ptr = rte_pktmbuf_mtod(rxm, char *);
681         slen = rte_pktmbuf_data_len(rxm);
682         hlen = sizeof(struct rte_ether_hdr);
683
684         if (rcd->v4) {
685                 if (unlikely(slen < hlen + sizeof(struct rte_ipv4_hdr)))
686                         return hw->mtu - sizeof(struct rte_ipv4_hdr)
687                                         - sizeof(struct rte_tcp_hdr);
688
689                 ipv4_hdr = (struct rte_ipv4_hdr *)(ptr + hlen);
690                 hlen += rte_ipv4_hdr_len(ipv4_hdr);
691         } else if (rcd->v6) {
692                 if (unlikely(slen < hlen + sizeof(struct rte_ipv6_hdr)))
693                         return hw->mtu - sizeof(struct rte_ipv6_hdr) -
694                                         sizeof(struct rte_tcp_hdr);
695
696                 ipv6_hdr = (struct rte_ipv6_hdr *)(ptr + hlen);
697                 hlen += sizeof(struct rte_ipv6_hdr);
698                 if (unlikely(ipv6_hdr->proto != IPPROTO_TCP)) {
699                         int frag;
700
701                         rte_net_skip_ip6_ext(ipv6_hdr->proto, rxm,
702                                         &hlen, &frag);
703                 }
704         }
705
706         if (unlikely(slen < hlen + sizeof(struct rte_tcp_hdr)))
707                 return hw->mtu - hlen - sizeof(struct rte_tcp_hdr) +
708                                 sizeof(struct rte_ether_hdr);
709
710         tcp_hdr = (struct rte_tcp_hdr *)(ptr + hlen);
711         hlen += (tcp_hdr->data_off & 0xf0) >> 2;
712
713         segs = *vmxnet3_segs_dynfield(rxm);
714         if (segs > 1)
715                 return (rte_pktmbuf_pkt_len(rxm) - hlen + segs - 1) / segs;
716         else
717                 return hw->mtu - hlen + sizeof(struct rte_ether_hdr);
718 }
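
/*
 * Worked example of the guess above (hypothetical numbers): an LRO-coalesced
 * TCP/IPv4 packet with pkt_len = 14546, hlen = 66 (14 Ethernet + 20 IPv4 +
 * 32 TCP) and segs = 10 gives (14546 - 66 + 10 - 1) / 10 = 1448, the usual
 * MSS on a 1500-byte MTU link when TCP timestamps are in use.
 */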
719
720 /* Receive side checksum and other offloads */
721 static inline void
722 vmxnet3_rx_offload(struct vmxnet3_hw *hw, const Vmxnet3_RxCompDesc *rcd,
723                 struct rte_mbuf *rxm, const uint8_t sop)
724 {
725         uint64_t ol_flags = rxm->ol_flags;
726         uint32_t packet_type = rxm->packet_type;
727
728         /* Offloads set in sop */
729         if (sop) {
730                 /* Set packet type */
731                 packet_type |= RTE_PTYPE_L2_ETHER;
732
733                 /* Check large packet receive */
734                 if (VMXNET3_VERSION_GE_2(hw) &&
735                     rcd->type == VMXNET3_CDTYPE_RXCOMP_LRO) {
736                         const Vmxnet3_RxCompDescExt *rcde =
737                                         (const Vmxnet3_RxCompDescExt *)rcd;
738
739                         rxm->tso_segsz = rcde->mss;
740                         *vmxnet3_segs_dynfield(rxm) = rcde->segCnt;
741                         ol_flags |= RTE_MBUF_F_RX_LRO;
742                 }
743         } else { /* Offloads set in eop */
744                 /* Check for RSS */
745                 if (rcd->rssType != VMXNET3_RCD_RSS_TYPE_NONE) {
746                         ol_flags |= RTE_MBUF_F_RX_RSS_HASH;
747                         rxm->hash.rss = rcd->rssHash;
748                 }
749
750                 /* Check for hardware stripped VLAN tag */
751                 if (rcd->ts) {
752                         ol_flags |= (RTE_MBUF_F_RX_VLAN | RTE_MBUF_F_RX_VLAN_STRIPPED);
753                         rxm->vlan_tci = rte_le_to_cpu_16((uint16_t)rcd->tci);
754                 }
755
756                 /* Check packet type, checksum errors, etc. */
757                 if (rcd->cnc) {
758                         ol_flags |= RTE_MBUF_F_RX_L4_CKSUM_UNKNOWN;
759                 } else {
760                         if (rcd->v4) {
761                                 packet_type |= RTE_PTYPE_L3_IPV4_EXT_UNKNOWN;
762
763                                 if (rcd->ipc)
764                                         ol_flags |= RTE_MBUF_F_RX_IP_CKSUM_GOOD;
765                                 else
766                                         ol_flags |= RTE_MBUF_F_RX_IP_CKSUM_BAD;
767
768                                 if (rcd->tuc) {
769                                         ol_flags |= RTE_MBUF_F_RX_L4_CKSUM_GOOD;
770                                         if (rcd->tcp)
771                                                 packet_type |= RTE_PTYPE_L4_TCP;
772                                         else
773                                                 packet_type |= RTE_PTYPE_L4_UDP;
774                                 } else {
775                                         if (rcd->tcp) {
776                                                 packet_type |= RTE_PTYPE_L4_TCP;
777                                                 ol_flags |= RTE_MBUF_F_RX_L4_CKSUM_BAD;
778                                         } else if (rcd->udp) {
779                                                 packet_type |= RTE_PTYPE_L4_UDP;
780                                                 ol_flags |= RTE_MBUF_F_RX_L4_CKSUM_BAD;
781                                         }
782                                 }
783                         } else if (rcd->v6) {
784                                 packet_type |= RTE_PTYPE_L3_IPV6_EXT_UNKNOWN;
785
786                                 if (rcd->tuc) {
787                                         ol_flags |= RTE_MBUF_F_RX_L4_CKSUM_GOOD;
788                                         if (rcd->tcp)
789                                                 packet_type |= RTE_PTYPE_L4_TCP;
790                                         else
791                                                 packet_type |= RTE_PTYPE_L4_UDP;
792                                 } else {
793                                         if (rcd->tcp) {
794                                                 packet_type |= RTE_PTYPE_L4_TCP;
795                                                 ol_flags |= RTE_MBUF_F_RX_L4_CKSUM_BAD;
796                                         } else if (rcd->udp) {
797                                                 packet_type |= RTE_PTYPE_L4_UDP;
798                                                 ol_flags |= RTE_MBUF_F_RX_L4_CKSUM_BAD;
799                                         }
800                                 }
801                         } else {
802                                 packet_type |= RTE_PTYPE_UNKNOWN;
803                         }
804
805                         /* Old variants of vmxnet3 do not provide MSS */
806                         if ((ol_flags & RTE_MBUF_F_RX_LRO) && rxm->tso_segsz == 0)
807                                 rxm->tso_segsz = vmxnet3_guess_mss(hw,
808                                                 rcd, rxm);
809                 }
810         }
811
812         rxm->ol_flags = ol_flags;
813         rxm->packet_type = packet_type;
814 }
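
/*
 * Illustrative sketch, not part of the driver: how an application would
 * typically consume the flags set above on a received mbuf "m".  drop_pkt(),
 * account_lro(), worker and nb_workers are hypothetical.
 *
 *	if (m->ol_flags & (RTE_MBUF_F_RX_IP_CKSUM_BAD |
 *			   RTE_MBUF_F_RX_L4_CKSUM_BAD))
 *		drop_pkt(m);
 *	else if ((m->ol_flags & RTE_MBUF_F_RX_LRO) && m->tso_segsz != 0)
 *		account_lro(m->pkt_len, m->tso_segsz);
 *
 *	if (m->ol_flags & RTE_MBUF_F_RX_RSS_HASH)
 *		worker = m->hash.rss % nb_workers;
 */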
815
816 /*
817  * Process the Rx Completion Ring of given vmxnet3_rx_queue
818  * for nb_pkts burst and return the number of packets received
819  */
820 uint16_t
821 vmxnet3_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t nb_pkts)
822 {
823         uint16_t nb_rx;
824         uint32_t nb_rxd, idx;
825         uint8_t ring_idx;
826         vmxnet3_rx_queue_t *rxq;
827         Vmxnet3_RxCompDesc *rcd;
828         vmxnet3_buf_info_t *rbi;
829         Vmxnet3_RxDesc *rxd;
830         struct rte_mbuf *rxm = NULL;
831         struct vmxnet3_hw *hw;
832
833         nb_rx = 0;
834         ring_idx = 0;
835         nb_rxd = 0;
836         idx = 0;
837
838         rxq = rx_queue;
839         hw = rxq->hw;
840
841         rcd = &rxq->comp_ring.base[rxq->comp_ring.next2proc].rcd;
842
843         if (unlikely(rxq->stopped)) {
844                 PMD_RX_LOG(DEBUG, "Rx queue is stopped.");
845                 return 0;
846         }
847
848         while (rcd->gen == rxq->comp_ring.gen) {
849                 struct rte_mbuf *newm;
850
851                 if (nb_rx >= nb_pkts)
852                         break;
853
854                 newm = rte_mbuf_raw_alloc(rxq->mp);
855                 if (unlikely(newm == NULL)) {
856                         PMD_RX_LOG(ERR, "Error allocating mbuf");
857                         rxq->stats.rx_buf_alloc_failure++;
858                         break;
859                 }
860
861                 idx = rcd->rxdIdx;
862                 ring_idx = vmxnet3_get_ring_idx(hw, rcd->rqID);
863                 rxd = (Vmxnet3_RxDesc *)rxq->cmd_ring[ring_idx].base + idx;
864                 RTE_SET_USED(rxd); /* used only for assert when enabled */
865                 rbi = rxq->cmd_ring[ring_idx].buf_info + idx;
866
867                 PMD_RX_LOG(DEBUG, "rxd idx: %d ring idx: %d.", idx, ring_idx);
868
869                 RTE_ASSERT(rcd->len <= rxd->len);
870                 RTE_ASSERT(rbi->m);
871
872                 /* Get the packet buffer pointer from buf_info */
873                 rxm = rbi->m;
874
875                 /* Clear descriptor associated buf_info to be reused */
876                 rbi->m = NULL;
877                 rbi->bufPA = 0;
878
879                 /* Update the index that we received a packet */
880                 rxq->cmd_ring[ring_idx].next2comp = idx;
881
882                 /* For RCD with EOP set, check if there is frame error */
883                 if (unlikely(rcd->eop && rcd->err)) {
884                         rxq->stats.drop_total++;
885                         rxq->stats.drop_err++;
886
887                         if (!rcd->fcs) {
888                                 rxq->stats.drop_fcs++;
889                                 PMD_RX_LOG(ERR, "Recv packet dropped due to frame err.");
890                         }
891                         PMD_RX_LOG(ERR, "Error in received packet rcd#:%d rxd:%d",
892                                    (int)(rcd - (struct Vmxnet3_RxCompDesc *)
893                                          rxq->comp_ring.base), rcd->rxdIdx);
894                         rte_pktmbuf_free_seg(rxm);
895                         if (rxq->start_seg) {
896                                 struct rte_mbuf *start = rxq->start_seg;
897
898                                 rxq->start_seg = NULL;
899                                 rte_pktmbuf_free(start);
900                         }
901                         goto rcd_done;
902                 }
903
904                 /* Initialize newly received packet buffer */
905                 rxm->port = rxq->port_id;
906                 rxm->nb_segs = 1;
907                 rxm->next = NULL;
908                 rxm->pkt_len = (uint16_t)rcd->len;
909                 rxm->data_len = (uint16_t)rcd->len;
910                 rxm->data_off = RTE_PKTMBUF_HEADROOM;
911                 rxm->ol_flags = 0;
912                 rxm->vlan_tci = 0;
913                 rxm->packet_type = 0;
914
915                 /*
916                  * If this is the first buffer of the received packet,
917                  * set the pointer to the first mbuf of the packet
918                  * Otherwise, update the total length and the number of segments
919                  * of the current scattered packet, and update the pointer to
920                  * the last mbuf of the current packet.
921                  */
922                 if (rcd->sop) {
923                         RTE_ASSERT(rxd->btype == VMXNET3_RXD_BTYPE_HEAD);
924
925                         if (unlikely(rcd->len == 0)) {
926                                 RTE_ASSERT(rcd->eop);
927
928                                 PMD_RX_LOG(DEBUG,
929                                            "Rx buf was skipped. rxring[%d][%d]",
930                                            ring_idx, idx);
931                                 rte_pktmbuf_free_seg(rxm);
932                                 goto rcd_done;
933                         }
934
935                         if (vmxnet3_rx_data_ring(hw, rcd->rqID)) {
936                                 uint8_t *rdd = rxq->data_ring.base +
937                                         idx * rxq->data_desc_size;
938
939                                 RTE_ASSERT(VMXNET3_VERSION_GE_3(hw));
940                                 rte_memcpy(rte_pktmbuf_mtod(rxm, char *),
941                                            rdd, rcd->len);
942                         }
943
944                         rxq->start_seg = rxm;
945                         rxq->last_seg = rxm;
946                         vmxnet3_rx_offload(hw, rcd, rxm, 1);
947                 } else {
948                         struct rte_mbuf *start = rxq->start_seg;
949
950                         RTE_ASSERT(rxd->btype == VMXNET3_RXD_BTYPE_BODY);
951
952                         if (likely(start && rxm->data_len > 0)) {
953                                 start->pkt_len += rxm->data_len;
954                                 start->nb_segs++;
955
956                                 rxq->last_seg->next = rxm;
957                                 rxq->last_seg = rxm;
958                         } else {
959                                 PMD_RX_LOG(ERR, "Error: received an empty or out-of-order frame.");
960                                 rxq->stats.drop_total++;
961                                 rxq->stats.drop_err++;
962
963                                 rte_pktmbuf_free_seg(rxm);
964                         }
965                 }
966
967                 if (rcd->eop) {
968                         struct rte_mbuf *start = rxq->start_seg;
969
970                         vmxnet3_rx_offload(hw, rcd, start, 0);
971                         rx_pkts[nb_rx++] = start;
972                         rxq->start_seg = NULL;
973                 }
974
975 rcd_done:
976                 rxq->cmd_ring[ring_idx].next2comp = idx;
977                 VMXNET3_INC_RING_IDX_ONLY(rxq->cmd_ring[ring_idx].next2comp,
978                                           rxq->cmd_ring[ring_idx].size);
979
980                 /* It's time to renew descriptors */
981                 vmxnet3_renew_desc(rxq, ring_idx, newm);
982                 if (unlikely(rxq->shared->ctrl.updateRxProd)) {
983                         VMXNET3_WRITE_BAR0_REG(hw, rxprod_reg[ring_idx] + (rxq->queue_id * VMXNET3_REG_ALIGN),
984                                                rxq->cmd_ring[ring_idx].next2fill);
985                 }
986
987                 /* Advance to the next descriptor in comp_ring */
988                 vmxnet3_comp_ring_adv_next2proc(&rxq->comp_ring);
989
990                 rcd = &rxq->comp_ring.base[rxq->comp_ring.next2proc].rcd;
991                 nb_rxd++;
992                 if (nb_rxd > rxq->cmd_ring[0].size) {
993                         PMD_RX_LOG(ERR, "Used up quota of receiving packets,"
994                                    " relinquish control.");
995                         break;
996                 }
997         }
998
999         if (unlikely(nb_rxd == 0)) {
1000                 uint32_t avail;
1001                 for (ring_idx = 0; ring_idx < VMXNET3_RX_CMDRING_SIZE; ring_idx++) {
1002                         avail = vmxnet3_cmd_ring_desc_avail(&rxq->cmd_ring[ring_idx]);
1003                         if (unlikely(avail > 0)) {
1004                                 /* try to alloc new buf and renew descriptors */
1005                                 vmxnet3_post_rx_bufs(rxq, ring_idx);
1006                         }
1007                 }
1008                 if (unlikely(rxq->shared->ctrl.updateRxProd)) {
1009                         for (ring_idx = 0; ring_idx < VMXNET3_RX_CMDRING_SIZE; ring_idx++) {
1010                                 VMXNET3_WRITE_BAR0_REG(hw, rxprod_reg[ring_idx] + (rxq->queue_id * VMXNET3_REG_ALIGN),
1011                                                        rxq->cmd_ring[ring_idx].next2fill);
1012                         }
1013                 }
1014         }
1015
1016         return nb_rx;
1017 }
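
/*
 * Illustrative sketch, not part of the driver: vmxnet3_recv_pkts() is reached
 * through rte_eth_rx_burst().  A minimal polling loop with hypothetical
 * port_id/queue_id; process_pkt() stands in for application code that frees
 * or forwards each mbuf.
 *
 *	struct rte_mbuf *bufs[32];
 *	uint16_t i, nb;
 *
 *	for (;;) {
 *		nb = rte_eth_rx_burst(port_id, queue_id, bufs, 32);
 *		for (i = 0; i < nb; i++)
 *			process_pkt(bufs[i]);
 *	}
 */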
1018
1019 int
1020 vmxnet3_dev_tx_queue_setup(struct rte_eth_dev *dev,
1021                            uint16_t queue_idx,
1022                            uint16_t nb_desc,
1023                            unsigned int socket_id,
1024                            const struct rte_eth_txconf *tx_conf __rte_unused)
1025 {
1026         struct vmxnet3_hw *hw = dev->data->dev_private;
1027         const struct rte_memzone *mz;
1028         struct vmxnet3_tx_queue *txq;
1029         struct vmxnet3_cmd_ring *ring;
1030         struct vmxnet3_comp_ring *comp_ring;
1031         struct vmxnet3_data_ring *data_ring;
1032         int size;
1033
1034         PMD_INIT_FUNC_TRACE();
1035
1036         txq = rte_zmalloc("ethdev_tx_queue", sizeof(struct vmxnet3_tx_queue),
1037                           RTE_CACHE_LINE_SIZE);
1038         if (txq == NULL) {
1039                 PMD_INIT_LOG(ERR, "Can not allocate tx queue structure");
1040                 return -ENOMEM;
1041         }
1042
1043         txq->queue_id = queue_idx;
1044         txq->port_id = dev->data->port_id;
1045         txq->shared = NULL; /* set in vmxnet3_setup_driver_shared() */
1046         txq->hw = hw;
1047         txq->qid = queue_idx;
1048         txq->stopped = TRUE;
1049         txq->txdata_desc_size = hw->txdata_desc_size;
1050
1051         ring = &txq->cmd_ring;
1052         comp_ring = &txq->comp_ring;
1053         data_ring = &txq->data_ring;
1054
1055         /* Tx vmxnet ring length should be between 512-4096 */
1056         if (nb_desc < VMXNET3_DEF_TX_RING_SIZE) {
1057                 PMD_INIT_LOG(ERR, "VMXNET3 Tx Ring Size Min: %u",
1058                              VMXNET3_DEF_TX_RING_SIZE);
1059                 return -EINVAL;
1060         } else if (nb_desc > VMXNET3_TX_RING_MAX_SIZE) {
1061                 PMD_INIT_LOG(ERR, "VMXNET3 Tx Ring Size Max: %u",
1062                              VMXNET3_TX_RING_MAX_SIZE);
1063                 return -EINVAL;
1064         } else {
1065                 ring->size = nb_desc;
1066                 ring->size &= ~VMXNET3_RING_SIZE_MASK;
1067         }
1068         comp_ring->size = data_ring->size = ring->size;
1069
1070         /* Tx vmxnet rings structure initialization*/
1071         ring->next2fill = 0;
1072         ring->next2comp = 0;
1073         ring->gen = VMXNET3_INIT_GEN;
1074         comp_ring->next2proc = 0;
1075         comp_ring->gen = VMXNET3_INIT_GEN;
1076
1077         size = sizeof(struct Vmxnet3_TxDesc) * ring->size;
1078         size += sizeof(struct Vmxnet3_TxCompDesc) * comp_ring->size;
1079         size += txq->txdata_desc_size * data_ring->size;
1080
1081         mz = rte_eth_dma_zone_reserve(dev, "txdesc", queue_idx, size,
1082                                       VMXNET3_RING_BA_ALIGN, socket_id);
1083         if (mz == NULL) {
1084                 PMD_INIT_LOG(ERR, "ERROR: Creating queue descriptors zone");
1085                 return -ENOMEM;
1086         }
1087         txq->mz = mz;
1088         memset(mz->addr, 0, mz->len);
1089
1090         /* cmd_ring initialization */
1091         ring->base = mz->addr;
1092         ring->basePA = mz->iova;
1093
1094         /* comp_ring initialization */
1095         comp_ring->base = ring->base + ring->size;
1096         comp_ring->basePA = ring->basePA +
1097                 (sizeof(struct Vmxnet3_TxDesc) * ring->size);
1098
1099         /* data_ring initialization */
1100         data_ring->base = (Vmxnet3_TxDataDesc *)(comp_ring->base + comp_ring->size);
1101         data_ring->basePA = comp_ring->basePA +
1102                         (sizeof(struct Vmxnet3_TxCompDesc) * comp_ring->size);
1103
1104         /* cmd_ring0 buf_info allocation */
1105         ring->buf_info = rte_zmalloc("tx_ring_buf_info",
1106                                      ring->size * sizeof(vmxnet3_buf_info_t), RTE_CACHE_LINE_SIZE);
1107         if (ring->buf_info == NULL) {
1108                 PMD_INIT_LOG(ERR, "ERROR: Creating tx_buf_info structure");
1109                 return -ENOMEM;
1110         }
1111
1112         /* Update the data portion with txq */
1113         dev->data->tx_queues[queue_idx] = txq;
1114
1115         return 0;
1116 }
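
/*
 * Illustrative sketch, not part of the driver: this callback is invoked
 * through rte_eth_tx_queue_setup().  A typical application-side call, with a
 * hypothetical port_id/ret and 512 descriptors (the minimum accepted above):
 *
 *	ret = rte_eth_tx_queue_setup(port_id, 0, 512,
 *				     rte_eth_dev_socket_id(port_id), NULL);
 *	if (ret < 0)
 *		rte_exit(EXIT_FAILURE, "tx queue setup failed: %d\n", ret);
 */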
1117
1118 int
1119 vmxnet3_dev_rx_queue_setup(struct rte_eth_dev *dev,
1120                            uint16_t queue_idx,
1121                            uint16_t nb_desc,
1122                            unsigned int socket_id,
1123                            __rte_unused const struct rte_eth_rxconf *rx_conf,
1124                            struct rte_mempool *mp)
1125 {
1126         const struct rte_memzone *mz;
1127         struct vmxnet3_rx_queue *rxq;
1128         struct vmxnet3_hw *hw = dev->data->dev_private;
1129         struct vmxnet3_cmd_ring *ring0, *ring1, *ring;
1130         struct vmxnet3_comp_ring *comp_ring;
1131         struct vmxnet3_rx_data_ring *data_ring;
1132         int size;
1133         uint8_t i;
1134         char mem_name[32];
1135
1136         PMD_INIT_FUNC_TRACE();
1137
1138         rxq = rte_zmalloc("ethdev_rx_queue", sizeof(struct vmxnet3_rx_queue),
1139                           RTE_CACHE_LINE_SIZE);
1140         if (rxq == NULL) {
1141                 PMD_INIT_LOG(ERR, "Can not allocate rx queue structure");
1142                 return -ENOMEM;
1143         }
1144
1145         rxq->mp = mp;
1146         rxq->queue_id = queue_idx;
1147         rxq->port_id = dev->data->port_id;
1148         rxq->shared = NULL; /* set in vmxnet3_setup_driver_shared() */
1149         rxq->hw = hw;
1150         rxq->qid1 = queue_idx;
1151         rxq->qid2 = queue_idx + hw->num_rx_queues;
1152         rxq->data_ring_qid = queue_idx + 2 * hw->num_rx_queues;
1153         rxq->data_desc_size = hw->rxdata_desc_size;
1154         rxq->stopped = TRUE;
1155
1156         ring0 = &rxq->cmd_ring[0];
1157         ring1 = &rxq->cmd_ring[1];
1158         comp_ring = &rxq->comp_ring;
1159         data_ring = &rxq->data_ring;
1160
1161         /* Rx vmxnet rings length should be between 256-4096 */
1162         if (nb_desc < VMXNET3_DEF_RX_RING_SIZE) {
1163                 PMD_INIT_LOG(ERR, "VMXNET3 Rx Ring Size Min: 256");
1164                 return -EINVAL;
1165         } else if (nb_desc > VMXNET3_RX_RING_MAX_SIZE) {
1166                 PMD_INIT_LOG(ERR, "VMXNET3 Rx Ring Size Max: 4096");
1167                 return -EINVAL;
1168         } else {
1169                 ring0->size = nb_desc;
1170                 ring0->size &= ~VMXNET3_RING_SIZE_MASK;
1171                 ring1->size = ring0->size;
1172         }
1173
1174         comp_ring->size = ring0->size + ring1->size;
1175         data_ring->size = ring0->size;
1176
1177         /* Rx vmxnet rings structure initialization */
1178         ring0->next2fill = 0;
1179         ring1->next2fill = 0;
1180         ring0->next2comp = 0;
1181         ring1->next2comp = 0;
1182         ring0->gen = VMXNET3_INIT_GEN;
1183         ring1->gen = VMXNET3_INIT_GEN;
1184         comp_ring->next2proc = 0;
1185         comp_ring->gen = VMXNET3_INIT_GEN;
1186
1187         size = sizeof(struct Vmxnet3_RxDesc) * (ring0->size + ring1->size);
1188         size += sizeof(struct Vmxnet3_RxCompDesc) * comp_ring->size;
1189         if (VMXNET3_VERSION_GE_3(hw) && rxq->data_desc_size)
1190                 size += rxq->data_desc_size * data_ring->size;
1191
1192         mz = rte_eth_dma_zone_reserve(dev, "rxdesc", queue_idx, size,
1193                                       VMXNET3_RING_BA_ALIGN, socket_id);
1194         if (mz == NULL) {
1195                 PMD_INIT_LOG(ERR, "ERROR: Creating queue descriptors zone");
1196                 return -ENOMEM;
1197         }
1198         rxq->mz = mz;
1199         memset(mz->addr, 0, mz->len);
1200
1201         /* cmd_ring0 initialization */
1202         ring0->base = mz->addr;
1203         ring0->basePA = mz->iova;
1204
1205         /* cmd_ring1 initialization */
1206         ring1->base = ring0->base + ring0->size;
1207         ring1->basePA = ring0->basePA + sizeof(struct Vmxnet3_RxDesc) * ring0->size;
1208
1209         /* comp_ring initialization */
1210         comp_ring->base = ring1->base + ring1->size;
1211         comp_ring->basePA = ring1->basePA + sizeof(struct Vmxnet3_RxDesc) *
1212                 ring1->size;
1213
1214         /* data_ring initialization */
1215         if (VMXNET3_VERSION_GE_3(hw) && rxq->data_desc_size) {
1216                 data_ring->base =
1217                         (uint8_t *)(comp_ring->base + comp_ring->size);
1218                 data_ring->basePA = comp_ring->basePA +
1219                         sizeof(struct Vmxnet3_RxCompDesc) * comp_ring->size;
1220         }
1221
1222         /* cmd_ring0-cmd_ring1 buf_info allocation */
1223         for (i = 0; i < VMXNET3_RX_CMDRING_SIZE; i++) {
1224
1225                 ring = &rxq->cmd_ring[i];
1226                 ring->rid = i;
1227                 snprintf(mem_name, sizeof(mem_name), "rx_ring_%d_buf_info", i);
1228
1229                 ring->buf_info = rte_zmalloc(mem_name,
1230                                              ring->size * sizeof(vmxnet3_buf_info_t),
1231                                              RTE_CACHE_LINE_SIZE);
1232                 if (ring->buf_info == NULL) {
1233                         PMD_INIT_LOG(ERR, "ERROR: Creating rx_buf_info structure");
1234                         return -ENOMEM;
1235                 }
1236         }
1237
1238         /* Update the data portion with rxq */
1239         dev->data->rx_queues[queue_idx] = rxq;
1240
1241         return 0;
1242 }
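
/*
 * Illustrative sketch, not part of the driver: this callback is reached via
 * rte_eth_rx_queue_setup(), which also supplies the mempool used by
 * vmxnet3_post_rx_bufs() to allocate Rx buffers.  Pool sizing, port_id and
 * ret are hypothetical.
 *
 *	struct rte_mempool *mp;
 *
 *	mp = rte_pktmbuf_pool_create("rx_pool", 8192, 256, 0,
 *				     RTE_MBUF_DEFAULT_BUF_SIZE,
 *				     rte_eth_dev_socket_id(port_id));
 *	ret = rte_eth_rx_queue_setup(port_id, 0, 1024,
 *				     rte_eth_dev_socket_id(port_id), NULL, mp);
 */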
1243
1244 /*
1245  * Initializes the Receive Unit.
1246  * Loads mbufs into the Rx queues in advance.
1247  */
1248 int
1249 vmxnet3_dev_rxtx_init(struct rte_eth_dev *dev)
1250 {
1251         struct vmxnet3_hw *hw = dev->data->dev_private;
1252
1253         int i, ret;
1254         uint8_t j;
1255
1256         PMD_INIT_FUNC_TRACE();
1257
1258         for (i = 0; i < hw->num_rx_queues; i++) {
1259                 vmxnet3_rx_queue_t *rxq = dev->data->rx_queues[i];
1260
1261                 for (j = 0; j < VMXNET3_RX_CMDRING_SIZE; j++) {
1262                         /* Fill the whole ring with freshly allocated mbufs */
1263                         ret = vmxnet3_post_rx_bufs(rxq, j);
1264                         if (ret <= 0) {
1265                                 PMD_INIT_LOG(ERR,
1266                                              "ERROR: Posting Rxq: %d buffers ring: %d",
1267                                              i, j);
1268                                 return -ret;
1269                         }
1270                         /*
1271                          * Update the device with the next2fill index so that
1272                          * the posted mbufs can be used for incoming packets.
1273                          */
1274                         if (unlikely(rxq->shared->ctrl.updateRxProd)) {
1275                                 VMXNET3_WRITE_BAR0_REG(hw, rxprod_reg[j] + (rxq->queue_id * VMXNET3_REG_ALIGN),
1276                                                        rxq->cmd_ring[j].next2fill);
1277                         }
1278                 }
1279                 rxq->stopped = FALSE;
1280                 rxq->start_seg = NULL;
1281         }
1282
1283         for (i = 0; i < dev->data->nb_tx_queues; i++) {
1284                 struct vmxnet3_tx_queue *txq = dev->data->tx_queues[i];
1285
1286                 txq->stopped = FALSE;
1287         }
1288
1289         return 0;
1290 }
1291
1292 static uint8_t rss_intel_key[40] = {
1293         0x6D, 0x5A, 0x56, 0xDA, 0x25, 0x5B, 0x0E, 0xC2,
1294         0x41, 0x67, 0x25, 0x3D, 0x43, 0xA3, 0x8F, 0xB0,
1295         0xD0, 0xCA, 0x2B, 0xCB, 0xAE, 0x7B, 0x30, 0xB4,
1296         0x77, 0xCB, 0x2D, 0xA3, 0x80, 0x30, 0xF2, 0x0C,
1297         0x6A, 0x42, 0xB7, 0x3B, 0xBE, 0xAC, 0x01, 0xFA,
1298 };
1299
1300 /*
1301  * Additional RSS configurations based on vmxnet v4+ APIs
1302  */
1303 int
1304 vmxnet3_v4_rss_configure(struct rte_eth_dev *dev)
1305 {
1306         struct vmxnet3_hw *hw = dev->data->dev_private;
1307         Vmxnet3_DriverShared *shared = hw->shared;
1308         Vmxnet3_CmdInfo *cmdInfo = &shared->cu.cmdInfo;
1309         struct rte_eth_rss_conf *port_rss_conf;
1310         uint64_t rss_hf;
1311         uint32_t ret;
1312
1313         PMD_INIT_FUNC_TRACE();
1314
1315         cmdInfo->setRSSFields = 0;
1316         port_rss_conf = &dev->data->dev_conf.rx_adv_conf.rss_conf;
1317
1318         if ((port_rss_conf->rss_hf & VMXNET3_MANDATORY_V4_RSS) !=
1319             VMXNET3_MANDATORY_V4_RSS) {
1320                 PMD_INIT_LOG(WARNING, "RSS: IPv4/6 TCP is required for vmxnet3 v4 RSS, "
1321                              "automatically setting it");
1322                 port_rss_conf->rss_hf |= VMXNET3_MANDATORY_V4_RSS;
1323         }
1324
1325         rss_hf = port_rss_conf->rss_hf &
1326                 (VMXNET3_V4_RSS_MASK | VMXNET3_RSS_OFFLOAD_ALL);
1327
1328         if (rss_hf & RTE_ETH_RSS_NONFRAG_IPV4_TCP)
1329                 cmdInfo->setRSSFields |= VMXNET3_RSS_FIELDS_TCPIP4;
1330         if (rss_hf & RTE_ETH_RSS_NONFRAG_IPV6_TCP)
1331                 cmdInfo->setRSSFields |= VMXNET3_RSS_FIELDS_TCPIP6;
1332         if (rss_hf & RTE_ETH_RSS_NONFRAG_IPV4_UDP)
1333                 cmdInfo->setRSSFields |= VMXNET3_RSS_FIELDS_UDPIP4;
1334         if (rss_hf & RTE_ETH_RSS_NONFRAG_IPV6_UDP)
1335                 cmdInfo->setRSSFields |= VMXNET3_RSS_FIELDS_UDPIP6;
1336
1337         VMXNET3_WRITE_BAR1_REG(hw, VMXNET3_REG_CMD,
1338                                VMXNET3_CMD_SET_RSS_FIELDS);
1339         ret = VMXNET3_READ_BAR1_REG(hw, VMXNET3_REG_CMD);
1340
1341         if (ret != VMXNET3_SUCCESS) {
1342                 PMD_DRV_LOG(ERR, "Set RSS fields (v4) failed: %d", ret);
1343         }
1344
1345         return ret;
1346 }
1347
1348 /*
1349  * Configure RSS feature
1350  */
1351 int
1352 vmxnet3_rss_configure(struct rte_eth_dev *dev)
1353 {
1354         struct vmxnet3_hw *hw = dev->data->dev_private;
1355         struct VMXNET3_RSSConf *dev_rss_conf;
1356         struct rte_eth_rss_conf *port_rss_conf;
1357         uint64_t rss_hf;
1358         uint8_t i, j;
1359
1360         PMD_INIT_FUNC_TRACE();
1361
1362         dev_rss_conf = hw->rss_conf;
1363         port_rss_conf = &dev->data->dev_conf.rx_adv_conf.rss_conf;
1364
1365         /* loading hashFunc */
1366         dev_rss_conf->hashFunc = VMXNET3_RSS_HASH_FUNC_TOEPLITZ;
1367         /* loading hashKeySize */
1368         dev_rss_conf->hashKeySize = VMXNET3_RSS_MAX_KEY_SIZE;
1369         /* loading indTableSize: Must not exceed VMXNET3_RSS_MAX_IND_TABLE_SIZE (128)*/
1370         dev_rss_conf->indTableSize = (uint16_t)(hw->num_rx_queues * 4);
1371
1372         if (port_rss_conf->rss_key == NULL) {
1373                 /* Default hash key */
1374                 port_rss_conf->rss_key = rss_intel_key;
1375         }
1376
1377         /* loading hashKey */
1378         memcpy(&dev_rss_conf->hashKey[0], port_rss_conf->rss_key,
1379                dev_rss_conf->hashKeySize);
1380
1381         /* loading indTable */
1382         for (i = 0, j = 0; i < dev_rss_conf->indTableSize; i++, j++) {
1383                 if (j == dev->data->nb_rx_queues)
1384                         j = 0;
1385                 dev_rss_conf->indTable[i] = j;
1386         }
1387
1388         /* loading hashType */
1389         dev_rss_conf->hashType = 0;
1390         rss_hf = port_rss_conf->rss_hf & VMXNET3_RSS_OFFLOAD_ALL;
1391         if (rss_hf & RTE_ETH_RSS_IPV4)
1392                 dev_rss_conf->hashType |= VMXNET3_RSS_HASH_TYPE_IPV4;
1393         if (rss_hf & RTE_ETH_RSS_NONFRAG_IPV4_TCP)
1394                 dev_rss_conf->hashType |= VMXNET3_RSS_HASH_TYPE_TCP_IPV4;
1395         if (rss_hf & RTE_ETH_RSS_IPV6)
1396                 dev_rss_conf->hashType |= VMXNET3_RSS_HASH_TYPE_IPV6;
1397         if (rss_hf & RTE_ETH_RSS_NONFRAG_IPV6_TCP)
1398                 dev_rss_conf->hashType |= VMXNET3_RSS_HASH_TYPE_TCP_IPV6;
1399
1400         return VMXNET3_SUCCESS;
1401 }
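
/*
 * Illustrative sketch, not part of the driver: the RSS parameters consumed
 * above come from the application's port configuration.  A minimal setup with
 * hypothetical port_id/nb_rx_queues/nb_tx_queues/ret; a NULL rss_key falls
 * back to rss_intel_key above.
 *
 *	struct rte_eth_conf conf = {
 *		.rxmode = { .mq_mode = RTE_ETH_MQ_RX_RSS },
 *		.rx_adv_conf.rss_conf = {
 *			.rss_key = NULL,
 *			.rss_hf = RTE_ETH_RSS_IPV4 | RTE_ETH_RSS_IPV6 |
 *				  RTE_ETH_RSS_NONFRAG_IPV4_TCP |
 *				  RTE_ETH_RSS_NONFRAG_IPV6_TCP,
 *		},
 *	};
 *
 *	ret = rte_eth_dev_configure(port_id, nb_rx_queues, nb_tx_queues, &conf);
 */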