net: add rte prefix to IP defines
drivers/net/vmxnet3/vmxnet3_rxtx.c
1 /* SPDX-License-Identifier: BSD-3-Clause
2  * Copyright(c) 2010-2015 Intel Corporation
3  */
4
5 #include <sys/queue.h>
6
7 #include <stdio.h>
8 #include <stdlib.h>
9 #include <string.h>
10 #include <errno.h>
11 #include <stdint.h>
12 #include <stdarg.h>
13 #include <unistd.h>
14 #include <inttypes.h>
15
16 #include <rte_byteorder.h>
17 #include <rte_common.h>
18 #include <rte_cycles.h>
19 #include <rte_log.h>
20 #include <rte_debug.h>
21 #include <rte_interrupts.h>
22 #include <rte_pci.h>
23 #include <rte_memory.h>
24 #include <rte_memzone.h>
25 #include <rte_launch.h>
26 #include <rte_eal.h>
27 #include <rte_per_lcore.h>
28 #include <rte_lcore.h>
29 #include <rte_atomic.h>
30 #include <rte_branch_prediction.h>
31 #include <rte_mempool.h>
32 #include <rte_malloc.h>
33 #include <rte_mbuf.h>
34 #include <rte_ether.h>
35 #include <rte_ethdev_driver.h>
36 #include <rte_prefetch.h>
37 #include <rte_ip.h>
38 #include <rte_udp.h>
39 #include <rte_tcp.h>
40 #include <rte_sctp.h>
41 #include <rte_string_fns.h>
42 #include <rte_errno.h>
43 #include <rte_net.h>
44
45 #include "base/vmxnet3_defs.h"
46 #include "vmxnet3_ring.h"
47
48 #include "vmxnet3_logs.h"
49 #include "vmxnet3_ethdev.h"
50
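/*
 * Tx offload flags this PMD supports, and their complement within
 * PKT_TX_OFFLOAD_MASK.  vmxnet3_prep_pkts() uses the NOTSUP mask below to
 * reject packets that request an offload the driver does not implement.
 */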
51 #define VMXNET3_TX_OFFLOAD_MASK ( \
52                 PKT_TX_VLAN_PKT | \
53                 PKT_TX_IPV6 |     \
54                 PKT_TX_IPV4 |     \
55                 PKT_TX_L4_MASK |  \
56                 PKT_TX_TCP_SEG)
57
58 #define VMXNET3_TX_OFFLOAD_NOTSUP_MASK  \
59         (PKT_TX_OFFLOAD_MASK ^ VMXNET3_TX_OFFLOAD_MASK)
60
61 static const uint32_t rxprod_reg[2] = {VMXNET3_REG_RXPROD, VMXNET3_REG_RXPROD2};
62
63 static int vmxnet3_post_rx_bufs(vmxnet3_rx_queue_t*, uint8_t);
64 static void vmxnet3_tq_tx_complete(vmxnet3_tx_queue_t *);
65 #ifdef RTE_LIBRTE_VMXNET3_DEBUG_DRIVER_NOT_USED
66 static void vmxnet3_rxq_dump(struct vmxnet3_rx_queue *);
67 static void vmxnet3_txq_dump(struct vmxnet3_tx_queue *);
68 #endif
69
70 #ifdef RTE_LIBRTE_VMXNET3_DEBUG_DRIVER_NOT_USED
71 static void
72 vmxnet3_rxq_dump(struct vmxnet3_rx_queue *rxq)
73 {
74         uint32_t avail = 0;
75
76         if (rxq == NULL)
77                 return;
78
79         PMD_RX_LOG(DEBUG,
80                    "RXQ: cmd0 base : %p cmd1 base : %p comp ring base : %p.",
81                    rxq->cmd_ring[0].base, rxq->cmd_ring[1].base, rxq->comp_ring.base);
82         PMD_RX_LOG(DEBUG,
83                    "RXQ: cmd0 basePA : 0x%lx cmd1 basePA : 0x%lx comp ring basePA : 0x%lx.",
84                    (unsigned long)rxq->cmd_ring[0].basePA,
85                    (unsigned long)rxq->cmd_ring[1].basePA,
86                    (unsigned long)rxq->comp_ring.basePA);
87
88         avail = vmxnet3_cmd_ring_desc_avail(&rxq->cmd_ring[0]);
89         PMD_RX_LOG(DEBUG,
90                    "RXQ:cmd0: size=%u; free=%u; next2proc=%u; queued=%u",
91                    (uint32_t)rxq->cmd_ring[0].size, avail,
92                    rxq->comp_ring.next2proc,
93                    rxq->cmd_ring[0].size - avail);
94
95         avail = vmxnet3_cmd_ring_desc_avail(&rxq->cmd_ring[1]);
96         PMD_RX_LOG(DEBUG, "RXQ:cmd1 size=%u; free=%u; next2proc=%u; queued=%u",
97                    (uint32_t)rxq->cmd_ring[1].size, avail, rxq->comp_ring.next2proc,
98                    rxq->cmd_ring[1].size - avail);
99
100 }
101
102 static void
103 vmxnet3_txq_dump(struct vmxnet3_tx_queue *txq)
104 {
105         uint32_t avail = 0;
106
107         if (txq == NULL)
108                 return;
109
110         PMD_TX_LOG(DEBUG, "TXQ: cmd base : %p comp ring base : %p data ring base : %p.",
111                    txq->cmd_ring.base, txq->comp_ring.base, txq->data_ring.base);
112         PMD_TX_LOG(DEBUG, "TXQ: cmd basePA : 0x%lx comp ring basePA : 0x%lx data ring basePA : 0x%lx.",
113                    (unsigned long)txq->cmd_ring.basePA,
114                    (unsigned long)txq->comp_ring.basePA,
115                    (unsigned long)txq->data_ring.basePA);
116
117         avail = vmxnet3_cmd_ring_desc_avail(&txq->cmd_ring);
118         PMD_TX_LOG(DEBUG, "TXQ: size=%u; free=%u; next2proc=%u; queued=%u",
119                    (uint32_t)txq->cmd_ring.size, avail,
120                    txq->comp_ring.next2proc, txq->cmd_ring.size - avail);
121 }
122 #endif
123
124 static void
125 vmxnet3_tx_cmd_ring_release_mbufs(vmxnet3_cmd_ring_t *ring)
126 {
127         while (ring->next2comp != ring->next2fill) {
128                 /* No need to worry about desc ownership, device is quiesced by now. */
129                 vmxnet3_buf_info_t *buf_info = ring->buf_info + ring->next2comp;
130
131                 if (buf_info->m) {
132                         rte_pktmbuf_free(buf_info->m);
133                         buf_info->m = NULL;
134                         buf_info->bufPA = 0;
135                         buf_info->len = 0;
136                 }
137                 vmxnet3_cmd_ring_adv_next2comp(ring);
138         }
139 }
140
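/*
 * Rx buffers are released per segment: each Rx command-ring entry owns at
 * most one mbuf segment, unlike the Tx path where only the EOP entry's
 * buf_info holds the (whole) mbuf chain.
 */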
141 static void
142 vmxnet3_rx_cmd_ring_release_mbufs(vmxnet3_cmd_ring_t *ring)
143 {
144         uint32_t i;
145
146         for (i = 0; i < ring->size; i++) {
147                 /* No need to worry about desc ownership, device is quiesced by now. */
148                 vmxnet3_buf_info_t *buf_info = &ring->buf_info[i];
149
150                 if (buf_info->m) {
151                         rte_pktmbuf_free_seg(buf_info->m);
152                         buf_info->m = NULL;
153                         buf_info->bufPA = 0;
154                         buf_info->len = 0;
155                 }
156                 vmxnet3_cmd_ring_adv_next2comp(ring);
157         }
158 }
159
160 static void
161 vmxnet3_cmd_ring_release(vmxnet3_cmd_ring_t *ring)
162 {
163         rte_free(ring->buf_info);
164         ring->buf_info = NULL;
165 }
166
167 void
168 vmxnet3_dev_tx_queue_release(void *txq)
169 {
170         vmxnet3_tx_queue_t *tq = txq;
171
172         if (tq != NULL) {
173                 /* Release mbufs */
174                 vmxnet3_tx_cmd_ring_release_mbufs(&tq->cmd_ring);
175                 /* Release the cmd_ring */
176                 vmxnet3_cmd_ring_release(&tq->cmd_ring);
177                 /* Release the memzone */
178                 rte_memzone_free(tq->mz);
179                 /* Release the queue */
180                 rte_free(tq);
181         }
182 }
183
184 void
185 vmxnet3_dev_rx_queue_release(void *rxq)
186 {
187         int i;
188         vmxnet3_rx_queue_t *rq = rxq;
189
190         if (rq != NULL) {
191                 /* Release mbufs */
192                 for (i = 0; i < VMXNET3_RX_CMDRING_SIZE; i++)
193                         vmxnet3_rx_cmd_ring_release_mbufs(&rq->cmd_ring[i]);
194
195                 /* Release both the cmd_rings */
196                 for (i = 0; i < VMXNET3_RX_CMDRING_SIZE; i++)
197                         vmxnet3_cmd_ring_release(&rq->cmd_ring[i]);
198
199                 /* Release the memzone */
200                 rte_memzone_free(rq->mz);
201
202                 /* Release the queue */
203                 rte_free(rq);
204         }
205 }
206
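/*
 * Reset a Tx queue to its initial state: drop any mbufs still attached to
 * the command ring, reset the ring indices and generation bits, and zero
 * the descriptor memory shared with the device.
 */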
207 static void
208 vmxnet3_dev_tx_queue_reset(void *txq)
209 {
210         vmxnet3_tx_queue_t *tq = txq;
211         struct vmxnet3_cmd_ring *ring = &tq->cmd_ring;
212         struct vmxnet3_comp_ring *comp_ring = &tq->comp_ring;
213         struct vmxnet3_data_ring *data_ring = &tq->data_ring;
214         int size;
215
216         if (tq != NULL) {
217                 /* Release the cmd_ring mbufs */
218                 vmxnet3_tx_cmd_ring_release_mbufs(&tq->cmd_ring);
219         }
220
221         /* Tx vmxnet rings structure initialization */
222         ring->next2fill = 0;
223         ring->next2comp = 0;
224         ring->gen = VMXNET3_INIT_GEN;
225         comp_ring->next2proc = 0;
226         comp_ring->gen = VMXNET3_INIT_GEN;
227
228         size = sizeof(struct Vmxnet3_TxDesc) * ring->size;
229         size += sizeof(struct Vmxnet3_TxCompDesc) * comp_ring->size;
230         size += tq->txdata_desc_size * data_ring->size;
231
232         memset(ring->base, 0, size);
233 }
234
235 static void
236 vmxnet3_dev_rx_queue_reset(void *rxq)
237 {
238         int i;
239         vmxnet3_rx_queue_t *rq = rxq;
240         struct vmxnet3_hw *hw = rq->hw;
241         struct vmxnet3_cmd_ring *ring0, *ring1;
242         struct vmxnet3_comp_ring *comp_ring;
243         struct vmxnet3_rx_data_ring *data_ring = &rq->data_ring;
244         int size;
245
246         /* Release both the cmd_rings mbufs */
247         for (i = 0; i < VMXNET3_RX_CMDRING_SIZE; i++)
248                 vmxnet3_rx_cmd_ring_release_mbufs(&rq->cmd_ring[i]);
249
250         ring0 = &rq->cmd_ring[0];
251         ring1 = &rq->cmd_ring[1];
252         comp_ring = &rq->comp_ring;
253
254         /* Rx vmxnet rings structure initialization */
255         ring0->next2fill = 0;
256         ring1->next2fill = 0;
257         ring0->next2comp = 0;
258         ring1->next2comp = 0;
259         ring0->gen = VMXNET3_INIT_GEN;
260         ring1->gen = VMXNET3_INIT_GEN;
261         comp_ring->next2proc = 0;
262         comp_ring->gen = VMXNET3_INIT_GEN;
263
264         size = sizeof(struct Vmxnet3_RxDesc) * (ring0->size + ring1->size);
265         size += sizeof(struct Vmxnet3_RxCompDesc) * comp_ring->size;
266         if (VMXNET3_VERSION_GE_3(hw) && rq->data_desc_size)
267                 size += rq->data_desc_size * data_ring->size;
268
269         memset(ring0->base, 0, size);
270 }
271
272 void
273 vmxnet3_dev_clear_queues(struct rte_eth_dev *dev)
274 {
275         unsigned i;
276
277         PMD_INIT_FUNC_TRACE();
278
279         for (i = 0; i < dev->data->nb_tx_queues; i++) {
280                 struct vmxnet3_tx_queue *txq = dev->data->tx_queues[i];
281
282                 if (txq != NULL) {
283                         txq->stopped = TRUE;
284                         vmxnet3_dev_tx_queue_reset(txq);
285                 }
286         }
287
288         for (i = 0; i < dev->data->nb_rx_queues; i++) {
289                 struct vmxnet3_rx_queue *rxq = dev->data->rx_queues[i];
290
291                 if (rxq != NULL) {
292                         rxq->stopped = TRUE;
293                         vmxnet3_dev_rx_queue_reset(rxq);
294                 }
295         }
296 }
297
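/*
 * Release the command-ring descriptors of one completed packet, identified
 * by the index of its EOP descriptor, free the attached mbuf chain and
 * return the number of descriptors reclaimed.
 */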
298 static int
299 vmxnet3_unmap_pkt(uint16_t eop_idx, vmxnet3_tx_queue_t *txq)
300 {
301         int completed = 0;
302         struct rte_mbuf *mbuf;
303
304         /* Release cmd_ring descriptor and free mbuf */
305         RTE_ASSERT(txq->cmd_ring.base[eop_idx].txd.eop == 1);
306
307         mbuf = txq->cmd_ring.buf_info[eop_idx].m;
308         if (mbuf == NULL)
309                 rte_panic("EOP desc does not point to a valid mbuf");
310         rte_pktmbuf_free(mbuf);
311
312         txq->cmd_ring.buf_info[eop_idx].m = NULL;
313
314         while (txq->cmd_ring.next2comp != eop_idx) {
315                 /* no out-of-order completion */
316                 RTE_ASSERT(txq->cmd_ring.base[txq->cmd_ring.next2comp].txd.cq == 0);
317                 vmxnet3_cmd_ring_adv_next2comp(&txq->cmd_ring);
318                 completed++;
319         }
320
321         /* Mark the txd for which tcd was generated as completed */
322         vmxnet3_cmd_ring_adv_next2comp(&txq->cmd_ring);
323
324         return completed + 1;
325 }
326
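/*
 * Drain the Tx completion ring: for every completion descriptor owned by
 * the driver (gen bit matches), reclaim the transmit descriptors of the
 * corresponding packet via vmxnet3_unmap_pkt().
 */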
327 static void
328 vmxnet3_tq_tx_complete(vmxnet3_tx_queue_t *txq)
329 {
330         int completed = 0;
331         vmxnet3_comp_ring_t *comp_ring = &txq->comp_ring;
332         struct Vmxnet3_TxCompDesc *tcd = (struct Vmxnet3_TxCompDesc *)
333                 (comp_ring->base + comp_ring->next2proc);
334
335         while (tcd->gen == comp_ring->gen) {
336                 completed += vmxnet3_unmap_pkt(tcd->txdIdx, txq);
337
338                 vmxnet3_comp_ring_adv_next2proc(comp_ring);
339                 tcd = (struct Vmxnet3_TxCompDesc *)(comp_ring->base +
340                                                     comp_ring->next2proc);
341         }
342
343         PMD_TX_LOG(DEBUG, "Processed %d tx comps & command descs.", completed);
344 }
345
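/*
 * Tx prepare handler: validate that non-TSO packets fit in
 * VMXNET3_MAX_TXD_PER_PKT descriptors, that only supported offloads are
 * requested (SCTP checksum is not), and fix up checksum fields via
 * rte_net_intel_cksum_prepare() before transmission.
 *
 * Illustrative usage sketch (not part of the driver): applications normally
 * reach this handler through the generic ethdev API right before the
 * transmit burst, roughly:
 *
 *     uint16_t n = rte_eth_tx_prepare(port_id, queue_id, pkts, nb_pkts);
 *     if (n < nb_pkts)
 *         handle_bad_pkt(pkts[n], rte_errno);  /- handle_bad_pkt() is a hypothetical helper -/
 *     n = rte_eth_tx_burst(port_id, queue_id, pkts, n);
 */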
346 uint16_t
347 vmxnet3_prep_pkts(__rte_unused void *tx_queue, struct rte_mbuf **tx_pkts,
348         uint16_t nb_pkts)
349 {
350         int32_t ret;
351         uint32_t i;
352         uint64_t ol_flags;
353         struct rte_mbuf *m;
354
355         for (i = 0; i != nb_pkts; i++) {
356                 m = tx_pkts[i];
357                 ol_flags = m->ol_flags;
358
359                 /* Non-TSO packet cannot occupy more than
360                  * VMXNET3_MAX_TXD_PER_PKT TX descriptors.
361                  */
362                 if ((ol_flags & PKT_TX_TCP_SEG) == 0 &&
363                                 m->nb_segs > VMXNET3_MAX_TXD_PER_PKT) {
364                         rte_errno = EINVAL;
365                         return i;
366                 }
367
368                 /* check that only supported TX offloads are requested. */
369                 if ((ol_flags & VMXNET3_TX_OFFLOAD_NOTSUP_MASK) != 0 ||
370                                 (ol_flags & PKT_TX_L4_MASK) ==
371                                 PKT_TX_SCTP_CKSUM) {
372                         rte_errno = ENOTSUP;
373                         return i;
374                 }
375
376 #ifdef RTE_LIBRTE_ETHDEV_DEBUG
377                 ret = rte_validate_tx_offload(m);
378                 if (ret != 0) {
379                         rte_errno = -ret;
380                         return i;
381                 }
382 #endif
383                 ret = rte_net_intel_cksum_prepare(m);
384                 if (ret != 0) {
385                         rte_errno = -ret;
386                         return i;
387                 }
388         }
389
390         return i;
391 }
392
393 uint16_t
394 vmxnet3_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts,
395                   uint16_t nb_pkts)
396 {
397         uint16_t nb_tx;
398         vmxnet3_tx_queue_t *txq = tx_queue;
399         struct vmxnet3_hw *hw = txq->hw;
400         Vmxnet3_TxQueueCtrl *txq_ctrl = &txq->shared->ctrl;
401         uint32_t deferred = rte_le_to_cpu_32(txq_ctrl->txNumDeferred);
402
403         if (unlikely(txq->stopped)) {
404                 PMD_TX_LOG(DEBUG, "Tx queue is stopped.");
405                 return 0;
406         }
407
408         /* Free up the comp_descriptors aggressively */
409         vmxnet3_tq_tx_complete(txq);
410
411         nb_tx = 0;
412         while (nb_tx < nb_pkts) {
413                 Vmxnet3_GenericDesc *gdesc;
414                 vmxnet3_buf_info_t *tbi;
415                 uint32_t first2fill, avail, dw2;
416                 struct rte_mbuf *txm = tx_pkts[nb_tx];
417                 struct rte_mbuf *m_seg = txm;
418                 int copy_size = 0;
419                 bool tso = (txm->ol_flags & PKT_TX_TCP_SEG) != 0;
420                 /* # of descriptors needed for a packet. */
421                 unsigned count = txm->nb_segs;
422
423                 avail = vmxnet3_cmd_ring_desc_avail(&txq->cmd_ring);
424                 if (count > avail) {
425                         /* Is command ring full? */
426                         if (unlikely(avail == 0)) {
427                                 PMD_TX_LOG(DEBUG, "No free ring descriptors");
428                                 txq->stats.tx_ring_full++;
429                                 txq->stats.drop_total += (nb_pkts - nb_tx);
430                                 break;
431                         }
432
433                         /* Command ring is not full but cannot handle the
434                          * multi-segmented packet. Let's try the next packet
435                          * in this case.
436                          */
437                         PMD_TX_LOG(DEBUG, "Running out of ring descriptors "
438                                    "(avail %d needed %d)", avail, count);
439                         txq->stats.drop_total++;
440                         if (tso)
441                                 txq->stats.drop_tso++;
442                         rte_pktmbuf_free(txm);
443                         nb_tx++;
444                         continue;
445                 }
446
447                 /* Drop non-TSO packet that is excessively fragmented */
448                 if (unlikely(!tso && count > VMXNET3_MAX_TXD_PER_PKT)) {
449                         PMD_TX_LOG(ERR, "Non-TSO packet cannot occupy more than %d tx "
450                                    "descriptors. Packet dropped.", VMXNET3_MAX_TXD_PER_PKT);
451                         txq->stats.drop_too_many_segs++;
452                         txq->stats.drop_total++;
453                         rte_pktmbuf_free(txm);
454                         nb_tx++;
455                         continue;
456                 }
457
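                /*
                 * Small single-segment packets that fit in a Tx data-ring
                 * descriptor are copied into the data ring; the SOP
                 * descriptor is then pointed at the data ring (see the
                 * copy_size check below) instead of the mbuf's own buffer.
                 */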
458                 if (txm->nb_segs == 1 &&
459                     rte_pktmbuf_pkt_len(txm) <= txq->txdata_desc_size) {
460                         struct Vmxnet3_TxDataDesc *tdd;
461
462                         /* Skip empty packets */
463                         if (unlikely(rte_pktmbuf_pkt_len(txm) == 0)) {
464                                 txq->stats.drop_total++;
465                                 rte_pktmbuf_free(txm);
466                                 nb_tx++;
467                                 continue;
468                         }
469
470                         tdd = (struct Vmxnet3_TxDataDesc *)
471                                 ((uint8 *)txq->data_ring.base +
472                                  txq->cmd_ring.next2fill *
473                                  txq->txdata_desc_size);
474                         copy_size = rte_pktmbuf_pkt_len(txm);
475                         rte_memcpy(tdd->data, rte_pktmbuf_mtod(txm, char *), copy_size);
476                 }
477
478                 /* use the previous gen bit for the SOP desc */
479                 dw2 = (txq->cmd_ring.gen ^ 0x1) << VMXNET3_TXD_GEN_SHIFT;
480                 first2fill = txq->cmd_ring.next2fill;
481                 do {
482                         /* Remember the transmit buffer for cleanup */
483                         tbi = txq->cmd_ring.buf_info + txq->cmd_ring.next2fill;
484
485                         /* NB: the following assumes that the VMXNET3 maximum
486                          * transmit buffer size (16K) is greater than the
487                          * maximum mbuf segment size.
488                          */
489                         gdesc = txq->cmd_ring.base + txq->cmd_ring.next2fill;
490
491                         /* Skip empty segments */
492                         if (unlikely(m_seg->data_len == 0))
493                                 continue;
494
495                         if (copy_size) {
496                                 uint64 offset =
497                                         (uint64)txq->cmd_ring.next2fill *
498                                                         txq->txdata_desc_size;
499                                 gdesc->txd.addr =
500                                         rte_cpu_to_le_64(txq->data_ring.basePA +
501                                                          offset);
502                         } else {
503                                 gdesc->txd.addr = rte_mbuf_data_iova(m_seg);
504                         }
505
506                         gdesc->dword[2] = dw2 | m_seg->data_len;
507                         gdesc->dword[3] = 0;
508
509                         /* move to the next2fill descriptor */
510                         vmxnet3_cmd_ring_adv_next2fill(&txq->cmd_ring);
511
512                         /* use the right gen for non-SOP desc */
513                         dw2 = txq->cmd_ring.gen << VMXNET3_TXD_GEN_SHIFT;
514                 } while ((m_seg = m_seg->next) != NULL);
515
516                 /* set the last buf_info for the pkt */
517                 tbi->m = txm;
518                 /* Update the EOP descriptor */
519                 gdesc->dword[3] |= VMXNET3_TXD_EOP | VMXNET3_TXD_CQ;
520
521                 /* Add VLAN tag if present */
522                 gdesc = txq->cmd_ring.base + first2fill;
523                 if (txm->ol_flags & PKT_TX_VLAN_PKT) {
524                         gdesc->txd.ti = 1;
525                         gdesc->txd.tci = txm->vlan_tci;
526                 }
527
528                 if (tso) {
529                         uint16_t mss = txm->tso_segsz;
530
531                         RTE_ASSERT(mss > 0);
532
533                         gdesc->txd.hlen = txm->l2_len + txm->l3_len + txm->l4_len;
534                         gdesc->txd.om = VMXNET3_OM_TSO;
535                         gdesc->txd.msscof = mss;
536
537                         deferred += (rte_pktmbuf_pkt_len(txm) - gdesc->txd.hlen + mss - 1) / mss;
538                 } else if (txm->ol_flags & PKT_TX_L4_MASK) {
539                         gdesc->txd.om = VMXNET3_OM_CSUM;
540                         gdesc->txd.hlen = txm->l2_len + txm->l3_len;
541
542                         switch (txm->ol_flags & PKT_TX_L4_MASK) {
543                         case PKT_TX_TCP_CKSUM:
544                                 gdesc->txd.msscof = gdesc->txd.hlen + offsetof(struct tcp_hdr, cksum);
545                                 break;
546                         case PKT_TX_UDP_CKSUM:
547                                 gdesc->txd.msscof = gdesc->txd.hlen + offsetof(struct udp_hdr, dgram_cksum);
548                                 break;
549                         default:
550                                 PMD_TX_LOG(WARNING, "requested cksum offload not supported %#llx",
551                                            txm->ol_flags & PKT_TX_L4_MASK);
552                                 abort();
553                         }
554                         deferred++;
555                 } else {
556                         gdesc->txd.hlen = 0;
557                         gdesc->txd.om = VMXNET3_OM_NONE;
558                         gdesc->txd.msscof = 0;
559                         deferred++;
560                 }
561
562                 /* flip the GEN bit on the SOP */
563                 rte_compiler_barrier();
564                 gdesc->dword[2] ^= VMXNET3_TXD_GEN;
565
566                 txq_ctrl->txNumDeferred = rte_cpu_to_le_32(deferred);
567                 nb_tx++;
568         }
569
570         PMD_TX_LOG(DEBUG, "vmxnet3 txThreshold: %u", rte_le_to_cpu_32(txq_ctrl->txThreshold));
571
572         if (deferred >= rte_le_to_cpu_32(txq_ctrl->txThreshold)) {
573                 txq_ctrl->txNumDeferred = 0;
574                 /* Notify vSwitch that packets are available. */
575                 VMXNET3_WRITE_BAR0_REG(hw, (VMXNET3_REG_TXPROD + txq->queue_id * VMXNET3_REG_ALIGN),
576                                        txq->cmd_ring.next2fill);
577         }
578
579         return nb_tx;
580 }
581
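/*
 * Post one buffer to an Rx command ring: ring 0 receives HEAD-type buffers,
 * ring 1 BODY-type buffers.  The descriptor's gen bit is written last so
 * ownership passes to the device only once the address and length are set.
 */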
582 static inline void
583 vmxnet3_renew_desc(vmxnet3_rx_queue_t *rxq, uint8_t ring_id,
584                    struct rte_mbuf *mbuf)
585 {
586         uint32_t val;
587         struct vmxnet3_cmd_ring *ring = &rxq->cmd_ring[ring_id];
588         struct Vmxnet3_RxDesc *rxd =
589                 (struct Vmxnet3_RxDesc *)(ring->base + ring->next2fill);
590         vmxnet3_buf_info_t *buf_info = &ring->buf_info[ring->next2fill];
591
592         if (ring_id == 0) {
593                 /* Usually: One HEAD type buf per packet
594                  * val = (ring->next2fill % rxq->hw->bufs_per_pkt) ?
595                  * VMXNET3_RXD_BTYPE_BODY : VMXNET3_RXD_BTYPE_HEAD;
596                  */
597
598                 /* We use single packet buffer so all heads here */
599                 val = VMXNET3_RXD_BTYPE_HEAD;
600         } else {
601                 /* All BODY type buffers for 2nd ring */
602                 val = VMXNET3_RXD_BTYPE_BODY;
603         }
604
605         /*
606          * Load the mbuf pointer into the buf_info entry for this descriptor.
607          * The buf_info structure is the equivalent of a virtio-virtqueue cookie.
608          */
609         buf_info->m = mbuf;
610         buf_info->len = (uint16_t)(mbuf->buf_len - RTE_PKTMBUF_HEADROOM);
611         buf_info->bufPA = rte_mbuf_data_iova_default(mbuf);
612
613         /* Load Rx Descriptor with the buffer's GPA */
614         rxd->addr = buf_info->bufPA;
615
616         /* After this point rxd->addr MUST not be NULL */
617         rxd->btype = val;
618         rxd->len = buf_info->len;
619         /* Flip gen bit at the end to change ownership */
620         rxd->gen = ring->gen;
621
622         vmxnet3_cmd_ring_adv_next2fill(ring);
623 }
624 /*
625  *  Allocate mbufs and post Rx descriptors with the buffer details so that
626  *  the device can receive packets into those buffers.
627  *  Ring layout:
628  *      Of the two rings, the 1st ring contains buffers of type 0 and type 1.
629  *      bufs_per_pkt is set such that for non-LRO cases all the buffers required
630  *      by a frame fit in the 1st ring (1st buf of type 0, rest of type 1).
631  *      The 2nd ring contains buffers of type 1 alone and is mostly used
632  *      only for LRO.
633  */
634 static int
635 vmxnet3_post_rx_bufs(vmxnet3_rx_queue_t *rxq, uint8_t ring_id)
636 {
637         int err = 0;
638         uint32_t i = 0;
639         struct vmxnet3_cmd_ring *ring = &rxq->cmd_ring[ring_id];
640
641         while (vmxnet3_cmd_ring_desc_avail(ring) > 0) {
642                 struct rte_mbuf *mbuf;
643
644                 /* Allocate blank mbuf for the current Rx Descriptor */
645                 mbuf = rte_mbuf_raw_alloc(rxq->mp);
646                 if (unlikely(mbuf == NULL)) {
647                         PMD_RX_LOG(ERR, "Error allocating mbuf");
648                         rxq->stats.rx_buf_alloc_failure++;
649                         err = ENOMEM;
650                         break;
651                 }
652
653                 vmxnet3_renew_desc(rxq, ring_id, mbuf);
654                 i++;
655         }
656
657         /* Return error only if no buffers are posted at present */
658         if (vmxnet3_cmd_ring_desc_avail(ring) >= (ring->size - 1))
659                 return -err;
660         else
661                 return i;
662 }
663
664 /* MSS not provided by vmxnet3, guess one with available information */
665 static uint16_t
666 vmxnet3_guess_mss(struct vmxnet3_hw *hw, const Vmxnet3_RxCompDesc *rcd,
667                 struct rte_mbuf *rxm)
668 {
669         uint32_t hlen, slen;
670         struct rte_ipv4_hdr *ipv4_hdr;
671         struct rte_ipv6_hdr *ipv6_hdr;
672         struct tcp_hdr *tcp_hdr;
673         char *ptr;
674
675         RTE_ASSERT(rcd->tcp);
676
677         ptr = rte_pktmbuf_mtod(rxm, char *);
678         slen = rte_pktmbuf_data_len(rxm);
679         hlen = sizeof(struct rte_ether_hdr);
680
681         if (rcd->v4) {
682                 if (unlikely(slen < hlen + sizeof(struct rte_ipv4_hdr)))
683                         return hw->mtu - sizeof(struct rte_ipv4_hdr)
684                                         - sizeof(struct tcp_hdr);
685
686                 ipv4_hdr = (struct rte_ipv4_hdr *)(ptr + hlen);
687                 hlen += (ipv4_hdr->version_ihl & RTE_IPV4_HDR_IHL_MASK) *
688                                 RTE_IPV4_IHL_MULTIPLIER;
689         } else if (rcd->v6) {
690                 if (unlikely(slen < hlen + sizeof(struct rte_ipv6_hdr)))
691                         return hw->mtu - sizeof(struct rte_ipv6_hdr) -
692                                         sizeof(struct tcp_hdr);
693
694                 ipv6_hdr = (struct rte_ipv6_hdr *)(ptr + hlen);
695                 hlen += sizeof(struct rte_ipv6_hdr);
696                 if (unlikely(ipv6_hdr->proto != IPPROTO_TCP)) {
697                         int frag;
698
699                         rte_net_skip_ip6_ext(ipv6_hdr->proto, rxm,
700                                         &hlen, &frag);
701                 }
702         }
703
704         if (unlikely(slen < hlen + sizeof(struct tcp_hdr)))
705                 return hw->mtu - hlen - sizeof(struct tcp_hdr) +
706                                 sizeof(struct rte_ether_hdr);
707
708         tcp_hdr = (struct tcp_hdr *)(ptr + hlen);
709         hlen += (tcp_hdr->data_off & 0xf0) >> 2;
710
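        /*
         * udata64 carries the LRO segment count recorded by
         * vmxnet3_rx_offload(); when it is known, derive the MSS from the
         * aggregated payload length, otherwise fall back to an MTU-based
         * estimate.
         */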
711         if (rxm->udata64 > 1)
712                 return (rte_pktmbuf_pkt_len(rxm) - hlen +
713                                 rxm->udata64 - 1) / rxm->udata64;
714         else
715                 return hw->mtu - hlen + sizeof(struct rte_ether_hdr);
716 }
717
718 /* Receive side checksum and other offloads */
719 static inline void
720 vmxnet3_rx_offload(struct vmxnet3_hw *hw, const Vmxnet3_RxCompDesc *rcd,
721                 struct rte_mbuf *rxm, const uint8_t sop)
722 {
723         uint64_t ol_flags = rxm->ol_flags;
724         uint32_t packet_type = rxm->packet_type;
725
726         /* Offloads set in sop */
727         if (sop) {
728                 /* Set packet type */
729                 packet_type |= RTE_PTYPE_L2_ETHER;
730
731                 /* Check large packet receive */
732                 if (VMXNET3_VERSION_GE_2(hw) &&
733                     rcd->type == VMXNET3_CDTYPE_RXCOMP_LRO) {
734                         const Vmxnet3_RxCompDescExt *rcde =
735                                         (const Vmxnet3_RxCompDescExt *)rcd;
736
737                         rxm->tso_segsz = rcde->mss;
738                         rxm->udata64 = rcde->segCnt;
739                         ol_flags |= PKT_RX_LRO;
740                 }
741         } else { /* Offloads set in eop */
742                 /* Check for RSS */
743                 if (rcd->rssType != VMXNET3_RCD_RSS_TYPE_NONE) {
744                         ol_flags |= PKT_RX_RSS_HASH;
745                         rxm->hash.rss = rcd->rssHash;
746                 }
747
748                 /* Check for hardware stripped VLAN tag */
749                 if (rcd->ts) {
750                         ol_flags |= (PKT_RX_VLAN | PKT_RX_VLAN_STRIPPED);
751                         rxm->vlan_tci = rte_le_to_cpu_16((uint16_t)rcd->tci);
752                 }
753
754                 /* Check packet type, checksum errors, etc. */
755                 if (rcd->cnc) {
756                         ol_flags |= PKT_RX_L4_CKSUM_UNKNOWN;
757                 } else {
758                         if (rcd->v4) {
759                                 packet_type |= RTE_PTYPE_L3_IPV4_EXT_UNKNOWN;
760
761                                 if (rcd->ipc)
762                                         ol_flags |= PKT_RX_IP_CKSUM_GOOD;
763                                 else
764                                         ol_flags |= PKT_RX_IP_CKSUM_BAD;
765
766                                 if (rcd->tuc) {
767                                         ol_flags |= PKT_RX_L4_CKSUM_GOOD;
768                                         if (rcd->tcp)
769                                                 packet_type |= RTE_PTYPE_L4_TCP;
770                                         else
771                                                 packet_type |= RTE_PTYPE_L4_UDP;
772                                 } else {
773                                         if (rcd->tcp) {
774                                                 packet_type |= RTE_PTYPE_L4_TCP;
775                                                 ol_flags |= PKT_RX_L4_CKSUM_BAD;
776                                         } else if (rcd->udp) {
777                                                 packet_type |= RTE_PTYPE_L4_UDP;
778                                                 ol_flags |= PKT_RX_L4_CKSUM_BAD;
779                                         }
780                                 }
781                         } else if (rcd->v6) {
782                                 packet_type |= RTE_PTYPE_L3_IPV6_EXT_UNKNOWN;
783
784                                 if (rcd->tuc) {
785                                         ol_flags |= PKT_RX_L4_CKSUM_GOOD;
786                                         if (rcd->tcp)
787                                                 packet_type |= RTE_PTYPE_L4_TCP;
788                                         else
789                                                 packet_type |= RTE_PTYPE_L4_UDP;
790                                 } else {
791                                         if (rcd->tcp) {
792                                                 packet_type |= RTE_PTYPE_L4_TCP;
793                                                 ol_flags |= PKT_RX_L4_CKSUM_BAD;
794                                         } else if (rcd->udp) {
795                                                 packet_type |= RTE_PTYPE_L4_UDP;
796                                                 ol_flags |= PKT_RX_L4_CKSUM_BAD;
797                                         }
798                                 }
799                         } else {
800                                 packet_type |= RTE_PTYPE_UNKNOWN;
801                         }
802
803                         /* Old variants of vmxnet3 do not provide MSS */
804                         if ((ol_flags & PKT_RX_LRO) && rxm->tso_segsz == 0)
805                                 rxm->tso_segsz = vmxnet3_guess_mss(hw,
806                                                 rcd, rxm);
807                 }
808         }
809
810         rxm->ol_flags = ol_flags;
811         rxm->packet_type = packet_type;
812 }
813
814 /*
815  * Process the Rx completion ring of the given vmxnet3_rx_queue for a burst
816  * of up to nb_pkts packets and return the number of packets received.
817  */
818 uint16_t
819 vmxnet3_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t nb_pkts)
820 {
821         uint16_t nb_rx;
822         uint32_t nb_rxd, idx;
823         uint8_t ring_idx;
824         vmxnet3_rx_queue_t *rxq;
825         Vmxnet3_RxCompDesc *rcd;
826         vmxnet3_buf_info_t *rbi;
827         Vmxnet3_RxDesc *rxd;
828         struct rte_mbuf *rxm = NULL;
829         struct vmxnet3_hw *hw;
830
831         nb_rx = 0;
832         ring_idx = 0;
833         nb_rxd = 0;
834         idx = 0;
835
836         rxq = rx_queue;
837         hw = rxq->hw;
838
839         rcd = &rxq->comp_ring.base[rxq->comp_ring.next2proc].rcd;
840
841         if (unlikely(rxq->stopped)) {
842                 PMD_RX_LOG(DEBUG, "Rx queue is stopped.");
843                 return 0;
844         }
845
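        /*
         * Consume completion descriptors while their gen bit matches the
         * ring's current generation; a mismatch means the device has not
         * written that entry yet.
         */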
846         while (rcd->gen == rxq->comp_ring.gen) {
847                 struct rte_mbuf *newm;
848
849                 if (nb_rx >= nb_pkts)
850                         break;
851
852                 newm = rte_mbuf_raw_alloc(rxq->mp);
853                 if (unlikely(newm == NULL)) {
854                         PMD_RX_LOG(ERR, "Error allocating mbuf");
855                         rxq->stats.rx_buf_alloc_failure++;
856                         break;
857                 }
858
859                 idx = rcd->rxdIdx;
860                 ring_idx = vmxnet3_get_ring_idx(hw, rcd->rqID);
861                 rxd = (Vmxnet3_RxDesc *)rxq->cmd_ring[ring_idx].base + idx;
862                 RTE_SET_USED(rxd); /* used only for assert when enabled */
863                 rbi = rxq->cmd_ring[ring_idx].buf_info + idx;
864
865                 PMD_RX_LOG(DEBUG, "rxd idx: %d ring idx: %d.", idx, ring_idx);
866
867                 RTE_ASSERT(rcd->len <= rxd->len);
868                 RTE_ASSERT(rbi->m);
869
870                 /* Get the packet buffer pointer from buf_info */
871                 rxm = rbi->m;
872
873                 /* Clear descriptor associated buf_info to be reused */
874                 rbi->m = NULL;
875                 rbi->bufPA = 0;
876
877                 /* Update the index that we received a packet */
878                 rxq->cmd_ring[ring_idx].next2comp = idx;
879
880                 /* For RCD with EOP set, check if there is frame error */
881                 if (unlikely(rcd->eop && rcd->err)) {
882                         rxq->stats.drop_total++;
883                         rxq->stats.drop_err++;
884
885                         if (!rcd->fcs) {
886                                 rxq->stats.drop_fcs++;
887                                 PMD_RX_LOG(ERR, "Recv packet dropped due to frame err.");
888                         }
889                         PMD_RX_LOG(ERR, "Error in received packet rcd#:%d rxd:%d",
890                                    (int)(rcd - (struct Vmxnet3_RxCompDesc *)
891                                          rxq->comp_ring.base), rcd->rxdIdx);
892                         rte_pktmbuf_free_seg(rxm);
893                         if (rxq->start_seg) {
894                                 struct rte_mbuf *start = rxq->start_seg;
895
896                                 rxq->start_seg = NULL;
897                                 rte_pktmbuf_free(start);
898                         }
899                         goto rcd_done;
900                 }
901
902                 /* Initialize newly received packet buffer */
903                 rxm->port = rxq->port_id;
904                 rxm->nb_segs = 1;
905                 rxm->next = NULL;
906                 rxm->pkt_len = (uint16_t)rcd->len;
907                 rxm->data_len = (uint16_t)rcd->len;
908                 rxm->data_off = RTE_PKTMBUF_HEADROOM;
909                 rxm->ol_flags = 0;
910                 rxm->vlan_tci = 0;
911                 rxm->packet_type = 0;
912
913                 /*
914                  * If this is the first buffer of the received packet,
915                  * set the pointer to the first mbuf of the packet
916                  * Otherwise, update the total length and the number of segments
917                  * of the current scattered packet, and update the pointer to
918                  * the last mbuf of the current packet.
919                  */
920                 if (rcd->sop) {
921                         RTE_ASSERT(rxd->btype == VMXNET3_RXD_BTYPE_HEAD);
922
923                         if (unlikely(rcd->len == 0)) {
924                                 RTE_ASSERT(rcd->eop);
925
926                                 PMD_RX_LOG(DEBUG,
927                                            "Rx buf was skipped. rxring[%d][%d])",
928                                            ring_idx, idx);
929                                 rte_pktmbuf_free_seg(rxm);
930                                 goto rcd_done;
931                         }
932
933                         if (vmxnet3_rx_data_ring(hw, rcd->rqID)) {
934                                 uint8_t *rdd = rxq->data_ring.base +
935                                         idx * rxq->data_desc_size;
936
937                                 RTE_ASSERT(VMXNET3_VERSION_GE_3(hw));
938                                 rte_memcpy(rte_pktmbuf_mtod(rxm, char *),
939                                            rdd, rcd->len);
940                         }
941
942                         rxq->start_seg = rxm;
943                         rxq->last_seg = rxm;
944                         vmxnet3_rx_offload(hw, rcd, rxm, 1);
945                 } else {
946                         struct rte_mbuf *start = rxq->start_seg;
947
948                         RTE_ASSERT(rxd->btype == VMXNET3_RXD_BTYPE_BODY);
949
950                         if (rxm->data_len) {
951                                 start->pkt_len += rxm->data_len;
952                                 start->nb_segs++;
953
954                                 rxq->last_seg->next = rxm;
955                                 rxq->last_seg = rxm;
956                         } else {
957                                 rte_pktmbuf_free_seg(rxm);
958                         }
959                 }
960
961                 if (rcd->eop) {
962                         struct rte_mbuf *start = rxq->start_seg;
963
964                         vmxnet3_rx_offload(hw, rcd, start, 0);
965                         rx_pkts[nb_rx++] = start;
966                         rxq->start_seg = NULL;
967                 }
968
969 rcd_done:
970                 rxq->cmd_ring[ring_idx].next2comp = idx;
971                 VMXNET3_INC_RING_IDX_ONLY(rxq->cmd_ring[ring_idx].next2comp,
972                                           rxq->cmd_ring[ring_idx].size);
973
974                 /* It's time to renew descriptors */
975                 vmxnet3_renew_desc(rxq, ring_idx, newm);
976                 if (unlikely(rxq->shared->ctrl.updateRxProd)) {
977                         VMXNET3_WRITE_BAR0_REG(hw, rxprod_reg[ring_idx] + (rxq->queue_id * VMXNET3_REG_ALIGN),
978                                                rxq->cmd_ring[ring_idx].next2fill);
979                 }
980
981                 /* Advance to the next descriptor in comp_ring */
982                 vmxnet3_comp_ring_adv_next2proc(&rxq->comp_ring);
983
984                 rcd = &rxq->comp_ring.base[rxq->comp_ring.next2proc].rcd;
985                 nb_rxd++;
986                 if (nb_rxd > rxq->cmd_ring[0].size) {
987                         PMD_RX_LOG(ERR, "Used up quota of receiving packets,"
988                                    " relinquish control.");
989                         break;
990                 }
991         }
992
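        /*
         * If nothing was received, opportunistically refill command-ring
         * slots left empty by earlier mbuf allocation failures and let the
         * device know about the new buffers.
         */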
993         if (unlikely(nb_rxd == 0)) {
994                 uint32_t avail;
995                 for (ring_idx = 0; ring_idx < VMXNET3_RX_CMDRING_SIZE; ring_idx++) {
996                         avail = vmxnet3_cmd_ring_desc_avail(&rxq->cmd_ring[ring_idx]);
997                         if (unlikely(avail > 0)) {
998                                 /* try to alloc new buf and renew descriptors */
999                                 vmxnet3_post_rx_bufs(rxq, ring_idx);
1000                         }
1001                 }
1002                 if (unlikely(rxq->shared->ctrl.updateRxProd)) {
1003                         for (ring_idx = 0; ring_idx < VMXNET3_RX_CMDRING_SIZE; ring_idx++) {
1004                                 VMXNET3_WRITE_BAR0_REG(hw, rxprod_reg[ring_idx] + (rxq->queue_id * VMXNET3_REG_ALIGN),
1005                                                        rxq->cmd_ring[ring_idx].next2fill);
1006                         }
1007                 }
1008         }
1009
1010         return nb_rx;
1011 }
1012
1013 int
1014 vmxnet3_dev_tx_queue_setup(struct rte_eth_dev *dev,
1015                            uint16_t queue_idx,
1016                            uint16_t nb_desc,
1017                            unsigned int socket_id,
1018                            const struct rte_eth_txconf *tx_conf __rte_unused)
1019 {
1020         struct vmxnet3_hw *hw = dev->data->dev_private;
1021         const struct rte_memzone *mz;
1022         struct vmxnet3_tx_queue *txq;
1023         struct vmxnet3_cmd_ring *ring;
1024         struct vmxnet3_comp_ring *comp_ring;
1025         struct vmxnet3_data_ring *data_ring;
1026         int size;
1027
1028         PMD_INIT_FUNC_TRACE();
1029
1030         txq = rte_zmalloc("ethdev_tx_queue", sizeof(struct vmxnet3_tx_queue),
1031                           RTE_CACHE_LINE_SIZE);
1032         if (txq == NULL) {
1033                 PMD_INIT_LOG(ERR, "Can not allocate tx queue structure");
1034                 return -ENOMEM;
1035         }
1036
1037         txq->queue_id = queue_idx;
1038         txq->port_id = dev->data->port_id;
1039         txq->shared = NULL; /* set in vmxnet3_setup_driver_shared() */
1040         txq->hw = hw;
1041         txq->qid = queue_idx;
1042         txq->stopped = TRUE;
1043         txq->txdata_desc_size = hw->txdata_desc_size;
1044
1045         ring = &txq->cmd_ring;
1046         comp_ring = &txq->comp_ring;
1047         data_ring = &txq->data_ring;
1048
1049         /* Tx vmxnet ring length should be between 512 and 4096 */
1050         if (nb_desc < VMXNET3_DEF_TX_RING_SIZE) {
1051                 PMD_INIT_LOG(ERR, "VMXNET3 Tx Ring Size Min: %u",
1052                              VMXNET3_DEF_TX_RING_SIZE);
1053                 return -EINVAL;
1054         } else if (nb_desc > VMXNET3_TX_RING_MAX_SIZE) {
1055                 PMD_INIT_LOG(ERR, "VMXNET3 Tx Ring Size Max: %u",
1056                              VMXNET3_TX_RING_MAX_SIZE);
1057                 return -EINVAL;
1058         } else {
1059                 ring->size = nb_desc;
1060                 ring->size &= ~VMXNET3_RING_SIZE_MASK;
1061         }
1062         comp_ring->size = data_ring->size = ring->size;
1063
1064         /* Tx vmxnet rings structure initialization */
1065         ring->next2fill = 0;
1066         ring->next2comp = 0;
1067         ring->gen = VMXNET3_INIT_GEN;
1068         comp_ring->next2proc = 0;
1069         comp_ring->gen = VMXNET3_INIT_GEN;
1070
1071         size = sizeof(struct Vmxnet3_TxDesc) * ring->size;
1072         size += sizeof(struct Vmxnet3_TxCompDesc) * comp_ring->size;
1073         size += txq->txdata_desc_size * data_ring->size;
1074
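        /*
         * A single DMA memzone holds the Tx command ring, completion ring
         * and data ring back to back; the per-ring base addresses below are
         * carved out of it in that order.
         */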
1075         mz = rte_eth_dma_zone_reserve(dev, "txdesc", queue_idx, size,
1076                                       VMXNET3_RING_BA_ALIGN, socket_id);
1077         if (mz == NULL) {
1078                 PMD_INIT_LOG(ERR, "ERROR: Creating queue descriptors zone");
1079                 return -ENOMEM;
1080         }
1081         txq->mz = mz;
1082         memset(mz->addr, 0, mz->len);
1083
1084         /* cmd_ring initialization */
1085         ring->base = mz->addr;
1086         ring->basePA = mz->iova;
1087
1088         /* comp_ring initialization */
1089         comp_ring->base = ring->base + ring->size;
1090         comp_ring->basePA = ring->basePA +
1091                 (sizeof(struct Vmxnet3_TxDesc) * ring->size);
1092
1093         /* data_ring initialization */
1094         data_ring->base = (Vmxnet3_TxDataDesc *)(comp_ring->base + comp_ring->size);
1095         data_ring->basePA = comp_ring->basePA +
1096                         (sizeof(struct Vmxnet3_TxCompDesc) * comp_ring->size);
1097
1098         /* cmd_ring0 buf_info allocation */
1099         ring->buf_info = rte_zmalloc("tx_ring_buf_info",
1100                                      ring->size * sizeof(vmxnet3_buf_info_t), RTE_CACHE_LINE_SIZE);
1101         if (ring->buf_info == NULL) {
1102                 PMD_INIT_LOG(ERR, "ERROR: Creating tx_buf_info structure");
1103                 return -ENOMEM;
1104         }
1105
1106         /* Update the data portion with txq */
1107         dev->data->tx_queues[queue_idx] = txq;
1108
1109         return 0;
1110 }
1111
1112 int
1113 vmxnet3_dev_rx_queue_setup(struct rte_eth_dev *dev,
1114                            uint16_t queue_idx,
1115                            uint16_t nb_desc,
1116                            unsigned int socket_id,
1117                            __rte_unused const struct rte_eth_rxconf *rx_conf,
1118                            struct rte_mempool *mp)
1119 {
1120         const struct rte_memzone *mz;
1121         struct vmxnet3_rx_queue *rxq;
1122         struct vmxnet3_hw *hw = dev->data->dev_private;
1123         struct vmxnet3_cmd_ring *ring0, *ring1, *ring;
1124         struct vmxnet3_comp_ring *comp_ring;
1125         struct vmxnet3_rx_data_ring *data_ring;
1126         int size;
1127         uint8_t i;
1128         char mem_name[32];
1129
1130         PMD_INIT_FUNC_TRACE();
1131
1132         rxq = rte_zmalloc("ethdev_rx_queue", sizeof(struct vmxnet3_rx_queue),
1133                           RTE_CACHE_LINE_SIZE);
1134         if (rxq == NULL) {
1135                 PMD_INIT_LOG(ERR, "Can not allocate rx queue structure");
1136                 return -ENOMEM;
1137         }
1138
1139         rxq->mp = mp;
1140         rxq->queue_id = queue_idx;
1141         rxq->port_id = dev->data->port_id;
1142         rxq->shared = NULL; /* set in vmxnet3_setup_driver_shared() */
1143         rxq->hw = hw;
1144         rxq->qid1 = queue_idx;
1145         rxq->qid2 = queue_idx + hw->num_rx_queues;
1146         rxq->data_ring_qid = queue_idx + 2 * hw->num_rx_queues;
1147         rxq->data_desc_size = hw->rxdata_desc_size;
1148         rxq->stopped = TRUE;
1149
1150         ring0 = &rxq->cmd_ring[0];
1151         ring1 = &rxq->cmd_ring[1];
1152         comp_ring = &rxq->comp_ring;
1153         data_ring = &rxq->data_ring;
1154
1155         /* Rx vmxnet ring length should be between 256 and 4096 */
1156         if (nb_desc < VMXNET3_DEF_RX_RING_SIZE) {
1157                 PMD_INIT_LOG(ERR, "VMXNET3 Rx Ring Size Min: 256");
1158                 return -EINVAL;
1159         } else if (nb_desc > VMXNET3_RX_RING_MAX_SIZE) {
1160                 PMD_INIT_LOG(ERR, "VMXNET3 Rx Ring Size Max: 4096");
1161                 return -EINVAL;
1162         } else {
1163                 ring0->size = nb_desc;
1164                 ring0->size &= ~VMXNET3_RING_SIZE_MASK;
1165                 ring1->size = ring0->size;
1166         }
1167
1168         comp_ring->size = ring0->size + ring1->size;
1169         data_ring->size = ring0->size;
1170
1171         /* Rx vmxnet rings structure initialization */
1172         ring0->next2fill = 0;
1173         ring1->next2fill = 0;
1174         ring0->next2comp = 0;
1175         ring1->next2comp = 0;
1176         ring0->gen = VMXNET3_INIT_GEN;
1177         ring1->gen = VMXNET3_INIT_GEN;
1178         comp_ring->next2proc = 0;
1179         comp_ring->gen = VMXNET3_INIT_GEN;
1180
1181         size = sizeof(struct Vmxnet3_RxDesc) * (ring0->size + ring1->size);
1182         size += sizeof(struct Vmxnet3_RxCompDesc) * comp_ring->size;
1183         if (VMXNET3_VERSION_GE_3(hw) && rxq->data_desc_size)
1184                 size += rxq->data_desc_size * data_ring->size;
1185
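        /*
         * One DMA memzone holds cmd ring 0, cmd ring 1, the completion ring
         * and (on v3+ devices with a data-ring size configured) the Rx data
         * ring, laid out contiguously in that order.
         */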
1186         mz = rte_eth_dma_zone_reserve(dev, "rxdesc", queue_idx, size,
1187                                       VMXNET3_RING_BA_ALIGN, socket_id);
1188         if (mz == NULL) {
1189                 PMD_INIT_LOG(ERR, "ERROR: Creating queue descriptors zone");
1190                 return -ENOMEM;
1191         }
1192         rxq->mz = mz;
1193         memset(mz->addr, 0, mz->len);
1194
1195         /* cmd_ring0 initialization */
1196         ring0->base = mz->addr;
1197         ring0->basePA = mz->iova;
1198
1199         /* cmd_ring1 initialization */
1200         ring1->base = ring0->base + ring0->size;
1201         ring1->basePA = ring0->basePA + sizeof(struct Vmxnet3_RxDesc) * ring0->size;
1202
1203         /* comp_ring initialization */
1204         comp_ring->base = ring1->base + ring1->size;
1205         comp_ring->basePA = ring1->basePA + sizeof(struct Vmxnet3_RxDesc) *
1206                 ring1->size;
1207
1208         /* data_ring initialization */
1209         if (VMXNET3_VERSION_GE_3(hw) && rxq->data_desc_size) {
1210                 data_ring->base =
1211                         (uint8_t *)(comp_ring->base + comp_ring->size);
1212                 data_ring->basePA = comp_ring->basePA +
1213                         sizeof(struct Vmxnet3_RxCompDesc) * comp_ring->size;
1214         }
1215
1216         /* cmd_ring0-cmd_ring1 buf_info allocation */
1217         for (i = 0; i < VMXNET3_RX_CMDRING_SIZE; i++) {
1218
1219                 ring = &rxq->cmd_ring[i];
1220                 ring->rid = i;
1221                 snprintf(mem_name, sizeof(mem_name), "rx_ring_%d_buf_info", i);
1222
1223                 ring->buf_info = rte_zmalloc(mem_name,
1224                                              ring->size * sizeof(vmxnet3_buf_info_t),
1225                                              RTE_CACHE_LINE_SIZE);
1226                 if (ring->buf_info == NULL) {
1227                         PMD_INIT_LOG(ERR, "ERROR: Creating rx_buf_info structure");
1228                         return -ENOMEM;
1229                 }
1230         }
1231
1232         /* Update the data portion with rxq */
1233         dev->data->rx_queues[queue_idx] = rxq;
1234
1235         return 0;
1236 }
1237
1238 /*
1239  * Initializes Receive Unit
1240  * Load mbufs in rx queue in advance
1241  */
1242 int
1243 vmxnet3_dev_rxtx_init(struct rte_eth_dev *dev)
1244 {
1245         struct vmxnet3_hw *hw = dev->data->dev_private;
1246
1247         int i, ret;
1248         uint8_t j;
1249
1250         PMD_INIT_FUNC_TRACE();
1251
1252         for (i = 0; i < hw->num_rx_queues; i++) {
1253                 vmxnet3_rx_queue_t *rxq = dev->data->rx_queues[i];
1254
1255                 for (j = 0; j < VMXNET3_RX_CMDRING_SIZE; j++) {
1256                         /* Post buffers for the full ring; vmxnet3_post_rx_bufs() fills every free slot */
1257                         ret = vmxnet3_post_rx_bufs(rxq, j);
1258                         if (ret <= 0) {
1259                                 PMD_INIT_LOG(ERR,
1260                                              "ERROR: Posting Rxq: %d buffers ring: %d",
1261                                              i, j);
1262                                 return -ret;
1263                         }
1264                         /*
1265                          * Update the device with the next2fill index so it can
1266                          * use the newly posted mbufs for incoming packets.
1267                          */
1268                         if (unlikely(rxq->shared->ctrl.updateRxProd)) {
1269                                 VMXNET3_WRITE_BAR0_REG(hw, rxprod_reg[j] + (rxq->queue_id * VMXNET3_REG_ALIGN),
1270                                                        rxq->cmd_ring[j].next2fill);
1271                         }
1272                 }
1273                 rxq->stopped = FALSE;
1274                 rxq->start_seg = NULL;
1275         }
1276
1277         for (i = 0; i < dev->data->nb_tx_queues; i++) {
1278                 struct vmxnet3_tx_queue *txq = dev->data->tx_queues[i];
1279
1280                 txq->stopped = FALSE;
1281         }
1282
1283         return 0;
1284 }
1285
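/*
 * Default Toeplitz hash key, used by vmxnet3_rss_configure() when the
 * application does not supply its own RSS key.
 */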
1286 static uint8_t rss_intel_key[40] = {
1287         0x6D, 0x5A, 0x56, 0xDA, 0x25, 0x5B, 0x0E, 0xC2,
1288         0x41, 0x67, 0x25, 0x3D, 0x43, 0xA3, 0x8F, 0xB0,
1289         0xD0, 0xCA, 0x2B, 0xCB, 0xAE, 0x7B, 0x30, 0xB4,
1290         0x77, 0xCB, 0x2D, 0xA3, 0x80, 0x30, 0xF2, 0x0C,
1291         0x6A, 0x42, 0xB7, 0x3B, 0xBE, 0xAC, 0x01, 0xFA,
1292 };
1293
1294 /*
1295  * Additional RSS configurations based on vmxnet v4+ APIs
1296  */
1297 int
1298 vmxnet3_v4_rss_configure(struct rte_eth_dev *dev)
1299 {
1300         struct vmxnet3_hw *hw = dev->data->dev_private;
1301         Vmxnet3_DriverShared *shared = hw->shared;
1302         Vmxnet3_CmdInfo *cmdInfo = &shared->cu.cmdInfo;
1303         struct rte_eth_rss_conf *port_rss_conf;
1304         uint64_t rss_hf;
1305         uint32_t ret;
1306
1307         PMD_INIT_FUNC_TRACE();
1308
1309         cmdInfo->setRSSFields = 0;
1310         port_rss_conf = &dev->data->dev_conf.rx_adv_conf.rss_conf;
1311         rss_hf = port_rss_conf->rss_hf &
1312                 (VMXNET3_V4_RSS_MASK | VMXNET3_RSS_OFFLOAD_ALL);
1313
1314         if (rss_hf & ETH_RSS_NONFRAG_IPV4_TCP)
1315                 cmdInfo->setRSSFields |= VMXNET3_RSS_FIELDS_TCPIP4;
1316         if (rss_hf & ETH_RSS_NONFRAG_IPV6_TCP)
1317                 cmdInfo->setRSSFields |= VMXNET3_RSS_FIELDS_TCPIP6;
1318         if (rss_hf & ETH_RSS_NONFRAG_IPV4_UDP)
1319                 cmdInfo->setRSSFields |= VMXNET3_RSS_FIELDS_UDPIP4;
1320         if (rss_hf & ETH_RSS_NONFRAG_IPV6_UDP)
1321                 cmdInfo->setRSSFields |= VMXNET3_RSS_FIELDS_UDPIP6;
1322
1323         VMXNET3_WRITE_BAR1_REG(hw, VMXNET3_REG_CMD,
1324                                VMXNET3_CMD_SET_RSS_FIELDS);
1325         ret = VMXNET3_READ_BAR1_REG(hw, VMXNET3_REG_CMD);
1326
1327         if (ret != VMXNET3_SUCCESS) {
1328                 PMD_DRV_LOG(ERR, "Set RSS fields (v4) failed: %d", ret);
1329         }
1330
1331         return ret;
1332 }
1333
1334 /*
1335  * Configure RSS feature
1336  */
1337 int
1338 vmxnet3_rss_configure(struct rte_eth_dev *dev)
1339 {
1340         struct vmxnet3_hw *hw = dev->data->dev_private;
1341         struct VMXNET3_RSSConf *dev_rss_conf;
1342         struct rte_eth_rss_conf *port_rss_conf;
1343         uint64_t rss_hf;
1344         uint8_t i, j;
1345
1346         PMD_INIT_FUNC_TRACE();
1347
1348         dev_rss_conf = hw->rss_conf;
1349         port_rss_conf = &dev->data->dev_conf.rx_adv_conf.rss_conf;
1350
1351         /* loading hashFunc */
1352         dev_rss_conf->hashFunc = VMXNET3_RSS_HASH_FUNC_TOEPLITZ;
1353         /* loading hashKeySize */
1354         dev_rss_conf->hashKeySize = VMXNET3_RSS_MAX_KEY_SIZE;
1355         /* loading indTableSize: Must not exceed VMXNET3_RSS_MAX_IND_TABLE_SIZE (128) */
1356         dev_rss_conf->indTableSize = (uint16_t)(hw->num_rx_queues * 4);
1357
1358         if (port_rss_conf->rss_key == NULL) {
1359                 /* Default hash key */
1360                 port_rss_conf->rss_key = rss_intel_key;
1361         }
1362
1363         /* loading hashKey */
1364         memcpy(&dev_rss_conf->hashKey[0], port_rss_conf->rss_key,
1365                dev_rss_conf->hashKeySize);
1366
1367         /* loading indTable */
1368         for (i = 0, j = 0; i < dev_rss_conf->indTableSize; i++, j++) {
1369                 if (j == dev->data->nb_rx_queues)
1370                         j = 0;
1371                 dev_rss_conf->indTable[i] = j;
1372         }
1373
1374         /* loading hashType */
1375         dev_rss_conf->hashType = 0;
1376         rss_hf = port_rss_conf->rss_hf & VMXNET3_RSS_OFFLOAD_ALL;
1377         if (rss_hf & ETH_RSS_IPV4)
1378                 dev_rss_conf->hashType |= VMXNET3_RSS_HASH_TYPE_IPV4;
1379         if (rss_hf & ETH_RSS_NONFRAG_IPV4_TCP)
1380                 dev_rss_conf->hashType |= VMXNET3_RSS_HASH_TYPE_TCP_IPV4;
1381         if (rss_hf & ETH_RSS_IPV6)
1382                 dev_rss_conf->hashType |= VMXNET3_RSS_HASH_TYPE_IPV6;
1383         if (rss_hf & ETH_RSS_NONFRAG_IPV6_TCP)
1384                 dev_rss_conf->hashType |= VMXNET3_RSS_HASH_TYPE_TCP_IPV6;
1385
1386         return VMXNET3_SUCCESS;
1387 }