net: add rte prefix to UDP structure
[dpdk.git] / drivers / net / vmxnet3 / vmxnet3_rxtx.c
1 /* SPDX-License-Identifier: BSD-3-Clause
2  * Copyright(c) 2010-2015 Intel Corporation
3  */
4
5 #include <sys/queue.h>
6
7 #include <stdio.h>
8 #include <stdlib.h>
9 #include <string.h>
10 #include <errno.h>
11 #include <stdint.h>
12 #include <stdarg.h>
13 #include <unistd.h>
14 #include <inttypes.h>
15
16 #include <rte_byteorder.h>
17 #include <rte_common.h>
18 #include <rte_cycles.h>
19 #include <rte_log.h>
20 #include <rte_debug.h>
21 #include <rte_interrupts.h>
22 #include <rte_pci.h>
23 #include <rte_memory.h>
24 #include <rte_memzone.h>
25 #include <rte_launch.h>
26 #include <rte_eal.h>
27 #include <rte_per_lcore.h>
28 #include <rte_lcore.h>
29 #include <rte_atomic.h>
30 #include <rte_branch_prediction.h>
31 #include <rte_mempool.h>
32 #include <rte_malloc.h>
33 #include <rte_mbuf.h>
34 #include <rte_ether.h>
35 #include <rte_ethdev_driver.h>
36 #include <rte_prefetch.h>
37 #include <rte_ip.h>
38 #include <rte_udp.h>
39 #include <rte_tcp.h>
40 #include <rte_sctp.h>
41 #include <rte_string_fns.h>
42 #include <rte_errno.h>
43 #include <rte_net.h>
44
45 #include "base/vmxnet3_defs.h"
46 #include "vmxnet3_ring.h"
47
48 #include "vmxnet3_logs.h"
49 #include "vmxnet3_ethdev.h"
50
51 #define VMXNET3_TX_OFFLOAD_MASK ( \
52                 PKT_TX_VLAN_PKT | \
53                 PKT_TX_IPV6 |     \
54                 PKT_TX_IPV4 |     \
55                 PKT_TX_L4_MASK |  \
56                 PKT_TX_TCP_SEG)
57
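/* Offload bits outside VMXNET3_TX_OFFLOAD_MASK are not supported; the Tx
 * prepare path rejects packets that request any of them via this complement
 * mask.
 */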
58 #define VMXNET3_TX_OFFLOAD_NOTSUP_MASK  \
59         (PKT_TX_OFFLOAD_MASK ^ VMXNET3_TX_OFFLOAD_MASK)
60
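/* BAR0 producer registers, one per Rx command ring; they are written with
 * next2fill to tell the device how far each ring has been replenished.
 */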
61 static const uint32_t rxprod_reg[2] = {VMXNET3_REG_RXPROD, VMXNET3_REG_RXPROD2};
62
63 static int vmxnet3_post_rx_bufs(vmxnet3_rx_queue_t*, uint8_t);
64 static void vmxnet3_tq_tx_complete(vmxnet3_tx_queue_t *);
65 #ifdef RTE_LIBRTE_VMXNET3_DEBUG_DRIVER_NOT_USED
66 static void vmxnet3_rxq_dump(struct vmxnet3_rx_queue *);
67 static void vmxnet3_txq_dump(struct vmxnet3_tx_queue *);
68 #endif
69
70 #ifdef RTE_LIBRTE_VMXNET3_DEBUG_DRIVER_NOT_USED
71 static void
72 vmxnet3_rxq_dump(struct vmxnet3_rx_queue *rxq)
73 {
74         uint32_t avail = 0;
75
76         if (rxq == NULL)
77                 return;
78
79         PMD_RX_LOG(DEBUG,
80                    "RXQ: cmd0 base : %p cmd1 base : %p comp ring base : %p.",
81                    rxq->cmd_ring[0].base, rxq->cmd_ring[1].base, rxq->comp_ring.base);
82         PMD_RX_LOG(DEBUG,
83                    "RXQ: cmd0 basePA : 0x%lx cmd1 basePA : 0x%lx comp ring basePA : 0x%lx.",
84                    (unsigned long)rxq->cmd_ring[0].basePA,
85                    (unsigned long)rxq->cmd_ring[1].basePA,
86                    (unsigned long)rxq->comp_ring.basePA);
87
88         avail = vmxnet3_cmd_ring_desc_avail(&rxq->cmd_ring[0]);
89         PMD_RX_LOG(DEBUG,
90                    "RXQ:cmd0: size=%u; free=%u; next2proc=%u; queued=%u",
91                    (uint32_t)rxq->cmd_ring[0].size, avail,
92                    rxq->comp_ring.next2proc,
93                    rxq->cmd_ring[0].size - avail);
94
95         avail = vmxnet3_cmd_ring_desc_avail(&rxq->cmd_ring[1]);
96         PMD_RX_LOG(DEBUG, "RXQ:cmd1 size=%u; free=%u; next2proc=%u; queued=%u",
97                    (uint32_t)rxq->cmd_ring[1].size, avail, rxq->comp_ring.next2proc,
98                    rxq->cmd_ring[1].size - avail);
99
100 }
101
102 static void
103 vmxnet3_txq_dump(struct vmxnet3_tx_queue *txq)
104 {
105         uint32_t avail = 0;
106
107         if (txq == NULL)
108                 return;
109
110         PMD_TX_LOG(DEBUG, "TXQ: cmd base : %p comp ring base : %p data ring base : %p.",
111                    txq->cmd_ring.base, txq->comp_ring.base, txq->data_ring.base);
112         PMD_TX_LOG(DEBUG, "TXQ: cmd basePA : 0x%lx comp ring basePA : 0x%lx data ring basePA : 0x%lx.",
113                    (unsigned long)txq->cmd_ring.basePA,
114                    (unsigned long)txq->comp_ring.basePA,
115                    (unsigned long)txq->data_ring.basePA);
116
117         avail = vmxnet3_cmd_ring_desc_avail(&txq->cmd_ring);
118         PMD_TX_LOG(DEBUG, "TXQ: size=%u; free=%u; next2proc=%u; queued=%u",
119                    (uint32_t)txq->cmd_ring.size, avail,
120                    txq->comp_ring.next2proc, txq->cmd_ring.size - avail);
121 }
122 #endif
123
124 static void
125 vmxnet3_tx_cmd_ring_release_mbufs(vmxnet3_cmd_ring_t *ring)
126 {
127         while (ring->next2comp != ring->next2fill) {
128                 /* No need to worry about desc ownership, device is quiesced by now. */
129                 vmxnet3_buf_info_t *buf_info = ring->buf_info + ring->next2comp;
130
131                 if (buf_info->m) {
132                         rte_pktmbuf_free(buf_info->m);
133                         buf_info->m = NULL;
134                         buf_info->bufPA = 0;
135                         buf_info->len = 0;
136                 }
137                 vmxnet3_cmd_ring_adv_next2comp(ring);
138         }
139 }
140
141 static void
142 vmxnet3_rx_cmd_ring_release_mbufs(vmxnet3_cmd_ring_t *ring)
143 {
144         uint32_t i;
145
146         for (i = 0; i < ring->size; i++) {
147                 /* No need to worry about desc ownership, device is quiesced by now. */
148                 vmxnet3_buf_info_t *buf_info = &ring->buf_info[i];
149
150                 if (buf_info->m) {
151                         rte_pktmbuf_free_seg(buf_info->m);
152                         buf_info->m = NULL;
153                         buf_info->bufPA = 0;
154                         buf_info->len = 0;
155                 }
156                 vmxnet3_cmd_ring_adv_next2comp(ring);
157         }
158 }
159
160 static void
161 vmxnet3_cmd_ring_release(vmxnet3_cmd_ring_t *ring)
162 {
163         rte_free(ring->buf_info);
164         ring->buf_info = NULL;
165 }
166
167 void
168 vmxnet3_dev_tx_queue_release(void *txq)
169 {
170         vmxnet3_tx_queue_t *tq = txq;
171
172         if (tq != NULL) {
173                 /* Release mbufs */
174                 vmxnet3_tx_cmd_ring_release_mbufs(&tq->cmd_ring);
175                 /* Release the cmd_ring */
176                 vmxnet3_cmd_ring_release(&tq->cmd_ring);
177                 /* Release the memzone */
178                 rte_memzone_free(tq->mz);
179                 /* Release the queue */
180                 rte_free(tq);
181         }
182 }
183
184 void
185 vmxnet3_dev_rx_queue_release(void *rxq)
186 {
187         int i;
188         vmxnet3_rx_queue_t *rq = rxq;
189
190         if (rq != NULL) {
191                 /* Release mbufs */
192                 for (i = 0; i < VMXNET3_RX_CMDRING_SIZE; i++)
193                         vmxnet3_rx_cmd_ring_release_mbufs(&rq->cmd_ring[i]);
194
195                 /* Release both the cmd_rings */
196                 for (i = 0; i < VMXNET3_RX_CMDRING_SIZE; i++)
197                         vmxnet3_cmd_ring_release(&rq->cmd_ring[i]);
198
199                 /* Release the memzone */
200                 rte_memzone_free(rq->mz);
201
202                 /* Release the queue */
203                 rte_free(rq);
204         }
205 }
206
207 static void
208 vmxnet3_dev_tx_queue_reset(void *txq)
209 {
210         vmxnet3_tx_queue_t *tq = txq;
211         struct vmxnet3_cmd_ring *ring = &tq->cmd_ring;
212         struct vmxnet3_comp_ring *comp_ring = &tq->comp_ring;
213         struct vmxnet3_data_ring *data_ring = &tq->data_ring;
214         int size;
215
216         if (tq != NULL) {
217                 /* Release the cmd_ring mbufs */
218                 vmxnet3_tx_cmd_ring_release_mbufs(&tq->cmd_ring);
219         }
220
221         /* Tx vmxnet rings structure initialization */
222         ring->next2fill = 0;
223         ring->next2comp = 0;
224         ring->gen = VMXNET3_INIT_GEN;
225         comp_ring->next2proc = 0;
226         comp_ring->gen = VMXNET3_INIT_GEN;
227
228         size = sizeof(struct Vmxnet3_TxDesc) * ring->size;
229         size += sizeof(struct Vmxnet3_TxCompDesc) * comp_ring->size;
230         size += tq->txdata_desc_size * data_ring->size;
231
232         memset(ring->base, 0, size);
233 }
234
235 static void
236 vmxnet3_dev_rx_queue_reset(void *rxq)
237 {
238         int i;
239         vmxnet3_rx_queue_t *rq = rxq;
240         struct vmxnet3_hw *hw = rq->hw;
241         struct vmxnet3_cmd_ring *ring0, *ring1;
242         struct vmxnet3_comp_ring *comp_ring;
243         struct vmxnet3_rx_data_ring *data_ring = &rq->data_ring;
244         int size;
245
246         /* Release both the cmd_rings mbufs */
247         for (i = 0; i < VMXNET3_RX_CMDRING_SIZE; i++)
248                 vmxnet3_rx_cmd_ring_release_mbufs(&rq->cmd_ring[i]);
249
250         ring0 = &rq->cmd_ring[0];
251         ring1 = &rq->cmd_ring[1];
252         comp_ring = &rq->comp_ring;
253
254         /* Rx vmxnet rings structure initialization */
255         ring0->next2fill = 0;
256         ring1->next2fill = 0;
257         ring0->next2comp = 0;
258         ring1->next2comp = 0;
259         ring0->gen = VMXNET3_INIT_GEN;
260         ring1->gen = VMXNET3_INIT_GEN;
261         comp_ring->next2proc = 0;
262         comp_ring->gen = VMXNET3_INIT_GEN;
263
264         size = sizeof(struct Vmxnet3_RxDesc) * (ring0->size + ring1->size);
265         size += sizeof(struct Vmxnet3_RxCompDesc) * comp_ring->size;
266         if (VMXNET3_VERSION_GE_3(hw) && rq->data_desc_size)
267                 size += rq->data_desc_size * data_ring->size;
268
269         memset(ring0->base, 0, size);
270 }
271
272 void
273 vmxnet3_dev_clear_queues(struct rte_eth_dev *dev)
274 {
275         unsigned i;
276
277         PMD_INIT_FUNC_TRACE();
278
279         for (i = 0; i < dev->data->nb_tx_queues; i++) {
280                 struct vmxnet3_tx_queue *txq = dev->data->tx_queues[i];
281
282                 if (txq != NULL) {
283                         txq->stopped = TRUE;
284                         vmxnet3_dev_tx_queue_reset(txq);
285                 }
286         }
287
288         for (i = 0; i < dev->data->nb_rx_queues; i++) {
289                 struct vmxnet3_rx_queue *rxq = dev->data->rx_queues[i];
290
291                 if (rxq != NULL) {
292                         rxq->stopped = TRUE;
293                         vmxnet3_dev_rx_queue_reset(rxq);
294                 }
295         }
296 }
297
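/* Free the mbuf tied to the EOP descriptor and release every command-ring
 * descriptor up to and including eop_idx; returns the number of descriptors
 * reclaimed.
 */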
298 static int
299 vmxnet3_unmap_pkt(uint16_t eop_idx, vmxnet3_tx_queue_t *txq)
300 {
301         int completed = 0;
302         struct rte_mbuf *mbuf;
303
304         /* Release cmd_ring descriptor and free mbuf */
305         RTE_ASSERT(txq->cmd_ring.base[eop_idx].txd.eop == 1);
306
307         mbuf = txq->cmd_ring.buf_info[eop_idx].m;
308         if (mbuf == NULL)
309                 rte_panic("EOP desc does not point to a valid mbuf");
310         rte_pktmbuf_free(mbuf);
311
312         txq->cmd_ring.buf_info[eop_idx].m = NULL;
313
314         while (txq->cmd_ring.next2comp != eop_idx) {
315                 /* no out-of-order completion */
316                 RTE_ASSERT(txq->cmd_ring.base[txq->cmd_ring.next2comp].txd.cq == 0);
317                 vmxnet3_cmd_ring_adv_next2comp(&txq->cmd_ring);
318                 completed++;
319         }
320
321         /* Mark the txd for which tcd was generated as completed */
322         vmxnet3_cmd_ring_adv_next2comp(&txq->cmd_ring);
323
324         return completed + 1;
325 }
326
327 static void
328 vmxnet3_tq_tx_complete(vmxnet3_tx_queue_t *txq)
329 {
330         int completed = 0;
331         vmxnet3_comp_ring_t *comp_ring = &txq->comp_ring;
332         struct Vmxnet3_TxCompDesc *tcd = (struct Vmxnet3_TxCompDesc *)
333                 (comp_ring->base + comp_ring->next2proc);
334
335         while (tcd->gen == comp_ring->gen) {
336                 completed += vmxnet3_unmap_pkt(tcd->txdIdx, txq);
337
338                 vmxnet3_comp_ring_adv_next2proc(comp_ring);
339                 tcd = (struct Vmxnet3_TxCompDesc *)(comp_ring->base +
340                                                     comp_ring->next2proc);
341         }
342
343         PMD_TX_LOG(DEBUG, "Processed %d tx comps & command descs.", completed);
344 }
345
346 uint16_t
347 vmxnet3_prep_pkts(__rte_unused void *tx_queue, struct rte_mbuf **tx_pkts,
348         uint16_t nb_pkts)
349 {
350         int32_t ret;
351         uint32_t i;
352         uint64_t ol_flags;
353         struct rte_mbuf *m;
354
355         for (i = 0; i != nb_pkts; i++) {
356                 m = tx_pkts[i];
357                 ol_flags = m->ol_flags;
358
359                 /* Non-TSO packet cannot occupy more than
360                  * VMXNET3_MAX_TXD_PER_PKT TX descriptors.
361                  */
362                 if ((ol_flags & PKT_TX_TCP_SEG) == 0 &&
363                                 m->nb_segs > VMXNET3_MAX_TXD_PER_PKT) {
364                         rte_errno = EINVAL;
365                         return i;
366                 }
367
368                 /* check that only supported TX offloads are requested. */
369                 if ((ol_flags & VMXNET3_TX_OFFLOAD_NOTSUP_MASK) != 0 ||
370                                 (ol_flags & PKT_TX_L4_MASK) ==
371                                 PKT_TX_SCTP_CKSUM) {
372                         rte_errno = ENOTSUP;
373                         return i;
374                 }
375
376 #ifdef RTE_LIBRTE_ETHDEV_DEBUG
377                 ret = rte_validate_tx_offload(m);
378                 if (ret != 0) {
379                         rte_errno = -ret;
380                         return i;
381                 }
382 #endif
383                 ret = rte_net_intel_cksum_prepare(m);
384                 if (ret != 0) {
385                         rte_errno = -ret;
386                         return i;
387                 }
388         }
389
390         return i;
391 }
392
393 uint16_t
394 vmxnet3_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts,
395                   uint16_t nb_pkts)
396 {
397         uint16_t nb_tx;
398         vmxnet3_tx_queue_t *txq = tx_queue;
399         struct vmxnet3_hw *hw = txq->hw;
400         Vmxnet3_TxQueueCtrl *txq_ctrl = &txq->shared->ctrl;
401         uint32_t deferred = rte_le_to_cpu_32(txq_ctrl->txNumDeferred);
402
403         if (unlikely(txq->stopped)) {
404                 PMD_TX_LOG(DEBUG, "Tx queue is stopped.");
405                 return 0;
406         }
407
408         /* Free up the comp_descriptors aggressively */
409         vmxnet3_tq_tx_complete(txq);
410
411         nb_tx = 0;
412         while (nb_tx < nb_pkts) {
413                 Vmxnet3_GenericDesc *gdesc;
414                 vmxnet3_buf_info_t *tbi;
415                 uint32_t first2fill, avail, dw2;
416                 struct rte_mbuf *txm = tx_pkts[nb_tx];
417                 struct rte_mbuf *m_seg = txm;
418                 int copy_size = 0;
419                 bool tso = (txm->ol_flags & PKT_TX_TCP_SEG) != 0;
420                 /* # of descriptors needed for a packet. */
421                 unsigned count = txm->nb_segs;
422
423                 avail = vmxnet3_cmd_ring_desc_avail(&txq->cmd_ring);
424                 if (count > avail) {
425                         /* Is command ring full? */
426                         if (unlikely(avail == 0)) {
427                                 PMD_TX_LOG(DEBUG, "No free ring descriptors");
428                                 txq->stats.tx_ring_full++;
429                                 txq->stats.drop_total += (nb_pkts - nb_tx);
430                                 break;
431                         }
432
433                         /* Command ring is not full but cannot handle the
434                          * multi-segmented packet. Let's try the next packet
435                          * in this case.
436                          */
437                         PMD_TX_LOG(DEBUG, "Running out of ring descriptors "
438                                    "(avail %d needed %d)", avail, count);
439                         txq->stats.drop_total++;
440                         if (tso)
441                                 txq->stats.drop_tso++;
442                         rte_pktmbuf_free(txm);
443                         nb_tx++;
444                         continue;
445                 }
446
447                 /* Drop non-TSO packet that is excessively fragmented */
448                 if (unlikely(!tso && count > VMXNET3_MAX_TXD_PER_PKT)) {
449                         PMD_TX_LOG(ERR, "Non-TSO packet cannot occupy more than %d tx "
450                                    "descriptors. Packet dropped.", VMXNET3_MAX_TXD_PER_PKT);
451                         txq->stats.drop_too_many_segs++;
452                         txq->stats.drop_total++;
453                         rte_pktmbuf_free(txm);
454                         nb_tx++;
455                         continue;
456                 }
457
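                /* Small single-segment packets are copied into the per-queue
                 * Tx data ring; the SOP descriptor then points into the data
                 * ring instead of the mbuf (copy_size != 0 below).
                 */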
458                 if (txm->nb_segs == 1 &&
459                     rte_pktmbuf_pkt_len(txm) <= txq->txdata_desc_size) {
460                         struct Vmxnet3_TxDataDesc *tdd;
461
462                         /* Skip empty packets */
463                         if (unlikely(rte_pktmbuf_pkt_len(txm) == 0)) {
464                                 txq->stats.drop_total++;
465                                 rte_pktmbuf_free(txm);
466                                 nb_tx++;
467                                 continue;
468                         }
469
470                         tdd = (struct Vmxnet3_TxDataDesc *)
471                                 ((uint8 *)txq->data_ring.base +
472                                  txq->cmd_ring.next2fill *
473                                  txq->txdata_desc_size);
474                         copy_size = rte_pktmbuf_pkt_len(txm);
475                         rte_memcpy(tdd->data, rte_pktmbuf_mtod(txm, char *), copy_size);
476                 }
477
478                 /* use the previous gen bit for the SOP desc */
479                 dw2 = (txq->cmd_ring.gen ^ 0x1) << VMXNET3_TXD_GEN_SHIFT;
480                 first2fill = txq->cmd_ring.next2fill;
481                 do {
482                         /* Remember the transmit buffer for cleanup */
483                         tbi = txq->cmd_ring.buf_info + txq->cmd_ring.next2fill;
484
485                         /* NB: the following assumes that the VMXNET3 maximum
486                          * transmit buffer size (16K) is greater than the
487                          * maximum mbuf segment size.
488                          */
489                         gdesc = txq->cmd_ring.base + txq->cmd_ring.next2fill;
490
491                         /* Skip empty segments */
492                         if (unlikely(m_seg->data_len == 0))
493                                 continue;
494
495                         if (copy_size) {
496                                 uint64 offset =
497                                         (uint64)txq->cmd_ring.next2fill *
498                                                         txq->txdata_desc_size;
499                                 gdesc->txd.addr =
500                                         rte_cpu_to_le_64(txq->data_ring.basePA +
501                                                          offset);
502                         } else {
503                                 gdesc->txd.addr = rte_mbuf_data_iova(m_seg);
504                         }
505
506                         gdesc->dword[2] = dw2 | m_seg->data_len;
507                         gdesc->dword[3] = 0;
508
509                         /* move to the next2fill descriptor */
510                         vmxnet3_cmd_ring_adv_next2fill(&txq->cmd_ring);
511
512                         /* use the right gen for non-SOP desc */
513                         dw2 = txq->cmd_ring.gen << VMXNET3_TXD_GEN_SHIFT;
514                 } while ((m_seg = m_seg->next) != NULL);
515
516                 /* set the last buf_info for the pkt */
517                 tbi->m = txm;
518                 /* Update the EOP descriptor */
519                 gdesc->dword[3] |= VMXNET3_TXD_EOP | VMXNET3_TXD_CQ;
520
521                 /* Add VLAN tag if present */
522                 gdesc = txq->cmd_ring.base + first2fill;
523                 if (txm->ol_flags & PKT_TX_VLAN_PKT) {
524                         gdesc->txd.ti = 1;
525                         gdesc->txd.tci = txm->vlan_tci;
526                 }
527
528                 if (tso) {
529                         uint16_t mss = txm->tso_segsz;
530
531                         RTE_ASSERT(mss > 0);
532
533                         gdesc->txd.hlen = txm->l2_len + txm->l3_len + txm->l4_len;
534                         gdesc->txd.om = VMXNET3_OM_TSO;
535                         gdesc->txd.msscof = mss;
536
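                        /* For TSO, each resulting segment counts toward the
                         * doorbell threshold, hence the round-up division by
                         * MSS below.
                         */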
537                         deferred += (rte_pktmbuf_pkt_len(txm) - gdesc->txd.hlen + mss - 1) / mss;
538                 } else if (txm->ol_flags & PKT_TX_L4_MASK) {
539                         gdesc->txd.om = VMXNET3_OM_CSUM;
540                         gdesc->txd.hlen = txm->l2_len + txm->l3_len;
541
542                         switch (txm->ol_flags & PKT_TX_L4_MASK) {
543                         case PKT_TX_TCP_CKSUM:
544                                 gdesc->txd.msscof = gdesc->txd.hlen +
545                                         offsetof(struct rte_tcp_hdr, cksum);
546                                 break;
547                         case PKT_TX_UDP_CKSUM:
548                                 gdesc->txd.msscof = gdesc->txd.hlen +
549                                         offsetof(struct rte_udp_hdr,
550                                                 dgram_cksum);
551                                 break;
552                         default:
553                                 PMD_TX_LOG(WARNING, "requested cksum offload not supported %#llx",
554                                            txm->ol_flags & PKT_TX_L4_MASK);
555                                 abort();
556                         }
557                         deferred++;
558                 } else {
559                         gdesc->txd.hlen = 0;
560                         gdesc->txd.om = VMXNET3_OM_NONE;
561                         gdesc->txd.msscof = 0;
562                         deferred++;
563                 }
564
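                /* All descriptors of this packet carry the current gen bit
                 * except the SOP, which still holds the stale value; flipping
                 * it after the barrier hands the whole chain to the device in
                 * one step.
                 */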
565                 /* flip the GEN bit on the SOP */
566                 rte_compiler_barrier();
567                 gdesc->dword[2] ^= VMXNET3_TXD_GEN;
568
569                 txq_ctrl->txNumDeferred = rte_cpu_to_le_32(deferred);
570                 nb_tx++;
571         }
572
573         PMD_TX_LOG(DEBUG, "vmxnet3 txThreshold: %u", rte_le_to_cpu_32(txq_ctrl->txThreshold));
574
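        /* Doorbell writes are batched: TXPROD is only written once the number
         * of deferred packets reaches the threshold advertised by the device.
         */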
575         if (deferred >= rte_le_to_cpu_32(txq_ctrl->txThreshold)) {
576                 txq_ctrl->txNumDeferred = 0;
577                 /* Notify vSwitch that packets are available. */
578                 VMXNET3_WRITE_BAR0_REG(hw, (VMXNET3_REG_TXPROD + txq->queue_id * VMXNET3_REG_ALIGN),
579                                        txq->cmd_ring.next2fill);
580         }
581
582         return nb_tx;
583 }
584
585 static inline void
586 vmxnet3_renew_desc(vmxnet3_rx_queue_t *rxq, uint8_t ring_id,
587                    struct rte_mbuf *mbuf)
588 {
589         uint32_t val;
590         struct vmxnet3_cmd_ring *ring = &rxq->cmd_ring[ring_id];
591         struct Vmxnet3_RxDesc *rxd =
592                 (struct Vmxnet3_RxDesc *)(ring->base + ring->next2fill);
593         vmxnet3_buf_info_t *buf_info = &ring->buf_info[ring->next2fill];
594
595         if (ring_id == 0) {
596                 /* Usually: One HEAD type buf per packet
597                  * val = (ring->next2fill % rxq->hw->bufs_per_pkt) ?
598                  * VMXNET3_RXD_BTYPE_BODY : VMXNET3_RXD_BTYPE_HEAD;
599                  */
600
601                 /* We use single packet buffer so all heads here */
602                 val = VMXNET3_RXD_BTYPE_HEAD;
603         } else {
604                 /* All BODY type buffers for 2nd ring */
605                 val = VMXNET3_RXD_BTYPE_BODY;
606         }
607
608         /*
609          * Load the mbuf pointer into buf_info[next2fill].
610          * The buf_info structure serves the same purpose as the cookie in a virtio virtqueue.
611          */
612         buf_info->m = mbuf;
613         buf_info->len = (uint16_t)(mbuf->buf_len - RTE_PKTMBUF_HEADROOM);
614         buf_info->bufPA = rte_mbuf_data_iova_default(mbuf);
615
616         /* Load Rx Descriptor with the buffer's GPA */
617         rxd->addr = buf_info->bufPA;
618
619         /* After this point rxd->addr MUST not be NULL */
620         rxd->btype = val;
621         rxd->len = buf_info->len;
622         /* Flip gen bit at the end to change ownership */
623         rxd->gen = ring->gen;
624
625         vmxnet3_cmd_ring_adv_next2fill(ring);
626 }
627 /*
628  *  Allocates mbufs and clusters, and posts rx descriptors with the buffer
629  *  details so that the device can receive packets into those buffers.
630  *  Ring layout:
631  *      Of the two rings, the 1st ring contains buffers of type 0 and type 1.
632  *      bufs_per_pkt is set such that for non-LRO cases all the buffers required
633  *      by a frame fit in the 1st ring (1st buf of type 0, the rest of type 1).
634  *      The 2nd ring contains type 1 buffers only and is used
635  *      mostly for LRO.
636  */
637 static int
638 vmxnet3_post_rx_bufs(vmxnet3_rx_queue_t *rxq, uint8_t ring_id)
639 {
640         int err = 0;
641         uint32_t i = 0;
642         struct vmxnet3_cmd_ring *ring = &rxq->cmd_ring[ring_id];
643
644         while (vmxnet3_cmd_ring_desc_avail(ring) > 0) {
645                 struct rte_mbuf *mbuf;
646
647                 /* Allocate blank mbuf for the current Rx Descriptor */
648                 mbuf = rte_mbuf_raw_alloc(rxq->mp);
649                 if (unlikely(mbuf == NULL)) {
650                         PMD_RX_LOG(ERR, "Error allocating mbuf");
651                         rxq->stats.rx_buf_alloc_failure++;
652                         err = ENOMEM;
653                         break;
654                 }
655
656                 vmxnet3_renew_desc(rxq, ring_id, mbuf);
657                 i++;
658         }
659
660         /* Return an error only if no buffers could be posted */
661         if (vmxnet3_cmd_ring_desc_avail(ring) >= (ring->size - 1))
662                 return -err;
663         else
664                 return i;
665 }
666
667 /* MSS not provided by vmxnet3, guess one with available information */
668 static uint16_t
669 vmxnet3_guess_mss(struct vmxnet3_hw *hw, const Vmxnet3_RxCompDesc *rcd,
670                 struct rte_mbuf *rxm)
671 {
672         uint32_t hlen, slen;
673         struct rte_ipv4_hdr *ipv4_hdr;
674         struct rte_ipv6_hdr *ipv6_hdr;
675         struct rte_tcp_hdr *tcp_hdr;
676         char *ptr;
677
678         RTE_ASSERT(rcd->tcp);
679
680         ptr = rte_pktmbuf_mtod(rxm, char *);
681         slen = rte_pktmbuf_data_len(rxm);
682         hlen = sizeof(struct rte_ether_hdr);
683
684         if (rcd->v4) {
685                 if (unlikely(slen < hlen + sizeof(struct rte_ipv4_hdr)))
686                         return hw->mtu - sizeof(struct rte_ipv4_hdr)
687                                         - sizeof(struct rte_tcp_hdr);
688
689                 ipv4_hdr = (struct rte_ipv4_hdr *)(ptr + hlen);
690                 hlen += (ipv4_hdr->version_ihl & RTE_IPV4_HDR_IHL_MASK) *
691                                 RTE_IPV4_IHL_MULTIPLIER;
692         } else if (rcd->v6) {
693                 if (unlikely(slen < hlen + sizeof(struct rte_ipv6_hdr)))
694                         return hw->mtu - sizeof(struct rte_ipv6_hdr) -
695                                         sizeof(struct rte_tcp_hdr);
696
697                 ipv6_hdr = (struct rte_ipv6_hdr *)(ptr + hlen);
698                 hlen += sizeof(struct rte_ipv6_hdr);
699                 if (unlikely(ipv6_hdr->proto != IPPROTO_TCP)) {
700                         int frag;
701
702                         rte_net_skip_ip6_ext(ipv6_hdr->proto, rxm,
703                                         &hlen, &frag);
704                 }
705         }
706
707         if (unlikely(slen < hlen + sizeof(struct rte_tcp_hdr)))
708                 return hw->mtu - hlen - sizeof(struct rte_tcp_hdr) +
709                                 sizeof(struct rte_ether_hdr);
710
711         tcp_hdr = (struct rte_tcp_hdr *)(ptr + hlen);
712         hlen += (tcp_hdr->data_off & 0xf0) >> 2;
713
714         if (rxm->udata64 > 1)
715                 return (rte_pktmbuf_pkt_len(rxm) - hlen +
716                                 rxm->udata64 - 1) / rxm->udata64;
717         else
718                 return hw->mtu - hlen + sizeof(struct rte_ether_hdr);
719 }
720
721 /* Receive side checksum and other offloads */
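/* SOP completions carry LRO info (MSS and segment count); EOP completions
 * carry RSS hash, VLAN tag and checksum status.
 */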
722 static inline void
723 vmxnet3_rx_offload(struct vmxnet3_hw *hw, const Vmxnet3_RxCompDesc *rcd,
724                 struct rte_mbuf *rxm, const uint8_t sop)
725 {
726         uint64_t ol_flags = rxm->ol_flags;
727         uint32_t packet_type = rxm->packet_type;
728
729         /* Offloads set in sop */
730         if (sop) {
731                 /* Set packet type */
732                 packet_type |= RTE_PTYPE_L2_ETHER;
733
734                 /* Check large packet receive */
735                 if (VMXNET3_VERSION_GE_2(hw) &&
736                     rcd->type == VMXNET3_CDTYPE_RXCOMP_LRO) {
737                         const Vmxnet3_RxCompDescExt *rcde =
738                                         (const Vmxnet3_RxCompDescExt *)rcd;
739
740                         rxm->tso_segsz = rcde->mss;
741                         rxm->udata64 = rcde->segCnt;
742                         ol_flags |= PKT_RX_LRO;
743                 }
744         } else { /* Offloads set in eop */
745                 /* Check for RSS */
746                 if (rcd->rssType != VMXNET3_RCD_RSS_TYPE_NONE) {
747                         ol_flags |= PKT_RX_RSS_HASH;
748                         rxm->hash.rss = rcd->rssHash;
749                 }
750
751                 /* Check for hardware stripped VLAN tag */
752                 if (rcd->ts) {
753                         ol_flags |= (PKT_RX_VLAN | PKT_RX_VLAN_STRIPPED);
754                         rxm->vlan_tci = rte_le_to_cpu_16((uint16_t)rcd->tci);
755                 }
756
757                 /* Check packet type, checksum errors, etc. */
758                 if (rcd->cnc) {
759                         ol_flags |= PKT_RX_L4_CKSUM_UNKNOWN;
760                 } else {
761                         if (rcd->v4) {
762                                 packet_type |= RTE_PTYPE_L3_IPV4_EXT_UNKNOWN;
763
764                                 if (rcd->ipc)
765                                         ol_flags |= PKT_RX_IP_CKSUM_GOOD;
766                                 else
767                                         ol_flags |= PKT_RX_IP_CKSUM_BAD;
768
769                                 if (rcd->tuc) {
770                                         ol_flags |= PKT_RX_L4_CKSUM_GOOD;
771                                         if (rcd->tcp)
772                                                 packet_type |= RTE_PTYPE_L4_TCP;
773                                         else
774                                                 packet_type |= RTE_PTYPE_L4_UDP;
775                                 } else {
776                                         if (rcd->tcp) {
777                                                 packet_type |= RTE_PTYPE_L4_TCP;
778                                                 ol_flags |= PKT_RX_L4_CKSUM_BAD;
779                                         } else if (rcd->udp) {
780                                                 packet_type |= RTE_PTYPE_L4_UDP;
781                                                 ol_flags |= PKT_RX_L4_CKSUM_BAD;
782                                         }
783                                 }
784                         } else if (rcd->v6) {
785                                 packet_type |= RTE_PTYPE_L3_IPV6_EXT_UNKNOWN;
786
787                                 if (rcd->tuc) {
788                                         ol_flags |= PKT_RX_L4_CKSUM_GOOD;
789                                         if (rcd->tcp)
790                                                 packet_type |= RTE_PTYPE_L4_TCP;
791                                         else
792                                                 packet_type |= RTE_PTYPE_L4_UDP;
793                                 } else {
794                                         if (rcd->tcp) {
795                                                 packet_type |= RTE_PTYPE_L4_TCP;
796                                                 ol_flags |= PKT_RX_L4_CKSUM_BAD;
797                                         } else if (rcd->udp) {
798                                                 packet_type |= RTE_PTYPE_L4_UDP;
799                                                 ol_flags |= PKT_RX_L4_CKSUM_BAD;
800                                         }
801                                 }
802                         } else {
803                                 packet_type |= RTE_PTYPE_UNKNOWN;
804                         }
805
806                         /* Old variants of vmxnet3 do not provide MSS */
807                         if ((ol_flags & PKT_RX_LRO) && rxm->tso_segsz == 0)
808                                 rxm->tso_segsz = vmxnet3_guess_mss(hw,
809                                                 rcd, rxm);
810                 }
811         }
812
813         rxm->ol_flags = ol_flags;
814         rxm->packet_type = packet_type;
815 }
816
817 /*
818  * Process the Rx Completion Ring of given vmxnet3_rx_queue
819  * for nb_pkts burst and return the number of packets received
820  */
821 uint16_t
822 vmxnet3_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t nb_pkts)
823 {
824         uint16_t nb_rx;
825         uint32_t nb_rxd, idx;
826         uint8_t ring_idx;
827         vmxnet3_rx_queue_t *rxq;
828         Vmxnet3_RxCompDesc *rcd;
829         vmxnet3_buf_info_t *rbi;
830         Vmxnet3_RxDesc *rxd;
831         struct rte_mbuf *rxm = NULL;
832         struct vmxnet3_hw *hw;
833
834         nb_rx = 0;
835         ring_idx = 0;
836         nb_rxd = 0;
837         idx = 0;
838
839         rxq = rx_queue;
840         hw = rxq->hw;
841
842         rcd = &rxq->comp_ring.base[rxq->comp_ring.next2proc].rcd;
843
844         if (unlikely(rxq->stopped)) {
845                 PMD_RX_LOG(DEBUG, "Rx queue is stopped.");
846                 return 0;
847         }
848
849         while (rcd->gen == rxq->comp_ring.gen) {
850                 struct rte_mbuf *newm;
851
852                 if (nb_rx >= nb_pkts)
853                         break;
854
855                 newm = rte_mbuf_raw_alloc(rxq->mp);
856                 if (unlikely(newm == NULL)) {
857                         PMD_RX_LOG(ERR, "Error allocating mbuf");
858                         rxq->stats.rx_buf_alloc_failure++;
859                         break;
860                 }
861
862                 idx = rcd->rxdIdx;
863                 ring_idx = vmxnet3_get_ring_idx(hw, rcd->rqID);
864                 rxd = (Vmxnet3_RxDesc *)rxq->cmd_ring[ring_idx].base + idx;
865                 RTE_SET_USED(rxd); /* used only for assert when enabled */
866                 rbi = rxq->cmd_ring[ring_idx].buf_info + idx;
867
868                 PMD_RX_LOG(DEBUG, "rxd idx: %d ring idx: %d.", idx, ring_idx);
869
870                 RTE_ASSERT(rcd->len <= rxd->len);
871                 RTE_ASSERT(rbi->m);
872
873                 /* Get the packet buffer pointer from buf_info */
874                 rxm = rbi->m;
875
876                 /* Clear the buf_info associated with this descriptor so it can be reused */
877                 rbi->m = NULL;
878                 rbi->bufPA = 0;
879
880                 /* Record the index at which a packet was received */
881                 rxq->cmd_ring[ring_idx].next2comp = idx;
882
883                 /* For RCD with EOP set, check if there is frame error */
884                 if (unlikely(rcd->eop && rcd->err)) {
885                         rxq->stats.drop_total++;
886                         rxq->stats.drop_err++;
887
888                         if (!rcd->fcs) {
889                                 rxq->stats.drop_fcs++;
890                                 PMD_RX_LOG(ERR, "Recv packet dropped due to frame err.");
891                         }
892                         PMD_RX_LOG(ERR, "Error in received packet rcd#:%d rxd:%d",
893                                    (int)(rcd - (struct Vmxnet3_RxCompDesc *)
894                                          rxq->comp_ring.base), rcd->rxdIdx);
895                         rte_pktmbuf_free_seg(rxm);
896                         if (rxq->start_seg) {
897                                 struct rte_mbuf *start = rxq->start_seg;
898
899                                 rxq->start_seg = NULL;
900                                 rte_pktmbuf_free(start);
901                         }
902                         goto rcd_done;
903                 }
904
905                 /* Initialize newly received packet buffer */
906                 rxm->port = rxq->port_id;
907                 rxm->nb_segs = 1;
908                 rxm->next = NULL;
909                 rxm->pkt_len = (uint16_t)rcd->len;
910                 rxm->data_len = (uint16_t)rcd->len;
911                 rxm->data_off = RTE_PKTMBUF_HEADROOM;
912                 rxm->ol_flags = 0;
913                 rxm->vlan_tci = 0;
914                 rxm->packet_type = 0;
915
916                 /*
917                  * If this is the first buffer of the received packet,
918                  * set the pointer to the first mbuf of the packet
919                  * Otherwise, update the total length and the number of segments
920                  * of the current scattered packet, and update the pointer to
921                  * the last mbuf of the current packet.
922                  */
923                 if (rcd->sop) {
924                         RTE_ASSERT(rxd->btype == VMXNET3_RXD_BTYPE_HEAD);
925
926                         if (unlikely(rcd->len == 0)) {
927                                 RTE_ASSERT(rcd->eop);
928
929                                 PMD_RX_LOG(DEBUG,
930                                            "Rx buf was skipped. rxring[%d][%d])",
931                                            ring_idx, idx);
932                                 rte_pktmbuf_free_seg(rxm);
933                                 goto rcd_done;
934                         }
935
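                        /* vmxnet3 v3+ may deliver small packets through the
                         * Rx data ring; in that case copy the payload from
                         * the data ring slot into the mbuf.
                         */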
936                         if (vmxnet3_rx_data_ring(hw, rcd->rqID)) {
937                                 uint8_t *rdd = rxq->data_ring.base +
938                                         idx * rxq->data_desc_size;
939
940                                 RTE_ASSERT(VMXNET3_VERSION_GE_3(hw));
941                                 rte_memcpy(rte_pktmbuf_mtod(rxm, char *),
942                                            rdd, rcd->len);
943                         }
944
945                         rxq->start_seg = rxm;
946                         rxq->last_seg = rxm;
947                         vmxnet3_rx_offload(hw, rcd, rxm, 1);
948                 } else {
949                         struct rte_mbuf *start = rxq->start_seg;
950
951                         RTE_ASSERT(rxd->btype == VMXNET3_RXD_BTYPE_BODY);
952
953                         if (rxm->data_len) {
954                                 start->pkt_len += rxm->data_len;
955                                 start->nb_segs++;
956
957                                 rxq->last_seg->next = rxm;
958                                 rxq->last_seg = rxm;
959                         } else {
960                                 rte_pktmbuf_free_seg(rxm);
961                         }
962                 }
963
964                 if (rcd->eop) {
965                         struct rte_mbuf *start = rxq->start_seg;
966
967                         vmxnet3_rx_offload(hw, rcd, start, 0);
968                         rx_pkts[nb_rx++] = start;
969                         rxq->start_seg = NULL;
970                 }
971
972 rcd_done:
973                 rxq->cmd_ring[ring_idx].next2comp = idx;
974                 VMXNET3_INC_RING_IDX_ONLY(rxq->cmd_ring[ring_idx].next2comp,
975                                           rxq->cmd_ring[ring_idx].size);
976
977                 /* It's time to renew descriptors */
978                 vmxnet3_renew_desc(rxq, ring_idx, newm);
979                 if (unlikely(rxq->shared->ctrl.updateRxProd)) {
980                         VMXNET3_WRITE_BAR0_REG(hw, rxprod_reg[ring_idx] + (rxq->queue_id * VMXNET3_REG_ALIGN),
981                                                rxq->cmd_ring[ring_idx].next2fill);
982                 }
983
984                 /* Advance to the next descriptor in comp_ring */
985                 vmxnet3_comp_ring_adv_next2proc(&rxq->comp_ring);
986
987                 rcd = &rxq->comp_ring.base[rxq->comp_ring.next2proc].rcd;
988                 nb_rxd++;
989                 if (nb_rxd > rxq->cmd_ring[0].size) {
990                         PMD_RX_LOG(ERR, "Used up quota of receiving packets,"
991                                    " relinquish control.");
992                         break;
993                 }
994         }
995
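        /* If nothing was processed, still try to replenish the command rings
         * so that Rx does not stall after earlier mbuf allocation failures.
         */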
996         if (unlikely(nb_rxd == 0)) {
997                 uint32_t avail;
998                 for (ring_idx = 0; ring_idx < VMXNET3_RX_CMDRING_SIZE; ring_idx++) {
999                         avail = vmxnet3_cmd_ring_desc_avail(&rxq->cmd_ring[ring_idx]);
1000                         if (unlikely(avail > 0)) {
1001                                 /* try to alloc new buf and renew descriptors */
1002                                 vmxnet3_post_rx_bufs(rxq, ring_idx);
1003                         }
1004                 }
1005                 if (unlikely(rxq->shared->ctrl.updateRxProd)) {
1006                         for (ring_idx = 0; ring_idx < VMXNET3_RX_CMDRING_SIZE; ring_idx++) {
1007                                 VMXNET3_WRITE_BAR0_REG(hw, rxprod_reg[ring_idx] + (rxq->queue_id * VMXNET3_REG_ALIGN),
1008                                                        rxq->cmd_ring[ring_idx].next2fill);
1009                         }
1010                 }
1011         }
1012
1013         return nb_rx;
1014 }
1015
1016 int
1017 vmxnet3_dev_tx_queue_setup(struct rte_eth_dev *dev,
1018                            uint16_t queue_idx,
1019                            uint16_t nb_desc,
1020                            unsigned int socket_id,
1021                            const struct rte_eth_txconf *tx_conf __rte_unused)
1022 {
1023         struct vmxnet3_hw *hw = dev->data->dev_private;
1024         const struct rte_memzone *mz;
1025         struct vmxnet3_tx_queue *txq;
1026         struct vmxnet3_cmd_ring *ring;
1027         struct vmxnet3_comp_ring *comp_ring;
1028         struct vmxnet3_data_ring *data_ring;
1029         int size;
1030
1031         PMD_INIT_FUNC_TRACE();
1032
1033         txq = rte_zmalloc("ethdev_tx_queue", sizeof(struct vmxnet3_tx_queue),
1034                           RTE_CACHE_LINE_SIZE);
1035         if (txq == NULL) {
1036                 PMD_INIT_LOG(ERR, "Can not allocate tx queue structure");
1037                 return -ENOMEM;
1038         }
1039
1040         txq->queue_id = queue_idx;
1041         txq->port_id = dev->data->port_id;
1042         txq->shared = NULL; /* set in vmxnet3_setup_driver_shared() */
1043         txq->hw = hw;
1044         txq->qid = queue_idx;
1045         txq->stopped = TRUE;
1046         txq->txdata_desc_size = hw->txdata_desc_size;
1047
1048         ring = &txq->cmd_ring;
1049         comp_ring = &txq->comp_ring;
1050         data_ring = &txq->data_ring;
1051
1052         /* Tx vmxnet ring length should be between 512 and 4096 */
1053         if (nb_desc < VMXNET3_DEF_TX_RING_SIZE) {
1054                 PMD_INIT_LOG(ERR, "VMXNET3 Tx Ring Size Min: %u",
1055                              VMXNET3_DEF_TX_RING_SIZE);
1056                 return -EINVAL;
1057         } else if (nb_desc > VMXNET3_TX_RING_MAX_SIZE) {
1058                 PMD_INIT_LOG(ERR, "VMXNET3 Tx Ring Size Max: %u",
1059                              VMXNET3_TX_RING_MAX_SIZE);
1060                 return -EINVAL;
1061         } else {
1062                 ring->size = nb_desc;
1063                 ring->size &= ~VMXNET3_RING_SIZE_MASK;
1064         }
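        /* nb_desc was rounded down to the ring alignment above; the completion
         * and data rings mirror the command ring size.
         */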
1065         comp_ring->size = data_ring->size = ring->size;
1066
1067         /* Tx vmxnet rings structure initialization */
1068         ring->next2fill = 0;
1069         ring->next2comp = 0;
1070         ring->gen = VMXNET3_INIT_GEN;
1071         comp_ring->next2proc = 0;
1072         comp_ring->gen = VMXNET3_INIT_GEN;
1073
1074         size = sizeof(struct Vmxnet3_TxDesc) * ring->size;
1075         size += sizeof(struct Vmxnet3_TxCompDesc) * comp_ring->size;
1076         size += txq->txdata_desc_size * data_ring->size;
1077
1078         mz = rte_eth_dma_zone_reserve(dev, "txdesc", queue_idx, size,
1079                                       VMXNET3_RING_BA_ALIGN, socket_id);
1080         if (mz == NULL) {
1081                 PMD_INIT_LOG(ERR, "ERROR: Creating queue descriptors zone");
1082                 return -ENOMEM;
1083         }
1084         txq->mz = mz;
1085         memset(mz->addr, 0, mz->len);
1086
1087         /* cmd_ring initialization */
1088         ring->base = mz->addr;
1089         ring->basePA = mz->iova;
1090
1091         /* comp_ring initialization */
1092         comp_ring->base = ring->base + ring->size;
1093         comp_ring->basePA = ring->basePA +
1094                 (sizeof(struct Vmxnet3_TxDesc) * ring->size);
1095
1096         /* data_ring initialization */
1097         data_ring->base = (Vmxnet3_TxDataDesc *)(comp_ring->base + comp_ring->size);
1098         data_ring->basePA = comp_ring->basePA +
1099                         (sizeof(struct Vmxnet3_TxCompDesc) * comp_ring->size);
1100
1101         /* cmd_ring0 buf_info allocation */
1102         ring->buf_info = rte_zmalloc("tx_ring_buf_info",
1103                                      ring->size * sizeof(vmxnet3_buf_info_t), RTE_CACHE_LINE_SIZE);
1104         if (ring->buf_info == NULL) {
1105                 PMD_INIT_LOG(ERR, "ERROR: Creating tx_buf_info structure");
1106                 return -ENOMEM;
1107         }
1108
1109         /* Update the data portion with txq */
1110         dev->data->tx_queues[queue_idx] = txq;
1111
1112         return 0;
1113 }
1114
1115 int
1116 vmxnet3_dev_rx_queue_setup(struct rte_eth_dev *dev,
1117                            uint16_t queue_idx,
1118                            uint16_t nb_desc,
1119                            unsigned int socket_id,
1120                            __rte_unused const struct rte_eth_rxconf *rx_conf,
1121                            struct rte_mempool *mp)
1122 {
1123         const struct rte_memzone *mz;
1124         struct vmxnet3_rx_queue *rxq;
1125         struct vmxnet3_hw *hw = dev->data->dev_private;
1126         struct vmxnet3_cmd_ring *ring0, *ring1, *ring;
1127         struct vmxnet3_comp_ring *comp_ring;
1128         struct vmxnet3_rx_data_ring *data_ring;
1129         int size;
1130         uint8_t i;
1131         char mem_name[32];
1132
1133         PMD_INIT_FUNC_TRACE();
1134
1135         rxq = rte_zmalloc("ethdev_rx_queue", sizeof(struct vmxnet3_rx_queue),
1136                           RTE_CACHE_LINE_SIZE);
1137         if (rxq == NULL) {
1138                 PMD_INIT_LOG(ERR, "Can not allocate rx queue structure");
1139                 return -ENOMEM;
1140         }
1141
1142         rxq->mp = mp;
1143         rxq->queue_id = queue_idx;
1144         rxq->port_id = dev->data->port_id;
1145         rxq->shared = NULL; /* set in vmxnet3_setup_driver_shared() */
1146         rxq->hw = hw;
1147         rxq->qid1 = queue_idx;
1148         rxq->qid2 = queue_idx + hw->num_rx_queues;
1149         rxq->data_ring_qid = queue_idx + 2 * hw->num_rx_queues;
1150         rxq->data_desc_size = hw->rxdata_desc_size;
1151         rxq->stopped = TRUE;
1152
1153         ring0 = &rxq->cmd_ring[0];
1154         ring1 = &rxq->cmd_ring[1];
1155         comp_ring = &rxq->comp_ring;
1156         data_ring = &rxq->data_ring;
1157
1158         /* Rx vmxnet ring length should be between 256 and 4096 */
1159         if (nb_desc < VMXNET3_DEF_RX_RING_SIZE) {
1160                 PMD_INIT_LOG(ERR, "VMXNET3 Rx Ring Size Min: 256");
1161                 return -EINVAL;
1162         } else if (nb_desc > VMXNET3_RX_RING_MAX_SIZE) {
1163                 PMD_INIT_LOG(ERR, "VMXNET3 Rx Ring Size Max: 4096");
1164                 return -EINVAL;
1165         } else {
1166                 ring0->size = nb_desc;
1167                 ring0->size &= ~VMXNET3_RING_SIZE_MASK;
1168                 ring1->size = ring0->size;
1169         }
1170
1171         comp_ring->size = ring0->size + ring1->size;
1172         data_ring->size = ring0->size;
1173
1174         /* Rx vmxnet rings structure initialization */
1175         ring0->next2fill = 0;
1176         ring1->next2fill = 0;
1177         ring0->next2comp = 0;
1178         ring1->next2comp = 0;
1179         ring0->gen = VMXNET3_INIT_GEN;
1180         ring1->gen = VMXNET3_INIT_GEN;
1181         comp_ring->next2proc = 0;
1182         comp_ring->gen = VMXNET3_INIT_GEN;
1183
1184         size = sizeof(struct Vmxnet3_RxDesc) * (ring0->size + ring1->size);
1185         size += sizeof(struct Vmxnet3_RxCompDesc) * comp_ring->size;
1186         if (VMXNET3_VERSION_GE_3(hw) && rxq->data_desc_size)
1187                 size += rxq->data_desc_size * data_ring->size;
1188
1189         mz = rte_eth_dma_zone_reserve(dev, "rxdesc", queue_idx, size,
1190                                       VMXNET3_RING_BA_ALIGN, socket_id);
1191         if (mz == NULL) {
1192                 PMD_INIT_LOG(ERR, "ERROR: Creating queue descriptors zone");
1193                 return -ENOMEM;
1194         }
1195         rxq->mz = mz;
1196         memset(mz->addr, 0, mz->len);
1197
1198         /* cmd_ring0 initialization */
1199         ring0->base = mz->addr;
1200         ring0->basePA = mz->iova;
1201
1202         /* cmd_ring1 initialization */
1203         ring1->base = ring0->base + ring0->size;
1204         ring1->basePA = ring0->basePA + sizeof(struct Vmxnet3_RxDesc) * ring0->size;
1205
1206         /* comp_ring initialization */
1207         comp_ring->base = ring1->base + ring1->size;
1208         comp_ring->basePA = ring1->basePA + sizeof(struct Vmxnet3_RxDesc) *
1209                 ring1->size;
1210
1211         /* data_ring initialization */
1212         if (VMXNET3_VERSION_GE_3(hw) && rxq->data_desc_size) {
1213                 data_ring->base =
1214                         (uint8_t *)(comp_ring->base + comp_ring->size);
1215                 data_ring->basePA = comp_ring->basePA +
1216                         sizeof(struct Vmxnet3_RxCompDesc) * comp_ring->size;
1217         }
1218
1219         /* cmd_ring0-cmd_ring1 buf_info allocation */
1220         for (i = 0; i < VMXNET3_RX_CMDRING_SIZE; i++) {
1221
1222                 ring = &rxq->cmd_ring[i];
1223                 ring->rid = i;
1224                 snprintf(mem_name, sizeof(mem_name), "rx_ring_%d_buf_info", i);
1225
1226                 ring->buf_info = rte_zmalloc(mem_name,
1227                                              ring->size * sizeof(vmxnet3_buf_info_t),
1228                                              RTE_CACHE_LINE_SIZE);
1229                 if (ring->buf_info == NULL) {
1230                         PMD_INIT_LOG(ERR, "ERROR: Creating rx_buf_info structure");
1231                         return -ENOMEM;
1232                 }
1233         }
1234
1235         /* Update the data portion with rxq */
1236         dev->data->rx_queues[queue_idx] = rxq;
1237
1238         return 0;
1239 }
1240
1241 /*
1242  * Initializes the receive unit and
1243  * loads mbufs into the rx queues in advance.
1244  */
1245 int
1246 vmxnet3_dev_rxtx_init(struct rte_eth_dev *dev)
1247 {
1248         struct vmxnet3_hw *hw = dev->data->dev_private;
1249
1250         int i, ret;
1251         uint8_t j;
1252
1253         PMD_INIT_FUNC_TRACE();
1254
1255         for (i = 0; i < hw->num_rx_queues; i++) {
1256                 vmxnet3_rx_queue_t *rxq = dev->data->rx_queues[i];
1257
1258                 for (j = 0; j < VMXNET3_RX_CMDRING_SIZE; j++) {
1259                         /* Passing 0 as alloc_num will allocate full ring */
1260                         ret = vmxnet3_post_rx_bufs(rxq, j);
1261                         if (ret <= 0) {
1262                                 PMD_INIT_LOG(ERR,
1263                                              "ERROR: Posting Rxq: %d buffers ring: %d",
1264                                              i, j);
1265                                 return -ret;
1266                         }
1267                         /*
1268                          * Update the device with the next2fill index so that it
1269                          * can use the posted mbufs for incoming packets.
1270                          */
1271                         if (unlikely(rxq->shared->ctrl.updateRxProd)) {
1272                                 VMXNET3_WRITE_BAR0_REG(hw, rxprod_reg[j] + (rxq->queue_id * VMXNET3_REG_ALIGN),
1273                                                        rxq->cmd_ring[j].next2fill);
1274                         }
1275                 }
1276                 rxq->stopped = FALSE;
1277                 rxq->start_seg = NULL;
1278         }
1279
1280         for (i = 0; i < dev->data->nb_tx_queues; i++) {
1281                 struct vmxnet3_tx_queue *txq = dev->data->tx_queues[i];
1282
1283                 txq->stopped = FALSE;
1284         }
1285
1286         return 0;
1287 }
1288
1289 static uint8_t rss_intel_key[40] = {
1290         0x6D, 0x5A, 0x56, 0xDA, 0x25, 0x5B, 0x0E, 0xC2,
1291         0x41, 0x67, 0x25, 0x3D, 0x43, 0xA3, 0x8F, 0xB0,
1292         0xD0, 0xCA, 0x2B, 0xCB, 0xAE, 0x7B, 0x30, 0xB4,
1293         0x77, 0xCB, 0x2D, 0xA3, 0x80, 0x30, 0xF2, 0x0C,
1294         0x6A, 0x42, 0xB7, 0x3B, 0xBE, 0xAC, 0x01, 0xFA,
1295 };
1296
1297 /*
1298  * Additional RSS configuration based on vmxnet3 v4+ APIs
1299  */
1300 int
1301 vmxnet3_v4_rss_configure(struct rte_eth_dev *dev)
1302 {
1303         struct vmxnet3_hw *hw = dev->data->dev_private;
1304         Vmxnet3_DriverShared *shared = hw->shared;
1305         Vmxnet3_CmdInfo *cmdInfo = &shared->cu.cmdInfo;
1306         struct rte_eth_rss_conf *port_rss_conf;
1307         uint64_t rss_hf;
1308         uint32_t ret;
1309
1310         PMD_INIT_FUNC_TRACE();
1311
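        /* The requested hash field set is passed via the shared cmdInfo area
         * and committed with VMXNET3_CMD_SET_RSS_FIELDS; reading the command
         * register back returns the device's status for the command.
         */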
1312         cmdInfo->setRSSFields = 0;
1313         port_rss_conf = &dev->data->dev_conf.rx_adv_conf.rss_conf;
1314         rss_hf = port_rss_conf->rss_hf &
1315                 (VMXNET3_V4_RSS_MASK | VMXNET3_RSS_OFFLOAD_ALL);
1316
1317         if (rss_hf & ETH_RSS_NONFRAG_IPV4_TCP)
1318                 cmdInfo->setRSSFields |= VMXNET3_RSS_FIELDS_TCPIP4;
1319         if (rss_hf & ETH_RSS_NONFRAG_IPV6_TCP)
1320                 cmdInfo->setRSSFields |= VMXNET3_RSS_FIELDS_TCPIP6;
1321         if (rss_hf & ETH_RSS_NONFRAG_IPV4_UDP)
1322                 cmdInfo->setRSSFields |= VMXNET3_RSS_FIELDS_UDPIP4;
1323         if (rss_hf & ETH_RSS_NONFRAG_IPV6_UDP)
1324                 cmdInfo->setRSSFields |= VMXNET3_RSS_FIELDS_UDPIP6;
1325
1326         VMXNET3_WRITE_BAR1_REG(hw, VMXNET3_REG_CMD,
1327                                VMXNET3_CMD_SET_RSS_FIELDS);
1328         ret = VMXNET3_READ_BAR1_REG(hw, VMXNET3_REG_CMD);
1329
1330         if (ret != VMXNET3_SUCCESS) {
1331                 PMD_DRV_LOG(ERR, "Set RSS fields (v4) failed: %d", ret);
1332         }
1333
1334         return ret;
1335 }
1336
1337 /*
1338  * Configure RSS feature
1339  */
1340 int
1341 vmxnet3_rss_configure(struct rte_eth_dev *dev)
1342 {
1343         struct vmxnet3_hw *hw = dev->data->dev_private;
1344         struct VMXNET3_RSSConf *dev_rss_conf;
1345         struct rte_eth_rss_conf *port_rss_conf;
1346         uint64_t rss_hf;
1347         uint8_t i, j;
1348
1349         PMD_INIT_FUNC_TRACE();
1350
1351         dev_rss_conf = hw->rss_conf;
1352         port_rss_conf = &dev->data->dev_conf.rx_adv_conf.rss_conf;
1353
1354         /* loading hashFunc */
1355         dev_rss_conf->hashFunc = VMXNET3_RSS_HASH_FUNC_TOEPLITZ;
1356         /* loading hashKeySize */
1357         dev_rss_conf->hashKeySize = VMXNET3_RSS_MAX_KEY_SIZE;
1358         /* loading indTableSize: Must not exceed VMXNET3_RSS_MAX_IND_TABLE_SIZE (128) */
1359         dev_rss_conf->indTableSize = (uint16_t)(hw->num_rx_queues * 4);
1360
1361         if (port_rss_conf->rss_key == NULL) {
1362                 /* Default hash key */
1363                 port_rss_conf->rss_key = rss_intel_key;
1364         }
1365
1366         /* loading hashKey */
1367         memcpy(&dev_rss_conf->hashKey[0], port_rss_conf->rss_key,
1368                dev_rss_conf->hashKeySize);
1369
1370         /* loading indTable */
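        /* map hash buckets round-robin over the configured Rx queues */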
1371         for (i = 0, j = 0; i < dev_rss_conf->indTableSize; i++, j++) {
1372                 if (j == dev->data->nb_rx_queues)
1373                         j = 0;
1374                 dev_rss_conf->indTable[i] = j;
1375         }
1376
1377         /* loading hashType */
1378         dev_rss_conf->hashType = 0;
1379         rss_hf = port_rss_conf->rss_hf & VMXNET3_RSS_OFFLOAD_ALL;
1380         if (rss_hf & ETH_RSS_IPV4)
1381                 dev_rss_conf->hashType |= VMXNET3_RSS_HASH_TYPE_IPV4;
1382         if (rss_hf & ETH_RSS_NONFRAG_IPV4_TCP)
1383                 dev_rss_conf->hashType |= VMXNET3_RSS_HASH_TYPE_TCP_IPV4;
1384         if (rss_hf & ETH_RSS_IPV6)
1385                 dev_rss_conf->hashType |= VMXNET3_RSS_HASH_TYPE_IPV6;
1386         if (rss_hf & ETH_RSS_NONFRAG_IPV6_TCP)
1387                 dev_rss_conf->hashType |= VMXNET3_RSS_HASH_TYPE_TCP_IPV6;
1388
1389         return VMXNET3_SUCCESS;
1390 }