dpdk.git: drivers/net/vmxnet3/vmxnet3_rxtx.c (commit 73a3cc3a2e33c1039535f4b77ec8d7489b93be7d)
1 /* SPDX-License-Identifier: BSD-3-Clause
2  * Copyright(c) 2010-2015 Intel Corporation
3  */
4
5 #include <sys/queue.h>
6
7 #include <stdio.h>
8 #include <stdlib.h>
9 #include <string.h>
10 #include <errno.h>
11 #include <stdint.h>
12 #include <stdarg.h>
13 #include <unistd.h>
14 #include <inttypes.h>
15
16 #include <rte_byteorder.h>
17 #include <rte_common.h>
18 #include <rte_cycles.h>
19 #include <rte_log.h>
20 #include <rte_debug.h>
21 #include <rte_interrupts.h>
22 #include <rte_pci.h>
23 #include <rte_memory.h>
24 #include <rte_memzone.h>
25 #include <rte_launch.h>
26 #include <rte_eal.h>
27 #include <rte_per_lcore.h>
28 #include <rte_lcore.h>
29 #include <rte_atomic.h>
30 #include <rte_branch_prediction.h>
31 #include <rte_mempool.h>
32 #include <rte_malloc.h>
33 #include <rte_mbuf.h>
34 #include <rte_ether.h>
35 #include <ethdev_driver.h>
36 #include <rte_prefetch.h>
37 #include <rte_ip.h>
38 #include <rte_udp.h>
39 #include <rte_tcp.h>
40 #include <rte_sctp.h>
41 #include <rte_string_fns.h>
42 #include <rte_errno.h>
43 #include <rte_net.h>
44
45 #include "base/vmxnet3_defs.h"
46 #include "vmxnet3_ring.h"
47
48 #include "vmxnet3_logs.h"
49 #include "vmxnet3_ethdev.h"
50
51 #define VMXNET3_TX_OFFLOAD_MASK ( \
52                 PKT_TX_VLAN | \
53                 PKT_TX_IPV6 |     \
54                 PKT_TX_IPV4 |     \
55                 PKT_TX_L4_MASK |  \
56                 PKT_TX_TCP_SEG)
57
58 #define VMXNET3_TX_OFFLOAD_NOTSUP_MASK  \
59         (PKT_TX_OFFLOAD_MASK ^ VMXNET3_TX_OFFLOAD_MASK)
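/* Offload flags outside VMXNET3_TX_OFFLOAD_MASK (and SCTP checksum requests)
 * are rejected in vmxnet3_prep_pkts() with ENOTSUP.
 */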
60
61 static const uint32_t rxprod_reg[2] = {VMXNET3_REG_RXPROD, VMXNET3_REG_RXPROD2};
62
63 static int vmxnet3_post_rx_bufs(vmxnet3_rx_queue_t*, uint8_t);
64 static void vmxnet3_tq_tx_complete(vmxnet3_tx_queue_t *);
65 #ifdef RTE_LIBRTE_VMXNET3_DEBUG_DRIVER_NOT_USED
66 static void vmxnet3_rxq_dump(struct vmxnet3_rx_queue *);
67 static void vmxnet3_txq_dump(struct vmxnet3_tx_queue *);
68 #endif
69
70 #ifdef RTE_LIBRTE_VMXNET3_DEBUG_DRIVER_NOT_USED
71 static void
72 vmxnet3_rxq_dump(struct vmxnet3_rx_queue *rxq)
73 {
74         uint32_t avail = 0;
75
76         if (rxq == NULL)
77                 return;
78
79         PMD_RX_LOG(DEBUG,
80                    "RXQ: cmd0 base : %p cmd1 base : %p comp ring base : %p.",
81                    rxq->cmd_ring[0].base, rxq->cmd_ring[1].base, rxq->comp_ring.base);
82         PMD_RX_LOG(DEBUG,
83                    "RXQ: cmd0 basePA : 0x%lx cmd1 basePA : 0x%lx comp ring basePA : 0x%lx.",
84                    (unsigned long)rxq->cmd_ring[0].basePA,
85                    (unsigned long)rxq->cmd_ring[1].basePA,
86                    (unsigned long)rxq->comp_ring.basePA);
87
88         avail = vmxnet3_cmd_ring_desc_avail(&rxq->cmd_ring[0]);
89         PMD_RX_LOG(DEBUG,
90                    "RXQ:cmd0: size=%u; free=%u; next2proc=%u; queued=%u",
91                    (uint32_t)rxq->cmd_ring[0].size, avail,
92                    rxq->comp_ring.next2proc,
93                    rxq->cmd_ring[0].size - avail);
94
95         avail = vmxnet3_cmd_ring_desc_avail(&rxq->cmd_ring[1]);
96         PMD_RX_LOG(DEBUG, "RXQ:cmd1 size=%u; free=%u; next2proc=%u; queued=%u",
97                    (uint32_t)rxq->cmd_ring[1].size, avail, rxq->comp_ring.next2proc,
98                    rxq->cmd_ring[1].size - avail);
99
100 }
101
102 static void
103 vmxnet3_txq_dump(struct vmxnet3_tx_queue *txq)
104 {
105         uint32_t avail = 0;
106
107         if (txq == NULL)
108                 return;
109
110         PMD_TX_LOG(DEBUG, "TXQ: cmd base : %p comp ring base : %p data ring base : %p.",
111                    txq->cmd_ring.base, txq->comp_ring.base, txq->data_ring.base);
112         PMD_TX_LOG(DEBUG, "TXQ: cmd basePA : 0x%lx comp ring basePA : 0x%lx data ring basePA : 0x%lx.",
113                    (unsigned long)txq->cmd_ring.basePA,
114                    (unsigned long)txq->comp_ring.basePA,
115                    (unsigned long)txq->data_ring.basePA);
116
117         avail = vmxnet3_cmd_ring_desc_avail(&txq->cmd_ring);
118         PMD_TX_LOG(DEBUG, "TXQ: size=%u; free=%u; next2proc=%u; queued=%u",
119                    (uint32_t)txq->cmd_ring.size, avail,
120                    txq->comp_ring.next2proc, txq->cmd_ring.size - avail);
121 }
122 #endif
123
124 static void
125 vmxnet3_tx_cmd_ring_release_mbufs(vmxnet3_cmd_ring_t *ring)
126 {
127         while (ring->next2comp != ring->next2fill) {
128                 /* No need to worry about desc ownership, device is quiesced by now. */
129                 vmxnet3_buf_info_t *buf_info = ring->buf_info + ring->next2comp;
130
131                 if (buf_info->m) {
132                         rte_pktmbuf_free(buf_info->m);
133                         buf_info->m = NULL;
134                         buf_info->bufPA = 0;
135                         buf_info->len = 0;
136                 }
137                 vmxnet3_cmd_ring_adv_next2comp(ring);
138         }
139 }
140
141 static void
142 vmxnet3_rx_cmd_ring_release_mbufs(vmxnet3_cmd_ring_t *ring)
143 {
144         uint32_t i;
145
146         for (i = 0; i < ring->size; i++) {
147                 /* No need to worry about desc ownership, device is quiesced by now. */
148                 vmxnet3_buf_info_t *buf_info = &ring->buf_info[i];
149
150                 if (buf_info->m) {
151                         rte_pktmbuf_free_seg(buf_info->m);
152                         buf_info->m = NULL;
153                         buf_info->bufPA = 0;
154                         buf_info->len = 0;
155                 }
156                 vmxnet3_cmd_ring_adv_next2comp(ring);
157         }
158 }
159
160 static void
161 vmxnet3_cmd_ring_release(vmxnet3_cmd_ring_t *ring)
162 {
163         rte_free(ring->buf_info);
164         ring->buf_info = NULL;
165 }
166
167 void
168 vmxnet3_dev_tx_queue_release(struct rte_eth_dev *dev, uint16_t qid)
169 {
170         vmxnet3_tx_queue_t *tq = dev->data->tx_queues[qid];
171
172         if (tq != NULL) {
173                 /* Release mbufs */
174                 vmxnet3_tx_cmd_ring_release_mbufs(&tq->cmd_ring);
175                 /* Release the cmd_ring */
176                 vmxnet3_cmd_ring_release(&tq->cmd_ring);
177                 /* Release the memzone */
178                 rte_memzone_free(tq->mz);
179                 /* Release the queue */
180                 rte_free(tq);
181         }
182 }
183
184 void
185 vmxnet3_dev_rx_queue_release(struct rte_eth_dev *dev, uint16_t qid)
186 {
187         int i;
188         vmxnet3_rx_queue_t *rq = dev->data->rx_queues[qid];
189
190         if (rq != NULL) {
191                 /* Release mbufs */
192                 for (i = 0; i < VMXNET3_RX_CMDRING_SIZE; i++)
193                         vmxnet3_rx_cmd_ring_release_mbufs(&rq->cmd_ring[i]);
194
195                 /* Release both the cmd_rings */
196                 for (i = 0; i < VMXNET3_RX_CMDRING_SIZE; i++)
197                         vmxnet3_cmd_ring_release(&rq->cmd_ring[i]);
198
199                 /* Release the memzone */
200                 rte_memzone_free(rq->mz);
201
202                 /* Release the queue */
203                 rte_free(rq);
204         }
205 }
206
207 static void
208 vmxnet3_dev_tx_queue_reset(void *txq)
209 {
210         vmxnet3_tx_queue_t *tq = txq;
211         struct vmxnet3_cmd_ring *ring = &tq->cmd_ring;
212         struct vmxnet3_comp_ring *comp_ring = &tq->comp_ring;
213         struct vmxnet3_data_ring *data_ring = &tq->data_ring;
214         int size;
215
216         if (tq != NULL) {
217                 /* Release the cmd_ring mbufs */
218                 vmxnet3_tx_cmd_ring_release_mbufs(&tq->cmd_ring);
219         }
220
221         /* Tx vmxnet rings structure initialization */
222         ring->next2fill = 0;
223         ring->next2comp = 0;
224         ring->gen = VMXNET3_INIT_GEN;
225         comp_ring->next2proc = 0;
226         comp_ring->gen = VMXNET3_INIT_GEN;
227
228         size = sizeof(struct Vmxnet3_TxDesc) * ring->size;
229         size += sizeof(struct Vmxnet3_TxCompDesc) * comp_ring->size;
230         size += tq->txdata_desc_size * data_ring->size;
231
232         memset(ring->base, 0, size);
233 }
234
235 static void
236 vmxnet3_dev_rx_queue_reset(void *rxq)
237 {
238         int i;
239         vmxnet3_rx_queue_t *rq = rxq;
240         struct vmxnet3_hw *hw = rq->hw;
241         struct vmxnet3_cmd_ring *ring0, *ring1;
242         struct vmxnet3_comp_ring *comp_ring;
243         struct vmxnet3_rx_data_ring *data_ring = &rq->data_ring;
244         int size;
245
246         /* Release both the cmd_rings mbufs */
247         for (i = 0; i < VMXNET3_RX_CMDRING_SIZE; i++)
248                 vmxnet3_rx_cmd_ring_release_mbufs(&rq->cmd_ring[i]);
249
250         ring0 = &rq->cmd_ring[0];
251         ring1 = &rq->cmd_ring[1];
252         comp_ring = &rq->comp_ring;
253
254         /* Rx vmxnet rings structure initialization */
255         ring0->next2fill = 0;
256         ring1->next2fill = 0;
257         ring0->next2comp = 0;
258         ring1->next2comp = 0;
259         ring0->gen = VMXNET3_INIT_GEN;
260         ring1->gen = VMXNET3_INIT_GEN;
261         comp_ring->next2proc = 0;
262         comp_ring->gen = VMXNET3_INIT_GEN;
263
264         size = sizeof(struct Vmxnet3_RxDesc) * (ring0->size + ring1->size);
265         size += sizeof(struct Vmxnet3_RxCompDesc) * comp_ring->size;
266         if (VMXNET3_VERSION_GE_3(hw) && rq->data_desc_size)
267                 size += rq->data_desc_size * data_ring->size;
268
269         memset(ring0->base, 0, size);
270 }
271
272 void
273 vmxnet3_dev_clear_queues(struct rte_eth_dev *dev)
274 {
275         unsigned i;
276
277         PMD_INIT_FUNC_TRACE();
278
279         for (i = 0; i < dev->data->nb_tx_queues; i++) {
280                 struct vmxnet3_tx_queue *txq = dev->data->tx_queues[i];
281
282                 if (txq != NULL) {
283                         txq->stopped = TRUE;
284                         vmxnet3_dev_tx_queue_reset(txq);
285                 }
286         }
287
288         for (i = 0; i < dev->data->nb_rx_queues; i++) {
289                 struct vmxnet3_rx_queue *rxq = dev->data->rx_queues[i];
290
291                 if (rxq != NULL) {
292                         rxq->stopped = TRUE;
293                         vmxnet3_dev_rx_queue_reset(rxq);
294                 }
295         }
296 }
297
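/*
 * Release the mbuf of the packet whose EOP completion is at eop_idx and
 * reclaim every command-ring descriptor that packet used. Returns the
 * number of descriptors reclaimed.
 */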
298 static int
299 vmxnet3_unmap_pkt(uint16_t eop_idx, vmxnet3_tx_queue_t *txq)
300 {
301         int completed = 0;
302         struct rte_mbuf *mbuf;
303
304         /* Release cmd_ring descriptor and free mbuf */
305         RTE_ASSERT(txq->cmd_ring.base[eop_idx].txd.eop == 1);
306
307         mbuf = txq->cmd_ring.buf_info[eop_idx].m;
308         if (mbuf == NULL)
309                 rte_panic("EOP desc does not point to a valid mbuf");
310         rte_pktmbuf_free(mbuf);
311
312         txq->cmd_ring.buf_info[eop_idx].m = NULL;
313
314         while (txq->cmd_ring.next2comp != eop_idx) {
315                 /* no out-of-order completion */
316                 RTE_ASSERT(txq->cmd_ring.base[txq->cmd_ring.next2comp].txd.cq == 0);
317                 vmxnet3_cmd_ring_adv_next2comp(&txq->cmd_ring);
318                 completed++;
319         }
320
321         /* Mark the txd for which tcd was generated as completed */
322         vmxnet3_cmd_ring_adv_next2comp(&txq->cmd_ring);
323
324         return completed + 1;
325 }
326
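/*
 * Drain the Tx completion ring: for each completion owned by the driver
 * (generation bit matches the ring's), release the command-ring descriptors
 * and mbuf of the corresponding packet.
 */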
327 static void
328 vmxnet3_tq_tx_complete(vmxnet3_tx_queue_t *txq)
329 {
330         int completed = 0;
331         vmxnet3_comp_ring_t *comp_ring = &txq->comp_ring;
332         struct Vmxnet3_TxCompDesc *tcd = (struct Vmxnet3_TxCompDesc *)
333                 (comp_ring->base + comp_ring->next2proc);
334
335         while (tcd->gen == comp_ring->gen) {
336                 completed += vmxnet3_unmap_pkt(tcd->txdIdx, txq);
337
338                 vmxnet3_comp_ring_adv_next2proc(comp_ring);
339                 tcd = (struct Vmxnet3_TxCompDesc *)(comp_ring->base +
340                                                     comp_ring->next2proc);
341         }
342
343         PMD_TX_LOG(DEBUG, "Processed %d tx comps & command descs.", completed);
344 }
345
346 uint16_t
347 vmxnet3_prep_pkts(__rte_unused void *tx_queue, struct rte_mbuf **tx_pkts,
348         uint16_t nb_pkts)
349 {
350         int32_t ret;
351         uint32_t i;
352         uint64_t ol_flags;
353         struct rte_mbuf *m;
354
355         for (i = 0; i != nb_pkts; i++) {
356                 m = tx_pkts[i];
357                 ol_flags = m->ol_flags;
358
359                 /* Non-TSO packet cannot occupy more than
360                  * VMXNET3_MAX_TXD_PER_PKT TX descriptors.
361                  */
362                 if ((ol_flags & PKT_TX_TCP_SEG) == 0 &&
363                                 m->nb_segs > VMXNET3_MAX_TXD_PER_PKT) {
364                         rte_errno = EINVAL;
365                         return i;
366                 }
367
368                 /* check that only supported TX offloads are requested. */
369                 if ((ol_flags & VMXNET3_TX_OFFLOAD_NOTSUP_MASK) != 0 ||
370                                 (ol_flags & PKT_TX_L4_MASK) ==
371                                 PKT_TX_SCTP_CKSUM) {
372                         rte_errno = ENOTSUP;
373                         return i;
374                 }
375
376 #ifdef RTE_LIBRTE_ETHDEV_DEBUG
377                 ret = rte_validate_tx_offload(m);
378                 if (ret != 0) {
379                         rte_errno = -ret;
380                         return i;
381                 }
382 #endif
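                /* Prepare the packet for checksum/TSO offload, e.g. fill in
                 * the pseudo-header checksum the device expects.
                 */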
383                 ret = rte_net_intel_cksum_prepare(m);
384                 if (ret != 0) {
385                         rte_errno = -ret;
386                         return i;
387                 }
388         }
389
390         return i;
391 }
392
393 uint16_t
394 vmxnet3_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts,
395                   uint16_t nb_pkts)
396 {
397         uint16_t nb_tx;
398         vmxnet3_tx_queue_t *txq = tx_queue;
399         struct vmxnet3_hw *hw = txq->hw;
400         Vmxnet3_TxQueueCtrl *txq_ctrl = &txq->shared->ctrl;
401         uint32_t deferred = rte_le_to_cpu_32(txq_ctrl->txNumDeferred);
402
403         if (unlikely(txq->stopped)) {
404                 PMD_TX_LOG(DEBUG, "Tx queue is stopped.");
405                 return 0;
406         }
407
408         /* Free up the comp_descriptors aggressively */
409         vmxnet3_tq_tx_complete(txq);
410
411         nb_tx = 0;
412         while (nb_tx < nb_pkts) {
413                 Vmxnet3_GenericDesc *gdesc;
414                 vmxnet3_buf_info_t *tbi;
415                 uint32_t first2fill, avail, dw2;
416                 struct rte_mbuf *txm = tx_pkts[nb_tx];
417                 struct rte_mbuf *m_seg = txm;
418                 int copy_size = 0;
419                 bool tso = (txm->ol_flags & PKT_TX_TCP_SEG) != 0;
420                 /* # of descriptors needed for a packet. */
421                 unsigned count = txm->nb_segs;
422
423                 avail = vmxnet3_cmd_ring_desc_avail(&txq->cmd_ring);
424                 if (count > avail) {
425                         /* Is command ring full? */
426                         if (unlikely(avail == 0)) {
427                                 PMD_TX_LOG(DEBUG, "No free ring descriptors");
428                                 txq->stats.tx_ring_full++;
429                                 txq->stats.drop_total += (nb_pkts - nb_tx);
430                                 break;
431                         }
432
433                         /* Command ring is not full, but it does not have
434                          * enough free descriptors for this multi-segment
435                          * packet. Drop the packet and move on to the next one.
436                          */
437                         PMD_TX_LOG(DEBUG, "Running out of ring descriptors "
438                                    "(avail %d needed %d)", avail, count);
439                         txq->stats.drop_total++;
440                         if (tso)
441                                 txq->stats.drop_tso++;
442                         rte_pktmbuf_free(txm);
443                         nb_tx++;
444                         continue;
445                 }
446
447                 /* Drop non-TSO packet that is excessively fragmented */
448                 if (unlikely(!tso && count > VMXNET3_MAX_TXD_PER_PKT)) {
449                         PMD_TX_LOG(ERR, "Non-TSO packet cannot occupy more than %d tx "
450                                    "descriptors. Packet dropped.", VMXNET3_MAX_TXD_PER_PKT);
451                         txq->stats.drop_too_many_segs++;
452                         txq->stats.drop_total++;
453                         rte_pktmbuf_free(txm);
454                         nb_tx++;
455                         continue;
456                 }
457
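                /* A small single-segment packet that fits in a Tx data
                 * descriptor is copied into the data ring; the descriptor
                 * then points at the data ring instead of the mbuf.
                 */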
458                 if (txm->nb_segs == 1 &&
459                     rte_pktmbuf_pkt_len(txm) <= txq->txdata_desc_size) {
460                         struct Vmxnet3_TxDataDesc *tdd;
461
462                         /* Drop zero-length packets */
463                         if (unlikely(rte_pktmbuf_pkt_len(txm) == 0)) {
464                                 txq->stats.drop_total++;
465                                 rte_pktmbuf_free(txm);
466                                 nb_tx++;
467                                 continue;
468                         }
469
470                         tdd = (struct Vmxnet3_TxDataDesc *)
471                                 ((uint8 *)txq->data_ring.base +
472                                  txq->cmd_ring.next2fill *
473                                  txq->txdata_desc_size);
474                         copy_size = rte_pktmbuf_pkt_len(txm);
475                         rte_memcpy(tdd->data, rte_pktmbuf_mtod(txm, char *), copy_size);
476                 }
477
478                 /* use the previous gen bit for the SOP desc */
479                 dw2 = (txq->cmd_ring.gen ^ 0x1) << VMXNET3_TXD_GEN_SHIFT;
480                 first2fill = txq->cmd_ring.next2fill;
481                 do {
482                         /* Remember the transmit buffer for cleanup */
483                         tbi = txq->cmd_ring.buf_info + txq->cmd_ring.next2fill;
484
485                         /* NB: the following assumes that VMXNET3 maximum
486                          * transmit buffer size (16K) is greater than
487                          * the maximum mbuf segment size.
488                          */
489                         gdesc = txq->cmd_ring.base + txq->cmd_ring.next2fill;
490
491                         /* Skip empty segments */
492                         if (unlikely(m_seg->data_len == 0))
493                                 continue;
494
495                         if (copy_size) {
496                                 uint64 offset =
497                                         (uint64)txq->cmd_ring.next2fill *
498                                                         txq->txdata_desc_size;
499                                 gdesc->txd.addr =
500                                         rte_cpu_to_le_64(txq->data_ring.basePA +
501                                                          offset);
502                         } else {
503                                 gdesc->txd.addr = rte_mbuf_data_iova(m_seg);
504                         }
505
506                         gdesc->dword[2] = dw2 | m_seg->data_len;
507                         gdesc->dword[3] = 0;
508
509                         /* move to the next2fill descriptor */
510                         vmxnet3_cmd_ring_adv_next2fill(&txq->cmd_ring);
511
512                         /* use the right gen for non-SOP desc */
513                         dw2 = txq->cmd_ring.gen << VMXNET3_TXD_GEN_SHIFT;
514                 } while ((m_seg = m_seg->next) != NULL);
515
516                 /* set the last buf_info for the pkt */
517                 tbi->m = txm;
518                 /* Update the EOP descriptor */
519                 gdesc->dword[3] |= VMXNET3_TXD_EOP | VMXNET3_TXD_CQ;
520
521                 /* Add VLAN tag if present */
522                 gdesc = txq->cmd_ring.base + first2fill;
523                 if (txm->ol_flags & PKT_TX_VLAN) {
524                         gdesc->txd.ti = 1;
525                         gdesc->txd.tci = txm->vlan_tci;
526                 }
527
528                 if (tso) {
529                         uint16_t mss = txm->tso_segsz;
530
531                         RTE_ASSERT(mss > 0);
532
533                         gdesc->txd.hlen = txm->l2_len + txm->l3_len + txm->l4_len;
534                         gdesc->txd.om = VMXNET3_OM_TSO;
535                         gdesc->txd.msscof = mss;
536
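                        /* Account one deferred unit per resulting TSO segment,
                         * e.g. 8960 payload bytes at MSS 1460 add ceil(8960/1460) = 7.
                         */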
537                         deferred += (rte_pktmbuf_pkt_len(txm) - gdesc->txd.hlen + mss - 1) / mss;
538                 } else if (txm->ol_flags & PKT_TX_L4_MASK) {
539                         gdesc->txd.om = VMXNET3_OM_CSUM;
540                         gdesc->txd.hlen = txm->l2_len + txm->l3_len;
541
542                         switch (txm->ol_flags & PKT_TX_L4_MASK) {
543                         case PKT_TX_TCP_CKSUM:
544                                 gdesc->txd.msscof = gdesc->txd.hlen +
545                                         offsetof(struct rte_tcp_hdr, cksum);
546                                 break;
547                         case PKT_TX_UDP_CKSUM:
548                                 gdesc->txd.msscof = gdesc->txd.hlen +
549                                         offsetof(struct rte_udp_hdr,
550                                                 dgram_cksum);
551                                 break;
552                         default:
553                                 PMD_TX_LOG(WARNING, "requested cksum offload not supported %#llx",
554                                            txm->ol_flags & PKT_TX_L4_MASK);
555                                 abort();
556                         }
557                         deferred++;
558                 } else {
559                         gdesc->txd.hlen = 0;
560                         gdesc->txd.om = VMXNET3_OM_NONE;
561                         gdesc->txd.msscof = 0;
562                         deferred++;
563                 }
564
565                 /* flip the GEN bit on the SOP */
566                 rte_compiler_barrier();
567                 gdesc->dword[2] ^= VMXNET3_TXD_GEN;
568
569                 txq_ctrl->txNumDeferred = rte_cpu_to_le_32(deferred);
570                 nb_tx++;
571         }
572
573         PMD_TX_LOG(DEBUG, "vmxnet3 txThreshold: %u", rte_le_to_cpu_32(txq_ctrl->txThreshold));
574
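        /* Only notify the device once txThreshold units of work have
         * accumulated, batching the relatively expensive TXPROD doorbell write.
         */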
575         if (deferred >= rte_le_to_cpu_32(txq_ctrl->txThreshold)) {
576                 txq_ctrl->txNumDeferred = 0;
577                 /* Notify vSwitch that packets are available. */
578                 VMXNET3_WRITE_BAR0_REG(hw, (VMXNET3_REG_TXPROD + txq->queue_id * VMXNET3_REG_ALIGN),
579                                        txq->cmd_ring.next2fill);
580         }
581
582         return nb_tx;
583 }
584
585 static inline void
586 vmxnet3_renew_desc(vmxnet3_rx_queue_t *rxq, uint8_t ring_id,
587                    struct rte_mbuf *mbuf)
588 {
589         uint32_t val;
590         struct vmxnet3_cmd_ring *ring = &rxq->cmd_ring[ring_id];
591         struct Vmxnet3_RxDesc *rxd =
592                 (struct Vmxnet3_RxDesc *)(ring->base + ring->next2fill);
593         vmxnet3_buf_info_t *buf_info = &ring->buf_info[ring->next2fill];
594
595         if (ring_id == 0) {
596                 /* Usually: One HEAD type buf per packet
597                  * val = (ring->next2fill % rxq->hw->bufs_per_pkt) ?
598                  * VMXNET3_RXD_BTYPE_BODY : VMXNET3_RXD_BTYPE_HEAD;
599                  */
600
601                 /* Single buffer per packet, so all descriptors here are of HEAD type */
602                 val = VMXNET3_RXD_BTYPE_HEAD;
603         } else {
604                 /* All BODY type buffers for 2nd ring */
605                 val = VMXNET3_RXD_BTYPE_BODY;
606         }
607
608         /*
609          * Store the mbuf pointer in buf_info[next2fill]; the buf_info entry
610          * plays the same role as the cookie in a virtio virtqueue
611          */
612         buf_info->m = mbuf;
613         buf_info->len = (uint16_t)(mbuf->buf_len - RTE_PKTMBUF_HEADROOM);
614         buf_info->bufPA = rte_mbuf_data_iova_default(mbuf);
615
616         /* Load Rx Descriptor with the buffer's GPA */
617         rxd->addr = buf_info->bufPA;
618
619         /* After this point rxd->addr MUST not be NULL */
620         rxd->btype = val;
621         rxd->len = buf_info->len;
622         /* Flip gen bit at the end to change ownership */
623         rxd->gen = ring->gen;
624
625         vmxnet3_cmd_ring_adv_next2fill(ring);
626 }
627 /*
628  *  Allocates mbufs and posts Rx descriptors with the buffer details so that
629  *  the device can receive packets into those buffers.
630  *  Ring layout:
631  *      Of the two rings, the 1st ring contains buffers of type 0 and type 1.
632  *      bufs_per_pkt is set such that for non-LRO cases all the buffers required
633  *      by a frame fit in the 1st ring (1st buf of type 0, the rest of type 1).
634  *      The 2nd ring contains only type 1 buffers and is mostly used
635  *      for LRO.
636  */
637 static int
638 vmxnet3_post_rx_bufs(vmxnet3_rx_queue_t *rxq, uint8_t ring_id)
639 {
640         int err = 0;
641         uint32_t i = 0;
642         struct vmxnet3_cmd_ring *ring = &rxq->cmd_ring[ring_id];
643
644         while (vmxnet3_cmd_ring_desc_avail(ring) > 0) {
645                 struct rte_mbuf *mbuf;
646
647                 /* Allocate blank mbuf for the current Rx Descriptor */
648                 mbuf = rte_mbuf_raw_alloc(rxq->mp);
649                 if (unlikely(mbuf == NULL)) {
650                         PMD_RX_LOG(ERR, "Error allocating mbuf");
651                         rxq->stats.rx_buf_alloc_failure++;
652                         err = ENOMEM;
653                         break;
654                 }
655
656                 vmxnet3_renew_desc(rxq, ring_id, mbuf);
657                 i++;
658         }
659
660         /* Return error only if no buffers are posted at present */
661         if (vmxnet3_cmd_ring_desc_avail(ring) >= (ring->size - 1))
662                 return -err;
663         else
664                 return i;
665 }
666
667 /* MSS not provided by vmxnet3, guess one with available information */
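/* The guess parses the Ethernet, IPv4/IPv6 and TCP headers to find the payload
 * offset, then divides the payload by the reported segment count when one is
 * available; otherwise it falls back to an MTU-based estimate.
 */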
668 static uint16_t
669 vmxnet3_guess_mss(struct vmxnet3_hw *hw, const Vmxnet3_RxCompDesc *rcd,
670                 struct rte_mbuf *rxm)
671 {
672         uint32_t hlen, slen;
673         struct rte_ipv4_hdr *ipv4_hdr;
674         struct rte_ipv6_hdr *ipv6_hdr;
675         struct rte_tcp_hdr *tcp_hdr;
676         char *ptr;
677         uint8_t segs;
678
679         RTE_ASSERT(rcd->tcp);
680
681         ptr = rte_pktmbuf_mtod(rxm, char *);
682         slen = rte_pktmbuf_data_len(rxm);
683         hlen = sizeof(struct rte_ether_hdr);
684
685         if (rcd->v4) {
686                 if (unlikely(slen < hlen + sizeof(struct rte_ipv4_hdr)))
687                         return hw->mtu - sizeof(struct rte_ipv4_hdr)
688                                         - sizeof(struct rte_tcp_hdr);
689
690                 ipv4_hdr = (struct rte_ipv4_hdr *)(ptr + hlen);
691                 hlen += rte_ipv4_hdr_len(ipv4_hdr);
692         } else if (rcd->v6) {
693                 if (unlikely(slen < hlen + sizeof(struct rte_ipv6_hdr)))
694                         return hw->mtu - sizeof(struct rte_ipv6_hdr) -
695                                         sizeof(struct rte_tcp_hdr);
696
697                 ipv6_hdr = (struct rte_ipv6_hdr *)(ptr + hlen);
698                 hlen += sizeof(struct rte_ipv6_hdr);
699                 if (unlikely(ipv6_hdr->proto != IPPROTO_TCP)) {
700                         int frag;
701
702                         rte_net_skip_ip6_ext(ipv6_hdr->proto, rxm,
703                                         &hlen, &frag);
704                 }
705         }
706
707         if (unlikely(slen < hlen + sizeof(struct rte_tcp_hdr)))
708                 return hw->mtu - hlen - sizeof(struct rte_tcp_hdr) +
709                                 sizeof(struct rte_ether_hdr);
710
711         tcp_hdr = (struct rte_tcp_hdr *)(ptr + hlen);
712         hlen += (tcp_hdr->data_off & 0xf0) >> 2;
713
714         segs = *vmxnet3_segs_dynfield(rxm);
715         if (segs > 1)
716                 return (rte_pktmbuf_pkt_len(rxm) - hlen + segs - 1) / segs;
717         else
718                 return hw->mtu - hlen + sizeof(struct rte_ether_hdr);
719 }
720
721 /* Receive side checksum and other offloads */
722 static inline void
723 vmxnet3_rx_offload(struct vmxnet3_hw *hw, const Vmxnet3_RxCompDesc *rcd,
724                 struct rte_mbuf *rxm, const uint8_t sop)
725 {
726         uint64_t ol_flags = rxm->ol_flags;
727         uint32_t packet_type = rxm->packet_type;
728
729         /* Offloads set in sop */
730         if (sop) {
731                 /* Set packet type */
732                 packet_type |= RTE_PTYPE_L2_ETHER;
733
734                 /* Check large packet receive */
735                 if (VMXNET3_VERSION_GE_2(hw) &&
736                     rcd->type == VMXNET3_CDTYPE_RXCOMP_LRO) {
737                         const Vmxnet3_RxCompDescExt *rcde =
738                                         (const Vmxnet3_RxCompDescExt *)rcd;
739
740                         rxm->tso_segsz = rcde->mss;
741                         *vmxnet3_segs_dynfield(rxm) = rcde->segCnt;
742                         ol_flags |= PKT_RX_LRO;
743                 }
744         } else { /* Offloads set in eop */
745                 /* Check for RSS */
746                 if (rcd->rssType != VMXNET3_RCD_RSS_TYPE_NONE) {
747                         ol_flags |= PKT_RX_RSS_HASH;
748                         rxm->hash.rss = rcd->rssHash;
749                 }
750
751                 /* Check for hardware stripped VLAN tag */
752                 if (rcd->ts) {
753                         ol_flags |= (PKT_RX_VLAN | PKT_RX_VLAN_STRIPPED);
754                         rxm->vlan_tci = rte_le_to_cpu_16((uint16_t)rcd->tci);
755                 }
756
757                 /* Check packet type, checksum errors, etc. */
758                 if (rcd->cnc) {
759                         ol_flags |= PKT_RX_L4_CKSUM_UNKNOWN;
760                 } else {
761                         if (rcd->v4) {
762                                 packet_type |= RTE_PTYPE_L3_IPV4_EXT_UNKNOWN;
763
764                                 if (rcd->ipc)
765                                         ol_flags |= PKT_RX_IP_CKSUM_GOOD;
766                                 else
767                                         ol_flags |= PKT_RX_IP_CKSUM_BAD;
768
769                                 if (rcd->tuc) {
770                                         ol_flags |= PKT_RX_L4_CKSUM_GOOD;
771                                         if (rcd->tcp)
772                                                 packet_type |= RTE_PTYPE_L4_TCP;
773                                         else
774                                                 packet_type |= RTE_PTYPE_L4_UDP;
775                                 } else {
776                                         if (rcd->tcp) {
777                                                 packet_type |= RTE_PTYPE_L4_TCP;
778                                                 ol_flags |= PKT_RX_L4_CKSUM_BAD;
779                                         } else if (rcd->udp) {
780                                                 packet_type |= RTE_PTYPE_L4_UDP;
781                                                 ol_flags |= PKT_RX_L4_CKSUM_BAD;
782                                         }
783                                 }
784                         } else if (rcd->v6) {
785                                 packet_type |= RTE_PTYPE_L3_IPV6_EXT_UNKNOWN;
786
787                                 if (rcd->tuc) {
788                                         ol_flags |= PKT_RX_L4_CKSUM_GOOD;
789                                         if (rcd->tcp)
790                                                 packet_type |= RTE_PTYPE_L4_TCP;
791                                         else
792                                                 packet_type |= RTE_PTYPE_L4_UDP;
793                                 } else {
794                                         if (rcd->tcp) {
795                                                 packet_type |= RTE_PTYPE_L4_TCP;
796                                                 ol_flags |= PKT_RX_L4_CKSUM_BAD;
797                                         } else if (rcd->udp) {
798                                                 packet_type |= RTE_PTYPE_L4_UDP;
799                                                 ol_flags |= PKT_RX_L4_CKSUM_BAD;
800                                         }
801                                 }
802                         } else {
803                                 packet_type |= RTE_PTYPE_UNKNOWN;
804                         }
805
806                         /* Old variants of vmxnet3 do not provide MSS */
807                         if ((ol_flags & PKT_RX_LRO) && rxm->tso_segsz == 0)
808                                 rxm->tso_segsz = vmxnet3_guess_mss(hw,
809                                                 rcd, rxm);
810                 }
811         }
812
813         rxm->ol_flags = ol_flags;
814         rxm->packet_type = packet_type;
815 }
816
817 /*
818  * Process the Rx completion ring of the given vmxnet3_rx_queue
819  * for a burst of up to nb_pkts packets and return the number received
820  */
821 uint16_t
822 vmxnet3_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t nb_pkts)
823 {
824         uint16_t nb_rx;
825         uint32_t nb_rxd, idx;
826         uint8_t ring_idx;
827         vmxnet3_rx_queue_t *rxq;
828         Vmxnet3_RxCompDesc *rcd;
829         vmxnet3_buf_info_t *rbi;
830         Vmxnet3_RxDesc *rxd;
831         struct rte_mbuf *rxm = NULL;
832         struct vmxnet3_hw *hw;
833
834         nb_rx = 0;
835         ring_idx = 0;
836         nb_rxd = 0;
837         idx = 0;
838
839         rxq = rx_queue;
840         hw = rxq->hw;
841
842         rcd = &rxq->comp_ring.base[rxq->comp_ring.next2proc].rcd;
843
844         if (unlikely(rxq->stopped)) {
845                 PMD_RX_LOG(DEBUG, "Rx queue is stopped.");
846                 return 0;
847         }
848
849         while (rcd->gen == rxq->comp_ring.gen) {
850                 struct rte_mbuf *newm;
851
852                 if (nb_rx >= nb_pkts)
853                         break;
854
855                 newm = rte_mbuf_raw_alloc(rxq->mp);
856                 if (unlikely(newm == NULL)) {
857                         PMD_RX_LOG(ERR, "Error allocating mbuf");
858                         rxq->stats.rx_buf_alloc_failure++;
859                         break;
860                 }
861
862                 idx = rcd->rxdIdx;
863                 ring_idx = vmxnet3_get_ring_idx(hw, rcd->rqID);
864                 rxd = (Vmxnet3_RxDesc *)rxq->cmd_ring[ring_idx].base + idx;
865                 RTE_SET_USED(rxd); /* used only for assert when enabled */
866                 rbi = rxq->cmd_ring[ring_idx].buf_info + idx;
867
868                 PMD_RX_LOG(DEBUG, "rxd idx: %d ring idx: %d.", idx, ring_idx);
869
870                 RTE_ASSERT(rcd->len <= rxd->len);
871                 RTE_ASSERT(rbi->m);
872
873                 /* Get the packet buffer pointer from buf_info */
874                 rxm = rbi->m;
875
876                 /* Clear descriptor associated buf_info to be reused */
877                 rbi->m = NULL;
878                 rbi->bufPA = 0;
879
880                 /* Update next2comp to the index of the descriptor just completed */
881                 rxq->cmd_ring[ring_idx].next2comp = idx;
882
883                 /* For an RCD with EOP set, check for a frame error */
884                 if (unlikely(rcd->eop && rcd->err)) {
885                         rxq->stats.drop_total++;
886                         rxq->stats.drop_err++;
887
888                         if (!rcd->fcs) {
889                                 rxq->stats.drop_fcs++;
890                                 PMD_RX_LOG(ERR, "Recv packet dropped due to frame err.");
891                         }
892                         PMD_RX_LOG(ERR, "Error in received packet rcd#:%d rxd:%d",
893                                    (int)(rcd - (struct Vmxnet3_RxCompDesc *)
894                                          rxq->comp_ring.base), rcd->rxdIdx);
895                         rte_pktmbuf_free_seg(rxm);
896                         if (rxq->start_seg) {
897                                 struct rte_mbuf *start = rxq->start_seg;
898
899                                 rxq->start_seg = NULL;
900                                 rte_pktmbuf_free(start);
901                         }
902                         goto rcd_done;
903                 }
904
905                 /* Initialize newly received packet buffer */
906                 rxm->port = rxq->port_id;
907                 rxm->nb_segs = 1;
908                 rxm->next = NULL;
909                 rxm->pkt_len = (uint16_t)rcd->len;
910                 rxm->data_len = (uint16_t)rcd->len;
911                 rxm->data_off = RTE_PKTMBUF_HEADROOM;
912                 rxm->ol_flags = 0;
913                 rxm->vlan_tci = 0;
914                 rxm->packet_type = 0;
915
916                 /*
917                  * If this is the first buffer of the received packet,
918                  * set the pointer to the first mbuf of the packet
919                  * Otherwise, update the total length and the number of segments
920                  * of the current scattered packet, and update the pointer to
921                  * the last mbuf of the current packet.
922                  */
923                 if (rcd->sop) {
924                         RTE_ASSERT(rxd->btype == VMXNET3_RXD_BTYPE_HEAD);
925
926                         if (unlikely(rcd->len == 0)) {
927                                 RTE_ASSERT(rcd->eop);
928
929                                 PMD_RX_LOG(DEBUG,
930                                            "Rx buf was skipped. rxring[%d][%d])",
931                                            ring_idx, idx);
932                                 rte_pktmbuf_free_seg(rxm);
933                                 goto rcd_done;
934                         }
935
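                        /* vmxnet3 v3+ may deliver small packets through the
                         * Rx data ring; copy the payload from the data
                         * descriptor into the mbuf.
                         */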
936                         if (vmxnet3_rx_data_ring(hw, rcd->rqID)) {
937                                 uint8_t *rdd = rxq->data_ring.base +
938                                         idx * rxq->data_desc_size;
939
940                                 RTE_ASSERT(VMXNET3_VERSION_GE_3(hw));
941                                 rte_memcpy(rte_pktmbuf_mtod(rxm, char *),
942                                            rdd, rcd->len);
943                         }
944
945                         rxq->start_seg = rxm;
946                         rxq->last_seg = rxm;
947                         vmxnet3_rx_offload(hw, rcd, rxm, 1);
948                 } else {
949                         struct rte_mbuf *start = rxq->start_seg;
950
951                         RTE_ASSERT(rxd->btype == VMXNET3_RXD_BTYPE_BODY);
952
953                         if (likely(start && rxm->data_len > 0)) {
954                                 start->pkt_len += rxm->data_len;
955                                 start->nb_segs++;
956
957                                 rxq->last_seg->next = rxm;
958                                 rxq->last_seg = rxm;
959                         } else {
960                                 PMD_RX_LOG(ERR, "Error received empty or out of order frame.");
961                                 rxq->stats.drop_total++;
962                                 rxq->stats.drop_err++;
963
964                                 rte_pktmbuf_free_seg(rxm);
965                         }
966                 }
967
968                 if (rcd->eop) {
969                         struct rte_mbuf *start = rxq->start_seg;
970
971                         vmxnet3_rx_offload(hw, rcd, start, 0);
972                         rx_pkts[nb_rx++] = start;
973                         rxq->start_seg = NULL;
974                 }
975
976 rcd_done:
977                 rxq->cmd_ring[ring_idx].next2comp = idx;
978                 VMXNET3_INC_RING_IDX_ONLY(rxq->cmd_ring[ring_idx].next2comp,
979                                           rxq->cmd_ring[ring_idx].size);
980
981                 /* It's time to renew descriptors */
982                 vmxnet3_renew_desc(rxq, ring_idx, newm);
983                 if (unlikely(rxq->shared->ctrl.updateRxProd)) {
984                         VMXNET3_WRITE_BAR0_REG(hw, rxprod_reg[ring_idx] + (rxq->queue_id * VMXNET3_REG_ALIGN),
985                                                rxq->cmd_ring[ring_idx].next2fill);
986                 }
987
988                 /* Advance to the next descriptor in comp_ring */
989                 vmxnet3_comp_ring_adv_next2proc(&rxq->comp_ring);
990
991                 rcd = &rxq->comp_ring.base[rxq->comp_ring.next2proc].rcd;
992                 nb_rxd++;
993                 if (nb_rxd > rxq->cmd_ring[0].size) {
994                         PMD_RX_LOG(ERR, "Used up quota of receiving packets,"
995                                    " relinquish control.");
996                         break;
997                 }
998         }
999
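        /* Nothing was completed: the command rings may have run dry (e.g. after
         * earlier mbuf allocation failures), so try to replenish them and
         * update the producer index.
         */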
1000         if (unlikely(nb_rxd == 0)) {
1001                 uint32_t avail;
1002                 for (ring_idx = 0; ring_idx < VMXNET3_RX_CMDRING_SIZE; ring_idx++) {
1003                         avail = vmxnet3_cmd_ring_desc_avail(&rxq->cmd_ring[ring_idx]);
1004                         if (unlikely(avail > 0)) {
1005                                 /* try to alloc new buf and renew descriptors */
1006                                 vmxnet3_post_rx_bufs(rxq, ring_idx);
1007                         }
1008                 }
1009                 if (unlikely(rxq->shared->ctrl.updateRxProd)) {
1010                         for (ring_idx = 0; ring_idx < VMXNET3_RX_CMDRING_SIZE; ring_idx++) {
1011                                 VMXNET3_WRITE_BAR0_REG(hw, rxprod_reg[ring_idx] + (rxq->queue_id * VMXNET3_REG_ALIGN),
1012                                                        rxq->cmd_ring[ring_idx].next2fill);
1013                         }
1014                 }
1015         }
1016
1017         return nb_rx;
1018 }
1019
1020 int
1021 vmxnet3_dev_tx_queue_setup(struct rte_eth_dev *dev,
1022                            uint16_t queue_idx,
1023                            uint16_t nb_desc,
1024                            unsigned int socket_id,
1025                            const struct rte_eth_txconf *tx_conf __rte_unused)
1026 {
1027         struct vmxnet3_hw *hw = dev->data->dev_private;
1028         const struct rte_memzone *mz;
1029         struct vmxnet3_tx_queue *txq;
1030         struct vmxnet3_cmd_ring *ring;
1031         struct vmxnet3_comp_ring *comp_ring;
1032         struct vmxnet3_data_ring *data_ring;
1033         int size;
1034
1035         PMD_INIT_FUNC_TRACE();
1036
1037         txq = rte_zmalloc("ethdev_tx_queue", sizeof(struct vmxnet3_tx_queue),
1038                           RTE_CACHE_LINE_SIZE);
1039         if (txq == NULL) {
1040                 PMD_INIT_LOG(ERR, "Can not allocate tx queue structure");
1041                 return -ENOMEM;
1042         }
1043
1044         txq->queue_id = queue_idx;
1045         txq->port_id = dev->data->port_id;
1046         txq->shared = NULL; /* set in vmxnet3_setup_driver_shared() */
1047         txq->hw = hw;
1048         txq->qid = queue_idx;
1049         txq->stopped = TRUE;
1050         txq->txdata_desc_size = hw->txdata_desc_size;
1051
1052         ring = &txq->cmd_ring;
1053         comp_ring = &txq->comp_ring;
1054         data_ring = &txq->data_ring;
1055
1056         /* Tx vmxnet ring length should be between 512-4096 */
1057         if (nb_desc < VMXNET3_DEF_TX_RING_SIZE) {
1058                 PMD_INIT_LOG(ERR, "VMXNET3 Tx Ring Size Min: %u",
1059                              VMXNET3_DEF_TX_RING_SIZE);
1060                 return -EINVAL;
1061         } else if (nb_desc > VMXNET3_TX_RING_MAX_SIZE) {
1062                 PMD_INIT_LOG(ERR, "VMXNET3 Tx Ring Size Max: %u",
1063                              VMXNET3_TX_RING_MAX_SIZE);
1064                 return -EINVAL;
1065         } else {
1066                 ring->size = nb_desc;
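                /* Round the requested descriptor count down to the ring-size
                 * alignment required by the device (the mask clears the
                 * low-order bits).
                 */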
1067                 ring->size &= ~VMXNET3_RING_SIZE_MASK;
1068         }
1069         comp_ring->size = data_ring->size = ring->size;
1070
1071         /* Tx vmxnet rings structure initialization */
1072         ring->next2fill = 0;
1073         ring->next2comp = 0;
1074         ring->gen = VMXNET3_INIT_GEN;
1075         comp_ring->next2proc = 0;
1076         comp_ring->gen = VMXNET3_INIT_GEN;
1077
1078         size = sizeof(struct Vmxnet3_TxDesc) * ring->size;
1079         size += sizeof(struct Vmxnet3_TxCompDesc) * comp_ring->size;
1080         size += txq->txdata_desc_size * data_ring->size;
1081
1082         mz = rte_eth_dma_zone_reserve(dev, "txdesc", queue_idx, size,
1083                                       VMXNET3_RING_BA_ALIGN, socket_id);
1084         if (mz == NULL) {
1085                 PMD_INIT_LOG(ERR, "ERROR: Creating queue descriptors zone");
1086                 return -ENOMEM;
1087         }
1088         txq->mz = mz;
1089         memset(mz->addr, 0, mz->len);
1090
1091         /* cmd_ring initialization */
1092         ring->base = mz->addr;
1093         ring->basePA = mz->iova;
1094
1095         /* comp_ring initialization */
1096         comp_ring->base = ring->base + ring->size;
1097         comp_ring->basePA = ring->basePA +
1098                 (sizeof(struct Vmxnet3_TxDesc) * ring->size);
1099
1100         /* data_ring initialization */
1101         data_ring->base = (Vmxnet3_TxDataDesc *)(comp_ring->base + comp_ring->size);
1102         data_ring->basePA = comp_ring->basePA +
1103                         (sizeof(struct Vmxnet3_TxCompDesc) * comp_ring->size);
1104
1105         /* cmd_ring0 buf_info allocation */
1106         ring->buf_info = rte_zmalloc("tx_ring_buf_info",
1107                                      ring->size * sizeof(vmxnet3_buf_info_t), RTE_CACHE_LINE_SIZE);
1108         if (ring->buf_info == NULL) {
1109                 PMD_INIT_LOG(ERR, "ERROR: Creating tx_buf_info structure");
1110                 return -ENOMEM;
1111         }
1112
1113         /* Update the data portion with txq */
1114         dev->data->tx_queues[queue_idx] = txq;
1115
1116         return 0;
1117 }
1118
1119 int
1120 vmxnet3_dev_rx_queue_setup(struct rte_eth_dev *dev,
1121                            uint16_t queue_idx,
1122                            uint16_t nb_desc,
1123                            unsigned int socket_id,
1124                            __rte_unused const struct rte_eth_rxconf *rx_conf,
1125                            struct rte_mempool *mp)
1126 {
1127         const struct rte_memzone *mz;
1128         struct vmxnet3_rx_queue *rxq;
1129         struct vmxnet3_hw *hw = dev->data->dev_private;
1130         struct vmxnet3_cmd_ring *ring0, *ring1, *ring;
1131         struct vmxnet3_comp_ring *comp_ring;
1132         struct vmxnet3_rx_data_ring *data_ring;
1133         int size;
1134         uint8_t i;
1135         char mem_name[32];
1136
1137         PMD_INIT_FUNC_TRACE();
1138
1139         rxq = rte_zmalloc("ethdev_rx_queue", sizeof(struct vmxnet3_rx_queue),
1140                           RTE_CACHE_LINE_SIZE);
1141         if (rxq == NULL) {
1142                 PMD_INIT_LOG(ERR, "Can not allocate rx queue structure");
1143                 return -ENOMEM;
1144         }
1145
1146         rxq->mp = mp;
1147         rxq->queue_id = queue_idx;
1148         rxq->port_id = dev->data->port_id;
1149         rxq->shared = NULL; /* set in vmxnet3_setup_driver_shared() */
1150         rxq->hw = hw;
1151         rxq->qid1 = queue_idx;
1152         rxq->qid2 = queue_idx + hw->num_rx_queues;
1153         rxq->data_ring_qid = queue_idx + 2 * hw->num_rx_queues;
1154         rxq->data_desc_size = hw->rxdata_desc_size;
1155         rxq->stopped = TRUE;
1156
1157         ring0 = &rxq->cmd_ring[0];
1158         ring1 = &rxq->cmd_ring[1];
1159         comp_ring = &rxq->comp_ring;
1160         data_ring = &rxq->data_ring;
1161
1162         /* Rx vmxnet ring length should be between 256 and 4096 */
1163         if (nb_desc < VMXNET3_DEF_RX_RING_SIZE) {
1164                 PMD_INIT_LOG(ERR, "VMXNET3 Rx Ring Size Min: 256");
1165                 return -EINVAL;
1166         } else if (nb_desc > VMXNET3_RX_RING_MAX_SIZE) {
1167                 PMD_INIT_LOG(ERR, "VMXNET3 Rx Ring Size Max: 4096");
1168                 return -EINVAL;
1169         } else {
1170                 ring0->size = nb_desc;
1171                 ring0->size &= ~VMXNET3_RING_SIZE_MASK;
1172                 ring1->size = ring0->size;
1173         }
1174
1175         comp_ring->size = ring0->size + ring1->size;
1176         data_ring->size = ring0->size;
1177
1178         /* Rx vmxnet rings structure initialization */
1179         ring0->next2fill = 0;
1180         ring1->next2fill = 0;
1181         ring0->next2comp = 0;
1182         ring1->next2comp = 0;
1183         ring0->gen = VMXNET3_INIT_GEN;
1184         ring1->gen = VMXNET3_INIT_GEN;
1185         comp_ring->next2proc = 0;
1186         comp_ring->gen = VMXNET3_INIT_GEN;
1187
1188         size = sizeof(struct Vmxnet3_RxDesc) * (ring0->size + ring1->size);
1189         size += sizeof(struct Vmxnet3_RxCompDesc) * comp_ring->size;
1190         if (VMXNET3_VERSION_GE_3(hw) && rxq->data_desc_size)
1191                 size += rxq->data_desc_size * data_ring->size;
1192
1193         mz = rte_eth_dma_zone_reserve(dev, "rxdesc", queue_idx, size,
1194                                       VMXNET3_RING_BA_ALIGN, socket_id);
1195         if (mz == NULL) {
1196                 PMD_INIT_LOG(ERR, "ERROR: Creating queue descriptors zone");
1197                 return -ENOMEM;
1198         }
1199         rxq->mz = mz;
1200         memset(mz->addr, 0, mz->len);
1201
1202         /* cmd_ring0 initialization */
1203         ring0->base = mz->addr;
1204         ring0->basePA = mz->iova;
1205
1206         /* cmd_ring1 initialization */
1207         ring1->base = ring0->base + ring0->size;
1208         ring1->basePA = ring0->basePA + sizeof(struct Vmxnet3_RxDesc) * ring0->size;
1209
1210         /* comp_ring initialization */
1211         comp_ring->base = ring1->base + ring1->size;
1212         comp_ring->basePA = ring1->basePA + sizeof(struct Vmxnet3_RxDesc) *
1213                 ring1->size;
1214
1215         /* data_ring initialization */
1216         if (VMXNET3_VERSION_GE_3(hw) && rxq->data_desc_size) {
1217                 data_ring->base =
1218                         (uint8_t *)(comp_ring->base + comp_ring->size);
1219                 data_ring->basePA = comp_ring->basePA +
1220                         sizeof(struct Vmxnet3_RxCompDesc) * comp_ring->size;
1221         }
1222
1223         /* cmd_ring0-cmd_ring1 buf_info allocation */
1224         for (i = 0; i < VMXNET3_RX_CMDRING_SIZE; i++) {
1225
1226                 ring = &rxq->cmd_ring[i];
1227                 ring->rid = i;
1228                 snprintf(mem_name, sizeof(mem_name), "rx_ring_%d_buf_info", i);
1229
1230                 ring->buf_info = rte_zmalloc(mem_name,
1231                                              ring->size * sizeof(vmxnet3_buf_info_t),
1232                                              RTE_CACHE_LINE_SIZE);
1233                 if (ring->buf_info == NULL) {
1234                         PMD_INIT_LOG(ERR, "ERROR: Creating rx_buf_info structure");
1235                         return -ENOMEM;
1236                 }
1237         }
1238
1239         /* Update the data portion with rxq */
1240         dev->data->rx_queues[queue_idx] = rxq;
1241
1242         return 0;
1243 }
1244
1245 /*
1246  * Initializes Receive Unit
1247  * Load mbufs in rx queue in advance
1248  */
1249 int
1250 vmxnet3_dev_rxtx_init(struct rte_eth_dev *dev)
1251 {
1252         struct vmxnet3_hw *hw = dev->data->dev_private;
1253
1254         int i, ret;
1255         uint8_t j;
1256
1257         PMD_INIT_FUNC_TRACE();
1258
1259         for (i = 0; i < hw->num_rx_queues; i++) {
1260                 vmxnet3_rx_queue_t *rxq = dev->data->rx_queues[i];
1261
1262                 for (j = 0; j < VMXNET3_RX_CMDRING_SIZE; j++) {
1263                         /* Fill the entire ring with newly allocated mbufs */
1264                         ret = vmxnet3_post_rx_bufs(rxq, j);
1265                         if (ret <= 0) {
1266                                 PMD_INIT_LOG(ERR,
1267                                              "ERROR: Posting Rxq: %d buffers ring: %d",
1268                                              i, j);
1269                                 return -ret;
1270                         }
1271                         /*
1272                          * Update the device with the next2fill index so it
1273                          * knows which buffers are available for incoming packets.
1274                          */
1275                         if (unlikely(rxq->shared->ctrl.updateRxProd)) {
1276                                 VMXNET3_WRITE_BAR0_REG(hw, rxprod_reg[j] + (rxq->queue_id * VMXNET3_REG_ALIGN),
1277                                                        rxq->cmd_ring[j].next2fill);
1278                         }
1279                 }
1280                 rxq->stopped = FALSE;
1281                 rxq->start_seg = NULL;
1282         }
1283
1284         for (i = 0; i < dev->data->nb_tx_queues; i++) {
1285                 struct vmxnet3_tx_queue *txq = dev->data->tx_queues[i];
1286
1287                 txq->stopped = FALSE;
1288         }
1289
1290         return 0;
1291 }
1292
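/* Default 40-byte Toeplitz RSS hash key (the common Intel default), used when
 * the application does not supply its own key.
 */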
1293 static uint8_t rss_intel_key[40] = {
1294         0x6D, 0x5A, 0x56, 0xDA, 0x25, 0x5B, 0x0E, 0xC2,
1295         0x41, 0x67, 0x25, 0x3D, 0x43, 0xA3, 0x8F, 0xB0,
1296         0xD0, 0xCA, 0x2B, 0xCB, 0xAE, 0x7B, 0x30, 0xB4,
1297         0x77, 0xCB, 0x2D, 0xA3, 0x80, 0x30, 0xF2, 0x0C,
1298         0x6A, 0x42, 0xB7, 0x3B, 0xBE, 0xAC, 0x01, 0xFA,
1299 };
1300
1301 /*
1302  * Additional RSS configuration based on the vmxnet3 v4+ API
1303  */
1304 int
1305 vmxnet3_v4_rss_configure(struct rte_eth_dev *dev)
1306 {
1307         struct vmxnet3_hw *hw = dev->data->dev_private;
1308         Vmxnet3_DriverShared *shared = hw->shared;
1309         Vmxnet3_CmdInfo *cmdInfo = &shared->cu.cmdInfo;
1310         struct rte_eth_rss_conf *port_rss_conf;
1311         uint64_t rss_hf;
1312         uint32_t ret;
1313
1314         PMD_INIT_FUNC_TRACE();
1315
1316         cmdInfo->setRSSFields = 0;
1317         port_rss_conf = &dev->data->dev_conf.rx_adv_conf.rss_conf;
1318
1319         if ((port_rss_conf->rss_hf & VMXNET3_MANDATORY_V4_RSS) !=
1320             VMXNET3_MANDATORY_V4_RSS) {
1321                 PMD_INIT_LOG(WARNING, "RSS: IPv4/6 TCP is required for vmxnet3 v4 RSS,"
1322                              " automatically setting it");
1323                 port_rss_conf->rss_hf |= VMXNET3_MANDATORY_V4_RSS;
1324         }
1325
1326         rss_hf = port_rss_conf->rss_hf &
1327                 (VMXNET3_V4_RSS_MASK | VMXNET3_RSS_OFFLOAD_ALL);
1328
1329         if (rss_hf & RTE_ETH_RSS_NONFRAG_IPV4_TCP)
1330                 cmdInfo->setRSSFields |= VMXNET3_RSS_FIELDS_TCPIP4;
1331         if (rss_hf & RTE_ETH_RSS_NONFRAG_IPV6_TCP)
1332                 cmdInfo->setRSSFields |= VMXNET3_RSS_FIELDS_TCPIP6;
1333         if (rss_hf & RTE_ETH_RSS_NONFRAG_IPV4_UDP)
1334                 cmdInfo->setRSSFields |= VMXNET3_RSS_FIELDS_UDPIP4;
1335         if (rss_hf & RTE_ETH_RSS_NONFRAG_IPV6_UDP)
1336                 cmdInfo->setRSSFields |= VMXNET3_RSS_FIELDS_UDPIP6;
1337
1338         VMXNET3_WRITE_BAR1_REG(hw, VMXNET3_REG_CMD,
1339                                VMXNET3_CMD_SET_RSS_FIELDS);
1340         ret = VMXNET3_READ_BAR1_REG(hw, VMXNET3_REG_CMD);
1341
1342         if (ret != VMXNET3_SUCCESS) {
1343                 PMD_DRV_LOG(ERR, "Set RSS fields (v4) failed: %d", ret);
1344         }
1345
1346         return ret;
1347 }
1348
1349 /*
1350  * Configure RSS feature
1351  */
1352 int
1353 vmxnet3_rss_configure(struct rte_eth_dev *dev)
1354 {
1355         struct vmxnet3_hw *hw = dev->data->dev_private;
1356         struct VMXNET3_RSSConf *dev_rss_conf;
1357         struct rte_eth_rss_conf *port_rss_conf;
1358         uint64_t rss_hf;
1359         uint8_t i, j;
1360
1361         PMD_INIT_FUNC_TRACE();
1362
1363         dev_rss_conf = hw->rss_conf;
1364         port_rss_conf = &dev->data->dev_conf.rx_adv_conf.rss_conf;
1365
1366         /* loading hashFunc */
1367         dev_rss_conf->hashFunc = VMXNET3_RSS_HASH_FUNC_TOEPLITZ;
1368         /* loading hashKeySize */
1369         dev_rss_conf->hashKeySize = VMXNET3_RSS_MAX_KEY_SIZE;
1370         /* loading indTableSize: Must not exceed VMXNET3_RSS_MAX_IND_TABLE_SIZE (128) */
1371         dev_rss_conf->indTableSize = (uint16_t)(hw->num_rx_queues * 4);
1372
1373         if (port_rss_conf->rss_key == NULL) {
1374                 /* Default hash key */
1375                 port_rss_conf->rss_key = rss_intel_key;
1376         }
1377
1378         /* loading hashKey */
1379         memcpy(&dev_rss_conf->hashKey[0], port_rss_conf->rss_key,
1380                dev_rss_conf->hashKeySize);
1381
1382         /* loading indTable */
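        /* Fill the indirection table round-robin over the Rx queues,
         * e.g. 0,1,0,1,... for two queues.
         */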
1383         for (i = 0, j = 0; i < dev_rss_conf->indTableSize; i++, j++) {
1384                 if (j == dev->data->nb_rx_queues)
1385                         j = 0;
1386                 dev_rss_conf->indTable[i] = j;
1387         }
1388
1389         /* loading hashType */
1390         dev_rss_conf->hashType = 0;
1391         rss_hf = port_rss_conf->rss_hf & VMXNET3_RSS_OFFLOAD_ALL;
1392         if (rss_hf & RTE_ETH_RSS_IPV4)
1393                 dev_rss_conf->hashType |= VMXNET3_RSS_HASH_TYPE_IPV4;
1394         if (rss_hf & RTE_ETH_RSS_NONFRAG_IPV4_TCP)
1395                 dev_rss_conf->hashType |= VMXNET3_RSS_HASH_TYPE_TCP_IPV4;
1396         if (rss_hf & RTE_ETH_RSS_IPV6)
1397                 dev_rss_conf->hashType |= VMXNET3_RSS_HASH_TYPE_IPV6;
1398         if (rss_hf & RTE_ETH_RSS_NONFRAG_IPV6_TCP)
1399                 dev_rss_conf->hashType |= VMXNET3_RSS_HASH_TYPE_TCP_IPV6;
1400
1401         return VMXNET3_SUCCESS;
1402 }