dpdk.git: drivers/net/vmxnet3/vmxnet3_rxtx.c (f6e2d987778481dce148f83603164b2bf4fc2d43)
1 /* SPDX-License-Identifier: BSD-3-Clause
2  * Copyright(c) 2010-2015 Intel Corporation
3  */
4
5 #include <sys/queue.h>
6
7 #include <stdio.h>
8 #include <stdlib.h>
9 #include <string.h>
10 #include <errno.h>
11 #include <stdint.h>
12 #include <stdarg.h>
13 #include <unistd.h>
14 #include <inttypes.h>
15
16 #include <rte_byteorder.h>
17 #include <rte_common.h>
18 #include <rte_cycles.h>
19 #include <rte_log.h>
20 #include <rte_debug.h>
21 #include <rte_interrupts.h>
22 #include <rte_pci.h>
23 #include <rte_memory.h>
24 #include <rte_memzone.h>
25 #include <rte_launch.h>
26 #include <rte_eal.h>
27 #include <rte_per_lcore.h>
28 #include <rte_lcore.h>
29 #include <rte_atomic.h>
30 #include <rte_branch_prediction.h>
31 #include <rte_mempool.h>
32 #include <rte_malloc.h>
33 #include <rte_mbuf.h>
34 #include <rte_ether.h>
35 #include <rte_ethdev_driver.h>
36 #include <rte_prefetch.h>
37 #include <rte_ip.h>
38 #include <rte_udp.h>
39 #include <rte_tcp.h>
40 #include <rte_sctp.h>
41 #include <rte_string_fns.h>
42 #include <rte_errno.h>
43 #include <rte_net.h>
44
45 #include "base/vmxnet3_defs.h"
46 #include "vmxnet3_ring.h"
47
48 #include "vmxnet3_logs.h"
49 #include "vmxnet3_ethdev.h"
50
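/*
 * Tx offloads handled by this PMD.  Any flag outside this set (but within
 * PKT_TX_OFFLOAD_MASK) is not supported and causes vmxnet3_prep_pkts() to
 * reject the packet.
 */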
51 #define VMXNET3_TX_OFFLOAD_MASK ( \
52                 PKT_TX_VLAN_PKT | \
53                 PKT_TX_L4_MASK |  \
54                 PKT_TX_TCP_SEG)
55
56 #define VMXNET3_TX_OFFLOAD_NOTSUP_MASK  \
57         (PKT_TX_OFFLOAD_MASK ^ VMXNET3_TX_OFFLOAD_MASK)
58
59 static const uint32_t rxprod_reg[2] = {VMXNET3_REG_RXPROD, VMXNET3_REG_RXPROD2};
60
61 static int vmxnet3_post_rx_bufs(vmxnet3_rx_queue_t*, uint8_t);
62 static void vmxnet3_tq_tx_complete(vmxnet3_tx_queue_t *);
63 #ifdef RTE_LIBRTE_VMXNET3_DEBUG_DRIVER_NOT_USED
64 static void vmxnet3_rxq_dump(struct vmxnet3_rx_queue *);
65 static void vmxnet3_txq_dump(struct vmxnet3_tx_queue *);
66 #endif
67
68 #ifdef RTE_LIBRTE_VMXNET3_DEBUG_DRIVER_NOT_USED
69 static void
70 vmxnet3_rxq_dump(struct vmxnet3_rx_queue *rxq)
71 {
72         uint32_t avail = 0;
73
74         if (rxq == NULL)
75                 return;
76
77         PMD_RX_LOG(DEBUG,
78                    "RXQ: cmd0 base : %p cmd1 base : %p comp ring base : %p.",
79                    rxq->cmd_ring[0].base, rxq->cmd_ring[1].base, rxq->comp_ring.base);
80         PMD_RX_LOG(DEBUG,
81                    "RXQ: cmd0 basePA : 0x%lx cmd1 basePA : 0x%lx comp ring basePA : 0x%lx.",
82                    (unsigned long)rxq->cmd_ring[0].basePA,
83                    (unsigned long)rxq->cmd_ring[1].basePA,
84                    (unsigned long)rxq->comp_ring.basePA);
85
86         avail = vmxnet3_cmd_ring_desc_avail(&rxq->cmd_ring[0]);
87         PMD_RX_LOG(DEBUG,
88                    "RXQ:cmd0: size=%u; free=%u; next2proc=%u; queued=%u",
89                    (uint32_t)rxq->cmd_ring[0].size, avail,
90                    rxq->comp_ring.next2proc,
91                    rxq->cmd_ring[0].size - avail);
92
93         avail = vmxnet3_cmd_ring_desc_avail(&rxq->cmd_ring[1]);
94         PMD_RX_LOG(DEBUG, "RXQ:cmd1 size=%u; free=%u; next2proc=%u; queued=%u",
95                    (uint32_t)rxq->cmd_ring[1].size, avail, rxq->comp_ring.next2proc,
96                    rxq->cmd_ring[1].size - avail);
97
98 }
99
100 static void
101 vmxnet3_txq_dump(struct vmxnet3_tx_queue *txq)
102 {
103         uint32_t avail = 0;
104
105         if (txq == NULL)
106                 return;
107
108         PMD_TX_LOG(DEBUG, "TXQ: cmd base : %p comp ring base : %p data ring base : %p.",
109                    txq->cmd_ring.base, txq->comp_ring.base, txq->data_ring.base);
110         PMD_TX_LOG(DEBUG, "TXQ: cmd basePA : 0x%lx comp ring basePA : 0x%lx data ring basePA : 0x%lx.",
111                    (unsigned long)txq->cmd_ring.basePA,
112                    (unsigned long)txq->comp_ring.basePA,
113                    (unsigned long)txq->data_ring.basePA);
114
115         avail = vmxnet3_cmd_ring_desc_avail(&txq->cmd_ring);
116         PMD_TX_LOG(DEBUG, "TXQ: size=%u; free=%u; next2proc=%u; queued=%u",
117                    (uint32_t)txq->cmd_ring.size, avail,
118                    txq->comp_ring.next2proc, txq->cmd_ring.size - avail);
119 }
120 #endif
121
122 static void
123 vmxnet3_tx_cmd_ring_release_mbufs(vmxnet3_cmd_ring_t *ring)
124 {
125         while (ring->next2comp != ring->next2fill) {
126                 /* No need to worry about desc ownership, device is quiesced by now. */
127                 vmxnet3_buf_info_t *buf_info = ring->buf_info + ring->next2comp;
128
129                 if (buf_info->m) {
130                         rte_pktmbuf_free(buf_info->m);
131                         buf_info->m = NULL;
132                         buf_info->bufPA = 0;
133                         buf_info->len = 0;
134                 }
135                 vmxnet3_cmd_ring_adv_next2comp(ring);
136         }
137 }
138
139 static void
140 vmxnet3_rx_cmd_ring_release_mbufs(vmxnet3_cmd_ring_t *ring)
141 {
142         uint32_t i;
143
144         for (i = 0; i < ring->size; i++) {
145                 /* No need to worry about desc ownership, device is quiesced by now. */
146                 vmxnet3_buf_info_t *buf_info = &ring->buf_info[i];
147
148                 if (buf_info->m) {
149                         rte_pktmbuf_free_seg(buf_info->m);
150                         buf_info->m = NULL;
151                         buf_info->bufPA = 0;
152                         buf_info->len = 0;
153                 }
154                 vmxnet3_cmd_ring_adv_next2comp(ring);
155         }
156 }
157
158 static void
159 vmxnet3_cmd_ring_release(vmxnet3_cmd_ring_t *ring)
160 {
161         rte_free(ring->buf_info);
162         ring->buf_info = NULL;
163 }
164
165 void
166 vmxnet3_dev_tx_queue_release(void *txq)
167 {
168         vmxnet3_tx_queue_t *tq = txq;
169
170         if (tq != NULL) {
171                 /* Release mbufs */
172                 vmxnet3_tx_cmd_ring_release_mbufs(&tq->cmd_ring);
173                 /* Release the cmd_ring */
174                 vmxnet3_cmd_ring_release(&tq->cmd_ring);
175                 /* Release the memzone */
176                 rte_memzone_free(tq->mz);
177                 /* Release the queue */
178                 rte_free(tq);
179         }
180 }
181
182 void
183 vmxnet3_dev_rx_queue_release(void *rxq)
184 {
185         int i;
186         vmxnet3_rx_queue_t *rq = rxq;
187
188         if (rq != NULL) {
189                 /* Release mbufs */
190                 for (i = 0; i < VMXNET3_RX_CMDRING_SIZE; i++)
191                         vmxnet3_rx_cmd_ring_release_mbufs(&rq->cmd_ring[i]);
192
193                 /* Release both the cmd_rings */
194                 for (i = 0; i < VMXNET3_RX_CMDRING_SIZE; i++)
195                         vmxnet3_cmd_ring_release(&rq->cmd_ring[i]);
196
197                 /* Release the memzone */
198                 rte_memzone_free(rq->mz);
199
200                 /* Release the queue */
201                 rte_free(rq);
202         }
203 }
204
205 static void
206 vmxnet3_dev_tx_queue_reset(void *txq)
207 {
208         vmxnet3_tx_queue_t *tq = txq;
209         struct vmxnet3_cmd_ring *ring = &tq->cmd_ring;
210         struct vmxnet3_comp_ring *comp_ring = &tq->comp_ring;
211         struct vmxnet3_data_ring *data_ring = &tq->data_ring;
212         int size;
213
214         if (tq != NULL) {
215                 /* Release the cmd_ring mbufs */
216                 vmxnet3_tx_cmd_ring_release_mbufs(&tq->cmd_ring);
217         }
218
219         /* Tx vmxnet rings structure initialization */
220         ring->next2fill = 0;
221         ring->next2comp = 0;
222         ring->gen = VMXNET3_INIT_GEN;
223         comp_ring->next2proc = 0;
224         comp_ring->gen = VMXNET3_INIT_GEN;
225
226         size = sizeof(struct Vmxnet3_TxDesc) * ring->size;
227         size += sizeof(struct Vmxnet3_TxCompDesc) * comp_ring->size;
228         size += tq->txdata_desc_size * data_ring->size;
229
230         memset(ring->base, 0, size);
231 }
232
233 static void
234 vmxnet3_dev_rx_queue_reset(void *rxq)
235 {
236         int i;
237         vmxnet3_rx_queue_t *rq = rxq;
238         struct vmxnet3_hw *hw = rq->hw;
239         struct vmxnet3_cmd_ring *ring0, *ring1;
240         struct vmxnet3_comp_ring *comp_ring;
241         struct vmxnet3_rx_data_ring *data_ring = &rq->data_ring;
242         int size;
243
244         /* Release both the cmd_rings mbufs */
245         for (i = 0; i < VMXNET3_RX_CMDRING_SIZE; i++)
246                 vmxnet3_rx_cmd_ring_release_mbufs(&rq->cmd_ring[i]);
247
248         ring0 = &rq->cmd_ring[0];
249         ring1 = &rq->cmd_ring[1];
250         comp_ring = &rq->comp_ring;
251
252         /* Rx vmxnet rings structure initialization */
253         ring0->next2fill = 0;
254         ring1->next2fill = 0;
255         ring0->next2comp = 0;
256         ring1->next2comp = 0;
257         ring0->gen = VMXNET3_INIT_GEN;
258         ring1->gen = VMXNET3_INIT_GEN;
259         comp_ring->next2proc = 0;
260         comp_ring->gen = VMXNET3_INIT_GEN;
261
262         size = sizeof(struct Vmxnet3_RxDesc) * (ring0->size + ring1->size);
263         size += sizeof(struct Vmxnet3_RxCompDesc) * comp_ring->size;
264         if (VMXNET3_VERSION_GE_3(hw) && rq->data_desc_size)
265                 size += rq->data_desc_size * data_ring->size;
266
267         memset(ring0->base, 0, size);
268 }
269
270 void
271 vmxnet3_dev_clear_queues(struct rte_eth_dev *dev)
272 {
273         unsigned i;
274
275         PMD_INIT_FUNC_TRACE();
276
277         for (i = 0; i < dev->data->nb_tx_queues; i++) {
278                 struct vmxnet3_tx_queue *txq = dev->data->tx_queues[i];
279
280                 if (txq != NULL) {
281                         txq->stopped = TRUE;
282                         vmxnet3_dev_tx_queue_reset(txq);
283                 }
284         }
285
286         for (i = 0; i < dev->data->nb_rx_queues; i++) {
287                 struct vmxnet3_rx_queue *rxq = dev->data->rx_queues[i];
288
289                 if (rxq != NULL) {
290                         rxq->stopped = TRUE;
291                         vmxnet3_dev_rx_queue_reset(rxq);
292                 }
293         }
294 }
295
296 static int
297 vmxnet3_unmap_pkt(uint16_t eop_idx, vmxnet3_tx_queue_t *txq)
298 {
299         int completed = 0;
300         struct rte_mbuf *mbuf;
301
302         /* Release cmd_ring descriptor and free mbuf */
303         RTE_ASSERT(txq->cmd_ring.base[eop_idx].txd.eop == 1);
304
305         mbuf = txq->cmd_ring.buf_info[eop_idx].m;
306         if (mbuf == NULL)
307                 rte_panic("EOP desc does not point to a valid mbuf");
308         rte_pktmbuf_free(mbuf);
309
310         txq->cmd_ring.buf_info[eop_idx].m = NULL;
311
312         while (txq->cmd_ring.next2comp != eop_idx) {
313                 /* no out-of-order completion */
314                 RTE_ASSERT(txq->cmd_ring.base[txq->cmd_ring.next2comp].txd.cq == 0);
315                 vmxnet3_cmd_ring_adv_next2comp(&txq->cmd_ring);
316                 completed++;
317         }
318
319         /* Mark the txd for which tcd was generated as completed */
320         vmxnet3_cmd_ring_adv_next2comp(&txq->cmd_ring);
321
322         return completed + 1;
323 }
324
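/*
 * Reap Tx completions: walk the completion ring while the descriptor
 * generation bit matches the ring's current generation, releasing the
 * command-ring descriptors (and the EOP mbuf) for each completed packet.
 */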
325 static void
326 vmxnet3_tq_tx_complete(vmxnet3_tx_queue_t *txq)
327 {
328         int completed = 0;
329         vmxnet3_comp_ring_t *comp_ring = &txq->comp_ring;
330         struct Vmxnet3_TxCompDesc *tcd = (struct Vmxnet3_TxCompDesc *)
331                 (comp_ring->base + comp_ring->next2proc);
332
333         while (tcd->gen == comp_ring->gen) {
334                 completed += vmxnet3_unmap_pkt(tcd->txdIdx, txq);
335
336                 vmxnet3_comp_ring_adv_next2proc(comp_ring);
337                 tcd = (struct Vmxnet3_TxCompDesc *)(comp_ring->base +
338                                                     comp_ring->next2proc);
339         }
340
341         PMD_TX_LOG(DEBUG, "Processed %d tx comps & command descs.", completed);
342 }
343
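/*
 * Tx prepare callback: validates the offload requests of each packet and,
 * for checksum offloads, fixes up the pseudo-header checksum.  On the first
 * offending packet it sets rte_errno and returns that packet's index.
 */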
344 uint16_t
345 vmxnet3_prep_pkts(__rte_unused void *tx_queue, struct rte_mbuf **tx_pkts,
346         uint16_t nb_pkts)
347 {
348         int32_t ret;
349         uint32_t i;
350         uint64_t ol_flags;
351         struct rte_mbuf *m;
352
353         for (i = 0; i != nb_pkts; i++) {
354                 m = tx_pkts[i];
355                 ol_flags = m->ol_flags;
356
357                 /* Non-TSO packet cannot occupy more than
358                  * VMXNET3_MAX_TXD_PER_PKT TX descriptors.
359                  */
360                 if ((ol_flags & PKT_TX_TCP_SEG) == 0 &&
361                                 m->nb_segs > VMXNET3_MAX_TXD_PER_PKT) {
362                         rte_errno = EINVAL;
363                         return i;
364                 }
365
366                 /* check that only supported TX offloads are requested. */
367                 if ((ol_flags & VMXNET3_TX_OFFLOAD_NOTSUP_MASK) != 0 ||
368                                 (ol_flags & PKT_TX_L4_MASK) ==
369                                 PKT_TX_SCTP_CKSUM) {
370                         rte_errno = ENOTSUP;
371                         return i;
372                 }
373
374 #ifdef RTE_LIBRTE_ETHDEV_DEBUG
375                 ret = rte_validate_tx_offload(m);
376                 if (ret != 0) {
377                         rte_errno = -ret;
378                         return i;
379                 }
380 #endif
381                 ret = rte_net_intel_cksum_prepare(m);
382                 if (ret != 0) {
383                         rte_errno = -ret;
384                         return i;
385                 }
386         }
387
388         return i;
389 }
390
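/*
 * Transmit burst: for each packet, check command-ring space, optionally copy
 * small single-segment packets into the Tx data ring, then fill one Tx
 * descriptor per mbuf segment.  The SOP descriptor keeps the previous
 * generation bit until the whole chain is written, and the TXPROD doorbell
 * is rung once txNumDeferred reaches txThreshold.
 */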
391 uint16_t
392 vmxnet3_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts,
393                   uint16_t nb_pkts)
394 {
395         uint16_t nb_tx;
396         vmxnet3_tx_queue_t *txq = tx_queue;
397         struct vmxnet3_hw *hw = txq->hw;
398         Vmxnet3_TxQueueCtrl *txq_ctrl = &txq->shared->ctrl;
399         uint32_t deferred = rte_le_to_cpu_32(txq_ctrl->txNumDeferred);
400
401         if (unlikely(txq->stopped)) {
402                 PMD_TX_LOG(DEBUG, "Tx queue is stopped.");
403                 return 0;
404         }
405
406         /* Free up the comp_descriptors aggressively */
407         vmxnet3_tq_tx_complete(txq);
408
409         nb_tx = 0;
410         while (nb_tx < nb_pkts) {
411                 Vmxnet3_GenericDesc *gdesc;
412                 vmxnet3_buf_info_t *tbi;
413                 uint32_t first2fill, avail, dw2;
414                 struct rte_mbuf *txm = tx_pkts[nb_tx];
415                 struct rte_mbuf *m_seg = txm;
416                 int copy_size = 0;
417                 bool tso = (txm->ol_flags & PKT_TX_TCP_SEG) != 0;
418                 /* # of descriptors needed for a packet. */
419                 unsigned count = txm->nb_segs;
420
421                 avail = vmxnet3_cmd_ring_desc_avail(&txq->cmd_ring);
422                 if (count > avail) {
423                         /* Is command ring full? */
424                         if (unlikely(avail == 0)) {
425                                 PMD_TX_LOG(DEBUG, "No free ring descriptors");
426                                 txq->stats.tx_ring_full++;
427                                 txq->stats.drop_total += (nb_pkts - nb_tx);
428                                 break;
429                         }
430
431                         /* Command ring is not full but cannot handle the
432                          * multi-segmented packet. Let's try the next packet
433                          * in this case.
434                          */
435                         PMD_TX_LOG(DEBUG, "Running out of ring descriptors "
436                                    "(avail %d needed %d)", avail, count);
437                         txq->stats.drop_total++;
438                         if (tso)
439                                 txq->stats.drop_tso++;
440                         rte_pktmbuf_free(txm);
441                         nb_tx++;
442                         continue;
443                 }
444
445                 /* Drop non-TSO packet that is excessively fragmented */
446                 if (unlikely(!tso && count > VMXNET3_MAX_TXD_PER_PKT)) {
447                         PMD_TX_LOG(ERR, "Non-TSO packet cannot occupy more than %d tx "
448                                    "descriptors. Packet dropped.", VMXNET3_MAX_TXD_PER_PKT);
449                         txq->stats.drop_too_many_segs++;
450                         txq->stats.drop_total++;
451                         rte_pktmbuf_free(txm);
452                         nb_tx++;
453                         continue;
454                 }
455
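                /*
                 * Small single-segment packets are copied into the per-queue
                 * Tx data ring; the descriptor then points at the data ring
                 * slot instead of the mbuf (see the copy_size handling below).
                 */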
456                 if (txm->nb_segs == 1 &&
457                     rte_pktmbuf_pkt_len(txm) <= txq->txdata_desc_size) {
458                         struct Vmxnet3_TxDataDesc *tdd;
459
460                         /* Skip empty packets */
461                         if (unlikely(rte_pktmbuf_pkt_len(txm) == 0)) {
462                                 txq->stats.drop_total++;
463                                 rte_pktmbuf_free(txm);
464                                 nb_tx++;
465                                 continue;
466                         }
467
468                         tdd = (struct Vmxnet3_TxDataDesc *)
469                                 ((uint8 *)txq->data_ring.base +
470                                  txq->cmd_ring.next2fill *
471                                  txq->txdata_desc_size);
472                         copy_size = rte_pktmbuf_pkt_len(txm);
473                         rte_memcpy(tdd->data, rte_pktmbuf_mtod(txm, char *), copy_size);
474                 }
475
476                 /* use the previous gen bit for the SOP desc */
477                 dw2 = (txq->cmd_ring.gen ^ 0x1) << VMXNET3_TXD_GEN_SHIFT;
478                 first2fill = txq->cmd_ring.next2fill;
479                 do {
480                         /* Remember the transmit buffer for cleanup */
481                         tbi = txq->cmd_ring.buf_info + txq->cmd_ring.next2fill;
482
483                         /* NB: the following assumes that VMXNET3 maximum
484                          * transmit buffer size (16K) is greater than
485                          * the maximum size of an mbuf segment.
486                          */
487                         gdesc = txq->cmd_ring.base + txq->cmd_ring.next2fill;
488
489                         /* Skip empty segments */
490                         if (unlikely(m_seg->data_len == 0))
491                                 continue;
492
493                         if (copy_size) {
494                                 uint64 offset =
495                                         (uint64)txq->cmd_ring.next2fill *
496                                                         txq->txdata_desc_size;
497                                 gdesc->txd.addr =
498                                         rte_cpu_to_le_64(txq->data_ring.basePA +
499                                                          offset);
500                         } else {
501                                 gdesc->txd.addr = rte_mbuf_data_iova(m_seg);
502                         }
503
504                         gdesc->dword[2] = dw2 | m_seg->data_len;
505                         gdesc->dword[3] = 0;
506
507                         /* move to the next2fill descriptor */
508                         vmxnet3_cmd_ring_adv_next2fill(&txq->cmd_ring);
509
510                         /* use the right gen for non-SOP desc */
511                         dw2 = txq->cmd_ring.gen << VMXNET3_TXD_GEN_SHIFT;
512                 } while ((m_seg = m_seg->next) != NULL);
513
514                 /* set the last buf_info for the pkt */
515                 tbi->m = txm;
516                 /* Update the EOP descriptor */
517                 gdesc->dword[3] |= VMXNET3_TXD_EOP | VMXNET3_TXD_CQ;
518
519                 /* Add VLAN tag if present */
520                 gdesc = txq->cmd_ring.base + first2fill;
521                 if (txm->ol_flags & PKT_TX_VLAN_PKT) {
522                         gdesc->txd.ti = 1;
523                         gdesc->txd.tci = txm->vlan_tci;
524                 }
525
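                /*
                 * txNumDeferred counts packets (for TSO, the resulting
                 * segments) queued since the doorbell was last rung; it is
                 * compared against txThreshold below to decide when to
                 * notify the device.
                 */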
526                 if (tso) {
527                         uint16_t mss = txm->tso_segsz;
528
529                         RTE_ASSERT(mss > 0);
530
531                         gdesc->txd.hlen = txm->l2_len + txm->l3_len + txm->l4_len;
532                         gdesc->txd.om = VMXNET3_OM_TSO;
533                         gdesc->txd.msscof = mss;
534
535                         deferred += (rte_pktmbuf_pkt_len(txm) - gdesc->txd.hlen + mss - 1) / mss;
536                 } else if (txm->ol_flags & PKT_TX_L4_MASK) {
537                         gdesc->txd.om = VMXNET3_OM_CSUM;
538                         gdesc->txd.hlen = txm->l2_len + txm->l3_len;
539
540                         switch (txm->ol_flags & PKT_TX_L4_MASK) {
541                         case PKT_TX_TCP_CKSUM:
542                                 gdesc->txd.msscof = gdesc->txd.hlen + offsetof(struct tcp_hdr, cksum);
543                                 break;
544                         case PKT_TX_UDP_CKSUM:
545                                 gdesc->txd.msscof = gdesc->txd.hlen + offsetof(struct udp_hdr, dgram_cksum);
546                                 break;
547                         default:
548                                 PMD_TX_LOG(WARNING, "requested cksum offload not supported %#llx",
549                                            txm->ol_flags & PKT_TX_L4_MASK);
550                                 abort();
551                         }
552                         deferred++;
553                 } else {
554                         gdesc->txd.hlen = 0;
555                         gdesc->txd.om = VMXNET3_OM_NONE;
556                         gdesc->txd.msscof = 0;
557                         deferred++;
558                 }
559
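                /*
                 * All descriptor fields must be written before the SOP
                 * generation bit is flipped, since that flip hands the whole
                 * descriptor chain over to the device.
                 */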
560                 /* flip the GEN bit on the SOP */
561                 rte_compiler_barrier();
562                 gdesc->dword[2] ^= VMXNET3_TXD_GEN;
563
564                 txq_ctrl->txNumDeferred = rte_cpu_to_le_32(deferred);
565                 nb_tx++;
566         }
567
568         PMD_TX_LOG(DEBUG, "vmxnet3 txThreshold: %u", rte_le_to_cpu_32(txq_ctrl->txThreshold));
569
570         if (deferred >= rte_le_to_cpu_32(txq_ctrl->txThreshold)) {
571                 txq_ctrl->txNumDeferred = 0;
572                 /* Notify vSwitch that packets are available. */
573                 VMXNET3_WRITE_BAR0_REG(hw, (VMXNET3_REG_TXPROD + txq->queue_id * VMXNET3_REG_ALIGN),
574                                        txq->cmd_ring.next2fill);
575         }
576
577         return nb_tx;
578 }
579
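/*
 * Refill one Rx descriptor with a fresh mbuf: ring 0 takes HEAD-type
 * buffers, ring 1 BODY-type buffers; writing the generation bit last hands
 * the descriptor to the device.
 */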
580 static inline void
581 vmxnet3_renew_desc(vmxnet3_rx_queue_t *rxq, uint8_t ring_id,
582                    struct rte_mbuf *mbuf)
583 {
584         uint32_t val;
585         struct vmxnet3_cmd_ring *ring = &rxq->cmd_ring[ring_id];
586         struct Vmxnet3_RxDesc *rxd =
587                 (struct Vmxnet3_RxDesc *)(ring->base + ring->next2fill);
588         vmxnet3_buf_info_t *buf_info = &ring->buf_info[ring->next2fill];
589
590         if (ring_id == 0) {
591                 /* Usually: One HEAD type buf per packet
592                  * val = (ring->next2fill % rxq->hw->bufs_per_pkt) ?
593                  * VMXNET3_RXD_BTYPE_BODY : VMXNET3_RXD_BTYPE_HEAD;
594                  */
595
596                 /* We use single packet buffer so all heads here */
597                 val = VMXNET3_RXD_BTYPE_HEAD;
598         } else {
599                 /* All BODY type buffers for 2nd ring */
600                 val = VMXNET3_RXD_BTYPE_BODY;
601         }
602
603         /*
604          * Load the mbuf pointer into buf_info[next2fill]; the buf_info
605          * entry plays the same role as the cookie in a virtio virtqueue.
606          */
607         buf_info->m = mbuf;
608         buf_info->len = (uint16_t)(mbuf->buf_len - RTE_PKTMBUF_HEADROOM);
609         buf_info->bufPA = rte_mbuf_data_iova_default(mbuf);
610
611         /* Load Rx Descriptor with the buffer's GPA */
612         rxd->addr = buf_info->bufPA;
613
614         /* After this point rxd->addr MUST not be NULL */
615         rxd->btype = val;
616         rxd->len = buf_info->len;
617         /* Flip gen bit at the end to change ownership */
618         rxd->gen = ring->gen;
619
620         vmxnet3_cmd_ring_adv_next2fill(ring);
621 }
622 /*
623  *  Allocates mbufs and clusters, and posts Rx descriptors with the buffer
624  *  details so that the device can receive packets into those buffers.
625  *  Ring layout:
626  *      Of the two rings, the 1st ring contains buffers of type 0 and type 1.
627  *      bufs_per_pkt is set such that for non-LRO cases all the buffers required
628  *      by a frame fit in the 1st ring (1st buf of type 0 and rest of type 1).
629  *      The 2nd ring contains buffers of type 1 only and is mostly used
630  *      for LRO.
631  */
632 static int
633 vmxnet3_post_rx_bufs(vmxnet3_rx_queue_t *rxq, uint8_t ring_id)
634 {
635         int err = 0;
636         uint32_t i = 0;
637         struct vmxnet3_cmd_ring *ring = &rxq->cmd_ring[ring_id];
638
639         while (vmxnet3_cmd_ring_desc_avail(ring) > 0) {
640                 struct rte_mbuf *mbuf;
641
642                 /* Allocate blank mbuf for the current Rx Descriptor */
643                 mbuf = rte_mbuf_raw_alloc(rxq->mp);
644                 if (unlikely(mbuf == NULL)) {
645                         PMD_RX_LOG(ERR, "Error allocating mbuf");
646                         rxq->stats.rx_buf_alloc_failure++;
647                         err = ENOMEM;
648                         break;
649                 }
650
651                 vmxnet3_renew_desc(rxq, ring_id, mbuf);
652                 i++;
653         }
654
655         /* Return error only if no buffers are posted at present */
656         if (vmxnet3_cmd_ring_desc_avail(ring) >= (ring->size - 1))
657                 return -err;
658         else
659                 return i;
660 }
661
662 /* MSS not provided by vmxnet3, guess one with available information */
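/*
 * The headers in the first segment are parsed to find the TCP payload
 * offset; when the LRO completion supplied a segment count (stashed in
 * rxm->udata64), MSS is the payload length divided by that count, otherwise
 * it falls back to an MTU-based estimate.
 */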
663 static uint16_t
664 vmxnet3_guess_mss(struct vmxnet3_hw *hw, const Vmxnet3_RxCompDesc *rcd,
665                 struct rte_mbuf *rxm)
666 {
667         uint32_t hlen, slen;
668         struct ipv4_hdr *ipv4_hdr;
669         struct ipv6_hdr *ipv6_hdr;
670         struct tcp_hdr *tcp_hdr;
671         char *ptr;
672
673         RTE_ASSERT(rcd->tcp);
674
675         ptr = rte_pktmbuf_mtod(rxm, char *);
676         slen = rte_pktmbuf_data_len(rxm);
677         hlen = sizeof(struct ether_hdr);
678
679         if (rcd->v4) {
680                 if (unlikely(slen < hlen + sizeof(struct ipv4_hdr)))
681                         return hw->mtu - sizeof(struct ipv4_hdr)
682                                         - sizeof(struct tcp_hdr);
683
684                 ipv4_hdr = (struct ipv4_hdr *)(ptr + hlen);
685                 hlen += (ipv4_hdr->version_ihl & IPV4_HDR_IHL_MASK) *
686                                 IPV4_IHL_MULTIPLIER;
687         } else if (rcd->v6) {
688                 if (unlikely(slen < hlen + sizeof(struct ipv6_hdr)))
689                         return hw->mtu - sizeof(struct ipv6_hdr) -
690                                         sizeof(struct tcp_hdr);
691
692                 ipv6_hdr = (struct ipv6_hdr *)(ptr + hlen);
693                 hlen += sizeof(struct ipv6_hdr);
694                 if (unlikely(ipv6_hdr->proto != IPPROTO_TCP)) {
695                         int frag;
696
697                         rte_net_skip_ip6_ext(ipv6_hdr->proto, rxm,
698                                         &hlen, &frag);
699                 }
700         }
701
702         if (unlikely(slen < hlen + sizeof(struct tcp_hdr)))
703                 return hw->mtu - hlen - sizeof(struct tcp_hdr) +
704                                 sizeof(struct ether_hdr);
705
706         tcp_hdr = (struct tcp_hdr *)(ptr + hlen);
707         hlen += (tcp_hdr->data_off & 0xf0) >> 2;
708
709         if (rxm->udata64 > 1)
710                 return (rte_pktmbuf_pkt_len(rxm) - hlen +
711                                 rxm->udata64 - 1) / rxm->udata64;
712         else
713                 return hw->mtu - hlen + sizeof(struct ether_hdr);
714 }
715
716 /* Receive side checksum and other offloads */
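/*
 * Offloads reported on the SOP completion descriptor (packet type, LRO MSS
 * and segment count) are applied when sop != 0; RSS hash, stripped VLAN tag
 * and L3/L4 checksum status come from the EOP descriptor.
 */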
717 static inline void
718 vmxnet3_rx_offload(struct vmxnet3_hw *hw, const Vmxnet3_RxCompDesc *rcd,
719                 struct rte_mbuf *rxm, const uint8_t sop)
720 {
721         uint64_t ol_flags = rxm->ol_flags;
722         uint32_t packet_type = rxm->packet_type;
723
724         /* Offloads set in sop */
725         if (sop) {
726                 /* Set packet type */
727                 packet_type |= RTE_PTYPE_L2_ETHER;
728
729                 /* Check large packet receive */
730                 if (VMXNET3_VERSION_GE_2(hw) &&
731                     rcd->type == VMXNET3_CDTYPE_RXCOMP_LRO) {
732                         const Vmxnet3_RxCompDescExt *rcde =
733                                         (const Vmxnet3_RxCompDescExt *)rcd;
734
735                         rxm->tso_segsz = rcde->mss;
736                         rxm->udata64 = rcde->segCnt;
737                         ol_flags |= PKT_RX_LRO;
738                 }
739         } else { /* Offloads set in eop */
740                 /* Check for RSS */
741                 if (rcd->rssType != VMXNET3_RCD_RSS_TYPE_NONE) {
742                         ol_flags |= PKT_RX_RSS_HASH;
743                         rxm->hash.rss = rcd->rssHash;
744                 }
745
746                 /* Check for hardware stripped VLAN tag */
747                 if (rcd->ts) {
748                         ol_flags |= (PKT_RX_VLAN | PKT_RX_VLAN_STRIPPED);
749                         rxm->vlan_tci = rte_le_to_cpu_16((uint16_t)rcd->tci);
750                 }
751
752                 /* Check packet type, checksum errors, etc. */
753                 if (rcd->cnc) {
754                         ol_flags |= PKT_RX_L4_CKSUM_UNKNOWN;
755                 } else {
756                         if (rcd->v4) {
757                                 packet_type |= RTE_PTYPE_L3_IPV4_EXT_UNKNOWN;
758
759                                 if (rcd->ipc)
760                                         ol_flags |= PKT_RX_IP_CKSUM_GOOD;
761                                 else
762                                         ol_flags |= PKT_RX_IP_CKSUM_BAD;
763
764                                 if (rcd->tuc) {
765                                         ol_flags |= PKT_RX_L4_CKSUM_GOOD;
766                                         if (rcd->tcp)
767                                                 packet_type |= RTE_PTYPE_L4_TCP;
768                                         else
769                                                 packet_type |= RTE_PTYPE_L4_UDP;
770                                 } else {
771                                         if (rcd->tcp) {
772                                                 packet_type |= RTE_PTYPE_L4_TCP;
773                                                 ol_flags |= PKT_RX_L4_CKSUM_BAD;
774                                         } else if (rcd->udp) {
775                                                 packet_type |= RTE_PTYPE_L4_UDP;
776                                                 ol_flags |= PKT_RX_L4_CKSUM_BAD;
777                                         }
778                                 }
779                         } else if (rcd->v6) {
780                                 packet_type |= RTE_PTYPE_L3_IPV6_EXT_UNKNOWN;
781
782                                 if (rcd->tuc) {
783                                         ol_flags |= PKT_RX_L4_CKSUM_GOOD;
784                                         if (rcd->tcp)
785                                                 packet_type |= RTE_PTYPE_L4_TCP;
786                                         else
787                                                 packet_type |= RTE_PTYPE_L4_UDP;
788                                 } else {
789                                         if (rcd->tcp) {
790                                                 packet_type |= RTE_PTYPE_L4_TCP;
791                                                 ol_flags |= PKT_RX_L4_CKSUM_BAD;
792                                         } else if (rcd->udp) {
793                                                 packet_type |= RTE_PTYPE_L4_UDP;
794                                                 ol_flags |= PKT_RX_L4_CKSUM_BAD;
795                                         }
796                                 }
797                         } else {
798                                 packet_type |= RTE_PTYPE_UNKNOWN;
799                         }
800
801                         /* Old variants of vmxnet3 do not provide MSS */
802                         if ((ol_flags & PKT_RX_LRO) && rxm->tso_segsz == 0)
803                                 rxm->tso_segsz = vmxnet3_guess_mss(hw,
804                                                 rcd, rxm);
805                 }
806         }
807
808         rxm->ol_flags = ol_flags;
809         rxm->packet_type = packet_type;
810 }
811
812 /*
813  * Process the Rx Completion Ring of given vmxnet3_rx_queue
814  * for nb_pkts burst and return the number of packets received
815  */
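/*
 * Completion descriptors are consumed while their generation bit matches the
 * ring's; scattered packets are chained via start_seg/last_seg, each consumed
 * slot is refilled with a new mbuf, and RXPROD is written whenever the device
 * requests producer updates (updateRxProd).
 */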
816 uint16_t
817 vmxnet3_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t nb_pkts)
818 {
819         uint16_t nb_rx;
820         uint32_t nb_rxd, idx;
821         uint8_t ring_idx;
822         vmxnet3_rx_queue_t *rxq;
823         Vmxnet3_RxCompDesc *rcd;
824         vmxnet3_buf_info_t *rbi;
825         Vmxnet3_RxDesc *rxd;
826         struct rte_mbuf *rxm = NULL;
827         struct vmxnet3_hw *hw;
828
829         nb_rx = 0;
830         ring_idx = 0;
831         nb_rxd = 0;
832         idx = 0;
833
834         rxq = rx_queue;
835         hw = rxq->hw;
836
837         rcd = &rxq->comp_ring.base[rxq->comp_ring.next2proc].rcd;
838
839         if (unlikely(rxq->stopped)) {
840                 PMD_RX_LOG(DEBUG, "Rx queue is stopped.");
841                 return 0;
842         }
843
844         while (rcd->gen == rxq->comp_ring.gen) {
845                 struct rte_mbuf *newm;
846
847                 if (nb_rx >= nb_pkts)
848                         break;
849
850                 newm = rte_mbuf_raw_alloc(rxq->mp);
851                 if (unlikely(newm == NULL)) {
852                         PMD_RX_LOG(ERR, "Error allocating mbuf");
853                         rxq->stats.rx_buf_alloc_failure++;
854                         break;
855                 }
856
857                 idx = rcd->rxdIdx;
858                 ring_idx = vmxnet3_get_ring_idx(hw, rcd->rqID);
859                 rxd = (Vmxnet3_RxDesc *)rxq->cmd_ring[ring_idx].base + idx;
860                 RTE_SET_USED(rxd); /* used only for assert when enabled */
861                 rbi = rxq->cmd_ring[ring_idx].buf_info + idx;
862
863                 PMD_RX_LOG(DEBUG, "rxd idx: %d ring idx: %d.", idx, ring_idx);
864
865                 RTE_ASSERT(rcd->len <= rxd->len);
866                 RTE_ASSERT(rbi->m);
867
868                 /* Get the packet buffer pointer from buf_info */
869                 rxm = rbi->m;
870
871                 /* Clear descriptor associated buf_info to be reused */
872                 rbi->m = NULL;
873                 rbi->bufPA = 0;
874
875                 /* Update the index that we received a packet */
876                 rxq->cmd_ring[ring_idx].next2comp = idx;
877
878                 /* For RCD with EOP set, check if there is frame error */
879                 if (unlikely(rcd->eop && rcd->err)) {
880                         rxq->stats.drop_total++;
881                         rxq->stats.drop_err++;
882
883                         if (!rcd->fcs) {
884                                 rxq->stats.drop_fcs++;
885                                 PMD_RX_LOG(ERR, "Recv packet dropped due to frame err.");
886                         }
887                         PMD_RX_LOG(ERR, "Error in received packet rcd#:%d rxd:%d",
888                                    (int)(rcd - (struct Vmxnet3_RxCompDesc *)
889                                          rxq->comp_ring.base), rcd->rxdIdx);
890                         rte_pktmbuf_free_seg(rxm);
891                         if (rxq->start_seg) {
892                                 struct rte_mbuf *start = rxq->start_seg;
893
894                                 rxq->start_seg = NULL;
895                                 rte_pktmbuf_free(start);
896                         }
897                         goto rcd_done;
898                 }
899
900                 /* Initialize newly received packet buffer */
901                 rxm->port = rxq->port_id;
902                 rxm->nb_segs = 1;
903                 rxm->next = NULL;
904                 rxm->pkt_len = (uint16_t)rcd->len;
905                 rxm->data_len = (uint16_t)rcd->len;
906                 rxm->data_off = RTE_PKTMBUF_HEADROOM;
907                 rxm->ol_flags = 0;
908                 rxm->vlan_tci = 0;
909                 rxm->packet_type = 0;
910
911                 /*
912                  * If this is the first buffer of the received packet,
913                  * set the pointer to the first mbuf of the packet
914                  * Otherwise, update the total length and the number of segments
915                  * of the current scattered packet, and update the pointer to
916                  * the last mbuf of the current packet.
917                  */
918                 if (rcd->sop) {
919                         RTE_ASSERT(rxd->btype == VMXNET3_RXD_BTYPE_HEAD);
920
921                         if (unlikely(rcd->len == 0)) {
922                                 RTE_ASSERT(rcd->eop);
923
924                                 PMD_RX_LOG(DEBUG,
925                                            "Rx buf was skipped. rxring[%d][%d]",
926                                            ring_idx, idx);
927                                 rte_pktmbuf_free_seg(rxm);
928                                 goto rcd_done;
929                         }
930
931                         if (vmxnet3_rx_data_ring(hw, rcd->rqID)) {
932                                 uint8_t *rdd = rxq->data_ring.base +
933                                         idx * rxq->data_desc_size;
934
935                                 RTE_ASSERT(VMXNET3_VERSION_GE_3(hw));
936                                 rte_memcpy(rte_pktmbuf_mtod(rxm, char *),
937                                            rdd, rcd->len);
938                         }
939
940                         rxq->start_seg = rxm;
941                         rxq->last_seg = rxm;
942                         vmxnet3_rx_offload(hw, rcd, rxm, 1);
943                 } else {
944                         struct rte_mbuf *start = rxq->start_seg;
945
946                         RTE_ASSERT(rxd->btype == VMXNET3_RXD_BTYPE_BODY);
947
948                         if (rxm->data_len) {
949                                 start->pkt_len += rxm->data_len;
950                                 start->nb_segs++;
951
952                                 rxq->last_seg->next = rxm;
953                                 rxq->last_seg = rxm;
954                         } else {
955                                 rte_pktmbuf_free_seg(rxm);
956                         }
957                 }
958
959                 if (rcd->eop) {
960                         struct rte_mbuf *start = rxq->start_seg;
961
962                         vmxnet3_rx_offload(hw, rcd, start, 0);
963                         rx_pkts[nb_rx++] = start;
964                         rxq->start_seg = NULL;
965                 }
966
967 rcd_done:
968                 rxq->cmd_ring[ring_idx].next2comp = idx;
969                 VMXNET3_INC_RING_IDX_ONLY(rxq->cmd_ring[ring_idx].next2comp,
970                                           rxq->cmd_ring[ring_idx].size);
971
972                 /* It's time to renew descriptors */
973                 vmxnet3_renew_desc(rxq, ring_idx, newm);
974                 if (unlikely(rxq->shared->ctrl.updateRxProd)) {
975                         VMXNET3_WRITE_BAR0_REG(hw, rxprod_reg[ring_idx] + (rxq->queue_id * VMXNET3_REG_ALIGN),
976                                                rxq->cmd_ring[ring_idx].next2fill);
977                 }
978
979                 /* Advance to the next descriptor in comp_ring */
980                 vmxnet3_comp_ring_adv_next2proc(&rxq->comp_ring);
981
982                 rcd = &rxq->comp_ring.base[rxq->comp_ring.next2proc].rcd;
983                 nb_rxd++;
984                 if (nb_rxd > rxq->cmd_ring[0].size) {
985                         PMD_RX_LOG(ERR, "Used up quota of receiving packets,"
986                                    " relinquish control.");
987                         break;
988                 }
989         }
990
991         if (unlikely(nb_rxd == 0)) {
992                 uint32_t avail;
993                 for (ring_idx = 0; ring_idx < VMXNET3_RX_CMDRING_SIZE; ring_idx++) {
994                         avail = vmxnet3_cmd_ring_desc_avail(&rxq->cmd_ring[ring_idx]);
995                         if (unlikely(avail > 0)) {
996                                 /* try to alloc new buf and renew descriptors */
997                                 vmxnet3_post_rx_bufs(rxq, ring_idx);
998                         }
999                 }
1000                 if (unlikely(rxq->shared->ctrl.updateRxProd)) {
1001                         for (ring_idx = 0; ring_idx < VMXNET3_RX_CMDRING_SIZE; ring_idx++) {
1002                                 VMXNET3_WRITE_BAR0_REG(hw, rxprod_reg[ring_idx] + (rxq->queue_id * VMXNET3_REG_ALIGN),
1003                                                        rxq->cmd_ring[ring_idx].next2fill);
1004                         }
1005                 }
1006         }
1007
1008         return nb_rx;
1009 }
1010
1011 int
1012 vmxnet3_dev_tx_queue_setup(struct rte_eth_dev *dev,
1013                            uint16_t queue_idx,
1014                            uint16_t nb_desc,
1015                            unsigned int socket_id,
1016                            const struct rte_eth_txconf *tx_conf)
1017 {
1018         struct vmxnet3_hw *hw = dev->data->dev_private;
1019         const struct rte_memzone *mz;
1020         struct vmxnet3_tx_queue *txq;
1021         struct vmxnet3_cmd_ring *ring;
1022         struct vmxnet3_comp_ring *comp_ring;
1023         struct vmxnet3_data_ring *data_ring;
1024         int size;
1025
1026         PMD_INIT_FUNC_TRACE();
1027
1028         if ((tx_conf->txq_flags & ETH_TXQ_FLAGS_NOXSUMSCTP) !=
1029             ETH_TXQ_FLAGS_NOXSUMSCTP) {
1030                 PMD_INIT_LOG(ERR, "SCTP checksum offload not supported");
1031                 return -EINVAL;
1032         }
1033
1034         txq = rte_zmalloc("ethdev_tx_queue", sizeof(struct vmxnet3_tx_queue),
1035                           RTE_CACHE_LINE_SIZE);
1036         if (txq == NULL) {
1037                 PMD_INIT_LOG(ERR, "Can not allocate tx queue structure");
1038                 return -ENOMEM;
1039         }
1040
1041         txq->queue_id = queue_idx;
1042         txq->port_id = dev->data->port_id;
1043         txq->shared = NULL; /* set in vmxnet3_setup_driver_shared() */
1044         txq->hw = hw;
1045         txq->qid = queue_idx;
1046         txq->stopped = TRUE;
1047         txq->txdata_desc_size = hw->txdata_desc_size;
1048
1049         ring = &txq->cmd_ring;
1050         comp_ring = &txq->comp_ring;
1051         data_ring = &txq->data_ring;
1052
1053         /* Tx vmxnet ring length should be between 512-4096 */
1054         if (nb_desc < VMXNET3_DEF_TX_RING_SIZE) {
1055                 PMD_INIT_LOG(ERR, "VMXNET3 Tx Ring Size Min: %u",
1056                              VMXNET3_DEF_TX_RING_SIZE);
1057                 return -EINVAL;
1058         } else if (nb_desc > VMXNET3_TX_RING_MAX_SIZE) {
1059                 PMD_INIT_LOG(ERR, "VMXNET3 Tx Ring Size Max: %u",
1060                              VMXNET3_TX_RING_MAX_SIZE);
1061                 return -EINVAL;
1062         } else {
1063                 ring->size = nb_desc;
1064                 ring->size &= ~VMXNET3_RING_SIZE_MASK;
1065         }
1066         comp_ring->size = data_ring->size = ring->size;
1067
1068         /* Tx vmxnet rings structure initialization */
1069         ring->next2fill = 0;
1070         ring->next2comp = 0;
1071         ring->gen = VMXNET3_INIT_GEN;
1072         comp_ring->next2proc = 0;
1073         comp_ring->gen = VMXNET3_INIT_GEN;
1074
1075         size = sizeof(struct Vmxnet3_TxDesc) * ring->size;
1076         size += sizeof(struct Vmxnet3_TxCompDesc) * comp_ring->size;
1077         size += txq->txdata_desc_size * data_ring->size;
1078
1079         mz = rte_eth_dma_zone_reserve(dev, "txdesc", queue_idx, size,
1080                                       VMXNET3_RING_BA_ALIGN, socket_id);
1081         if (mz == NULL) {
1082                 PMD_INIT_LOG(ERR, "ERROR: Creating queue descriptors zone");
1083                 return -ENOMEM;
1084         }
1085         txq->mz = mz;
1086         memset(mz->addr, 0, mz->len);
1087
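        /*
         * The single DMA zone is laid out as: Tx command ring, Tx completion
         * ring, then the Tx data ring used for small-packet copies.
         */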
1088         /* cmd_ring initialization */
1089         ring->base = mz->addr;
1090         ring->basePA = mz->iova;
1091
1092         /* comp_ring initialization */
1093         comp_ring->base = ring->base + ring->size;
1094         comp_ring->basePA = ring->basePA +
1095                 (sizeof(struct Vmxnet3_TxDesc) * ring->size);
1096
1097         /* data_ring initialization */
1098         data_ring->base = (Vmxnet3_TxDataDesc *)(comp_ring->base + comp_ring->size);
1099         data_ring->basePA = comp_ring->basePA +
1100                         (sizeof(struct Vmxnet3_TxCompDesc) * comp_ring->size);
1101
1102         /* cmd_ring0 buf_info allocation */
1103         ring->buf_info = rte_zmalloc("tx_ring_buf_info",
1104                                      ring->size * sizeof(vmxnet3_buf_info_t), RTE_CACHE_LINE_SIZE);
1105         if (ring->buf_info == NULL) {
1106                 PMD_INIT_LOG(ERR, "ERROR: Creating tx_buf_info structure");
1107                 return -ENOMEM;
1108         }
1109
1110         /* Update the data portion with txq */
1111         dev->data->tx_queues[queue_idx] = txq;
1112
1113         return 0;
1114 }
1115
1116 int
1117 vmxnet3_dev_rx_queue_setup(struct rte_eth_dev *dev,
1118                            uint16_t queue_idx,
1119                            uint16_t nb_desc,
1120                            unsigned int socket_id,
1121                            __rte_unused const struct rte_eth_rxconf *rx_conf,
1122                            struct rte_mempool *mp)
1123 {
1124         const struct rte_memzone *mz;
1125         struct vmxnet3_rx_queue *rxq;
1126         struct vmxnet3_hw *hw = dev->data->dev_private;
1127         struct vmxnet3_cmd_ring *ring0, *ring1, *ring;
1128         struct vmxnet3_comp_ring *comp_ring;
1129         struct vmxnet3_rx_data_ring *data_ring;
1130         int size;
1131         uint8_t i;
1132         char mem_name[32];
1133
1134         PMD_INIT_FUNC_TRACE();
1135
1136         rxq = rte_zmalloc("ethdev_rx_queue", sizeof(struct vmxnet3_rx_queue),
1137                           RTE_CACHE_LINE_SIZE);
1138         if (rxq == NULL) {
1139                 PMD_INIT_LOG(ERR, "Can not allocate rx queue structure");
1140                 return -ENOMEM;
1141         }
1142
1143         rxq->mp = mp;
1144         rxq->queue_id = queue_idx;
1145         rxq->port_id = dev->data->port_id;
1146         rxq->shared = NULL; /* set in vmxnet3_setup_driver_shared() */
1147         rxq->hw = hw;
1148         rxq->qid1 = queue_idx;
1149         rxq->qid2 = queue_idx + hw->num_rx_queues;
1150         rxq->data_ring_qid = queue_idx + 2 * hw->num_rx_queues;
1151         rxq->data_desc_size = hw->rxdata_desc_size;
1152         rxq->stopped = TRUE;
1153
1154         ring0 = &rxq->cmd_ring[0];
1155         ring1 = &rxq->cmd_ring[1];
1156         comp_ring = &rxq->comp_ring;
1157         data_ring = &rxq->data_ring;
1158
1159         /* Rx vmxnet rings length should be between 256-4096 */
1160         if (nb_desc < VMXNET3_DEF_RX_RING_SIZE) {
1161                 PMD_INIT_LOG(ERR, "VMXNET3 Rx Ring Size Min: 256");
1162                 return -EINVAL;
1163         } else if (nb_desc > VMXNET3_RX_RING_MAX_SIZE) {
1164                 PMD_INIT_LOG(ERR, "VMXNET3 Rx Ring Size Max: 4096");
1165                 return -EINVAL;
1166         } else {
1167                 ring0->size = nb_desc;
1168                 ring0->size &= ~VMXNET3_RING_SIZE_MASK;
1169                 ring1->size = ring0->size;
1170         }
1171
1172         comp_ring->size = ring0->size + ring1->size;
1173         data_ring->size = ring0->size;
1174
1175         /* Rx vmxnet rings structure initialization */
1176         ring0->next2fill = 0;
1177         ring1->next2fill = 0;
1178         ring0->next2comp = 0;
1179         ring1->next2comp = 0;
1180         ring0->gen = VMXNET3_INIT_GEN;
1181         ring1->gen = VMXNET3_INIT_GEN;
1182         comp_ring->next2proc = 0;
1183         comp_ring->gen = VMXNET3_INIT_GEN;
1184
1185         size = sizeof(struct Vmxnet3_RxDesc) * (ring0->size + ring1->size);
1186         size += sizeof(struct Vmxnet3_RxCompDesc) * comp_ring->size;
1187         if (VMXNET3_VERSION_GE_3(hw) && rxq->data_desc_size)
1188                 size += rxq->data_desc_size * data_ring->size;
1189
1190         mz = rte_eth_dma_zone_reserve(dev, "rxdesc", queue_idx, size,
1191                                       VMXNET3_RING_BA_ALIGN, socket_id);
1192         if (mz == NULL) {
1193                 PMD_INIT_LOG(ERR, "ERROR: Creating queue descriptors zone");
1194                 return -ENOMEM;
1195         }
1196         rxq->mz = mz;
1197         memset(mz->addr, 0, mz->len);
1198
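        /*
         * The Rx DMA zone is laid out as: command ring 0, command ring 1,
         * the completion ring, and (on device version 3+, when enabled) the
         * Rx data ring.
         */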
1199         /* cmd_ring0 initialization */
1200         ring0->base = mz->addr;
1201         ring0->basePA = mz->iova;
1202
1203         /* cmd_ring1 initialization */
1204         ring1->base = ring0->base + ring0->size;
1205         ring1->basePA = ring0->basePA + sizeof(struct Vmxnet3_RxDesc) * ring0->size;
1206
1207         /* comp_ring initialization */
1208         comp_ring->base = ring1->base + ring1->size;
1209         comp_ring->basePA = ring1->basePA + sizeof(struct Vmxnet3_RxDesc) *
1210                 ring1->size;
1211
1212         /* data_ring initialization */
1213         if (VMXNET3_VERSION_GE_3(hw) && rxq->data_desc_size) {
1214                 data_ring->base =
1215                         (uint8_t *)(comp_ring->base + comp_ring->size);
1216                 data_ring->basePA = comp_ring->basePA +
1217                         sizeof(struct Vmxnet3_RxCompDesc) * comp_ring->size;
1218         }
1219
1220         /* cmd_ring0-cmd_ring1 buf_info allocation */
1221         for (i = 0; i < VMXNET3_RX_CMDRING_SIZE; i++) {
1222
1223                 ring = &rxq->cmd_ring[i];
1224                 ring->rid = i;
1225                 snprintf(mem_name, sizeof(mem_name), "rx_ring_%d_buf_info", i);
1226
1227                 ring->buf_info = rte_zmalloc(mem_name,
1228                                              ring->size * sizeof(vmxnet3_buf_info_t),
1229                                              RTE_CACHE_LINE_SIZE);
1230                 if (ring->buf_info == NULL) {
1231                         PMD_INIT_LOG(ERR, "ERROR: Creating rx_buf_info structure");
1232                         return -ENOMEM;
1233                 }
1234         }
1235
1236         /* Update the data portion with rxq */
1237         dev->data->rx_queues[queue_idx] = rxq;
1238
1239         return 0;
1240 }
1241
1242 /*
1243  * Initializes Receive Unit
1244  * Load mbufs in rx queue in advance
1245  */
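/*
 * Both command rings of every Rx queue are fully populated with mbufs and
 * the producer index is pushed to the device (when requested) before the
 * queues are marked as started.
 */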
1246 int
1247 vmxnet3_dev_rxtx_init(struct rte_eth_dev *dev)
1248 {
1249         struct vmxnet3_hw *hw = dev->data->dev_private;
1250
1251         int i, ret;
1252         uint8_t j;
1253
1254         PMD_INIT_FUNC_TRACE();
1255
1256         for (i = 0; i < hw->num_rx_queues; i++) {
1257                 vmxnet3_rx_queue_t *rxq = dev->data->rx_queues[i];
1258
1259                 for (j = 0; j < VMXNET3_RX_CMDRING_SIZE; j++) {
1260                         /* Populate the whole command ring with fresh mbufs */
1261                         ret = vmxnet3_post_rx_bufs(rxq, j);
1262                         if (ret <= 0) {
1263                                 PMD_INIT_LOG(ERR,
1264                                              "ERROR: Posting Rxq: %d buffers ring: %d",
1265                                              i, j);
1266                                 return -ret;
1267                         }
1268                         /*
1269                          * Updating device with the index:next2fill to fill the
1270                          * mbufs for coming packets.
1271                          */
1272                         if (unlikely(rxq->shared->ctrl.updateRxProd)) {
1273                                 VMXNET3_WRITE_BAR0_REG(hw, rxprod_reg[j] + (rxq->queue_id * VMXNET3_REG_ALIGN),
1274                                                        rxq->cmd_ring[j].next2fill);
1275                         }
1276                 }
1277                 rxq->stopped = FALSE;
1278                 rxq->start_seg = NULL;
1279         }
1280
1281         for (i = 0; i < dev->data->nb_tx_queues; i++) {
1282                 struct vmxnet3_tx_queue *txq = dev->data->tx_queues[i];
1283
1284                 txq->stopped = FALSE;
1285         }
1286
1287         return 0;
1288 }
1289
1290 static uint8_t rss_intel_key[40] = {
1291         0x6D, 0x5A, 0x56, 0xDA, 0x25, 0x5B, 0x0E, 0xC2,
1292         0x41, 0x67, 0x25, 0x3D, 0x43, 0xA3, 0x8F, 0xB0,
1293         0xD0, 0xCA, 0x2B, 0xCB, 0xAE, 0x7B, 0x30, 0xB4,
1294         0x77, 0xCB, 0x2D, 0xA3, 0x80, 0x30, 0xF2, 0x0C,
1295         0x6A, 0x42, 0xB7, 0x3B, 0xBE, 0xAC, 0x01, 0xFA,
1296 };
1297
1298 /*
1299  * Configure RSS feature
1300  */
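/*
 * The device is programmed with the Toeplitz hash function, a 40-byte key
 * (the application's key or the default Intel key above), and an indirection
 * table filled round-robin across the configured Rx queues; rss_hf bits are
 * translated into the device's hashType flags.
 */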
1301 int
1302 vmxnet3_rss_configure(struct rte_eth_dev *dev)
1303 {
1304         struct vmxnet3_hw *hw = dev->data->dev_private;
1305         struct VMXNET3_RSSConf *dev_rss_conf;
1306         struct rte_eth_rss_conf *port_rss_conf;
1307         uint64_t rss_hf;
1308         uint8_t i, j;
1309
1310         PMD_INIT_FUNC_TRACE();
1311
1312         dev_rss_conf = hw->rss_conf;
1313         port_rss_conf = &dev->data->dev_conf.rx_adv_conf.rss_conf;
1314
1315         /* loading hashFunc */
1316         dev_rss_conf->hashFunc = VMXNET3_RSS_HASH_FUNC_TOEPLITZ;
1317         /* loading hashKeySize */
1318         dev_rss_conf->hashKeySize = VMXNET3_RSS_MAX_KEY_SIZE;
1319         /* loading indTableSize: Must not exceed VMXNET3_RSS_MAX_IND_TABLE_SIZE (128)*/
1320         dev_rss_conf->indTableSize = (uint16_t)(hw->num_rx_queues * 4);
1321
1322         if (port_rss_conf->rss_key == NULL) {
1323                 /* Default hash key */
1324                 port_rss_conf->rss_key = rss_intel_key;
1325         }
1326
1327         /* loading hashKey */
1328         memcpy(&dev_rss_conf->hashKey[0], port_rss_conf->rss_key,
1329                dev_rss_conf->hashKeySize);
1330
1331         /* loading indTable */
1332         for (i = 0, j = 0; i < dev_rss_conf->indTableSize; i++, j++) {
1333                 if (j == dev->data->nb_rx_queues)
1334                         j = 0;
1335                 dev_rss_conf->indTable[i] = j;
1336         }
1337
1338         /* loading hashType */
1339         dev_rss_conf->hashType = 0;
1340         rss_hf = port_rss_conf->rss_hf & VMXNET3_RSS_OFFLOAD_ALL;
1341         if (rss_hf & ETH_RSS_IPV4)
1342                 dev_rss_conf->hashType |= VMXNET3_RSS_HASH_TYPE_IPV4;
1343         if (rss_hf & ETH_RSS_NONFRAG_IPV4_TCP)
1344                 dev_rss_conf->hashType |= VMXNET3_RSS_HASH_TYPE_TCP_IPV4;
1345         if (rss_hf & ETH_RSS_IPV6)
1346                 dev_rss_conf->hashType |= VMXNET3_RSS_HASH_TYPE_IPV6;
1347         if (rss_hf & ETH_RSS_NONFRAG_IPV6_TCP)
1348                 dev_rss_conf->hashType |= VMXNET3_RSS_HASH_TYPE_TCP_IPV6;
1349
1350         return VMXNET3_SUCCESS;
1351 }