net: add function to calculate IPv4 header length
[dpdk.git] / drivers / net / vmxnet3 / vmxnet3_rxtx.c
1 /* SPDX-License-Identifier: BSD-3-Clause
2  * Copyright(c) 2010-2015 Intel Corporation
3  */
4
5 #include <sys/queue.h>
6
7 #include <stdio.h>
8 #include <stdlib.h>
9 #include <string.h>
10 #include <errno.h>
11 #include <stdint.h>
12 #include <stdarg.h>
13 #include <unistd.h>
14 #include <inttypes.h>
15
16 #include <rte_byteorder.h>
17 #include <rte_common.h>
18 #include <rte_cycles.h>
19 #include <rte_log.h>
20 #include <rte_debug.h>
21 #include <rte_interrupts.h>
22 #include <rte_pci.h>
23 #include <rte_memory.h>
24 #include <rte_memzone.h>
25 #include <rte_launch.h>
26 #include <rte_eal.h>
27 #include <rte_per_lcore.h>
28 #include <rte_lcore.h>
29 #include <rte_atomic.h>
30 #include <rte_branch_prediction.h>
31 #include <rte_mempool.h>
32 #include <rte_malloc.h>
33 #include <rte_mbuf.h>
34 #include <rte_ether.h>
35 #include <rte_ethdev_driver.h>
36 #include <rte_prefetch.h>
37 #include <rte_ip.h>
38 #include <rte_udp.h>
39 #include <rte_tcp.h>
40 #include <rte_sctp.h>
41 #include <rte_string_fns.h>
42 #include <rte_errno.h>
43 #include <rte_net.h>
44
45 #include "base/vmxnet3_defs.h"
46 #include "vmxnet3_ring.h"
47
48 #include "vmxnet3_logs.h"
49 #include "vmxnet3_ethdev.h"
50
/*
 * Tx offload flags this driver accepts on an mbuf: VLAN insertion,
 * IPv4/IPv6 marking, L4 checksum and TCP segmentation.
 */
51 #define VMXNET3_TX_OFFLOAD_MASK ( \
52                 PKT_TX_VLAN_PKT | \
53                 PKT_TX_IPV6 |     \
54                 PKT_TX_IPV4 |     \
55                 PKT_TX_L4_MASK |  \
56                 PKT_TX_TCP_SEG)
57
/*
 * Bits that differ between the generic Tx offload mask and the supported
 * mask above. vmxnet3_prep_pkts() rejects any mbuf carrying one of them.
 */
58 #define VMXNET3_TX_OFFLOAD_NOTSUP_MASK  \
59         (PKT_TX_OFFLOAD_MASK ^ VMXNET3_TX_OFFLOAD_MASK)
60
/*
 * The two Rx producer registers.
 * NOTE(review): presumably indexed by Rx command ring id; the users of this
 * table are outside the visible part of the file - confirm.
 */
61 static const uint32_t rxprod_reg[2] = {VMXNET3_REG_RXPROD, VMXNET3_REG_RXPROD2};
62
/* Forward declarations for helpers that are defined further down. */
63 static int vmxnet3_post_rx_bufs(vmxnet3_rx_queue_t*, uint8_t);
64 static void vmxnet3_tq_tx_complete(vmxnet3_tx_queue_t *);
/* Queue dump helpers are only built when this debug macro is defined. */
65 #ifdef RTE_LIBRTE_VMXNET3_DEBUG_DRIVER_NOT_USED
66 static void vmxnet3_rxq_dump(struct vmxnet3_rx_queue *);
67 static void vmxnet3_txq_dump(struct vmxnet3_tx_queue *);
68 #endif
69
70 #ifdef RTE_LIBRTE_VMXNET3_DEBUG_DRIVER_NOT_USED
71 static void
72 vmxnet3_rxq_dump(struct vmxnet3_rx_queue *rxq)
73 {
74         uint32_t avail = 0;
75
76         if (rxq == NULL)
77                 return;
78
79         PMD_RX_LOG(DEBUG,
80                    "RXQ: cmd0 base : %p cmd1 base : %p comp ring base : %p.",
81                    rxq->cmd_ring[0].base, rxq->cmd_ring[1].base, rxq->comp_ring.base);
82         PMD_RX_LOG(DEBUG,
83                    "RXQ: cmd0 basePA : 0x%lx cmd1 basePA : 0x%lx comp ring basePA : 0x%lx.",
84                    (unsigned long)rxq->cmd_ring[0].basePA,
85                    (unsigned long)rxq->cmd_ring[1].basePA,
86                    (unsigned long)rxq->comp_ring.basePA);
87
88         avail = vmxnet3_cmd_ring_desc_avail(&rxq->cmd_ring[0]);
89         PMD_RX_LOG(DEBUG,
90                    "RXQ:cmd0: size=%u; free=%u; next2proc=%u; queued=%u",
91                    (uint32_t)rxq->cmd_ring[0].size, avail,
92                    rxq->comp_ring.next2proc,
93                    rxq->cmd_ring[0].size - avail);
94
95         avail = vmxnet3_cmd_ring_desc_avail(&rxq->cmd_ring[1]);
96         PMD_RX_LOG(DEBUG, "RXQ:cmd1 size=%u; free=%u; next2proc=%u; queued=%u",
97                    (uint32_t)rxq->cmd_ring[1].size, avail, rxq->comp_ring.next2proc,
98                    rxq->cmd_ring[1].size - avail);
99
100 }
101
102 static void
103 vmxnet3_txq_dump(struct vmxnet3_tx_queue *txq)
104 {
105         uint32_t avail = 0;
106
107         if (txq == NULL)
108                 return;
109
110         PMD_TX_LOG(DEBUG, "TXQ: cmd base : %p comp ring base : %p data ring base : %p.",
111                    txq->cmd_ring.base, txq->comp_ring.base, txq->data_ring.base);
112         PMD_TX_LOG(DEBUG, "TXQ: cmd basePA : 0x%lx comp ring basePA : 0x%lx data ring basePA : 0x%lx.",
113                    (unsigned long)txq->cmd_ring.basePA,
114                    (unsigned long)txq->comp_ring.basePA,
115                    (unsigned long)txq->data_ring.basePA);
116
117         avail = vmxnet3_cmd_ring_desc_avail(&txq->cmd_ring);
118         PMD_TX_LOG(DEBUG, "TXQ: size=%u; free=%u; next2proc=%u; queued=%u",
119                    (uint32_t)txq->cmd_ring.size, avail,
120                    txq->comp_ring.next2proc, txq->cmd_ring.size - avail);
121 }
122 #endif
123
124 static void
125 vmxnet3_tx_cmd_ring_release_mbufs(vmxnet3_cmd_ring_t *ring)
126 {
127         while (ring->next2comp != ring->next2fill) {
128                 /* No need to worry about desc ownership, device is quiesced by now. */
129                 vmxnet3_buf_info_t *buf_info = ring->buf_info + ring->next2comp;
130
131                 if (buf_info->m) {
132                         rte_pktmbuf_free(buf_info->m);
133                         buf_info->m = NULL;
134                         buf_info->bufPA = 0;
135                         buf_info->len = 0;
136                 }
137                 vmxnet3_cmd_ring_adv_next2comp(ring);
138         }
139 }
140
141 static void
142 vmxnet3_rx_cmd_ring_release_mbufs(vmxnet3_cmd_ring_t *ring)
143 {
144         uint32_t i;
145
146         for (i = 0; i < ring->size; i++) {
147                 /* No need to worry about desc ownership, device is quiesced by now. */
148                 vmxnet3_buf_info_t *buf_info = &ring->buf_info[i];
149
150                 if (buf_info->m) {
151                         rte_pktmbuf_free_seg(buf_info->m);
152                         buf_info->m = NULL;
153                         buf_info->bufPA = 0;
154                         buf_info->len = 0;
155                 }
156                 vmxnet3_cmd_ring_adv_next2comp(ring);
157         }
158 }
159
160 static void
161 vmxnet3_cmd_ring_release(vmxnet3_cmd_ring_t *ring)
162 {
163         rte_free(ring->buf_info);
164         ring->buf_info = NULL;
165 }
166
167 void
168 vmxnet3_dev_tx_queue_release(void *txq)
169 {
170         vmxnet3_tx_queue_t *tq = txq;
171
172         if (tq != NULL) {
173                 /* Release mbufs */
174                 vmxnet3_tx_cmd_ring_release_mbufs(&tq->cmd_ring);
175                 /* Release the cmd_ring */
176                 vmxnet3_cmd_ring_release(&tq->cmd_ring);
177                 /* Release the memzone */
178                 rte_memzone_free(tq->mz);
179                 /* Release the queue */
180                 rte_free(tq);
181         }
182 }
183
184 void
185 vmxnet3_dev_rx_queue_release(void *rxq)
186 {
187         int i;
188         vmxnet3_rx_queue_t *rq = rxq;
189
190         if (rq != NULL) {
191                 /* Release mbufs */
192                 for (i = 0; i < VMXNET3_RX_CMDRING_SIZE; i++)
193                         vmxnet3_rx_cmd_ring_release_mbufs(&rq->cmd_ring[i]);
194
195                 /* Release both the cmd_rings */
196                 for (i = 0; i < VMXNET3_RX_CMDRING_SIZE; i++)
197                         vmxnet3_cmd_ring_release(&rq->cmd_ring[i]);
198
199                 /* Release the memzone */
200                 rte_memzone_free(rq->mz);
201
202                 /* Release the queue */
203                 rte_free(rq);
204         }
205 }
206
207 static void
208 vmxnet3_dev_tx_queue_reset(void *txq)
209 {
210         vmxnet3_tx_queue_t *tq = txq;
211         struct vmxnet3_cmd_ring *ring = &tq->cmd_ring;
212         struct vmxnet3_comp_ring *comp_ring = &tq->comp_ring;
213         struct vmxnet3_data_ring *data_ring = &tq->data_ring;
214         int size;
215
216         if (tq != NULL) {
217                 /* Release the cmd_ring mbufs */
218                 vmxnet3_tx_cmd_ring_release_mbufs(&tq->cmd_ring);
219         }
220
221         /* Tx vmxnet rings structure initialization*/
222         ring->next2fill = 0;
223         ring->next2comp = 0;
224         ring->gen = VMXNET3_INIT_GEN;
225         comp_ring->next2proc = 0;
226         comp_ring->gen = VMXNET3_INIT_GEN;
227
228         size = sizeof(struct Vmxnet3_TxDesc) * ring->size;
229         size += sizeof(struct Vmxnet3_TxCompDesc) * comp_ring->size;
230         size += tq->txdata_desc_size * data_ring->size;
231
232         memset(ring->base, 0, size);
233 }
234
235 static void
236 vmxnet3_dev_rx_queue_reset(void *rxq)
237 {
238         int i;
239         vmxnet3_rx_queue_t *rq = rxq;
240         struct vmxnet3_hw *hw = rq->hw;
241         struct vmxnet3_cmd_ring *ring0, *ring1;
242         struct vmxnet3_comp_ring *comp_ring;
243         struct vmxnet3_rx_data_ring *data_ring = &rq->data_ring;
244         int size;
245
246         /* Release both the cmd_rings mbufs */
247         for (i = 0; i < VMXNET3_RX_CMDRING_SIZE; i++)
248                 vmxnet3_rx_cmd_ring_release_mbufs(&rq->cmd_ring[i]);
249
250         ring0 = &rq->cmd_ring[0];
251         ring1 = &rq->cmd_ring[1];
252         comp_ring = &rq->comp_ring;
253
254         /* Rx vmxnet rings structure initialization */
255         ring0->next2fill = 0;
256         ring1->next2fill = 0;
257         ring0->next2comp = 0;
258         ring1->next2comp = 0;
259         ring0->gen = VMXNET3_INIT_GEN;
260         ring1->gen = VMXNET3_INIT_GEN;
261         comp_ring->next2proc = 0;
262         comp_ring->gen = VMXNET3_INIT_GEN;
263
264         size = sizeof(struct Vmxnet3_RxDesc) * (ring0->size + ring1->size);
265         size += sizeof(struct Vmxnet3_RxCompDesc) * comp_ring->size;
266         if (VMXNET3_VERSION_GE_3(hw) && rq->data_desc_size)
267                 size += rq->data_desc_size * data_ring->size;
268
269         memset(ring0->base, 0, size);
270 }
271
272 void
273 vmxnet3_dev_clear_queues(struct rte_eth_dev *dev)
274 {
275         unsigned i;
276
277         PMD_INIT_FUNC_TRACE();
278
279         for (i = 0; i < dev->data->nb_tx_queues; i++) {
280                 struct vmxnet3_tx_queue *txq = dev->data->tx_queues[i];
281
282                 if (txq != NULL) {
283                         txq->stopped = TRUE;
284                         vmxnet3_dev_tx_queue_reset(txq);
285                 }
286         }
287
288         for (i = 0; i < dev->data->nb_rx_queues; i++) {
289                 struct vmxnet3_rx_queue *rxq = dev->data->rx_queues[i];
290
291                 if (rxq != NULL) {
292                         rxq->stopped = TRUE;
293                         vmxnet3_dev_rx_queue_reset(rxq);
294                 }
295         }
296 }
297
298 static int
299 vmxnet3_unmap_pkt(uint16_t eop_idx, vmxnet3_tx_queue_t *txq)
300 {
301         int completed = 0;
302         struct rte_mbuf *mbuf;
303
304         /* Release cmd_ring descriptor and free mbuf */
305         RTE_ASSERT(txq->cmd_ring.base[eop_idx].txd.eop == 1);
306
307         mbuf = txq->cmd_ring.buf_info[eop_idx].m;
308         if (mbuf == NULL)
309                 rte_panic("EOP desc does not point to a valid mbuf");
310         rte_pktmbuf_free(mbuf);
311
312         txq->cmd_ring.buf_info[eop_idx].m = NULL;
313
314         while (txq->cmd_ring.next2comp != eop_idx) {
315                 /* no out-of-order completion */
316                 RTE_ASSERT(txq->cmd_ring.base[txq->cmd_ring.next2comp].txd.cq == 0);
317                 vmxnet3_cmd_ring_adv_next2comp(&txq->cmd_ring);
318                 completed++;
319         }
320
321         /* Mark the txd for which tcd was generated as completed */
322         vmxnet3_cmd_ring_adv_next2comp(&txq->cmd_ring);
323
324         return completed + 1;
325 }
326
327 static void
328 vmxnet3_tq_tx_complete(vmxnet3_tx_queue_t *txq)
329 {
330         int completed = 0;
331         vmxnet3_comp_ring_t *comp_ring = &txq->comp_ring;
332         struct Vmxnet3_TxCompDesc *tcd = (struct Vmxnet3_TxCompDesc *)
333                 (comp_ring->base + comp_ring->next2proc);
334
335         while (tcd->gen == comp_ring->gen) {
336                 completed += vmxnet3_unmap_pkt(tcd->txdIdx, txq);
337
338                 vmxnet3_comp_ring_adv_next2proc(comp_ring);
339                 tcd = (struct Vmxnet3_TxCompDesc *)(comp_ring->base +
340                                                     comp_ring->next2proc);
341         }
342
343         PMD_TX_LOG(DEBUG, "Processed %d tx comps & command descs.", completed);
344 }
345
346 uint16_t
347 vmxnet3_prep_pkts(__rte_unused void *tx_queue, struct rte_mbuf **tx_pkts,
348         uint16_t nb_pkts)
349 {
350         int32_t ret;
351         uint32_t i;
352         uint64_t ol_flags;
353         struct rte_mbuf *m;
354
355         for (i = 0; i != nb_pkts; i++) {
356                 m = tx_pkts[i];
357                 ol_flags = m->ol_flags;
358
359                 /* Non-TSO packet cannot occupy more than
360                  * VMXNET3_MAX_TXD_PER_PKT TX descriptors.
361                  */
362                 if ((ol_flags & PKT_TX_TCP_SEG) == 0 &&
363                                 m->nb_segs > VMXNET3_MAX_TXD_PER_PKT) {
364                         rte_errno = EINVAL;
365                         return i;
366                 }
367
368                 /* check that only supported TX offloads are requested. */
369                 if ((ol_flags & VMXNET3_TX_OFFLOAD_NOTSUP_MASK) != 0 ||
370                                 (ol_flags & PKT_TX_L4_MASK) ==
371                                 PKT_TX_SCTP_CKSUM) {
372                         rte_errno = ENOTSUP;
373                         return i;
374                 }
375
376 #ifdef RTE_LIBRTE_ETHDEV_DEBUG
377                 ret = rte_validate_tx_offload(m);
378                 if (ret != 0) {
379                         rte_errno = -ret;
380                         return i;
381                 }
382 #endif
383                 ret = rte_net_intel_cksum_prepare(m);
384                 if (ret != 0) {
385                         rte_errno = -ret;
386                         return i;
387                 }
388         }
389
390         return i;
391 }
392
/*
 * Tx burst callback: place up to nb_pkts packets from tx_pkts on the Tx
 * command ring of tx_queue.
 *
 * Completed descriptors are reclaimed first. Each packet then takes one
 * command descriptor per mbuf segment; a single-segment packet that fits in
 * one Tx data descriptor is copied into the data ring instead of being
 * referenced in place. VLAN insertion, TSO and TCP/UDP checksum offload are
 * programmed into the first (SOP) descriptor. The SOP descriptor is written
 * with the inverted generation bit and flipped only at the very end, so
 * the statement order in the per-packet body matters.
 *
 * The TXPROD register is written only once the deferred packet count reaches
 * the threshold found in the shared queue control area.
 *
 * Returns the number of entries of tx_pkts that were consumed. This count
 * includes packets that were dropped and freed here (not enough descriptors
 * for a multi-segment packet, too many segments for a non-TSO packet, empty
 * packet); it is 0 when the queue is stopped.
 */
393 uint16_t
394 vmxnet3_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts,
395                   uint16_t nb_pkts)
396 {
397         uint16_t nb_tx;
398         vmxnet3_tx_queue_t *txq = tx_queue;
399         struct vmxnet3_hw *hw = txq->hw;
400         Vmxnet3_TxQueueCtrl *txq_ctrl = &txq->shared->ctrl;
401         uint32_t deferred = rte_le_to_cpu_32(txq_ctrl->txNumDeferred);
402
403         if (unlikely(txq->stopped)) {
404                 PMD_TX_LOG(DEBUG, "Tx queue is stopped.");
405                 return 0;
406         }
407
408         /* Free up the comp_descriptors aggressively */
409         vmxnet3_tq_tx_complete(txq);
410
411         nb_tx = 0;
412         while (nb_tx < nb_pkts) {
413                 Vmxnet3_GenericDesc *gdesc;
414                 vmxnet3_buf_info_t *tbi;
415                 uint32_t first2fill, avail, dw2;
416                 struct rte_mbuf *txm = tx_pkts[nb_tx];
417                 struct rte_mbuf *m_seg = txm;
418                 int copy_size = 0;
419                 bool tso = (txm->ol_flags & PKT_TX_TCP_SEG) != 0;
420                 /* # of descriptors needed for a packet. */
421                 unsigned count = txm->nb_segs;
422
423                 avail = vmxnet3_cmd_ring_desc_avail(&txq->cmd_ring);
424                 if (count > avail) {
425                         /* Is command ring full? */
426                         if (unlikely(avail == 0)) {
427                                 PMD_TX_LOG(DEBUG, "No free ring descriptors");
428                                 txq->stats.tx_ring_full++;
/* The rest of the burst is counted as dropped although it is not freed
 * here and is not included in the return value.
 */
429                                 txq->stats.drop_total += (nb_pkts - nb_tx);
430                                 break;
431                         }
432
433                         /* Command ring is not full but cannot handle the
434                          * multi-segmented packet. Let's try the next packet
435                          * in this case.
436                          */
437                         PMD_TX_LOG(DEBUG, "Running out of ring descriptors "
438                                    "(avail %d needed %d)", avail, count);
439                         txq->stats.drop_total++;
440                         if (tso)
441                                 txq->stats.drop_tso++;
442                         rte_pktmbuf_free(txm);
443                         nb_tx++;
444                         continue;
445                 }
446
447                 /* Drop non-TSO packet that is excessively fragmented */
448                 if (unlikely(!tso && count > VMXNET3_MAX_TXD_PER_PKT)) {
449                         PMD_TX_LOG(ERR, "Non-TSO packet cannot occupy more than %d tx "
450                                    "descriptors. Packet dropped.", VMXNET3_MAX_TXD_PER_PKT);
451                         txq->stats.drop_too_many_segs++;
452                         txq->stats.drop_total++;
453                         rte_pktmbuf_free(txm);
454                         nb_tx++;
455                         continue;
456                 }
457
/* Small single-segment packet: copy the payload into the data ring slot
 * that matches the command descriptor about to be filled.
 */
458                 if (txm->nb_segs == 1 &&
459                     rte_pktmbuf_pkt_len(txm) <= txq->txdata_desc_size) {
460                         struct Vmxnet3_TxDataDesc *tdd;
461
462                         /* Skip empty packets */
463                         if (unlikely(rte_pktmbuf_pkt_len(txm) == 0)) {
464                                 txq->stats.drop_total++;
465                                 rte_pktmbuf_free(txm);
466                                 nb_tx++;
467                                 continue;
468                         }
469
470                         tdd = (struct Vmxnet3_TxDataDesc *)
471                                 ((uint8 *)txq->data_ring.base +
472                                  txq->cmd_ring.next2fill *
473                                  txq->txdata_desc_size);
474                         copy_size = rte_pktmbuf_pkt_len(txm);
475                         rte_memcpy(tdd->data, rte_pktmbuf_mtod(txm, char *), copy_size);
476                 }
477
478                 /* use the previous gen bit for the SOP desc */
479                 dw2 = (txq->cmd_ring.gen ^ 0x1) << VMXNET3_TXD_GEN_SHIFT;
480                 first2fill = txq->cmd_ring.next2fill;
481                 do {
482                         /* Remember the transmit buffer for cleanup */
483                         tbi = txq->cmd_ring.buf_info + txq->cmd_ring.next2fill;
484
485                         /* NB: the following assumes that VMXNET3 maximum
486                          * transmit buffer size (16K) is greater than
487                          * maximum size of mbuf segment size.
488                          */
489                         gdesc = txq->cmd_ring.base + txq->cmd_ring.next2fill;
490
/* NOTE(review): 'continue' in a do/while jumps to the loop condition, so
 * the walk does move on to the next segment. However tbi and gdesc were
 * already pointed at the not-yet-filled slot above; if the LAST segment
 * is empty, the EOP/CQ bits and the mbuf pointer set after the loop appear
 * to land on a descriptor that was never written - confirm.
 */
491                         /* Skip empty segments */
492                         if (unlikely(m_seg->data_len == 0))
493                                 continue;
494
495                         if (copy_size) {
496                                 uint64 offset =
497                                         (uint64)txq->cmd_ring.next2fill *
498                                                         txq->txdata_desc_size;
499                                 gdesc->txd.addr =
500                                         rte_cpu_to_le_64(txq->data_ring.basePA +
501                                                          offset);
502                         } else {
503                                 gdesc->txd.addr = rte_mbuf_data_iova(m_seg);
504                         }
505
506                         gdesc->dword[2] = dw2 | m_seg->data_len;
507                         gdesc->dword[3] = 0;
508
509                         /* move to the next2fill descriptor */
510                         vmxnet3_cmd_ring_adv_next2fill(&txq->cmd_ring);
511
512                         /* use the right gen for non-SOP desc */
513                         dw2 = txq->cmd_ring.gen << VMXNET3_TXD_GEN_SHIFT;
514                 } while ((m_seg = m_seg->next) != NULL);
515
516                 /* set the last buf_info for the pkt */
517                 tbi->m = txm;
518                 /* Update the EOP descriptor */
519                 gdesc->dword[3] |= VMXNET3_TXD_EOP | VMXNET3_TXD_CQ;
520
/* From here on gdesc refers to the first (SOP) descriptor of the packet. */
521                 /* Add VLAN tag if present */
522                 gdesc = txq->cmd_ring.base + first2fill;
523                 if (txm->ol_flags & PKT_TX_VLAN_PKT) {
524                         gdesc->txd.ti = 1;
525                         gdesc->txd.tci = txm->vlan_tci;
526                 }
527
528                 if (tso) {
529                         uint16_t mss = txm->tso_segsz;
530
531                         RTE_ASSERT(mss > 0);
532
533                         gdesc->txd.hlen = txm->l2_len + txm->l3_len + txm->l4_len;
534                         gdesc->txd.om = VMXNET3_OM_TSO;
535                         gdesc->txd.msscof = mss;
536
/* A TSO packet counts as one deferred packet per resulting segment:
 * payload length divided by mss, rounded up.
 */
537                         deferred += (rte_pktmbuf_pkt_len(txm) - gdesc->txd.hlen + mss - 1) / mss;
538                 } else if (txm->ol_flags & PKT_TX_L4_MASK) {
539                         gdesc->txd.om = VMXNET3_OM_CSUM;
540                         gdesc->txd.hlen = txm->l2_len + txm->l3_len;
541
/* For checksum offload msscof carries the byte offset of the L4 checksum
 * field from the start of the frame.
 */
542                         switch (txm->ol_flags & PKT_TX_L4_MASK) {
543                         case PKT_TX_TCP_CKSUM:
544                                 gdesc->txd.msscof = gdesc->txd.hlen +
545                                         offsetof(struct rte_tcp_hdr, cksum);
546                                 break;
547                         case PKT_TX_UDP_CKSUM:
548                                 gdesc->txd.msscof = gdesc->txd.hlen +
549                                         offsetof(struct rte_udp_hdr,
550                                                 dgram_cksum);
551                                 break;
552                         default:
553                                 PMD_TX_LOG(WARNING, "requested cksum offload not supported %#llx",
554                                            txm->ol_flags & PKT_TX_L4_MASK);
555                                 abort();
556                         }
557                         deferred++;
558                 } else {
559                         gdesc->txd.hlen = 0;
560                         gdesc->txd.om = VMXNET3_OM_NONE;
561                         gdesc->txd.msscof = 0;
562                         deferred++;
563                 }
564
/* The barrier keeps the compiler from moving the descriptor stores above
 * past the generation flip below.
 */
565                 /* flip the GEN bit on the SOP */
566                 rte_compiler_barrier();
567                 gdesc->dword[2] ^= VMXNET3_TXD_GEN;
568
569                 txq_ctrl->txNumDeferred = rte_cpu_to_le_32(deferred);
570                 nb_tx++;
571         }
572
573         PMD_TX_LOG(DEBUG, "vmxnet3 txThreshold: %u", rte_le_to_cpu_32(txq_ctrl->txThreshold));
574
575         if (deferred >= rte_le_to_cpu_32(txq_ctrl->txThreshold)) {
576                 txq_ctrl->txNumDeferred = 0;
577                 /* Notify vSwitch that packets are available. */
578                 VMXNET3_WRITE_BAR0_REG(hw, (VMXNET3_REG_TXPROD + txq->queue_id * VMXNET3_REG_ALIGN),
579                                        txq->cmd_ring.next2fill);
580         }
581
582         return nb_tx;
583 }
584
585 static inline void
586 vmxnet3_renew_desc(vmxnet3_rx_queue_t *rxq, uint8_t ring_id,
587                    struct rte_mbuf *mbuf)
588 {
589         uint32_t val;
590         struct vmxnet3_cmd_ring *ring = &rxq->cmd_ring[ring_id];
591         struct Vmxnet3_RxDesc *rxd =
592                 (struct Vmxnet3_RxDesc *)(ring->base + ring->next2fill);
593         vmxnet3_buf_info_t *buf_info = &ring->buf_info[ring->next2fill];
594
595         if (ring_id == 0) {
596                 /* Usually: One HEAD type buf per packet
597                  * val = (ring->next2fill % rxq->hw->bufs_per_pkt) ?
598                  * VMXNET3_RXD_BTYPE_BODY : VMXNET3_RXD_BTYPE_HEAD;
599                  */
600
601                 /* We use single packet buffer so all heads here */
602                 val = VMXNET3_RXD_BTYPE_HEAD;
603         } else {
604                 /* All BODY type buffers for 2nd ring */
605                 val = VMXNET3_RXD_BTYPE_BODY;
606         }
607
608         /*
609          * Load mbuf pointer into buf_info[ring_size]
610          * buf_info structure is equivalent to cookie for virtio-virtqueue
611          */
612         buf_info->m = mbuf;
613         buf_info->len = (uint16_t)(mbuf->buf_len - RTE_PKTMBUF_HEADROOM);
614         buf_info->bufPA = rte_mbuf_data_iova_default(mbuf);
615
616         /* Load Rx Descriptor with the buffer's GPA */
617         rxd->addr = buf_info->bufPA;
618
619         /* After this point rxd->addr MUST not be NULL */
620         rxd->btype = val;
621         rxd->len = buf_info->len;
622         /* Flip gen bit at the end to change ownership */
623         rxd->gen = ring->gen;
624
625         vmxnet3_cmd_ring_adv_next2fill(ring);
626 }
627 /*
628  *  Allocates mbufs and clusters. Post rx descriptors with buffer details
629  *  so that device can receive packets in those buffers.
630  *  Ring layout:
631  *      Among the two rings, 1st ring contains buffers of type 0 and type 1.
632  *      bufs_per_pkt is set such that for non-LRO cases all the buffers required
633  *      by a frame will fit in 1st ring (1st buf of type0 and rest of type1).
634  *      2nd ring contains buffers of type 1 alone. Second ring mostly be used
635  *      only for LRO.
636  */
637 static int
638 vmxnet3_post_rx_bufs(vmxnet3_rx_queue_t *rxq, uint8_t ring_id)
639 {
640         int err = 0;
641         uint32_t i = 0;
642         struct vmxnet3_cmd_ring *ring = &rxq->cmd_ring[ring_id];
643
644         while (vmxnet3_cmd_ring_desc_avail(ring) > 0) {
645                 struct rte_mbuf *mbuf;
646
647                 /* Allocate blank mbuf for the current Rx Descriptor */
648                 mbuf = rte_mbuf_raw_alloc(rxq->mp);
649                 if (unlikely(mbuf == NULL)) {
650                         PMD_RX_LOG(ERR, "Error allocating mbuf");
651                         rxq->stats.rx_buf_alloc_failure++;
652                         err = ENOMEM;
653                         break;
654                 }
655
656                 vmxnet3_renew_desc(rxq, ring_id, mbuf);
657                 i++;
658         }
659
660         /* Return error only if no buffers are posted at present */
661         if (vmxnet3_cmd_ring_desc_avail(ring) >= (ring->size - 1))
662                 return -err;
663         else
664                 return i;
665 }
666
667 /* MSS not provided by vmxnet3, guess one with available information */
668 static uint16_t
669 vmxnet3_guess_mss(struct vmxnet3_hw *hw, const Vmxnet3_RxCompDesc *rcd,
670                 struct rte_mbuf *rxm)
671 {
672         uint32_t hlen, slen;
673         struct rte_ipv4_hdr *ipv4_hdr;
674         struct rte_ipv6_hdr *ipv6_hdr;
675         struct rte_tcp_hdr *tcp_hdr;
676         char *ptr;
677
678         RTE_ASSERT(rcd->tcp);
679
680         ptr = rte_pktmbuf_mtod(rxm, char *);
681         slen = rte_pktmbuf_data_len(rxm);
682         hlen = sizeof(struct rte_ether_hdr);
683
684         if (rcd->v4) {
685                 if (unlikely(slen < hlen + sizeof(struct rte_ipv4_hdr)))
686                         return hw->mtu - sizeof(struct rte_ipv4_hdr)
687                                         - sizeof(struct rte_tcp_hdr);
688
689                 ipv4_hdr = (struct rte_ipv4_hdr *)(ptr + hlen);
690                 hlen += rte_ipv4_hdr_len(ipv4_hdr);
691         } else if (rcd->v6) {
692                 if (unlikely(slen < hlen + sizeof(struct rte_ipv6_hdr)))
693                         return hw->mtu - sizeof(struct rte_ipv6_hdr) -
694                                         sizeof(struct rte_tcp_hdr);
695
696                 ipv6_hdr = (struct rte_ipv6_hdr *)(ptr + hlen);
697                 hlen += sizeof(struct rte_ipv6_hdr);
698                 if (unlikely(ipv6_hdr->proto != IPPROTO_TCP)) {
699                         int frag;
700
701                         rte_net_skip_ip6_ext(ipv6_hdr->proto, rxm,
702                                         &hlen, &frag);
703                 }
704         }
705
706         if (unlikely(slen < hlen + sizeof(struct rte_tcp_hdr)))
707                 return hw->mtu - hlen - sizeof(struct rte_tcp_hdr) +
708                                 sizeof(struct rte_ether_hdr);
709
710         tcp_hdr = (struct rte_tcp_hdr *)(ptr + hlen);
711         hlen += (tcp_hdr->data_off & 0xf0) >> 2;
712
713         if (rxm->udata64 > 1)
714                 return (rte_pktmbuf_pkt_len(rxm) - hlen +
715                                 rxm->udata64 - 1) / rxm->udata64;
716         else
717                 return hw->mtu - hlen + sizeof(struct rte_ether_hdr);
718 }
719
720 /* Receive side checksum and other offloads */
721 static inline void
722 vmxnet3_rx_offload(struct vmxnet3_hw *hw, const Vmxnet3_RxCompDesc *rcd,
723                 struct rte_mbuf *rxm, const uint8_t sop)
724 {
725         uint64_t ol_flags = rxm->ol_flags;
726         uint32_t packet_type = rxm->packet_type;
727
728         /* Offloads set in sop */
729         if (sop) {
730                 /* Set packet type */
731                 packet_type |= RTE_PTYPE_L2_ETHER;
732
733                 /* Check large packet receive */
734                 if (VMXNET3_VERSION_GE_2(hw) &&
735                     rcd->type == VMXNET3_CDTYPE_RXCOMP_LRO) {
736                         const Vmxnet3_RxCompDescExt *rcde =
737                                         (const Vmxnet3_RxCompDescExt *)rcd;
738
739                         rxm->tso_segsz = rcde->mss;
740                         rxm->udata64 = rcde->segCnt;
741                         ol_flags |= PKT_RX_LRO;
742                 }
743         } else { /* Offloads set in eop */
744                 /* Check for RSS */
745                 if (rcd->rssType != VMXNET3_RCD_RSS_TYPE_NONE) {
746                         ol_flags |= PKT_RX_RSS_HASH;
747                         rxm->hash.rss = rcd->rssHash;
748                 }
749
750                 /* Check for hardware stripped VLAN tag */
751                 if (rcd->ts) {
752                         ol_flags |= (PKT_RX_VLAN | PKT_RX_VLAN_STRIPPED);
753                         rxm->vlan_tci = rte_le_to_cpu_16((uint16_t)rcd->tci);
754                 }
755
756                 /* Check packet type, checksum errors, etc. */
757                 if (rcd->cnc) {
758                         ol_flags |= PKT_RX_L4_CKSUM_UNKNOWN;
759                 } else {
760                         if (rcd->v4) {
761                                 packet_type |= RTE_PTYPE_L3_IPV4_EXT_UNKNOWN;
762
763                                 if (rcd->ipc)
764                                         ol_flags |= PKT_RX_IP_CKSUM_GOOD;
765                                 else
766                                         ol_flags |= PKT_RX_IP_CKSUM_BAD;
767
768                                 if (rcd->tuc) {
769                                         ol_flags |= PKT_RX_L4_CKSUM_GOOD;
770                                         if (rcd->tcp)
771                                                 packet_type |= RTE_PTYPE_L4_TCP;
772                                         else
773                                                 packet_type |= RTE_PTYPE_L4_UDP;
774                                 } else {
775                                         if (rcd->tcp) {
776                                                 packet_type |= RTE_PTYPE_L4_TCP;
777                                                 ol_flags |= PKT_RX_L4_CKSUM_BAD;
778                                         } else if (rcd->udp) {
779                                                 packet_type |= RTE_PTYPE_L4_UDP;
780                                                 ol_flags |= PKT_RX_L4_CKSUM_BAD;
781                                         }
782                                 }
783                         } else if (rcd->v6) {
784                                 packet_type |= RTE_PTYPE_L3_IPV6_EXT_UNKNOWN;
785
786                                 if (rcd->tuc) {
787                                         ol_flags |= PKT_RX_L4_CKSUM_GOOD;
788                                         if (rcd->tcp)
789                                                 packet_type |= RTE_PTYPE_L4_TCP;
790                                         else
791                                                 packet_type |= RTE_PTYPE_L4_UDP;
792                                 } else {
793                                         if (rcd->tcp) {
794                                                 packet_type |= RTE_PTYPE_L4_TCP;
795                                                 ol_flags |= PKT_RX_L4_CKSUM_BAD;
796                                         } else if (rcd->udp) {
797                                                 packet_type |= RTE_PTYPE_L4_UDP;
798                                                 ol_flags |= PKT_RX_L4_CKSUM_BAD;
799                                         }
800                                 }
801                         } else {
802                                 packet_type |= RTE_PTYPE_UNKNOWN;
803                         }
804
805                         /* Old variants of vmxnet3 do not provide MSS */
806                         if ((ol_flags & PKT_RX_LRO) && rxm->tso_segsz == 0)
807                                 rxm->tso_segsz = vmxnet3_guess_mss(hw,
808                                                 rcd, rxm);
809                 }
810         }
811
812         rxm->ol_flags = ol_flags;
813         rxm->packet_type = packet_type;
814 }
815
816 /*
817  * Process the Rx Completion Ring of given vmxnet3_rx_queue
818  * for nb_pkts burst and return the number of packets received
819  */
820 uint16_t
821 vmxnet3_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t nb_pkts)
822 {
823         uint16_t nb_rx;
824         uint32_t nb_rxd, idx;
825         uint8_t ring_idx;
826         vmxnet3_rx_queue_t *rxq;
827         Vmxnet3_RxCompDesc *rcd;
828         vmxnet3_buf_info_t *rbi;
829         Vmxnet3_RxDesc *rxd;
830         struct rte_mbuf *rxm = NULL;
831         struct vmxnet3_hw *hw;
832
833         nb_rx = 0;
834         ring_idx = 0;
835         nb_rxd = 0;
836         idx = 0;
837
838         rxq = rx_queue;
839         hw = rxq->hw;
840
841         rcd = &rxq->comp_ring.base[rxq->comp_ring.next2proc].rcd;
842
843         if (unlikely(rxq->stopped)) {
844                 PMD_RX_LOG(DEBUG, "Rx queue is stopped.");
845                 return 0;
846         }
847
848         while (rcd->gen == rxq->comp_ring.gen) {
849                 struct rte_mbuf *newm;
850
851                 if (nb_rx >= nb_pkts)
852                         break;
853
854                 newm = rte_mbuf_raw_alloc(rxq->mp);
855                 if (unlikely(newm == NULL)) {
856                         PMD_RX_LOG(ERR, "Error allocating mbuf");
857                         rxq->stats.rx_buf_alloc_failure++;
858                         break;
859                 }
860
861                 idx = rcd->rxdIdx;
862                 ring_idx = vmxnet3_get_ring_idx(hw, rcd->rqID);
863                 rxd = (Vmxnet3_RxDesc *)rxq->cmd_ring[ring_idx].base + idx;
864                 RTE_SET_USED(rxd); /* used only for assert when enabled */
865                 rbi = rxq->cmd_ring[ring_idx].buf_info + idx;
866
867                 PMD_RX_LOG(DEBUG, "rxd idx: %d ring idx: %d.", idx, ring_idx);
868
869                 RTE_ASSERT(rcd->len <= rxd->len);
870                 RTE_ASSERT(rbi->m);
871
872                 /* Get the packet buffer pointer from buf_info */
873                 rxm = rbi->m;
874
875                 /* Clear descriptor associated buf_info to be reused */
876                 rbi->m = NULL;
877                 rbi->bufPA = 0;
878
879                 /* Update the index that we received a packet */
880                 rxq->cmd_ring[ring_idx].next2comp = idx;
881
882                 /* For RCD with EOP set, check if there is frame error */
883                 if (unlikely(rcd->eop && rcd->err)) {
884                         rxq->stats.drop_total++;
885                         rxq->stats.drop_err++;
886
887                         if (!rcd->fcs) {
888                                 rxq->stats.drop_fcs++;
889                                 PMD_RX_LOG(ERR, "Recv packet dropped due to frame err.");
890                         }
891                         PMD_RX_LOG(ERR, "Error in received packet rcd#:%d rxd:%d",
892                                    (int)(rcd - (struct Vmxnet3_RxCompDesc *)
893                                          rxq->comp_ring.base), rcd->rxdIdx);
894                         rte_pktmbuf_free_seg(rxm);
895                         if (rxq->start_seg) {
896                                 struct rte_mbuf *start = rxq->start_seg;
897
898                                 rxq->start_seg = NULL;
899                                 rte_pktmbuf_free(start);
900                         }
901                         goto rcd_done;
902                 }
903
904                 /* Initialize newly received packet buffer */
905                 rxm->port = rxq->port_id;
906                 rxm->nb_segs = 1;
907                 rxm->next = NULL;
908                 rxm->pkt_len = (uint16_t)rcd->len;
909                 rxm->data_len = (uint16_t)rcd->len;
910                 rxm->data_off = RTE_PKTMBUF_HEADROOM;
911                 rxm->ol_flags = 0;
912                 rxm->vlan_tci = 0;
913                 rxm->packet_type = 0;
914
915                 /*
916                  * If this is the first buffer of the received packet,
917                  * set the pointer to the first mbuf of the packet
918                  * Otherwise, update the total length and the number of segments
919                  * of the current scattered packet, and update the pointer to
920                  * the last mbuf of the current packet.
921                  */
922                 if (rcd->sop) {
923                         RTE_ASSERT(rxd->btype == VMXNET3_RXD_BTYPE_HEAD);
924
925                         if (unlikely(rcd->len == 0)) {
926                                 RTE_ASSERT(rcd->eop);
927
928                                 PMD_RX_LOG(DEBUG,
929                                            "Rx buf was skipped. rxring[%d][%d])",
930                                            ring_idx, idx);
931                                 rte_pktmbuf_free_seg(rxm);
932                                 goto rcd_done;
933                         }
934
935                         if (vmxnet3_rx_data_ring(hw, rcd->rqID)) {
936                                 uint8_t *rdd = rxq->data_ring.base +
937                                         idx * rxq->data_desc_size;
938
939                                 RTE_ASSERT(VMXNET3_VERSION_GE_3(hw));
940                                 rte_memcpy(rte_pktmbuf_mtod(rxm, char *),
941                                            rdd, rcd->len);
942                         }
943
944                         rxq->start_seg = rxm;
945                         rxq->last_seg = rxm;
946                         vmxnet3_rx_offload(hw, rcd, rxm, 1);
947                 } else {
948                         struct rte_mbuf *start = rxq->start_seg;
949
950                         RTE_ASSERT(rxd->btype == VMXNET3_RXD_BTYPE_BODY);
951
952                         if (likely(start && rxm->data_len > 0)) {
953                                 start->pkt_len += rxm->data_len;
954                                 start->nb_segs++;
955
956                                 rxq->last_seg->next = rxm;
957                                 rxq->last_seg = rxm;
958                         } else {
959                                 PMD_RX_LOG(ERR, "Error received empty or out of order frame.");
960                                 rxq->stats.drop_total++;
961                                 rxq->stats.drop_err++;
962
963                                 rte_pktmbuf_free_seg(rxm);
964                         }
965                 }
966
967                 if (rcd->eop) {
968                         struct rte_mbuf *start = rxq->start_seg;
969
970                         vmxnet3_rx_offload(hw, rcd, start, 0);
971                         rx_pkts[nb_rx++] = start;
972                         rxq->start_seg = NULL;
973                 }
974
975 rcd_done:
976                 rxq->cmd_ring[ring_idx].next2comp = idx;
977                 VMXNET3_INC_RING_IDX_ONLY(rxq->cmd_ring[ring_idx].next2comp,
978                                           rxq->cmd_ring[ring_idx].size);
979
980                 /* It's time to renew descriptors */
981                 vmxnet3_renew_desc(rxq, ring_idx, newm);
982                 if (unlikely(rxq->shared->ctrl.updateRxProd)) {
983                         VMXNET3_WRITE_BAR0_REG(hw, rxprod_reg[ring_idx] + (rxq->queue_id * VMXNET3_REG_ALIGN),
984                                                rxq->cmd_ring[ring_idx].next2fill);
985                 }
986
987                 /* Advance to the next descriptor in comp_ring */
988                 vmxnet3_comp_ring_adv_next2proc(&rxq->comp_ring);
989
990                 rcd = &rxq->comp_ring.base[rxq->comp_ring.next2proc].rcd;
991                 nb_rxd++;
992                 if (nb_rxd > rxq->cmd_ring[0].size) {
993                         PMD_RX_LOG(ERR, "Used up quota of receiving packets,"
994                                    " relinquish control.");
995                         break;
996                 }
997         }
998
999         if (unlikely(nb_rxd == 0)) {
1000                 uint32_t avail;
1001                 for (ring_idx = 0; ring_idx < VMXNET3_RX_CMDRING_SIZE; ring_idx++) {
1002                         avail = vmxnet3_cmd_ring_desc_avail(&rxq->cmd_ring[ring_idx]);
1003                         if (unlikely(avail > 0)) {
1004                                 /* try to alloc new buf and renew descriptors */
1005                                 vmxnet3_post_rx_bufs(rxq, ring_idx);
1006                         }
1007                 }
1008                 if (unlikely(rxq->shared->ctrl.updateRxProd)) {
1009                         for (ring_idx = 0; ring_idx < VMXNET3_RX_CMDRING_SIZE; ring_idx++) {
1010                                 VMXNET3_WRITE_BAR0_REG(hw, rxprod_reg[ring_idx] + (rxq->queue_id * VMXNET3_REG_ALIGN),
1011                                                        rxq->cmd_ring[ring_idx].next2fill);
1012                         }
1013                 }
1014         }
1015
1016         return nb_rx;
1017 }
1018
1019 int
1020 vmxnet3_dev_tx_queue_setup(struct rte_eth_dev *dev,
1021                            uint16_t queue_idx,
1022                            uint16_t nb_desc,
1023                            unsigned int socket_id,
1024                            const struct rte_eth_txconf *tx_conf __rte_unused)
1025 {
1026         struct vmxnet3_hw *hw = dev->data->dev_private;
1027         const struct rte_memzone *mz;
1028         struct vmxnet3_tx_queue *txq;
1029         struct vmxnet3_cmd_ring *ring;
1030         struct vmxnet3_comp_ring *comp_ring;
1031         struct vmxnet3_data_ring *data_ring;
1032         int size;
1033
1034         PMD_INIT_FUNC_TRACE();
1035
1036         txq = rte_zmalloc("ethdev_tx_queue", sizeof(struct vmxnet3_tx_queue),
1037                           RTE_CACHE_LINE_SIZE);
1038         if (txq == NULL) {
1039                 PMD_INIT_LOG(ERR, "Can not allocate tx queue structure");
1040                 return -ENOMEM;
1041         }
1042
1043         txq->queue_id = queue_idx;
1044         txq->port_id = dev->data->port_id;
1045         txq->shared = NULL; /* set in vmxnet3_setup_driver_shared() */
1046         txq->hw = hw;
1047         txq->qid = queue_idx;
1048         txq->stopped = TRUE;
1049         txq->txdata_desc_size = hw->txdata_desc_size;
1050
1051         ring = &txq->cmd_ring;
1052         comp_ring = &txq->comp_ring;
1053         data_ring = &txq->data_ring;
1054
1055         /* Tx vmxnet ring length should be between 512-4096 */
1056         if (nb_desc < VMXNET3_DEF_TX_RING_SIZE) {
1057                 PMD_INIT_LOG(ERR, "VMXNET3 Tx Ring Size Min: %u",
1058                              VMXNET3_DEF_TX_RING_SIZE);
1059                 return -EINVAL;
1060         } else if (nb_desc > VMXNET3_TX_RING_MAX_SIZE) {
1061                 PMD_INIT_LOG(ERR, "VMXNET3 Tx Ring Size Max: %u",
1062                              VMXNET3_TX_RING_MAX_SIZE);
1063                 return -EINVAL;
1064         } else {
1065                 ring->size = nb_desc;
1066                 ring->size &= ~VMXNET3_RING_SIZE_MASK;
1067         }
1068         comp_ring->size = data_ring->size = ring->size;
1069
1070         /* Tx vmxnet rings structure initialization*/
1071         ring->next2fill = 0;
1072         ring->next2comp = 0;
1073         ring->gen = VMXNET3_INIT_GEN;
1074         comp_ring->next2proc = 0;
1075         comp_ring->gen = VMXNET3_INIT_GEN;
1076
1077         size = sizeof(struct Vmxnet3_TxDesc) * ring->size;
1078         size += sizeof(struct Vmxnet3_TxCompDesc) * comp_ring->size;
1079         size += txq->txdata_desc_size * data_ring->size;
1080
1081         mz = rte_eth_dma_zone_reserve(dev, "txdesc", queue_idx, size,
1082                                       VMXNET3_RING_BA_ALIGN, socket_id);
1083         if (mz == NULL) {
1084                 PMD_INIT_LOG(ERR, "ERROR: Creating queue descriptors zone");
1085                 return -ENOMEM;
1086         }
1087         txq->mz = mz;
1088         memset(mz->addr, 0, mz->len);
1089
1090         /* cmd_ring initialization */
1091         ring->base = mz->addr;
1092         ring->basePA = mz->iova;
1093
1094         /* comp_ring initialization */
1095         comp_ring->base = ring->base + ring->size;
1096         comp_ring->basePA = ring->basePA +
1097                 (sizeof(struct Vmxnet3_TxDesc) * ring->size);
1098
1099         /* data_ring initialization */
1100         data_ring->base = (Vmxnet3_TxDataDesc *)(comp_ring->base + comp_ring->size);
1101         data_ring->basePA = comp_ring->basePA +
1102                         (sizeof(struct Vmxnet3_TxCompDesc) * comp_ring->size);
1103
1104         /* cmd_ring0 buf_info allocation */
1105         ring->buf_info = rte_zmalloc("tx_ring_buf_info",
1106                                      ring->size * sizeof(vmxnet3_buf_info_t), RTE_CACHE_LINE_SIZE);
1107         if (ring->buf_info == NULL) {
1108                 PMD_INIT_LOG(ERR, "ERROR: Creating tx_buf_info structure");
1109                 return -ENOMEM;
1110         }
1111
1112         /* Update the data portion with txq */
1113         dev->data->tx_queues[queue_idx] = txq;
1114
1115         return 0;
1116 }
1117
1118 int
1119 vmxnet3_dev_rx_queue_setup(struct rte_eth_dev *dev,
1120                            uint16_t queue_idx,
1121                            uint16_t nb_desc,
1122                            unsigned int socket_id,
1123                            __rte_unused const struct rte_eth_rxconf *rx_conf,
1124                            struct rte_mempool *mp)
1125 {
1126         const struct rte_memzone *mz;
1127         struct vmxnet3_rx_queue *rxq;
1128         struct vmxnet3_hw *hw = dev->data->dev_private;
1129         struct vmxnet3_cmd_ring *ring0, *ring1, *ring;
1130         struct vmxnet3_comp_ring *comp_ring;
1131         struct vmxnet3_rx_data_ring *data_ring;
1132         int size;
1133         uint8_t i;
1134         char mem_name[32];
1135
1136         PMD_INIT_FUNC_TRACE();
1137
1138         rxq = rte_zmalloc("ethdev_rx_queue", sizeof(struct vmxnet3_rx_queue),
1139                           RTE_CACHE_LINE_SIZE);
1140         if (rxq == NULL) {
1141                 PMD_INIT_LOG(ERR, "Can not allocate rx queue structure");
1142                 return -ENOMEM;
1143         }
1144
1145         rxq->mp = mp;
1146         rxq->queue_id = queue_idx;
1147         rxq->port_id = dev->data->port_id;
1148         rxq->shared = NULL; /* set in vmxnet3_setup_driver_shared() */
1149         rxq->hw = hw;
1150         rxq->qid1 = queue_idx;
1151         rxq->qid2 = queue_idx + hw->num_rx_queues;
1152         rxq->data_ring_qid = queue_idx + 2 * hw->num_rx_queues;
1153         rxq->data_desc_size = hw->rxdata_desc_size;
1154         rxq->stopped = TRUE;
1155
1156         ring0 = &rxq->cmd_ring[0];
1157         ring1 = &rxq->cmd_ring[1];
1158         comp_ring = &rxq->comp_ring;
1159         data_ring = &rxq->data_ring;
1160
1161         /* Rx vmxnet rings length should be between 256-4096 */
1162         if (nb_desc < VMXNET3_DEF_RX_RING_SIZE) {
1163                 PMD_INIT_LOG(ERR, "VMXNET3 Rx Ring Size Min: 256");
1164                 return -EINVAL;
1165         } else if (nb_desc > VMXNET3_RX_RING_MAX_SIZE) {
1166                 PMD_INIT_LOG(ERR, "VMXNET3 Rx Ring Size Max: 4096");
1167                 return -EINVAL;
1168         } else {
1169                 ring0->size = nb_desc;
1170                 ring0->size &= ~VMXNET3_RING_SIZE_MASK;
1171                 ring1->size = ring0->size;
1172         }
1173
1174         comp_ring->size = ring0->size + ring1->size;
1175         data_ring->size = ring0->size;
1176
1177         /* Rx vmxnet rings structure initialization */
1178         ring0->next2fill = 0;
1179         ring1->next2fill = 0;
1180         ring0->next2comp = 0;
1181         ring1->next2comp = 0;
1182         ring0->gen = VMXNET3_INIT_GEN;
1183         ring1->gen = VMXNET3_INIT_GEN;
1184         comp_ring->next2proc = 0;
1185         comp_ring->gen = VMXNET3_INIT_GEN;
1186
1187         size = sizeof(struct Vmxnet3_RxDesc) * (ring0->size + ring1->size);
1188         size += sizeof(struct Vmxnet3_RxCompDesc) * comp_ring->size;
1189         if (VMXNET3_VERSION_GE_3(hw) && rxq->data_desc_size)
1190                 size += rxq->data_desc_size * data_ring->size;
1191
1192         mz = rte_eth_dma_zone_reserve(dev, "rxdesc", queue_idx, size,
1193                                       VMXNET3_RING_BA_ALIGN, socket_id);
1194         if (mz == NULL) {
1195                 PMD_INIT_LOG(ERR, "ERROR: Creating queue descriptors zone");
1196                 return -ENOMEM;
1197         }
1198         rxq->mz = mz;
1199         memset(mz->addr, 0, mz->len);
1200
1201         /* cmd_ring0 initialization */
1202         ring0->base = mz->addr;
1203         ring0->basePA = mz->iova;
1204
1205         /* cmd_ring1 initialization */
1206         ring1->base = ring0->base + ring0->size;
1207         ring1->basePA = ring0->basePA + sizeof(struct Vmxnet3_RxDesc) * ring0->size;
1208
1209         /* comp_ring initialization */
1210         comp_ring->base = ring1->base + ring1->size;
1211         comp_ring->basePA = ring1->basePA + sizeof(struct Vmxnet3_RxDesc) *
1212                 ring1->size;
1213
1214         /* data_ring initialization */
1215         if (VMXNET3_VERSION_GE_3(hw) && rxq->data_desc_size) {
1216                 data_ring->base =
1217                         (uint8_t *)(comp_ring->base + comp_ring->size);
1218                 data_ring->basePA = comp_ring->basePA +
1219                         sizeof(struct Vmxnet3_RxCompDesc) * comp_ring->size;
1220         }
1221
1222         /* cmd_ring0-cmd_ring1 buf_info allocation */
1223         for (i = 0; i < VMXNET3_RX_CMDRING_SIZE; i++) {
1224
1225                 ring = &rxq->cmd_ring[i];
1226                 ring->rid = i;
1227                 snprintf(mem_name, sizeof(mem_name), "rx_ring_%d_buf_info", i);
1228
1229                 ring->buf_info = rte_zmalloc(mem_name,
1230                                              ring->size * sizeof(vmxnet3_buf_info_t),
1231                                              RTE_CACHE_LINE_SIZE);
1232                 if (ring->buf_info == NULL) {
1233                         PMD_INIT_LOG(ERR, "ERROR: Creating rx_buf_info structure");
1234                         return -ENOMEM;
1235                 }
1236         }
1237
1238         /* Update the data portion with rxq */
1239         dev->data->rx_queues[queue_idx] = rxq;
1240
1241         return 0;
1242 }
1243
1244 /*
1245  * Initializes Receive Unit
1246  * Load mbufs in rx queue in advance
1247  */
1248 int
1249 vmxnet3_dev_rxtx_init(struct rte_eth_dev *dev)
1250 {
1251         struct vmxnet3_hw *hw = dev->data->dev_private;
1252
1253         int i, ret;
1254         uint8_t j;
1255
1256         PMD_INIT_FUNC_TRACE();
1257
1258         for (i = 0; i < hw->num_rx_queues; i++) {
1259                 vmxnet3_rx_queue_t *rxq = dev->data->rx_queues[i];
1260
1261                 for (j = 0; j < VMXNET3_RX_CMDRING_SIZE; j++) {
1262                         /* Passing 0 as alloc_num will allocate full ring */
1263                         ret = vmxnet3_post_rx_bufs(rxq, j);
1264                         if (ret <= 0) {
1265                                 PMD_INIT_LOG(ERR,
1266                                              "ERROR: Posting Rxq: %d buffers ring: %d",
1267                                              i, j);
1268                                 return -ret;
1269                         }
1270                         /*
1271                          * Updating device with the index:next2fill to fill the
1272                          * mbufs for coming packets.
1273                          */
1274                         if (unlikely(rxq->shared->ctrl.updateRxProd)) {
1275                                 VMXNET3_WRITE_BAR0_REG(hw, rxprod_reg[j] + (rxq->queue_id * VMXNET3_REG_ALIGN),
1276                                                        rxq->cmd_ring[j].next2fill);
1277                         }
1278                 }
1279                 rxq->stopped = FALSE;
1280                 rxq->start_seg = NULL;
1281         }
1282
1283         for (i = 0; i < dev->data->nb_tx_queues; i++) {
1284                 struct vmxnet3_tx_queue *txq = dev->data->tx_queues[i];
1285
1286                 txq->stopped = FALSE;
1287         }
1288
1289         return 0;
1290 }
1291
/*
 * 40-byte RSS hash key that vmxnet3_rss_configure() installs when the port
 * configuration does not supply a key of its own.
 */
static uint8_t rss_intel_key[40] = {
        0x6d, 0x5a, 0x56, 0xda, 0x25, 0x5b, 0x0e, 0xc2, 0x41, 0x67,
        0x25, 0x3d, 0x43, 0xa3, 0x8f, 0xb0, 0xd0, 0xca, 0x2b, 0xcb,
        0xae, 0x7b, 0x30, 0xb4, 0x77, 0xcb, 0x2d, 0xa3, 0x80, 0x30,
        0xf2, 0x0c, 0x6a, 0x42, 0xb7, 0x3b, 0xbe, 0xac, 0x01, 0xfa,
};
1299
1300 /*
1301  * Additional RSS configurations based on vmxnet v4+ APIs
1302  */
1303 int
1304 vmxnet3_v4_rss_configure(struct rte_eth_dev *dev)
1305 {
1306         struct vmxnet3_hw *hw = dev->data->dev_private;
1307         Vmxnet3_DriverShared *shared = hw->shared;
1308         Vmxnet3_CmdInfo *cmdInfo = &shared->cu.cmdInfo;
1309         struct rte_eth_rss_conf *port_rss_conf;
1310         uint64_t rss_hf;
1311         uint32_t ret;
1312
1313         PMD_INIT_FUNC_TRACE();
1314
1315         cmdInfo->setRSSFields = 0;
1316         port_rss_conf = &dev->data->dev_conf.rx_adv_conf.rss_conf;
1317
1318         if ((port_rss_conf->rss_hf & VMXNET3_MANDATORY_V4_RSS) !=
1319             VMXNET3_MANDATORY_V4_RSS) {
1320                 PMD_INIT_LOG(WARNING, "RSS: IPv4/6 TCP is required for vmxnet3 v4 RSS,"
1321                              "automatically setting it");
1322                 port_rss_conf->rss_hf |= VMXNET3_MANDATORY_V4_RSS;
1323         }
1324
1325         rss_hf = port_rss_conf->rss_hf &
1326                 (VMXNET3_V4_RSS_MASK | VMXNET3_RSS_OFFLOAD_ALL);
1327
1328         if (rss_hf & ETH_RSS_NONFRAG_IPV4_TCP)
1329                 cmdInfo->setRSSFields |= VMXNET3_RSS_FIELDS_TCPIP4;
1330         if (rss_hf & ETH_RSS_NONFRAG_IPV6_TCP)
1331                 cmdInfo->setRSSFields |= VMXNET3_RSS_FIELDS_TCPIP6;
1332         if (rss_hf & ETH_RSS_NONFRAG_IPV4_UDP)
1333                 cmdInfo->setRSSFields |= VMXNET3_RSS_FIELDS_UDPIP4;
1334         if (rss_hf & ETH_RSS_NONFRAG_IPV6_UDP)
1335                 cmdInfo->setRSSFields |= VMXNET3_RSS_FIELDS_UDPIP6;
1336
1337         VMXNET3_WRITE_BAR1_REG(hw, VMXNET3_REG_CMD,
1338                                VMXNET3_CMD_SET_RSS_FIELDS);
1339         ret = VMXNET3_READ_BAR1_REG(hw, VMXNET3_REG_CMD);
1340
1341         if (ret != VMXNET3_SUCCESS) {
1342                 PMD_DRV_LOG(ERR, "Set RSS fields (v4) failed: %d", ret);
1343         }
1344
1345         return ret;
1346 }
1347
1348 /*
1349  * Configure RSS feature
1350  */
1351 int
1352 vmxnet3_rss_configure(struct rte_eth_dev *dev)
1353 {
1354         struct vmxnet3_hw *hw = dev->data->dev_private;
1355         struct VMXNET3_RSSConf *dev_rss_conf;
1356         struct rte_eth_rss_conf *port_rss_conf;
1357         uint64_t rss_hf;
1358         uint8_t i, j;
1359
1360         PMD_INIT_FUNC_TRACE();
1361
1362         dev_rss_conf = hw->rss_conf;
1363         port_rss_conf = &dev->data->dev_conf.rx_adv_conf.rss_conf;
1364
1365         /* loading hashFunc */
1366         dev_rss_conf->hashFunc = VMXNET3_RSS_HASH_FUNC_TOEPLITZ;
1367         /* loading hashKeySize */
1368         dev_rss_conf->hashKeySize = VMXNET3_RSS_MAX_KEY_SIZE;
1369         /* loading indTableSize: Must not exceed VMXNET3_RSS_MAX_IND_TABLE_SIZE (128)*/
1370         dev_rss_conf->indTableSize = (uint16_t)(hw->num_rx_queues * 4);
1371
1372         if (port_rss_conf->rss_key == NULL) {
1373                 /* Default hash key */
1374                 port_rss_conf->rss_key = rss_intel_key;
1375         }
1376
1377         /* loading hashKey */
1378         memcpy(&dev_rss_conf->hashKey[0], port_rss_conf->rss_key,
1379                dev_rss_conf->hashKeySize);
1380
1381         /* loading indTable */
1382         for (i = 0, j = 0; i < dev_rss_conf->indTableSize; i++, j++) {
1383                 if (j == dev->data->nb_rx_queues)
1384                         j = 0;
1385                 dev_rss_conf->indTable[i] = j;
1386         }
1387
1388         /* loading hashType */
1389         dev_rss_conf->hashType = 0;
1390         rss_hf = port_rss_conf->rss_hf & VMXNET3_RSS_OFFLOAD_ALL;
1391         if (rss_hf & ETH_RSS_IPV4)
1392                 dev_rss_conf->hashType |= VMXNET3_RSS_HASH_TYPE_IPV4;
1393         if (rss_hf & ETH_RSS_NONFRAG_IPV4_TCP)
1394                 dev_rss_conf->hashType |= VMXNET3_RSS_HASH_TYPE_TCP_IPV4;
1395         if (rss_hf & ETH_RSS_IPV6)
1396                 dev_rss_conf->hashType |= VMXNET3_RSS_HASH_TYPE_IPV6;
1397         if (rss_hf & ETH_RSS_NONFRAG_IPV6_TCP)
1398                 dev_rss_conf->hashType |= VMXNET3_RSS_HASH_TYPE_TCP_IPV6;
1399
1400         return VMXNET3_SUCCESS;
1401 }