net/vmxnet3: ignore empty segments in reception
dpdk.git: drivers/net/vmxnet3/vmxnet3_rxtx.c
1 /* SPDX-License-Identifier: BSD-3-Clause
2  * Copyright(c) 2010-2015 Intel Corporation
3  */
4
5 #include <sys/queue.h>
6
7 #include <stdio.h>
8 #include <stdlib.h>
9 #include <string.h>
10 #include <errno.h>
11 #include <stdint.h>
12 #include <stdarg.h>
13 #include <unistd.h>
14 #include <inttypes.h>
15
16 #include <rte_byteorder.h>
17 #include <rte_common.h>
18 #include <rte_cycles.h>
19 #include <rte_log.h>
20 #include <rte_debug.h>
21 #include <rte_interrupts.h>
22 #include <rte_pci.h>
23 #include <rte_memory.h>
24 #include <rte_memzone.h>
25 #include <rte_launch.h>
26 #include <rte_eal.h>
27 #include <rte_per_lcore.h>
28 #include <rte_lcore.h>
29 #include <rte_atomic.h>
30 #include <rte_branch_prediction.h>
31 #include <rte_mempool.h>
32 #include <rte_malloc.h>
33 #include <rte_mbuf.h>
34 #include <rte_ether.h>
35 #include <rte_ethdev_driver.h>
36 #include <rte_prefetch.h>
37 #include <rte_ip.h>
38 #include <rte_udp.h>
39 #include <rte_tcp.h>
40 #include <rte_sctp.h>
41 #include <rte_string_fns.h>
42 #include <rte_errno.h>
43 #include <rte_net.h>
44
45 #include "base/vmxnet3_defs.h"
46 #include "vmxnet3_ring.h"
47
48 #include "vmxnet3_logs.h"
49 #include "vmxnet3_ethdev.h"
50
51 #define VMXNET3_TX_OFFLOAD_MASK ( \
52                 PKT_TX_VLAN_PKT | \
53                 PKT_TX_L4_MASK |  \
54                 PKT_TX_TCP_SEG)
55
56 #define VMXNET3_TX_OFFLOAD_NOTSUP_MASK  \
57         (PKT_TX_OFFLOAD_MASK ^ VMXNET3_TX_OFFLOAD_MASK)
58
59 static const uint32_t rxprod_reg[2] = {VMXNET3_REG_RXPROD, VMXNET3_REG_RXPROD2};
60
61 static int vmxnet3_post_rx_bufs(vmxnet3_rx_queue_t*, uint8_t);
62 static void vmxnet3_tq_tx_complete(vmxnet3_tx_queue_t *);
63 #ifdef RTE_LIBRTE_VMXNET3_DEBUG_DRIVER_NOT_USED
64 static void vmxnet3_rxq_dump(struct vmxnet3_rx_queue *);
65 static void vmxnet3_txq_dump(struct vmxnet3_tx_queue *);
66 #endif
67
68 #ifdef RTE_LIBRTE_VMXNET3_DEBUG_DRIVER_NOT_USED
69 static void
70 vmxnet3_rxq_dump(struct vmxnet3_rx_queue *rxq)
71 {
72         uint32_t avail = 0;
73
74         if (rxq == NULL)
75                 return;
76
77         PMD_RX_LOG(DEBUG,
78                    "RXQ: cmd0 base : %p cmd1 base : %p comp ring base : %p.",
79                    rxq->cmd_ring[0].base, rxq->cmd_ring[1].base, rxq->comp_ring.base);
80         PMD_RX_LOG(DEBUG,
81                    "RXQ: cmd0 basePA : 0x%lx cmd1 basePA : 0x%lx comp ring basePA : 0x%lx.",
82                    (unsigned long)rxq->cmd_ring[0].basePA,
83                    (unsigned long)rxq->cmd_ring[1].basePA,
84                    (unsigned long)rxq->comp_ring.basePA);
85
86         avail = vmxnet3_cmd_ring_desc_avail(&rxq->cmd_ring[0]);
87         PMD_RX_LOG(DEBUG,
88                    "RXQ:cmd0: size=%u; free=%u; next2proc=%u; queued=%u",
89                    (uint32_t)rxq->cmd_ring[0].size, avail,
90                    rxq->comp_ring.next2proc,
91                    rxq->cmd_ring[0].size - avail);
92
93         avail = vmxnet3_cmd_ring_desc_avail(&rxq->cmd_ring[1]);
94         PMD_RX_LOG(DEBUG, "RXQ:cmd1 size=%u; free=%u; next2proc=%u; queued=%u",
95                    (uint32_t)rxq->cmd_ring[1].size, avail, rxq->comp_ring.next2proc,
96                    rxq->cmd_ring[1].size - avail);
97
98 }
99
100 static void
101 vmxnet3_txq_dump(struct vmxnet3_tx_queue *txq)
102 {
103         uint32_t avail = 0;
104
105         if (txq == NULL)
106                 return;
107
108         PMD_TX_LOG(DEBUG, "TXQ: cmd base : %p comp ring base : %p data ring base : %p.",
109                    txq->cmd_ring.base, txq->comp_ring.base, txq->data_ring.base);
110         PMD_TX_LOG(DEBUG, "TXQ: cmd basePA : 0x%lx comp ring basePA : 0x%lx data ring basePA : 0x%lx.",
111                    (unsigned long)txq->cmd_ring.basePA,
112                    (unsigned long)txq->comp_ring.basePA,
113                    (unsigned long)txq->data_ring.basePA);
114
115         avail = vmxnet3_cmd_ring_desc_avail(&txq->cmd_ring);
116         PMD_TX_LOG(DEBUG, "TXQ: size=%u; free=%u; next2proc=%u; queued=%u",
117                    (uint32_t)txq->cmd_ring.size, avail,
118                    txq->comp_ring.next2proc, txq->cmd_ring.size - avail);
119 }
120 #endif
121
122 static void
123 vmxnet3_tx_cmd_ring_release_mbufs(vmxnet3_cmd_ring_t *ring)
124 {
125         while (ring->next2comp != ring->next2fill) {
126                 /* No need to worry about desc ownership, device is quiesced by now. */
127                 vmxnet3_buf_info_t *buf_info = ring->buf_info + ring->next2comp;
128
129                 if (buf_info->m) {
130                         rte_pktmbuf_free(buf_info->m);
131                         buf_info->m = NULL;
132                         buf_info->bufPA = 0;
133                         buf_info->len = 0;
134                 }
135                 vmxnet3_cmd_ring_adv_next2comp(ring);
136         }
137 }
138
139 static void
140 vmxnet3_rx_cmd_ring_release_mbufs(vmxnet3_cmd_ring_t *ring)
141 {
142         uint32_t i;
143
144         for (i = 0; i < ring->size; i++) {
145                 /* No need to worry about desc ownership, device is quiesced by now. */
146                 vmxnet3_buf_info_t *buf_info = &ring->buf_info[i];
147
148                 if (buf_info->m) {
149                         rte_pktmbuf_free_seg(buf_info->m);
150                         buf_info->m = NULL;
151                         buf_info->bufPA = 0;
152                         buf_info->len = 0;
153                 }
154                 vmxnet3_cmd_ring_adv_next2comp(ring);
155         }
156 }
157
158 static void
159 vmxnet3_cmd_ring_release(vmxnet3_cmd_ring_t *ring)
160 {
161         rte_free(ring->buf_info);
162         ring->buf_info = NULL;
163 }
164
165 void
166 vmxnet3_dev_tx_queue_release(void *txq)
167 {
168         vmxnet3_tx_queue_t *tq = txq;
169
170         if (tq != NULL) {
171                 /* Release mbufs */
172                 vmxnet3_tx_cmd_ring_release_mbufs(&tq->cmd_ring);
173                 /* Release the cmd_ring */
174                 vmxnet3_cmd_ring_release(&tq->cmd_ring);
175                 /* Release the memzone */
176                 rte_memzone_free(tq->mz);
177                 /* Release the queue */
178                 rte_free(tq);
179         }
180 }
181
182 void
183 vmxnet3_dev_rx_queue_release(void *rxq)
184 {
185         int i;
186         vmxnet3_rx_queue_t *rq = rxq;
187
188         if (rq != NULL) {
189                 /* Release mbufs */
190                 for (i = 0; i < VMXNET3_RX_CMDRING_SIZE; i++)
191                         vmxnet3_rx_cmd_ring_release_mbufs(&rq->cmd_ring[i]);
192
193                 /* Release both the cmd_rings */
194                 for (i = 0; i < VMXNET3_RX_CMDRING_SIZE; i++)
195                         vmxnet3_cmd_ring_release(&rq->cmd_ring[i]);
196
197                 /* Release the memzone */
198                 rte_memzone_free(rq->mz);
199
200                 /* Release the queue */
201                 rte_free(rq);
202         }
203 }
204
205 static void
206 vmxnet3_dev_tx_queue_reset(void *txq)
207 {
208         vmxnet3_tx_queue_t *tq = txq;
209         struct vmxnet3_cmd_ring *ring = &tq->cmd_ring;
210         struct vmxnet3_comp_ring *comp_ring = &tq->comp_ring;
211         struct vmxnet3_data_ring *data_ring = &tq->data_ring;
212         int size;
213
214         if (tq != NULL) {
215                 /* Release the cmd_ring mbufs */
216                 vmxnet3_tx_cmd_ring_release_mbufs(&tq->cmd_ring);
217         }
218
219         /* Tx vmxnet rings structure initialization */
220         ring->next2fill = 0;
221         ring->next2comp = 0;
222         ring->gen = VMXNET3_INIT_GEN;
223         comp_ring->next2proc = 0;
224         comp_ring->gen = VMXNET3_INIT_GEN;
225
226         size = sizeof(struct Vmxnet3_TxDesc) * ring->size;
227         size += sizeof(struct Vmxnet3_TxCompDesc) * comp_ring->size;
228         size += tq->txdata_desc_size * data_ring->size;
229
230         memset(ring->base, 0, size);
231 }
232
233 static void
234 vmxnet3_dev_rx_queue_reset(void *rxq)
235 {
236         int i;
237         vmxnet3_rx_queue_t *rq = rxq;
238         struct vmxnet3_hw *hw = rq->hw;
239         struct vmxnet3_cmd_ring *ring0, *ring1;
240         struct vmxnet3_comp_ring *comp_ring;
241         struct vmxnet3_rx_data_ring *data_ring = &rq->data_ring;
242         int size;
243
244         /* Release both the cmd_rings mbufs */
245         for (i = 0; i < VMXNET3_RX_CMDRING_SIZE; i++)
246                 vmxnet3_rx_cmd_ring_release_mbufs(&rq->cmd_ring[i]);
247
248         ring0 = &rq->cmd_ring[0];
249         ring1 = &rq->cmd_ring[1];
250         comp_ring = &rq->comp_ring;
251
252         /* Rx vmxnet rings structure initialization */
253         ring0->next2fill = 0;
254         ring1->next2fill = 0;
255         ring0->next2comp = 0;
256         ring1->next2comp = 0;
257         ring0->gen = VMXNET3_INIT_GEN;
258         ring1->gen = VMXNET3_INIT_GEN;
259         comp_ring->next2proc = 0;
260         comp_ring->gen = VMXNET3_INIT_GEN;
261
262         size = sizeof(struct Vmxnet3_RxDesc) * (ring0->size + ring1->size);
263         size += sizeof(struct Vmxnet3_RxCompDesc) * comp_ring->size;
264         if (VMXNET3_VERSION_GE_3(hw) && rq->data_desc_size)
265                 size += rq->data_desc_size * data_ring->size;
266
267         memset(ring0->base, 0, size);
268 }
269
270 void
271 vmxnet3_dev_clear_queues(struct rte_eth_dev *dev)
272 {
273         unsigned i;
274
275         PMD_INIT_FUNC_TRACE();
276
277         for (i = 0; i < dev->data->nb_tx_queues; i++) {
278                 struct vmxnet3_tx_queue *txq = dev->data->tx_queues[i];
279
280                 if (txq != NULL) {
281                         txq->stopped = TRUE;
282                         vmxnet3_dev_tx_queue_reset(txq);
283                 }
284         }
285
286         for (i = 0; i < dev->data->nb_rx_queues; i++) {
287                 struct vmxnet3_rx_queue *rxq = dev->data->rx_queues[i];
288
289                 if (rxq != NULL) {
290                         rxq->stopped = TRUE;
291                         vmxnet3_dev_rx_queue_reset(rxq);
292                 }
293         }
294 }
295
296 static int
297 vmxnet3_unmap_pkt(uint16_t eop_idx, vmxnet3_tx_queue_t *txq)
298 {
299         int completed = 0;
300         struct rte_mbuf *mbuf;
301
302         /* Release cmd_ring descriptor and free mbuf */
303         RTE_ASSERT(txq->cmd_ring.base[eop_idx].txd.eop == 1);
304
305         mbuf = txq->cmd_ring.buf_info[eop_idx].m;
306         if (mbuf == NULL)
307                 rte_panic("EOP desc does not point to a valid mbuf");
308         rte_pktmbuf_free(mbuf);
309
310         txq->cmd_ring.buf_info[eop_idx].m = NULL;
311
312         while (txq->cmd_ring.next2comp != eop_idx) {
313                 /* no out-of-order completion */
314                 RTE_ASSERT(txq->cmd_ring.base[txq->cmd_ring.next2comp].txd.cq == 0);
315                 vmxnet3_cmd_ring_adv_next2comp(&txq->cmd_ring);
316                 completed++;
317         }
318
319         /* Mark the txd for which tcd was generated as completed */
320         vmxnet3_cmd_ring_adv_next2comp(&txq->cmd_ring);
321
322         return completed + 1;
323 }
324
325 static void
326 vmxnet3_tq_tx_complete(vmxnet3_tx_queue_t *txq)
327 {
328         int completed = 0;
329         vmxnet3_comp_ring_t *comp_ring = &txq->comp_ring;
330         struct Vmxnet3_TxCompDesc *tcd = (struct Vmxnet3_TxCompDesc *)
331                 (comp_ring->base + comp_ring->next2proc);
332
333         while (tcd->gen == comp_ring->gen) {
334                 completed += vmxnet3_unmap_pkt(tcd->txdIdx, txq);
335
336                 vmxnet3_comp_ring_adv_next2proc(comp_ring);
337                 tcd = (struct Vmxnet3_TxCompDesc *)(comp_ring->base +
338                                                     comp_ring->next2proc);
339         }
340
341         PMD_TX_LOG(DEBUG, "Processed %d tx comps & command descs.", completed);
342 }
343
344 uint16_t
345 vmxnet3_prep_pkts(__rte_unused void *tx_queue, struct rte_mbuf **tx_pkts,
346         uint16_t nb_pkts)
347 {
348         int32_t ret;
349         uint32_t i;
350         uint64_t ol_flags;
351         struct rte_mbuf *m;
352
353         for (i = 0; i != nb_pkts; i++) {
354                 m = tx_pkts[i];
355                 ol_flags = m->ol_flags;
356
357                 /* Non-TSO packet cannot occupy more than
358                  * VMXNET3_MAX_TXD_PER_PKT TX descriptors.
359                  */
360                 if ((ol_flags & PKT_TX_TCP_SEG) == 0 &&
361                                 m->nb_segs > VMXNET3_MAX_TXD_PER_PKT) {
362                         rte_errno = EINVAL;
363                         return i;
364                 }
365
366                 /* check that only supported TX offloads are requested. */
367                 if ((ol_flags & VMXNET3_TX_OFFLOAD_NOTSUP_MASK) != 0 ||
368                                 (ol_flags & PKT_TX_L4_MASK) ==
369                                 PKT_TX_SCTP_CKSUM) {
370                         rte_errno = ENOTSUP;
371                         return i;
372                 }
373
374 #ifdef RTE_LIBRTE_ETHDEV_DEBUG
375                 ret = rte_validate_tx_offload(m);
376                 if (ret != 0) {
377                         rte_errno = -ret;
378                         return i;
379                 }
380 #endif
381                 ret = rte_net_intel_cksum_prepare(m);
382                 if (ret != 0) {
383                         rte_errno = -ret;
384                         return i;
385                 }
386         }
387
388         return i;
389 }
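
/*
 * Illustrative sketch only, not used by the driver: how an application-side
 * transmit path would exercise vmxnet3_prep_pkts() through the generic
 * rte_eth_tx_prepare()/rte_eth_tx_burst() pair. The helper name and the
 * port/queue ids are hypothetical.
 */
static __rte_unused uint16_t
vmxnet3_tx_prepare_usage_sketch(uint16_t port_id, uint16_t queue_id,
                                struct rte_mbuf **pkts, uint16_t nb_pkts)
{
        /* Validate offload requests; nb_ok is the count of accepted packets. */
        uint16_t nb_ok = rte_eth_tx_prepare(port_id, queue_id, pkts, nb_pkts);

        if (nb_ok < nb_pkts)
                PMD_TX_LOG(DEBUG, "Tx prepare stopped at packet %u: %s",
                           (unsigned int)nb_ok, rte_strerror(rte_errno));

        /* Hand only the validated prefix of the burst to the PMD. */
        return rte_eth_tx_burst(port_id, queue_id, pkts, nb_ok);
}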
390
391 uint16_t
392 vmxnet3_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts,
393                   uint16_t nb_pkts)
394 {
395         uint16_t nb_tx;
396         vmxnet3_tx_queue_t *txq = tx_queue;
397         struct vmxnet3_hw *hw = txq->hw;
398         Vmxnet3_TxQueueCtrl *txq_ctrl = &txq->shared->ctrl;
399         uint32_t deferred = rte_le_to_cpu_32(txq_ctrl->txNumDeferred);
400
401         if (unlikely(txq->stopped)) {
402                 PMD_TX_LOG(DEBUG, "Tx queue is stopped.");
403                 return 0;
404         }
405
406         /* Free up the comp_descriptors aggressively */
407         vmxnet3_tq_tx_complete(txq);
408
409         nb_tx = 0;
410         while (nb_tx < nb_pkts) {
411                 Vmxnet3_GenericDesc *gdesc;
412                 vmxnet3_buf_info_t *tbi;
413                 uint32_t first2fill, avail, dw2;
414                 struct rte_mbuf *txm = tx_pkts[nb_tx];
415                 struct rte_mbuf *m_seg = txm;
416                 int copy_size = 0;
417                 bool tso = (txm->ol_flags & PKT_TX_TCP_SEG) != 0;
418                 /* # of descriptors needed for a packet. */
419                 unsigned count = txm->nb_segs;
420
421                 avail = vmxnet3_cmd_ring_desc_avail(&txq->cmd_ring);
422                 if (count > avail) {
423                         /* Is command ring full? */
424                         if (unlikely(avail == 0)) {
425                                 PMD_TX_LOG(DEBUG, "No free ring descriptors");
426                                 txq->stats.tx_ring_full++;
427                                 txq->stats.drop_total += (nb_pkts - nb_tx);
428                                 break;
429                         }
430
431                         /* Command ring is not full but cannot handle the
432                          * multi-segmented packet. Let's try the next packet
433                          * in this case.
434                          */
435                         PMD_TX_LOG(DEBUG, "Running out of ring descriptors "
436                                    "(avail %d needed %d)", avail, count);
437                         txq->stats.drop_total++;
438                         if (tso)
439                                 txq->stats.drop_tso++;
440                         rte_pktmbuf_free(txm);
441                         nb_tx++;
442                         continue;
443                 }
444
445                 /* Drop non-TSO packet that is excessively fragmented */
446                 if (unlikely(!tso && count > VMXNET3_MAX_TXD_PER_PKT)) {
447                         PMD_TX_LOG(ERR, "Non-TSO packet cannot occupy more than %d tx "
448                                    "descriptors. Packet dropped.", VMXNET3_MAX_TXD_PER_PKT);
449                         txq->stats.drop_too_many_segs++;
450                         txq->stats.drop_total++;
451                         rte_pktmbuf_free(txm);
452                         nb_tx++;
453                         continue;
454                 }
455
456                 if (txm->nb_segs == 1 &&
457                     rte_pktmbuf_pkt_len(txm) <= txq->txdata_desc_size) {
458                         struct Vmxnet3_TxDataDesc *tdd;
459
460                         tdd = (struct Vmxnet3_TxDataDesc *)
461                                 ((uint8 *)txq->data_ring.base +
462                                  txq->cmd_ring.next2fill *
463                                  txq->txdata_desc_size);
464                         copy_size = rte_pktmbuf_pkt_len(txm);
465                         rte_memcpy(tdd->data, rte_pktmbuf_mtod(txm, char *), copy_size);
466                 }
467
468                 /* use the previous gen bit for the SOP desc */
469                 dw2 = (txq->cmd_ring.gen ^ 0x1) << VMXNET3_TXD_GEN_SHIFT;
470                 first2fill = txq->cmd_ring.next2fill;
471                 do {
472                         /* Remember the transmit buffer for cleanup */
473                         tbi = txq->cmd_ring.buf_info + txq->cmd_ring.next2fill;
474
475                         /* NB: the following assumes that VMXNET3 maximum
476                          * transmit buffer size (16K) is greater than the
477                          * maximum mbuf segment size.
478                          */
479                         gdesc = txq->cmd_ring.base + txq->cmd_ring.next2fill;
480                         if (copy_size) {
481                                 uint64 offset =
482                                         (uint64)txq->cmd_ring.next2fill *
483                                                         txq->txdata_desc_size;
484                                 gdesc->txd.addr =
485                                         rte_cpu_to_le_64(txq->data_ring.basePA +
486                                                          offset);
487                         } else {
488                                 gdesc->txd.addr = rte_mbuf_data_iova(m_seg);
489                         }
490
491                         gdesc->dword[2] = dw2 | m_seg->data_len;
492                         gdesc->dword[3] = 0;
493
494                         /* move to the next2fill descriptor */
495                         vmxnet3_cmd_ring_adv_next2fill(&txq->cmd_ring);
496
497                         /* use the right gen for non-SOP desc */
498                         dw2 = txq->cmd_ring.gen << VMXNET3_TXD_GEN_SHIFT;
499                 } while ((m_seg = m_seg->next) != NULL);
500
501                 /* set the last buf_info for the pkt */
502                 tbi->m = txm;
503                 /* Update the EOP descriptor */
504                 gdesc->dword[3] |= VMXNET3_TXD_EOP | VMXNET3_TXD_CQ;
505
506                 /* Add VLAN tag if present */
507                 gdesc = txq->cmd_ring.base + first2fill;
508                 if (txm->ol_flags & PKT_TX_VLAN_PKT) {
509                         gdesc->txd.ti = 1;
510                         gdesc->txd.tci = txm->vlan_tci;
511                 }
512
513                 if (tso) {
514                         uint16_t mss = txm->tso_segsz;
515
516                         RTE_ASSERT(mss > 0);
517
518                         gdesc->txd.hlen = txm->l2_len + txm->l3_len + txm->l4_len;
519                         gdesc->txd.om = VMXNET3_OM_TSO;
520                         gdesc->txd.msscof = mss;
521
522                         deferred += (rte_pktmbuf_pkt_len(txm) - gdesc->txd.hlen + mss - 1) / mss;
523                 } else if (txm->ol_flags & PKT_TX_L4_MASK) {
524                         gdesc->txd.om = VMXNET3_OM_CSUM;
525                         gdesc->txd.hlen = txm->l2_len + txm->l3_len;
526
527                         switch (txm->ol_flags & PKT_TX_L4_MASK) {
528                         case PKT_TX_TCP_CKSUM:
529                                 gdesc->txd.msscof = gdesc->txd.hlen + offsetof(struct tcp_hdr, cksum);
530                                 break;
531                         case PKT_TX_UDP_CKSUM:
532                                 gdesc->txd.msscof = gdesc->txd.hlen + offsetof(struct udp_hdr, dgram_cksum);
533                                 break;
534                         default:
535                                 PMD_TX_LOG(WARNING, "requested cksum offload not supported %#llx",
536                                            txm->ol_flags & PKT_TX_L4_MASK);
537                                 abort();
538                         }
539                         deferred++;
540                 } else {
541                         gdesc->txd.hlen = 0;
542                         gdesc->txd.om = VMXNET3_OM_NONE;
543                         gdesc->txd.msscof = 0;
544                         deferred++;
545                 }
546
547                 /* flip the GEN bit on the SOP */
548                 rte_compiler_barrier();
549                 gdesc->dword[2] ^= VMXNET3_TXD_GEN;
550
551                 txq_ctrl->txNumDeferred = rte_cpu_to_le_32(deferred);
552                 nb_tx++;
553         }
554
555         PMD_TX_LOG(DEBUG, "vmxnet3 txThreshold: %u", rte_le_to_cpu_32(txq_ctrl->txThreshold));
556
557         if (deferred >= rte_le_to_cpu_32(txq_ctrl->txThreshold)) {
558                 txq_ctrl->txNumDeferred = 0;
559                 /* Notify vSwitch that packets are available. */
560                 VMXNET3_WRITE_BAR0_REG(hw, (VMXNET3_REG_TXPROD + txq->queue_id * VMXNET3_REG_ALIGN),
561                                        txq->cmd_ring.next2fill);
562         }
563
564         return nb_tx;
565 }
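
/*
 * Worked example for the TSO bookkeeping above (illustrative figures only):
 * a 9014-byte TSO packet with 54 bytes of l2+l3+l4 headers and an MSS of
 * 1460 adds
 *     (9014 - 54 + 1460 - 1) / 1460 = 10419 / 1460 = 7
 * to 'deferred', i.e. the seven on-wire segments the device will emit,
 * which is what the txThreshold comparison counts against.
 */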
566
567 static inline void
568 vmxnet3_renew_desc(vmxnet3_rx_queue_t *rxq, uint8_t ring_id,
569                    struct rte_mbuf *mbuf)
570 {
571         uint32_t val;
572         struct vmxnet3_cmd_ring *ring = &rxq->cmd_ring[ring_id];
573         struct Vmxnet3_RxDesc *rxd =
574                 (struct Vmxnet3_RxDesc *)(ring->base + ring->next2fill);
575         vmxnet3_buf_info_t *buf_info = &ring->buf_info[ring->next2fill];
576
577         if (ring_id == 0) {
578                 /* Usually: One HEAD type buf per packet
579                  * val = (ring->next2fill % rxq->hw->bufs_per_pkt) ?
580                  * VMXNET3_RXD_BTYPE_BODY : VMXNET3_RXD_BTYPE_HEAD;
581                  */
582
583                 /* We use single packet buffer so all heads here */
584                 val = VMXNET3_RXD_BTYPE_HEAD;
585         } else {
586                 /* All BODY type buffers for 2nd ring */
587                 val = VMXNET3_RXD_BTYPE_BODY;
588         }
589
590         /*
591          * Load the mbuf pointer into the buf_info entry at next2fill.
592          * buf_info is the equivalent of the cookie in a virtio virtqueue.
593          */
594         buf_info->m = mbuf;
595         buf_info->len = (uint16_t)(mbuf->buf_len - RTE_PKTMBUF_HEADROOM);
596         buf_info->bufPA = rte_mbuf_data_iova_default(mbuf);
597
598         /* Load Rx Descriptor with the buffer's GPA */
599         rxd->addr = buf_info->bufPA;
600
601         /* After this point rxd->addr MUST not be NULL */
602         rxd->btype = val;
603         rxd->len = buf_info->len;
604         /* Flip gen bit at the end to change ownership */
605         rxd->gen = ring->gen;
606
607         vmxnet3_cmd_ring_adv_next2fill(ring);
608 }
609 /*
610  *  Allocates mbufs and clusters, and posts Rx descriptors with the buffer
611  *  details so that the device can receive packets into those buffers.
612  *  Ring layout:
613  *      Among the two rings, the 1st ring contains buffers of type 0 and type 1.
614  *      bufs_per_pkt is set such that for non-LRO cases all the buffers required
615  *      by a frame will fit in the 1st ring (1st buf of type 0, rest of type 1).
616  *      The 2nd ring contains buffers of type 1 alone and is mostly used
617  *      only for LRO.
618  */
619 static int
620 vmxnet3_post_rx_bufs(vmxnet3_rx_queue_t *rxq, uint8_t ring_id)
621 {
622         int err = 0;
623         uint32_t i = 0;
624         struct vmxnet3_cmd_ring *ring = &rxq->cmd_ring[ring_id];
625
626         while (vmxnet3_cmd_ring_desc_avail(ring) > 0) {
627                 struct rte_mbuf *mbuf;
628
629                 /* Allocate blank mbuf for the current Rx Descriptor */
630                 mbuf = rte_mbuf_raw_alloc(rxq->mp);
631                 if (unlikely(mbuf == NULL)) {
632                         PMD_RX_LOG(ERR, "Error allocating mbuf");
633                         rxq->stats.rx_buf_alloc_failure++;
634                         err = ENOMEM;
635                         break;
636                 }
637
638                 vmxnet3_renew_desc(rxq, ring_id, mbuf);
639                 i++;
640         }
641
642         /* Return error only if no buffers are posted at present */
643         if (vmxnet3_cmd_ring_desc_avail(ring) >= (ring->size - 1))
644                 return -err;
645         else
646                 return i;
647 }
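
/*
 * Illustrative sketch only, not used by the driver: how a caller is expected
 * to consume the return convention of vmxnet3_post_rx_bufs() above, where a
 * value <= 0 means that no buffer at all could be posted on that ring (see
 * also vmxnet3_dev_rxtx_init() below). The helper name is hypothetical.
 */
static __rte_unused int
vmxnet3_post_rx_bufs_usage_sketch(vmxnet3_rx_queue_t *rxq)
{
        uint8_t ring_id;

        for (ring_id = 0; ring_id < VMXNET3_RX_CMDRING_SIZE; ring_id++) {
                int nb_posted = vmxnet3_post_rx_bufs(rxq, ring_id);

                /* <= 0 means nothing was posted; report a negative errno. */
                if (nb_posted <= 0)
                        return nb_posted < 0 ? nb_posted : -ENOBUFS;
        }

        return 0;
}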
648
649 /* MSS not provided by vmxnet3, guess one with available information */
650 static uint16_t
651 vmxnet3_guess_mss(struct vmxnet3_hw *hw, const Vmxnet3_RxCompDesc *rcd,
652                 struct rte_mbuf *rxm)
653 {
654         uint32_t hlen, slen;
655         struct ipv4_hdr *ipv4_hdr;
656         struct ipv6_hdr *ipv6_hdr;
657         struct tcp_hdr *tcp_hdr;
658         char *ptr;
659
660         RTE_ASSERT(rcd->tcp);
661
662         ptr = rte_pktmbuf_mtod(rxm, char *);
663         slen = rte_pktmbuf_data_len(rxm);
664         hlen = sizeof(struct ether_hdr);
665
666         if (rcd->v4) {
667                 if (unlikely(slen < hlen + sizeof(struct ipv4_hdr)))
668                         return hw->mtu - sizeof(struct ipv4_hdr)
669                                         - sizeof(struct tcp_hdr);
670
671                 ipv4_hdr = (struct ipv4_hdr *)(ptr + hlen);
672                 hlen += (ipv4_hdr->version_ihl & IPV4_HDR_IHL_MASK) *
673                                 IPV4_IHL_MULTIPLIER;
674         } else if (rcd->v6) {
675                 if (unlikely(slen < hlen + sizeof(struct ipv6_hdr)))
676                         return hw->mtu - sizeof(struct ipv6_hdr) -
677                                         sizeof(struct tcp_hdr);
678
679                 ipv6_hdr = (struct ipv6_hdr *)(ptr + hlen);
680                 hlen += sizeof(struct ipv6_hdr);
681                 if (unlikely(ipv6_hdr->proto != IPPROTO_TCP)) {
682                         int frag;
683
684                         rte_net_skip_ip6_ext(ipv6_hdr->proto, rxm,
685                                         &hlen, &frag);
686                 }
687         }
688
689         if (unlikely(slen < hlen + sizeof(struct tcp_hdr)))
690                 return hw->mtu - hlen - sizeof(struct tcp_hdr) +
691                                 sizeof(struct ether_hdr);
692
693         tcp_hdr = (struct tcp_hdr *)(ptr + hlen);
694         hlen += (tcp_hdr->data_off & 0xf0) >> 2;
695
696         if (rxm->udata64 > 1)
697                 return (rte_pktmbuf_pkt_len(rxm) - hlen +
698                                 rxm->udata64 - 1) / rxm->udata64;
699         else
700                 return hw->mtu - hlen + sizeof(struct ether_hdr);
701 }
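
/*
 * Worked example for the guess above (illustrative figures only): an IPv4/TCP
 * LRO aggregate of pkt_len = 65226 bytes with hlen = 66 (14 + 20 + 32, i.e.
 * Ethernet + IPv4 + TCP with timestamps) and segCnt = 45 stored in udata64
 * yields MSS = (65226 - 66 + 45 - 1) / 45 = 1448.
 */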
702
703 /* Receive side checksum and other offloads */
704 static inline void
705 vmxnet3_rx_offload(struct vmxnet3_hw *hw, const Vmxnet3_RxCompDesc *rcd,
706                 struct rte_mbuf *rxm, const uint8_t sop)
707 {
708         uint64_t ol_flags = rxm->ol_flags;
709         uint32_t packet_type = rxm->packet_type;
710
711         /* Offloads set in sop */
712         if (sop) {
713                 /* Set packet type */
714                 packet_type |= RTE_PTYPE_L2_ETHER;
715
716                 /* Check large packet receive */
717                 if (VMXNET3_VERSION_GE_2(hw) &&
718                     rcd->type == VMXNET3_CDTYPE_RXCOMP_LRO) {
719                         const Vmxnet3_RxCompDescExt *rcde =
720                                         (const Vmxnet3_RxCompDescExt *)rcd;
721
722                         rxm->tso_segsz = rcde->mss;
723                         rxm->udata64 = rcde->segCnt;
724                         ol_flags |= PKT_RX_LRO;
725                 }
726         } else { /* Offloads set in eop */
727                 /* Check for RSS */
728                 if (rcd->rssType != VMXNET3_RCD_RSS_TYPE_NONE) {
729                         ol_flags |= PKT_RX_RSS_HASH;
730                         rxm->hash.rss = rcd->rssHash;
731                 }
732
733                 /* Check for hardware stripped VLAN tag */
734                 if (rcd->ts) {
735                         ol_flags |= (PKT_RX_VLAN | PKT_RX_VLAN_STRIPPED);
736                         rxm->vlan_tci = rte_le_to_cpu_16((uint16_t)rcd->tci);
737                 }
738
739                 /* Check packet type, checksum errors, etc. */
740                 if (rcd->cnc) {
741                         ol_flags |= PKT_RX_L4_CKSUM_UNKNOWN;
742                 } else {
743                         if (rcd->v4) {
744                                 packet_type |= RTE_PTYPE_L3_IPV4_EXT_UNKNOWN;
745
746                                 if (rcd->ipc)
747                                         ol_flags |= PKT_RX_IP_CKSUM_GOOD;
748                                 else
749                                         ol_flags |= PKT_RX_IP_CKSUM_BAD;
750
751                                 if (rcd->tuc) {
752                                         ol_flags |= PKT_RX_L4_CKSUM_GOOD;
753                                         if (rcd->tcp)
754                                                 packet_type |= RTE_PTYPE_L4_TCP;
755                                         else
756                                                 packet_type |= RTE_PTYPE_L4_UDP;
757                                 } else {
758                                         if (rcd->tcp) {
759                                                 packet_type |= RTE_PTYPE_L4_TCP;
760                                                 ol_flags |= PKT_RX_L4_CKSUM_BAD;
761                                         } else if (rcd->udp) {
762                                                 packet_type |= RTE_PTYPE_L4_UDP;
763                                                 ol_flags |= PKT_RX_L4_CKSUM_BAD;
764                                         }
765                                 }
766                         } else if (rcd->v6) {
767                                 packet_type |= RTE_PTYPE_L3_IPV6_EXT_UNKNOWN;
768
769                                 if (rcd->tuc) {
770                                         ol_flags |= PKT_RX_L4_CKSUM_GOOD;
771                                         if (rcd->tcp)
772                                                 packet_type |= RTE_PTYPE_L4_TCP;
773                                         else
774                                                 packet_type |= RTE_PTYPE_L4_UDP;
775                                 } else {
776                                         if (rcd->tcp) {
777                                                 packet_type |= RTE_PTYPE_L4_TCP;
778                                                 ol_flags |= PKT_RX_L4_CKSUM_BAD;
779                                         } else if (rcd->udp) {
780                                                 packet_type |= RTE_PTYPE_L4_UDP;
781                                                 ol_flags |= PKT_RX_L4_CKSUM_BAD;
782                                         }
783                                 }
784                         } else {
785                                 packet_type |= RTE_PTYPE_UNKNOWN;
786                         }
787
788                         /* Old variants of vmxnet3 do not provide MSS */
789                         if ((ol_flags & PKT_RX_LRO) && rxm->tso_segsz == 0)
790                                 rxm->tso_segsz = vmxnet3_guess_mss(hw,
791                                                 rcd, rxm);
792                 }
793         }
794
795         rxm->ol_flags = ol_flags;
796         rxm->packet_type = packet_type;
797 }
798
799 /*
800  * Process the Rx Completion Ring of given vmxnet3_rx_queue
801  * for nb_pkts burst and return the number of packets received
802  */
803 uint16_t
804 vmxnet3_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t nb_pkts)
805 {
806         uint16_t nb_rx;
807         uint32_t nb_rxd, idx;
808         uint8_t ring_idx;
809         vmxnet3_rx_queue_t *rxq;
810         Vmxnet3_RxCompDesc *rcd;
811         vmxnet3_buf_info_t *rbi;
812         Vmxnet3_RxDesc *rxd;
813         struct rte_mbuf *rxm = NULL;
814         struct vmxnet3_hw *hw;
815
816         nb_rx = 0;
817         ring_idx = 0;
818         nb_rxd = 0;
819         idx = 0;
820
821         rxq = rx_queue;
822         hw = rxq->hw;
823
824         rcd = &rxq->comp_ring.base[rxq->comp_ring.next2proc].rcd;
825
826         if (unlikely(rxq->stopped)) {
827                 PMD_RX_LOG(DEBUG, "Rx queue is stopped.");
828                 return 0;
829         }
830
831         while (rcd->gen == rxq->comp_ring.gen) {
832                 struct rte_mbuf *newm;
833
834                 if (nb_rx >= nb_pkts)
835                         break;
836
837                 newm = rte_mbuf_raw_alloc(rxq->mp);
838                 if (unlikely(newm == NULL)) {
839                         PMD_RX_LOG(ERR, "Error allocating mbuf");
840                         rxq->stats.rx_buf_alloc_failure++;
841                         break;
842                 }
843
844                 idx = rcd->rxdIdx;
845                 ring_idx = vmxnet3_get_ring_idx(hw, rcd->rqID);
846                 rxd = (Vmxnet3_RxDesc *)rxq->cmd_ring[ring_idx].base + idx;
847                 RTE_SET_USED(rxd); /* used only for assert when enabled */
848                 rbi = rxq->cmd_ring[ring_idx].buf_info + idx;
849
850                 PMD_RX_LOG(DEBUG, "rxd idx: %d ring idx: %d.", idx, ring_idx);
851
852                 RTE_ASSERT(rcd->len <= rxd->len);
853                 RTE_ASSERT(rbi->m);
854
855                 /* Get the packet buffer pointer from buf_info */
856                 rxm = rbi->m;
857
858                 /* Clear descriptor associated buf_info to be reused */
859                 rbi->m = NULL;
860                 rbi->bufPA = 0;
861
862                 /* Update the index that we received a packet */
863                 rxq->cmd_ring[ring_idx].next2comp = idx;
864
865                 /* For RCD with EOP set, check if there is frame error */
866                 if (unlikely(rcd->eop && rcd->err)) {
867                         rxq->stats.drop_total++;
868                         rxq->stats.drop_err++;
869
870                         if (!rcd->fcs) {
871                                 rxq->stats.drop_fcs++;
872                                 PMD_RX_LOG(ERR, "Recv packet dropped due to frame err.");
873                         }
874                         PMD_RX_LOG(ERR, "Error in received packet rcd#:%d rxd:%d",
875                                    (int)(rcd - (struct Vmxnet3_RxCompDesc *)
876                                          rxq->comp_ring.base), rcd->rxdIdx);
877                         rte_pktmbuf_free_seg(rxm);
878                         if (rxq->start_seg) {
879                                 struct rte_mbuf *start = rxq->start_seg;
880
881                                 rxq->start_seg = NULL;
882                                 rte_pktmbuf_free(start);
883                         }
884                         goto rcd_done;
885                 }
886
887                 /* Initialize newly received packet buffer */
888                 rxm->port = rxq->port_id;
889                 rxm->nb_segs = 1;
890                 rxm->next = NULL;
891                 rxm->pkt_len = (uint16_t)rcd->len;
892                 rxm->data_len = (uint16_t)rcd->len;
893                 rxm->data_off = RTE_PKTMBUF_HEADROOM;
894                 rxm->ol_flags = 0;
895                 rxm->vlan_tci = 0;
896                 rxm->packet_type = 0;
897
898                 /*
899                  * If this is the first buffer of the received packet,
900                  * set the pointer to the first mbuf of the packet
901                  * Otherwise, update the total length and the number of segments
902                  * of the current scattered packet, and update the pointer to
903                  * the last mbuf of the current packet.
904                  */
905                 if (rcd->sop) {
906                         RTE_ASSERT(rxd->btype == VMXNET3_RXD_BTYPE_HEAD);
907
908                         if (unlikely(rcd->len == 0)) {
909                                 RTE_ASSERT(rcd->eop);
910
911                                 PMD_RX_LOG(DEBUG,
912                                            "Rx buf was skipped. rxring[%d][%d]",
913                                            ring_idx, idx);
914                                 rte_pktmbuf_free_seg(rxm);
915                                 goto rcd_done;
916                         }
917
918                         if (vmxnet3_rx_data_ring(hw, rcd->rqID)) {
919                                 uint8_t *rdd = rxq->data_ring.base +
920                                         idx * rxq->data_desc_size;
921
922                                 RTE_ASSERT(VMXNET3_VERSION_GE_3(hw));
923                                 rte_memcpy(rte_pktmbuf_mtod(rxm, char *),
924                                            rdd, rcd->len);
925                         }
926
927                         rxq->start_seg = rxm;
928                         rxq->last_seg = rxm;
929                         vmxnet3_rx_offload(hw, rcd, rxm, 1);
930                 } else {
931                         struct rte_mbuf *start = rxq->start_seg;
932
933                         RTE_ASSERT(rxd->btype == VMXNET3_RXD_BTYPE_BODY);
934
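                        /*
                         * The device can complete a body descriptor with a
                         * zero-length segment; chain only segments that
                         * actually carry data and free the empty ones so
                         * the mbuf chain stays consistent.
                         */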
935                         if (rxm->data_len) {
936                                 start->pkt_len += rxm->data_len;
937                                 start->nb_segs++;
938
939                                 rxq->last_seg->next = rxm;
940                                 rxq->last_seg = rxm;
941                         } else {
942                                 rte_pktmbuf_free_seg(rxm);
943                         }
944                 }
945
946                 if (rcd->eop) {
947                         struct rte_mbuf *start = rxq->start_seg;
948
949                         vmxnet3_rx_offload(hw, rcd, start, 0);
950                         rx_pkts[nb_rx++] = start;
951                         rxq->start_seg = NULL;
952                 }
953
954 rcd_done:
955                 rxq->cmd_ring[ring_idx].next2comp = idx;
956                 VMXNET3_INC_RING_IDX_ONLY(rxq->cmd_ring[ring_idx].next2comp,
957                                           rxq->cmd_ring[ring_idx].size);
958
959                 /* It's time to renew descriptors */
960                 vmxnet3_renew_desc(rxq, ring_idx, newm);
961                 if (unlikely(rxq->shared->ctrl.updateRxProd)) {
962                         VMXNET3_WRITE_BAR0_REG(hw, rxprod_reg[ring_idx] + (rxq->queue_id * VMXNET3_REG_ALIGN),
963                                                rxq->cmd_ring[ring_idx].next2fill);
964                 }
965
966                 /* Advance to the next descriptor in comp_ring */
967                 vmxnet3_comp_ring_adv_next2proc(&rxq->comp_ring);
968
969                 rcd = &rxq->comp_ring.base[rxq->comp_ring.next2proc].rcd;
970                 nb_rxd++;
971                 if (nb_rxd > rxq->cmd_ring[0].size) {
972                         PMD_RX_LOG(ERR, "Used up quota of receiving packets,"
973                                    " relinquish control.");
974                         break;
975                 }
976         }
977
978         if (unlikely(nb_rxd == 0)) {
979                 uint32_t avail;
980                 for (ring_idx = 0; ring_idx < VMXNET3_RX_CMDRING_SIZE; ring_idx++) {
981                         avail = vmxnet3_cmd_ring_desc_avail(&rxq->cmd_ring[ring_idx]);
982                         if (unlikely(avail > 0)) {
983                                 /* try to alloc new buf and renew descriptors */
984                                 vmxnet3_post_rx_bufs(rxq, ring_idx);
985                         }
986                 }
987                 if (unlikely(rxq->shared->ctrl.updateRxProd)) {
988                         for (ring_idx = 0; ring_idx < VMXNET3_RX_CMDRING_SIZE; ring_idx++) {
989                                 VMXNET3_WRITE_BAR0_REG(hw, rxprod_reg[ring_idx] + (rxq->queue_id * VMXNET3_REG_ALIGN),
990                                                        rxq->cmd_ring[ring_idx].next2fill);
991                         }
992                 }
993         }
994
995         return nb_rx;
996 }
997
998 int
999 vmxnet3_dev_tx_queue_setup(struct rte_eth_dev *dev,
1000                            uint16_t queue_idx,
1001                            uint16_t nb_desc,
1002                            unsigned int socket_id,
1003                            const struct rte_eth_txconf *tx_conf)
1004 {
1005         struct vmxnet3_hw *hw = dev->data->dev_private;
1006         const struct rte_memzone *mz;
1007         struct vmxnet3_tx_queue *txq;
1008         struct vmxnet3_cmd_ring *ring;
1009         struct vmxnet3_comp_ring *comp_ring;
1010         struct vmxnet3_data_ring *data_ring;
1011         int size;
1012
1013         PMD_INIT_FUNC_TRACE();
1014
1015         if ((tx_conf->txq_flags & ETH_TXQ_FLAGS_NOXSUMSCTP) !=
1016             ETH_TXQ_FLAGS_NOXSUMSCTP) {
1017                 PMD_INIT_LOG(ERR, "SCTP checksum offload not supported");
1018                 return -EINVAL;
1019         }
1020
1021         txq = rte_zmalloc("ethdev_tx_queue", sizeof(struct vmxnet3_tx_queue),
1022                           RTE_CACHE_LINE_SIZE);
1023         if (txq == NULL) {
1024                 PMD_INIT_LOG(ERR, "Can not allocate tx queue structure");
1025                 return -ENOMEM;
1026         }
1027
1028         txq->queue_id = queue_idx;
1029         txq->port_id = dev->data->port_id;
1030         txq->shared = NULL; /* set in vmxnet3_setup_driver_shared() */
1031         txq->hw = hw;
1032         txq->qid = queue_idx;
1033         txq->stopped = TRUE;
1034         txq->txdata_desc_size = hw->txdata_desc_size;
1035
1036         ring = &txq->cmd_ring;
1037         comp_ring = &txq->comp_ring;
1038         data_ring = &txq->data_ring;
1039
1040         /* Tx vmxnet ring length should be between 512-4096 */
1041         if (nb_desc < VMXNET3_DEF_TX_RING_SIZE) {
1042                 PMD_INIT_LOG(ERR, "VMXNET3 Tx Ring Size Min: %u",
1043                              VMXNET3_DEF_TX_RING_SIZE);
1044                 return -EINVAL;
1045         } else if (nb_desc > VMXNET3_TX_RING_MAX_SIZE) {
1046                 PMD_INIT_LOG(ERR, "VMXNET3 Tx Ring Size Max: %u",
1047                              VMXNET3_TX_RING_MAX_SIZE);
1048                 return -EINVAL;
1049         } else {
1050                 ring->size = nb_desc;
1051                 ring->size &= ~VMXNET3_RING_SIZE_MASK;
1052         }
1053         comp_ring->size = data_ring->size = ring->size;
1054
1055         /* Tx vmxnet rings structure initialization */
1056         ring->next2fill = 0;
1057         ring->next2comp = 0;
1058         ring->gen = VMXNET3_INIT_GEN;
1059         comp_ring->next2proc = 0;
1060         comp_ring->gen = VMXNET3_INIT_GEN;
1061
1062         size = sizeof(struct Vmxnet3_TxDesc) * ring->size;
1063         size += sizeof(struct Vmxnet3_TxCompDesc) * comp_ring->size;
1064         size += txq->txdata_desc_size * data_ring->size;
1065
1066         mz = rte_eth_dma_zone_reserve(dev, "txdesc", queue_idx, size,
1067                                       VMXNET3_RING_BA_ALIGN, socket_id);
1068         if (mz == NULL) {
1069                 PMD_INIT_LOG(ERR, "ERROR: Creating queue descriptors zone");
1070                 return -ENOMEM;
1071         }
1072         txq->mz = mz;
1073         memset(mz->addr, 0, mz->len);
1074
1075         /* cmd_ring initialization */
1076         ring->base = mz->addr;
1077         ring->basePA = mz->iova;
1078
1079         /* comp_ring initialization */
1080         comp_ring->base = ring->base + ring->size;
1081         comp_ring->basePA = ring->basePA +
1082                 (sizeof(struct Vmxnet3_TxDesc) * ring->size);
1083
1084         /* data_ring initialization */
1085         data_ring->base = (Vmxnet3_TxDataDesc *)(comp_ring->base + comp_ring->size);
1086         data_ring->basePA = comp_ring->basePA +
1087                         (sizeof(struct Vmxnet3_TxCompDesc) * comp_ring->size);
1088
1089         /* cmd_ring0 buf_info allocation */
1090         ring->buf_info = rte_zmalloc("tx_ring_buf_info",
1091                                      ring->size * sizeof(vmxnet3_buf_info_t), RTE_CACHE_LINE_SIZE);
1092         if (ring->buf_info == NULL) {
1093                 PMD_INIT_LOG(ERR, "ERROR: Creating tx_buf_info structure");
1094                 return -ENOMEM;
1095         }
1096
1097         /* Update the data portion with txq */
1098         dev->data->tx_queues[queue_idx] = txq;
1099
1100         return 0;
1101 }
1102
1103 int
1104 vmxnet3_dev_rx_queue_setup(struct rte_eth_dev *dev,
1105                            uint16_t queue_idx,
1106                            uint16_t nb_desc,
1107                            unsigned int socket_id,
1108                            __rte_unused const struct rte_eth_rxconf *rx_conf,
1109                            struct rte_mempool *mp)
1110 {
1111         const struct rte_memzone *mz;
1112         struct vmxnet3_rx_queue *rxq;
1113         struct vmxnet3_hw *hw = dev->data->dev_private;
1114         struct vmxnet3_cmd_ring *ring0, *ring1, *ring;
1115         struct vmxnet3_comp_ring *comp_ring;
1116         struct vmxnet3_rx_data_ring *data_ring;
1117         int size;
1118         uint8_t i;
1119         char mem_name[32];
1120
1121         PMD_INIT_FUNC_TRACE();
1122
1123         rxq = rte_zmalloc("ethdev_rx_queue", sizeof(struct vmxnet3_rx_queue),
1124                           RTE_CACHE_LINE_SIZE);
1125         if (rxq == NULL) {
1126                 PMD_INIT_LOG(ERR, "Can not allocate rx queue structure");
1127                 return -ENOMEM;
1128         }
1129
1130         rxq->mp = mp;
1131         rxq->queue_id = queue_idx;
1132         rxq->port_id = dev->data->port_id;
1133         rxq->shared = NULL; /* set in vmxnet3_setup_driver_shared() */
1134         rxq->hw = hw;
1135         rxq->qid1 = queue_idx;
1136         rxq->qid2 = queue_idx + hw->num_rx_queues;
1137         rxq->data_ring_qid = queue_idx + 2 * hw->num_rx_queues;
1138         rxq->data_desc_size = hw->rxdata_desc_size;
1139         rxq->stopped = TRUE;
1140
1141         ring0 = &rxq->cmd_ring[0];
1142         ring1 = &rxq->cmd_ring[1];
1143         comp_ring = &rxq->comp_ring;
1144         data_ring = &rxq->data_ring;
1145
1146         /* Rx vmxnet rings length should be between 256-4096 */
1147         if (nb_desc < VMXNET3_DEF_RX_RING_SIZE) {
1148                 PMD_INIT_LOG(ERR, "VMXNET3 Rx Ring Size Min: 256");
1149                 return -EINVAL;
1150         } else if (nb_desc > VMXNET3_RX_RING_MAX_SIZE) {
1151                 PMD_INIT_LOG(ERR, "VMXNET3 Rx Ring Size Max: 4096");
1152                 return -EINVAL;
1153         } else {
1154                 ring0->size = nb_desc;
1155                 ring0->size &= ~VMXNET3_RING_SIZE_MASK;
1156                 ring1->size = ring0->size;
1157         }
1158
1159         comp_ring->size = ring0->size + ring1->size;
1160         data_ring->size = ring0->size;
1161
1162         /* Rx vmxnet rings structure initialization */
1163         ring0->next2fill = 0;
1164         ring1->next2fill = 0;
1165         ring0->next2comp = 0;
1166         ring1->next2comp = 0;
1167         ring0->gen = VMXNET3_INIT_GEN;
1168         ring1->gen = VMXNET3_INIT_GEN;
1169         comp_ring->next2proc = 0;
1170         comp_ring->gen = VMXNET3_INIT_GEN;
1171
1172         size = sizeof(struct Vmxnet3_RxDesc) * (ring0->size + ring1->size);
1173         size += sizeof(struct Vmxnet3_RxCompDesc) * comp_ring->size;
1174         if (VMXNET3_VERSION_GE_3(hw) && rxq->data_desc_size)
1175                 size += rxq->data_desc_size * data_ring->size;
1176
1177         mz = rte_eth_dma_zone_reserve(dev, "rxdesc", queue_idx, size,
1178                                       VMXNET3_RING_BA_ALIGN, socket_id);
1179         if (mz == NULL) {
1180                 PMD_INIT_LOG(ERR, "ERROR: Creating queue descriptors zone");
1181                 return -ENOMEM;
1182         }
1183         rxq->mz = mz;
1184         memset(mz->addr, 0, mz->len);
1185
1186         /* cmd_ring0 initialization */
1187         ring0->base = mz->addr;
1188         ring0->basePA = mz->iova;
1189
1190         /* cmd_ring1 initialization */
1191         ring1->base = ring0->base + ring0->size;
1192         ring1->basePA = ring0->basePA + sizeof(struct Vmxnet3_RxDesc) * ring0->size;
1193
1194         /* comp_ring initialization */
1195         comp_ring->base = ring1->base + ring1->size;
1196         comp_ring->basePA = ring1->basePA + sizeof(struct Vmxnet3_RxDesc) *
1197                 ring1->size;
1198
1199         /* data_ring initialization */
1200         if (VMXNET3_VERSION_GE_3(hw) && rxq->data_desc_size) {
1201                 data_ring->base =
1202                         (uint8_t *)(comp_ring->base + comp_ring->size);
1203                 data_ring->basePA = comp_ring->basePA +
1204                         sizeof(struct Vmxnet3_RxCompDesc) * comp_ring->size;
1205         }
1206
1207         /* cmd_ring0-cmd_ring1 buf_info allocation */
1208         for (i = 0; i < VMXNET3_RX_CMDRING_SIZE; i++) {
1209
1210                 ring = &rxq->cmd_ring[i];
1211                 ring->rid = i;
1212                 snprintf(mem_name, sizeof(mem_name), "rx_ring_%d_buf_info", i);
1213
1214                 ring->buf_info = rte_zmalloc(mem_name,
1215                                              ring->size * sizeof(vmxnet3_buf_info_t),
1216                                              RTE_CACHE_LINE_SIZE);
1217                 if (ring->buf_info == NULL) {
1218                         PMD_INIT_LOG(ERR, "ERROR: Creating rx_buf_info structure");
1219                         return -ENOMEM;
1220                 }
1221         }
1222
1223         /* Update the data portion with rxq */
1224         dev->data->rx_queues[queue_idx] = rxq;
1225
1226         return 0;
1227 }
1228
1229 /*
1230  * Initializes Receive Unit
1231  * Load mbufs in rx queue in advance
1232  */
1233 int
1234 vmxnet3_dev_rxtx_init(struct rte_eth_dev *dev)
1235 {
1236         struct vmxnet3_hw *hw = dev->data->dev_private;
1237
1238         int i, ret;
1239         uint8_t j;
1240
1241         PMD_INIT_FUNC_TRACE();
1242
1243         for (i = 0; i < hw->num_rx_queues; i++) {
1244                 vmxnet3_rx_queue_t *rxq = dev->data->rx_queues[i];
1245
1246                 for (j = 0; j < VMXNET3_RX_CMDRING_SIZE; j++) {
1247                         /* Passing 0 as alloc_num will allocate full ring */
1248                         ret = vmxnet3_post_rx_bufs(rxq, j);
1249                         if (ret <= 0) {
1250                                 PMD_INIT_LOG(ERR,
1251                                              "ERROR: Posting Rxq: %d buffers ring: %d",
1252                                              i, j);
1253                                 return -ret;
1254                         }
1255                         /*
1256                          * Updating device with the index:next2fill to fill the
1257                          * mbufs for coming packets.
1258                          */
1259                         if (unlikely(rxq->shared->ctrl.updateRxProd)) {
1260                                 VMXNET3_WRITE_BAR0_REG(hw, rxprod_reg[j] + (rxq->queue_id * VMXNET3_REG_ALIGN),
1261                                                        rxq->cmd_ring[j].next2fill);
1262                         }
1263                 }
1264                 rxq->stopped = FALSE;
1265                 rxq->start_seg = NULL;
1266         }
1267
1268         for (i = 0; i < dev->data->nb_tx_queues; i++) {
1269                 struct vmxnet3_tx_queue *txq = dev->data->tx_queues[i];
1270
1271                 txq->stopped = FALSE;
1272         }
1273
1274         return 0;
1275 }
1276
1277 static uint8_t rss_intel_key[40] = {
1278         0x6D, 0x5A, 0x56, 0xDA, 0x25, 0x5B, 0x0E, 0xC2,
1279         0x41, 0x67, 0x25, 0x3D, 0x43, 0xA3, 0x8F, 0xB0,
1280         0xD0, 0xCA, 0x2B, 0xCB, 0xAE, 0x7B, 0x30, 0xB4,
1281         0x77, 0xCB, 0x2D, 0xA3, 0x80, 0x30, 0xF2, 0x0C,
1282         0x6A, 0x42, 0xB7, 0x3B, 0xBE, 0xAC, 0x01, 0xFA,
1283 };
1284
1285 /*
1286  * Configure RSS feature
1287  */
1288 int
1289 vmxnet3_rss_configure(struct rte_eth_dev *dev)
1290 {
1291         struct vmxnet3_hw *hw = dev->data->dev_private;
1292         struct VMXNET3_RSSConf *dev_rss_conf;
1293         struct rte_eth_rss_conf *port_rss_conf;
1294         uint64_t rss_hf;
1295         uint8_t i, j;
1296
1297         PMD_INIT_FUNC_TRACE();
1298
1299         dev_rss_conf = hw->rss_conf;
1300         port_rss_conf = &dev->data->dev_conf.rx_adv_conf.rss_conf;
1301
1302         /* loading hashFunc */
1303         dev_rss_conf->hashFunc = VMXNET3_RSS_HASH_FUNC_TOEPLITZ;
1304         /* loading hashKeySize */
1305         dev_rss_conf->hashKeySize = VMXNET3_RSS_MAX_KEY_SIZE;
1306         /* loading indTableSize: Must not exceed VMXNET3_RSS_MAX_IND_TABLE_SIZE (128) */
1307         dev_rss_conf->indTableSize = (uint16_t)(hw->num_rx_queues * 4);
1308
1309         if (port_rss_conf->rss_key == NULL) {
1310                 /* Default hash key */
1311                 port_rss_conf->rss_key = rss_intel_key;
1312         }
1313
1314         /* loading hashKey */
1315         memcpy(&dev_rss_conf->hashKey[0], port_rss_conf->rss_key,
1316                dev_rss_conf->hashKeySize);
1317
1318         /* loading indTable */
1319         for (i = 0, j = 0; i < dev_rss_conf->indTableSize; i++, j++) {
1320                 if (j == dev->data->nb_rx_queues)
1321                         j = 0;
1322                 dev_rss_conf->indTable[i] = j;
1323         }
1324
1325         /* loading hashType */
1326         dev_rss_conf->hashType = 0;
1327         rss_hf = port_rss_conf->rss_hf & VMXNET3_RSS_OFFLOAD_ALL;
1328         if (rss_hf & ETH_RSS_IPV4)
1329                 dev_rss_conf->hashType |= VMXNET3_RSS_HASH_TYPE_IPV4;
1330         if (rss_hf & ETH_RSS_NONFRAG_IPV4_TCP)
1331                 dev_rss_conf->hashType |= VMXNET3_RSS_HASH_TYPE_TCP_IPV4;
1332         if (rss_hf & ETH_RSS_IPV6)
1333                 dev_rss_conf->hashType |= VMXNET3_RSS_HASH_TYPE_IPV6;
1334         if (rss_hf & ETH_RSS_NONFRAG_IPV6_TCP)
1335                 dev_rss_conf->hashType |= VMXNET3_RSS_HASH_TYPE_TCP_IPV6;
1336
1337         return VMXNET3_SUCCESS;
1338 }
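
/*
 * Illustrative example of the indirection table built above (hypothetical
 * figures): with 4 Rx queues, indTableSize is 16 and the round-robin fill
 * yields indTable = { 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3 },
 * spreading hashed flows evenly across the queues.
 */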