/*-
 *   BSD LICENSE
 *
 *   Copyright(c) 2010-2015 Intel Corporation. All rights reserved.
 *   All rights reserved.
 *
 *   Redistribution and use in source and binary forms, with or without
 *   modification, are permitted provided that the following conditions
 *   are met:
 *
 *     * Redistributions of source code must retain the above copyright
 *       notice, this list of conditions and the following disclaimer.
 *     * Redistributions in binary form must reproduce the above copyright
 *       notice, this list of conditions and the following disclaimer in
 *       the documentation and/or other materials provided with the
 *       distribution.
 *     * Neither the name of Intel Corporation nor the names of its
 *       contributors may be used to endorse or promote products derived
 *       from this software without specific prior written permission.
 *
 *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

#include <sys/queue.h>

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <errno.h>
#include <stdint.h>
#include <stdarg.h>
#include <unistd.h>
#include <inttypes.h>

#include <rte_byteorder.h>
#include <rte_common.h>
#include <rte_cycles.h>
#include <rte_log.h>
#include <rte_debug.h>
#include <rte_interrupts.h>
#include <rte_pci.h>
#include <rte_memory.h>
#include <rte_memzone.h>
#include <rte_launch.h>
#include <rte_eal.h>
#include <rte_per_lcore.h>
#include <rte_lcore.h>
#include <rte_atomic.h>
#include <rte_branch_prediction.h>
#include <rte_mempool.h>
#include <rte_malloc.h>
#include <rte_mbuf.h>
#include <rte_ether.h>
#include <rte_ethdev.h>
#include <rte_prefetch.h>
#include <rte_ip.h>
#include <rte_udp.h>
#include <rte_tcp.h>
#include <rte_sctp.h>
#include <rte_string_fns.h>
#include <rte_errno.h>
#include <rte_net.h>

#include "base/vmxnet3_defs.h"
#include "vmxnet3_ring.h"

#include "vmxnet3_logs.h"
#include "vmxnet3_ethdev.h"

#define VMXNET3_TX_OFFLOAD_MASK ( \
                PKT_TX_VLAN_PKT | \
                PKT_TX_L4_MASK |  \
                PKT_TX_TCP_SEG)

#define VMXNET3_TX_OFFLOAD_NOTSUP_MASK  \
        (PKT_TX_OFFLOAD_MASK ^ VMXNET3_TX_OFFLOAD_MASK)

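/* BAR0 producer registers for Rx command ring 0 and ring 1, indexed by ring id */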
static const uint32_t rxprod_reg[2] = {VMXNET3_REG_RXPROD, VMXNET3_REG_RXPROD2};

static int vmxnet3_post_rx_bufs(vmxnet3_rx_queue_t*, uint8_t);
static void vmxnet3_tq_tx_complete(vmxnet3_tx_queue_t *);
#ifdef RTE_LIBRTE_VMXNET3_DEBUG_DRIVER_NOT_USED
static void vmxnet3_rxq_dump(struct vmxnet3_rx_queue *);
static void vmxnet3_txq_dump(struct vmxnet3_tx_queue *);
#endif

#ifdef RTE_LIBRTE_VMXNET3_DEBUG_DRIVER_NOT_USED
static void
vmxnet3_rxq_dump(struct vmxnet3_rx_queue *rxq)
{
        uint32_t avail = 0;

        if (rxq == NULL)
                return;

        PMD_RX_LOG(DEBUG,
                   "RXQ: cmd0 base : %p cmd1 base : %p comp ring base : %p.",
                   rxq->cmd_ring[0].base, rxq->cmd_ring[1].base, rxq->comp_ring.base);
        PMD_RX_LOG(DEBUG,
                   "RXQ: cmd0 basePA : 0x%lx cmd1 basePA : 0x%lx comp ring basePA : 0x%lx.",
                   (unsigned long)rxq->cmd_ring[0].basePA,
                   (unsigned long)rxq->cmd_ring[1].basePA,
                   (unsigned long)rxq->comp_ring.basePA);

        avail = vmxnet3_cmd_ring_desc_avail(&rxq->cmd_ring[0]);
        PMD_RX_LOG(DEBUG,
                   "RXQ:cmd0: size=%u; free=%u; next2proc=%u; queued=%u",
                   (uint32_t)rxq->cmd_ring[0].size, avail,
                   rxq->comp_ring.next2proc,
                   rxq->cmd_ring[0].size - avail);

        avail = vmxnet3_cmd_ring_desc_avail(&rxq->cmd_ring[1]);
        PMD_RX_LOG(DEBUG, "RXQ:cmd1 size=%u; free=%u; next2proc=%u; queued=%u",
                   (uint32_t)rxq->cmd_ring[1].size, avail, rxq->comp_ring.next2proc,
                   rxq->cmd_ring[1].size - avail);

}

static void
vmxnet3_txq_dump(struct vmxnet3_tx_queue *txq)
{
        uint32_t avail = 0;

        if (txq == NULL)
                return;

        PMD_TX_LOG(DEBUG, "TXQ: cmd base : %p comp ring base : %p data ring base : %p.",
                   txq->cmd_ring.base, txq->comp_ring.base, txq->data_ring.base);
        PMD_TX_LOG(DEBUG, "TXQ: cmd basePA : 0x%lx comp ring basePA : 0x%lx data ring basePA : 0x%lx.",
                   (unsigned long)txq->cmd_ring.basePA,
                   (unsigned long)txq->comp_ring.basePA,
                   (unsigned long)txq->data_ring.basePA);

        avail = vmxnet3_cmd_ring_desc_avail(&txq->cmd_ring);
        PMD_TX_LOG(DEBUG, "TXQ: size=%u; free=%u; next2proc=%u; queued=%u",
                   (uint32_t)txq->cmd_ring.size, avail,
                   txq->comp_ring.next2proc, txq->cmd_ring.size - avail);
}
#endif

static void
vmxnet3_tx_cmd_ring_release_mbufs(vmxnet3_cmd_ring_t *ring)
{
        while (ring->next2comp != ring->next2fill) {
                /* No need to worry about desc ownership, device is quiesced by now. */
                vmxnet3_buf_info_t *buf_info = ring->buf_info + ring->next2comp;

                if (buf_info->m) {
                        rte_pktmbuf_free(buf_info->m);
                        buf_info->m = NULL;
                        buf_info->bufPA = 0;
                        buf_info->len = 0;
                }
                vmxnet3_cmd_ring_adv_next2comp(ring);
        }
}

static void
vmxnet3_rx_cmd_ring_release_mbufs(vmxnet3_cmd_ring_t *ring)
{
        uint32_t i;

        for (i = 0; i < ring->size; i++) {
                /* No need to worry about desc ownership, device is quiesced by now. */
                vmxnet3_buf_info_t *buf_info = &ring->buf_info[i];

                if (buf_info->m) {
                        rte_pktmbuf_free_seg(buf_info->m);
                        buf_info->m = NULL;
                        buf_info->bufPA = 0;
                        buf_info->len = 0;
                }
                vmxnet3_cmd_ring_adv_next2comp(ring);
        }
}

static void
vmxnet3_cmd_ring_release(vmxnet3_cmd_ring_t *ring)
{
        rte_free(ring->buf_info);
        ring->buf_info = NULL;
}

void
vmxnet3_dev_tx_queue_release(void *txq)
{
        vmxnet3_tx_queue_t *tq = txq;

        if (tq != NULL) {
                /* Release mbufs */
                vmxnet3_tx_cmd_ring_release_mbufs(&tq->cmd_ring);
                /* Release the cmd_ring */
                vmxnet3_cmd_ring_release(&tq->cmd_ring);
        }
}

void
vmxnet3_dev_rx_queue_release(void *rxq)
{
        int i;
        vmxnet3_rx_queue_t *rq = rxq;

        if (rq != NULL) {
                /* Release mbufs */
                for (i = 0; i < VMXNET3_RX_CMDRING_SIZE; i++)
                        vmxnet3_rx_cmd_ring_release_mbufs(&rq->cmd_ring[i]);

                /* Release both the cmd_rings */
                for (i = 0; i < VMXNET3_RX_CMDRING_SIZE; i++)
                        vmxnet3_cmd_ring_release(&rq->cmd_ring[i]);
        }
}

static void
vmxnet3_dev_tx_queue_reset(void *txq)
{
        vmxnet3_tx_queue_t *tq = txq;
        struct vmxnet3_cmd_ring *ring = &tq->cmd_ring;
        struct vmxnet3_comp_ring *comp_ring = &tq->comp_ring;
        struct vmxnet3_data_ring *data_ring = &tq->data_ring;
        int size;

        if (tq != NULL) {
                /* Release the cmd_ring mbufs */
                vmxnet3_tx_cmd_ring_release_mbufs(&tq->cmd_ring);
        }

        /* Tx vmxnet rings structure initialization */
        ring->next2fill = 0;
        ring->next2comp = 0;
        ring->gen = VMXNET3_INIT_GEN;
        comp_ring->next2proc = 0;
        comp_ring->gen = VMXNET3_INIT_GEN;

        size = sizeof(struct Vmxnet3_TxDesc) * ring->size;
        size += sizeof(struct Vmxnet3_TxCompDesc) * comp_ring->size;
        size += tq->txdata_desc_size * data_ring->size;

        memset(ring->base, 0, size);
}

static void
vmxnet3_dev_rx_queue_reset(void *rxq)
{
        int i;
        vmxnet3_rx_queue_t *rq = rxq;
        struct vmxnet3_hw *hw = rq->hw;
        struct vmxnet3_cmd_ring *ring0, *ring1;
        struct vmxnet3_comp_ring *comp_ring;
        struct vmxnet3_rx_data_ring *data_ring = &rq->data_ring;
        int size;

        if (rq != NULL) {
                /* Release both the cmd_rings mbufs */
                for (i = 0; i < VMXNET3_RX_CMDRING_SIZE; i++)
                        vmxnet3_rx_cmd_ring_release_mbufs(&rq->cmd_ring[i]);
        }

        ring0 = &rq->cmd_ring[0];
        ring1 = &rq->cmd_ring[1];
        comp_ring = &rq->comp_ring;

        /* Rx vmxnet rings structure initialization */
        ring0->next2fill = 0;
        ring1->next2fill = 0;
        ring0->next2comp = 0;
        ring1->next2comp = 0;
        ring0->gen = VMXNET3_INIT_GEN;
        ring1->gen = VMXNET3_INIT_GEN;
        comp_ring->next2proc = 0;
        comp_ring->gen = VMXNET3_INIT_GEN;

        size = sizeof(struct Vmxnet3_RxDesc) * (ring0->size + ring1->size);
        size += sizeof(struct Vmxnet3_RxCompDesc) * comp_ring->size;
        if (VMXNET3_VERSION_GE_3(hw) && rq->data_desc_size)
                size += rq->data_desc_size * data_ring->size;

        memset(ring0->base, 0, size);
}

void
vmxnet3_dev_clear_queues(struct rte_eth_dev *dev)
{
        unsigned i;

        PMD_INIT_FUNC_TRACE();

        for (i = 0; i < dev->data->nb_tx_queues; i++) {
                struct vmxnet3_tx_queue *txq = dev->data->tx_queues[i];

                if (txq != NULL) {
                        txq->stopped = TRUE;
                        vmxnet3_dev_tx_queue_reset(txq);
                }
        }

        for (i = 0; i < dev->data->nb_rx_queues; i++) {
                struct vmxnet3_rx_queue *rxq = dev->data->rx_queues[i];

                if (rxq != NULL) {
                        rxq->stopped = TRUE;
                        vmxnet3_dev_rx_queue_reset(rxq);
                }
        }
}

static int
vmxnet3_unmap_pkt(uint16_t eop_idx, vmxnet3_tx_queue_t *txq)
{
        int completed = 0;
        struct rte_mbuf *mbuf;

        /* Release cmd_ring descriptor and free mbuf */
        RTE_ASSERT(txq->cmd_ring.base[eop_idx].txd.eop == 1);

        mbuf = txq->cmd_ring.buf_info[eop_idx].m;
        if (mbuf == NULL)
                rte_panic("EOP desc does not point to a valid mbuf");
        rte_pktmbuf_free(mbuf);

        txq->cmd_ring.buf_info[eop_idx].m = NULL;

        while (txq->cmd_ring.next2comp != eop_idx) {
                /* no out-of-order completion */
                RTE_ASSERT(txq->cmd_ring.base[txq->cmd_ring.next2comp].txd.cq == 0);
                vmxnet3_cmd_ring_adv_next2comp(&txq->cmd_ring);
                completed++;
        }

        /* Mark the txd for which tcd was generated as completed */
        vmxnet3_cmd_ring_adv_next2comp(&txq->cmd_ring);

        return completed + 1;
}

static void
vmxnet3_tq_tx_complete(vmxnet3_tx_queue_t *txq)
{
        int completed = 0;
        vmxnet3_comp_ring_t *comp_ring = &txq->comp_ring;
        struct Vmxnet3_TxCompDesc *tcd = (struct Vmxnet3_TxCompDesc *)
                (comp_ring->base + comp_ring->next2proc);

        while (tcd->gen == comp_ring->gen) {
                completed += vmxnet3_unmap_pkt(tcd->txdIdx, txq);

                vmxnet3_comp_ring_adv_next2proc(comp_ring);
                tcd = (struct Vmxnet3_TxCompDesc *)(comp_ring->base +
                                                    comp_ring->next2proc);
        }

        PMD_TX_LOG(DEBUG, "Processed %d tx comps & command descs.", completed);
}

uint16_t
vmxnet3_prep_pkts(__rte_unused void *tx_queue, struct rte_mbuf **tx_pkts,
        uint16_t nb_pkts)
{
        int32_t ret;
        uint32_t i;
        uint64_t ol_flags;
        struct rte_mbuf *m;

        for (i = 0; i != nb_pkts; i++) {
                m = tx_pkts[i];
                ol_flags = m->ol_flags;

                /* Non-TSO packet cannot occupy more than
                 * VMXNET3_MAX_TXD_PER_PKT TX descriptors.
                 */
                if ((ol_flags & PKT_TX_TCP_SEG) == 0 &&
                                m->nb_segs > VMXNET3_MAX_TXD_PER_PKT) {
                        rte_errno = EINVAL;
                        return i;
                }

                /* check that only supported TX offloads are requested. */
                if ((ol_flags & VMXNET3_TX_OFFLOAD_NOTSUP_MASK) != 0 ||
                                (ol_flags & PKT_TX_L4_MASK) ==
                                PKT_TX_SCTP_CKSUM) {
                        rte_errno = ENOTSUP;
                        return i;
                }

#ifdef RTE_LIBRTE_ETHDEV_DEBUG
                ret = rte_validate_tx_offload(m);
                if (ret != 0) {
                        rte_errno = -ret;
                        return i;
                }
#endif
                ret = rte_net_intel_cksum_prepare(m);
                if (ret != 0) {
                        rte_errno = -ret;
                        return i;
                }
        }

        return i;
}

uint16_t
vmxnet3_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts,
                  uint16_t nb_pkts)
{
        uint16_t nb_tx;
        vmxnet3_tx_queue_t *txq = tx_queue;
        struct vmxnet3_hw *hw = txq->hw;
        Vmxnet3_TxQueueCtrl *txq_ctrl = &txq->shared->ctrl;
        uint32_t deferred = rte_le_to_cpu_32(txq_ctrl->txNumDeferred);

        if (unlikely(txq->stopped)) {
                PMD_TX_LOG(DEBUG, "Tx queue is stopped.");
                return 0;
        }

        /* Free up the comp_descriptors aggressively */
        vmxnet3_tq_tx_complete(txq);

        nb_tx = 0;
        while (nb_tx < nb_pkts) {
                Vmxnet3_GenericDesc *gdesc;
                vmxnet3_buf_info_t *tbi;
                uint32_t first2fill, avail, dw2;
                struct rte_mbuf *txm = tx_pkts[nb_tx];
                struct rte_mbuf *m_seg = txm;
                int copy_size = 0;
                bool tso = (txm->ol_flags & PKT_TX_TCP_SEG) != 0;
                /* # of descriptors needed for a packet. */
                unsigned count = txm->nb_segs;

                avail = vmxnet3_cmd_ring_desc_avail(&txq->cmd_ring);
                if (count > avail) {
                        /* Is command ring full? */
                        if (unlikely(avail == 0)) {
                                PMD_TX_LOG(DEBUG, "No free ring descriptors");
                                txq->stats.tx_ring_full++;
                                txq->stats.drop_total += (nb_pkts - nb_tx);
                                break;
                        }

                        /* Command ring is not full but cannot handle the
                         * multi-segmented packet. Let's try the next packet
                         * in this case.
                         */
                        PMD_TX_LOG(DEBUG, "Running out of ring descriptors "
                                   "(avail %d needed %d)", avail, count);
                        txq->stats.drop_total++;
                        if (tso)
                                txq->stats.drop_tso++;
                        rte_pktmbuf_free(txm);
                        nb_tx++;
                        continue;
                }

                /* Drop non-TSO packet that is excessively fragmented */
                if (unlikely(!tso && count > VMXNET3_MAX_TXD_PER_PKT)) {
                        PMD_TX_LOG(ERR, "Non-TSO packet cannot occupy more than %d tx "
                                   "descriptors. Packet dropped.", VMXNET3_MAX_TXD_PER_PKT);
                        txq->stats.drop_too_many_segs++;
                        txq->stats.drop_total++;
                        rte_pktmbuf_free(txm);
                        nb_tx++;
                        continue;
                }

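                /* Small single-segment packets are copied into the Tx data
                 * ring; the SOP descriptor then points at the data ring slot
                 * instead of the mbuf.
                 */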
                if (txm->nb_segs == 1 &&
                    rte_pktmbuf_pkt_len(txm) <= txq->txdata_desc_size) {
                        struct Vmxnet3_TxDataDesc *tdd;

                        tdd = (struct Vmxnet3_TxDataDesc *)
                                ((uint8 *)txq->data_ring.base +
                                 txq->cmd_ring.next2fill *
                                 txq->txdata_desc_size);
                        copy_size = rte_pktmbuf_pkt_len(txm);
                        rte_memcpy(tdd->data, rte_pktmbuf_mtod(txm, char *), copy_size);
                }

                /* use the previous gen bit for the SOP desc */
                dw2 = (txq->cmd_ring.gen ^ 0x1) << VMXNET3_TXD_GEN_SHIFT;
                first2fill = txq->cmd_ring.next2fill;
                do {
                        /* Remember the transmit buffer for cleanup */
                        tbi = txq->cmd_ring.buf_info + txq->cmd_ring.next2fill;

                        /* NB: the following assumes that VMXNET3 maximum
                         * transmit buffer size (16K) is greater than
                         * maximum size of mbuf segment size.
                         */
                        gdesc = txq->cmd_ring.base + txq->cmd_ring.next2fill;
                        if (copy_size) {
                                uint64 offset = txq->cmd_ring.next2fill *
                                                txq->txdata_desc_size;
                                gdesc->txd.addr =
                                        rte_cpu_to_le_64(txq->data_ring.basePA +
                                                         offset);
                        } else {
                                gdesc->txd.addr = rte_mbuf_data_dma_addr(m_seg);
                        }

                        gdesc->dword[2] = dw2 | m_seg->data_len;
                        gdesc->dword[3] = 0;

                        /* move to the next2fill descriptor */
                        vmxnet3_cmd_ring_adv_next2fill(&txq->cmd_ring);

                        /* use the right gen for non-SOP desc */
                        dw2 = txq->cmd_ring.gen << VMXNET3_TXD_GEN_SHIFT;
                } while ((m_seg = m_seg->next) != NULL);

                /* set the last buf_info for the pkt */
                tbi->m = txm;
                /* Update the EOP descriptor */
                gdesc->dword[3] |= VMXNET3_TXD_EOP | VMXNET3_TXD_CQ;

                /* Add VLAN tag if present */
                gdesc = txq->cmd_ring.base + first2fill;
                if (txm->ol_flags & PKT_TX_VLAN_PKT) {
                        gdesc->txd.ti = 1;
                        gdesc->txd.tci = txm->vlan_tci;
                }

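                /* For TSO, hlen covers the L2+L3+L4 headers and msscof carries
                 * the MSS; for plain checksum offload, hlen covers L2+L3 and
                 * msscof carries the offset of the checksum field in the packet.
                 */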
                if (tso) {
                        uint16_t mss = txm->tso_segsz;

                        RTE_ASSERT(mss > 0);

                        gdesc->txd.hlen = txm->l2_len + txm->l3_len + txm->l4_len;
                        gdesc->txd.om = VMXNET3_OM_TSO;
                        gdesc->txd.msscof = mss;

                        deferred += (rte_pktmbuf_pkt_len(txm) - gdesc->txd.hlen + mss - 1) / mss;
                } else if (txm->ol_flags & PKT_TX_L4_MASK) {
                        gdesc->txd.om = VMXNET3_OM_CSUM;
                        gdesc->txd.hlen = txm->l2_len + txm->l3_len;

                        switch (txm->ol_flags & PKT_TX_L4_MASK) {
                        case PKT_TX_TCP_CKSUM:
                                gdesc->txd.msscof = gdesc->txd.hlen + offsetof(struct tcp_hdr, cksum);
                                break;
                        case PKT_TX_UDP_CKSUM:
                                gdesc->txd.msscof = gdesc->txd.hlen + offsetof(struct udp_hdr, dgram_cksum);
                                break;
                        default:
                                PMD_TX_LOG(WARNING, "requested cksum offload not supported %#llx",
                                           txm->ol_flags & PKT_TX_L4_MASK);
                                abort();
                        }
                        deferred++;
                } else {
                        gdesc->txd.hlen = 0;
                        gdesc->txd.om = VMXNET3_OM_NONE;
                        gdesc->txd.msscof = 0;
                        deferred++;
                }

                /* flip the GEN bit on the SOP */
                rte_compiler_barrier();
                gdesc->dword[2] ^= VMXNET3_TXD_GEN;

                txq_ctrl->txNumDeferred = rte_cpu_to_le_32(deferred);
                nb_tx++;
        }

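        /* txNumDeferred counts packets queued since the last doorbell; once it
         * reaches the threshold published by the device, write the producer
         * index to BAR0 to notify the host.
         */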
        PMD_TX_LOG(DEBUG, "vmxnet3 txThreshold: %u", rte_le_to_cpu_32(txq_ctrl->txThreshold));

        if (deferred >= rte_le_to_cpu_32(txq_ctrl->txThreshold)) {
                txq_ctrl->txNumDeferred = 0;
                /* Notify vSwitch that packets are available. */
                VMXNET3_WRITE_BAR0_REG(hw, (VMXNET3_REG_TXPROD + txq->queue_id * VMXNET3_REG_ALIGN),
                                       txq->cmd_ring.next2fill);
        }

        return nb_tx;
}

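/* Refill one descriptor of the given Rx command ring with a fresh mbuf and
 * hand it to the device by writing the ring's current generation bit.
 */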
static inline void
vmxnet3_renew_desc(vmxnet3_rx_queue_t *rxq, uint8_t ring_id,
                   struct rte_mbuf *mbuf)
{
        uint32_t val = 0;
        struct vmxnet3_cmd_ring *ring = &rxq->cmd_ring[ring_id];
        struct Vmxnet3_RxDesc *rxd =
                (struct Vmxnet3_RxDesc *)(ring->base + ring->next2fill);
        vmxnet3_buf_info_t *buf_info = &ring->buf_info[ring->next2fill];

        if (ring_id == 0)
                val = VMXNET3_RXD_BTYPE_HEAD;
        else
                val = VMXNET3_RXD_BTYPE_BODY;

        buf_info->m = mbuf;
        buf_info->len = (uint16_t)(mbuf->buf_len - RTE_PKTMBUF_HEADROOM);
        buf_info->bufPA = rte_mbuf_data_dma_addr_default(mbuf);

        rxd->addr = buf_info->bufPA;
        rxd->btype = val;
        rxd->len = buf_info->len;
        rxd->gen = ring->gen;

        vmxnet3_cmd_ring_adv_next2fill(ring);
}
/*
 *  Allocates mbufs and clusters. Posts rx descriptors with buffer details
 *  so that the device can receive packets in those buffers.
 *  Ring layout:
 *      Among the two rings, the 1st ring contains buffers of type 0 and type 1.
 *      bufs_per_pkt is set such that for non-LRO cases all the buffers required
 *      by a frame fit in the 1st ring (1st buf of type 0 and the rest of type 1).
 *      The 2nd ring contains buffers of type 1 alone and is mostly used
 *      only for LRO.
 */
static int
vmxnet3_post_rx_bufs(vmxnet3_rx_queue_t *rxq, uint8_t ring_id)
{
        int err = 0;
        uint32_t i = 0, val = 0;
        struct vmxnet3_cmd_ring *ring = &rxq->cmd_ring[ring_id];

        if (ring_id == 0) {
                /* Usually: One HEAD type buf per packet
                 * val = (ring->next2fill % rxq->hw->bufs_per_pkt) ?
                 * VMXNET3_RXD_BTYPE_BODY : VMXNET3_RXD_BTYPE_HEAD;
                 */

                /* We use single packet buffer so all heads here */
                val = VMXNET3_RXD_BTYPE_HEAD;
        } else {
                /* All BODY type buffers for 2nd ring */
                val = VMXNET3_RXD_BTYPE_BODY;
        }

        while (vmxnet3_cmd_ring_desc_avail(ring) > 0) {
                struct Vmxnet3_RxDesc *rxd;
                struct rte_mbuf *mbuf;
                vmxnet3_buf_info_t *buf_info = &ring->buf_info[ring->next2fill];

                rxd = (struct Vmxnet3_RxDesc *)(ring->base + ring->next2fill);

                /* Allocate blank mbuf for the current Rx Descriptor */
                mbuf = rte_mbuf_raw_alloc(rxq->mp);
                if (unlikely(mbuf == NULL)) {
                        PMD_RX_LOG(ERR, "Error allocating mbuf");
                        rxq->stats.rx_buf_alloc_failure++;
                        err = ENOMEM;
                        break;
                }

                /*
                 * Load mbuf pointer into buf_info[ring_size]
                 * buf_info structure is equivalent to cookie for virtio-virtqueue
                 */
                buf_info->m = mbuf;
                buf_info->len = (uint16_t)(mbuf->buf_len -
                                           RTE_PKTMBUF_HEADROOM);
                buf_info->bufPA = rte_mbuf_data_dma_addr_default(mbuf);

                /* Load Rx Descriptor with the buffer's GPA */
                rxd->addr = buf_info->bufPA;

                /* After this point rxd->addr MUST not be NULL */
                rxd->btype = val;
                rxd->len = buf_info->len;
                /* Flip gen bit at the end to change ownership */
                rxd->gen = ring->gen;

                vmxnet3_cmd_ring_adv_next2fill(ring);
                i++;
        }

        /* Return error only if no buffers are posted at present */
        if (vmxnet3_cmd_ring_desc_avail(ring) >= (ring->size - 1))
                return -err;
        else
                return i;
}


/* Receive side checksum and other offloads */
static void
vmxnet3_rx_offload(const Vmxnet3_RxCompDesc *rcd, struct rte_mbuf *rxm)
{
        /* Check for RSS */
        if (rcd->rssType != VMXNET3_RCD_RSS_TYPE_NONE) {
                rxm->ol_flags |= PKT_RX_RSS_HASH;
                rxm->hash.rss = rcd->rssHash;
        }

        /* Check packet type, checksum errors, etc. Only support IPv4 for now. */
        if (rcd->v4) {
                struct ether_hdr *eth = rte_pktmbuf_mtod(rxm, struct ether_hdr *);
                struct ipv4_hdr *ip = (struct ipv4_hdr *)(eth + 1);

                if (((ip->version_ihl & 0xf) << 2) > (int)sizeof(struct ipv4_hdr))
                        rxm->packet_type = RTE_PTYPE_L3_IPV4_EXT;
                else
                        rxm->packet_type = RTE_PTYPE_L3_IPV4;

                if (!rcd->cnc) {
                        if (!rcd->ipc)
                                rxm->ol_flags |= PKT_RX_IP_CKSUM_BAD;

                        if ((rcd->tcp || rcd->udp) && !rcd->tuc)
                                rxm->ol_flags |= PKT_RX_L4_CKSUM_BAD;
                }
        }
}

/*
 * Process the Rx Completion Ring of given vmxnet3_rx_queue
 * for nb_pkts burst and return the number of packets received
 */
uint16_t
vmxnet3_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t nb_pkts)
{
        uint16_t nb_rx;
        uint32_t nb_rxd, idx;
        uint8_t ring_idx;
        vmxnet3_rx_queue_t *rxq;
        Vmxnet3_RxCompDesc *rcd;
        vmxnet3_buf_info_t *rbi;
        Vmxnet3_RxDesc *rxd;
        struct rte_mbuf *rxm = NULL;
        struct vmxnet3_hw *hw;

        nb_rx = 0;
        ring_idx = 0;
        nb_rxd = 0;
        idx = 0;

        rxq = rx_queue;
        hw = rxq->hw;

        rcd = &rxq->comp_ring.base[rxq->comp_ring.next2proc].rcd;

        if (unlikely(rxq->stopped)) {
                PMD_RX_LOG(DEBUG, "Rx queue is stopped.");
                return 0;
        }

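        /* Process completion descriptors the device has written back; a
         * descriptor belongs to the driver once its gen bit matches the
         * ring's current generation.
         */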
        while (rcd->gen == rxq->comp_ring.gen) {
                struct rte_mbuf *newm;

                if (nb_rx >= nb_pkts)
                        break;

                newm = rte_mbuf_raw_alloc(rxq->mp);
                if (unlikely(newm == NULL)) {
                        PMD_RX_LOG(ERR, "Error allocating mbuf");
                        rxq->stats.rx_buf_alloc_failure++;
                        break;
                }

                idx = rcd->rxdIdx;
                ring_idx = vmxnet3_get_ring_idx(hw, rcd->rqID);
                rxd = (Vmxnet3_RxDesc *)rxq->cmd_ring[ring_idx].base + idx;
                RTE_SET_USED(rxd); /* used only for assert when enabled */
                rbi = rxq->cmd_ring[ring_idx].buf_info + idx;

                PMD_RX_LOG(DEBUG, "rxd idx: %d ring idx: %d.", idx, ring_idx);

                RTE_ASSERT(rcd->len <= rxd->len);
                RTE_ASSERT(rbi->m);

                /* Get the packet buffer pointer from buf_info */
                rxm = rbi->m;

                /* Clear descriptor associated buf_info to be reused */
                rbi->m = NULL;
                rbi->bufPA = 0;

                /* Update the index that we received a packet */
                rxq->cmd_ring[ring_idx].next2comp = idx;

                /* For RCD with EOP set, check if there is frame error */
                if (unlikely(rcd->eop && rcd->err)) {
                        rxq->stats.drop_total++;
                        rxq->stats.drop_err++;

                        if (!rcd->fcs) {
                                rxq->stats.drop_fcs++;
                                PMD_RX_LOG(ERR, "Recv packet dropped due to frame err.");
                        }
                        PMD_RX_LOG(ERR, "Error in received packet rcd#:%d rxd:%d",
                                   (int)(rcd - (struct Vmxnet3_RxCompDesc *)
                                         rxq->comp_ring.base), rcd->rxdIdx);
                        rte_pktmbuf_free_seg(rxm);
                        goto rcd_done;
                }

                /* Initialize newly received packet buffer */
                rxm->port = rxq->port_id;
                rxm->nb_segs = 1;
                rxm->next = NULL;
                rxm->pkt_len = (uint16_t)rcd->len;
                rxm->data_len = (uint16_t)rcd->len;
                rxm->data_off = RTE_PKTMBUF_HEADROOM;
                rxm->ol_flags = 0;
                rxm->vlan_tci = 0;

                /*
                 * If this is the first buffer of the received packet,
                 * set the pointer to the first mbuf of the packet
                 * Otherwise, update the total length and the number of segments
                 * of the current scattered packet, and update the pointer to
                 * the last mbuf of the current packet.
                 */
                if (rcd->sop) {
                        RTE_ASSERT(rxd->btype == VMXNET3_RXD_BTYPE_HEAD);

                        if (unlikely(rcd->len == 0)) {
                                RTE_ASSERT(rcd->eop);

                                PMD_RX_LOG(DEBUG,
                                           "Rx buf was skipped. rxring[%d][%d])",
                                           ring_idx, idx);
                                rte_pktmbuf_free_seg(rxm);
                                goto rcd_done;
                        }

                        if (vmxnet3_rx_data_ring(hw, rcd->rqID)) {
                                uint8_t *rdd = rxq->data_ring.base +
                                        idx * rxq->data_desc_size;

                                RTE_ASSERT(VMXNET3_VERSION_GE_3(hw));
                                rte_memcpy(rte_pktmbuf_mtod(rxm, char *),
                                           rdd, rcd->len);
                        }

                        rxq->start_seg = rxm;
                        vmxnet3_rx_offload(rcd, rxm);
                } else {
                        struct rte_mbuf *start = rxq->start_seg;

                        RTE_ASSERT(rxd->btype == VMXNET3_RXD_BTYPE_BODY);

                        start->pkt_len += rxm->data_len;
                        start->nb_segs++;

                        rxq->last_seg->next = rxm;
                }
                rxq->last_seg = rxm;

                if (rcd->eop) {
                        struct rte_mbuf *start = rxq->start_seg;

                        /* Check for hardware stripped VLAN tag */
                        if (rcd->ts) {
                                start->ol_flags |= (PKT_RX_VLAN_PKT | PKT_RX_VLAN_STRIPPED);
                                start->vlan_tci = rte_le_to_cpu_16((uint16_t)rcd->tci);
                        }

                        rx_pkts[nb_rx++] = start;
                        rxq->start_seg = NULL;
                }

rcd_done:
                rxq->cmd_ring[ring_idx].next2comp = idx;
                VMXNET3_INC_RING_IDX_ONLY(rxq->cmd_ring[ring_idx].next2comp,
                                          rxq->cmd_ring[ring_idx].size);

                /* It's time to renew descriptors */
                vmxnet3_renew_desc(rxq, ring_idx, newm);
                if (unlikely(rxq->shared->ctrl.updateRxProd)) {
                        VMXNET3_WRITE_BAR0_REG(hw, rxprod_reg[ring_idx] + (rxq->queue_id * VMXNET3_REG_ALIGN),
                                               rxq->cmd_ring[ring_idx].next2fill);
                }

                /* Advance to the next descriptor in comp_ring */
                vmxnet3_comp_ring_adv_next2proc(&rxq->comp_ring);

                rcd = &rxq->comp_ring.base[rxq->comp_ring.next2proc].rcd;
                nb_rxd++;
                if (nb_rxd > rxq->cmd_ring[0].size) {
                        PMD_RX_LOG(ERR, "Used up quota of receiving packets,"
                                   " relinquish control.");
                        break;
                }
        }

        return nb_rx;
}

/*
 * Create memzone for device rings. malloc can't be used as the physical address is
 * needed. If the memzone is already created, then this function returns a ptr
 * to the old one.
 */
static const struct rte_memzone *
ring_dma_zone_reserve(struct rte_eth_dev *dev, const char *ring_name,
                      uint16_t queue_id, uint32_t ring_size, int socket_id)
{
        char z_name[RTE_MEMZONE_NAMESIZE];
        const struct rte_memzone *mz;

        snprintf(z_name, sizeof(z_name), "%s_%s_%d_%d",
                 dev->driver->pci_drv.driver.name, ring_name,
                 dev->data->port_id, queue_id);

        mz = rte_memzone_lookup(z_name);
        if (mz)
                return mz;

        return rte_memzone_reserve_aligned(z_name, ring_size,
                                           socket_id, 0, VMXNET3_RING_BA_ALIGN);
}

int
vmxnet3_dev_tx_queue_setup(struct rte_eth_dev *dev,
                           uint16_t queue_idx,
                           uint16_t nb_desc,
                           unsigned int socket_id,
                           __rte_unused const struct rte_eth_txconf *tx_conf)
{
        struct vmxnet3_hw *hw = dev->data->dev_private;
        const struct rte_memzone *mz;
        struct vmxnet3_tx_queue *txq;
        struct vmxnet3_cmd_ring *ring;
        struct vmxnet3_comp_ring *comp_ring;
        struct vmxnet3_data_ring *data_ring;
        int size;

        PMD_INIT_FUNC_TRACE();

        if ((tx_conf->txq_flags & ETH_TXQ_FLAGS_NOXSUMSCTP) !=
            ETH_TXQ_FLAGS_NOXSUMSCTP) {
                PMD_INIT_LOG(ERR, "SCTP checksum offload not supported");
                return -EINVAL;
        }

        txq = rte_zmalloc("ethdev_tx_queue", sizeof(struct vmxnet3_tx_queue),
                          RTE_CACHE_LINE_SIZE);
        if (txq == NULL) {
                PMD_INIT_LOG(ERR, "Can not allocate tx queue structure");
                return -ENOMEM;
        }

        txq->queue_id = queue_idx;
        txq->port_id = dev->data->port_id;
        txq->shared = &hw->tqd_start[queue_idx];
        txq->hw = hw;
        txq->qid = queue_idx;
        txq->stopped = TRUE;
        txq->txdata_desc_size = hw->txdata_desc_size;

        ring = &txq->cmd_ring;
        comp_ring = &txq->comp_ring;
        data_ring = &txq->data_ring;

        /* Tx vmxnet ring length should be between 512-4096 */
        if (nb_desc < VMXNET3_DEF_TX_RING_SIZE) {
                PMD_INIT_LOG(ERR, "VMXNET3 Tx Ring Size Min: %u",
                             VMXNET3_DEF_TX_RING_SIZE);
                return -EINVAL;
        } else if (nb_desc > VMXNET3_TX_RING_MAX_SIZE) {
                PMD_INIT_LOG(ERR, "VMXNET3 Tx Ring Size Max: %u",
                             VMXNET3_TX_RING_MAX_SIZE);
                return -EINVAL;
        } else {
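                /* Round the requested ring size down to a multiple of the
                 * ring size alignment.
                 */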
                ring->size = nb_desc;
                ring->size &= ~VMXNET3_RING_SIZE_MASK;
        }
        comp_ring->size = data_ring->size = ring->size;

        /* Tx vmxnet rings structure initialization */
        ring->next2fill = 0;
        ring->next2comp = 0;
        ring->gen = VMXNET3_INIT_GEN;
        comp_ring->next2proc = 0;
        comp_ring->gen = VMXNET3_INIT_GEN;

        size = sizeof(struct Vmxnet3_TxDesc) * ring->size;
        size += sizeof(struct Vmxnet3_TxCompDesc) * comp_ring->size;
        size += txq->txdata_desc_size * data_ring->size;

        mz = ring_dma_zone_reserve(dev, "txdesc", queue_idx, size, socket_id);
        if (mz == NULL) {
                PMD_INIT_LOG(ERR, "ERROR: Creating queue descriptors zone");
                return -ENOMEM;
        }
        memset(mz->addr, 0, mz->len);

        /* cmd_ring initialization */
        ring->base = mz->addr;
        ring->basePA = mz->phys_addr;

        /* comp_ring initialization */
        comp_ring->base = ring->base + ring->size;
        comp_ring->basePA = ring->basePA +
                (sizeof(struct Vmxnet3_TxDesc) * ring->size);

        /* data_ring initialization */
        data_ring->base = (Vmxnet3_TxDataDesc *)(comp_ring->base + comp_ring->size);
        data_ring->basePA = comp_ring->basePA +
                        (sizeof(struct Vmxnet3_TxCompDesc) * comp_ring->size);

        /* cmd_ring0 buf_info allocation */
        ring->buf_info = rte_zmalloc("tx_ring_buf_info",
                                     ring->size * sizeof(vmxnet3_buf_info_t), RTE_CACHE_LINE_SIZE);
        if (ring->buf_info == NULL) {
                PMD_INIT_LOG(ERR, "ERROR: Creating tx_buf_info structure");
                return -ENOMEM;
        }

        /* Update the data portion with txq */
        dev->data->tx_queues[queue_idx] = txq;

        return 0;
}

int
vmxnet3_dev_rx_queue_setup(struct rte_eth_dev *dev,
                           uint16_t queue_idx,
                           uint16_t nb_desc,
                           unsigned int socket_id,
                           __rte_unused const struct rte_eth_rxconf *rx_conf,
                           struct rte_mempool *mp)
{
        const struct rte_memzone *mz;
        struct vmxnet3_rx_queue *rxq;
        struct vmxnet3_hw *hw = dev->data->dev_private;
        struct vmxnet3_cmd_ring *ring0, *ring1, *ring;
        struct vmxnet3_comp_ring *comp_ring;
        struct vmxnet3_rx_data_ring *data_ring;
        int size;
        uint8_t i;
        char mem_name[32];

        PMD_INIT_FUNC_TRACE();

        rxq = rte_zmalloc("ethdev_rx_queue", sizeof(struct vmxnet3_rx_queue),
                          RTE_CACHE_LINE_SIZE);
        if (rxq == NULL) {
                PMD_INIT_LOG(ERR, "Can not allocate rx queue structure");
                return -ENOMEM;
        }

        rxq->mp = mp;
        rxq->queue_id = queue_idx;
        rxq->port_id = dev->data->port_id;
        rxq->shared = &hw->rqd_start[queue_idx];
        rxq->hw = hw;
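        /* The device numbers this queue's two command rings and its data ring
         * as three separate ring ids.
         */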
        rxq->qid1 = queue_idx;
        rxq->qid2 = queue_idx + hw->num_rx_queues;
        rxq->data_ring_qid = queue_idx + 2 * hw->num_rx_queues;
        rxq->data_desc_size = hw->rxdata_desc_size;
        rxq->stopped = TRUE;

        ring0 = &rxq->cmd_ring[0];
        ring1 = &rxq->cmd_ring[1];
        comp_ring = &rxq->comp_ring;
        data_ring = &rxq->data_ring;

        /* Rx vmxnet rings length should be between 256-4096 */
        if (nb_desc < VMXNET3_DEF_RX_RING_SIZE) {
                PMD_INIT_LOG(ERR, "VMXNET3 Rx Ring Size Min: 256");
                return -EINVAL;
        } else if (nb_desc > VMXNET3_RX_RING_MAX_SIZE) {
                PMD_INIT_LOG(ERR, "VMXNET3 Rx Ring Size Max: 4096");
                return -EINVAL;
        } else {
                ring0->size = nb_desc;
                ring0->size &= ~VMXNET3_RING_SIZE_MASK;
                ring1->size = ring0->size;
        }

        comp_ring->size = ring0->size + ring1->size;
        data_ring->size = ring0->size;

        /* Rx vmxnet rings structure initialization */
        ring0->next2fill = 0;
        ring1->next2fill = 0;
        ring0->next2comp = 0;
        ring1->next2comp = 0;
        ring0->gen = VMXNET3_INIT_GEN;
        ring1->gen = VMXNET3_INIT_GEN;
        comp_ring->next2proc = 0;
        comp_ring->gen = VMXNET3_INIT_GEN;

        size = sizeof(struct Vmxnet3_RxDesc) * (ring0->size + ring1->size);
        size += sizeof(struct Vmxnet3_RxCompDesc) * comp_ring->size;
        if (VMXNET3_VERSION_GE_3(hw) && rxq->data_desc_size)
                size += rxq->data_desc_size * data_ring->size;

        mz = ring_dma_zone_reserve(dev, "rxdesc", queue_idx, size, socket_id);
        if (mz == NULL) {
                PMD_INIT_LOG(ERR, "ERROR: Creating queue descriptors zone");
                return -ENOMEM;
        }
        memset(mz->addr, 0, mz->len);

        /* cmd_ring0 initialization */
        ring0->base = mz->addr;
        ring0->basePA = mz->phys_addr;

        /* cmd_ring1 initialization */
        ring1->base = ring0->base + ring0->size;
        ring1->basePA = ring0->basePA + sizeof(struct Vmxnet3_RxDesc) * ring0->size;

        /* comp_ring initialization */
        comp_ring->base = ring1->base + ring1->size;
        comp_ring->basePA = ring1->basePA + sizeof(struct Vmxnet3_RxDesc) *
                ring1->size;

        /* data_ring initialization */
        if (VMXNET3_VERSION_GE_3(hw) && rxq->data_desc_size) {
                data_ring->base =
                        (uint8_t *)(comp_ring->base + comp_ring->size);
                data_ring->basePA = comp_ring->basePA +
                        sizeof(struct Vmxnet3_RxCompDesc) * comp_ring->size;
        }

        /* cmd_ring0-cmd_ring1 buf_info allocation */
        for (i = 0; i < VMXNET3_RX_CMDRING_SIZE; i++) {

                ring = &rxq->cmd_ring[i];
                ring->rid = i;
                snprintf(mem_name, sizeof(mem_name), "rx_ring_%d_buf_info", i);

                ring->buf_info = rte_zmalloc(mem_name,
                                             ring->size * sizeof(vmxnet3_buf_info_t),
                                             RTE_CACHE_LINE_SIZE);
                if (ring->buf_info == NULL) {
                        PMD_INIT_LOG(ERR, "ERROR: Creating rx_buf_info structure");
                        return -ENOMEM;
                }
        }

        /* Update the data portion with rxq */
        dev->data->rx_queues[queue_idx] = rxq;

        return 0;
}

/*
 * Initializes Receive Unit
 * Load mbufs in rx queue in advance
 */
int
vmxnet3_dev_rxtx_init(struct rte_eth_dev *dev)
{
        struct vmxnet3_hw *hw = dev->data->dev_private;

        int i, ret;
        uint8_t j;

        PMD_INIT_FUNC_TRACE();

        for (i = 0; i < hw->num_rx_queues; i++) {
                vmxnet3_rx_queue_t *rxq = dev->data->rx_queues[i];

                for (j = 0; j < VMXNET3_RX_CMDRING_SIZE; j++) {
                        /* Post as many Rx buffers as the ring can hold */
                        ret = vmxnet3_post_rx_bufs(rxq, j);
                        if (ret <= 0) {
                                PMD_INIT_LOG(ERR,
                                             "ERROR: Posting Rxq: %d buffers ring: %d",
                                             i, j);
                                return -ret;
                        }
                        /*
                         * Updating device with the index:next2fill to fill the
                         * mbufs for coming packets.
                         */
                        if (unlikely(rxq->shared->ctrl.updateRxProd)) {
                                VMXNET3_WRITE_BAR0_REG(hw, rxprod_reg[j] + (rxq->queue_id * VMXNET3_REG_ALIGN),
                                                       rxq->cmd_ring[j].next2fill);
                        }
                }
                rxq->stopped = FALSE;
                rxq->start_seg = NULL;
        }

        for (i = 0; i < dev->data->nb_tx_queues; i++) {
                struct vmxnet3_tx_queue *txq = dev->data->tx_queues[i];

                txq->stopped = FALSE;
        }

        return 0;
}

static uint8_t rss_intel_key[40] = {
        0x6D, 0x5A, 0x56, 0xDA, 0x25, 0x5B, 0x0E, 0xC2,
        0x41, 0x67, 0x25, 0x3D, 0x43, 0xA3, 0x8F, 0xB0,
        0xD0, 0xCA, 0x2B, 0xCB, 0xAE, 0x7B, 0x30, 0xB4,
        0x77, 0xCB, 0x2D, 0xA3, 0x80, 0x30, 0xF2, 0x0C,
        0x6A, 0x42, 0xB7, 0x3B, 0xBE, 0xAC, 0x01, 0xFA,
};

/*
 * Configure RSS feature
 */
int
vmxnet3_rss_configure(struct rte_eth_dev *dev)
{
        struct vmxnet3_hw *hw = dev->data->dev_private;
        struct VMXNET3_RSSConf *dev_rss_conf;
        struct rte_eth_rss_conf *port_rss_conf;
        uint64_t rss_hf;
        uint8_t i, j;

        PMD_INIT_FUNC_TRACE();

        dev_rss_conf = hw->rss_conf;
        port_rss_conf = &dev->data->dev_conf.rx_adv_conf.rss_conf;

        /* loading hashFunc */
        dev_rss_conf->hashFunc = VMXNET3_RSS_HASH_FUNC_TOEPLITZ;
        /* loading hashKeySize */
        dev_rss_conf->hashKeySize = VMXNET3_RSS_MAX_KEY_SIZE;
        /* loading indTableSize: Must not exceed VMXNET3_RSS_MAX_IND_TABLE_SIZE (128) */
        dev_rss_conf->indTableSize = (uint16_t)(hw->num_rx_queues * 4);

        if (port_rss_conf->rss_key == NULL) {
                /* Default hash key */
                port_rss_conf->rss_key = rss_intel_key;
        }

        /* loading hashKey */
        memcpy(&dev_rss_conf->hashKey[0], port_rss_conf->rss_key,
               dev_rss_conf->hashKeySize);

        /* loading indTable */
        for (i = 0, j = 0; i < dev_rss_conf->indTableSize; i++, j++) {
                if (j == dev->data->nb_rx_queues)
                        j = 0;
                dev_rss_conf->indTable[i] = j;
        }

        /* loading hashType */
        dev_rss_conf->hashType = 0;
        rss_hf = port_rss_conf->rss_hf & VMXNET3_RSS_OFFLOAD_ALL;
        if (rss_hf & ETH_RSS_IPV4)
                dev_rss_conf->hashType |= VMXNET3_RSS_HASH_TYPE_IPV4;
        if (rss_hf & ETH_RSS_NONFRAG_IPV4_TCP)
                dev_rss_conf->hashType |= VMXNET3_RSS_HASH_TYPE_TCP_IPV4;
        if (rss_hf & ETH_RSS_IPV6)
                dev_rss_conf->hashType |= VMXNET3_RSS_HASH_TYPE_IPV6;
        if (rss_hf & ETH_RSS_NONFRAG_IPV6_TCP)
                dev_rss_conf->hashType |= VMXNET3_RSS_HASH_TYPE_TCP_IPV6;

        return VMXNET3_SUCCESS;
}