net/vmxnet3: allow variable length Tx data ring
[dpdk.git] / drivers / net / vmxnet3 / vmxnet3_rxtx.c
1 /*-
2  *   BSD LICENSE
3  *
4  *   Copyright(c) 2010-2015 Intel Corporation. All rights reserved.
5  *   All rights reserved.
6  *
7  *   Redistribution and use in source and binary forms, with or without
8  *   modification, are permitted provided that the following conditions
9  *   are met:
10  *
11  *     * Redistributions of source code must retain the above copyright
12  *       notice, this list of conditions and the following disclaimer.
13  *     * Redistributions in binary form must reproduce the above copyright
14  *       notice, this list of conditions and the following disclaimer in
15  *       the documentation and/or other materials provided with the
16  *       distribution.
17  *     * Neither the name of Intel Corporation nor the names of its
18  *       contributors may be used to endorse or promote products derived
19  *       from this software without specific prior written permission.
20  *
21  *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
22  *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
23  *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
24  *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
25  *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
26  *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
27  *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
28  *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
29  *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
30  *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
31  *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
32  */
33
34 #include <sys/queue.h>
35
36 #include <stdio.h>
37 #include <stdlib.h>
38 #include <string.h>
39 #include <errno.h>
40 #include <stdint.h>
41 #include <stdarg.h>
42 #include <unistd.h>
43 #include <inttypes.h>
44
45 #include <rte_byteorder.h>
46 #include <rte_common.h>
47 #include <rte_cycles.h>
48 #include <rte_log.h>
49 #include <rte_debug.h>
50 #include <rte_interrupts.h>
51 #include <rte_pci.h>
52 #include <rte_memory.h>
53 #include <rte_memzone.h>
54 #include <rte_launch.h>
55 #include <rte_eal.h>
56 #include <rte_per_lcore.h>
57 #include <rte_lcore.h>
58 #include <rte_atomic.h>
59 #include <rte_branch_prediction.h>
60 #include <rte_mempool.h>
61 #include <rte_malloc.h>
62 #include <rte_mbuf.h>
63 #include <rte_ether.h>
64 #include <rte_ethdev.h>
65 #include <rte_prefetch.h>
66 #include <rte_ip.h>
67 #include <rte_udp.h>
68 #include <rte_tcp.h>
69 #include <rte_sctp.h>
70 #include <rte_string_fns.h>
71 #include <rte_errno.h>
72 #include <rte_net.h>
73
74 #include "base/vmxnet3_defs.h"
75 #include "vmxnet3_ring.h"
76
77 #include "vmxnet3_logs.h"
78 #include "vmxnet3_ethdev.h"
79
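/*
 * Tx offloads this PMD can handle itself.  vmxnet3_prep_pkts() rejects any
 * packet requesting an offload outside VMXNET3_TX_OFFLOAD_MASK; SCTP
 * checksum is part of PKT_TX_L4_MASK and is therefore checked separately.
 */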
80 #define VMXNET3_TX_OFFLOAD_MASK ( \
81                 PKT_TX_VLAN_PKT | \
82                 PKT_TX_L4_MASK |  \
83                 PKT_TX_TCP_SEG)
84
85 #define VMXNET3_TX_OFFLOAD_NOTSUP_MASK  \
86         (PKT_TX_OFFLOAD_MASK ^ VMXNET3_TX_OFFLOAD_MASK)
87
88 static const uint32_t rxprod_reg[2] = {VMXNET3_REG_RXPROD, VMXNET3_REG_RXPROD2};
89
90 static int vmxnet3_post_rx_bufs(vmxnet3_rx_queue_t*, uint8_t);
91 static void vmxnet3_tq_tx_complete(vmxnet3_tx_queue_t *);
92 #ifdef RTE_LIBRTE_VMXNET3_DEBUG_DRIVER_NOT_USED
93 static void vmxnet3_rxq_dump(struct vmxnet3_rx_queue *);
94 static void vmxnet3_txq_dump(struct vmxnet3_tx_queue *);
95 #endif
96
97 #ifdef RTE_LIBRTE_VMXNET3_DEBUG_DRIVER_NOT_USED
98 static void
99 vmxnet3_rxq_dump(struct vmxnet3_rx_queue *rxq)
100 {
101         uint32_t avail = 0;
102
103         if (rxq == NULL)
104                 return;
105
106         PMD_RX_LOG(DEBUG,
107                    "RXQ: cmd0 base : %p cmd1 base : %p comp ring base : %p.",
108                    rxq->cmd_ring[0].base, rxq->cmd_ring[1].base, rxq->comp_ring.base);
109         PMD_RX_LOG(DEBUG,
110                    "RXQ: cmd0 basePA : 0x%lx cmd1 basePA : 0x%lx comp ring basePA : 0x%lx.",
111                    (unsigned long)rxq->cmd_ring[0].basePA,
112                    (unsigned long)rxq->cmd_ring[1].basePA,
113                    (unsigned long)rxq->comp_ring.basePA);
114
115         avail = vmxnet3_cmd_ring_desc_avail(&rxq->cmd_ring[0]);
116         PMD_RX_LOG(DEBUG,
117                    "RXQ:cmd0: size=%u; free=%u; next2proc=%u; queued=%u",
118                    (uint32_t)rxq->cmd_ring[0].size, avail,
119                    rxq->comp_ring.next2proc,
120                    rxq->cmd_ring[0].size - avail);
121
122         avail = vmxnet3_cmd_ring_desc_avail(&rxq->cmd_ring[1]);
123         PMD_RX_LOG(DEBUG, "RXQ:cmd1 size=%u; free=%u; next2proc=%u; queued=%u",
124                    (uint32_t)rxq->cmd_ring[1].size, avail, rxq->comp_ring.next2proc,
125                    rxq->cmd_ring[1].size - avail);
126
127 }
128
129 static void
130 vmxnet3_txq_dump(struct vmxnet3_tx_queue *txq)
131 {
132         uint32_t avail = 0;
133
134         if (txq == NULL)
135                 return;
136
137         PMD_TX_LOG(DEBUG, "TXQ: cmd base : %p comp ring base : %p data ring base : %p.",
138                    txq->cmd_ring.base, txq->comp_ring.base, txq->data_ring.base);
139         PMD_TX_LOG(DEBUG, "TXQ: cmd basePA : 0x%lx comp ring basePA : 0x%lx data ring basePA : 0x%lx.",
140                    (unsigned long)txq->cmd_ring.basePA,
141                    (unsigned long)txq->comp_ring.basePA,
142                    (unsigned long)txq->data_ring.basePA);
143
144         avail = vmxnet3_cmd_ring_desc_avail(&txq->cmd_ring);
145         PMD_TX_LOG(DEBUG, "TXQ: size=%u; free=%u; next2proc=%u; queued=%u",
146                    (uint32_t)txq->cmd_ring.size, avail,
147                    txq->comp_ring.next2proc, txq->cmd_ring.size - avail);
148 }
149 #endif
150
151 static void
152 vmxnet3_tx_cmd_ring_release_mbufs(vmxnet3_cmd_ring_t *ring)
153 {
154         while (ring->next2comp != ring->next2fill) {
155                 /* No need to worry about desc ownership, device is quiesced by now. */
156                 vmxnet3_buf_info_t *buf_info = ring->buf_info + ring->next2comp;
157
158                 if (buf_info->m) {
159                         rte_pktmbuf_free(buf_info->m);
160                         buf_info->m = NULL;
161                         buf_info->bufPA = 0;
162                         buf_info->len = 0;
163                 }
164                 vmxnet3_cmd_ring_adv_next2comp(ring);
165         }
166 }
167
168 static void
169 vmxnet3_rx_cmd_ring_release_mbufs(vmxnet3_cmd_ring_t *ring)
170 {
171         uint32_t i;
172
173         for (i = 0; i < ring->size; i++) {
174                 /* No need to worry about desc ownership, device is quiesced by now. */
175                 vmxnet3_buf_info_t *buf_info = &ring->buf_info[i];
176
177                 if (buf_info->m) {
178                         rte_pktmbuf_free_seg(buf_info->m);
179                         buf_info->m = NULL;
180                         buf_info->bufPA = 0;
181                         buf_info->len = 0;
182                 }
183                 vmxnet3_cmd_ring_adv_next2comp(ring);
184         }
185 }
186
187 static void
188 vmxnet3_cmd_ring_release(vmxnet3_cmd_ring_t *ring)
189 {
190         rte_free(ring->buf_info);
191         ring->buf_info = NULL;
192 }
193
194 void
195 vmxnet3_dev_tx_queue_release(void *txq)
196 {
197         vmxnet3_tx_queue_t *tq = txq;
198
199         if (tq != NULL) {
200                 /* Release mbufs */
201                 vmxnet3_tx_cmd_ring_release_mbufs(&tq->cmd_ring);
202                 /* Release the cmd_ring */
203                 vmxnet3_cmd_ring_release(&tq->cmd_ring);
204         }
205 }
206
207 void
208 vmxnet3_dev_rx_queue_release(void *rxq)
209 {
210         int i;
211         vmxnet3_rx_queue_t *rq = rxq;
212
213         if (rq != NULL) {
214                 /* Release mbufs */
215                 for (i = 0; i < VMXNET3_RX_CMDRING_SIZE; i++)
216                         vmxnet3_rx_cmd_ring_release_mbufs(&rq->cmd_ring[i]);
217
218                 /* Release both the cmd_rings */
219                 for (i = 0; i < VMXNET3_RX_CMDRING_SIZE; i++)
220                         vmxnet3_cmd_ring_release(&rq->cmd_ring[i]);
221         }
222 }
223
224 static void
225 vmxnet3_dev_tx_queue_reset(void *txq)
226 {
227         vmxnet3_tx_queue_t *tq = txq;
228         struct vmxnet3_cmd_ring *ring = &tq->cmd_ring;
229         struct vmxnet3_comp_ring *comp_ring = &tq->comp_ring;
230         struct vmxnet3_data_ring *data_ring = &tq->data_ring;
231         int size;
232
233         if (tq != NULL) {
234                 /* Release the cmd_ring mbufs */
235                 vmxnet3_tx_cmd_ring_release_mbufs(&tq->cmd_ring);
236         }
237
238         /* Tx vmxnet rings structure initialization */
239         ring->next2fill = 0;
240         ring->next2comp = 0;
241         ring->gen = VMXNET3_INIT_GEN;
242         comp_ring->next2proc = 0;
243         comp_ring->gen = VMXNET3_INIT_GEN;
244
245         size = sizeof(struct Vmxnet3_TxDesc) * ring->size;
246         size += sizeof(struct Vmxnet3_TxCompDesc) * comp_ring->size;
247         size += tq->txdata_desc_size * data_ring->size;
248
249         memset(ring->base, 0, size);
250 }
251
252 static void
253 vmxnet3_dev_rx_queue_reset(void *rxq)
254 {
255         int i;
256         vmxnet3_rx_queue_t *rq = rxq;
257         struct vmxnet3_cmd_ring *ring0, *ring1;
258         struct vmxnet3_comp_ring *comp_ring;
259         int size;
260
261         if (rq != NULL) {
262                 /* Release both the cmd_rings mbufs */
263                 for (i = 0; i < VMXNET3_RX_CMDRING_SIZE; i++)
264                         vmxnet3_rx_cmd_ring_release_mbufs(&rq->cmd_ring[i]);
265         }
266
267         ring0 = &rq->cmd_ring[0];
268         ring1 = &rq->cmd_ring[1];
269         comp_ring = &rq->comp_ring;
270
271         /* Rx vmxnet rings structure initialization */
272         ring0->next2fill = 0;
273         ring1->next2fill = 0;
274         ring0->next2comp = 0;
275         ring1->next2comp = 0;
276         ring0->gen = VMXNET3_INIT_GEN;
277         ring1->gen = VMXNET3_INIT_GEN;
278         comp_ring->next2proc = 0;
279         comp_ring->gen = VMXNET3_INIT_GEN;
280
281         size = sizeof(struct Vmxnet3_RxDesc) * (ring0->size + ring1->size);
282         size += sizeof(struct Vmxnet3_RxCompDesc) * comp_ring->size;
283
284         memset(ring0->base, 0, size);
285 }
286
287 void
288 vmxnet3_dev_clear_queues(struct rte_eth_dev *dev)
289 {
290         unsigned i;
291
292         PMD_INIT_FUNC_TRACE();
293
294         for (i = 0; i < dev->data->nb_tx_queues; i++) {
295                 struct vmxnet3_tx_queue *txq = dev->data->tx_queues[i];
296
297                 if (txq != NULL) {
298                         txq->stopped = TRUE;
299                         vmxnet3_dev_tx_queue_reset(txq);
300                 }
301         }
302
303         for (i = 0; i < dev->data->nb_rx_queues; i++) {
304                 struct vmxnet3_rx_queue *rxq = dev->data->rx_queues[i];
305
306                 if (rxq != NULL) {
307                         rxq->stopped = TRUE;
308                         vmxnet3_dev_rx_queue_reset(rxq);
309                 }
310         }
311 }
312
313 static int
314 vmxnet3_unmap_pkt(uint16_t eop_idx, vmxnet3_tx_queue_t *txq)
315 {
316         int completed = 0;
317         struct rte_mbuf *mbuf;
318
319         /* Release cmd_ring descriptor and free mbuf */
320         RTE_ASSERT(txq->cmd_ring.base[eop_idx].txd.eop == 1);
321
322         mbuf = txq->cmd_ring.buf_info[eop_idx].m;
323         if (mbuf == NULL)
324                 rte_panic("EOP desc does not point to a valid mbuf");
325         rte_pktmbuf_free(mbuf);
326
327         txq->cmd_ring.buf_info[eop_idx].m = NULL;
328
329         while (txq->cmd_ring.next2comp != eop_idx) {
330                 /* no out-of-order completion */
331                 RTE_ASSERT(txq->cmd_ring.base[txq->cmd_ring.next2comp].txd.cq == 0);
332                 vmxnet3_cmd_ring_adv_next2comp(&txq->cmd_ring);
333                 completed++;
334         }
335
336         /* Mark the txd for which tcd was generated as completed */
337         vmxnet3_cmd_ring_adv_next2comp(&txq->cmd_ring);
338
339         return completed + 1;
340 }
341
342 static void
343 vmxnet3_tq_tx_complete(vmxnet3_tx_queue_t *txq)
344 {
345         int completed = 0;
346         vmxnet3_comp_ring_t *comp_ring = &txq->comp_ring;
347         struct Vmxnet3_TxCompDesc *tcd = (struct Vmxnet3_TxCompDesc *)
348                 (comp_ring->base + comp_ring->next2proc);
349
350         while (tcd->gen == comp_ring->gen) {
351                 completed += vmxnet3_unmap_pkt(tcd->txdIdx, txq);
352
353                 vmxnet3_comp_ring_adv_next2proc(comp_ring);
354                 tcd = (struct Vmxnet3_TxCompDesc *)(comp_ring->base +
355                                                     comp_ring->next2proc);
356         }
357
358         PMD_TX_LOG(DEBUG, "Processed %d tx comps & command descs.", completed);
359 }
360
361 uint16_t
362 vmxnet3_prep_pkts(__rte_unused void *tx_queue, struct rte_mbuf **tx_pkts,
363         uint16_t nb_pkts)
364 {
365         int32_t ret;
366         uint32_t i;
367         uint64_t ol_flags;
368         struct rte_mbuf *m;
369
370         for (i = 0; i != nb_pkts; i++) {
371                 m = tx_pkts[i];
372                 ol_flags = m->ol_flags;
373
374                 /* Non-TSO packet cannot occupy more than
375                  * VMXNET3_MAX_TXD_PER_PKT TX descriptors.
376                  */
377                 if ((ol_flags & PKT_TX_TCP_SEG) == 0 &&
378                                 m->nb_segs > VMXNET3_MAX_TXD_PER_PKT) {
379                         rte_errno = -EINVAL;
380                         return i;
381                 }
382
383                 /* check that only supported TX offloads are requested. */
384                 if ((ol_flags & VMXNET3_TX_OFFLOAD_NOTSUP_MASK) != 0 ||
385                                 (ol_flags & PKT_TX_L4_MASK) ==
386                                 PKT_TX_SCTP_CKSUM) {
387                         rte_errno = -ENOTSUP;
388                         return i;
389                 }
390
391 #ifdef RTE_LIBRTE_ETHDEV_DEBUG
392                 ret = rte_validate_tx_offload(m);
393                 if (ret != 0) {
394                         rte_errno = ret;
395                         return i;
396                 }
397 #endif
398                 ret = rte_net_intel_cksum_prepare(m);
399                 if (ret != 0) {
400                         rte_errno = ret;
401                         return i;
402                 }
403         }
404
405         return i;
406 }
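/*
 * Illustrative usage sketch (not part of this driver): an application is
 * expected to run its burst through rte_eth_tx_prepare() before
 * rte_eth_tx_burst() so that the checks above (descriptor count, supported
 * offloads, checksum preparation) are applied.  port_id, queue_id, pkts and
 * handle_bad_pkt below are placeholders.
 *
 *     uint16_t nb_prep = rte_eth_tx_prepare(port_id, queue_id, pkts, nb_pkts);
 *
 *     if (nb_prep < nb_pkts)
 *             handle_bad_pkt(pkts[nb_prep], rte_errno);
 *     nb_sent = rte_eth_tx_burst(port_id, queue_id, pkts, nb_prep);
 */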
407
408 uint16_t
409 vmxnet3_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts,
410                   uint16_t nb_pkts)
411 {
412         uint16_t nb_tx;
413         vmxnet3_tx_queue_t *txq = tx_queue;
414         struct vmxnet3_hw *hw = txq->hw;
415         Vmxnet3_TxQueueCtrl *txq_ctrl = &txq->shared->ctrl;
416         uint32_t deferred = rte_le_to_cpu_32(txq_ctrl->txNumDeferred);
417
418         if (unlikely(txq->stopped)) {
419                 PMD_TX_LOG(DEBUG, "Tx queue is stopped.");
420                 return 0;
421         }
422
423         /* Free up the comp_descriptors aggressively */
424         vmxnet3_tq_tx_complete(txq);
425
426         nb_tx = 0;
427         while (nb_tx < nb_pkts) {
428                 Vmxnet3_GenericDesc *gdesc;
429                 vmxnet3_buf_info_t *tbi;
430                 uint32_t first2fill, avail, dw2;
431                 struct rte_mbuf *txm = tx_pkts[nb_tx];
432                 struct rte_mbuf *m_seg = txm;
433                 int copy_size = 0;
434                 bool tso = (txm->ol_flags & PKT_TX_TCP_SEG) != 0;
435                 /* # of descriptors needed for a packet. */
436                 unsigned count = txm->nb_segs;
437
438                 avail = vmxnet3_cmd_ring_desc_avail(&txq->cmd_ring);
439                 if (count > avail) {
440                         /* Is command ring full? */
441                         if (unlikely(avail == 0)) {
442                                 PMD_TX_LOG(DEBUG, "No free ring descriptors");
443                                 txq->stats.tx_ring_full++;
444                                 txq->stats.drop_total += (nb_pkts - nb_tx);
445                                 break;
446                         }
447
448                         /* Command ring is not full but cannot handle the
449                          * multi-segmented packet. Let's try the next packet
450                          * in this case.
451                          */
452                         PMD_TX_LOG(DEBUG, "Running out of ring descriptors "
453                                    "(avail %d needed %d)", avail, count);
454                         txq->stats.drop_total++;
455                         if (tso)
456                                 txq->stats.drop_tso++;
457                         rte_pktmbuf_free(txm);
458                         nb_tx++;
459                         continue;
460                 }
461
462                 /* Drop non-TSO packet that is excessively fragmented */
463                 if (unlikely(!tso && count > VMXNET3_MAX_TXD_PER_PKT)) {
464                         PMD_TX_LOG(ERR, "Non-TSO packet cannot occupy more than %d tx "
465                                    "descriptors. Packet dropped.", VMXNET3_MAX_TXD_PER_PKT);
466                         txq->stats.drop_too_many_segs++;
467                         txq->stats.drop_total++;
468                         rte_pktmbuf_free(txm);
469                         nb_tx++;
470                         continue;
471                 }
472
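                /*
                 * A small single-segment packet that fits into one data-ring
                 * entry (txdata_desc_size bytes, configurable with the
                 * variable-length Tx data ring support) is copied into the Tx
                 * data ring; the descriptor filled below then points at the
                 * data-ring slot instead of the mbuf.
                 */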
473                 if (txm->nb_segs == 1 &&
474                     rte_pktmbuf_pkt_len(txm) <= txq->txdata_desc_size) {
475                         struct Vmxnet3_TxDataDesc *tdd;
476
477                         tdd = (struct Vmxnet3_TxDataDesc *)
478                                 ((uint8 *)txq->data_ring.base +
479                                  txq->cmd_ring.next2fill *
480                                  txq->txdata_desc_size);
481                         copy_size = rte_pktmbuf_pkt_len(txm);
482                         rte_memcpy(tdd->data, rte_pktmbuf_mtod(txm, char *), copy_size);
483                 }
484
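                /*
                 * Descriptor ownership is handed to the device via the
                 * generation bit: the SOP descriptor keeps the previous gen
                 * value while the chain is being built and is only flipped at
                 * the end (after a compiler barrier), so the device never
                 * sees a partially written packet.
                 */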
485                 /* use the previous gen bit for the SOP desc */
486                 dw2 = (txq->cmd_ring.gen ^ 0x1) << VMXNET3_TXD_GEN_SHIFT;
487                 first2fill = txq->cmd_ring.next2fill;
488                 do {
489                         /* Remember the transmit buffer for cleanup */
490                         tbi = txq->cmd_ring.buf_info + txq->cmd_ring.next2fill;
491
492                         /* NB: the following assumes that VMXNET3 maximum
493                          * transmit buffer size (16K) is greater than
494                          * the maximum mbuf segment size.
495                          */
496                         gdesc = txq->cmd_ring.base + txq->cmd_ring.next2fill;
497                         if (copy_size) {
498                                 uint64 offset = txq->cmd_ring.next2fill *
499                                                 txq->txdata_desc_size;
500                                 gdesc->txd.addr =
501                                         rte_cpu_to_le_64(txq->data_ring.basePA +
502                                                          offset);
503                         } else {
504                                 gdesc->txd.addr = rte_mbuf_data_dma_addr(m_seg);
505                         }
506
507                         gdesc->dword[2] = dw2 | m_seg->data_len;
508                         gdesc->dword[3] = 0;
509
510                         /* move to the next2fill descriptor */
511                         vmxnet3_cmd_ring_adv_next2fill(&txq->cmd_ring);
512
513                         /* use the right gen for non-SOP desc */
514                         dw2 = txq->cmd_ring.gen << VMXNET3_TXD_GEN_SHIFT;
515                 } while ((m_seg = m_seg->next) != NULL);
516
517                 /* set the last buf_info for the pkt */
518                 tbi->m = txm;
519                 /* Update the EOP descriptor */
520                 gdesc->dword[3] |= VMXNET3_TXD_EOP | VMXNET3_TXD_CQ;
521
522                 /* Add VLAN tag if present */
523                 gdesc = txq->cmd_ring.base + first2fill;
524                 if (txm->ol_flags & PKT_TX_VLAN_PKT) {
525                         gdesc->txd.ti = 1;
526                         gdesc->txd.tci = txm->vlan_tci;
527                 }
528
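                /*
                 * The hlen/om/msscof descriptor fields are overloaded: for
                 * TSO, hlen spans the L2+L3+L4 headers and msscof carries the
                 * MSS; for checksum offload, hlen spans L2+L3 and msscof is
                 * the byte offset of the checksum field the device must fill.
                 */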
529                 if (tso) {
530                         uint16_t mss = txm->tso_segsz;
531
532                         RTE_ASSERT(mss > 0);
533
534                         gdesc->txd.hlen = txm->l2_len + txm->l3_len + txm->l4_len;
535                         gdesc->txd.om = VMXNET3_OM_TSO;
536                         gdesc->txd.msscof = mss;
537
538                         deferred += (rte_pktmbuf_pkt_len(txm) - gdesc->txd.hlen + mss - 1) / mss;
539                 } else if (txm->ol_flags & PKT_TX_L4_MASK) {
540                         gdesc->txd.om = VMXNET3_OM_CSUM;
541                         gdesc->txd.hlen = txm->l2_len + txm->l3_len;
542
543                         switch (txm->ol_flags & PKT_TX_L4_MASK) {
544                         case PKT_TX_TCP_CKSUM:
545                                 gdesc->txd.msscof = gdesc->txd.hlen + offsetof(struct tcp_hdr, cksum);
546                                 break;
547                         case PKT_TX_UDP_CKSUM:
548                                 gdesc->txd.msscof = gdesc->txd.hlen + offsetof(struct udp_hdr, dgram_cksum);
549                                 break;
550                         default:
551                                 PMD_TX_LOG(WARNING, "requested cksum offload not supported %#" PRIx64,
552                                            txm->ol_flags & PKT_TX_L4_MASK);
553                                 abort();
554                         }
555                         deferred++;
556                 } else {
557                         gdesc->txd.hlen = 0;
558                         gdesc->txd.om = VMXNET3_OM_NONE;
559                         gdesc->txd.msscof = 0;
560                         deferred++;
561                 }
562
563                 /* flip the GEN bit on the SOP */
564                 rte_compiler_barrier();
565                 gdesc->dword[2] ^= VMXNET3_TXD_GEN;
566
567                 txq_ctrl->txNumDeferred = rte_cpu_to_le_32(deferred);
568                 nb_tx++;
569         }
570
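        /*
         * Doorbell writes are batched: txNumDeferred counts the packets
         * queued since the last TXPROD update, and the register is only
         * written once the device-advertised txThreshold is reached.
         */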
571         PMD_TX_LOG(DEBUG, "vmxnet3 txThreshold: %u", rte_le_to_cpu_32(txq_ctrl->txThreshold));
572
573         if (deferred >= rte_le_to_cpu_32(txq_ctrl->txThreshold)) {
574                 txq_ctrl->txNumDeferred = 0;
575                 /* Notify vSwitch that packets are available. */
576                 VMXNET3_WRITE_BAR0_REG(hw, (VMXNET3_REG_TXPROD + txq->queue_id * VMXNET3_REG_ALIGN),
577                                        txq->cmd_ring.next2fill);
578         }
579
580         return nb_tx;
581 }
582
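/*
 * Refill a single Rx descriptor with a freshly allocated mbuf.  Ring 0 is
 * populated with HEAD-type buffers and ring 1 with BODY-type buffers,
 * mirroring vmxnet3_post_rx_bufs().
 */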
583 static inline void
584 vmxnet3_renew_desc(vmxnet3_rx_queue_t *rxq, uint8_t ring_id,
585                    struct rte_mbuf *mbuf)
586 {
587         uint32_t val = 0;
588         struct vmxnet3_cmd_ring *ring = &rxq->cmd_ring[ring_id];
589         struct Vmxnet3_RxDesc *rxd =
590                 (struct Vmxnet3_RxDesc *)(ring->base + ring->next2fill);
591         vmxnet3_buf_info_t *buf_info = &ring->buf_info[ring->next2fill];
592
593         if (ring_id == 0)
594                 val = VMXNET3_RXD_BTYPE_HEAD;
595         else
596                 val = VMXNET3_RXD_BTYPE_BODY;
597
598         buf_info->m = mbuf;
599         buf_info->len = (uint16_t)(mbuf->buf_len - RTE_PKTMBUF_HEADROOM);
600         buf_info->bufPA = rte_mbuf_data_dma_addr_default(mbuf);
601
602         rxd->addr = buf_info->bufPA;
603         rxd->btype = val;
604         rxd->len = buf_info->len;
605         rxd->gen = ring->gen;
606
607         vmxnet3_cmd_ring_adv_next2fill(ring);
608 }
609 /*
610  *  Allocates mbufs and posts Rx descriptors with the buffer details
611  *  so that the device can receive packets into those buffers.
612  *  Ring layout:
613  *      Of the two rings, the 1st ring contains buffers of type 0 and type 1.
614  *      bufs_per_pkt is set such that for non-LRO cases all the buffers required
615  *      by a frame fit in the 1st ring (1st buf of type 0, rest of type 1).
616  *      The 2nd ring contains buffers of type 1 only and is mostly used
617  *      for LRO.
618  */
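/*
 * Returns the number of buffers posted in this call; if the ring is left
 * with (almost) no buffers posted, returns 0 or -ENOMEM instead, which
 * callers treat as an error.
 */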
619 static int
620 vmxnet3_post_rx_bufs(vmxnet3_rx_queue_t *rxq, uint8_t ring_id)
621 {
622         int err = 0;
623         uint32_t i = 0, val = 0;
624         struct vmxnet3_cmd_ring *ring = &rxq->cmd_ring[ring_id];
625
626         if (ring_id == 0) {
627                 /* Usually: One HEAD type buf per packet
628                  * val = (ring->next2fill % rxq->hw->bufs_per_pkt) ?
629                  * VMXNET3_RXD_BTYPE_BODY : VMXNET3_RXD_BTYPE_HEAD;
630                  */
631
632                 /* We use single packet buffer so all heads here */
633                 val = VMXNET3_RXD_BTYPE_HEAD;
634         } else {
635                 /* All BODY type buffers for 2nd ring */
636                 val = VMXNET3_RXD_BTYPE_BODY;
637         }
638
639         while (vmxnet3_cmd_ring_desc_avail(ring) > 0) {
640                 struct Vmxnet3_RxDesc *rxd;
641                 struct rte_mbuf *mbuf;
642                 vmxnet3_buf_info_t *buf_info = &ring->buf_info[ring->next2fill];
643
644                 rxd = (struct Vmxnet3_RxDesc *)(ring->base + ring->next2fill);
645
646                 /* Allocate blank mbuf for the current Rx Descriptor */
647                 mbuf = rte_mbuf_raw_alloc(rxq->mp);
648                 if (unlikely(mbuf == NULL)) {
649                         PMD_RX_LOG(ERR, "Error allocating mbuf");
650                         rxq->stats.rx_buf_alloc_failure++;
651                         err = ENOMEM;
652                         break;
653                 }
654
655                 /*
656                  * Load the mbuf pointer into this descriptor's buf_info entry;
657                  * buf_info plays the same role as the cookie in a virtio virtqueue.
658                  */
659                 buf_info->m = mbuf;
660                 buf_info->len = (uint16_t)(mbuf->buf_len -
661                                            RTE_PKTMBUF_HEADROOM);
662                 buf_info->bufPA = rte_mbuf_data_dma_addr_default(mbuf);
663
664                 /* Load Rx Descriptor with the buffer's GPA */
665                 rxd->addr = buf_info->bufPA;
666
667                 /* After this point rxd->addr MUST not be NULL */
668                 rxd->btype = val;
669                 rxd->len = buf_info->len;
670                 /* Flip gen bit at the end to change ownership */
671                 rxd->gen = ring->gen;
672
673                 vmxnet3_cmd_ring_adv_next2fill(ring);
674                 i++;
675         }
676
677         /* Return error only if no buffers are posted at present */
678         if (vmxnet3_cmd_ring_desc_avail(ring) >= (ring->size - 1))
679                 return -err;
680         else
681                 return i;
682 }
683
684
685 /* Receive side checksum and other offloads */
686 static void
687 vmxnet3_rx_offload(const Vmxnet3_RxCompDesc *rcd, struct rte_mbuf *rxm)
688 {
689         /* Check for RSS */
690         if (rcd->rssType != VMXNET3_RCD_RSS_TYPE_NONE) {
691                 rxm->ol_flags |= PKT_RX_RSS_HASH;
692                 rxm->hash.rss = rcd->rssHash;
693         }
694
695         /* Check packet type, checksum errors, etc. Only support IPv4 for now. */
696         if (rcd->v4) {
697                 struct ether_hdr *eth = rte_pktmbuf_mtod(rxm, struct ether_hdr *);
698                 struct ipv4_hdr *ip = (struct ipv4_hdr *)(eth + 1);
699
700                 if (((ip->version_ihl & 0xf) << 2) > (int)sizeof(struct ipv4_hdr))
701                         rxm->packet_type = RTE_PTYPE_L3_IPV4_EXT;
702                 else
703                         rxm->packet_type = RTE_PTYPE_L3_IPV4;
704
705                 if (!rcd->cnc) {
706                         if (!rcd->ipc)
707                                 rxm->ol_flags |= PKT_RX_IP_CKSUM_BAD;
708
709                         if ((rcd->tcp || rcd->udp) && !rcd->tuc)
710                                 rxm->ol_flags |= PKT_RX_L4_CKSUM_BAD;
711                 }
712         }
713 }
714
715 /*
716  * Process the Rx Completion Ring of given vmxnet3_rx_queue
717  * for nb_pkts burst and return the number of packets received
718  */
719 uint16_t
720 vmxnet3_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t nb_pkts)
721 {
722         uint16_t nb_rx;
723         uint32_t nb_rxd, idx;
724         uint8_t ring_idx;
725         vmxnet3_rx_queue_t *rxq;
726         Vmxnet3_RxCompDesc *rcd;
727         vmxnet3_buf_info_t *rbi;
728         Vmxnet3_RxDesc *rxd;
729         struct rte_mbuf *rxm = NULL;
730         struct vmxnet3_hw *hw;
731
732         nb_rx = 0;
733         ring_idx = 0;
734         nb_rxd = 0;
735         idx = 0;
736
737         rxq = rx_queue;
738         hw = rxq->hw;
739
740         rcd = &rxq->comp_ring.base[rxq->comp_ring.next2proc].rcd;
741
742         if (unlikely(rxq->stopped)) {
743                 PMD_RX_LOG(DEBUG, "Rx queue is stopped.");
744                 return 0;
745         }
746
747         while (rcd->gen == rxq->comp_ring.gen) {
748                 struct rte_mbuf *newm;
749
750                 if (nb_rx >= nb_pkts)
751                         break;
752
753                 newm = rte_mbuf_raw_alloc(rxq->mp);
754                 if (unlikely(newm == NULL)) {
755                         PMD_RX_LOG(ERR, "Error allocating mbuf");
756                         rxq->stats.rx_buf_alloc_failure++;
757                         break;
758                 }
759
760                 idx = rcd->rxdIdx;
761                 ring_idx = (uint8_t)((rcd->rqID == rxq->qid1) ? 0 : 1);
762                 rxd = (Vmxnet3_RxDesc *)rxq->cmd_ring[ring_idx].base + idx;
763                 RTE_SET_USED(rxd); /* used only for assert when enabled */
764                 rbi = rxq->cmd_ring[ring_idx].buf_info + idx;
765
766                 PMD_RX_LOG(DEBUG, "rxd idx: %d ring idx: %d.", idx, ring_idx);
767
768                 RTE_ASSERT(rcd->len <= rxd->len);
769                 RTE_ASSERT(rbi->m);
770
771                 /* Get the packet buffer pointer from buf_info */
772                 rxm = rbi->m;
773
774                 /* Clear descriptor associated buf_info to be reused */
775                 rbi->m = NULL;
776                 rbi->bufPA = 0;
777
778                 /* Update the index that we received a packet */
779                 rxq->cmd_ring[ring_idx].next2comp = idx;
780
781                 /* For RCD with EOP set, check if there is frame error */
782                 if (unlikely(rcd->eop && rcd->err)) {
783                         rxq->stats.drop_total++;
784                         rxq->stats.drop_err++;
785
786                         if (!rcd->fcs) {
787                                 rxq->stats.drop_fcs++;
788                                 PMD_RX_LOG(ERR, "Recv packet dropped due to frame err.");
789                         }
790                         PMD_RX_LOG(ERR, "Error in received packet rcd#:%d rxd:%d",
791                                    (int)(rcd - (struct Vmxnet3_RxCompDesc *)
792                                          rxq->comp_ring.base), rcd->rxdIdx);
793                         rte_pktmbuf_free_seg(rxm);
794                         goto rcd_done;
795                 }
796
797                 /* Initialize newly received packet buffer */
798                 rxm->port = rxq->port_id;
799                 rxm->nb_segs = 1;
800                 rxm->next = NULL;
801                 rxm->pkt_len = (uint16_t)rcd->len;
802                 rxm->data_len = (uint16_t)rcd->len;
803                 rxm->data_off = RTE_PKTMBUF_HEADROOM;
804                 rxm->ol_flags = 0;
805                 rxm->vlan_tci = 0;
806
807                 /*
808                  * If this is the first buffer of the received packet,
809                  * set the pointer to the first mbuf of the packet
810                  * Otherwise, update the total length and the number of segments
811                  * of the current scattered packet, and update the pointer to
812                  * the last mbuf of the current packet.
813                  */
814                 if (rcd->sop) {
815                         RTE_ASSERT(rxd->btype == VMXNET3_RXD_BTYPE_HEAD);
816
817                         if (unlikely(rcd->len == 0)) {
818                                 RTE_ASSERT(rcd->eop);
819
820                                 PMD_RX_LOG(DEBUG,
821                                            "Rx buf was skipped. rxring[%d][%d])",
822                                            ring_idx, idx);
823                                 rte_pktmbuf_free_seg(rxm);
824                                 goto rcd_done;
825                         }
826
827                         rxq->start_seg = rxm;
828                         vmxnet3_rx_offload(rcd, rxm);
829                 } else {
830                         struct rte_mbuf *start = rxq->start_seg;
831
832                         RTE_ASSERT(rxd->btype == VMXNET3_RXD_BTYPE_BODY);
833
834                         start->pkt_len += rxm->data_len;
835                         start->nb_segs++;
836
837                         rxq->last_seg->next = rxm;
838                 }
839                 rxq->last_seg = rxm;
840
841                 if (rcd->eop) {
842                         struct rte_mbuf *start = rxq->start_seg;
843
844                         /* Check for hardware stripped VLAN tag */
845                         if (rcd->ts) {
846                                 start->ol_flags |= (PKT_RX_VLAN_PKT | PKT_RX_VLAN_STRIPPED);
847                                 start->vlan_tci = rte_le_to_cpu_16((uint16_t)rcd->tci);
848                         }
849
850                         rx_pkts[nb_rx++] = start;
851                         rxq->start_seg = NULL;
852                 }
853
854 rcd_done:
855                 rxq->cmd_ring[ring_idx].next2comp = idx;
856                 VMXNET3_INC_RING_IDX_ONLY(rxq->cmd_ring[ring_idx].next2comp,
857                                           rxq->cmd_ring[ring_idx].size);
858
859                 /* It's time to renew descriptors */
860                 vmxnet3_renew_desc(rxq, ring_idx, newm);
861                 if (unlikely(rxq->shared->ctrl.updateRxProd)) {
862                         VMXNET3_WRITE_BAR0_REG(hw, rxprod_reg[ring_idx] + (rxq->queue_id * VMXNET3_REG_ALIGN),
863                                                rxq->cmd_ring[ring_idx].next2fill);
864                 }
865
866                 /* Advance to the next descriptor in comp_ring */
867                 vmxnet3_comp_ring_adv_next2proc(&rxq->comp_ring);
868
869                 rcd = &rxq->comp_ring.base[rxq->comp_ring.next2proc].rcd;
870                 nb_rxd++;
871                 if (nb_rxd > rxq->cmd_ring[0].size) {
872                         PMD_RX_LOG(ERR, "Used up quota of receiving packets,"
873                                    " relinquish control.");
874                         break;
875                 }
876         }
877
878         return nb_rx;
879 }
880
881 /*
882  * Create memzone for device rings. malloc can't be used as the physical address is
883  * needed. If the memzone is already created, then this function returns a ptr
884  * to the old one.
885  */
886 static const struct rte_memzone *
887 ring_dma_zone_reserve(struct rte_eth_dev *dev, const char *ring_name,
888                       uint16_t queue_id, uint32_t ring_size, int socket_id)
889 {
890         char z_name[RTE_MEMZONE_NAMESIZE];
891         const struct rte_memzone *mz;
892
893         snprintf(z_name, sizeof(z_name), "%s_%s_%d_%d",
894                  dev->driver->pci_drv.driver.name, ring_name,
895                  dev->data->port_id, queue_id);
896
897         mz = rte_memzone_lookup(z_name);
898         if (mz)
899                 return mz;
900
901         return rte_memzone_reserve_aligned(z_name, ring_size,
902                                            socket_id, 0, VMXNET3_RING_BA_ALIGN);
903 }
904
905 int
906 vmxnet3_dev_tx_queue_setup(struct rte_eth_dev *dev,
907                            uint16_t queue_idx,
908                            uint16_t nb_desc,
909                            unsigned int socket_id,
910                            const struct rte_eth_txconf *tx_conf)
911 {
912         struct vmxnet3_hw *hw = dev->data->dev_private;
913         const struct rte_memzone *mz;
914         struct vmxnet3_tx_queue *txq;
915         struct vmxnet3_cmd_ring *ring;
916         struct vmxnet3_comp_ring *comp_ring;
917         struct vmxnet3_data_ring *data_ring;
918         int size;
919
920         PMD_INIT_FUNC_TRACE();
921
922         if ((tx_conf->txq_flags & ETH_TXQ_FLAGS_NOXSUMSCTP) !=
923             ETH_TXQ_FLAGS_NOXSUMSCTP) {
924                 PMD_INIT_LOG(ERR, "SCTP checksum offload not supported");
925                 return -EINVAL;
926         }
927
928         txq = rte_zmalloc("ethdev_tx_queue", sizeof(struct vmxnet3_tx_queue),
929                           RTE_CACHE_LINE_SIZE);
930         if (txq == NULL) {
931                 PMD_INIT_LOG(ERR, "Can not allocate tx queue structure");
932                 return -ENOMEM;
933         }
934
935         txq->queue_id = queue_idx;
936         txq->port_id = dev->data->port_id;
937         txq->shared = &hw->tqd_start[queue_idx];
938         txq->hw = hw;
939         txq->qid = queue_idx;
940         txq->stopped = TRUE;
941         txq->txdata_desc_size = hw->txdata_desc_size;
942
943         ring = &txq->cmd_ring;
944         comp_ring = &txq->comp_ring;
945         data_ring = &txq->data_ring;
946
947         /* Tx vmxnet ring length should be between 512-4096 */
948         if (nb_desc < VMXNET3_DEF_TX_RING_SIZE) {
949                 PMD_INIT_LOG(ERR, "VMXNET3 Tx Ring Size Min: %u",
950                              VMXNET3_DEF_TX_RING_SIZE);
951                 return -EINVAL;
952         } else if (nb_desc > VMXNET3_TX_RING_MAX_SIZE) {
953                 PMD_INIT_LOG(ERR, "VMXNET3 Tx Ring Size Max: %u",
954                              VMXNET3_TX_RING_MAX_SIZE);
955                 return -EINVAL;
956         } else {
957                 ring->size = nb_desc;
958                 ring->size &= ~VMXNET3_RING_SIZE_MASK;
959         }
960         comp_ring->size = data_ring->size = ring->size;
961
962         /* Tx vmxnet rings structure initialization */
963         ring->next2fill = 0;
964         ring->next2comp = 0;
965         ring->gen = VMXNET3_INIT_GEN;
966         comp_ring->next2proc = 0;
967         comp_ring->gen = VMXNET3_INIT_GEN;
968
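        /*
         * One contiguous memzone backs all three rings, in order: the Tx
         * command ring (Vmxnet3_TxDesc), the completion ring
         * (Vmxnet3_TxCompDesc) and the data ring (txdata_desc_size bytes per
         * entry); the base/basePA assignments below carve it up accordingly.
         */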
969         size = sizeof(struct Vmxnet3_TxDesc) * ring->size;
970         size += sizeof(struct Vmxnet3_TxCompDesc) * comp_ring->size;
971         size += txq->txdata_desc_size * data_ring->size;
972
973         mz = ring_dma_zone_reserve(dev, "txdesc", queue_idx, size, socket_id);
974         if (mz == NULL) {
975                 PMD_INIT_LOG(ERR, "ERROR: Creating queue descriptors zone");
976                 return -ENOMEM;
977         }
978         memset(mz->addr, 0, mz->len);
979
980         /* cmd_ring initialization */
981         ring->base = mz->addr;
982         ring->basePA = mz->phys_addr;
983
984         /* comp_ring initialization */
985         comp_ring->base = ring->base + ring->size;
986         comp_ring->basePA = ring->basePA +
987                 (sizeof(struct Vmxnet3_TxDesc) * ring->size);
988
989         /* data_ring initialization */
990         data_ring->base = (Vmxnet3_TxDataDesc *)(comp_ring->base + comp_ring->size);
991         data_ring->basePA = comp_ring->basePA +
992                         (sizeof(struct Vmxnet3_TxCompDesc) * comp_ring->size);
993
994         /* cmd_ring0 buf_info allocation */
995         ring->buf_info = rte_zmalloc("tx_ring_buf_info",
996                                      ring->size * sizeof(vmxnet3_buf_info_t), RTE_CACHE_LINE_SIZE);
997         if (ring->buf_info == NULL) {
998                 PMD_INIT_LOG(ERR, "ERROR: Creating tx_buf_info structure");
999                 return -ENOMEM;
1000         }
1001
1002         /* Update the data portion with txq */
1003         dev->data->tx_queues[queue_idx] = txq;
1004
1005         return 0;
1006 }
1007
1008 int
1009 vmxnet3_dev_rx_queue_setup(struct rte_eth_dev *dev,
1010                            uint16_t queue_idx,
1011                            uint16_t nb_desc,
1012                            unsigned int socket_id,
1013                            __rte_unused const struct rte_eth_rxconf *rx_conf,
1014                            struct rte_mempool *mp)
1015 {
1016         const struct rte_memzone *mz;
1017         struct vmxnet3_rx_queue *rxq;
1018         struct vmxnet3_hw *hw = dev->data->dev_private;
1019         struct vmxnet3_cmd_ring *ring0, *ring1, *ring;
1020         struct vmxnet3_comp_ring *comp_ring;
1021         int size;
1022         uint8_t i;
1023         char mem_name[32];
1024
1025         PMD_INIT_FUNC_TRACE();
1026
1027         rxq = rte_zmalloc("ethdev_rx_queue", sizeof(struct vmxnet3_rx_queue),
1028                           RTE_CACHE_LINE_SIZE);
1029         if (rxq == NULL) {
1030                 PMD_INIT_LOG(ERR, "Can not allocate rx queue structure");
1031                 return -ENOMEM;
1032         }
1033
1034         rxq->mp = mp;
1035         rxq->queue_id = queue_idx;
1036         rxq->port_id = dev->data->port_id;
1037         rxq->shared = &hw->rqd_start[queue_idx];
1038         rxq->hw = hw;
1039         rxq->qid1 = queue_idx;
1040         rxq->qid2 = queue_idx + hw->num_rx_queues;
1041         rxq->stopped = TRUE;
1042
1043         ring0 = &rxq->cmd_ring[0];
1044         ring1 = &rxq->cmd_ring[1];
1045         comp_ring = &rxq->comp_ring;
1046
1047         /* Rx vmxnet rings length should be between 256-4096 */
1048         if (nb_desc < VMXNET3_DEF_RX_RING_SIZE) {
1049                 PMD_INIT_LOG(ERR, "VMXNET3 Rx Ring Size Min: 256");
1050                 return -EINVAL;
1051         } else if (nb_desc > VMXNET3_RX_RING_MAX_SIZE) {
1052                 PMD_INIT_LOG(ERR, "VMXNET3 Rx Ring Size Max: 4096");
1053                 return -EINVAL;
1054         } else {
1055                 ring0->size = nb_desc;
1056                 ring0->size &= ~VMXNET3_RING_SIZE_MASK;
1057                 ring1->size = ring0->size;
1058         }
1059
1060         comp_ring->size = ring0->size + ring1->size;
1061
1062         /* Rx vmxnet rings structure initialization */
1063         ring0->next2fill = 0;
1064         ring1->next2fill = 0;
1065         ring0->next2comp = 0;
1066         ring1->next2comp = 0;
1067         ring0->gen = VMXNET3_INIT_GEN;
1068         ring1->gen = VMXNET3_INIT_GEN;
1069         comp_ring->next2proc = 0;
1070         comp_ring->gen = VMXNET3_INIT_GEN;
1071
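        /*
         * One memzone backs the Rx rings, laid out back to back: cmd_ring0
         * descriptors, cmd_ring1 descriptors, then the completion ring.
         */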
1072         size = sizeof(struct Vmxnet3_RxDesc) * (ring0->size + ring1->size);
1073         size += sizeof(struct Vmxnet3_RxCompDesc) * comp_ring->size;
1074
1075         mz = ring_dma_zone_reserve(dev, "rxdesc", queue_idx, size, socket_id);
1076         if (mz == NULL) {
1077                 PMD_INIT_LOG(ERR, "ERROR: Creating queue descriptors zone");
1078                 return -ENOMEM;
1079         }
1080         memset(mz->addr, 0, mz->len);
1081
1082         /* cmd_ring0 initialization */
1083         ring0->base = mz->addr;
1084         ring0->basePA = mz->phys_addr;
1085
1086         /* cmd_ring1 initialization */
1087         ring1->base = ring0->base + ring0->size;
1088         ring1->basePA = ring0->basePA + sizeof(struct Vmxnet3_RxDesc) * ring0->size;
1089
1090         /* comp_ring initialization */
1091         comp_ring->base = ring1->base + ring1->size;
1092         comp_ring->basePA = ring1->basePA + sizeof(struct Vmxnet3_RxDesc) *
1093                 ring1->size;
1094
1095         /* cmd_ring0-cmd_ring1 buf_info allocation */
1096         for (i = 0; i < VMXNET3_RX_CMDRING_SIZE; i++) {
1097
1098                 ring = &rxq->cmd_ring[i];
1099                 ring->rid = i;
1100                 snprintf(mem_name, sizeof(mem_name), "rx_ring_%d_buf_info", i);
1101
1102                 ring->buf_info = rte_zmalloc(mem_name,
1103                                              ring->size * sizeof(vmxnet3_buf_info_t),
1104                                              RTE_CACHE_LINE_SIZE);
1105                 if (ring->buf_info == NULL) {
1106                         PMD_INIT_LOG(ERR, "ERROR: Creating rx_buf_info structure");
1107                         return -ENOMEM;
1108                 }
1109         }
1110
1111         /* Update the data portion with rxq */
1112         dev->data->rx_queues[queue_idx] = rxq;
1113
1114         return 0;
1115 }
1116
1117 /*
1118  * Initializes Receive Unit
1119  * Load mbufs in rx queue in advance
1120  */
1121 int
1122 vmxnet3_dev_rxtx_init(struct rte_eth_dev *dev)
1123 {
1124         struct vmxnet3_hw *hw = dev->data->dev_private;
1125
1126         int i, ret;
1127         uint8_t j;
1128
1129         PMD_INIT_FUNC_TRACE();
1130
1131         for (i = 0; i < hw->num_rx_queues; i++) {
1132                 vmxnet3_rx_queue_t *rxq = dev->data->rx_queues[i];
1133
1134                 for (j = 0; j < VMXNET3_RX_CMDRING_SIZE; j++) {
1135                         /* Post enough buffers to fill the whole ring */
1136                         ret = vmxnet3_post_rx_bufs(rxq, j);
1137                         if (ret <= 0) {
1138                                 PMD_INIT_LOG(ERR,
1139                                              "ERROR: Posting Rxq: %d buffers ring: %d",
1140                                              i, j);
1141                                 return -ret;
1142                         }
1143                         /*
1144                          * Update the device with the next2fill index so that it
1145                          * can use the newly posted mbufs for incoming packets.
1146                          */
1147                         if (unlikely(rxq->shared->ctrl.updateRxProd)) {
1148                                 VMXNET3_WRITE_BAR0_REG(hw, rxprod_reg[j] + (rxq->queue_id * VMXNET3_REG_ALIGN),
1149                                                        rxq->cmd_ring[j].next2fill);
1150                         }
1151                 }
1152                 rxq->stopped = FALSE;
1153                 rxq->start_seg = NULL;
1154         }
1155
1156         for (i = 0; i < dev->data->nb_tx_queues; i++) {
1157                 struct vmxnet3_tx_queue *txq = dev->data->tx_queues[i];
1158
1159                 txq->stopped = FALSE;
1160         }
1161
1162         return 0;
1163 }
1164
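/*
 * Default 40-byte Toeplitz RSS hash key, the same well-known default key
 * used by the Intel PMDs (hence the name).
 */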
1165 static uint8_t rss_intel_key[40] = {
1166         0x6D, 0x5A, 0x56, 0xDA, 0x25, 0x5B, 0x0E, 0xC2,
1167         0x41, 0x67, 0x25, 0x3D, 0x43, 0xA3, 0x8F, 0xB0,
1168         0xD0, 0xCA, 0x2B, 0xCB, 0xAE, 0x7B, 0x30, 0xB4,
1169         0x77, 0xCB, 0x2D, 0xA3, 0x80, 0x30, 0xF2, 0x0C,
1170         0x6A, 0x42, 0xB7, 0x3B, 0xBE, 0xAC, 0x01, 0xFA,
1171 };
1172
1173 /*
1174  * Configure RSS feature
1175  */
1176 int
1177 vmxnet3_rss_configure(struct rte_eth_dev *dev)
1178 {
1179         struct vmxnet3_hw *hw = dev->data->dev_private;
1180         struct VMXNET3_RSSConf *dev_rss_conf;
1181         struct rte_eth_rss_conf *port_rss_conf;
1182         uint64_t rss_hf;
1183         uint8_t i, j;
1184
1185         PMD_INIT_FUNC_TRACE();
1186
1187         dev_rss_conf = hw->rss_conf;
1188         port_rss_conf = &dev->data->dev_conf.rx_adv_conf.rss_conf;
1189
1190         /* loading hashFunc */
1191         dev_rss_conf->hashFunc = VMXNET3_RSS_HASH_FUNC_TOEPLITZ;
1192         /* loading hashKeySize */
1193         dev_rss_conf->hashKeySize = VMXNET3_RSS_MAX_KEY_SIZE;
1194         /* loading indTableSize: Must not exceed VMXNET3_RSS_MAX_IND_TABLE_SIZE (128) */
1195         dev_rss_conf->indTableSize = (uint16_t)(hw->num_rx_queues * 4);
1196
1197         if (port_rss_conf->rss_key == NULL) {
1198                 /* Default hash key */
1199                 port_rss_conf->rss_key = rss_intel_key;
1200         }
1201
1202         /* loading hashKey */
1203         memcpy(&dev_rss_conf->hashKey[0], port_rss_conf->rss_key,
1204                dev_rss_conf->hashKeySize);
1205
1206         /* loading indTable */
1207         for (i = 0, j = 0; i < dev_rss_conf->indTableSize; i++, j++) {
1208                 if (j == dev->data->nb_rx_queues)
1209                         j = 0;
1210                 dev_rss_conf->indTable[i] = j;
1211         }
1212
1213         /* loading hashType */
1214         dev_rss_conf->hashType = 0;
1215         rss_hf = port_rss_conf->rss_hf & VMXNET3_RSS_OFFLOAD_ALL;
1216         if (rss_hf & ETH_RSS_IPV4)
1217                 dev_rss_conf->hashType |= VMXNET3_RSS_HASH_TYPE_IPV4;
1218         if (rss_hf & ETH_RSS_NONFRAG_IPV4_TCP)
1219                 dev_rss_conf->hashType |= VMXNET3_RSS_HASH_TYPE_TCP_IPV4;
1220         if (rss_hf & ETH_RSS_IPV6)
1221                 dev_rss_conf->hashType |= VMXNET3_RSS_HASH_TYPE_IPV6;
1222         if (rss_hf & ETH_RSS_NONFRAG_IPV6_TCP)
1223                 dev_rss_conf->hashType |= VMXNET3_RSS_HASH_TYPE_TCP_IPV6;
1224
1225         return VMXNET3_SUCCESS;
1226 }