net/vmxnet3: fix Rx deadlock
[dpdk.git] / drivers / net / vmxnet3 / vmxnet3_rxtx.c
1 /*-
2  *   BSD LICENSE
3  *
4  *   Copyright(c) 2010-2015 Intel Corporation. All rights reserved.
5  *   All rights reserved.
6  *
7  *   Redistribution and use in source and binary forms, with or without
8  *   modification, are permitted provided that the following conditions
9  *   are met:
10  *
11  *     * Redistributions of source code must retain the above copyright
12  *       notice, this list of conditions and the following disclaimer.
13  *     * Redistributions in binary form must reproduce the above copyright
14  *       notice, this list of conditions and the following disclaimer in
15  *       the documentation and/or other materials provided with the
16  *       distribution.
17  *     * Neither the name of Intel Corporation nor the names of its
18  *       contributors may be used to endorse or promote products derived
19  *       from this software without specific prior written permission.
20  *
21  *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
22  *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
23  *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
24  *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
25  *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
26  *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
27  *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
28  *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
29  *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
30  *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
31  *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
32  */
33
34 #include <sys/queue.h>
35
36 #include <stdio.h>
37 #include <stdlib.h>
38 #include <string.h>
39 #include <errno.h>
40 #include <stdint.h>
41 #include <stdarg.h>
42 #include <unistd.h>
43 #include <inttypes.h>
44
45 #include <rte_byteorder.h>
46 #include <rte_common.h>
47 #include <rte_cycles.h>
48 #include <rte_log.h>
49 #include <rte_debug.h>
50 #include <rte_interrupts.h>
51 #include <rte_pci.h>
52 #include <rte_memory.h>
53 #include <rte_memzone.h>
54 #include <rte_launch.h>
55 #include <rte_eal.h>
56 #include <rte_per_lcore.h>
57 #include <rte_lcore.h>
58 #include <rte_atomic.h>
59 #include <rte_branch_prediction.h>
60 #include <rte_mempool.h>
61 #include <rte_malloc.h>
62 #include <rte_mbuf.h>
63 #include <rte_ether.h>
64 #include <rte_ethdev.h>
65 #include <rte_prefetch.h>
66 #include <rte_ip.h>
67 #include <rte_udp.h>
68 #include <rte_tcp.h>
69 #include <rte_sctp.h>
70 #include <rte_string_fns.h>
71 #include <rte_errno.h>
72 #include <rte_net.h>
73
74 #include "base/vmxnet3_defs.h"
75 #include "vmxnet3_ring.h"
76
77 #include "vmxnet3_logs.h"
78 #include "vmxnet3_ethdev.h"
79
80 #define VMXNET3_TX_OFFLOAD_MASK ( \
81                 PKT_TX_VLAN_PKT | \
82                 PKT_TX_L4_MASK |  \
83                 PKT_TX_TCP_SEG)
84
85 #define VMXNET3_TX_OFFLOAD_NOTSUP_MASK  \
86         (PKT_TX_OFFLOAD_MASK ^ VMXNET3_TX_OFFLOAD_MASK)
87
88 static const uint32_t rxprod_reg[2] = {VMXNET3_REG_RXPROD, VMXNET3_REG_RXPROD2};
89
90 static int vmxnet3_post_rx_bufs(vmxnet3_rx_queue_t*, uint8_t);
91 static void vmxnet3_tq_tx_complete(vmxnet3_tx_queue_t *);
92 #ifdef RTE_LIBRTE_VMXNET3_DEBUG_DRIVER_NOT_USED
93 static void vmxnet3_rxq_dump(struct vmxnet3_rx_queue *);
94 static void vmxnet3_txq_dump(struct vmxnet3_tx_queue *);
95 #endif
96
97 #ifdef RTE_LIBRTE_VMXNET3_DEBUG_DRIVER_NOT_USED
98 static void
99 vmxnet3_rxq_dump(struct vmxnet3_rx_queue *rxq)
100 {
101         uint32_t avail = 0;
102
103         if (rxq == NULL)
104                 return;
105
106         PMD_RX_LOG(DEBUG,
107                    "RXQ: cmd0 base : %p cmd1 base : %p comp ring base : %p.",
108                    rxq->cmd_ring[0].base, rxq->cmd_ring[1].base, rxq->comp_ring.base);
109         PMD_RX_LOG(DEBUG,
110                    "RXQ: cmd0 basePA : 0x%lx cmd1 basePA : 0x%lx comp ring basePA : 0x%lx.",
111                    (unsigned long)rxq->cmd_ring[0].basePA,
112                    (unsigned long)rxq->cmd_ring[1].basePA,
113                    (unsigned long)rxq->comp_ring.basePA);
114
115         avail = vmxnet3_cmd_ring_desc_avail(&rxq->cmd_ring[0]);
116         PMD_RX_LOG(DEBUG,
117                    "RXQ:cmd0: size=%u; free=%u; next2proc=%u; queued=%u",
118                    (uint32_t)rxq->cmd_ring[0].size, avail,
119                    rxq->comp_ring.next2proc,
120                    rxq->cmd_ring[0].size - avail);
121
122         avail = vmxnet3_cmd_ring_desc_avail(&rxq->cmd_ring[1]);
123         PMD_RX_LOG(DEBUG, "RXQ:cmd1 size=%u; free=%u; next2proc=%u; queued=%u",
124                    (uint32_t)rxq->cmd_ring[1].size, avail, rxq->comp_ring.next2proc,
125                    rxq->cmd_ring[1].size - avail);
126
127 }
128
129 static void
130 vmxnet3_txq_dump(struct vmxnet3_tx_queue *txq)
131 {
132         uint32_t avail = 0;
133
134         if (txq == NULL)
135                 return;
136
137         PMD_TX_LOG(DEBUG, "TXQ: cmd base : %p comp ring base : %p data ring base : %p.",
138                    txq->cmd_ring.base, txq->comp_ring.base, txq->data_ring.base);
139         PMD_TX_LOG(DEBUG, "TXQ: cmd basePA : 0x%lx comp ring basePA : 0x%lx data ring basePA : 0x%lx.",
140                    (unsigned long)txq->cmd_ring.basePA,
141                    (unsigned long)txq->comp_ring.basePA,
142                    (unsigned long)txq->data_ring.basePA);
143
144         avail = vmxnet3_cmd_ring_desc_avail(&txq->cmd_ring);
145         PMD_TX_LOG(DEBUG, "TXQ: size=%u; free=%u; next2proc=%u; queued=%u",
146                    (uint32_t)txq->cmd_ring.size, avail,
147                    txq->comp_ring.next2proc, txq->cmd_ring.size - avail);
148 }
149 #endif
150
151 static void
152 vmxnet3_tx_cmd_ring_release_mbufs(vmxnet3_cmd_ring_t *ring)
153 {
154         while (ring->next2comp != ring->next2fill) {
155                 /* No need to worry about desc ownership, device is quiesced by now. */
156                 vmxnet3_buf_info_t *buf_info = ring->buf_info + ring->next2comp;
157
158                 if (buf_info->m) {
159                         rte_pktmbuf_free(buf_info->m);
160                         buf_info->m = NULL;
161                         buf_info->bufPA = 0;
162                         buf_info->len = 0;
163                 }
164                 vmxnet3_cmd_ring_adv_next2comp(ring);
165         }
166 }
167
168 static void
169 vmxnet3_rx_cmd_ring_release_mbufs(vmxnet3_cmd_ring_t *ring)
170 {
171         uint32_t i;
172
173         for (i = 0; i < ring->size; i++) {
174                 /* No need to worry about desc ownership, device is quiesced by now. */
175                 vmxnet3_buf_info_t *buf_info = &ring->buf_info[i];
176
177                 if (buf_info->m) {
178                         rte_pktmbuf_free_seg(buf_info->m);
179                         buf_info->m = NULL;
180                         buf_info->bufPA = 0;
181                         buf_info->len = 0;
182                 }
183                 vmxnet3_cmd_ring_adv_next2comp(ring);
184         }
185 }
186
static void
vmxnet3_cmd_ring_release(vmxnet3_cmd_ring_t *ring)
{
        /* Free the per-descriptor bookkeeping array (rte_free(NULL) is a
         * no-op) and clear the pointer to guard against double-free.
         */
        rte_free(ring->buf_info);
        ring->buf_info = NULL;
}
193
194 void
195 vmxnet3_dev_tx_queue_release(void *txq)
196 {
197         vmxnet3_tx_queue_t *tq = txq;
198
199         if (tq != NULL) {
200                 /* Release mbufs */
201                 vmxnet3_tx_cmd_ring_release_mbufs(&tq->cmd_ring);
202                 /* Release the cmd_ring */
203                 vmxnet3_cmd_ring_release(&tq->cmd_ring);
204         }
205 }
206
207 void
208 vmxnet3_dev_rx_queue_release(void *rxq)
209 {
210         int i;
211         vmxnet3_rx_queue_t *rq = rxq;
212
213         if (rq != NULL) {
214                 /* Release mbufs */
215                 for (i = 0; i < VMXNET3_RX_CMDRING_SIZE; i++)
216                         vmxnet3_rx_cmd_ring_release_mbufs(&rq->cmd_ring[i]);
217
218                 /* Release both the cmd_rings */
219                 for (i = 0; i < VMXNET3_RX_CMDRING_SIZE; i++)
220                         vmxnet3_cmd_ring_release(&rq->cmd_ring[i]);
221         }
222 }
223
224 static void
225 vmxnet3_dev_tx_queue_reset(void *txq)
226 {
227         vmxnet3_tx_queue_t *tq = txq;
228         struct vmxnet3_cmd_ring *ring = &tq->cmd_ring;
229         struct vmxnet3_comp_ring *comp_ring = &tq->comp_ring;
230         struct vmxnet3_data_ring *data_ring = &tq->data_ring;
231         int size;
232
233         if (tq != NULL) {
234                 /* Release the cmd_ring mbufs */
235                 vmxnet3_tx_cmd_ring_release_mbufs(&tq->cmd_ring);
236         }
237
238         /* Tx vmxnet rings structure initialization*/
239         ring->next2fill = 0;
240         ring->next2comp = 0;
241         ring->gen = VMXNET3_INIT_GEN;
242         comp_ring->next2proc = 0;
243         comp_ring->gen = VMXNET3_INIT_GEN;
244
245         size = sizeof(struct Vmxnet3_TxDesc) * ring->size;
246         size += sizeof(struct Vmxnet3_TxCompDesc) * comp_ring->size;
247         size += sizeof(struct Vmxnet3_TxDataDesc) * data_ring->size;
248
249         memset(ring->base, 0, size);
250 }
251
252 static void
253 vmxnet3_dev_rx_queue_reset(void *rxq)
254 {
255         int i;
256         vmxnet3_rx_queue_t *rq = rxq;
257         struct vmxnet3_cmd_ring *ring0, *ring1;
258         struct vmxnet3_comp_ring *comp_ring;
259         int size;
260
261         if (rq != NULL) {
262                 /* Release both the cmd_rings mbufs */
263                 for (i = 0; i < VMXNET3_RX_CMDRING_SIZE; i++)
264                         vmxnet3_rx_cmd_ring_release_mbufs(&rq->cmd_ring[i]);
265         }
266
267         ring0 = &rq->cmd_ring[0];
268         ring1 = &rq->cmd_ring[1];
269         comp_ring = &rq->comp_ring;
270
271         /* Rx vmxnet rings structure initialization */
272         ring0->next2fill = 0;
273         ring1->next2fill = 0;
274         ring0->next2comp = 0;
275         ring1->next2comp = 0;
276         ring0->gen = VMXNET3_INIT_GEN;
277         ring1->gen = VMXNET3_INIT_GEN;
278         comp_ring->next2proc = 0;
279         comp_ring->gen = VMXNET3_INIT_GEN;
280
281         size = sizeof(struct Vmxnet3_RxDesc) * (ring0->size + ring1->size);
282         size += sizeof(struct Vmxnet3_RxCompDesc) * comp_ring->size;
283
284         memset(ring0->base, 0, size);
285 }
286
287 void
288 vmxnet3_dev_clear_queues(struct rte_eth_dev *dev)
289 {
290         unsigned i;
291
292         PMD_INIT_FUNC_TRACE();
293
294         for (i = 0; i < dev->data->nb_tx_queues; i++) {
295                 struct vmxnet3_tx_queue *txq = dev->data->tx_queues[i];
296
297                 if (txq != NULL) {
298                         txq->stopped = TRUE;
299                         vmxnet3_dev_tx_queue_reset(txq);
300                 }
301         }
302
303         for (i = 0; i < dev->data->nb_rx_queues; i++) {
304                 struct vmxnet3_rx_queue *rxq = dev->data->rx_queues[i];
305
306                 if (rxq != NULL) {
307                         rxq->stopped = TRUE;
308                         vmxnet3_dev_rx_queue_reset(rxq);
309                 }
310         }
311 }
312
/*
 * Reclaim all Tx command-ring descriptors of the packet whose EOP
 * descriptor sits at eop_idx, freeing the packet's mbuf chain.
 * Returns the number of command-ring descriptors released.
 */
static int
vmxnet3_unmap_pkt(uint16_t eop_idx, vmxnet3_tx_queue_t *txq)
{
        int completed = 0;
        struct rte_mbuf *mbuf;

        /* Release cmd_ring descriptor and free mbuf */
        RTE_ASSERT(txq->cmd_ring.base[eop_idx].txd.eop == 1);

        /* The mbuf for the whole chain is stored only on the EOP slot
         * (see the buf_info assignment in the transmit path).
         */
        mbuf = txq->cmd_ring.buf_info[eop_idx].m;
        if (mbuf == NULL)
                rte_panic("EOP desc does not point to a valid mbuf");
        rte_pktmbuf_free(mbuf);

        txq->cmd_ring.buf_info[eop_idx].m = NULL;

        /* Advance next2comp over the packet's non-EOP descriptors. */
        while (txq->cmd_ring.next2comp != eop_idx) {
                /* no out-of-order completion */
                RTE_ASSERT(txq->cmd_ring.base[txq->cmd_ring.next2comp].txd.cq == 0);
                vmxnet3_cmd_ring_adv_next2comp(&txq->cmd_ring);
                completed++;
        }

        /* Mark the txd for which tcd was generated as completed */
        vmxnet3_cmd_ring_adv_next2comp(&txq->cmd_ring);

        /* +1 accounts for the EOP descriptor itself. */
        return completed + 1;
}
341
342 static void
343 vmxnet3_tq_tx_complete(vmxnet3_tx_queue_t *txq)
344 {
345         int completed = 0;
346         vmxnet3_comp_ring_t *comp_ring = &txq->comp_ring;
347         struct Vmxnet3_TxCompDesc *tcd = (struct Vmxnet3_TxCompDesc *)
348                 (comp_ring->base + comp_ring->next2proc);
349
350         while (tcd->gen == comp_ring->gen) {
351                 completed += vmxnet3_unmap_pkt(tcd->txdIdx, txq);
352
353                 vmxnet3_comp_ring_adv_next2proc(comp_ring);
354                 tcd = (struct Vmxnet3_TxCompDesc *)(comp_ring->base +
355                                                     comp_ring->next2proc);
356         }
357
358         PMD_TX_LOG(DEBUG, "Processed %d tx comps & command descs.", completed);
359 }
360
361 uint16_t
362 vmxnet3_prep_pkts(__rte_unused void *tx_queue, struct rte_mbuf **tx_pkts,
363         uint16_t nb_pkts)
364 {
365         int32_t ret;
366         uint32_t i;
367         uint64_t ol_flags;
368         struct rte_mbuf *m;
369
370         for (i = 0; i != nb_pkts; i++) {
371                 m = tx_pkts[i];
372                 ol_flags = m->ol_flags;
373
374                 /* Non-TSO packet cannot occupy more than
375                  * VMXNET3_MAX_TXD_PER_PKT TX descriptors.
376                  */
377                 if ((ol_flags & PKT_TX_TCP_SEG) == 0 &&
378                                 m->nb_segs > VMXNET3_MAX_TXD_PER_PKT) {
379                         rte_errno = -EINVAL;
380                         return i;
381                 }
382
383                 /* check that only supported TX offloads are requested. */
384                 if ((ol_flags & VMXNET3_TX_OFFLOAD_NOTSUP_MASK) != 0 ||
385                                 (ol_flags & PKT_TX_L4_MASK) ==
386                                 PKT_TX_SCTP_CKSUM) {
387                         rte_errno = -ENOTSUP;
388                         return i;
389                 }
390
391 #ifdef RTE_LIBRTE_ETHDEV_DEBUG
392                 ret = rte_validate_tx_offload(m);
393                 if (ret != 0) {
394                         rte_errno = ret;
395                         return i;
396                 }
397 #endif
398                 ret = rte_net_intel_cksum_prepare(m);
399                 if (ret != 0) {
400                         rte_errno = ret;
401                         return i;
402                 }
403         }
404
405         return i;
406 }
407
/*
 * Burst transmit: post up to nb_pkts mbufs onto the Tx command ring and
 * notify the device once the deferred-packet count crosses the threshold
 * published by the device in the shared queue control area.
 * Returns the number of packets consumed from tx_pkts (dropped packets
 * count as consumed).
 */
uint16_t
vmxnet3_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts,
                  uint16_t nb_pkts)
{
        uint16_t nb_tx;
        vmxnet3_tx_queue_t *txq = tx_queue;
        struct vmxnet3_hw *hw = txq->hw;
        Vmxnet3_TxQueueCtrl *txq_ctrl = &txq->shared->ctrl;
        /* Running count of packets queued but not yet doorbelled, kept in
         * sync with the little-endian shared-memory copy below.
         */
        uint32_t deferred = rte_le_to_cpu_32(txq_ctrl->txNumDeferred);

        if (unlikely(txq->stopped)) {
                PMD_TX_LOG(DEBUG, "Tx queue is stopped.");
                return 0;
        }

        /* Free up the comp_descriptors aggressively */
        vmxnet3_tq_tx_complete(txq);

        nb_tx = 0;
        while (nb_tx < nb_pkts) {
                Vmxnet3_GenericDesc *gdesc;
                vmxnet3_buf_info_t *tbi;
                uint32_t first2fill, avail, dw2;
                struct rte_mbuf *txm = tx_pkts[nb_tx];
                struct rte_mbuf *m_seg = txm;
                int copy_size = 0;
                bool tso = (txm->ol_flags & PKT_TX_TCP_SEG) != 0;
                /* # of descriptors needed for a packet. */
                unsigned count = txm->nb_segs;

                avail = vmxnet3_cmd_ring_desc_avail(&txq->cmd_ring);
                if (count > avail) {
                        /* Is command ring full? */
                        if (unlikely(avail == 0)) {
                                PMD_TX_LOG(DEBUG, "No free ring descriptors");
                                txq->stats.tx_ring_full++;
                                txq->stats.drop_total += (nb_pkts - nb_tx);
                                break;
                        }

                        /* Command ring is not full but cannot handle the
                         * multi-segmented packet. Let's try the next packet
                         * in this case.
                         */
                        PMD_TX_LOG(DEBUG, "Running out of ring descriptors "
                                   "(avail %d needed %d)", avail, count);
                        txq->stats.drop_total++;
                        if (tso)
                                txq->stats.drop_tso++;
                        rte_pktmbuf_free(txm);
                        nb_tx++;
                        continue;
                }

                /* Drop non-TSO packet that is excessively fragmented */
                if (unlikely(!tso && count > VMXNET3_MAX_TXD_PER_PKT)) {
                        PMD_TX_LOG(ERR, "Non-TSO packet cannot occupy more than %d tx "
                                   "descriptors. Packet dropped.", VMXNET3_MAX_TXD_PER_PKT);
                        txq->stats.drop_too_many_segs++;
                        txq->stats.drop_total++;
                        rte_pktmbuf_free(txm);
                        nb_tx++;
                        continue;
                }

                /* Small single-segment packets are copied into the data
                 * ring so the device reads from one contiguous area.
                 */
                if (txm->nb_segs == 1 &&
                    rte_pktmbuf_pkt_len(txm) <= VMXNET3_HDR_COPY_SIZE) {
                        struct Vmxnet3_TxDataDesc *tdd;

                        tdd = txq->data_ring.base + txq->cmd_ring.next2fill;
                        copy_size = rte_pktmbuf_pkt_len(txm);
                        rte_memcpy(tdd->data, rte_pktmbuf_mtod(txm, char *), copy_size);
                }

                /* use the previous gen bit for the SOP desc */
                dw2 = (txq->cmd_ring.gen ^ 0x1) << VMXNET3_TXD_GEN_SHIFT;
                first2fill = txq->cmd_ring.next2fill;
                do {
                        /* Remember the transmit buffer for cleanup */
                        tbi = txq->cmd_ring.buf_info + txq->cmd_ring.next2fill;

                        /* NB: the following assumes that VMXNET3 maximum
                         * transmit buffer size (16K) is greater than
                         * maximum size of mbuf segment size.
                         */
                        gdesc = txq->cmd_ring.base + txq->cmd_ring.next2fill;
                        if (copy_size)
                                gdesc->txd.addr = rte_cpu_to_le_64(txq->data_ring.basePA +
                                                                   txq->cmd_ring.next2fill *
                                                                   sizeof(struct Vmxnet3_TxDataDesc));
                        else
                                gdesc->txd.addr = rte_mbuf_data_dma_addr(m_seg);

                        gdesc->dword[2] = dw2 | m_seg->data_len;
                        gdesc->dword[3] = 0;

                        /* move to the next2fill descriptor */
                        vmxnet3_cmd_ring_adv_next2fill(&txq->cmd_ring);

                        /* use the right gen for non-SOP desc */
                        dw2 = txq->cmd_ring.gen << VMXNET3_TXD_GEN_SHIFT;
                } while ((m_seg = m_seg->next) != NULL);

                /* set the last buf_info for the pkt */
                tbi->m = txm;
                /* Update the EOP descriptor */
                gdesc->dword[3] |= VMXNET3_TXD_EOP | VMXNET3_TXD_CQ;

                /* Add VLAN tag if present */
                gdesc = txq->cmd_ring.base + first2fill;
                if (txm->ol_flags & PKT_TX_VLAN_PKT) {
                        gdesc->txd.ti = 1;
                        gdesc->txd.tci = txm->vlan_tci;
                }

                /* Fill the offload fields on the SOP descriptor. */
                if (tso) {
                        uint16_t mss = txm->tso_segsz;

                        RTE_ASSERT(mss > 0);

                        gdesc->txd.hlen = txm->l2_len + txm->l3_len + txm->l4_len;
                        gdesc->txd.om = VMXNET3_OM_TSO;
                        gdesc->txd.msscof = mss;

                        /* A TSO packet defers one doorbell credit per
                         * resulting segment (payload / mss, rounded up).
                         */
                        deferred += (rte_pktmbuf_pkt_len(txm) - gdesc->txd.hlen + mss - 1) / mss;
                } else if (txm->ol_flags & PKT_TX_L4_MASK) {
                        gdesc->txd.om = VMXNET3_OM_CSUM;
                        gdesc->txd.hlen = txm->l2_len + txm->l3_len;

                        /* msscof carries the checksum field offset for
                         * checksum-offloaded packets.
                         */
                        switch (txm->ol_flags & PKT_TX_L4_MASK) {
                        case PKT_TX_TCP_CKSUM:
                                gdesc->txd.msscof = gdesc->txd.hlen + offsetof(struct tcp_hdr, cksum);
                                break;
                        case PKT_TX_UDP_CKSUM:
                                gdesc->txd.msscof = gdesc->txd.hlen + offsetof(struct udp_hdr, dgram_cksum);
                                break;
                        default:
                                /* prep_pkts should have rejected this;
                                 * reaching here is a driver bug.
                                 */
                                PMD_TX_LOG(WARNING, "requested cksum offload not supported %#llx",
                                           txm->ol_flags & PKT_TX_L4_MASK);
                                abort();
                        }
                        deferred++;
                } else {
                        gdesc->txd.hlen = 0;
                        gdesc->txd.om = VMXNET3_OM_NONE;
                        gdesc->txd.msscof = 0;
                        deferred++;
                }

                /* flip the GEN bit on the SOP */
                rte_compiler_barrier();
                gdesc->dword[2] ^= VMXNET3_TXD_GEN;

                txq_ctrl->txNumDeferred = rte_cpu_to_le_32(deferred);
                nb_tx++;
        }

        PMD_TX_LOG(DEBUG, "vmxnet3 txThreshold: %u", rte_le_to_cpu_32(txq_ctrl->txThreshold));

        if (deferred >= rte_le_to_cpu_32(txq_ctrl->txThreshold)) {
                txq_ctrl->txNumDeferred = 0;
                /* Notify vSwitch that packets are available. */
                VMXNET3_WRITE_BAR0_REG(hw, (VMXNET3_REG_TXPROD + txq->queue_id * VMXNET3_REG_ALIGN),
                                       txq->cmd_ring.next2fill);
        }

        return nb_tx;
}
576
577 static inline void
578 vmxnet3_renew_desc(vmxnet3_rx_queue_t *rxq, uint8_t ring_id,
579                    struct rte_mbuf *mbuf)
580 {
581         uint32_t val = 0;
582         struct vmxnet3_cmd_ring *ring = &rxq->cmd_ring[ring_id];
583         struct Vmxnet3_RxDesc *rxd =
584                 (struct Vmxnet3_RxDesc *)(ring->base + ring->next2fill);
585         vmxnet3_buf_info_t *buf_info = &ring->buf_info[ring->next2fill];
586
587         if (ring_id == 0)
588                 val = VMXNET3_RXD_BTYPE_HEAD;
589         else
590                 val = VMXNET3_RXD_BTYPE_BODY;
591
592         buf_info->m = mbuf;
593         buf_info->len = (uint16_t)(mbuf->buf_len - RTE_PKTMBUF_HEADROOM);
594         buf_info->bufPA = rte_mbuf_data_dma_addr_default(mbuf);
595
596         rxd->addr = buf_info->bufPA;
597         rxd->btype = val;
598         rxd->len = buf_info->len;
599         rxd->gen = ring->gen;
600
601         vmxnet3_cmd_ring_adv_next2fill(ring);
602 }
603 /*
604  *  Allocates mbufs and clusters. Post rx descriptors with buffer details
605  *  so that device can receive packets in those buffers.
606  *  Ring layout:
607  *      Among the two rings, 1st ring contains buffers of type 0 and type 1.
608  *      bufs_per_pkt is set such that for non-LRO cases all the buffers required
609  *      by a frame will fit in 1st ring (1st buf of type0 and rest of type1).
610  *      2nd ring contains buffers of type 1 alone. Second ring mostly be used
611  *      only for LRO.
612  */
613 static int
614 vmxnet3_post_rx_bufs(vmxnet3_rx_queue_t *rxq, uint8_t ring_id)
615 {
616         int err = 0;
617         uint32_t i = 0, val = 0;
618         struct vmxnet3_cmd_ring *ring = &rxq->cmd_ring[ring_id];
619
620         if (ring_id == 0) {
621                 /* Usually: One HEAD type buf per packet
622                  * val = (ring->next2fill % rxq->hw->bufs_per_pkt) ?
623                  * VMXNET3_RXD_BTYPE_BODY : VMXNET3_RXD_BTYPE_HEAD;
624                  */
625
626                 /* We use single packet buffer so all heads here */
627                 val = VMXNET3_RXD_BTYPE_HEAD;
628         } else {
629                 /* All BODY type buffers for 2nd ring */
630                 val = VMXNET3_RXD_BTYPE_BODY;
631         }
632
633         while (vmxnet3_cmd_ring_desc_avail(ring) > 0) {
634                 struct Vmxnet3_RxDesc *rxd;
635                 struct rte_mbuf *mbuf;
636                 vmxnet3_buf_info_t *buf_info = &ring->buf_info[ring->next2fill];
637
638                 rxd = (struct Vmxnet3_RxDesc *)(ring->base + ring->next2fill);
639
640                 /* Allocate blank mbuf for the current Rx Descriptor */
641                 mbuf = rte_mbuf_raw_alloc(rxq->mp);
642                 if (unlikely(mbuf == NULL)) {
643                         PMD_RX_LOG(ERR, "Error allocating mbuf");
644                         rxq->stats.rx_buf_alloc_failure++;
645                         err = ENOMEM;
646                         break;
647                 }
648
649                 /*
650                  * Load mbuf pointer into buf_info[ring_size]
651                  * buf_info structure is equivalent to cookie for virtio-virtqueue
652                  */
653                 buf_info->m = mbuf;
654                 buf_info->len = (uint16_t)(mbuf->buf_len -
655                                            RTE_PKTMBUF_HEADROOM);
656                 buf_info->bufPA = rte_mbuf_data_dma_addr_default(mbuf);
657
658                 /* Load Rx Descriptor with the buffer's GPA */
659                 rxd->addr = buf_info->bufPA;
660
661                 /* After this point rxd->addr MUST not be NULL */
662                 rxd->btype = val;
663                 rxd->len = buf_info->len;
664                 /* Flip gen bit at the end to change ownership */
665                 rxd->gen = ring->gen;
666
667                 vmxnet3_cmd_ring_adv_next2fill(ring);
668                 i++;
669         }
670
671         /* Return error only if no buffers are posted at present */
672         if (vmxnet3_cmd_ring_desc_avail(ring) >= (ring->size - 1))
673                 return -err;
674         else
675                 return i;
676 }
677
678
679 /* Receive side checksum and other offloads */
680 static void
681 vmxnet3_rx_offload(const Vmxnet3_RxCompDesc *rcd, struct rte_mbuf *rxm)
682 {
683         /* Check for RSS */
684         if (rcd->rssType != VMXNET3_RCD_RSS_TYPE_NONE) {
685                 rxm->ol_flags |= PKT_RX_RSS_HASH;
686                 rxm->hash.rss = rcd->rssHash;
687         }
688
689         /* Check packet type, checksum errors, etc. Only support IPv4 for now. */
690         if (rcd->v4) {
691                 struct ether_hdr *eth = rte_pktmbuf_mtod(rxm, struct ether_hdr *);
692                 struct ipv4_hdr *ip = (struct ipv4_hdr *)(eth + 1);
693
694                 if (((ip->version_ihl & 0xf) << 2) > (int)sizeof(struct ipv4_hdr))
695                         rxm->packet_type = RTE_PTYPE_L3_IPV4_EXT;
696                 else
697                         rxm->packet_type = RTE_PTYPE_L3_IPV4;
698
699                 if (!rcd->cnc) {
700                         if (!rcd->ipc)
701                                 rxm->ol_flags |= PKT_RX_IP_CKSUM_BAD;
702
703                         if ((rcd->tcp || rcd->udp) && !rcd->tuc)
704                                 rxm->ol_flags |= PKT_RX_L4_CKSUM_BAD;
705                 }
706         }
707 }
708
709 /*
710  * Process the Rx Completion Ring of given vmxnet3_rx_queue
711  * for nb_pkts burst and return the number of packets received
712  */
713 uint16_t
714 vmxnet3_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t nb_pkts)
715 {
716         uint16_t nb_rx;
717         uint32_t nb_rxd, idx;
718         uint8_t ring_idx;
719         vmxnet3_rx_queue_t *rxq;
720         Vmxnet3_RxCompDesc *rcd;
721         vmxnet3_buf_info_t *rbi;
722         Vmxnet3_RxDesc *rxd;
723         struct rte_mbuf *rxm = NULL;
724         struct vmxnet3_hw *hw;
725
726         nb_rx = 0;
727         ring_idx = 0;
728         nb_rxd = 0;
729         idx = 0;
730
731         rxq = rx_queue;
732         hw = rxq->hw;
733
734         rcd = &rxq->comp_ring.base[rxq->comp_ring.next2proc].rcd;
735
736         if (unlikely(rxq->stopped)) {
737                 PMD_RX_LOG(DEBUG, "Rx queue is stopped.");
738                 return 0;
739         }
740
741         while (rcd->gen == rxq->comp_ring.gen) {
742                 struct rte_mbuf *newm;
743
744                 if (nb_rx >= nb_pkts)
745                         break;
746
747                 newm = rte_mbuf_raw_alloc(rxq->mp);
748                 if (unlikely(newm == NULL)) {
749                         PMD_RX_LOG(ERR, "Error allocating mbuf");
750                         rxq->stats.rx_buf_alloc_failure++;
751                         break;
752                 }
753
754                 idx = rcd->rxdIdx;
755                 ring_idx = (uint8_t)((rcd->rqID == rxq->qid1) ? 0 : 1);
756                 rxd = (Vmxnet3_RxDesc *)rxq->cmd_ring[ring_idx].base + idx;
757                 RTE_SET_USED(rxd); /* used only for assert when enabled */
758                 rbi = rxq->cmd_ring[ring_idx].buf_info + idx;
759
760                 PMD_RX_LOG(DEBUG, "rxd idx: %d ring idx: %d.", idx, ring_idx);
761
762                 RTE_ASSERT(rcd->len <= rxd->len);
763                 RTE_ASSERT(rbi->m);
764
765                 /* Get the packet buffer pointer from buf_info */
766                 rxm = rbi->m;
767
768                 /* Clear descriptor associated buf_info to be reused */
769                 rbi->m = NULL;
770                 rbi->bufPA = 0;
771
772                 /* Update the index that we received a packet */
773                 rxq->cmd_ring[ring_idx].next2comp = idx;
774
775                 /* For RCD with EOP set, check if there is frame error */
776                 if (unlikely(rcd->eop && rcd->err)) {
777                         rxq->stats.drop_total++;
778                         rxq->stats.drop_err++;
779
780                         if (!rcd->fcs) {
781                                 rxq->stats.drop_fcs++;
782                                 PMD_RX_LOG(ERR, "Recv packet dropped due to frame err.");
783                         }
784                         PMD_RX_LOG(ERR, "Error in received packet rcd#:%d rxd:%d",
785                                    (int)(rcd - (struct Vmxnet3_RxCompDesc *)
786                                          rxq->comp_ring.base), rcd->rxdIdx);
787                         rte_pktmbuf_free_seg(rxm);
788                         goto rcd_done;
789                 }
790
791                 /* Initialize newly received packet buffer */
792                 rxm->port = rxq->port_id;
793                 rxm->nb_segs = 1;
794                 rxm->next = NULL;
795                 rxm->pkt_len = (uint16_t)rcd->len;
796                 rxm->data_len = (uint16_t)rcd->len;
797                 rxm->data_off = RTE_PKTMBUF_HEADROOM;
798                 rxm->ol_flags = 0;
799                 rxm->vlan_tci = 0;
800
801                 /*
802                  * If this is the first buffer of the received packet,
803                  * set the pointer to the first mbuf of the packet
804                  * Otherwise, update the total length and the number of segments
805                  * of the current scattered packet, and update the pointer to
806                  * the last mbuf of the current packet.
807                  */
808                 if (rcd->sop) {
809                         RTE_ASSERT(rxd->btype == VMXNET3_RXD_BTYPE_HEAD);
810
811                         if (unlikely(rcd->len == 0)) {
812                                 RTE_ASSERT(rcd->eop);
813
814                                 PMD_RX_LOG(DEBUG,
815                                            "Rx buf was skipped. rxring[%d][%d])",
816                                            ring_idx, idx);
817                                 rte_pktmbuf_free_seg(rxm);
818                                 goto rcd_done;
819                         }
820
821                         rxq->start_seg = rxm;
822                         vmxnet3_rx_offload(rcd, rxm);
823                 } else {
824                         struct rte_mbuf *start = rxq->start_seg;
825
826                         RTE_ASSERT(rxd->btype == VMXNET3_RXD_BTYPE_BODY);
827
828                         start->pkt_len += rxm->data_len;
829                         start->nb_segs++;
830
831                         rxq->last_seg->next = rxm;
832                 }
833                 rxq->last_seg = rxm;
834
835                 if (rcd->eop) {
836                         struct rte_mbuf *start = rxq->start_seg;
837
838                         /* Check for hardware stripped VLAN tag */
839                         if (rcd->ts) {
840                                 start->ol_flags |= (PKT_RX_VLAN_PKT | PKT_RX_VLAN_STRIPPED);
841                                 start->vlan_tci = rte_le_to_cpu_16((uint16_t)rcd->tci);
842                         }
843
844                         rx_pkts[nb_rx++] = start;
845                         rxq->start_seg = NULL;
846                 }
847
848 rcd_done:
849                 rxq->cmd_ring[ring_idx].next2comp = idx;
850                 VMXNET3_INC_RING_IDX_ONLY(rxq->cmd_ring[ring_idx].next2comp,
851                                           rxq->cmd_ring[ring_idx].size);
852
853                 /* It's time to renew descriptors */
854                 vmxnet3_renew_desc(rxq, ring_idx, newm);
855                 if (unlikely(rxq->shared->ctrl.updateRxProd)) {
856                         VMXNET3_WRITE_BAR0_REG(hw, rxprod_reg[ring_idx] + (rxq->queue_id * VMXNET3_REG_ALIGN),
857                                                rxq->cmd_ring[ring_idx].next2fill);
858                 }
859
860                 /* Advance to the next descriptor in comp_ring */
861                 vmxnet3_comp_ring_adv_next2proc(&rxq->comp_ring);
862
863                 rcd = &rxq->comp_ring.base[rxq->comp_ring.next2proc].rcd;
864                 nb_rxd++;
865                 if (nb_rxd > rxq->cmd_ring[0].size) {
866                         PMD_RX_LOG(ERR, "Used up quota of receiving packets,"
867                                    " relinquish control.");
868                         break;
869                 }
870         }
871
872         return nb_rx;
873 }
874
875 /*
876  * Create memzone for device rings. malloc can't be used as the physical address is
877  * needed. If the memzone is already created, then this function returns a ptr
878  * to the old one.
879  */
880 static const struct rte_memzone *
881 ring_dma_zone_reserve(struct rte_eth_dev *dev, const char *ring_name,
882                       uint16_t queue_id, uint32_t ring_size, int socket_id)
883 {
884         char z_name[RTE_MEMZONE_NAMESIZE];
885         const struct rte_memzone *mz;
886
887         snprintf(z_name, sizeof(z_name), "%s_%s_%d_%d",
888                  dev->driver->pci_drv.driver.name, ring_name,
889                  dev->data->port_id, queue_id);
890
891         mz = rte_memzone_lookup(z_name);
892         if (mz)
893                 return mz;
894
895         return rte_memzone_reserve_aligned(z_name, ring_size,
896                                            socket_id, 0, VMXNET3_RING_BA_ALIGN);
897 }
898
899 int
900 vmxnet3_dev_tx_queue_setup(struct rte_eth_dev *dev,
901                            uint16_t queue_idx,
902                            uint16_t nb_desc,
903                            unsigned int socket_id,
904                            __rte_unused const struct rte_eth_txconf *tx_conf)
905 {
906         struct vmxnet3_hw *hw = dev->data->dev_private;
907         const struct rte_memzone *mz;
908         struct vmxnet3_tx_queue *txq;
909         struct vmxnet3_cmd_ring *ring;
910         struct vmxnet3_comp_ring *comp_ring;
911         struct vmxnet3_data_ring *data_ring;
912         int size;
913
914         PMD_INIT_FUNC_TRACE();
915
916         if ((tx_conf->txq_flags & ETH_TXQ_FLAGS_NOXSUMSCTP) !=
917             ETH_TXQ_FLAGS_NOXSUMSCTP) {
918                 PMD_INIT_LOG(ERR, "SCTP checksum offload not supported");
919                 return -EINVAL;
920         }
921
922         txq = rte_zmalloc("ethdev_tx_queue", sizeof(struct vmxnet3_tx_queue),
923                           RTE_CACHE_LINE_SIZE);
924         if (txq == NULL) {
925                 PMD_INIT_LOG(ERR, "Can not allocate tx queue structure");
926                 return -ENOMEM;
927         }
928
929         txq->queue_id = queue_idx;
930         txq->port_id = dev->data->port_id;
931         txq->shared = &hw->tqd_start[queue_idx];
932         txq->hw = hw;
933         txq->qid = queue_idx;
934         txq->stopped = TRUE;
935
936         ring = &txq->cmd_ring;
937         comp_ring = &txq->comp_ring;
938         data_ring = &txq->data_ring;
939
940         /* Tx vmxnet ring length should be between 512-4096 */
941         if (nb_desc < VMXNET3_DEF_TX_RING_SIZE) {
942                 PMD_INIT_LOG(ERR, "VMXNET3 Tx Ring Size Min: %u",
943                              VMXNET3_DEF_TX_RING_SIZE);
944                 return -EINVAL;
945         } else if (nb_desc > VMXNET3_TX_RING_MAX_SIZE) {
946                 PMD_INIT_LOG(ERR, "VMXNET3 Tx Ring Size Max: %u",
947                              VMXNET3_TX_RING_MAX_SIZE);
948                 return -EINVAL;
949         } else {
950                 ring->size = nb_desc;
951                 ring->size &= ~VMXNET3_RING_SIZE_MASK;
952         }
953         comp_ring->size = data_ring->size = ring->size;
954
955         /* Tx vmxnet rings structure initialization*/
956         ring->next2fill = 0;
957         ring->next2comp = 0;
958         ring->gen = VMXNET3_INIT_GEN;
959         comp_ring->next2proc = 0;
960         comp_ring->gen = VMXNET3_INIT_GEN;
961
962         size = sizeof(struct Vmxnet3_TxDesc) * ring->size;
963         size += sizeof(struct Vmxnet3_TxCompDesc) * comp_ring->size;
964         size += sizeof(struct Vmxnet3_TxDataDesc) * data_ring->size;
965
966         mz = ring_dma_zone_reserve(dev, "txdesc", queue_idx, size, socket_id);
967         if (mz == NULL) {
968                 PMD_INIT_LOG(ERR, "ERROR: Creating queue descriptors zone");
969                 return -ENOMEM;
970         }
971         memset(mz->addr, 0, mz->len);
972
973         /* cmd_ring initialization */
974         ring->base = mz->addr;
975         ring->basePA = mz->phys_addr;
976
977         /* comp_ring initialization */
978         comp_ring->base = ring->base + ring->size;
979         comp_ring->basePA = ring->basePA +
980                 (sizeof(struct Vmxnet3_TxDesc) * ring->size);
981
982         /* data_ring initialization */
983         data_ring->base = (Vmxnet3_TxDataDesc *)(comp_ring->base + comp_ring->size);
984         data_ring->basePA = comp_ring->basePA +
985                         (sizeof(struct Vmxnet3_TxCompDesc) * comp_ring->size);
986
987         /* cmd_ring0 buf_info allocation */
988         ring->buf_info = rte_zmalloc("tx_ring_buf_info",
989                                      ring->size * sizeof(vmxnet3_buf_info_t), RTE_CACHE_LINE_SIZE);
990         if (ring->buf_info == NULL) {
991                 PMD_INIT_LOG(ERR, "ERROR: Creating tx_buf_info structure");
992                 return -ENOMEM;
993         }
994
995         /* Update the data portion with txq */
996         dev->data->tx_queues[queue_idx] = txq;
997
998         return 0;
999 }
1000
1001 int
1002 vmxnet3_dev_rx_queue_setup(struct rte_eth_dev *dev,
1003                            uint16_t queue_idx,
1004                            uint16_t nb_desc,
1005                            unsigned int socket_id,
1006                            __rte_unused const struct rte_eth_rxconf *rx_conf,
1007                            struct rte_mempool *mp)
1008 {
1009         const struct rte_memzone *mz;
1010         struct vmxnet3_rx_queue *rxq;
1011         struct vmxnet3_hw *hw = dev->data->dev_private;
1012         struct vmxnet3_cmd_ring *ring0, *ring1, *ring;
1013         struct vmxnet3_comp_ring *comp_ring;
1014         int size;
1015         uint8_t i;
1016         char mem_name[32];
1017
1018         PMD_INIT_FUNC_TRACE();
1019
1020         rxq = rte_zmalloc("ethdev_rx_queue", sizeof(struct vmxnet3_rx_queue),
1021                           RTE_CACHE_LINE_SIZE);
1022         if (rxq == NULL) {
1023                 PMD_INIT_LOG(ERR, "Can not allocate rx queue structure");
1024                 return -ENOMEM;
1025         }
1026
1027         rxq->mp = mp;
1028         rxq->queue_id = queue_idx;
1029         rxq->port_id = dev->data->port_id;
1030         rxq->shared = &hw->rqd_start[queue_idx];
1031         rxq->hw = hw;
1032         rxq->qid1 = queue_idx;
1033         rxq->qid2 = queue_idx + hw->num_rx_queues;
1034         rxq->stopped = TRUE;
1035
1036         ring0 = &rxq->cmd_ring[0];
1037         ring1 = &rxq->cmd_ring[1];
1038         comp_ring = &rxq->comp_ring;
1039
1040         /* Rx vmxnet rings length should be between 256-4096 */
1041         if (nb_desc < VMXNET3_DEF_RX_RING_SIZE) {
1042                 PMD_INIT_LOG(ERR, "VMXNET3 Rx Ring Size Min: 256");
1043                 return -EINVAL;
1044         } else if (nb_desc > VMXNET3_RX_RING_MAX_SIZE) {
1045                 PMD_INIT_LOG(ERR, "VMXNET3 Rx Ring Size Max: 4096");
1046                 return -EINVAL;
1047         } else {
1048                 ring0->size = nb_desc;
1049                 ring0->size &= ~VMXNET3_RING_SIZE_MASK;
1050                 ring1->size = ring0->size;
1051         }
1052
1053         comp_ring->size = ring0->size + ring1->size;
1054
1055         /* Rx vmxnet rings structure initialization */
1056         ring0->next2fill = 0;
1057         ring1->next2fill = 0;
1058         ring0->next2comp = 0;
1059         ring1->next2comp = 0;
1060         ring0->gen = VMXNET3_INIT_GEN;
1061         ring1->gen = VMXNET3_INIT_GEN;
1062         comp_ring->next2proc = 0;
1063         comp_ring->gen = VMXNET3_INIT_GEN;
1064
1065         size = sizeof(struct Vmxnet3_RxDesc) * (ring0->size + ring1->size);
1066         size += sizeof(struct Vmxnet3_RxCompDesc) * comp_ring->size;
1067
1068         mz = ring_dma_zone_reserve(dev, "rxdesc", queue_idx, size, socket_id);
1069         if (mz == NULL) {
1070                 PMD_INIT_LOG(ERR, "ERROR: Creating queue descriptors zone");
1071                 return -ENOMEM;
1072         }
1073         memset(mz->addr, 0, mz->len);
1074
1075         /* cmd_ring0 initialization */
1076         ring0->base = mz->addr;
1077         ring0->basePA = mz->phys_addr;
1078
1079         /* cmd_ring1 initialization */
1080         ring1->base = ring0->base + ring0->size;
1081         ring1->basePA = ring0->basePA + sizeof(struct Vmxnet3_RxDesc) * ring0->size;
1082
1083         /* comp_ring initialization */
1084         comp_ring->base = ring1->base + ring1->size;
1085         comp_ring->basePA = ring1->basePA + sizeof(struct Vmxnet3_RxDesc) *
1086                 ring1->size;
1087
1088         /* cmd_ring0-cmd_ring1 buf_info allocation */
1089         for (i = 0; i < VMXNET3_RX_CMDRING_SIZE; i++) {
1090
1091                 ring = &rxq->cmd_ring[i];
1092                 ring->rid = i;
1093                 snprintf(mem_name, sizeof(mem_name), "rx_ring_%d_buf_info", i);
1094
1095                 ring->buf_info = rte_zmalloc(mem_name,
1096                                              ring->size * sizeof(vmxnet3_buf_info_t),
1097                                              RTE_CACHE_LINE_SIZE);
1098                 if (ring->buf_info == NULL) {
1099                         PMD_INIT_LOG(ERR, "ERROR: Creating rx_buf_info structure");
1100                         return -ENOMEM;
1101                 }
1102         }
1103
1104         /* Update the data portion with rxq */
1105         dev->data->rx_queues[queue_idx] = rxq;
1106
1107         return 0;
1108 }
1109
1110 /*
1111  * Initializes Receive Unit
1112  * Load mbufs in rx queue in advance
1113  */
1114 int
1115 vmxnet3_dev_rxtx_init(struct rte_eth_dev *dev)
1116 {
1117         struct vmxnet3_hw *hw = dev->data->dev_private;
1118
1119         int i, ret;
1120         uint8_t j;
1121
1122         PMD_INIT_FUNC_TRACE();
1123
1124         for (i = 0; i < hw->num_rx_queues; i++) {
1125                 vmxnet3_rx_queue_t *rxq = dev->data->rx_queues[i];
1126
1127                 for (j = 0; j < VMXNET3_RX_CMDRING_SIZE; j++) {
1128                         /* Passing 0 as alloc_num will allocate full ring */
1129                         ret = vmxnet3_post_rx_bufs(rxq, j);
1130                         if (ret <= 0) {
1131                                 PMD_INIT_LOG(ERR,
1132                                              "ERROR: Posting Rxq: %d buffers ring: %d",
1133                                              i, j);
1134                                 return -ret;
1135                         }
1136                         /*
1137                          * Updating device with the index:next2fill to fill the
1138                          * mbufs for coming packets.
1139                          */
1140                         if (unlikely(rxq->shared->ctrl.updateRxProd)) {
1141                                 VMXNET3_WRITE_BAR0_REG(hw, rxprod_reg[j] + (rxq->queue_id * VMXNET3_REG_ALIGN),
1142                                                        rxq->cmd_ring[j].next2fill);
1143                         }
1144                 }
1145                 rxq->stopped = FALSE;
1146                 rxq->start_seg = NULL;
1147         }
1148
1149         for (i = 0; i < dev->data->nb_tx_queues; i++) {
1150                 struct vmxnet3_tx_queue *txq = dev->data->tx_queues[i];
1151
1152                 txq->stopped = FALSE;
1153         }
1154
1155         return 0;
1156 }
1157
/*
 * Default 40-byte RSS hash key, used by vmxnet3_rss_configure() when the
 * application does not supply its own key (the device is programmed with
 * the Toeplitz hash function, see VMXNET3_RSS_HASH_FUNC_TOEPLITZ below).
 */
static uint8_t rss_intel_key[40] = {
	0x6D, 0x5A, 0x56, 0xDA, 0x25, 0x5B, 0x0E, 0xC2,
	0x41, 0x67, 0x25, 0x3D, 0x43, 0xA3, 0x8F, 0xB0,
	0xD0, 0xCA, 0x2B, 0xCB, 0xAE, 0x7B, 0x30, 0xB4,
	0x77, 0xCB, 0x2D, 0xA3, 0x80, 0x30, 0xF2, 0x0C,
	0x6A, 0x42, 0xB7, 0x3B, 0xBE, 0xAC, 0x01, 0xFA,
};
1165
1166 /*
1167  * Configure RSS feature
1168  */
1169 int
1170 vmxnet3_rss_configure(struct rte_eth_dev *dev)
1171 {
1172         struct vmxnet3_hw *hw = dev->data->dev_private;
1173         struct VMXNET3_RSSConf *dev_rss_conf;
1174         struct rte_eth_rss_conf *port_rss_conf;
1175         uint64_t rss_hf;
1176         uint8_t i, j;
1177
1178         PMD_INIT_FUNC_TRACE();
1179
1180         dev_rss_conf = hw->rss_conf;
1181         port_rss_conf = &dev->data->dev_conf.rx_adv_conf.rss_conf;
1182
1183         /* loading hashFunc */
1184         dev_rss_conf->hashFunc = VMXNET3_RSS_HASH_FUNC_TOEPLITZ;
1185         /* loading hashKeySize */
1186         dev_rss_conf->hashKeySize = VMXNET3_RSS_MAX_KEY_SIZE;
1187         /* loading indTableSize: Must not exceed VMXNET3_RSS_MAX_IND_TABLE_SIZE (128)*/
1188         dev_rss_conf->indTableSize = (uint16_t)(hw->num_rx_queues * 4);
1189
1190         if (port_rss_conf->rss_key == NULL) {
1191                 /* Default hash key */
1192                 port_rss_conf->rss_key = rss_intel_key;
1193         }
1194
1195         /* loading hashKey */
1196         memcpy(&dev_rss_conf->hashKey[0], port_rss_conf->rss_key,
1197                dev_rss_conf->hashKeySize);
1198
1199         /* loading indTable */
1200         for (i = 0, j = 0; i < dev_rss_conf->indTableSize; i++, j++) {
1201                 if (j == dev->data->nb_rx_queues)
1202                         j = 0;
1203                 dev_rss_conf->indTable[i] = j;
1204         }
1205
1206         /* loading hashType */
1207         dev_rss_conf->hashType = 0;
1208         rss_hf = port_rss_conf->rss_hf & VMXNET3_RSS_OFFLOAD_ALL;
1209         if (rss_hf & ETH_RSS_IPV4)
1210                 dev_rss_conf->hashType |= VMXNET3_RSS_HASH_TYPE_IPV4;
1211         if (rss_hf & ETH_RSS_NONFRAG_IPV4_TCP)
1212                 dev_rss_conf->hashType |= VMXNET3_RSS_HASH_TYPE_TCP_IPV4;
1213         if (rss_hf & ETH_RSS_IPV6)
1214                 dev_rss_conf->hashType |= VMXNET3_RSS_HASH_TYPE_IPV6;
1215         if (rss_hf & ETH_RSS_NONFRAG_IPV6_TCP)
1216                 dev_rss_conf->hashType |= VMXNET3_RSS_HASH_TYPE_TCP_IPV6;
1217
1218         return VMXNET3_SUCCESS;
1219 }