net/vmxnet3: add Tx preparation
drivers/net/vmxnet3/vmxnet3_rxtx.c
1 /*-
2  *   BSD LICENSE
3  *
4  *   Copyright(c) 2010-2015 Intel Corporation. All rights reserved.
5  *   All rights reserved.
6  *
7  *   Redistribution and use in source and binary forms, with or without
8  *   modification, are permitted provided that the following conditions
9  *   are met:
10  *
11  *     * Redistributions of source code must retain the above copyright
12  *       notice, this list of conditions and the following disclaimer.
13  *     * Redistributions in binary form must reproduce the above copyright
14  *       notice, this list of conditions and the following disclaimer in
15  *       the documentation and/or other materials provided with the
16  *       distribution.
17  *     * Neither the name of Intel Corporation nor the names of its
18  *       contributors may be used to endorse or promote products derived
19  *       from this software without specific prior written permission.
20  *
21  *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
22  *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
23  *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
24  *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
25  *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
26  *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
27  *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
28  *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
29  *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
30  *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
31  *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
32  */
33
34 #include <sys/queue.h>
35
36 #include <stdio.h>
37 #include <stdlib.h>
38 #include <string.h>
39 #include <errno.h>
40 #include <stdint.h>
41 #include <stdarg.h>
42 #include <unistd.h>
43 #include <inttypes.h>
44
45 #include <rte_byteorder.h>
46 #include <rte_common.h>
47 #include <rte_cycles.h>
48 #include <rte_log.h>
49 #include <rte_debug.h>
50 #include <rte_interrupts.h>
51 #include <rte_pci.h>
52 #include <rte_memory.h>
53 #include <rte_memzone.h>
54 #include <rte_launch.h>
55 #include <rte_eal.h>
56 #include <rte_per_lcore.h>
57 #include <rte_lcore.h>
58 #include <rte_atomic.h>
59 #include <rte_branch_prediction.h>
60 #include <rte_mempool.h>
61 #include <rte_malloc.h>
62 #include <rte_mbuf.h>
63 #include <rte_ether.h>
64 #include <rte_ethdev.h>
65 #include <rte_prefetch.h>
66 #include <rte_ip.h>
67 #include <rte_udp.h>
68 #include <rte_tcp.h>
69 #include <rte_sctp.h>
70 #include <rte_string_fns.h>
71 #include <rte_errno.h>
72 #include <rte_net.h>
73
74 #include "base/vmxnet3_defs.h"
75 #include "vmxnet3_ring.h"
76
77 #include "vmxnet3_logs.h"
78 #include "vmxnet3_ethdev.h"
79
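/*
 * Tx offloads this PMD can handle: VLAN tag insertion, L4 checksum and TSO.
 * The IPv4/IPv6 flags are included because they only describe the packet and
 * normally accompany TSO and L4 checksum requests. Any other flag inside
 * PKT_TX_OFFLOAD_MASK is unsupported and makes vmxnet3_prep_pkts() reject
 * the packet.
 */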
80 #define VMXNET3_TX_OFFLOAD_MASK ( \
81                 PKT_TX_VLAN_PKT | PKT_TX_IPV4 | PKT_TX_IPV6 | \
82                 PKT_TX_L4_MASK |  \
83                 PKT_TX_TCP_SEG)
84
85 #define VMXNET3_TX_OFFLOAD_NOTSUP_MASK  \
86         (PKT_TX_OFFLOAD_MASK ^ VMXNET3_TX_OFFLOAD_MASK)
87
88 static const uint32_t rxprod_reg[2] = {VMXNET3_REG_RXPROD, VMXNET3_REG_RXPROD2};
89
90 static int vmxnet3_post_rx_bufs(vmxnet3_rx_queue_t *, uint8_t);
91 static void vmxnet3_tq_tx_complete(vmxnet3_tx_queue_t *);
92 #ifdef RTE_LIBRTE_VMXNET3_DEBUG_DRIVER_NOT_USED
93 static void vmxnet3_rxq_dump(struct vmxnet3_rx_queue *);
94 static void vmxnet3_txq_dump(struct vmxnet3_tx_queue *);
95 #endif
96
97 #ifdef RTE_LIBRTE_VMXNET3_DEBUG_DRIVER_NOT_USED
98 static void
99 vmxnet3_rxq_dump(struct vmxnet3_rx_queue *rxq)
100 {
101         uint32_t avail = 0;
102
103         if (rxq == NULL)
104                 return;
105
106         PMD_RX_LOG(DEBUG,
107                    "RXQ: cmd0 base : %p cmd1 base : %p comp ring base : %p.",
108                    rxq->cmd_ring[0].base, rxq->cmd_ring[1].base, rxq->comp_ring.base);
109         PMD_RX_LOG(DEBUG,
110                    "RXQ: cmd0 basePA : 0x%lx cmd1 basePA : 0x%lx comp ring basePA : 0x%lx.",
111                    (unsigned long)rxq->cmd_ring[0].basePA,
112                    (unsigned long)rxq->cmd_ring[1].basePA,
113                    (unsigned long)rxq->comp_ring.basePA);
114
115         avail = vmxnet3_cmd_ring_desc_avail(&rxq->cmd_ring[0]);
116         PMD_RX_LOG(DEBUG,
117                    "RXQ:cmd0: size=%u; free=%u; next2proc=%u; queued=%u",
118                    (uint32_t)rxq->cmd_ring[0].size, avail,
119                    rxq->comp_ring.next2proc,
120                    rxq->cmd_ring[0].size - avail);
121
122         avail = vmxnet3_cmd_ring_desc_avail(&rxq->cmd_ring[1]);
123         PMD_RX_LOG(DEBUG, "RXQ:cmd1 size=%u; free=%u; next2proc=%u; queued=%u",
124                    (uint32_t)rxq->cmd_ring[1].size, avail, rxq->comp_ring.next2proc,
125                    rxq->cmd_ring[1].size - avail);
126
127 }
128
129 static void
130 vmxnet3_txq_dump(struct vmxnet3_tx_queue *txq)
131 {
132         uint32_t avail = 0;
133
134         if (txq == NULL)
135                 return;
136
137         PMD_TX_LOG(DEBUG, "TXQ: cmd base : %p comp ring base : %p data ring base : %p.",
138                    txq->cmd_ring.base, txq->comp_ring.base, txq->data_ring.base);
139         PMD_TX_LOG(DEBUG, "TXQ: cmd basePA : 0x%lx comp ring basePA : 0x%lx data ring basePA : 0x%lx.",
140                    (unsigned long)txq->cmd_ring.basePA,
141                    (unsigned long)txq->comp_ring.basePA,
142                    (unsigned long)txq->data_ring.basePA);
143
144         avail = vmxnet3_cmd_ring_desc_avail(&txq->cmd_ring);
145         PMD_TX_LOG(DEBUG, "TXQ: size=%u; free=%u; next2proc=%u; queued=%u",
146                    (uint32_t)txq->cmd_ring.size, avail,
147                    txq->comp_ring.next2proc, txq->cmd_ring.size - avail);
148 }
149 #endif
150
151 static void
152 vmxnet3_tx_cmd_ring_release_mbufs(vmxnet3_cmd_ring_t *ring)
153 {
154         while (ring->next2comp != ring->next2fill) {
155                 /* No need to worry about desc ownership, device is quiesced by now. */
156                 vmxnet3_buf_info_t *buf_info = ring->buf_info + ring->next2comp;
157
158                 if (buf_info->m) {
159                         rte_pktmbuf_free(buf_info->m);
160                         buf_info->m = NULL;
161                         buf_info->bufPA = 0;
162                         buf_info->len = 0;
163                 }
164                 vmxnet3_cmd_ring_adv_next2comp(ring);
165         }
166 }
167
168 static void
169 vmxnet3_rx_cmd_ring_release_mbufs(vmxnet3_cmd_ring_t *ring)
170 {
171         uint32_t i;
172
173         for (i = 0; i < ring->size; i++) {
174                 /* No need to worry about desc ownership, device is quiesced by now. */
175                 vmxnet3_buf_info_t *buf_info = &ring->buf_info[i];
176
177                 if (buf_info->m) {
178                         rte_pktmbuf_free_seg(buf_info->m);
179                         buf_info->m = NULL;
180                         buf_info->bufPA = 0;
181                         buf_info->len = 0;
182                 }
183                 vmxnet3_cmd_ring_adv_next2comp(ring);
184         }
185 }
186
187 static void
188 vmxnet3_cmd_ring_release(vmxnet3_cmd_ring_t *ring)
189 {
190         rte_free(ring->buf_info);
191         ring->buf_info = NULL;
192 }
193
194 void
195 vmxnet3_dev_tx_queue_release(void *txq)
196 {
197         vmxnet3_tx_queue_t *tq = txq;
198
199         if (tq != NULL) {
200                 /* Release mbufs */
201                 vmxnet3_tx_cmd_ring_release_mbufs(&tq->cmd_ring);
202                 /* Release the cmd_ring */
203                 vmxnet3_cmd_ring_release(&tq->cmd_ring);
204         }
205 }
206
207 void
208 vmxnet3_dev_rx_queue_release(void *rxq)
209 {
210         int i;
211         vmxnet3_rx_queue_t *rq = rxq;
212
213         if (rq != NULL) {
214                 /* Release mbufs */
215                 for (i = 0; i < VMXNET3_RX_CMDRING_SIZE; i++)
216                         vmxnet3_rx_cmd_ring_release_mbufs(&rq->cmd_ring[i]);
217
218                 /* Release both the cmd_rings */
219                 for (i = 0; i < VMXNET3_RX_CMDRING_SIZE; i++)
220                         vmxnet3_cmd_ring_release(&rq->cmd_ring[i]);
221         }
222 }
223
224 static void
225 vmxnet3_dev_tx_queue_reset(void *txq)
226 {
227         vmxnet3_tx_queue_t *tq = txq;
228         struct vmxnet3_cmd_ring *ring = &tq->cmd_ring;
229         struct vmxnet3_comp_ring *comp_ring = &tq->comp_ring;
230         struct vmxnet3_data_ring *data_ring = &tq->data_ring;
231         int size;
232
233         if (tq != NULL) {
234                 /* Release the cmd_ring mbufs */
235                 vmxnet3_tx_cmd_ring_release_mbufs(&tq->cmd_ring);
236         }
237
238         /* Tx vmxnet rings structure initialization */
239         ring->next2fill = 0;
240         ring->next2comp = 0;
241         ring->gen = VMXNET3_INIT_GEN;
242         comp_ring->next2proc = 0;
243         comp_ring->gen = VMXNET3_INIT_GEN;
244
245         size = sizeof(struct Vmxnet3_TxDesc) * ring->size;
246         size += sizeof(struct Vmxnet3_TxCompDesc) * comp_ring->size;
247         size += sizeof(struct Vmxnet3_TxDataDesc) * data_ring->size;
248
249         memset(ring->base, 0, size);
250 }
251
252 static void
253 vmxnet3_dev_rx_queue_reset(void *rxq)
254 {
255         int i;
256         vmxnet3_rx_queue_t *rq = rxq;
257         struct vmxnet3_cmd_ring *ring0, *ring1;
258         struct vmxnet3_comp_ring *comp_ring;
259         int size;
260
261         if (rq != NULL) {
262                 /* Release both the cmd_rings mbufs */
263                 for (i = 0; i < VMXNET3_RX_CMDRING_SIZE; i++)
264                         vmxnet3_rx_cmd_ring_release_mbufs(&rq->cmd_ring[i]);
265         }
266
267         ring0 = &rq->cmd_ring[0];
268         ring1 = &rq->cmd_ring[1];
269         comp_ring = &rq->comp_ring;
270
271         /* Rx vmxnet rings structure initialization */
272         ring0->next2fill = 0;
273         ring1->next2fill = 0;
274         ring0->next2comp = 0;
275         ring1->next2comp = 0;
276         ring0->gen = VMXNET3_INIT_GEN;
277         ring1->gen = VMXNET3_INIT_GEN;
278         comp_ring->next2proc = 0;
279         comp_ring->gen = VMXNET3_INIT_GEN;
280
281         size = sizeof(struct Vmxnet3_RxDesc) * (ring0->size + ring1->size);
282         size += sizeof(struct Vmxnet3_RxCompDesc) * comp_ring->size;
283
284         memset(ring0->base, 0, size);
285 }
286
287 void
288 vmxnet3_dev_clear_queues(struct rte_eth_dev *dev)
289 {
290         unsigned i;
291
292         PMD_INIT_FUNC_TRACE();
293
294         for (i = 0; i < dev->data->nb_tx_queues; i++) {
295                 struct vmxnet3_tx_queue *txq = dev->data->tx_queues[i];
296
297                 if (txq != NULL) {
298                         txq->stopped = TRUE;
299                         vmxnet3_dev_tx_queue_reset(txq);
300                 }
301         }
302
303         for (i = 0; i < dev->data->nb_rx_queues; i++) {
304                 struct vmxnet3_rx_queue *rxq = dev->data->rx_queues[i];
305
306                 if (rxq != NULL) {
307                         rxq->stopped = TRUE;
308                         vmxnet3_dev_rx_queue_reset(rxq);
309                 }
310         }
311 }
312
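/*
 * Reclaim the command-ring descriptors of the packet whose EOP descriptor is
 * at eop_idx, free the associated mbuf chain and return the number of
 * descriptors released.
 */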
313 static int
314 vmxnet3_unmap_pkt(uint16_t eop_idx, vmxnet3_tx_queue_t *txq)
315 {
316         int completed = 0;
317         struct rte_mbuf *mbuf;
318
319         /* Release cmd_ring descriptor and free mbuf */
320         RTE_ASSERT(txq->cmd_ring.base[eop_idx].txd.eop == 1);
321
322         mbuf = txq->cmd_ring.buf_info[eop_idx].m;
323         if (mbuf == NULL)
324                 rte_panic("EOP desc does not point to a valid mbuf");
325         rte_pktmbuf_free(mbuf);
326
327         txq->cmd_ring.buf_info[eop_idx].m = NULL;
328
329         while (txq->cmd_ring.next2comp != eop_idx) {
330                 /* no out-of-order completion */
331                 RTE_ASSERT(txq->cmd_ring.base[txq->cmd_ring.next2comp].txd.cq == 0);
332                 vmxnet3_cmd_ring_adv_next2comp(&txq->cmd_ring);
333                 completed++;
334         }
335
336         /* Mark the txd for which tcd was generated as completed */
337         vmxnet3_cmd_ring_adv_next2comp(&txq->cmd_ring);
338
339         return completed + 1;
340 }
341
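/*
 * Walk the Tx completion ring and release descriptors for every completion
 * entry whose generation bit matches the ring's current generation; each
 * entry identifies the EOP descriptor of one transmitted packet.
 */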
342 static void
343 vmxnet3_tq_tx_complete(vmxnet3_tx_queue_t *txq)
344 {
345         int completed = 0;
346         vmxnet3_comp_ring_t *comp_ring = &txq->comp_ring;
347         struct Vmxnet3_TxCompDesc *tcd = (struct Vmxnet3_TxCompDesc *)
348                 (comp_ring->base + comp_ring->next2proc);
349
350         while (tcd->gen == comp_ring->gen) {
351                 completed += vmxnet3_unmap_pkt(tcd->txdIdx, txq);
352
353                 vmxnet3_comp_ring_adv_next2proc(comp_ring);
354                 tcd = (struct Vmxnet3_TxCompDesc *)(comp_ring->base +
355                                                     comp_ring->next2proc);
356         }
357
358         PMD_TX_LOG(DEBUG, "Processed %d tx comps & command descs.", completed);
359 }
360
361 uint16_t
362 vmxnet3_prep_pkts(__rte_unused void *tx_queue, struct rte_mbuf **tx_pkts,
363         uint16_t nb_pkts)
364 {
365         int32_t ret;
366         uint32_t i;
367         uint64_t ol_flags;
368         struct rte_mbuf *m;
369
370         for (i = 0; i != nb_pkts; i++) {
371                 m = tx_pkts[i];
372                 ol_flags = m->ol_flags;
373
374                 /* Non-TSO packet cannot occupy more than
375                  * VMXNET3_MAX_TXD_PER_PKT TX descriptors.
376                  */
377                 if ((ol_flags & PKT_TX_TCP_SEG) == 0 &&
378                                 m->nb_segs > VMXNET3_MAX_TXD_PER_PKT) {
379                         rte_errno = EINVAL;
380                         return i;
381                 }
382
383                 /* check that only supported TX offloads are requested. */
384                 if ((ol_flags & VMXNET3_TX_OFFLOAD_NOTSUP_MASK) != 0 ||
385                                 (ol_flags & PKT_TX_L4_MASK) ==
386                                 PKT_TX_SCTP_CKSUM) {
387                         rte_errno = ENOTSUP;
388                         return i;
389                 }
390
391 #ifdef RTE_LIBRTE_ETHDEV_DEBUG
392                 ret = rte_validate_tx_offload(m);
393                 if (ret != 0) {
394                         rte_errno = -ret;
395                         return i;
396                 }
397 #endif
398                 ret = rte_net_intel_cksum_prepare(m);
399                 if (ret != 0) {
400                         rte_errno = -ret;
401                         return i;
402                 }
403         }
404
405         return i;
406 }
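/*
 * Note: vmxnet3_prep_pkts() stops at the first offending packet and reports
 * the reason through rte_errno (a positive errno value). A minimal
 * application-side sketch, assuming the function is wired up as the port's
 * tx_pkt_prepare callback (done in vmxnet3_ethdev.c, not shown here):
 *
 *	uint16_t nb_prep = rte_eth_tx_prepare(port_id, queue_id, pkts, nb_pkts);
 *	if (nb_prep < nb_pkts)
 *		printf("pkt %u rejected: %s\n", nb_prep, strerror(rte_errno));
 *	uint16_t nb_sent = rte_eth_tx_burst(port_id, queue_id, pkts, nb_prep);
 */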
407
408 uint16_t
409 vmxnet3_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts,
410                   uint16_t nb_pkts)
411 {
412         uint16_t nb_tx;
413         vmxnet3_tx_queue_t *txq = tx_queue;
414         struct vmxnet3_hw *hw = txq->hw;
415         Vmxnet3_TxQueueCtrl *txq_ctrl = &txq->shared->ctrl;
416         uint32_t deferred = rte_le_to_cpu_32(txq_ctrl->txNumDeferred);
417
418         if (unlikely(txq->stopped)) {
419                 PMD_TX_LOG(DEBUG, "Tx queue is stopped.");
420                 return 0;
421         }
422
423         /* Free up the comp_descriptors aggressively */
424         vmxnet3_tq_tx_complete(txq);
425
426         nb_tx = 0;
427         while (nb_tx < nb_pkts) {
428                 Vmxnet3_GenericDesc *gdesc;
429                 vmxnet3_buf_info_t *tbi;
430                 uint32_t first2fill, avail, dw2;
431                 struct rte_mbuf *txm = tx_pkts[nb_tx];
432                 struct rte_mbuf *m_seg = txm;
433                 int copy_size = 0;
434                 bool tso = (txm->ol_flags & PKT_TX_TCP_SEG) != 0;
435                 /* # of descriptors needed for a packet. */
436                 unsigned count = txm->nb_segs;
437
438                 avail = vmxnet3_cmd_ring_desc_avail(&txq->cmd_ring);
439                 if (count > avail) {
440                         /* Is command ring full? */
441                         if (unlikely(avail == 0)) {
442                                 PMD_TX_LOG(DEBUG, "No free ring descriptors");
443                                 txq->stats.tx_ring_full++;
444                                 txq->stats.drop_total += (nb_pkts - nb_tx);
445                                 break;
446                         }
447
448                         /* Command ring is not full but cannot handle the
449                          * multi-segmented packet. Let's try the next packet
450                          * in this case.
451                          */
452                         PMD_TX_LOG(DEBUG, "Running out of ring descriptors "
453                                    "(avail %d needed %d)", avail, count);
454                         txq->stats.drop_total++;
455                         if (tso)
456                                 txq->stats.drop_tso++;
457                         rte_pktmbuf_free(txm);
458                         nb_tx++;
459                         continue;
460                 }
461
462                 /* Drop non-TSO packet that is excessively fragmented */
463                 if (unlikely(!tso && count > VMXNET3_MAX_TXD_PER_PKT)) {
464                         PMD_TX_LOG(ERR, "Non-TSO packet cannot occupy more than %d tx "
465                                    "descriptors. Packet dropped.", VMXNET3_MAX_TXD_PER_PKT);
466                         txq->stats.drop_too_many_segs++;
467                         txq->stats.drop_total++;
468                         rte_pktmbuf_free(txm);
469                         nb_tx++;
470                         continue;
471                 }
472
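                /* A single-segment packet no longer than VMXNET3_HDR_COPY_SIZE
                 * is copied into the per-queue Tx data ring; the SOP descriptor
                 * below then points at that data-ring slot instead of the mbuf.
                 */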
473                 if (txm->nb_segs == 1 &&
474                     rte_pktmbuf_pkt_len(txm) <= VMXNET3_HDR_COPY_SIZE) {
475                         struct Vmxnet3_TxDataDesc *tdd;
476
477                         tdd = txq->data_ring.base + txq->cmd_ring.next2fill;
478                         copy_size = rte_pktmbuf_pkt_len(txm);
479                         rte_memcpy(tdd->data, rte_pktmbuf_mtod(txm, char *), copy_size);
480                 }
481
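                /* Descriptor ownership is conveyed by the generation bit: the
                 * device only consumes a descriptor once its gen field matches
                 * the generation it expects at that ring position. The SOP
                 * descriptor is therefore written with the previous generation
                 * and flipped last, after the compiler barrier further below,
                 * so the device never sees a partially built chain.
                 */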
482                 /* use the previous gen bit for the SOP desc */
483                 dw2 = (txq->cmd_ring.gen ^ 0x1) << VMXNET3_TXD_GEN_SHIFT;
484                 first2fill = txq->cmd_ring.next2fill;
485                 do {
486                         /* Remember the transmit buffer for cleanup */
487                         tbi = txq->cmd_ring.buf_info + txq->cmd_ring.next2fill;
488
489                         /* NB: the following assumes that the VMXNET3
490                          * maximum transmit buffer size (16K) is greater
491                          * than the maximum mbuf segment size.
492                          */
493                         gdesc = txq->cmd_ring.base + txq->cmd_ring.next2fill;
494                         if (copy_size)
495                                 gdesc->txd.addr = rte_cpu_to_le_64(txq->data_ring.basePA +
496                                                                    txq->cmd_ring.next2fill *
497                                                                    sizeof(struct Vmxnet3_TxDataDesc));
498                         else
499                                 gdesc->txd.addr = rte_mbuf_data_dma_addr(m_seg);
500
501                         gdesc->dword[2] = dw2 | m_seg->data_len;
502                         gdesc->dword[3] = 0;
503
504                         /* move to the next2fill descriptor */
505                         vmxnet3_cmd_ring_adv_next2fill(&txq->cmd_ring);
506
507                         /* use the right gen for non-SOP desc */
508                         dw2 = txq->cmd_ring.gen << VMXNET3_TXD_GEN_SHIFT;
509                 } while ((m_seg = m_seg->next) != NULL);
510
511                 /* set the last buf_info for the pkt */
512                 tbi->m = txm;
513                 /* Update the EOP descriptor */
514                 gdesc->dword[3] |= VMXNET3_TXD_EOP | VMXNET3_TXD_CQ;
515
516                 /* Add VLAN tag if present */
517                 gdesc = txq->cmd_ring.base + first2fill;
518                 if (txm->ol_flags & PKT_TX_VLAN_PKT) {
519                         gdesc->txd.ti = 1;
520                         gdesc->txd.tci = txm->vlan_tci;
521                 }
522
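                /* Offload programming: for TSO, hlen covers the L2+L3+L4
                 * headers and msscof carries the MSS; for plain L4 checksum
                 * offload, hlen covers L2+L3 and msscof holds the offset of
                 * the checksum field from the start of the packet. "deferred"
                 * counts the packets (segments, for TSO) queued since the
                 * device was last notified.
                 */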
523                 if (tso) {
524                         uint16_t mss = txm->tso_segsz;
525
526                         RTE_ASSERT(mss > 0);
527
528                         gdesc->txd.hlen = txm->l2_len + txm->l3_len + txm->l4_len;
529                         gdesc->txd.om = VMXNET3_OM_TSO;
530                         gdesc->txd.msscof = mss;
531
532                         deferred += (rte_pktmbuf_pkt_len(txm) - gdesc->txd.hlen + mss - 1) / mss;
533                 } else if (txm->ol_flags & PKT_TX_L4_MASK) {
534                         gdesc->txd.om = VMXNET3_OM_CSUM;
535                         gdesc->txd.hlen = txm->l2_len + txm->l3_len;
536
537                         switch (txm->ol_flags & PKT_TX_L4_MASK) {
538                         case PKT_TX_TCP_CKSUM:
539                                 gdesc->txd.msscof = gdesc->txd.hlen + offsetof(struct tcp_hdr, cksum);
540                                 break;
541                         case PKT_TX_UDP_CKSUM:
542                                 gdesc->txd.msscof = gdesc->txd.hlen + offsetof(struct udp_hdr, dgram_cksum);
543                                 break;
544                         default:
545                                 PMD_TX_LOG(WARNING, "requested cksum offload not supported %#" PRIx64,
546                                            txm->ol_flags & PKT_TX_L4_MASK);
547                                 abort();
548                         }
549                         deferred++;
550                 } else {
551                         gdesc->txd.hlen = 0;
552                         gdesc->txd.om = VMXNET3_OM_NONE;
553                         gdesc->txd.msscof = 0;
554                         deferred++;
555                 }
556
557                 /* flip the GEN bit on the SOP */
558                 rte_compiler_barrier();
559                 gdesc->dword[2] ^= VMXNET3_TXD_GEN;
560
561                 txq_ctrl->txNumDeferred = rte_cpu_to_le_32(deferred);
562                 nb_tx++;
563         }
564
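        /* Doorbell writes are batched: the device is notified only once the
         * number of deferred packets reaches the txThreshold value read from
         * the shared queue control area, keeping register writes infrequent.
         */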
565         PMD_TX_LOG(DEBUG, "vmxnet3 txThreshold: %u", rte_le_to_cpu_32(txq_ctrl->txThreshold));
566
567         if (deferred >= rte_le_to_cpu_32(txq_ctrl->txThreshold)) {
568                 txq_ctrl->txNumDeferred = 0;
569                 /* Notify vSwitch that packets are available. */
570                 VMXNET3_WRITE_BAR0_REG(hw, (VMXNET3_REG_TXPROD + txq->queue_id * VMXNET3_REG_ALIGN),
571                                        txq->cmd_ring.next2fill);
572         }
573
574         return nb_tx;
575 }
576
577 /*
578  *  Allocates mbufs and posts Rx descriptors with the buffer details so that
579  *  the device can receive packets into those buffers.
580  *  Ring layout:
581  *      Of the two rings, the 1st ring holds buffers of type 0 (HEAD) and
582  *      type 1 (BODY). bufs_per_pkt is set such that for non-LRO cases all
583  *      the buffers required by a frame fit in the 1st ring (1st buf of
584  *      type 0, the rest of type 1).
585  *      The 2nd ring holds type 1 buffers only and is used mostly for LRO.
586  */
587 static int
588 vmxnet3_post_rx_bufs(vmxnet3_rx_queue_t *rxq, uint8_t ring_id)
589 {
590         int err = 0;
591         uint32_t i = 0, val = 0;
592         struct vmxnet3_cmd_ring *ring = &rxq->cmd_ring[ring_id];
593
594         if (ring_id == 0) {
595                 /* Usually: One HEAD type buf per packet
596                  * val = (ring->next2fill % rxq->hw->bufs_per_pkt) ?
597                  * VMXNET3_RXD_BTYPE_BODY : VMXNET3_RXD_BTYPE_HEAD;
598                  */
599
600                 /* We use single packet buffer so all heads here */
601                 val = VMXNET3_RXD_BTYPE_HEAD;
602         } else {
603                 /* All BODY type buffers for 2nd ring */
604                 val = VMXNET3_RXD_BTYPE_BODY;
605         }
606
607         while (vmxnet3_cmd_ring_desc_avail(ring) > 0) {
608                 struct Vmxnet3_RxDesc *rxd;
609                 struct rte_mbuf *mbuf;
610                 vmxnet3_buf_info_t *buf_info = &ring->buf_info[ring->next2fill];
611
612                 rxd = (struct Vmxnet3_RxDesc *)(ring->base + ring->next2fill);
613
614                 /* Allocate blank mbuf for the current Rx Descriptor */
615                 mbuf = rte_mbuf_raw_alloc(rxq->mp);
616                 if (unlikely(mbuf == NULL)) {
617                         PMD_RX_LOG(ERR, "Error allocating mbuf");
618                         rxq->stats.rx_buf_alloc_failure++;
619                         err = ENOMEM;
620                         break;
621                 }
622
623                 /*
624                  * Load the mbuf pointer into buf_info[ring->next2fill];
625                  * the buf_info entry plays the role of a virtio virtqueue cookie.
626                  */
627                 buf_info->m = mbuf;
628                 buf_info->len = (uint16_t)(mbuf->buf_len -
629                                            RTE_PKTMBUF_HEADROOM);
630                 buf_info->bufPA = rte_mbuf_data_dma_addr_default(mbuf);
631
632                 /* Load Rx Descriptor with the buffer's GPA */
633                 rxd->addr = buf_info->bufPA;
634
635                 /* After this point rxd->addr MUST not be NULL */
636                 rxd->btype = val;
637                 rxd->len = buf_info->len;
638                 /* Flip gen bit at the end to change ownership */
639                 rxd->gen = ring->gen;
640
641                 vmxnet3_cmd_ring_adv_next2fill(ring);
642                 i++;
643         }
644
645         /* Return error only if no buffers are posted at present */
646         if (vmxnet3_cmd_ring_desc_avail(ring) >= (ring->size - 1))
647                 return -err;
648         else
649                 return i;
650 }
651
652
653 /* Receive side checksum and other offloads */
654 static void
655 vmxnet3_rx_offload(const Vmxnet3_RxCompDesc *rcd, struct rte_mbuf *rxm)
656 {
657         /* Check for RSS */
658         if (rcd->rssType != VMXNET3_RCD_RSS_TYPE_NONE) {
659                 rxm->ol_flags |= PKT_RX_RSS_HASH;
660                 rxm->hash.rss = rcd->rssHash;
661         }
662
663         /* Check packet type, checksum errors, etc.; only IPv4 is supported for now. */
664         if (rcd->v4) {
665                 struct ether_hdr *eth = rte_pktmbuf_mtod(rxm, struct ether_hdr *);
666                 struct ipv4_hdr *ip = (struct ipv4_hdr *)(eth + 1);
667
668                 if (((ip->version_ihl & 0xf) << 2) > (int)sizeof(struct ipv4_hdr))
669                         rxm->packet_type = RTE_PTYPE_L3_IPV4_EXT;
670                 else
671                         rxm->packet_type = RTE_PTYPE_L3_IPV4;
672
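                /* cnc is set when the device did not compute checksums at all;
                 * otherwise ipc and tuc report whether the IP and TCP/UDP
                 * checksums were found correct (field names as defined in
                 * base/vmxnet3_defs.h).
                 */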
673                 if (!rcd->cnc) {
674                         if (!rcd->ipc)
675                                 rxm->ol_flags |= PKT_RX_IP_CKSUM_BAD;
676
677                         if ((rcd->tcp || rcd->udp) && !rcd->tuc)
678                                 rxm->ol_flags |= PKT_RX_L4_CKSUM_BAD;
679                 }
680         }
681 }
682
683 /*
684  * Process the Rx completion ring of the given vmxnet3_rx_queue for a burst
685  * of up to nb_pkts packets and return the number of packets received.
686  */
687 uint16_t
688 vmxnet3_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t nb_pkts)
689 {
690         uint16_t nb_rx;
691         uint32_t nb_rxd, idx;
692         uint8_t ring_idx;
693         vmxnet3_rx_queue_t *rxq;
694         Vmxnet3_RxCompDesc *rcd;
695         vmxnet3_buf_info_t *rbi;
696         Vmxnet3_RxDesc *rxd;
697         struct rte_mbuf *rxm = NULL;
698         struct vmxnet3_hw *hw;
699
700         nb_rx = 0;
701         ring_idx = 0;
702         nb_rxd = 0;
703         idx = 0;
704
705         rxq = rx_queue;
706         hw = rxq->hw;
707
708         rcd = &rxq->comp_ring.base[rxq->comp_ring.next2proc].rcd;
709
710         if (unlikely(rxq->stopped)) {
711                 PMD_RX_LOG(DEBUG, "Rx queue is stopped.");
712                 return 0;
713         }
714
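        /* Consume completion descriptors while their generation bit matches
         * the completion ring's current generation. rcd->rqID identifies the
         * command ring the buffer was posted on: qid1 maps to ring 0 (HEAD
         * buffers), qid2 to ring 1 (BODY buffers, used mostly for LRO).
         */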
715         while (rcd->gen == rxq->comp_ring.gen) {
716                 if (nb_rx >= nb_pkts)
717                         break;
718
719                 idx = rcd->rxdIdx;
720                 ring_idx = (uint8_t)((rcd->rqID == rxq->qid1) ? 0 : 1);
721                 rxd = (Vmxnet3_RxDesc *)rxq->cmd_ring[ring_idx].base + idx;
722                 RTE_SET_USED(rxd); /* used only for assert when enabled */
723                 rbi = rxq->cmd_ring[ring_idx].buf_info + idx;
724
725                 PMD_RX_LOG(DEBUG, "rxd idx: %d ring idx: %d.", idx, ring_idx);
726
727                 RTE_ASSERT(rcd->len <= rxd->len);
728                 RTE_ASSERT(rbi->m);
729
730                 /* Get the packet buffer pointer from buf_info */
731                 rxm = rbi->m;
732
733                 /* Clear descriptor associated buf_info to be reused */
734                 rbi->m = NULL;
735                 rbi->bufPA = 0;
736
737                 /* Update the index that we received a packet */
738                 rxq->cmd_ring[ring_idx].next2comp = idx;
739
740                 /* For RCD with EOP set, check if there is frame error */
741                 if (unlikely(rcd->eop && rcd->err)) {
742                         rxq->stats.drop_total++;
743                         rxq->stats.drop_err++;
744
745                         if (!rcd->fcs) {
746                                 rxq->stats.drop_fcs++;
747                                 PMD_RX_LOG(ERR, "Recv packet dropped due to frame err.");
748                         }
749                         PMD_RX_LOG(ERR, "Error in received packet rcd#:%d rxd:%d",
750                                    (int)(rcd - (struct Vmxnet3_RxCompDesc *)
751                                          rxq->comp_ring.base), rcd->rxdIdx);
752                         rte_pktmbuf_free_seg(rxm);
753                         goto rcd_done;
754                 }
755
756                 /* Initialize newly received packet buffer */
757                 rxm->port = rxq->port_id;
758                 rxm->nb_segs = 1;
759                 rxm->next = NULL;
760                 rxm->pkt_len = (uint16_t)rcd->len;
761                 rxm->data_len = (uint16_t)rcd->len;
762                 rxm->data_off = RTE_PKTMBUF_HEADROOM;
763                 rxm->ol_flags = 0;
764                 rxm->vlan_tci = 0;
765
766                 /*
767                  * If this is the first buffer of the received packet,
768                  * set the pointer to the first mbuf of the packet
769                  * Otherwise, update the total length and the number of segments
770                  * of the current scattered packet, and update the pointer to
771                  * the last mbuf of the current packet.
772                  */
773                 if (rcd->sop) {
774                         RTE_ASSERT(rxd->btype == VMXNET3_RXD_BTYPE_HEAD);
775
776                         if (unlikely(rcd->len == 0)) {
777                                 RTE_ASSERT(rcd->eop);
778
779                                 PMD_RX_LOG(DEBUG,
780                                            "Rx buf was skipped. rxring[%d][%d])",
781                                            ring_idx, idx);
782                                 rte_pktmbuf_free_seg(rxm);
783                                 goto rcd_done;
784                         }
785
786                         rxq->start_seg = rxm;
787                         vmxnet3_rx_offload(rcd, rxm);
788                 } else {
789                         struct rte_mbuf *start = rxq->start_seg;
790
791                         RTE_ASSERT(rxd->btype == VMXNET3_RXD_BTYPE_BODY);
792
793                         start->pkt_len += rxm->data_len;
794                         start->nb_segs++;
795
796                         rxq->last_seg->next = rxm;
797                 }
798                 rxq->last_seg = rxm;
799
800                 if (rcd->eop) {
801                         struct rte_mbuf *start = rxq->start_seg;
802
803                         /* Check for hardware stripped VLAN tag */
804                         if (rcd->ts) {
805                                 start->ol_flags |= (PKT_RX_VLAN_PKT | PKT_RX_VLAN_STRIPPED);
806                                 start->vlan_tci = rte_le_to_cpu_16((uint16_t)rcd->tci);
807                         }
808
809                         rx_pkts[nb_rx++] = start;
810                         rxq->start_seg = NULL;
811                 }
812
813 rcd_done:
814                 rxq->cmd_ring[ring_idx].next2comp = idx;
815                 VMXNET3_INC_RING_IDX_ONLY(rxq->cmd_ring[ring_idx].next2comp,
816                                           rxq->cmd_ring[ring_idx].size);
817
818                 /* It's time to allocate some new buf and renew descriptors */
819                 vmxnet3_post_rx_bufs(rxq, ring_idx);
820                 if (unlikely(rxq->shared->ctrl.updateRxProd)) {
821                         VMXNET3_WRITE_BAR0_REG(hw, rxprod_reg[ring_idx] + (rxq->queue_id * VMXNET3_REG_ALIGN),
822                                                rxq->cmd_ring[ring_idx].next2fill);
823                 }
824
825                 /* Advance to the next descriptor in comp_ring */
826                 vmxnet3_comp_ring_adv_next2proc(&rxq->comp_ring);
827
828                 rcd = &rxq->comp_ring.base[rxq->comp_ring.next2proc].rcd;
829                 nb_rxd++;
830                 if (nb_rxd > rxq->cmd_ring[0].size) {
831                         PMD_RX_LOG(ERR, "Used up quota of receiving packets,"
832                                    " relinquish control.");
833                         break;
834                 }
835         }
836
837         return nb_rx;
838 }
839
840 /*
841  * Create a memzone for the device rings. malloc() cannot be used because the
842  * physical address is needed. If the memzone already exists, this function
843  * returns a pointer to the existing one.
844  */
845 static const struct rte_memzone *
846 ring_dma_zone_reserve(struct rte_eth_dev *dev, const char *ring_name,
847                       uint16_t queue_id, uint32_t ring_size, int socket_id)
848 {
849         char z_name[RTE_MEMZONE_NAMESIZE];
850         const struct rte_memzone *mz;
851
852         snprintf(z_name, sizeof(z_name), "%s_%s_%d_%d",
853                  dev->driver->pci_drv.driver.name, ring_name,
854                  dev->data->port_id, queue_id);
855
856         mz = rte_memzone_lookup(z_name);
857         if (mz)
858                 return mz;
859
860         return rte_memzone_reserve_aligned(z_name, ring_size,
861                                            socket_id, 0, VMXNET3_RING_BA_ALIGN);
862 }
863
864 int
865 vmxnet3_dev_tx_queue_setup(struct rte_eth_dev *dev,
866                            uint16_t queue_idx,
867                            uint16_t nb_desc,
868                            unsigned int socket_id,
869                            const struct rte_eth_txconf *tx_conf)
870 {
871         struct vmxnet3_hw *hw = dev->data->dev_private;
872         const struct rte_memzone *mz;
873         struct vmxnet3_tx_queue *txq;
874         struct vmxnet3_cmd_ring *ring;
875         struct vmxnet3_comp_ring *comp_ring;
876         struct vmxnet3_data_ring *data_ring;
877         int size;
878
879         PMD_INIT_FUNC_TRACE();
880
881         if ((tx_conf->txq_flags & ETH_TXQ_FLAGS_NOXSUMSCTP) !=
882             ETH_TXQ_FLAGS_NOXSUMSCTP) {
883                 PMD_INIT_LOG(ERR, "SCTP checksum offload not supported");
884                 return -EINVAL;
885         }
886
887         txq = rte_zmalloc("ethdev_tx_queue", sizeof(struct vmxnet3_tx_queue),
888                           RTE_CACHE_LINE_SIZE);
889         if (txq == NULL) {
890                 PMD_INIT_LOG(ERR, "Can not allocate tx queue structure");
891                 return -ENOMEM;
892         }
893
894         txq->queue_id = queue_idx;
895         txq->port_id = dev->data->port_id;
896         txq->shared = &hw->tqd_start[queue_idx];
897         txq->hw = hw;
898         txq->qid = queue_idx;
899         txq->stopped = TRUE;
900
901         ring = &txq->cmd_ring;
902         comp_ring = &txq->comp_ring;
903         data_ring = &txq->data_ring;
904
905         /* Tx vmxnet ring length should be between 512-4096 */
906         if (nb_desc < VMXNET3_DEF_TX_RING_SIZE) {
907                 PMD_INIT_LOG(ERR, "VMXNET3 Tx Ring Size Min: %u",
908                              VMXNET3_DEF_TX_RING_SIZE);
909                 return -EINVAL;
910         } else if (nb_desc > VMXNET3_TX_RING_MAX_SIZE) {
911                 PMD_INIT_LOG(ERR, "VMXNET3 Tx Ring Size Max: %u",
912                              VMXNET3_TX_RING_MAX_SIZE);
913                 return -EINVAL;
914         } else {
915                 ring->size = nb_desc;
916                 ring->size &= ~VMXNET3_RING_SIZE_MASK;
917         }
918         comp_ring->size = data_ring->size = ring->size;
919
920         /* Tx vmxnet rings structure initialization */
921         ring->next2fill = 0;
922         ring->next2comp = 0;
923         ring->gen = VMXNET3_INIT_GEN;
924         comp_ring->next2proc = 0;
925         comp_ring->gen = VMXNET3_INIT_GEN;
926
927         size = sizeof(struct Vmxnet3_TxDesc) * ring->size;
928         size += sizeof(struct Vmxnet3_TxCompDesc) * comp_ring->size;
929         size += sizeof(struct Vmxnet3_TxDataDesc) * data_ring->size;
930
931         mz = ring_dma_zone_reserve(dev, "txdesc", queue_idx, size, socket_id);
932         if (mz == NULL) {
933                 PMD_INIT_LOG(ERR, "ERROR: Creating queue descriptors zone");
934                 return -ENOMEM;
935         }
936         memset(mz->addr, 0, mz->len);
937
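        /* The single memzone is carved up in the order used for the size
         * computation above: Tx descriptors first, then completion
         * descriptors, then data-ring descriptors.
         */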
938         /* cmd_ring initialization */
939         ring->base = mz->addr;
940         ring->basePA = mz->phys_addr;
941
942         /* comp_ring initialization */
943         comp_ring->base = ring->base + ring->size;
944         comp_ring->basePA = ring->basePA +
945                 (sizeof(struct Vmxnet3_TxDesc) * ring->size);
946
947         /* data_ring initialization */
948         data_ring->base = (Vmxnet3_TxDataDesc *)(comp_ring->base + comp_ring->size);
949         data_ring->basePA = comp_ring->basePA +
950                         (sizeof(struct Vmxnet3_TxCompDesc) * comp_ring->size);
951
952         /* cmd_ring0 buf_info allocation */
953         ring->buf_info = rte_zmalloc("tx_ring_buf_info",
954                                      ring->size * sizeof(vmxnet3_buf_info_t), RTE_CACHE_LINE_SIZE);
955         if (ring->buf_info == NULL) {
956                 PMD_INIT_LOG(ERR, "ERROR: Creating tx_buf_info structure");
957                 return -ENOMEM;
958         }
959
960         /* Update the data portion with txq */
961         dev->data->tx_queues[queue_idx] = txq;
962
963         return 0;
964 }
965
966 int
967 vmxnet3_dev_rx_queue_setup(struct rte_eth_dev *dev,
968                            uint16_t queue_idx,
969                            uint16_t nb_desc,
970                            unsigned int socket_id,
971                            __rte_unused const struct rte_eth_rxconf *rx_conf,
972                            struct rte_mempool *mp)
973 {
974         const struct rte_memzone *mz;
975         struct vmxnet3_rx_queue *rxq;
976         struct vmxnet3_hw *hw = dev->data->dev_private;
977         struct vmxnet3_cmd_ring *ring0, *ring1, *ring;
978         struct vmxnet3_comp_ring *comp_ring;
979         int size;
980         uint8_t i;
981         char mem_name[32];
982
983         PMD_INIT_FUNC_TRACE();
984
985         rxq = rte_zmalloc("ethdev_rx_queue", sizeof(struct vmxnet3_rx_queue),
986                           RTE_CACHE_LINE_SIZE);
987         if (rxq == NULL) {
988                 PMD_INIT_LOG(ERR, "Can not allocate rx queue structure");
989                 return -ENOMEM;
990         }
991
992         rxq->mp = mp;
993         rxq->queue_id = queue_idx;
994         rxq->port_id = dev->data->port_id;
995         rxq->shared = &hw->rqd_start[queue_idx];
996         rxq->hw = hw;
997         rxq->qid1 = queue_idx;
998         rxq->qid2 = queue_idx + hw->num_rx_queues;
999         rxq->stopped = TRUE;
1000
1001         ring0 = &rxq->cmd_ring[0];
1002         ring1 = &rxq->cmd_ring[1];
1003         comp_ring = &rxq->comp_ring;
1004
1005         /* Rx vmxnet rings length should be between 256-4096 */
1006         if (nb_desc < VMXNET3_DEF_RX_RING_SIZE) {
1007                 PMD_INIT_LOG(ERR, "VMXNET3 Rx Ring Size Min: %u", VMXNET3_DEF_RX_RING_SIZE);
1008                 return -EINVAL;
1009         } else if (nb_desc > VMXNET3_RX_RING_MAX_SIZE) {
1010                 PMD_INIT_LOG(ERR, "VMXNET3 Rx Ring Size Max: %u", VMXNET3_RX_RING_MAX_SIZE);
1011                 return -EINVAL;
1012         } else {
1013                 ring0->size = nb_desc;
1014                 ring0->size &= ~VMXNET3_RING_SIZE_MASK;
1015                 ring1->size = ring0->size;
1016         }
1017
1018         comp_ring->size = ring0->size + ring1->size;
1019
1020         /* Rx vmxnet rings structure initialization */
1021         ring0->next2fill = 0;
1022         ring1->next2fill = 0;
1023         ring0->next2comp = 0;
1024         ring1->next2comp = 0;
1025         ring0->gen = VMXNET3_INIT_GEN;
1026         ring1->gen = VMXNET3_INIT_GEN;
1027         comp_ring->next2proc = 0;
1028         comp_ring->gen = VMXNET3_INIT_GEN;
1029
1030         size = sizeof(struct Vmxnet3_RxDesc) * (ring0->size + ring1->size);
1031         size += sizeof(struct Vmxnet3_RxCompDesc) * comp_ring->size;
1032
1033         mz = ring_dma_zone_reserve(dev, "rxdesc", queue_idx, size, socket_id);
1034         if (mz == NULL) {
1035                 PMD_INIT_LOG(ERR, "ERROR: Creating queue descriptors zone");
1036                 return -ENOMEM;
1037         }
1038         memset(mz->addr, 0, mz->len);
1039
1040         /* cmd_ring0 initialization */
1041         ring0->base = mz->addr;
1042         ring0->basePA = mz->phys_addr;
1043
1044         /* cmd_ring1 initialization */
1045         ring1->base = ring0->base + ring0->size;
1046         ring1->basePA = ring0->basePA + sizeof(struct Vmxnet3_RxDesc) * ring0->size;
1047
1048         /* comp_ring initialization */
1049         comp_ring->base = ring1->base + ring1->size;
1050         comp_ring->basePA = ring1->basePA + sizeof(struct Vmxnet3_RxDesc) *
1051                 ring1->size;
1052
1053         /* cmd_ring0-cmd_ring1 buf_info allocation */
1054         for (i = 0; i < VMXNET3_RX_CMDRING_SIZE; i++) {
1055
1056                 ring = &rxq->cmd_ring[i];
1057                 ring->rid = i;
1058                 snprintf(mem_name, sizeof(mem_name), "rx_ring_%d_buf_info", i);
1059
1060                 ring->buf_info = rte_zmalloc(mem_name,
1061                                              ring->size * sizeof(vmxnet3_buf_info_t),
1062                                              RTE_CACHE_LINE_SIZE);
1063                 if (ring->buf_info == NULL) {
1064                         PMD_INIT_LOG(ERR, "ERROR: Creating rx_buf_info structure");
1065                         return -ENOMEM;
1066                 }
1067         }
1068
1069         /* Update the data portion with rxq */
1070         dev->data->rx_queues[queue_idx] = rxq;
1071
1072         return 0;
1073 }
1074
1075 /*
1076  * Initializes Receive Unit
1077  * Load mbufs in rx queue in advance
1078  */
1079 int
1080 vmxnet3_dev_rxtx_init(struct rte_eth_dev *dev)
1081 {
1082         struct vmxnet3_hw *hw = dev->data->dev_private;
1083
1084         int i, ret;
1085         uint8_t j;
1086
1087         PMD_INIT_FUNC_TRACE();
1088
1089         for (i = 0; i < hw->num_rx_queues; i++) {
1090                 vmxnet3_rx_queue_t *rxq = dev->data->rx_queues[i];
1091
1092                 for (j = 0; j < VMXNET3_RX_CMDRING_SIZE; j++) {
1093                         /* Fill the whole ring with buffers */
1094                         ret = vmxnet3_post_rx_bufs(rxq, j);
1095                         if (ret <= 0) {
1096                                 PMD_INIT_LOG(ERR,
1097                                              "ERROR: Posting Rxq: %d buffers ring: %d",
1098                                              i, j);
1099                                 return -ret;
1100                         }
1101                         /*
1102                          * Update the device with next2fill so that it can
1103                          * use the newly posted mbufs for incoming packets.
1104                          */
1105                         if (unlikely(rxq->shared->ctrl.updateRxProd)) {
1106                                 VMXNET3_WRITE_BAR0_REG(hw, rxprod_reg[j] + (rxq->queue_id * VMXNET3_REG_ALIGN),
1107                                                        rxq->cmd_ring[j].next2fill);
1108                         }
1109                 }
1110                 rxq->stopped = FALSE;
1111                 rxq->start_seg = NULL;
1112         }
1113
1114         for (i = 0; i < dev->data->nb_tx_queues; i++) {
1115                 struct vmxnet3_tx_queue *txq = dev->data->tx_queues[i];
1116
1117                 txq->stopped = FALSE;
1118         }
1119
1120         return 0;
1121 }
1122
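/*
 * Default 40-byte Toeplitz RSS key (hence the name rss_intel_key), used when
 * the application does not supply its own key.
 */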
1123 static uint8_t rss_intel_key[40] = {
1124         0x6D, 0x5A, 0x56, 0xDA, 0x25, 0x5B, 0x0E, 0xC2,
1125         0x41, 0x67, 0x25, 0x3D, 0x43, 0xA3, 0x8F, 0xB0,
1126         0xD0, 0xCA, 0x2B, 0xCB, 0xAE, 0x7B, 0x30, 0xB4,
1127         0x77, 0xCB, 0x2D, 0xA3, 0x80, 0x30, 0xF2, 0x0C,
1128         0x6A, 0x42, 0xB7, 0x3B, 0xBE, 0xAC, 0x01, 0xFA,
1129 };
1130
1131 /*
1132  * Configure RSS feature
1133  */
1134 int
1135 vmxnet3_rss_configure(struct rte_eth_dev *dev)
1136 {
1137         struct vmxnet3_hw *hw = dev->data->dev_private;
1138         struct VMXNET3_RSSConf *dev_rss_conf;
1139         struct rte_eth_rss_conf *port_rss_conf;
1140         uint64_t rss_hf;
1141         uint8_t i, j;
1142
1143         PMD_INIT_FUNC_TRACE();
1144
1145         dev_rss_conf = hw->rss_conf;
1146         port_rss_conf = &dev->data->dev_conf.rx_adv_conf.rss_conf;
1147
1148         /* loading hashFunc */
1149         dev_rss_conf->hashFunc = VMXNET3_RSS_HASH_FUNC_TOEPLITZ;
1150         /* loading hashKeySize */
1151         dev_rss_conf->hashKeySize = VMXNET3_RSS_MAX_KEY_SIZE;
1152         /* loading indTableSize: Must not exceed VMXNET3_RSS_MAX_IND_TABLE_SIZE (128) */
1153         dev_rss_conf->indTableSize = (uint16_t)(hw->num_rx_queues * 4);
1154
1155         if (port_rss_conf->rss_key == NULL) {
1156                 /* Default hash key */
1157                 port_rss_conf->rss_key = rss_intel_key;
1158         }
1159
1160         /* loading hashKey */
1161         memcpy(&dev_rss_conf->hashKey[0], port_rss_conf->rss_key,
1162                dev_rss_conf->hashKeySize);
1163
1164         /* loading indTable */
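        /* The indirection table is filled round-robin over the configured Rx
         * queues; e.g. with two Rx queues and an indTableSize of 8 it becomes
         * 0,1,0,1,0,1,0,1.
         */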
1165         for (i = 0, j = 0; i < dev_rss_conf->indTableSize; i++, j++) {
1166                 if (j == dev->data->nb_rx_queues)
1167                         j = 0;
1168                 dev_rss_conf->indTable[i] = j;
1169         }
1170
1171         /* loading hashType */
1172         dev_rss_conf->hashType = 0;
1173         rss_hf = port_rss_conf->rss_hf & VMXNET3_RSS_OFFLOAD_ALL;
1174         if (rss_hf & ETH_RSS_IPV4)
1175                 dev_rss_conf->hashType |= VMXNET3_RSS_HASH_TYPE_IPV4;
1176         if (rss_hf & ETH_RSS_NONFRAG_IPV4_TCP)
1177                 dev_rss_conf->hashType |= VMXNET3_RSS_HASH_TYPE_TCP_IPV4;
1178         if (rss_hf & ETH_RSS_IPV6)
1179                 dev_rss_conf->hashType |= VMXNET3_RSS_HASH_TYPE_IPV6;
1180         if (rss_hf & ETH_RSS_NONFRAG_IPV6_TCP)
1181                 dev_rss_conf->hashType |= VMXNET3_RSS_HASH_TYPE_TCP_IPV6;
1182
1183         return VMXNET3_SUCCESS;
1184 }