vmxnet3: support RSS and refactor Rx offload
[dpdk.git] / drivers / net / vmxnet3 / vmxnet3_rxtx.c
1 /*-
2  *   BSD LICENSE
3  *
4  *   Copyright(c) 2010-2015 Intel Corporation. All rights reserved.
5  *   All rights reserved.
6  *
7  *   Redistribution and use in source and binary forms, with or without
8  *   modification, are permitted provided that the following conditions
9  *   are met:
10  *
11  *     * Redistributions of source code must retain the above copyright
12  *       notice, this list of conditions and the following disclaimer.
13  *     * Redistributions in binary form must reproduce the above copyright
14  *       notice, this list of conditions and the following disclaimer in
15  *       the documentation and/or other materials provided with the
16  *       distribution.
17  *     * Neither the name of Intel Corporation nor the names of its
18  *       contributors may be used to endorse or promote products derived
19  *       from this software without specific prior written permission.
20  *
21  *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
22  *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
23  *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
24  *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
25  *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
26  *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
27  *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
28  *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
29  *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
30  *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
31  *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
32  */
33
34 #include <sys/queue.h>
35
36 #include <stdio.h>
37 #include <stdlib.h>
38 #include <string.h>
39 #include <errno.h>
40 #include <stdint.h>
41 #include <stdarg.h>
42 #include <unistd.h>
43 #include <inttypes.h>
44
45 #include <rte_byteorder.h>
46 #include <rte_common.h>
47 #include <rte_cycles.h>
48 #include <rte_log.h>
49 #include <rte_debug.h>
50 #include <rte_interrupts.h>
51 #include <rte_pci.h>
52 #include <rte_memory.h>
53 #include <rte_memzone.h>
54 #include <rte_launch.h>
55 #include <rte_eal.h>
56 #include <rte_per_lcore.h>
57 #include <rte_lcore.h>
58 #include <rte_atomic.h>
59 #include <rte_branch_prediction.h>
60 #include <rte_ring.h>
61 #include <rte_mempool.h>
62 #include <rte_malloc.h>
63 #include <rte_mbuf.h>
64 #include <rte_ether.h>
65 #include <rte_ethdev.h>
66 #include <rte_prefetch.h>
67 #include <rte_ip.h>
68 #include <rte_udp.h>
69 #include <rte_tcp.h>
70 #include <rte_sctp.h>
71 #include <rte_string_fns.h>
72 #include <rte_errno.h>
73
74 #include "base/vmxnet3_defs.h"
75 #include "vmxnet3_ring.h"
76
77 #include "vmxnet3_logs.h"
78 #include "vmxnet3_ethdev.h"
79
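/*
 * Helpers that turn an mbuf into the bus address programmed into a descriptor:
 * RTE_MBUF_DATA_DMA_ADDR uses the mbuf's current data_off, while the _DEFAULT
 * variant assumes a freshly allocated mbuf whose data starts right after
 * RTE_PKTMBUF_HEADROOM (as used when posting Rx buffers below).
 */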
80 #define RTE_MBUF_DATA_DMA_ADDR(mb) \
81         (uint64_t) ((mb)->buf_physaddr + (mb)->data_off)
82
83 #define RTE_MBUF_DATA_DMA_ADDR_DEFAULT(mb) \
84         (uint64_t) ((mb)->buf_physaddr + RTE_PKTMBUF_HEADROOM)
85
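/*
 * One Rx producer ("doorbell") register per hardware command ring; the refill
 * paths index this array by ring id (0 or 1) when telling the device how far
 * next2fill has advanced.
 */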
86 static uint32_t rxprod_reg[2] = {VMXNET3_REG_RXPROD, VMXNET3_REG_RXPROD2};
87
88 static inline int vmxnet3_post_rx_bufs(vmxnet3_rx_queue_t *, uint8_t);
89 static inline void vmxnet3_tq_tx_complete(vmxnet3_tx_queue_t *);
90 #ifdef RTE_LIBRTE_VMXNET3_DEBUG_DRIVER_NOT_USED
91 static void vmxnet3_rxq_dump(struct vmxnet3_rx_queue *);
92 static void vmxnet3_txq_dump(struct vmxnet3_tx_queue *);
93 #endif
94
95 static inline struct rte_mbuf *
96 rte_rxmbuf_alloc(struct rte_mempool *mp)
97 {
98         struct rte_mbuf *m;
99
100         m = __rte_mbuf_raw_alloc(mp);
101         __rte_mbuf_sanity_check_raw(m, 0);
102         return m;
103 }
104
105 #ifdef RTE_LIBRTE_VMXNET3_DEBUG_DRIVER_NOT_USED
106 static void
107 vmxnet3_rxq_dump(struct vmxnet3_rx_queue *rxq)
108 {
109         uint32_t avail = 0;
110
111         if (rxq == NULL)
112                 return;
113
114         PMD_RX_LOG(DEBUG,
115                    "RXQ: cmd0 base : 0x%p cmd1 base : 0x%p comp ring base : 0x%p.",
116                    rxq->cmd_ring[0].base, rxq->cmd_ring[1].base, rxq->comp_ring.base);
117         PMD_RX_LOG(DEBUG,
118                    "RXQ: cmd0 basePA : 0x%lx cmd1 basePA : 0x%lx comp ring basePA : 0x%lx.",
119                    (unsigned long)rxq->cmd_ring[0].basePA,
120                    (unsigned long)rxq->cmd_ring[1].basePA,
121                    (unsigned long)rxq->comp_ring.basePA);
122
123         avail = vmxnet3_cmd_ring_desc_avail(&rxq->cmd_ring[0]);
124         PMD_RX_LOG(DEBUG,
125                    "RXQ:cmd0: size=%u; free=%u; next2proc=%u; queued=%u",
126                    (uint32_t)rxq->cmd_ring[0].size, avail,
127                    rxq->comp_ring.next2proc,
128                    rxq->cmd_ring[0].size - avail);
129
130         avail = vmxnet3_cmd_ring_desc_avail(&rxq->cmd_ring[1]);
131         PMD_RX_LOG(DEBUG, "RXQ:cmd1 size=%u; free=%u; next2proc=%u; queued=%u",
132                    (uint32_t)rxq->cmd_ring[1].size, avail, rxq->comp_ring.next2proc,
133                    rxq->cmd_ring[1].size - avail);
134
135 }
136
137 static void
138 vmxnet3_txq_dump(struct vmxnet3_tx_queue *txq)
139 {
140         uint32_t avail = 0;
141
142         if (txq == NULL)
143                 return;
144
145         PMD_TX_LOG(DEBUG, "TXQ: cmd base : 0x%p comp ring base : 0x%p data ring base : 0x%p.",
146                    txq->cmd_ring.base, txq->comp_ring.base, txq->data_ring.base);
147         PMD_TX_LOG(DEBUG, "TXQ: cmd basePA : 0x%lx comp ring basePA : 0x%lx data ring basePA : 0x%lx.",
148                    (unsigned long)txq->cmd_ring.basePA,
149                    (unsigned long)txq->comp_ring.basePA,
150                    (unsigned long)txq->data_ring.basePA);
151
152         avail = vmxnet3_cmd_ring_desc_avail(&txq->cmd_ring);
153         PMD_TX_LOG(DEBUG, "TXQ: size=%u; free=%u; next2proc=%u; queued=%u",
154                    (uint32_t)txq->cmd_ring.size, avail,
155                    txq->comp_ring.next2proc, txq->cmd_ring.size - avail);
156 }
157 #endif
158
159 static inline void
160 vmxnet3_cmd_ring_release_mbufs(vmxnet3_cmd_ring_t *ring)
161 {
162         while (ring->next2comp != ring->next2fill) {
163                 /* No need to worry about tx desc ownership, device is quiesced by now. */
164                 vmxnet3_buf_info_t *buf_info = ring->buf_info + ring->next2comp;
165
166                 if (buf_info->m) {
167                         rte_pktmbuf_free(buf_info->m);
168                         buf_info->m = NULL;
169                         buf_info->bufPA = 0;
170                         buf_info->len = 0;
171                 }
172                 vmxnet3_cmd_ring_adv_next2comp(ring);
173         }
174 }
175
176 static void
177 vmxnet3_cmd_ring_release(vmxnet3_cmd_ring_t *ring)
178 {
179         vmxnet3_cmd_ring_release_mbufs(ring);
180         rte_free(ring->buf_info);
181         ring->buf_info = NULL;
182 }
183
184
185 void
186 vmxnet3_dev_tx_queue_release(void *txq)
187 {
188         vmxnet3_tx_queue_t *tq = txq;
189
190         if (tq != NULL) {
191                 /* Release the cmd_ring */
192                 vmxnet3_cmd_ring_release(&tq->cmd_ring);
193         }
194 }
195
196 void
197 vmxnet3_dev_rx_queue_release(void *rxq)
198 {
199         int i;
200         vmxnet3_rx_queue_t *rq = rxq;
201
202         if (rq != NULL) {
203                 /* Release both the cmd_rings */
204                 for (i = 0; i < VMXNET3_RX_CMDRING_SIZE; i++)
205                         vmxnet3_cmd_ring_release(&rq->cmd_ring[i]);
206         }
207 }
208
209 static void
210 vmxnet3_dev_tx_queue_reset(void *txq)
211 {
212         vmxnet3_tx_queue_t *tq = txq;
213         struct vmxnet3_cmd_ring *ring = &tq->cmd_ring;
214         struct vmxnet3_comp_ring *comp_ring = &tq->comp_ring;
215         struct vmxnet3_data_ring *data_ring = &tq->data_ring;
216         int size;
217
218         if (tq != NULL) {
219                 /* Release the cmd_ring mbufs */
220                 vmxnet3_cmd_ring_release_mbufs(&tq->cmd_ring);
221         }
222
223         /* Tx vmxnet rings structure initialization */
224         ring->next2fill = 0;
225         ring->next2comp = 0;
226         ring->gen = VMXNET3_INIT_GEN;
227         comp_ring->next2proc = 0;
228         comp_ring->gen = VMXNET3_INIT_GEN;
229
230         size = sizeof(struct Vmxnet3_TxDesc) * ring->size;
231         size += sizeof(struct Vmxnet3_TxCompDesc) * comp_ring->size;
232         size += sizeof(struct Vmxnet3_TxDataDesc) * data_ring->size;
233
234         memset(ring->base, 0, size);
235 }
236
237 static void
238 vmxnet3_dev_rx_queue_reset(void *rxq)
239 {
240         int i;
241         vmxnet3_rx_queue_t *rq = rxq;
242         struct vmxnet3_cmd_ring *ring0, *ring1;
243         struct vmxnet3_comp_ring *comp_ring;
244         int size;
245
246         if (rq != NULL) {
247                 /* Release both the cmd_rings mbufs */
248                 for (i = 0; i < VMXNET3_RX_CMDRING_SIZE; i++)
249                         vmxnet3_cmd_ring_release_mbufs(&rq->cmd_ring[i]);
250         }
251
252         ring0 = &rq->cmd_ring[0];
253         ring1 = &rq->cmd_ring[1];
254         comp_ring = &rq->comp_ring;
255
256         /* Rx vmxnet rings structure initialization */
257         ring0->next2fill = 0;
258         ring1->next2fill = 0;
259         ring0->next2comp = 0;
260         ring1->next2comp = 0;
261         ring0->gen = VMXNET3_INIT_GEN;
262         ring1->gen = VMXNET3_INIT_GEN;
263         comp_ring->next2proc = 0;
264         comp_ring->gen = VMXNET3_INIT_GEN;
265
266         size = sizeof(struct Vmxnet3_RxDesc) * (ring0->size + ring1->size);
267         size += sizeof(struct Vmxnet3_RxCompDesc) * comp_ring->size;
268
269         memset(ring0->base, 0, size);
270 }
271
272 void
273 vmxnet3_dev_clear_queues(struct rte_eth_dev *dev)
274 {
275         unsigned i;
276
277         PMD_INIT_FUNC_TRACE();
278
279         for (i = 0; i < dev->data->nb_tx_queues; i++) {
280                 struct vmxnet3_tx_queue *txq = dev->data->tx_queues[i];
281
282                 if (txq != NULL) {
283                         txq->stopped = TRUE;
284                         vmxnet3_dev_tx_queue_reset(txq);
285                 }
286         }
287
288         for (i = 0; i < dev->data->nb_rx_queues; i++) {
289                 struct vmxnet3_rx_queue *rxq = dev->data->rx_queues[i];
290
291                 if (rxq != NULL) {
292                         rxq->stopped = TRUE;
293                         vmxnet3_dev_rx_queue_reset(rxq);
294                 }
295         }
296 }
297
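/*
 * Reclaim completed Tx descriptors: process completion-ring entries whose
 * generation bit matches the ring's current gen and, for each one, walk
 * cmd_ring.next2comp forward to tcd->txdIdx, freeing the mbuf segment
 * recorded for every descriptor passed along the way.
 */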
298 static inline void
299 vmxnet3_tq_tx_complete(vmxnet3_tx_queue_t *txq)
300 {
301         int completed = 0;
302         struct rte_mbuf *mbuf;
303         vmxnet3_comp_ring_t *comp_ring = &txq->comp_ring;
304         struct Vmxnet3_TxCompDesc *tcd = (struct Vmxnet3_TxCompDesc *)
305                 (comp_ring->base + comp_ring->next2proc);
306
307         while (tcd->gen == comp_ring->gen) {
308                 /* Release cmd_ring descriptor and free mbuf */
309 #ifdef RTE_LIBRTE_VMXNET3_DEBUG_DRIVER
310                 VMXNET3_ASSERT(txq->cmd_ring.base[tcd->txdIdx].txd.eop == 1);
311 #endif
312                 while (txq->cmd_ring.next2comp != tcd->txdIdx) {
313                         mbuf = txq->cmd_ring.buf_info[txq->cmd_ring.next2comp].m;
314                         txq->cmd_ring.buf_info[txq->cmd_ring.next2comp].m = NULL;
315                         rte_pktmbuf_free_seg(mbuf);
316
317                         /* Mark the txd for which tcd was generated as completed */
318                         vmxnet3_cmd_ring_adv_next2comp(&txq->cmd_ring);
319                         completed++;
320                 }
321
322                 vmxnet3_comp_ring_adv_next2proc(comp_ring);
323                 tcd = (struct Vmxnet3_TxCompDesc *)(comp_ring->base +
324                                                     comp_ring->next2proc);
325         }
326
327         PMD_TX_LOG(DEBUG, "Processed %d tx comps & command descs.", completed);
328 }
329
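/*
 * Transmit burst path: completions are reclaimed first, packets with more
 * segments than VMXNET3_MAX_TXD_PER_PKT are dropped, and one Tx descriptor is
 * filled per mbuf segment. The SOP descriptor is initially written with the
 * inverted generation bit and only flipped to the live value after the EOP
 * flags (VMXNET3_TXD_EOP | VMXNET3_TXD_CQ) and any VLAN tag are in place, so
 * the device never sees a half-built chain. After the burst, the TXPROD
 * doorbell is written only if txNumDeferred has reached the shared
 * txThreshold, batching register writes.
 */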
330 uint16_t
331 vmxnet3_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts,
332                   uint16_t nb_pkts)
333 {
334         uint16_t nb_tx;
335         vmxnet3_tx_queue_t *txq = tx_queue;
336         struct vmxnet3_hw *hw = txq->hw;
337
338         if (unlikely(txq->stopped)) {
339                 PMD_TX_LOG(DEBUG, "Tx queue is stopped.");
340                 return 0;
341         }
342
343         /* Free up the comp_descriptors aggressively */
344         vmxnet3_tq_tx_complete(txq);
345
346         nb_tx = 0;
347         while (nb_tx < nb_pkts) {
348                 Vmxnet3_GenericDesc *gdesc;
349                 vmxnet3_buf_info_t *tbi;
350                 uint32_t first2fill, avail, dw2;
351                 struct rte_mbuf *txm = tx_pkts[nb_tx];
352                 struct rte_mbuf *m_seg = txm;
353
354                 /* If the packet is excessively fragmented, drop it */
355                 if (unlikely(txm->nb_segs > VMXNET3_MAX_TXD_PER_PKT)) {
356                         ++txq->stats.drop_too_many_segs;
357                         ++txq->stats.drop_total;
358                         rte_pktmbuf_free(txm);
359                         ++nb_tx;
360                         continue;
361                 }
362
363                 /* Is command ring full? */
364                 avail = vmxnet3_cmd_ring_desc_avail(&txq->cmd_ring);
365                 if (txm->nb_segs > avail) {
366                         ++txq->stats.tx_ring_full;
367                         break;
368                 }
369
370                 /* use the previous gen bit for the SOP desc */
371                 dw2 = (txq->cmd_ring.gen ^ 0x1) << VMXNET3_TXD_GEN_SHIFT;
372                 first2fill = txq->cmd_ring.next2fill;
373                 do {
374                         /* Remember the transmit buffer for cleanup */
375                         tbi = txq->cmd_ring.buf_info + txq->cmd_ring.next2fill;
376                         tbi->m = m_seg;
377
378                         /* NB: the following assumes that the VMXNET3 maximum
379                            transmit buffer size (16K) is greater than the
380                            maximum mbuf segment size. */
381                         gdesc = txq->cmd_ring.base + txq->cmd_ring.next2fill;
382                         gdesc->txd.addr = RTE_MBUF_DATA_DMA_ADDR(m_seg);
383                         gdesc->dword[2] = dw2 | m_seg->data_len;
384                         gdesc->dword[3] = 0;
385
386                         /* move to the next2fill descriptor */
387                         vmxnet3_cmd_ring_adv_next2fill(&txq->cmd_ring);
388
389                         /* use the right gen for non-SOP desc */
390                         dw2 = txq->cmd_ring.gen << VMXNET3_TXD_GEN_SHIFT;
391                 } while ((m_seg = m_seg->next) != NULL);
392
393                 /* Update the EOP descriptor */
394                 gdesc->dword[3] |= VMXNET3_TXD_EOP | VMXNET3_TXD_CQ;
395
396                 /* Add VLAN tag if present */
397                 gdesc = txq->cmd_ring.base + first2fill;
398                 if (txm->ol_flags & PKT_TX_VLAN_PKT) {
399                         gdesc->txd.ti = 1;
400                         gdesc->txd.tci = txm->vlan_tci;
401                 }
402
403                 /* TODO: Add transmit checksum offload here */
404
405                 /* flip the GEN bit on the SOP */
406                 rte_compiler_barrier();
407                 gdesc->dword[2] ^= VMXNET3_TXD_GEN;
408
409                 txq->shared->ctrl.txNumDeferred++;
410                 nb_tx++;
411         }
412
413         PMD_TX_LOG(DEBUG, "vmxnet3 txThreshold: %u", txq->shared->ctrl.txThreshold);
414
415         if (txq->shared->ctrl.txNumDeferred >= txq->shared->ctrl.txThreshold) {
416
417                 txq->shared->ctrl.txNumDeferred = 0;
418                 /* Notify vSwitch that packets are available. */
419                 VMXNET3_WRITE_BAR0_REG(hw, (VMXNET3_REG_TXPROD + txq->queue_id * VMXNET3_REG_ALIGN),
420                                        txq->cmd_ring.next2fill);
421         }
422
423         return nb_tx;
424 }
425
426 /*
427  *  Allocates mbufs and clusters. Post rx descriptors with buffer details
428  *  so that device can receive packets in those buffers.
429  *      Ring layout:
430  *      Of the two rings, the 1st ring holds buffers of type 0 and type 1;
431  *      bufs_per_pkt is set so that for non-LRO cases all the buffers required
432  *      by a frame fit in the 1st ring (the 1st buf of type 0, the rest of type 1).
433  *      The 2nd ring holds buffers of type 1 only and is mostly used
434  *      for LRO.
435  *
436  */
437 static inline int
438 vmxnet3_post_rx_bufs(vmxnet3_rx_queue_t *rxq, uint8_t ring_id)
439 {
440         int err = 0;
441         uint32_t i = 0, val = 0;
442         struct vmxnet3_cmd_ring *ring = &rxq->cmd_ring[ring_id];
443
444         if (ring_id == 0) {
445                 /* Usually: One HEAD type buf per packet
446                  * val = (ring->next2fill % rxq->hw->bufs_per_pkt) ?
447                  * VMXNET3_RXD_BTYPE_BODY : VMXNET3_RXD_BTYPE_HEAD;
448                  */
449
450                 /* We use single packet buffer so all heads here */
451                 val = VMXNET3_RXD_BTYPE_HEAD;
452         } else {
453                 /* All BODY type buffers for 2nd ring */
454                 val = VMXNET3_RXD_BTYPE_BODY;
455         }
456
457         while (vmxnet3_cmd_ring_desc_avail(ring) > 0) {
458                 struct Vmxnet3_RxDesc *rxd;
459                 struct rte_mbuf *mbuf;
460                 vmxnet3_buf_info_t *buf_info = &ring->buf_info[ring->next2fill];
461
462                 rxd = (struct Vmxnet3_RxDesc *)(ring->base + ring->next2fill);
463
464                 /* Allocate blank mbuf for the current Rx Descriptor */
465                 mbuf = rte_rxmbuf_alloc(rxq->mp);
466                 if (unlikely(mbuf == NULL)) {
467                         PMD_RX_LOG(ERR, "Error allocating mbuf in %s", __func__);
468                         rxq->stats.rx_buf_alloc_failure++;
469                         err = ENOMEM;
470                         break;
471                 }
472
473                 /*
474                  * Load the mbuf pointer into buf_info[next2fill];
475                  * the buf_info entry is the equivalent of a virtio-virtqueue cookie.
476                  */
477                 buf_info->m = mbuf;
478                 buf_info->len = (uint16_t)(mbuf->buf_len -
479                                            RTE_PKTMBUF_HEADROOM);
480                 buf_info->bufPA = RTE_MBUF_DATA_DMA_ADDR_DEFAULT(mbuf);
481
482                 /* Load Rx Descriptor with the buffer's GPA */
483                 rxd->addr = buf_info->bufPA;
484
485                 /* After this point rxd->addr MUST not be NULL */
486                 rxd->btype = val;
487                 rxd->len = buf_info->len;
488                 /* Flip gen bit at the end to change ownership */
489                 rxd->gen = ring->gen;
490
491                 vmxnet3_cmd_ring_adv_next2fill(ring);
492                 i++;
493         }
494
495         /* Return error only if no buffers are posted at present */
496         if (vmxnet3_cmd_ring_desc_avail(ring) >= (ring->size - 1))
497                 return -err;
498         else
499                 return i;
500 }
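/*
 * Note on the function above: rxd->gen is written last on purpose, since
 * setting the generation bit is what hands the descriptor to the device, so
 * everything else must already be valid at that point. The return value is
 * the number of buffers newly posted, or a non-positive value when the ring
 * still has no buffers posted at all, which is how vmxnet3_dev_rxtx_init()
 * detects a fatal shortage.
 */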
501
502
503 /* Receive side checksum and other offloads */
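/*
 * Mapping of completion-descriptor bits to mbuf ol_flags as implemented below:
 * rcd->ts (VLAN tag stripped) sets PKT_RX_VLAN_PKT and vlan_tci, a non-NONE
 * rcd->rssType sets PKT_RX_RSS_HASH and hash.rss, and for IPv4 frames the
 * header length selects PKT_RX_IPV4_HDR or PKT_RX_IPV4_HDR_EXT while the
 * ipc/tuc bits (unless checksum calculation was skipped, rcd->cnc) drive
 * PKT_RX_IP_CKSUM_BAD and PKT_RX_L4_CKSUM_BAD.
 */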
504 static void
505 vmxnet3_rx_offload(const Vmxnet3_RxCompDesc *rcd, struct rte_mbuf *rxm)
506 {
507         /* Check for hardware stripped VLAN tag */
508         if (rcd->ts) {
509                 rxm->ol_flags |= PKT_RX_VLAN_PKT;
510                 rxm->vlan_tci = rte_le_to_cpu_16((uint16_t)rcd->tci);
511         }
512
513         /* Check for RSS */
514         if (rcd->rssType != VMXNET3_RCD_RSS_TYPE_NONE) {
515                 rxm->ol_flags |= PKT_RX_RSS_HASH;
516                 rxm->hash.rss = rcd->rssHash;
517         }
518
519         /* Check packet type and checksum errors; only IPv4 is handled for now. */
520         if (rcd->v4) {
521                 struct ether_hdr *eth = rte_pktmbuf_mtod(rxm, struct ether_hdr *);
522                 struct ipv4_hdr *ip = (struct ipv4_hdr *)(eth + 1);
523
524                 if (((ip->version_ihl & 0xf) << 2) > (int)sizeof(struct ipv4_hdr))
525                         rxm->ol_flags |= PKT_RX_IPV4_HDR_EXT;
526                 else
527                         rxm->ol_flags |= PKT_RX_IPV4_HDR;
528
529                 if (!rcd->cnc) {
530                         if (!rcd->ipc)
531                                 rxm->ol_flags |= PKT_RX_IP_CKSUM_BAD;
532
533                         if ((rcd->tcp || rcd->udp) && !rcd->tuc)
534                                 rxm->ol_flags |= PKT_RX_L4_CKSUM_BAD;
535                 }
536         }
537 }
538
539 /*
540  * Process the Rx Completion Ring of given vmxnet3_rx_queue
541  * for nb_pkts burst and return the number of packets received
542  */
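/*
 * Each completion carries the hardware queue id it belongs to: comparing
 * rcd->rqID against rxq->qid1 (queue_idx, with qid2 = queue_idx +
 * num_rx_queues set at queue setup) selects command ring 0 or 1. Because
 * buffers are posted as single HEAD-type packet buffers, completions without
 * both sop and eop set are treated as errors and the buffer is dropped.
 */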
543 uint16_t
544 vmxnet3_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t nb_pkts)
545 {
546         uint16_t nb_rx;
547         uint32_t nb_rxd, idx;
548         uint8_t ring_idx;
549         vmxnet3_rx_queue_t *rxq;
550         Vmxnet3_RxCompDesc *rcd;
551         vmxnet3_buf_info_t *rbi;
552         Vmxnet3_RxDesc *rxd;
553         struct rte_mbuf *rxm = NULL;
554         struct vmxnet3_hw *hw;
555
556         nb_rx = 0;
557         ring_idx = 0;
558         nb_rxd = 0;
559         idx = 0;
560
561         rxq = rx_queue;
562         hw = rxq->hw;
563
564         rcd = &rxq->comp_ring.base[rxq->comp_ring.next2proc].rcd;
565
566         if (unlikely(rxq->stopped)) {
567                 PMD_RX_LOG(DEBUG, "Rx queue is stopped.");
568                 return 0;
569         }
570
571         while (rcd->gen == rxq->comp_ring.gen) {
572                 if (nb_rx >= nb_pkts)
573                         break;
574
575                 idx = rcd->rxdIdx;
576                 ring_idx = (uint8_t)((rcd->rqID == rxq->qid1) ? 0 : 1);
577                 rxd = (Vmxnet3_RxDesc *)rxq->cmd_ring[ring_idx].base + idx;
578                 rbi = rxq->cmd_ring[ring_idx].buf_info + idx;
579
580                 if (unlikely(rcd->sop != 1 || rcd->eop != 1)) {
581                         rte_pktmbuf_free_seg(rbi->m);
582                 PMD_RX_LOG(DEBUG, "Packet spread across multiple buffers\n");
583                         goto rcd_done;
584                 }
585
586                 PMD_RX_LOG(DEBUG, "rxd idx: %d ring idx: %d.", idx, ring_idx);
587
588 #ifdef RTE_LIBRTE_VMXNET3_DEBUG_DRIVER
589                 VMXNET3_ASSERT(rcd->len <= rxd->len);
590                 VMXNET3_ASSERT(rbi->m);
591 #endif
592                 if (unlikely(rcd->len == 0)) {
593                         PMD_RX_LOG(DEBUG, "Rx buf was skipped. rxring[%d][%d]\n",
594                                    ring_idx, idx);
595 #ifdef RTE_LIBRTE_VMXNET3_DEBUG_DRIVER
596                         VMXNET3_ASSERT(rcd->sop && rcd->eop);
597 #endif
598                         rte_pktmbuf_free_seg(rbi->m);
599                         goto rcd_done;
600                 }
601
602                 /* Assuming a packet is coming in a single packet buffer */
603                 if (unlikely(rxd->btype != VMXNET3_RXD_BTYPE_HEAD)) {
604                         PMD_RX_LOG(DEBUG,
605                                    "Alert: Misbehaving device, incorrect "
606                                    "buffer type used. Packet dropped.");
607                         rte_pktmbuf_free_seg(rbi->m);
608                         goto rcd_done;
609                 }
610 #ifdef RTE_LIBRTE_VMXNET3_DEBUG_DRIVER
611                 VMXNET3_ASSERT(rxd->btype == VMXNET3_RXD_BTYPE_HEAD);
612 #endif
613                 /* Get the packet buffer pointer from buf_info */
614                 rxm = rbi->m;
615
616                 /* Clear descriptor associated buf_info to be reused */
617                 rbi->m = NULL;
618                 rbi->bufPA = 0;
619
620                 /* Update the index that we received a packet */
621                 rxq->cmd_ring[ring_idx].next2comp = idx;
622
623                 /* For RCD with EOP set, check if there is frame error */
624                 if (unlikely(rcd->err)) {
625                         rxq->stats.drop_total++;
626                         rxq->stats.drop_err++;
627
628                         if (!rcd->fcs) {
629                                 rxq->stats.drop_fcs++;
630                                 PMD_RX_LOG(ERR, "Recv packet dropped due to frame err.");
631                         }
632                         PMD_RX_LOG(ERR, "Error in received packet rcd#:%d rxd:%d",
633                                    (int)(rcd - (struct Vmxnet3_RxCompDesc *)
634                                          rxq->comp_ring.base), rcd->rxdIdx);
635                         rte_pktmbuf_free_seg(rxm);
636                         goto rcd_done;
637                 }
638
639
640                 /* Initialize newly received packet buffer */
641                 rxm->port = rxq->port_id;
642                 rxm->nb_segs = 1;
643                 rxm->next = NULL;
644                 rxm->pkt_len = (uint16_t)rcd->len;
645                 rxm->data_len = (uint16_t)rcd->len;
646                 rxm->data_off = RTE_PKTMBUF_HEADROOM;
647                 rxm->ol_flags = 0;
648                 rxm->vlan_tci = 0;
649
650                 vmxnet3_rx_offload(rcd, rxm);
651
652                 rx_pkts[nb_rx++] = rxm;
653 rcd_done:
654                 rxq->cmd_ring[ring_idx].next2comp = idx;
655                 VMXNET3_INC_RING_IDX_ONLY(rxq->cmd_ring[ring_idx].next2comp, rxq->cmd_ring[ring_idx].size);
656
657                 /* It's time to allocate some new buf and renew descriptors */
658                 vmxnet3_post_rx_bufs(rxq, ring_idx);
659                 if (unlikely(rxq->shared->ctrl.updateRxProd)) {
660                         VMXNET3_WRITE_BAR0_REG(hw, rxprod_reg[ring_idx] + (rxq->queue_id * VMXNET3_REG_ALIGN),
661                                                rxq->cmd_ring[ring_idx].next2fill);
662                 }
663
664                 /* Advance to the next descriptor in comp_ring */
665                 vmxnet3_comp_ring_adv_next2proc(&rxq->comp_ring);
666
667                 rcd = &rxq->comp_ring.base[rxq->comp_ring.next2proc].rcd;
668                 nb_rxd++;
669                 if (nb_rxd > rxq->cmd_ring[0].size) {
670                         PMD_RX_LOG(ERR,
671                                    "Used up the receive quota for this burst,"
672                                    " relinquishing control.");
673                         break;
674                 }
675         }
676
677         return nb_rx;
678 }
679
680 /*
681  * Create memzone for device rings. malloc can't be used as the physical address is
682  * needed. If the memzone has already been created, this function returns a
683  * pointer to the existing one.
684  */
685 static const struct rte_memzone *
686 ring_dma_zone_reserve(struct rte_eth_dev *dev, const char *ring_name,
687                       uint16_t queue_id, uint32_t ring_size, int socket_id)
688 {
689         char z_name[RTE_MEMZONE_NAMESIZE];
690         const struct rte_memzone *mz;
691
692         snprintf(z_name, sizeof(z_name), "%s_%s_%d_%d",
693                         dev->driver->pci_drv.name, ring_name,
694                         dev->data->port_id, queue_id);
695
696         mz = rte_memzone_lookup(z_name);
697         if (mz)
698                 return mz;
699
700         return rte_memzone_reserve_aligned(z_name, ring_size,
701                         socket_id, 0, VMXNET3_RING_BA_ALIGN);
702 }
703
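/*
 * The Tx queue uses one contiguous DMA memzone laid out as
 *   [Vmxnet3_TxDesc x ring->size][Vmxnet3_TxCompDesc x comp_ring->size]
 *   [Vmxnet3_TxDataDesc x data_ring->size]
 * which matches the size computation and the base/basePA offsets below.
 */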
704 int
705 vmxnet3_dev_tx_queue_setup(struct rte_eth_dev *dev,
706                            uint16_t queue_idx,
707                            uint16_t nb_desc,
708                            unsigned int socket_id,
709                            __attribute__((unused)) const struct rte_eth_txconf *tx_conf)
710 {
711         struct vmxnet3_hw *hw = dev->data->dev_private;
712         const struct rte_memzone *mz;
713         struct vmxnet3_tx_queue *txq;
714         struct vmxnet3_cmd_ring *ring;
715         struct vmxnet3_comp_ring *comp_ring;
716         struct vmxnet3_data_ring *data_ring;
717         int size;
718
719         PMD_INIT_FUNC_TRACE();
720
721         if ((tx_conf->txq_flags & ETH_TXQ_FLAGS_NOXSUMS) !=
722             ETH_TXQ_FLAGS_NOXSUMS) {
723                 PMD_INIT_LOG(ERR, "Tx checksum offload is not supported yet");
724                 return -EINVAL;
725         }
726
727         txq = rte_zmalloc("ethdev_tx_queue", sizeof(struct vmxnet3_tx_queue), RTE_CACHE_LINE_SIZE);
728         if (txq == NULL) {
729                 PMD_INIT_LOG(ERR, "Can not allocate tx queue structure");
730                 return -ENOMEM;
731         }
732
733         txq->queue_id = queue_idx;
734         txq->port_id = dev->data->port_id;
735         txq->shared = &hw->tqd_start[queue_idx];
736         txq->hw = hw;
737         txq->qid = queue_idx;
738         txq->stopped = TRUE;
739
740         ring = &txq->cmd_ring;
741         comp_ring = &txq->comp_ring;
742         data_ring = &txq->data_ring;
743
744         /* Tx vmxnet ring length should be between 512-4096 */
745         if (nb_desc < VMXNET3_DEF_TX_RING_SIZE) {
746                 PMD_INIT_LOG(ERR, "VMXNET3 Tx Ring Size Min: %u",
747                              VMXNET3_DEF_TX_RING_SIZE);
748                 return -EINVAL;
749         } else if (nb_desc > VMXNET3_TX_RING_MAX_SIZE) {
750                 PMD_INIT_LOG(ERR, "VMXNET3 Tx Ring Size Max: %u",
751                              VMXNET3_TX_RING_MAX_SIZE);
752                 return -EINVAL;
753         } else {
754                 ring->size = nb_desc;
755                 ring->size &= ~VMXNET3_RING_SIZE_MASK;
756         }
757         comp_ring->size = data_ring->size = ring->size;
758
759         /* Tx vmxnet rings structure initialization */
760         ring->next2fill = 0;
761         ring->next2comp = 0;
762         ring->gen = VMXNET3_INIT_GEN;
763         comp_ring->next2proc = 0;
764         comp_ring->gen = VMXNET3_INIT_GEN;
765
766         size = sizeof(struct Vmxnet3_TxDesc) * ring->size;
767         size += sizeof(struct Vmxnet3_TxCompDesc) * comp_ring->size;
768         size += sizeof(struct Vmxnet3_TxDataDesc) * data_ring->size;
769
770         mz = ring_dma_zone_reserve(dev, "txdesc", queue_idx, size, socket_id);
771         if (mz == NULL) {
772                 PMD_INIT_LOG(ERR, "ERROR: Creating queue descriptors zone");
773                 return -ENOMEM;
774         }
775         memset(mz->addr, 0, mz->len);
776
777         /* cmd_ring initialization */
778         ring->base = mz->addr;
779         ring->basePA = mz->phys_addr;
780
781         /* comp_ring initialization */
782         comp_ring->base = ring->base + ring->size;
783         comp_ring->basePA = ring->basePA +
784                 (sizeof(struct Vmxnet3_TxDesc) * ring->size);
785
786         /* data_ring initialization */
787         data_ring->base = (Vmxnet3_TxDataDesc *)(comp_ring->base + comp_ring->size);
788         data_ring->basePA = comp_ring->basePA +
789                         (sizeof(struct Vmxnet3_TxCompDesc) * comp_ring->size);
790
791         /* cmd_ring0 buf_info allocation */
792         ring->buf_info = rte_zmalloc("tx_ring_buf_info",
793                                      ring->size * sizeof(vmxnet3_buf_info_t), RTE_CACHE_LINE_SIZE);
794         if (ring->buf_info == NULL) {
795                 PMD_INIT_LOG(ERR, "ERROR: Creating tx_buf_info structure");
796                 return -ENOMEM;
797         }
798
799         /* Update the data portion with txq */
800         dev->data->tx_queues[queue_idx] = txq;
801
802         return 0;
803 }
804
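/*
 * The Rx queue likewise uses a single DMA memzone holding both command rings
 * back to back, followed by the completion ring:
 *   [Vmxnet3_RxDesc x ring0->size][Vmxnet3_RxDesc x ring1->size]
 *   [Vmxnet3_RxCompDesc x (ring0->size + ring1->size)]
 */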
805 int
806 vmxnet3_dev_rx_queue_setup(struct rte_eth_dev *dev,
807                            uint16_t queue_idx,
808                            uint16_t nb_desc,
809                            unsigned int socket_id,
810                            __attribute__((unused)) const struct rte_eth_rxconf *rx_conf,
811                            struct rte_mempool *mp)
812 {
813         const struct rte_memzone *mz;
814         struct vmxnet3_rx_queue *rxq;
815         struct vmxnet3_hw     *hw = dev->data->dev_private;
816         struct vmxnet3_cmd_ring *ring0, *ring1, *ring;
817         struct vmxnet3_comp_ring *comp_ring;
818         int size;
819         uint8_t i;
820         char mem_name[32];
821         uint16_t buf_size;
822
823         PMD_INIT_FUNC_TRACE();
824
825         buf_size = rte_pktmbuf_data_room_size(mp) -
826                 RTE_PKTMBUF_HEADROOM;
827
828         if (dev->data->dev_conf.rxmode.max_rx_pkt_len > buf_size) {
829                 PMD_INIT_LOG(ERR, "buf_size = %u, max_pkt_len = %u, "
830                              "VMXNET3 doesn't support scatter packets yet",
831                              buf_size, dev->data->dev_conf.rxmode.max_rx_pkt_len);
832                 return -EINVAL;
833         }
834
835         rxq = rte_zmalloc("ethdev_rx_queue", sizeof(struct vmxnet3_rx_queue), RTE_CACHE_LINE_SIZE);
836         if (rxq == NULL) {
837                 PMD_INIT_LOG(ERR, "Can not allocate rx queue structure");
838                 return -ENOMEM;
839         }
840
841         rxq->mp = mp;
842         rxq->queue_id = queue_idx;
843         rxq->port_id = dev->data->port_id;
844         rxq->shared = &hw->rqd_start[queue_idx];
845         rxq->hw = hw;
846         rxq->qid1 = queue_idx;
847         rxq->qid2 = queue_idx + hw->num_rx_queues;
848         rxq->stopped = TRUE;
849
850         ring0 = &rxq->cmd_ring[0];
851         ring1 = &rxq->cmd_ring[1];
852         comp_ring = &rxq->comp_ring;
853
854         /* Rx vmxnet rings length should be between 256-4096 */
855         if (nb_desc < VMXNET3_DEF_RX_RING_SIZE) {
856                 PMD_INIT_LOG(ERR, "VMXNET3 Rx Ring Size Min: 256");
857                 return -EINVAL;
858         } else if (nb_desc > VMXNET3_RX_RING_MAX_SIZE) {
859                 PMD_INIT_LOG(ERR, "VMXNET3 Rx Ring Size Max: 4096");
860                 return -EINVAL;
861         } else {
862                 ring0->size = nb_desc;
863                 ring0->size &= ~VMXNET3_RING_SIZE_MASK;
864                 ring1->size = ring0->size;
865         }
866
867         comp_ring->size = ring0->size + ring1->size;
868
869         /* Rx vmxnet rings structure initialization */
870         ring0->next2fill = 0;
871         ring1->next2fill = 0;
872         ring0->next2comp = 0;
873         ring1->next2comp = 0;
874         ring0->gen = VMXNET3_INIT_GEN;
875         ring1->gen = VMXNET3_INIT_GEN;
876         comp_ring->next2proc = 0;
877         comp_ring->gen = VMXNET3_INIT_GEN;
878
879         size = sizeof(struct Vmxnet3_RxDesc) * (ring0->size + ring1->size);
880         size += sizeof(struct Vmxnet3_RxCompDesc) * comp_ring->size;
881
882         mz = ring_dma_zone_reserve(dev, "rxdesc", queue_idx, size, socket_id);
883         if (mz == NULL) {
884                 PMD_INIT_LOG(ERR, "ERROR: Creating queue descriptors zone");
885                 return -ENOMEM;
886         }
887         memset(mz->addr, 0, mz->len);
888
889         /* cmd_ring0 initialization */
890         ring0->base = mz->addr;
891         ring0->basePA = mz->phys_addr;
892
893         /* cmd_ring1 initialization */
894         ring1->base = ring0->base + ring0->size;
895         ring1->basePA = ring0->basePA + sizeof(struct Vmxnet3_RxDesc) * ring0->size;
896
897         /* comp_ring initialization */
898         comp_ring->base = ring1->base + ring1->size;
899         comp_ring->basePA = ring1->basePA + sizeof(struct Vmxnet3_RxDesc) *
900                 ring1->size;
901
902         /* cmd_ring0-cmd_ring1 buf_info allocation */
903         for (i = 0; i < VMXNET3_RX_CMDRING_SIZE; i++) {
904
905                 ring = &rxq->cmd_ring[i];
906                 ring->rid = i;
907                 snprintf(mem_name, sizeof(mem_name), "rx_ring_%d_buf_info", i);
908
909                 ring->buf_info = rte_zmalloc(mem_name, ring->size * sizeof(vmxnet3_buf_info_t), RTE_CACHE_LINE_SIZE);
910                 if (ring->buf_info == NULL) {
911                         PMD_INIT_LOG(ERR, "ERROR: Creating rx_buf_info structure");
912                         return -ENOMEM;
913                 }
914         }
915
916         /* Update the data portion with rxq */
917         dev->data->rx_queues[queue_idx] = rxq;
918
919         return 0;
920 }
921
922 /*
923  * Initializes Receive Unit
924  * Load mbufs in rx queue in advance
925  */
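/*
 * Besides pre-posting full Rx rings, this also writes the initial RXPROD
 * doorbells (when the device requests it via updateRxProd) and clears the
 * stopped flag on every Rx and Tx queue.
 */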
926 int
927 vmxnet3_dev_rxtx_init(struct rte_eth_dev *dev)
928 {
929         struct vmxnet3_hw *hw = dev->data->dev_private;
930
931         int i, ret;
932         uint8_t j;
933
934         PMD_INIT_FUNC_TRACE();
935
936         for (i = 0; i < hw->num_rx_queues; i++) {
937                 vmxnet3_rx_queue_t *rxq = dev->data->rx_queues[i];
938
939                 for (j = 0; j < VMXNET3_RX_CMDRING_SIZE; j++) {
940                         /* Post buffers until this command ring is full */
941                         ret = vmxnet3_post_rx_bufs(rxq, j);
942                         if (ret <= 0) {
943                                 PMD_INIT_LOG(ERR, "ERROR: Posting Rxq: %d buffers ring: %d", i, j);
944                                 return -ret;
945                         }
946                         /* Tell the device about next2fill so the newly posted mbufs can receive packets */
947                         if (unlikely(rxq->shared->ctrl.updateRxProd)) {
948                                 VMXNET3_WRITE_BAR0_REG(hw, rxprod_reg[j] + (rxq->queue_id * VMXNET3_REG_ALIGN),
949                                                        rxq->cmd_ring[j].next2fill);
950                         }
951                 }
952                 rxq->stopped = FALSE;
953         }
954
955         for (i = 0; i < dev->data->nb_tx_queues; i++) {
956                 struct vmxnet3_tx_queue *txq = dev->data->tx_queues[i];
957
958                 txq->stopped = FALSE;
959         }
960
961         return 0;
962 }
963
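/*
 * Default 40-byte Toeplitz hash key, used when the application does not
 * supply one in rss_conf; it appears to be the same well-known default key
 * used by the Intel NIC PMDs.
 */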
964 static uint8_t rss_intel_key[40] = {
965         0x6D, 0x5A, 0x56, 0xDA, 0x25, 0x5B, 0x0E, 0xC2,
966         0x41, 0x67, 0x25, 0x3D, 0x43, 0xA3, 0x8F, 0xB0,
967         0xD0, 0xCA, 0x2B, 0xCB, 0xAE, 0x7B, 0x30, 0xB4,
968         0x77, 0xCB, 0x2D, 0xA3, 0x80, 0x30, 0xF2, 0x0C,
969         0x6A, 0x42, 0xB7, 0x3B, 0xBE, 0xAC, 0x01, 0xFA,
970 };
971
972 /*
973  * Configure RSS feature
974  */
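/*
 * vmxnet3_rss_configure() fills the shared VMXNET3_RSSConf structure with the
 * Toeplitz hash function, a full-size key, an indirection table of
 * 4 * num_rx_queues entries distributed round-robin over the configured Rx
 * queues, and hash types derived from rss_hf.
 *
 * A minimal sketch of the application-side configuration that exercises this
 * path (illustrative only, with assumed port and queue counts, not part of
 * this driver):
 *
 *     struct rte_eth_conf conf = {
 *             .rxmode = { .mq_mode = ETH_MQ_RX_RSS },
 *             .rx_adv_conf.rss_conf = {
 *                     .rss_key = NULL,  // NULL falls back to rss_intel_key
 *                     .rss_hf = ETH_RSS_IPV4 | ETH_RSS_NONFRAG_IPV4_TCP,
 *             },
 *     };
 *     rte_eth_dev_configure(port_id, nb_rx_queues, nb_tx_queues, &conf);
 */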
975 int
976 vmxnet3_rss_configure(struct rte_eth_dev *dev)
977 {
978         struct vmxnet3_hw *hw = dev->data->dev_private;
979         struct VMXNET3_RSSConf *dev_rss_conf;
980         struct rte_eth_rss_conf *port_rss_conf;
981         uint64_t rss_hf;
982         uint8_t i, j;
983
984         PMD_INIT_FUNC_TRACE();
985
986         dev_rss_conf = hw->rss_conf;
987         port_rss_conf = &dev->data->dev_conf.rx_adv_conf.rss_conf;
988
989         /* loading hashFunc */
990         dev_rss_conf->hashFunc = VMXNET3_RSS_HASH_FUNC_TOEPLITZ;
991         /* loading hashKeySize */
992         dev_rss_conf->hashKeySize = VMXNET3_RSS_MAX_KEY_SIZE;
993         /* loading indTableSize : Must not exceed VMXNET3_RSS_MAX_IND_TABLE_SIZE (128)*/
994         dev_rss_conf->indTableSize = (uint16_t)(hw->num_rx_queues * 4);
995
996         if (port_rss_conf->rss_key == NULL) {
997                 /* Default hash key */
998                 port_rss_conf->rss_key = rss_intel_key;
999         }
1000
1001         /* loading hashKey */
1002         memcpy(&dev_rss_conf->hashKey[0], port_rss_conf->rss_key, dev_rss_conf->hashKeySize);
1003
1004         /* loading indTable */
1005         for (i = 0, j = 0; i < dev_rss_conf->indTableSize; i++, j++) {
1006                 if (j == dev->data->nb_rx_queues)
1007                         j = 0;
1008                 dev_rss_conf->indTable[i] = j;
1009         }
1010
1011         /* loading hashType */
1012         dev_rss_conf->hashType = 0;
1013         rss_hf = port_rss_conf->rss_hf & VMXNET3_RSS_OFFLOAD_ALL;
1014         if (rss_hf & ETH_RSS_IPV4)
1015                 dev_rss_conf->hashType |= VMXNET3_RSS_HASH_TYPE_IPV4;
1016         if (rss_hf & ETH_RSS_NONFRAG_IPV4_TCP)
1017                 dev_rss_conf->hashType |= VMXNET3_RSS_HASH_TYPE_TCP_IPV4;
1018         if (rss_hf & ETH_RSS_IPV6)
1019                 dev_rss_conf->hashType |= VMXNET3_RSS_HASH_TYPE_IPV6;
1020         if (rss_hf & ETH_RSS_NONFRAG_IPV6_TCP)
1021                 dev_rss_conf->hashType |= VMXNET3_RSS_HASH_TYPE_TCP_IPV6;
1022
1023         return VMXNET3_SUCCESS;
1024 }