c0cbacb7cf0fec72fabb2966b5e4c7ab3066d434
[dpdk.git] / drivers / net / vmxnet3 / vmxnet3_rxtx.c
1 /*-
2  *   BSD LICENSE
3  *
4  *   Copyright(c) 2010-2015 Intel Corporation. All rights reserved.
5  *   All rights reserved.
6  *
7  *   Redistribution and use in source and binary forms, with or without
8  *   modification, are permitted provided that the following conditions
9  *   are met:
10  *
11  *     * Redistributions of source code must retain the above copyright
12  *       notice, this list of conditions and the following disclaimer.
13  *     * Redistributions in binary form must reproduce the above copyright
14  *       notice, this list of conditions and the following disclaimer in
15  *       the documentation and/or other materials provided with the
16  *       distribution.
17  *     * Neither the name of Intel Corporation nor the names of its
18  *       contributors may be used to endorse or promote products derived
19  *       from this software without specific prior written permission.
20  *
21  *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
22  *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
23  *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
24  *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
25  *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
26  *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
27  *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
28  *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
29  *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
30  *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
31  *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
32  */
33
34 #include <sys/queue.h>
35
36 #include <stdio.h>
37 #include <stdlib.h>
38 #include <string.h>
39 #include <errno.h>
40 #include <stdint.h>
41 #include <stdarg.h>
42 #include <unistd.h>
43 #include <inttypes.h>
44
45 #include <rte_byteorder.h>
46 #include <rte_common.h>
47 #include <rte_cycles.h>
48 #include <rte_log.h>
49 #include <rte_debug.h>
50 #include <rte_interrupts.h>
51 #include <rte_pci.h>
52 #include <rte_memory.h>
53 #include <rte_memzone.h>
54 #include <rte_launch.h>
55 #include <rte_eal.h>
56 #include <rte_per_lcore.h>
57 #include <rte_lcore.h>
58 #include <rte_atomic.h>
59 #include <rte_branch_prediction.h>
60 #include <rte_ring.h>
61 #include <rte_mempool.h>
62 #include <rte_malloc.h>
63 #include <rte_mbuf.h>
64 #include <rte_ether.h>
65 #include <rte_ethdev.h>
66 #include <rte_prefetch.h>
67 #include <rte_ip.h>
68 #include <rte_udp.h>
69 #include <rte_tcp.h>
70 #include <rte_sctp.h>
71 #include <rte_string_fns.h>
72 #include <rte_errno.h>
73
74 #include "base/vmxnet3_defs.h"
75 #include "vmxnet3_ring.h"
76
77 #include "vmxnet3_logs.h"
78 #include "vmxnet3_ethdev.h"
79
80 #define RTE_MBUF_DATA_DMA_ADDR(mb) \
81         (uint64_t) ((mb)->buf_physaddr + (mb)->data_off)
82
83 #define RTE_MBUF_DATA_DMA_ADDR_DEFAULT(mb) \
84         (uint64_t) ((mb)->buf_physaddr + RTE_PKTMBUF_HEADROOM)
85
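/*
 * Note: the RTE_MBUF_DATA_DMA_ADDR* helpers above compute the guest-physical
 * address of an mbuf's data area (either at its current data_off or at the
 * default headroom), which is what the device descriptors expect.
 */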
86 static const uint32_t rxprod_reg[2] = {VMXNET3_REG_RXPROD, VMXNET3_REG_RXPROD2};
87
88 static int vmxnet3_post_rx_bufs(vmxnet3_rx_queue_t*, uint8_t);
89 static void vmxnet3_tq_tx_complete(vmxnet3_tx_queue_t *);
90 #ifdef RTE_LIBRTE_VMXNET3_DEBUG_DRIVER_NOT_USED
91 static void vmxnet3_rxq_dump(struct vmxnet3_rx_queue *);
92 static void vmxnet3_txq_dump(struct vmxnet3_tx_queue *);
93 #endif
94
95 static struct rte_mbuf *
96 rte_rxmbuf_alloc(struct rte_mempool *mp)
97 {
98         struct rte_mbuf *m;
99
100         m = __rte_mbuf_raw_alloc(mp);
101         __rte_mbuf_sanity_check_raw(m, 0);
102         return m;
103 }
104
105 #ifdef RTE_LIBRTE_VMXNET3_DEBUG_DRIVER_NOT_USED
106 static void
107 vmxnet3_rxq_dump(struct vmxnet3_rx_queue *rxq)
108 {
109         uint32_t avail = 0;
110
111         if (rxq == NULL)
112                 return;
113
114         PMD_RX_LOG(DEBUG,
115                    "RXQ: cmd0 base : 0x%p cmd1 base : 0x%p comp ring base : 0x%p.",
116                    rxq->cmd_ring[0].base, rxq->cmd_ring[1].base, rxq->comp_ring.base);
117         PMD_RX_LOG(DEBUG,
118                    "RXQ: cmd0 basePA : 0x%lx cmd1 basePA : 0x%lx comp ring basePA : 0x%lx.",
119                    (unsigned long)rxq->cmd_ring[0].basePA,
120                    (unsigned long)rxq->cmd_ring[1].basePA,
121                    (unsigned long)rxq->comp_ring.basePA);
122
123         avail = vmxnet3_cmd_ring_desc_avail(&rxq->cmd_ring[0]);
124         PMD_RX_LOG(DEBUG,
125                    "RXQ:cmd0: size=%u; free=%u; next2proc=%u; queued=%u",
126                    (uint32_t)rxq->cmd_ring[0].size, avail,
127                    rxq->comp_ring.next2proc,
128                    rxq->cmd_ring[0].size - avail);
129
130         avail = vmxnet3_cmd_ring_desc_avail(&rxq->cmd_ring[1]);
131         PMD_RX_LOG(DEBUG, "RXQ:cmd1 size=%u; free=%u; next2proc=%u; queued=%u",
132                    (uint32_t)rxq->cmd_ring[1].size, avail, rxq->comp_ring.next2proc,
133                    rxq->cmd_ring[1].size - avail);
134
135 }
136
137 static void
138 vmxnet3_txq_dump(struct vmxnet3_tx_queue *txq)
139 {
140         uint32_t avail = 0;
141
142         if (txq == NULL)
143                 return;
144
145         PMD_TX_LOG(DEBUG, "TXQ: cmd base : 0x%p comp ring base : 0x%p data ring base : 0x%p.",
146                    txq->cmd_ring.base, txq->comp_ring.base, txq->data_ring.base);
147         PMD_TX_LOG(DEBUG, "TXQ: cmd basePA : 0x%lx comp ring basePA : 0x%lx data ring basePA : 0x%lx.",
148                    (unsigned long)txq->cmd_ring.basePA,
149                    (unsigned long)txq->comp_ring.basePA,
150                    (unsigned long)txq->data_ring.basePA);
151
152         avail = vmxnet3_cmd_ring_desc_avail(&txq->cmd_ring);
153         PMD_TX_LOG(DEBUG, "TXQ: size=%u; free=%u; next2proc=%u; queued=%u",
154                    (uint32_t)txq->cmd_ring.size, avail,
155                    txq->comp_ring.next2proc, txq->cmd_ring.size - avail);
156 }
157 #endif
158
159 static void
160 vmxnet3_cmd_ring_release_mbufs(vmxnet3_cmd_ring_t *ring)
161 {
162         while (ring->next2comp != ring->next2fill) {
163                 /* No need to worry about tx desc ownership, device is quiesced by now. */
164                 vmxnet3_buf_info_t *buf_info = ring->buf_info + ring->next2comp;
165
166                 if (buf_info->m) {
167                         rte_pktmbuf_free(buf_info->m);
168                         buf_info->m = NULL;
169                         buf_info->bufPA = 0;
170                         buf_info->len = 0;
171                 }
172                 vmxnet3_cmd_ring_adv_next2comp(ring);
173         }
174 }
175
176 static void
177 vmxnet3_cmd_ring_release(vmxnet3_cmd_ring_t *ring)
178 {
179         vmxnet3_cmd_ring_release_mbufs(ring);
180         rte_free(ring->buf_info);
181         ring->buf_info = NULL;
182 }
183
184
185 void
186 vmxnet3_dev_tx_queue_release(void *txq)
187 {
188         vmxnet3_tx_queue_t *tq = txq;
189
190         if (tq != NULL) {
191                 /* Release the cmd_ring */
192                 vmxnet3_cmd_ring_release(&tq->cmd_ring);
193         }
194 }
195
196 void
197 vmxnet3_dev_rx_queue_release(void *rxq)
198 {
199         int i;
200         vmxnet3_rx_queue_t *rq = rxq;
201
202         if (rq != NULL) {
203                 /* Release both the cmd_rings */
204                 for (i = 0; i < VMXNET3_RX_CMDRING_SIZE; i++)
205                         vmxnet3_cmd_ring_release(&rq->cmd_ring[i]);
206         }
207 }
208
209 static void
210 vmxnet3_dev_tx_queue_reset(void *txq)
211 {
212         vmxnet3_tx_queue_t *tq = txq;
213         struct vmxnet3_cmd_ring *ring = &tq->cmd_ring;
214         struct vmxnet3_comp_ring *comp_ring = &tq->comp_ring;
215         struct vmxnet3_data_ring *data_ring = &tq->data_ring;
216         int size;
217
218         if (tq != NULL) {
219                 /* Release the cmd_ring mbufs */
220                 vmxnet3_cmd_ring_release_mbufs(&tq->cmd_ring);
221         }
222
223         /* Tx vmxnet rings structure initialization */
224         ring->next2fill = 0;
225         ring->next2comp = 0;
226         ring->gen = VMXNET3_INIT_GEN;
227         comp_ring->next2proc = 0;
228         comp_ring->gen = VMXNET3_INIT_GEN;
229
230         size = sizeof(struct Vmxnet3_TxDesc) * ring->size;
231         size += sizeof(struct Vmxnet3_TxCompDesc) * comp_ring->size;
232         size += sizeof(struct Vmxnet3_TxDataDesc) * data_ring->size;
233
234         memset(ring->base, 0, size);
235 }
236
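/*
 * Note: vmxnet3_dev_tx_queue_reset() can clear the descriptor, completion and
 * data rings with a single memset because the three rings are laid out
 * back-to-back in one memzone (see vmxnet3_dev_tx_queue_setup() below).
 */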
237 static void
238 vmxnet3_dev_rx_queue_reset(void *rxq)
239 {
240         int i;
241         vmxnet3_rx_queue_t *rq = rxq;
242         struct vmxnet3_cmd_ring *ring0, *ring1;
243         struct vmxnet3_comp_ring *comp_ring;
244         int size;
245
246         if (rq != NULL) {
247                 /* Release both the cmd_rings mbufs */
248                 for (i = 0; i < VMXNET3_RX_CMDRING_SIZE; i++)
249                         vmxnet3_cmd_ring_release_mbufs(&rq->cmd_ring[i]);
250         }
251
252         ring0 = &rq->cmd_ring[0];
253         ring1 = &rq->cmd_ring[1];
254         comp_ring = &rq->comp_ring;
255
256         /* Rx vmxnet rings structure initialization */
257         ring0->next2fill = 0;
258         ring1->next2fill = 0;
259         ring0->next2comp = 0;
260         ring1->next2comp = 0;
261         ring0->gen = VMXNET3_INIT_GEN;
262         ring1->gen = VMXNET3_INIT_GEN;
263         comp_ring->next2proc = 0;
264         comp_ring->gen = VMXNET3_INIT_GEN;
265
266         size = sizeof(struct Vmxnet3_RxDesc) * (ring0->size + ring1->size);
267         size += sizeof(struct Vmxnet3_RxCompDesc) * comp_ring->size;
268
269         memset(ring0->base, 0, size);
270 }
271
272 void
273 vmxnet3_dev_clear_queues(struct rte_eth_dev *dev)
274 {
275         unsigned i;
276
277         PMD_INIT_FUNC_TRACE();
278
279         for (i = 0; i < dev->data->nb_tx_queues; i++) {
280                 struct vmxnet3_tx_queue *txq = dev->data->tx_queues[i];
281
282                 if (txq != NULL) {
283                         txq->stopped = TRUE;
284                         vmxnet3_dev_tx_queue_reset(txq);
285                 }
286         }
287
288         for (i = 0; i < dev->data->nb_rx_queues; i++) {
289                 struct vmxnet3_rx_queue *rxq = dev->data->rx_queues[i];
290
291                 if (rxq != NULL) {
292                         rxq->stopped = TRUE;
293                         vmxnet3_dev_rx_queue_reset(rxq);
294                 }
295         }
296 }
297
298 static void
299 vmxnet3_tq_tx_complete(vmxnet3_tx_queue_t *txq)
300 {
301         int completed = 0;
302         struct rte_mbuf *mbuf;
303         vmxnet3_comp_ring_t *comp_ring = &txq->comp_ring;
304         struct Vmxnet3_TxCompDesc *tcd = (struct Vmxnet3_TxCompDesc *)
305                 (comp_ring->base + comp_ring->next2proc);
306
307         while (tcd->gen == comp_ring->gen) {
308                 /* Release cmd_ring descriptor and free mbuf */
309                 VMXNET3_ASSERT(txq->cmd_ring.base[tcd->txdIdx].txd.eop == 1);
310                 while (txq->cmd_ring.next2comp != tcd->txdIdx) {
311                         mbuf = txq->cmd_ring.buf_info[txq->cmd_ring.next2comp].m;
312                         txq->cmd_ring.buf_info[txq->cmd_ring.next2comp].m = NULL;
313                         rte_pktmbuf_free_seg(mbuf);
314
315                         /* Mark the txd for which tcd was generated as completed */
316                         vmxnet3_cmd_ring_adv_next2comp(&txq->cmd_ring);
317                         completed++;
318                 }
319
320                 vmxnet3_comp_ring_adv_next2proc(comp_ring);
321                 tcd = (struct Vmxnet3_TxCompDesc *)(comp_ring->base +
322                                                     comp_ring->next2proc);
323         }
324
325         PMD_TX_LOG(DEBUG, "Processed %d tx comps & command descs.", completed);
326 }
327
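/*
 * Note on the "gen" (generation) bits used throughout this file: ownership of
 * a descriptor is tracked by comparing its gen bit against the ring's current
 * gen value, which is toggled every time the ring wraps. Entries written in a
 * previous pass therefore stop matching and are not processed twice.
 */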
328 uint16_t
329 vmxnet3_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts,
330                   uint16_t nb_pkts)
331 {
332         uint16_t nb_tx;
333         vmxnet3_tx_queue_t *txq = tx_queue;
334         struct vmxnet3_hw *hw = txq->hw;
335
336         if (unlikely(txq->stopped)) {
337                 PMD_TX_LOG(DEBUG, "Tx queue is stopped.");
338                 return 0;
339         }
340
341         /* Free up the comp_descriptors aggressively */
342         vmxnet3_tq_tx_complete(txq);
343
344         nb_tx = 0;
345         while (nb_tx < nb_pkts) {
346                 Vmxnet3_GenericDesc *gdesc;
347                 vmxnet3_buf_info_t *tbi;
348                 uint32_t first2fill, avail, dw2;
349                 struct rte_mbuf *txm = tx_pkts[nb_tx];
350                 struct rte_mbuf *m_seg = txm;
351
352                 /* If this packet is excessively fragmented, drop it */
353                 if (unlikely(txm->nb_segs > VMXNET3_MAX_TXD_PER_PKT)) {
354                         ++txq->stats.drop_too_many_segs;
355                         ++txq->stats.drop_total;
356                         rte_pktmbuf_free(txm);
357                         ++nb_tx;
358                         continue;
359                 }
360
361                 /* Is command ring full? */
362                 avail = vmxnet3_cmd_ring_desc_avail(&txq->cmd_ring);
363                 if (txm->nb_segs > avail) {
364                         ++txq->stats.tx_ring_full;
365                         break;
366                 }
367
368                 /* use the previous gen bit for the SOP desc */
369                 dw2 = (txq->cmd_ring.gen ^ 0x1) << VMXNET3_TXD_GEN_SHIFT;
370                 first2fill = txq->cmd_ring.next2fill;
371                 do {
372                         /* Remember the transmit buffer for cleanup */
373                         tbi = txq->cmd_ring.buf_info + txq->cmd_ring.next2fill;
374                         tbi->m = m_seg;
375
376                         /* NB: the following assumes that the VMXNET3 maximum
377                            transmit buffer size (16K) is greater than the
378                            maximum size of an mbuf segment. */
379                         gdesc = txq->cmd_ring.base + txq->cmd_ring.next2fill;
380                         gdesc->txd.addr = RTE_MBUF_DATA_DMA_ADDR(m_seg);
381                         gdesc->dword[2] = dw2 | m_seg->data_len;
382                         gdesc->dword[3] = 0;
383
384                         /* move to the next2fill descriptor */
385                         vmxnet3_cmd_ring_adv_next2fill(&txq->cmd_ring);
386
387                         /* use the right gen for non-SOP desc */
388                         dw2 = txq->cmd_ring.gen << VMXNET3_TXD_GEN_SHIFT;
389                 } while ((m_seg = m_seg->next) != NULL);
390
391                 /* Update the EOP descriptor */
392                 gdesc->dword[3] |= VMXNET3_TXD_EOP | VMXNET3_TXD_CQ;
393
394                 /* Add VLAN tag if present */
395                 gdesc = txq->cmd_ring.base + first2fill;
396                 if (txm->ol_flags & PKT_TX_VLAN_PKT) {
397                         gdesc->txd.ti = 1;
398                         gdesc->txd.tci = txm->vlan_tci;
399                 }
400
401                 /* TODO: Add transmit checksum offload here */
402
403                 /* flip the GEN bit on the SOP */
404                 rte_compiler_barrier();
405                 gdesc->dword[2] ^= VMXNET3_TXD_GEN;
406
407                 txq->shared->ctrl.txNumDeferred++;
408                 nb_tx++;
409         }
410
411         PMD_TX_LOG(DEBUG, "vmxnet3 txThreshold: %u", txq->shared->ctrl.txThreshold);
412
413         if (txq->shared->ctrl.txNumDeferred >= txq->shared->ctrl.txThreshold) {
414
415                 txq->shared->ctrl.txNumDeferred = 0;
416                 /* Notify vSwitch that packets are available. */
417                 VMXNET3_WRITE_BAR0_REG(hw, (VMXNET3_REG_TXPROD + txq->queue_id * VMXNET3_REG_ALIGN),
418                                        txq->cmd_ring.next2fill);
419         }
420
421         return nb_tx;
422 }
423
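/*
 * Illustrative sketch (not part of the driver): applications reach
 * vmxnet3_xmit_pkts() through the generic rte_eth_tx_burst() API; the ids
 * below are hypothetical.
 *
 *     uint16_t sent = rte_eth_tx_burst(port_id, queue_id, pkts, nb_pkts);
 *     for (uint16_t k = sent; k < nb_pkts; k++)
 *             rte_pktmbuf_free(pkts[k]);  /* drop what the ring could not take */
 */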
424 /*
425  *  Allocates mbufs and clusters. Post rx descriptors with buffer details
426  *  so that device can receive packets in those buffers.
427  *      Ring layout:
428  *      Of the two rings, the 1st ring contains buffers of type 0 and type 1.
429  *      bufs_per_pkt is set such that, for non-LRO cases, all the buffers
430  *      required by a frame fit in the 1st ring (1st buf of type 0, rest of
431  *      type 1). The 2nd ring contains buffers of type 1 only and is mostly
432  *      used for LRO.
433  *
434  */
435 static int
436 vmxnet3_post_rx_bufs(vmxnet3_rx_queue_t *rxq, uint8_t ring_id)
437 {
438         int err = 0;
439         uint32_t i = 0, val = 0;
440         struct vmxnet3_cmd_ring *ring = &rxq->cmd_ring[ring_id];
441
442         if (ring_id == 0) {
443                 /* Usually: One HEAD type buf per packet
444                  * val = (ring->next2fill % rxq->hw->bufs_per_pkt) ?
445                  * VMXNET3_RXD_BTYPE_BODY : VMXNET3_RXD_BTYPE_HEAD;
446                  */
447
448                 /* We use single packet buffer so all heads here */
449                 val = VMXNET3_RXD_BTYPE_HEAD;
450         } else {
451                 /* All BODY type buffers for 2nd ring */
452                 val = VMXNET3_RXD_BTYPE_BODY;
453         }
454
455         while (vmxnet3_cmd_ring_desc_avail(ring) > 0) {
456                 struct Vmxnet3_RxDesc *rxd;
457                 struct rte_mbuf *mbuf;
458                 vmxnet3_buf_info_t *buf_info = &ring->buf_info[ring->next2fill];
459
460                 rxd = (struct Vmxnet3_RxDesc *)(ring->base + ring->next2fill);
461
462                 /* Allocate blank mbuf for the current Rx Descriptor */
463                 mbuf = rte_rxmbuf_alloc(rxq->mp);
464                 if (unlikely(mbuf == NULL)) {
465                         PMD_RX_LOG(ERR, "Error allocating mbuf in %s", __func__);
466                         rxq->stats.rx_buf_alloc_failure++;
467                         err = ENOMEM;
468                         break;
469                 }
470
471                 /*
472                  * Load the mbuf pointer into buf_info[next2fill];
473                  * the buf_info entry acts as the cookie, much like in a virtio virtqueue
474                  */
475                 buf_info->m = mbuf;
476                 buf_info->len = (uint16_t)(mbuf->buf_len -
477                                            RTE_PKTMBUF_HEADROOM);
478                 buf_info->bufPA = RTE_MBUF_DATA_DMA_ADDR_DEFAULT(mbuf);
479
480                 /* Load Rx Descriptor with the buffer's GPA */
481                 rxd->addr = buf_info->bufPA;
482
483                 /* After this point rxd->addr MUST not be NULL */
484                 rxd->btype = val;
485                 rxd->len = buf_info->len;
486                 /* Flip gen bit at the end to change ownership */
487                 rxd->gen = ring->gen;
488
489                 vmxnet3_cmd_ring_adv_next2fill(ring);
490                 i++;
491         }
492
493         /* Return error only if no buffers are posted at present */
494         if (vmxnet3_cmd_ring_desc_avail(ring) >= (ring->size - 1))
495                 return -err;
496         else
497                 return i;
498 }
499
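/*
 * Note: vmxnet3_post_rx_bufs() returns the number of buffers it managed to
 * post, or a non-positive value (0 or -ENOMEM) when the ring is still
 * effectively empty; vmxnet3_dev_rxtx_init() treats the latter as fatal.
 */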
500
501 /* Receive side checksum and other offloads */
502 static void
503 vmxnet3_rx_offload(const Vmxnet3_RxCompDesc *rcd, struct rte_mbuf *rxm)
504 {
505         /* Check for hardware stripped VLAN tag */
506         if (rcd->ts) {
507                 rxm->ol_flags |= PKT_RX_VLAN_PKT;
508                 rxm->vlan_tci = rte_le_to_cpu_16((uint16_t)rcd->tci);
509         }
510
511         /* Check for RSS */
512         if (rcd->rssType != VMXNET3_RCD_RSS_TYPE_NONE) {
513                 rxm->ol_flags |= PKT_RX_RSS_HASH;
514                 rxm->hash.rss = rcd->rssHash;
515         }
516
517         /* Check packet type, checksum errors, etc. Only IPv4 is supported for now. */
518         if (rcd->v4) {
519                 struct ether_hdr *eth = rte_pktmbuf_mtod(rxm, struct ether_hdr *);
520                 struct ipv4_hdr *ip = (struct ipv4_hdr *)(eth + 1);
521
522                 if (((ip->version_ihl & 0xf) << 2) > (int)sizeof(struct ipv4_hdr))
523                         rxm->ol_flags |= PKT_RX_IPV4_HDR_EXT;
524                 else
525                         rxm->ol_flags |= PKT_RX_IPV4_HDR;
526
527                 if (!rcd->cnc) {
528                         if (!rcd->ipc)
529                                 rxm->ol_flags |= PKT_RX_IP_CKSUM_BAD;
530
531                         if ((rcd->tcp || rcd->udp) && !rcd->tuc)
532                                 rxm->ol_flags |= PKT_RX_L4_CKSUM_BAD;
533                 }
534         }
535 }
536
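/*
 * Illustrative sketch (not part of the driver): an application can act on the
 * offload flags set by vmxnet3_rx_offload() directly from the mbuf, e.g.:
 *
 *     if (m->ol_flags & PKT_RX_IP_CKSUM_BAD)
 *             bad_ip_csum++;                      (hypothetical counter)
 *     if (m->ol_flags & PKT_RX_RSS_HASH)
 *             worker = m->hash.rss % nb_workers;  (hypothetical spreading)
 */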
537 /*
538  * Process the Rx Completion Ring of given vmxnet3_rx_queue
539  * for nb_pkts burst and return the number of packets received
540  */
541 uint16_t
542 vmxnet3_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t nb_pkts)
543 {
544         uint16_t nb_rx;
545         uint32_t nb_rxd, idx;
546         uint8_t ring_idx;
547         vmxnet3_rx_queue_t *rxq;
548         Vmxnet3_RxCompDesc *rcd;
549         vmxnet3_buf_info_t *rbi;
550         Vmxnet3_RxDesc *rxd;
551         struct rte_mbuf *rxm = NULL;
552         struct vmxnet3_hw *hw;
553
554         nb_rx = 0;
555         ring_idx = 0;
556         nb_rxd = 0;
557         idx = 0;
558
559         rxq = rx_queue;
560         hw = rxq->hw;
561
562         rcd = &rxq->comp_ring.base[rxq->comp_ring.next2proc].rcd;
563
564         if (unlikely(rxq->stopped)) {
565                 PMD_RX_LOG(DEBUG, "Rx queue is stopped.");
566                 return 0;
567         }
568
569         while (rcd->gen == rxq->comp_ring.gen) {
570                 if (nb_rx >= nb_pkts)
571                         break;
572
573                 idx = rcd->rxdIdx;
574                 ring_idx = (uint8_t)((rcd->rqID == rxq->qid1) ? 0 : 1);
575                 rxd = (Vmxnet3_RxDesc *)rxq->cmd_ring[ring_idx].base + idx;
576                 rbi = rxq->cmd_ring[ring_idx].buf_info + idx;
577
578                 if (unlikely(rcd->sop != 1 || rcd->eop != 1)) {
579                         rte_pktmbuf_free_seg(rbi->m);
580                         PMD_RX_LOG(DEBUG, "Packet spread across multiple buffers\n)");
581                         goto rcd_done;
582                 }
583
584                 PMD_RX_LOG(DEBUG, "rxd idx: %d ring idx: %d.", idx, ring_idx);
585
586                 VMXNET3_ASSERT(rcd->len <= rxd->len);
587                 VMXNET3_ASSERT(rbi->m);
588
589                 if (unlikely(rcd->len == 0)) {
590                         PMD_RX_LOG(DEBUG, "Rx buf was skipped. rxring[%d][%d]\n)",
591                                    ring_idx, idx);
592                         VMXNET3_ASSERT(rcd->sop && rcd->eop);
593                         rte_pktmbuf_free_seg(rbi->m);
594                         goto rcd_done;
595                 }
596
597                 /* Assuming a packet is coming in a single packet buffer */
598                 if (unlikely(rxd->btype != VMXNET3_RXD_BTYPE_HEAD)) {
599                         PMD_RX_LOG(DEBUG,
600                                    "Alert : Misbehaving device, incorrect "
601                                    " buffer type used. iPacket dropped.");
602                         rte_pktmbuf_free_seg(rbi->m);
603                         goto rcd_done;
604                 }
605                 VMXNET3_ASSERT(rxd->btype == VMXNET3_RXD_BTYPE_HEAD);
606
607                 /* Get the packet buffer pointer from buf_info */
608                 rxm = rbi->m;
609
610                 /* Clear descriptor associated buf_info to be reused */
611                 rbi->m = NULL;
612                 rbi->bufPA = 0;
613
614                 /* Update the index that we received a packet */
615                 rxq->cmd_ring[ring_idx].next2comp = idx;
616
617                 /* For RCD with EOP set, check if there is frame error */
618                 if (unlikely(rcd->err)) {
619                         rxq->stats.drop_total++;
620                         rxq->stats.drop_err++;
621
622                         if (!rcd->fcs) {
623                                 rxq->stats.drop_fcs++;
624                                 PMD_RX_LOG(ERR, "Recv packet dropped due to frame err.");
625                         }
626                         PMD_RX_LOG(ERR, "Error in received packet rcd#:%d rxd:%d",
627                                    (int)(rcd - (struct Vmxnet3_RxCompDesc *)
628                                          rxq->comp_ring.base), rcd->rxdIdx);
629                         rte_pktmbuf_free_seg(rxm);
630                         goto rcd_done;
631                 }
632
633
634                 /* Initialize newly received packet buffer */
635                 rxm->port = rxq->port_id;
636                 rxm->nb_segs = 1;
637                 rxm->next = NULL;
638                 rxm->pkt_len = (uint16_t)rcd->len;
639                 rxm->data_len = (uint16_t)rcd->len;
640                 rxm->data_off = RTE_PKTMBUF_HEADROOM;
641                 rxm->ol_flags = 0;
642                 rxm->vlan_tci = 0;
643
644                 vmxnet3_rx_offload(rcd, rxm);
645
646                 rx_pkts[nb_rx++] = rxm;
647 rcd_done:
648                 rxq->cmd_ring[ring_idx].next2comp = idx;
649                 VMXNET3_INC_RING_IDX_ONLY(rxq->cmd_ring[ring_idx].next2comp, rxq->cmd_ring[ring_idx].size);
650
651                 /* Allocate new buffers and repost the descriptors we just consumed */
652                 vmxnet3_post_rx_bufs(rxq, ring_idx);
653                 if (unlikely(rxq->shared->ctrl.updateRxProd)) {
654                         VMXNET3_WRITE_BAR0_REG(hw, rxprod_reg[ring_idx] + (rxq->queue_id * VMXNET3_REG_ALIGN),
655                                                rxq->cmd_ring[ring_idx].next2fill);
656                 }
657
658                 /* Advance to the next descriptor in comp_ring */
659                 vmxnet3_comp_ring_adv_next2proc(&rxq->comp_ring);
660
661                 rcd = &rxq->comp_ring.base[rxq->comp_ring.next2proc].rcd;
662                 nb_rxd++;
663                 if (nb_rxd > rxq->cmd_ring[0].size) {
664                         PMD_RX_LOG(ERR,
665                                    "Used up quota of receiving packets,"
666                                    " relinquish control.");
667                         break;
668                 }
669         }
670
671         return nb_rx;
672 }
673
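/*
 * Illustrative sketch (not part of the driver): vmxnet3_recv_pkts() is
 * normally invoked through the generic rte_eth_rx_burst() API; the burst
 * size, ids and handler below are hypothetical.
 *
 *     struct rte_mbuf *bufs[32];
 *     uint16_t nb = rte_eth_rx_burst(port_id, queue_id, bufs, 32);
 *     for (uint16_t k = 0; k < nb; k++) {
 *             handle_packet(bufs[k]);
 *             rte_pktmbuf_free(bufs[k]);
 *     }
 */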
674 /*
675  * Create memzone for device rings. malloc can't be used as the physical address is
676  * needed. If the memzone is already created, then this function returns a ptr
677  * to the old one.
678  */
679 static const struct rte_memzone *
680 ring_dma_zone_reserve(struct rte_eth_dev *dev, const char *ring_name,
681                       uint16_t queue_id, uint32_t ring_size, int socket_id)
682 {
683         char z_name[RTE_MEMZONE_NAMESIZE];
684         const struct rte_memzone *mz;
685
686         snprintf(z_name, sizeof(z_name), "%s_%s_%d_%d",
687                         dev->driver->pci_drv.name, ring_name,
688                         dev->data->port_id, queue_id);
689
690         mz = rte_memzone_lookup(z_name);
691         if (mz)
692                 return mz;
693
694         return rte_memzone_reserve_aligned(z_name, ring_size,
695                         socket_id, 0, VMXNET3_RING_BA_ALIGN);
696 }
697
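/*
 * Note: ring_dma_zone_reserve() looks the memzone up before reserving it so
 * that a queue can be set up again (e.g. across a port stop/start) and reuse
 * the descriptor memory instead of failing or leaking it.
 */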
698 int
699 vmxnet3_dev_tx_queue_setup(struct rte_eth_dev *dev,
700                            uint16_t queue_idx,
701                            uint16_t nb_desc,
702                            unsigned int socket_id,
703                            __attribute__((unused)) const struct rte_eth_txconf *tx_conf)
704 {
705         struct vmxnet3_hw *hw = dev->data->dev_private;
706         const struct rte_memzone *mz;
707         struct vmxnet3_tx_queue *txq;
708         struct vmxnet3_cmd_ring *ring;
709         struct vmxnet3_comp_ring *comp_ring;
710         struct vmxnet3_data_ring *data_ring;
711         int size;
712
713         PMD_INIT_FUNC_TRACE();
714
715         if ((tx_conf->txq_flags & ETH_TXQ_FLAGS_NOXSUMS) !=
716             ETH_TXQ_FLAGS_NOXSUMS) {
717                 PMD_INIT_LOG(ERR, "TX no support for checksum offload yet");
718                 return -EINVAL;
719         }
720
721         txq = rte_zmalloc("ethdev_tx_queue", sizeof(struct vmxnet3_tx_queue), RTE_CACHE_LINE_SIZE);
722         if (txq == NULL) {
723                 PMD_INIT_LOG(ERR, "Can not allocate tx queue structure");
724                 return -ENOMEM;
725         }
726
727         txq->queue_id = queue_idx;
728         txq->port_id = dev->data->port_id;
729         txq->shared = &hw->tqd_start[queue_idx];
730         txq->hw = hw;
731         txq->qid = queue_idx;
732         txq->stopped = TRUE;
733
734         ring = &txq->cmd_ring;
735         comp_ring = &txq->comp_ring;
736         data_ring = &txq->data_ring;
737
738         /* Tx vmxnet ring length should be between 512 and 4096 */
739         if (nb_desc < VMXNET3_DEF_TX_RING_SIZE) {
740                 PMD_INIT_LOG(ERR, "VMXNET3 Tx Ring Size Min: %u",
741                              VMXNET3_DEF_TX_RING_SIZE);
742                 return -EINVAL;
743         } else if (nb_desc > VMXNET3_TX_RING_MAX_SIZE) {
744                 PMD_INIT_LOG(ERR, "VMXNET3 Tx Ring Size Max: %u",
745                              VMXNET3_TX_RING_MAX_SIZE);
746                 return -EINVAL;
747         } else {
748                 ring->size = nb_desc;
749                 ring->size &= ~VMXNET3_RING_SIZE_MASK;
750         }
751         comp_ring->size = data_ring->size = ring->size;
752
753         /* Tx vmxnet rings structure initialization */
754         ring->next2fill = 0;
755         ring->next2comp = 0;
756         ring->gen = VMXNET3_INIT_GEN;
757         comp_ring->next2proc = 0;
758         comp_ring->gen = VMXNET3_INIT_GEN;
759
760         size = sizeof(struct Vmxnet3_TxDesc) * ring->size;
761         size += sizeof(struct Vmxnet3_TxCompDesc) * comp_ring->size;
762         size += sizeof(struct Vmxnet3_TxDataDesc) * data_ring->size;
763
764         mz = ring_dma_zone_reserve(dev, "txdesc", queue_idx, size, socket_id);
765         if (mz == NULL) {
766                 PMD_INIT_LOG(ERR, "ERROR: Creating queue descriptors zone");
767                 return -ENOMEM;
768         }
769         memset(mz->addr, 0, mz->len);
770
771         /* cmd_ring initialization */
772         ring->base = mz->addr;
773         ring->basePA = mz->phys_addr;
774
775         /* comp_ring initialization */
776         comp_ring->base = ring->base + ring->size;
777         comp_ring->basePA = ring->basePA +
778                 (sizeof(struct Vmxnet3_TxDesc) * ring->size);
779
780         /* data_ring initialization */
781         data_ring->base = (Vmxnet3_TxDataDesc *)(comp_ring->base + comp_ring->size);
782         data_ring->basePA = comp_ring->basePA +
783                         (sizeof(struct Vmxnet3_TxCompDesc) * comp_ring->size);
784
785         /* cmd_ring0 buf_info allocation */
786         ring->buf_info = rte_zmalloc("tx_ring_buf_info",
787                                      ring->size * sizeof(vmxnet3_buf_info_t), RTE_CACHE_LINE_SIZE);
788         if (ring->buf_info == NULL) {
789                 PMD_INIT_LOG(ERR, "ERROR: Creating tx_buf_info structure");
790                 return -ENOMEM;
791         }
792
793         /* Update the data portion with txq */
794         dev->data->tx_queues[queue_idx] = txq;
795
796         return 0;
797 }
798
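/*
 * Illustrative sketch (not part of the driver): vmxnet3_dev_tx_queue_setup()
 * is reached through the generic ethdev API. The values are hypothetical;
 * note that this PMD currently requires ETH_TXQ_FLAGS_NOXSUMS since Tx
 * checksum offload is not implemented.
 *
 *     struct rte_eth_txconf txconf = { .txq_flags = ETH_TXQ_FLAGS_NOXSUMS };
 *     ret = rte_eth_tx_queue_setup(port_id, 0, 512, rte_socket_id(), &txconf);
 */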
799 int
800 vmxnet3_dev_rx_queue_setup(struct rte_eth_dev *dev,
801                            uint16_t queue_idx,
802                            uint16_t nb_desc,
803                            unsigned int socket_id,
804                            __attribute__((unused)) const struct rte_eth_rxconf *rx_conf,
805                            struct rte_mempool *mp)
806 {
807         const struct rte_memzone *mz;
808         struct vmxnet3_rx_queue *rxq;
809         struct vmxnet3_hw     *hw = dev->data->dev_private;
810         struct vmxnet3_cmd_ring *ring0, *ring1, *ring;
811         struct vmxnet3_comp_ring *comp_ring;
812         int size;
813         uint8_t i;
814         char mem_name[32];
815         uint16_t buf_size;
816
817         PMD_INIT_FUNC_TRACE();
818
819         buf_size = rte_pktmbuf_data_room_size(mp) -
820                 RTE_PKTMBUF_HEADROOM;
821
822         if (dev->data->dev_conf.rxmode.max_rx_pkt_len > buf_size) {
823                 PMD_INIT_LOG(ERR, "buf_size = %u, max_pkt_len = %u, "
824                              "VMXNET3 don't support scatter packets yet",
825                              buf_size, dev->data->dev_conf.rxmode.max_rx_pkt_len);
826                 return -EINVAL;
827         }
828
829         rxq = rte_zmalloc("ethdev_rx_queue", sizeof(struct vmxnet3_rx_queue), RTE_CACHE_LINE_SIZE);
830         if (rxq == NULL) {
831                 PMD_INIT_LOG(ERR, "Can not allocate rx queue structure");
832                 return -ENOMEM;
833         }
834
835         rxq->mp = mp;
836         rxq->queue_id = queue_idx;
837         rxq->port_id = dev->data->port_id;
838         rxq->shared = &hw->rqd_start[queue_idx];
839         rxq->hw = hw;
840         rxq->qid1 = queue_idx;
841         rxq->qid2 = queue_idx + hw->num_rx_queues;
842         rxq->stopped = TRUE;
843
844         ring0 = &rxq->cmd_ring[0];
845         ring1 = &rxq->cmd_ring[1];
846         comp_ring = &rxq->comp_ring;
847
848         /* Rx vmxnet ring length should be between 256 and 4096 */
849         if (nb_desc < VMXNET3_DEF_RX_RING_SIZE) {
850                 PMD_INIT_LOG(ERR, "VMXNET3 Rx Ring Size Min: 256");
851                 return -EINVAL;
852         } else if (nb_desc > VMXNET3_RX_RING_MAX_SIZE) {
853                 PMD_INIT_LOG(ERR, "VMXNET3 Rx Ring Size Max: 4096");
854                 return -EINVAL;
855         } else {
856                 ring0->size = nb_desc;
857                 ring0->size &= ~VMXNET3_RING_SIZE_MASK;
858                 ring1->size = ring0->size;
859         }
860
861         comp_ring->size = ring0->size + ring1->size;
862
863         /* Rx vmxnet rings structure initialization */
864         ring0->next2fill = 0;
865         ring1->next2fill = 0;
866         ring0->next2comp = 0;
867         ring1->next2comp = 0;
868         ring0->gen = VMXNET3_INIT_GEN;
869         ring1->gen = VMXNET3_INIT_GEN;
870         comp_ring->next2proc = 0;
871         comp_ring->gen = VMXNET3_INIT_GEN;
872
873         size = sizeof(struct Vmxnet3_RxDesc) * (ring0->size + ring1->size);
874         size += sizeof(struct Vmxnet3_RxCompDesc) * comp_ring->size;
875
876         mz = ring_dma_zone_reserve(dev, "rxdesc", queue_idx, size, socket_id);
877         if (mz == NULL) {
878                 PMD_INIT_LOG(ERR, "ERROR: Creating queue descriptors zone");
879                 return -ENOMEM;
880         }
881         memset(mz->addr, 0, mz->len);
882
883         /* cmd_ring0 initialization */
884         ring0->base = mz->addr;
885         ring0->basePA = mz->phys_addr;
886
887         /* cmd_ring1 initialization */
888         ring1->base = ring0->base + ring0->size;
889         ring1->basePA = ring0->basePA + sizeof(struct Vmxnet3_RxDesc) * ring0->size;
890
891         /* comp_ring initialization */
892         comp_ring->base = ring1->base + ring1->size;
893         comp_ring->basePA = ring1->basePA + sizeof(struct Vmxnet3_RxDesc) *
894                 ring1->size;
895
896         /* cmd_ring0-cmd_ring1 buf_info allocation */
897         for (i = 0; i < VMXNET3_RX_CMDRING_SIZE; i++) {
898
899                 ring = &rxq->cmd_ring[i];
900                 ring->rid = i;
901                 snprintf(mem_name, sizeof(mem_name), "rx_ring_%d_buf_info", i);
902
903                 ring->buf_info = rte_zmalloc(mem_name, ring->size * sizeof(vmxnet3_buf_info_t), RTE_CACHE_LINE_SIZE);
904                 if (ring->buf_info == NULL) {
905                         PMD_INIT_LOG(ERR, "ERROR: Creating rx_buf_info structure");
906                         return -ENOMEM;
907                 }
908         }
909
910         /* Update the data portion with rxq */
911         dev->data->rx_queues[queue_idx] = rxq;
912
913         return 0;
914 }
915
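/*
 * Illustrative sketch (not part of the driver): a typical application-side
 * counterpart of vmxnet3_dev_rx_queue_setup(). Pool sizing and ids are
 * hypothetical; the mbuf data room must cover max_rx_pkt_len because scatter
 * Rx is not supported here.
 *
 *     struct rte_mempool *mp = rte_pktmbuf_pool_create("rx_pool", 4096, 256,
 *                     0, RTE_MBUF_DEFAULT_BUF_SIZE, rte_socket_id());
 *     ret = rte_eth_rx_queue_setup(port_id, 0, 256, rte_socket_id(), NULL, mp);
 */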
916 /*
917  * Initializes Receive Unit
918  * Load mbufs in rx queue in advance
919  */
920 int
921 vmxnet3_dev_rxtx_init(struct rte_eth_dev *dev)
922 {
923         struct vmxnet3_hw *hw = dev->data->dev_private;
924
925         int i, ret;
926         uint8_t j;
927
928         PMD_INIT_FUNC_TRACE();
929
930         for (i = 0; i < hw->num_rx_queues; i++) {
931                 vmxnet3_rx_queue_t *rxq = dev->data->rx_queues[i];
932
933                 for (j = 0; j < VMXNET3_RX_CMDRING_SIZE; j++) {
934                         /* vmxnet3_post_rx_bufs() fills the whole ring with fresh mbufs */
935                         ret = vmxnet3_post_rx_bufs(rxq, j);
936                         if (ret <= 0) {
937                                 PMD_INIT_LOG(ERR, "ERROR: Posting Rxq: %d buffers ring: %d", i, j);
938                                 return -ret;
939                         }
940                         /* Update the device with next2fill so it can use the newly posted mbufs for incoming packets */
941                         if (unlikely(rxq->shared->ctrl.updateRxProd)) {
942                                 VMXNET3_WRITE_BAR0_REG(hw, rxprod_reg[j] + (rxq->queue_id * VMXNET3_REG_ALIGN),
943                                                        rxq->cmd_ring[j].next2fill);
944                         }
945                 }
946                 rxq->stopped = FALSE;
947         }
948
949         for (i = 0; i < dev->data->nb_tx_queues; i++) {
950                 struct vmxnet3_tx_queue *txq = dev->data->tx_queues[i];
951
952                 txq->stopped = FALSE;
953         }
954
955         return 0;
956 }
957
958 static uint8_t rss_intel_key[40] = {
959         0x6D, 0x5A, 0x56, 0xDA, 0x25, 0x5B, 0x0E, 0xC2,
960         0x41, 0x67, 0x25, 0x3D, 0x43, 0xA3, 0x8F, 0xB0,
961         0xD0, 0xCA, 0x2B, 0xCB, 0xAE, 0x7B, 0x30, 0xB4,
962         0x77, 0xCB, 0x2D, 0xA3, 0x80, 0x30, 0xF2, 0x0C,
963         0x6A, 0x42, 0xB7, 0x3B, 0xBE, 0xAC, 0x01, 0xFA,
964 };
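/*
 * Note: this is the default 40-byte Toeplitz key also used by other DPDK
 * PMDs; an application-supplied key in rss_conf.rss_key takes precedence in
 * vmxnet3_rss_configure() below.
 */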
965
966 /*
967  * Configure RSS feature
968  */
969 int
970 vmxnet3_rss_configure(struct rte_eth_dev *dev)
971 {
972         struct vmxnet3_hw *hw = dev->data->dev_private;
973         struct VMXNET3_RSSConf *dev_rss_conf;
974         struct rte_eth_rss_conf *port_rss_conf;
975         uint64_t rss_hf;
976         uint8_t i, j;
977
978         PMD_INIT_FUNC_TRACE();
979
980         dev_rss_conf = hw->rss_conf;
981         port_rss_conf = &dev->data->dev_conf.rx_adv_conf.rss_conf;
982
983         /* loading hashFunc */
984         dev_rss_conf->hashFunc = VMXNET3_RSS_HASH_FUNC_TOEPLITZ;
985         /* loading hashKeySize */
986         dev_rss_conf->hashKeySize = VMXNET3_RSS_MAX_KEY_SIZE;
987         /* loading indTableSize: must not exceed VMXNET3_RSS_MAX_IND_TABLE_SIZE (128) */
988         dev_rss_conf->indTableSize = (uint16_t)(hw->num_rx_queues * 4);
989
990         if (port_rss_conf->rss_key == NULL) {
991                 /* Default hash key */
992                 port_rss_conf->rss_key = rss_intel_key;
993         }
994
995         /* loading hashKey */
996         memcpy(&dev_rss_conf->hashKey[0], port_rss_conf->rss_key, dev_rss_conf->hashKeySize);
997
998         /* loading indTable */
999         for (i = 0, j = 0; i < dev_rss_conf->indTableSize; i++, j++) {
1000                 if (j == dev->data->nb_rx_queues)
1001                         j = 0;
1002                 dev_rss_conf->indTable[i] = j;
1003         }
1004
1005         /* loading hashType */
1006         dev_rss_conf->hashType = 0;
1007         rss_hf = port_rss_conf->rss_hf & VMXNET3_RSS_OFFLOAD_ALL;
1008         if (rss_hf & ETH_RSS_IPV4)
1009                 dev_rss_conf->hashType |= VMXNET3_RSS_HASH_TYPE_IPV4;
1010         if (rss_hf & ETH_RSS_NONFRAG_IPV4_TCP)
1011                 dev_rss_conf->hashType |= VMXNET3_RSS_HASH_TYPE_TCP_IPV4;
1012         if (rss_hf & ETH_RSS_IPV6)
1013                 dev_rss_conf->hashType |= VMXNET3_RSS_HASH_TYPE_IPV6;
1014         if (rss_hf & ETH_RSS_NONFRAG_IPV6_TCP)
1015                 dev_rss_conf->hashType |= VMXNET3_RSS_HASH_TYPE_TCP_IPV6;
1016
1017         return VMXNET3_SUCCESS;
1018 }
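/*
 * Illustrative sketch (not part of the driver): RSS is requested from the
 * application side through the device configuration; the hash types and ids
 * below are hypothetical.
 *
 *     struct rte_eth_conf port_conf = {
 *             .rxmode = { .mq_mode = ETH_MQ_RX_RSS },
 *             .rx_adv_conf.rss_conf = {
 *                     .rss_key = NULL,  /* fall back to rss_intel_key */
 *                     .rss_hf = ETH_RSS_IPV4 | ETH_RSS_NONFRAG_IPV4_TCP,
 *             },
 *     };
 *     ret = rte_eth_dev_configure(port_id, nb_rx_q, nb_tx_q, &port_conf);
 */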