tailq: remove unneeded inclusions
[dpdk.git] / lib / librte_pmd_vmxnet3 / vmxnet3_rxtx.c
1 /*-
2  *   BSD LICENSE
3  *
4  *   Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
5  *   All rights reserved.
6  *
7  *   Redistribution and use in source and binary forms, with or without
8  *   modification, are permitted provided that the following conditions
9  *   are met:
10  *
11  *     * Redistributions of source code must retain the above copyright
12  *       notice, this list of conditions and the following disclaimer.
13  *     * Redistributions in binary form must reproduce the above copyright
14  *       notice, this list of conditions and the following disclaimer in
15  *       the documentation and/or other materials provided with the
16  *       distribution.
17  *     * Neither the name of Intel Corporation nor the names of its
18  *       contributors may be used to endorse or promote products derived
19  *       from this software without specific prior written permission.
20  *
21  *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
22  *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
23  *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
24  *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
25  *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
26  *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
27  *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
28  *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
29  *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
30  *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
31  *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
32  */
33
34 #include <sys/queue.h>
35
36 #include <stdio.h>
37 #include <stdlib.h>
38 #include <string.h>
39 #include <errno.h>
40 #include <stdint.h>
41 #include <stdarg.h>
42 #include <unistd.h>
43 #include <inttypes.h>
44
45 #include <rte_byteorder.h>
46 #include <rte_common.h>
47 #include <rte_cycles.h>
48 #include <rte_log.h>
49 #include <rte_debug.h>
50 #include <rte_interrupts.h>
51 #include <rte_pci.h>
52 #include <rte_memory.h>
53 #include <rte_memzone.h>
54 #include <rte_launch.h>
55 #include <rte_eal.h>
56 #include <rte_per_lcore.h>
57 #include <rte_lcore.h>
58 #include <rte_atomic.h>
59 #include <rte_branch_prediction.h>
60 #include <rte_ring.h>
61 #include <rte_mempool.h>
62 #include <rte_malloc.h>
63 #include <rte_mbuf.h>
64 #include <rte_ether.h>
65 #include <rte_ethdev.h>
66 #include <rte_prefetch.h>
67 #include <rte_ip.h>
68 #include <rte_udp.h>
69 #include <rte_tcp.h>
70 #include <rte_sctp.h>
71 #include <rte_string_fns.h>
72 #include <rte_errno.h>
73
74 #include "vmxnet3/vmxnet3_defs.h"
75 #include "vmxnet3_ring.h"
76
77 #include "vmxnet3_logs.h"
78 #include "vmxnet3_ethdev.h"
79
80 #define RTE_MBUF_DATA_DMA_ADDR(mb) \
81         (uint64_t) ((mb)->buf_physaddr + (mb)->data_off)
82
83 #define RTE_MBUF_DATA_DMA_ADDR_DEFAULT(mb) \
84         (uint64_t) ((mb)->buf_physaddr + RTE_PKTMBUF_HEADROOM)
85
86 static uint32_t rxprod_reg[2] = {VMXNET3_REG_RXPROD, VMXNET3_REG_RXPROD2};
87
88 static inline int vmxnet3_post_rx_bufs(vmxnet3_rx_queue_t* , uint8_t);
89 static inline void vmxnet3_tq_tx_complete(vmxnet3_tx_queue_t *);
90 #ifdef RTE_LIBRTE_VMXNET3_DEBUG_DRIVER_NOT_USED
91 static void vmxnet3_rxq_dump(struct vmxnet3_rx_queue *);
92 static void vmxnet3_txq_dump(struct vmxnet3_tx_queue *);
93 #endif
94
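/* Allocate a raw (uninitialized) mbuf straight from the mempool; the caller sets up its fields. */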
95 static inline struct rte_mbuf *
96 rte_rxmbuf_alloc(struct rte_mempool *mp)
97 {
98         struct rte_mbuf *m;
99
100         m = __rte_mbuf_raw_alloc(mp);
101         __rte_mbuf_sanity_check_raw(m, 0);
102         return m;
103 }
104
105 #ifdef RTE_LIBRTE_VMXNET3_DEBUG_DRIVER_NOT_USED
106 static void
107 vmxnet3_rxq_dump(struct vmxnet3_rx_queue *rxq)
108 {
109         uint32_t avail = 0;
110
111         if (rxq == NULL)
112                 return;
113
114         PMD_RX_LOG(DEBUG,
115                    "RXQ: cmd0 base : 0x%p cmd1 base : 0x%p comp ring base : 0x%p.",
116                    rxq->cmd_ring[0].base, rxq->cmd_ring[1].base, rxq->comp_ring.base);
117         PMD_RX_LOG(DEBUG,
118                    "RXQ: cmd0 basePA : 0x%lx cmd1 basePA : 0x%lx comp ring basePA : 0x%lx.",
119                    (unsigned long)rxq->cmd_ring[0].basePA,
120                    (unsigned long)rxq->cmd_ring[1].basePA,
121                    (unsigned long)rxq->comp_ring.basePA);
122
123         avail = vmxnet3_cmd_ring_desc_avail(&rxq->cmd_ring[0]);
124         PMD_RX_LOG(DEBUG,
125                    "RXQ:cmd0: size=%u; free=%u; next2proc=%u; queued=%u",
126                    (uint32_t)rxq->cmd_ring[0].size, avail,
127                    rxq->comp_ring.next2proc,
128                    rxq->cmd_ring[0].size - avail);
129
130         avail = vmxnet3_cmd_ring_desc_avail(&rxq->cmd_ring[1]);
131         PMD_RX_LOG(DEBUG, "RXQ:cmd1 size=%u; free=%u; next2proc=%u; queued=%u",
132                    (uint32_t)rxq->cmd_ring[1].size, avail, rxq->comp_ring.next2proc,
133                    rxq->cmd_ring[1].size - avail);
134
135 }
136
137 static void
138 vmxnet3_txq_dump(struct vmxnet3_tx_queue *txq)
139 {
140         uint32_t avail = 0;
141
142         if (txq == NULL)
143                 return;
144
145         PMD_TX_LOG(DEBUG, "TXQ: cmd base : 0x%p comp ring base : 0x%p data ring base : 0x%p.",
146                    txq->cmd_ring.base, txq->comp_ring.base, txq->data_ring.base);
147         PMD_TX_LOG(DEBUG, "TXQ: cmd basePA : 0x%lx comp ring basePA : 0x%lx data ring basePA : 0x%lx.",
148                    (unsigned long)txq->cmd_ring.basePA,
149                    (unsigned long)txq->comp_ring.basePA,
150                    (unsigned long)txq->data_ring.basePA);
151
152         avail = vmxnet3_cmd_ring_desc_avail(&txq->cmd_ring);
153         PMD_TX_LOG(DEBUG, "TXQ: size=%u; free=%u; next2proc=%u; queued=%u",
154                    (uint32_t)txq->cmd_ring.size, avail,
155                    txq->comp_ring.next2proc, txq->cmd_ring.size - avail);
156 }
157 #endif
158
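/* Free every mbuf still attached to the command ring between next2comp and next2fill. */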
159 static inline void
160 vmxnet3_cmd_ring_release_mbufs(vmxnet3_cmd_ring_t *ring)
161 {
162         while (ring->next2comp != ring->next2fill) {
163                 /* No need to worry about tx desc ownership, device is quiesced by now. */
164                 vmxnet3_buf_info_t *buf_info = ring->buf_info + ring->next2comp;
165
166                 if (buf_info->m) {
167                         rte_pktmbuf_free(buf_info->m);
168                         buf_info->m = NULL;
169                         buf_info->bufPA = 0;
170                         buf_info->len = 0;
171                 }
172                 vmxnet3_cmd_ring_adv_next2comp(ring);
173         }
174 }
175
176 static void
177 vmxnet3_cmd_ring_release(vmxnet3_cmd_ring_t *ring)
178 {
179         vmxnet3_cmd_ring_release_mbufs(ring);
180         rte_free(ring->buf_info);
181         ring->buf_info = NULL;
182 }
183
184
185 void
186 vmxnet3_dev_tx_queue_release(void *txq)
187 {
188         vmxnet3_tx_queue_t *tq = txq;
189
190         if (tq != NULL) {
191                 /* Release the cmd_ring */
192                 vmxnet3_cmd_ring_release(&tq->cmd_ring);
193         }
194 }
195
196 void
197 vmxnet3_dev_rx_queue_release(void *rxq)
198 {
199         int i;
200         vmxnet3_rx_queue_t *rq = rxq;
201
202         if (rq != NULL) {
203                 /* Release both the cmd_rings */
204                 for (i = 0; i < VMXNET3_RX_CMDRING_SIZE; i++)
205                         vmxnet3_cmd_ring_release(&rq->cmd_ring[i]);
206         }
207 }
208
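/* Free any mbufs left on the Tx command ring, then reset the ring indices, generation bits and descriptor memory to their initial state. */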
209 static void
210 vmxnet3_dev_tx_queue_reset(void *txq)
211 {
212         vmxnet3_tx_queue_t *tq = txq;
213         struct vmxnet3_cmd_ring *ring = &tq->cmd_ring;
214         struct vmxnet3_comp_ring *comp_ring = &tq->comp_ring;
215         struct vmxnet3_data_ring *data_ring = &tq->data_ring;
216         int size;
217
218         if (tq != NULL) {
219                 /* Release the cmd_ring mbufs */
220                 vmxnet3_cmd_ring_release_mbufs(&tq->cmd_ring);
221         }
222
223         /* Tx vmxnet rings structure initialization */
224         ring->next2fill = 0;
225         ring->next2comp = 0;
226         ring->gen = VMXNET3_INIT_GEN;
227         comp_ring->next2proc = 0;
228         comp_ring->gen = VMXNET3_INIT_GEN;
229
230         size = sizeof(struct Vmxnet3_TxDesc) * ring->size;
231         size += sizeof(struct Vmxnet3_TxCompDesc) * comp_ring->size;
232         size += sizeof(struct Vmxnet3_TxDataDesc) * data_ring->size;
233
234         memset(ring->base, 0, size);
235 }
236
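/* Free any mbufs left on both Rx command rings, then reset the ring indices, generation bits and descriptor memory to their initial state. */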
237 static void
238 vmxnet3_dev_rx_queue_reset(void *rxq)
239 {
240         int i;
241         vmxnet3_rx_queue_t *rq = rxq;
242         struct vmxnet3_cmd_ring *ring0, *ring1;
243         struct vmxnet3_comp_ring *comp_ring;
244         int size;
245
246         if (rq != NULL) {
247                 /* Release both the cmd_rings mbufs */
248                 for (i = 0; i < VMXNET3_RX_CMDRING_SIZE; i++)
249                         vmxnet3_cmd_ring_release_mbufs(&rq->cmd_ring[i]);
250         }
251
252         ring0 = &rq->cmd_ring[0];
253         ring1 = &rq->cmd_ring[1];
254         comp_ring = &rq->comp_ring;
255
256         /* Rx vmxnet rings structure initialization */
257         ring0->next2fill = 0;
258         ring1->next2fill = 0;
259         ring0->next2comp = 0;
260         ring1->next2comp = 0;
261         ring0->gen = VMXNET3_INIT_GEN;
262         ring1->gen = VMXNET3_INIT_GEN;
263         comp_ring->next2proc = 0;
264         comp_ring->gen = VMXNET3_INIT_GEN;
265
266         size = sizeof(struct Vmxnet3_RxDesc) * (ring0->size + ring1->size);
267         size += sizeof(struct Vmxnet3_RxCompDesc) * comp_ring->size;
268
269         memset(ring0->base, 0, size);
270 }
271
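/* Stop and reset every configured Tx and Rx queue of the device. */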
272 void
273 vmxnet3_dev_clear_queues(struct rte_eth_dev *dev)
274 {
275         unsigned i;
276
277         PMD_INIT_FUNC_TRACE();
278
279         for (i = 0; i < dev->data->nb_tx_queues; i++) {
280                 struct vmxnet3_tx_queue *txq = dev->data->tx_queues[i];
281
282                 if (txq != NULL) {
283                         txq->stopped = TRUE;
284                         vmxnet3_dev_tx_queue_reset(txq);
285                 }
286         }
287
288         for (i = 0; i < dev->data->nb_rx_queues; i++) {
289                 struct vmxnet3_rx_queue *rxq = dev->data->rx_queues[i];
290
291                 if (rxq != NULL) {
292                         rxq->stopped = TRUE;
293                         vmxnet3_dev_rx_queue_reset(rxq);
294                 }
295         }
296 }
297
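/* Reclaim completed Tx descriptors: walk the completion ring while the generation bit matches, free the mbuf of each completed packet and advance the command ring. */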
298 static inline void
299 vmxnet3_tq_tx_complete(vmxnet3_tx_queue_t *txq)
300 {
301         int completed = 0;
302         struct rte_mbuf *mbuf;
303         vmxnet3_comp_ring_t *comp_ring = &txq->comp_ring;
304         struct Vmxnet3_TxCompDesc *tcd = (struct Vmxnet3_TxCompDesc *)
305                 (comp_ring->base + comp_ring->next2proc);
306
307         while (tcd->gen == comp_ring->gen) {
308
309                 /* Release cmd_ring descriptor and free mbuf */
310 #ifdef RTE_LIBRTE_VMXNET3_DEBUG_DRIVER
311                 VMXNET3_ASSERT(txq->cmd_ring.base[tcd->txdIdx].txd.eop == 1);
312 #endif
313                 mbuf = txq->cmd_ring.buf_info[tcd->txdIdx].m;
314                 if (unlikely(mbuf == NULL))
315                         rte_panic("EOP desc does not point to a valid mbuf");
316                 else
317                         rte_pktmbuf_free(mbuf);
318
319
320                 txq->cmd_ring.buf_info[tcd->txdIdx].m = NULL;
321                 /* Mark the txd for which tcd was generated as completed */
322                 vmxnet3_cmd_ring_adv_next2comp(&txq->cmd_ring);
323
324                 vmxnet3_comp_ring_adv_next2proc(comp_ring);
325                 tcd = (struct Vmxnet3_TxCompDesc *)(comp_ring->base +
326                                                     comp_ring->next2proc);
327                 completed++;
328         }
329
330         PMD_TX_LOG(DEBUG, "Processed %d tx comps & command descs.", completed);
331 }
332
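/*
 * Burst transmit: reclaim completed descriptors, then post up to nb_pkts
 * single-segment packets. Small frames are copied through the data ring;
 * the device is notified once txNumDeferred reaches txThreshold.
 */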
333 uint16_t
334 vmxnet3_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts,
335                   uint16_t nb_pkts)
336 {
337         uint16_t nb_tx;
338         Vmxnet3_TxDesc *txd = NULL;
339         vmxnet3_buf_info_t *tbi = NULL;
340         struct vmxnet3_hw *hw;
341         struct rte_mbuf *txm;
342         vmxnet3_tx_queue_t *txq = tx_queue;
343
344         hw = txq->hw;
345
346         if (unlikely(txq->stopped)) {
347                 PMD_TX_LOG(DEBUG, "Tx queue is stopped.");
348                 return 0;
349         }
350
351         /* Free up the comp_descriptors aggressively */
352         vmxnet3_tq_tx_complete(txq);
353
354         nb_tx = 0;
355         while (nb_tx < nb_pkts) {
356
357                 if (vmxnet3_cmd_ring_desc_avail(&txq->cmd_ring)) {
358                         int copy_size = 0;
359
360                         txm = tx_pkts[nb_tx];
361                         /* Multi-segment (scattered) packets are not supported yet; drop them */
362                         if (txm->nb_segs != 1) {
363                                 PMD_TX_LOG(DEBUG, "Multi-segment packets not supported yet, dropping!");
364                                 rte_pktmbuf_free(tx_pkts[nb_tx]);
365                                 txq->stats.drop_total++;
366
367                                 nb_tx++;
368                                 continue;
369                         }
370
371                         /* The MTU does not include the Ethernet header, so allow for it here */
372                         if (txm->data_len > (hw->cur_mtu + ETHER_HDR_LEN)) {
373                                 PMD_TX_LOG(DEBUG, "Packet data_len higher than MTU");
374                                 rte_pktmbuf_free(tx_pkts[nb_tx]);
375                                 txq->stats.drop_total++;
376
377                                 nb_tx++;
378                                 continue;
379                         }
380
381                         txd = (Vmxnet3_TxDesc *)(txq->cmd_ring.base + txq->cmd_ring.next2fill);
382                         if (rte_pktmbuf_pkt_len(txm) <= VMXNET3_HDR_COPY_SIZE) {
383                                 struct Vmxnet3_TxDataDesc *tdd;
384
385                                 tdd = txq->data_ring.base + txq->cmd_ring.next2fill;
386                                 copy_size = rte_pktmbuf_pkt_len(txm);
387                                 rte_memcpy(tdd->data, rte_pktmbuf_mtod(txm, char *), copy_size);
388                         }
389
390                         /* Fill the tx descriptor */
391                         tbi = txq->cmd_ring.buf_info + txq->cmd_ring.next2fill;
392                         tbi->bufPA = RTE_MBUF_DATA_DMA_ADDR(txm);
393                         if (copy_size)
394                                 txd->addr = rte_cpu_to_le_64(txq->data_ring.basePA +
395                                                         txq->cmd_ring.next2fill *
396                                                         sizeof(struct Vmxnet3_TxDataDesc));
397                         else
398                                 txd->addr = tbi->bufPA;
399                         txd->len = txm->data_len;
400
401                         /* Request a Tx completion and mark the descriptor as End of Packet */
402                         txd->cq = 1;
403                         txd->eop = 1;
404
405                         /* Add VLAN tag if requested */
406                         if (txm->ol_flags & PKT_TX_VLAN_PKT) {
407                                 txd->ti = 1;
408                                 txd->tci = rte_cpu_to_le_16(txm->vlan_tci);
409                         }
410
411                         /* Record current mbuf for freeing it later in tx complete */
412 #ifdef RTE_LIBRTE_VMXNET3_DEBUG_DRIVER
413                         VMXNET3_ASSERT(txm);
414 #endif
415                         tbi->m = txm;
416
417                         /* Set the offloading mode to default */
418                         txd->hlen = 0;
419                         txd->om = VMXNET3_OM_NONE;
420                         txd->msscof = 0;
421
422                         /* finally flip the GEN bit of the SOP desc  */
423                         txd->gen = txq->cmd_ring.gen;
424                         txq->shared->ctrl.txNumDeferred++;
425
426                         /* move to the next2fill descriptor */
427                         vmxnet3_cmd_ring_adv_next2fill(&txq->cmd_ring);
428                         nb_tx++;
429
430                 } else {
431                         PMD_TX_LOG(DEBUG, "No free tx cmd desc(s)");
432                         txq->stats.drop_total += (nb_pkts - nb_tx);
433                         break;
434                 }
435         }
436
437         PMD_TX_LOG(DEBUG, "vmxnet3 txThreshold: %u", txq->shared->ctrl.txThreshold);
438
439         if (txq->shared->ctrl.txNumDeferred >= txq->shared->ctrl.txThreshold) {
440
441                 txq->shared->ctrl.txNumDeferred = 0;
442                 /* Notify vSwitch that packets are available. */
443                 VMXNET3_WRITE_BAR0_REG(hw, (VMXNET3_REG_TXPROD + txq->queue_id * VMXNET3_REG_ALIGN),
444                                        txq->cmd_ring.next2fill);
445         }
446
447         return nb_tx;
448 }
449
450 /*
451  *  Allocate mbufs and post Rx descriptors with the buffer details so that
452  *  the device can receive packets into those buffers.
453  *      Ring layout:
454  *      Of the two rings, the 1st ring holds buffers of type 0 and type 1.
455  *      bufs_per_pkt is set such that, for non-LRO cases, all the buffers required
456  *      by a frame fit in the 1st ring (the 1st buf of type 0, the rest of type 1).
457  *      The 2nd ring holds buffers of type 1 only and is used mostly
458  *      for LRO.
459  *
460  */
461 static inline int
462 vmxnet3_post_rx_bufs(vmxnet3_rx_queue_t *rxq, uint8_t ring_id)
463 {
464         int err = 0;
465         uint32_t i = 0, val = 0;
466         struct vmxnet3_cmd_ring *ring = &rxq->cmd_ring[ring_id];
467
468         if (ring_id == 0) {
469                 /* Usually: One HEAD type buf per packet
470                  * val = (ring->next2fill % rxq->hw->bufs_per_pkt) ?
471                  * VMXNET3_RXD_BTYPE_BODY : VMXNET3_RXD_BTYPE_HEAD;
472                  */
473
474                 /* We use single packet buffer so all heads here */
475                 val = VMXNET3_RXD_BTYPE_HEAD;
476         } else {
477                 /* All BODY type buffers for 2nd ring */
478                 val = VMXNET3_RXD_BTYPE_BODY;
479         }
480
481         while (vmxnet3_cmd_ring_desc_avail(ring) > 0) {
482                 struct Vmxnet3_RxDesc *rxd;
483                 struct rte_mbuf *mbuf;
484                 vmxnet3_buf_info_t *buf_info = &ring->buf_info[ring->next2fill];
485
486                 rxd = (struct Vmxnet3_RxDesc *)(ring->base + ring->next2fill);
487
488                 /* Allocate blank mbuf for the current Rx Descriptor */
489                 mbuf = rte_rxmbuf_alloc(rxq->mp);
490                 if (unlikely(mbuf == NULL)) {
491                         PMD_RX_LOG(ERR, "Error allocating mbuf in %s", __func__);
492                         rxq->stats.rx_buf_alloc_failure++;
493                         err = ENOMEM;
494                         break;
495                 }
496
497                 /*
498                  * Load the mbuf pointer into buf_info[ring->next2fill];
499                  * the buf_info entry plays the same role as a cookie in a virtio virtqueue
500                  */
501                 buf_info->m = mbuf;
502                 buf_info->len = (uint16_t)(mbuf->buf_len -
503                                            RTE_PKTMBUF_HEADROOM);
504                 buf_info->bufPA = RTE_MBUF_DATA_DMA_ADDR_DEFAULT(mbuf);
505
506                 /* Load Rx Descriptor with the buffer's GPA */
507                 rxd->addr = buf_info->bufPA;
508
509                 /* After this point rxd->addr MUST not be NULL */
510                 rxd->btype = val;
511                 rxd->len = buf_info->len;
512                 /* Flip gen bit at the end to change ownership */
513                 rxd->gen = ring->gen;
514
515                 vmxnet3_cmd_ring_adv_next2fill(ring);
516                 i++;
517         }
518
519         /* Return error only if no buffers are posted at present */
520         if (vmxnet3_cmd_ring_desc_avail(ring) >= (ring->size - 1))
521                 return -err;
522         else
523                 return i;
524 }
525
526 /*
527  * Process the Rx Completion Ring of given vmxnet3_rx_queue
528  * for nb_pkts burst and return the number of packets received
529  */
530 uint16_t
531 vmxnet3_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t nb_pkts)
532 {
533         uint16_t nb_rx;
534         uint32_t nb_rxd, idx;
535         uint8_t ring_idx;
536         vmxnet3_rx_queue_t *rxq;
537         Vmxnet3_RxCompDesc *rcd;
538         vmxnet3_buf_info_t *rbi;
539         Vmxnet3_RxDesc *rxd;
540         struct rte_mbuf *rxm = NULL;
541         struct vmxnet3_hw *hw;
542
543         nb_rx = 0;
544         ring_idx = 0;
545         nb_rxd = 0;
546         idx = 0;
547
548         rxq = rx_queue;
549         hw = rxq->hw;
550
551         rcd = &rxq->comp_ring.base[rxq->comp_ring.next2proc].rcd;
552
553         if (unlikely(rxq->stopped)) {
554                 PMD_RX_LOG(DEBUG, "Rx queue is stopped.");
555                 return 0;
556         }
557
558         while (rcd->gen == rxq->comp_ring.gen) {
559                 if (nb_rx >= nb_pkts)
560                         break;
561
562                 idx = rcd->rxdIdx;
563                 ring_idx = (uint8_t)((rcd->rqID == rxq->qid1) ? 0 : 1);
564                 rxd = (Vmxnet3_RxDesc *)rxq->cmd_ring[ring_idx].base + idx;
565                 rbi = rxq->cmd_ring[ring_idx].buf_info + idx;
566
567                 if (unlikely(rcd->sop != 1 || rcd->eop != 1)) {
568                         rte_pktmbuf_free_seg(rbi->m);
569                         PMD_RX_LOG(DEBUG, "Packet spread across multiple buffers\n");
570                         goto rcd_done;
571                 }
572
573                 PMD_RX_LOG(DEBUG, "rxd idx: %d ring idx: %d.", idx, ring_idx);
574
575 #ifdef RTE_LIBRTE_VMXNET3_DEBUG_DRIVER
576                 VMXNET3_ASSERT(rcd->len <= rxd->len);
577                 VMXNET3_ASSERT(rbi->m);
578 #endif
579                 if (unlikely(rcd->len == 0)) {
580                         PMD_RX_LOG(DEBUG, "Rx buf was skipped. rxring[%d][%d]\n",
581                                    ring_idx, idx);
582 #ifdef RTE_LIBRTE_VMXNET3_DEBUG_DRIVER
583                         VMXNET3_ASSERT(rcd->sop && rcd->eop);
584 #endif
585                         rte_pktmbuf_free_seg(rbi->m);
586                         goto rcd_done;
587                 }
588
589                 /* Assuming a packet is coming in a single packet buffer */
590                 if (unlikely(rxd->btype != VMXNET3_RXD_BTYPE_HEAD)) {
591                         PMD_RX_LOG(DEBUG,
592                                    "Alert: Misbehaving device, incorrect"
593                                    " buffer type used. Packet dropped.");
594                         rte_pktmbuf_free_seg(rbi->m);
595                         goto rcd_done;
596                 }
597 #ifdef RTE_LIBRTE_VMXNET3_DEBUG_DRIVER
598                 VMXNET3_ASSERT(rxd->btype == VMXNET3_RXD_BTYPE_HEAD);
599 #endif
600                 /* Get the packet buffer pointer from buf_info */
601                 rxm = rbi->m;
602
603                 /* Clear descriptor associated buf_info to be reused */
604                 rbi->m = NULL;
605                 rbi->bufPA = 0;
606
607                 /* Record the descriptor index at which this packet was received */
608                 rxq->cmd_ring[ring_idx].next2comp = idx;
609
610                 /* For RCD with EOP set, check if there is frame error */
611                 if (unlikely(rcd->err)) {
612                         rxq->stats.drop_total++;
613                         rxq->stats.drop_err++;
614
615                         if (!rcd->fcs) {
616                                 rxq->stats.drop_fcs++;
617                                 PMD_RX_LOG(ERR, "Recv packet dropped due to frame err.");
618                         }
619                         PMD_RX_LOG(ERR, "Error in received packet rcd#:%d rxd:%d",
620                                    (int)(rcd - (struct Vmxnet3_RxCompDesc *)
621                                          rxq->comp_ring.base), rcd->rxdIdx);
622                         rte_pktmbuf_free_seg(rxm);
623                         goto rcd_done;
624                 }
625
626                 /* Check for hardware stripped VLAN tag */
627                 if (rcd->ts) {
628                         PMD_RX_LOG(DEBUG, "Received packet with vlan ID: %d.",
629                                    rcd->tci);
630                         rxm->ol_flags = PKT_RX_VLAN_PKT;
631                         /* Copy vlan tag in packet buffer */
632                         rxm->vlan_tci = rte_le_to_cpu_16((uint16_t)rcd->tci);
633                 } else {
634                         rxm->ol_flags = 0;
635                         rxm->vlan_tci = 0;
636                 }
637
638                 /* Initialize newly received packet buffer */
639                 rxm->port = rxq->port_id;
640                 rxm->nb_segs = 1;
641                 rxm->next = NULL;
642                 rxm->pkt_len = (uint16_t)rcd->len;
643                 rxm->data_len = (uint16_t)rcd->len;
644                 rxm->data_off = RTE_PKTMBUF_HEADROOM;
645
646                 /* Check packet type, checksum errors, etc. Only support IPv4 for now. */
647                 if (rcd->v4) {
648                         struct ether_hdr *eth = rte_pktmbuf_mtod(rxm, struct ether_hdr *);
649                         struct ipv4_hdr *ip = (struct ipv4_hdr *)(eth + 1);
650
651                         if (((ip->version_ihl & 0xf) << 2) > (int)sizeof(struct ipv4_hdr))
652                                 rxm->ol_flags |= PKT_RX_IPV4_HDR_EXT;
653                         else
654                                 rxm->ol_flags |= PKT_RX_IPV4_HDR;
655
656                         if (!rcd->cnc) {
657                                 if (!rcd->ipc)
658                                         rxm->ol_flags |= PKT_RX_IP_CKSUM_BAD;
659
660                                 if ((rcd->tcp || rcd->udp) && !rcd->tuc)
661                                         rxm->ol_flags |= PKT_RX_L4_CKSUM_BAD;
662                         }
663                 }
664
665                 rx_pkts[nb_rx++] = rxm;
666 rcd_done:
667                 rxq->cmd_ring[ring_idx].next2comp = idx;
668                 VMXNET3_INC_RING_IDX_ONLY(rxq->cmd_ring[ring_idx].next2comp, rxq->cmd_ring[ring_idx].size);
669
670                 /* Allocate new buffers and repost the freed Rx descriptors */
671                 vmxnet3_post_rx_bufs(rxq, ring_idx);
672                 if (unlikely(rxq->shared->ctrl.updateRxProd)) {
673                         VMXNET3_WRITE_BAR0_REG(hw, rxprod_reg[ring_idx] + (rxq->queue_id * VMXNET3_REG_ALIGN),
674                                                rxq->cmd_ring[ring_idx].next2fill);
675                 }
676
677                 /* Advance to the next descriptor in comp_ring */
678                 vmxnet3_comp_ring_adv_next2proc(&rxq->comp_ring);
679
680                 rcd = &rxq->comp_ring.base[rxq->comp_ring.next2proc].rcd;
681                 nb_rxd++;
682                 if (nb_rxd > rxq->cmd_ring[0].size) {
683                         PMD_RX_LOG(ERR,
684                                    "Used up quota of received packets,"
685                                    " relinquishing control.");
686                         break;
687                 }
688         }
689
690         return nb_rx;
691 }
692
693 /*
694  * Create a memzone for the device rings. malloc can't be used because the physical
695  * address is needed. If the memzone already exists, this function returns a pointer
696  * to the existing one.
697  */
698 static const struct rte_memzone *
699 ring_dma_zone_reserve(struct rte_eth_dev *dev, const char *ring_name,
700                       uint16_t queue_id, uint32_t ring_size, int socket_id)
701 {
702         char z_name[RTE_MEMZONE_NAMESIZE];
703         const struct rte_memzone *mz;
704
705         snprintf(z_name, sizeof(z_name), "%s_%s_%d_%d",
706                         dev->driver->pci_drv.name, ring_name,
707                         dev->data->port_id, queue_id);
708
709         mz = rte_memzone_lookup(z_name);
710         if (mz)
711                 return mz;
712
713         return rte_memzone_reserve_aligned(z_name, ring_size,
714                         socket_id, 0, VMXNET3_RING_BA_ALIGN);
715 }
716
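/*
 * Set up a Tx queue: validate the requested descriptor count, reserve one
 * contiguous memzone holding the command, completion and data rings, and
 * allocate the per-descriptor buf_info array.
 */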
717 int
718 vmxnet3_dev_tx_queue_setup(struct rte_eth_dev *dev,
719                            uint16_t queue_idx,
720                            uint16_t nb_desc,
721                            unsigned int socket_id,
722                            __attribute__((unused)) const struct rte_eth_txconf *tx_conf)
723 {
724         struct vmxnet3_hw *hw = dev->data->dev_private;
725         const struct rte_memzone *mz;
726         struct vmxnet3_tx_queue *txq;
727         struct vmxnet3_cmd_ring *ring;
728         struct vmxnet3_comp_ring *comp_ring;
729         struct vmxnet3_data_ring *data_ring;
730         int size;
731
732         PMD_INIT_FUNC_TRACE();
733
734         if ((tx_conf->txq_flags & ETH_TXQ_FLAGS_NOMULTSEGS) !=
735             ETH_TXQ_FLAGS_NOMULTSEGS) {
736                 PMD_INIT_LOG(ERR, "Multi-segment TX is not supported yet");
737                 return -EINVAL;
738         }
739
740         if ((tx_conf->txq_flags & ETH_TXQ_FLAGS_NOOFFLOADS) !=
741             ETH_TXQ_FLAGS_NOOFFLOADS) {
742                 PMD_INIT_LOG(ERR, "TX offloads are not supported yet");
743                 return -EINVAL;
744         }
745
746         txq = rte_zmalloc("ethdev_tx_queue", sizeof(struct vmxnet3_tx_queue), RTE_CACHE_LINE_SIZE);
747         if (txq == NULL) {
748                 PMD_INIT_LOG(ERR, "Cannot allocate tx queue structure");
749                 return -ENOMEM;
750         }
751
752         txq->queue_id = queue_idx;
753         txq->port_id = dev->data->port_id;
754         txq->shared = &hw->tqd_start[queue_idx];
755         txq->hw = hw;
756         txq->qid = queue_idx;
757         txq->stopped = TRUE;
758
759         ring = &txq->cmd_ring;
760         comp_ring = &txq->comp_ring;
761         data_ring = &txq->data_ring;
762
763         /* Tx vmxnet ring length should be between 512-4096 */
764         if (nb_desc < VMXNET3_DEF_TX_RING_SIZE) {
765                 PMD_INIT_LOG(ERR, "VMXNET3 Tx Ring Size Min: %u",
766                              VMXNET3_DEF_TX_RING_SIZE);
767                 return -EINVAL;
768         } else if (nb_desc > VMXNET3_TX_RING_MAX_SIZE) {
769                 PMD_INIT_LOG(ERR, "VMXNET3 Tx Ring Size Max: %u",
770                              VMXNET3_TX_RING_MAX_SIZE);
771                 return -EINVAL;
772         } else {
773                 ring->size = nb_desc;
774                 ring->size &= ~VMXNET3_RING_SIZE_MASK;
775         }
776         comp_ring->size = data_ring->size = ring->size;
777
778         /* Tx vmxnet rings structure initialization */
779         ring->next2fill = 0;
780         ring->next2comp = 0;
781         ring->gen = VMXNET3_INIT_GEN;
782         comp_ring->next2proc = 0;
783         comp_ring->gen = VMXNET3_INIT_GEN;
784
785         size = sizeof(struct Vmxnet3_TxDesc) * ring->size;
786         size += sizeof(struct Vmxnet3_TxCompDesc) * comp_ring->size;
787         size += sizeof(struct Vmxnet3_TxDataDesc) * data_ring->size;
788
789         mz = ring_dma_zone_reserve(dev, "txdesc", queue_idx, size, socket_id);
790         if (mz == NULL) {
791                 PMD_INIT_LOG(ERR, "ERROR: Creating queue descriptors zone");
792                 return -ENOMEM;
793         }
794         memset(mz->addr, 0, mz->len);
795
796         /* cmd_ring initialization */
797         ring->base = mz->addr;
798         ring->basePA = mz->phys_addr;
799
800         /* comp_ring initialization */
801         comp_ring->base = ring->base + ring->size;
802         comp_ring->basePA = ring->basePA +
803                 (sizeof(struct Vmxnet3_TxDesc) * ring->size);
804
805         /* data_ring initialization */
806         data_ring->base = (Vmxnet3_TxDataDesc *)(comp_ring->base + comp_ring->size);
807         data_ring->basePA = comp_ring->basePA +
808                         (sizeof(struct Vmxnet3_TxCompDesc) * comp_ring->size);
809
810         /* cmd_ring0 buf_info allocation */
811         ring->buf_info = rte_zmalloc("tx_ring_buf_info",
812                                      ring->size * sizeof(vmxnet3_buf_info_t), RTE_CACHE_LINE_SIZE);
813         if (ring->buf_info == NULL) {
814                 PMD_INIT_LOG(ERR, "ERROR: Creating tx_buf_info structure");
815                 return -ENOMEM;
816         }
817
818         /* Update the data portion with txq */
819         dev->data->tx_queues[queue_idx] = txq;
820
821         return 0;
822 }
823
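/*
 * Set up an Rx queue: check that the mbuf data room can hold max_rx_pkt_len,
 * validate the requested descriptor count, reserve one contiguous memzone for
 * the two command rings plus the completion ring, and allocate a buf_info
 * array per command ring.
 */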
824 int
825 vmxnet3_dev_rx_queue_setup(struct rte_eth_dev *dev,
826                            uint16_t queue_idx,
827                            uint16_t nb_desc,
828                            unsigned int socket_id,
829                            __attribute__((unused)) const struct rte_eth_rxconf *rx_conf,
830                            struct rte_mempool *mp)
831 {
832         const struct rte_memzone *mz;
833         struct vmxnet3_rx_queue *rxq;
834         struct vmxnet3_hw     *hw = dev->data->dev_private;
835         struct vmxnet3_cmd_ring *ring0, *ring1, *ring;
836         struct vmxnet3_comp_ring *comp_ring;
837         int size;
838         uint8_t i;
839         char mem_name[32];
840         uint16_t buf_size;
841         struct rte_pktmbuf_pool_private *mbp_priv;
842
843         PMD_INIT_FUNC_TRACE();
844
845         mbp_priv = (struct rte_pktmbuf_pool_private *)
846                 rte_mempool_get_priv(mp);
847         buf_size = (uint16_t) (mbp_priv->mbuf_data_room_size -
848                                RTE_PKTMBUF_HEADROOM);
849
850         if (dev->data->dev_conf.rxmode.max_rx_pkt_len > buf_size) {
851                 PMD_INIT_LOG(ERR, "buf_size = %u, max_pkt_len = %u, "
852                              "VMXNET3 doesn't support scatter packets yet",
853                              buf_size, dev->data->dev_conf.rxmode.max_rx_pkt_len);
854                 return -EINVAL;
855         }
856
857         rxq = rte_zmalloc("ethdev_rx_queue", sizeof(struct vmxnet3_rx_queue), RTE_CACHE_LINE_SIZE);
858         if (rxq == NULL) {
859                 PMD_INIT_LOG(ERR, "Cannot allocate rx queue structure");
860                 return -ENOMEM;
861         }
862
863         rxq->mp = mp;
864         rxq->queue_id = queue_idx;
865         rxq->port_id = dev->data->port_id;
866         rxq->shared = &hw->rqd_start[queue_idx];
867         rxq->hw = hw;
868         rxq->qid1 = queue_idx;
869         rxq->qid2 = queue_idx + hw->num_rx_queues;
870         rxq->stopped = TRUE;
871
872         ring0 = &rxq->cmd_ring[0];
873         ring1 = &rxq->cmd_ring[1];
874         comp_ring = &rxq->comp_ring;
875
876         /* Rx vmxnet rings length should be between 256-4096 */
877         if (nb_desc < VMXNET3_DEF_RX_RING_SIZE) {
878                 PMD_INIT_LOG(ERR, "VMXNET3 Rx Ring Size Min: 256");
879                 return -EINVAL;
880         } else if (nb_desc > VMXNET3_RX_RING_MAX_SIZE) {
881                 PMD_INIT_LOG(ERR, "VMXNET3 Rx Ring Size Max: 4096");
882                 return -EINVAL;
883         } else {
884                 ring0->size = nb_desc;
885                 ring0->size &= ~VMXNET3_RING_SIZE_MASK;
886                 ring1->size = ring0->size;
887         }
888
889         comp_ring->size = ring0->size + ring1->size;
890
891         /* Rx vmxnet rings structure initialization */
892         ring0->next2fill = 0;
893         ring1->next2fill = 0;
894         ring0->next2comp = 0;
895         ring1->next2comp = 0;
896         ring0->gen = VMXNET3_INIT_GEN;
897         ring1->gen = VMXNET3_INIT_GEN;
898         comp_ring->next2proc = 0;
899         comp_ring->gen = VMXNET3_INIT_GEN;
900
901         size = sizeof(struct Vmxnet3_RxDesc) * (ring0->size + ring1->size);
902         size += sizeof(struct Vmxnet3_RxCompDesc) * comp_ring->size;
903
904         mz = ring_dma_zone_reserve(dev, "rxdesc", queue_idx, size, socket_id);
905         if (mz == NULL) {
906                 PMD_INIT_LOG(ERR, "ERROR: Creating queue descriptors zone");
907                 return -ENOMEM;
908         }
909         memset(mz->addr, 0, mz->len);
910
911         /* cmd_ring0 initialization */
912         ring0->base = mz->addr;
913         ring0->basePA = mz->phys_addr;
914
915         /* cmd_ring1 initialization */
916         ring1->base = ring0->base + ring0->size;
917         ring1->basePA = ring0->basePA + sizeof(struct Vmxnet3_RxDesc) * ring0->size;
918
919         /* comp_ring initialization */
920         comp_ring->base = ring1->base + ring1->size;
921         comp_ring->basePA = ring1->basePA + sizeof(struct Vmxnet3_RxDesc) *
922                 ring1->size;
923
924         /* cmd_ring0-cmd_ring1 buf_info allocation */
925         for (i = 0; i < VMXNET3_RX_CMDRING_SIZE; i++) {
926
927                 ring = &rxq->cmd_ring[i];
928                 ring->rid = i;
929                 snprintf(mem_name, sizeof(mem_name), "rx_ring_%d_buf_info", i);
930
931                 ring->buf_info = rte_zmalloc(mem_name, ring->size * sizeof(vmxnet3_buf_info_t), RTE_CACHE_LINE_SIZE);
932                 if (ring->buf_info == NULL) {
933                         PMD_INIT_LOG(ERR, "ERROR: Creating rx_buf_info structure");
934                         return -ENOMEM;
935                 }
936         }
937
938         /* Update the data portion with rxq */
939         dev->data->rx_queues[queue_idx] = rxq;
940
941         return 0;
942 }
943
944 /*
945  * Initializes Receive Unit
946  * Load mbufs in rx queue in advance
947  */
948 int
949 vmxnet3_dev_rxtx_init(struct rte_eth_dev *dev)
950 {
951         struct vmxnet3_hw *hw = dev->data->dev_private;
952
953         int i, ret;
954         uint8_t j;
955
956         PMD_INIT_FUNC_TRACE();
957
958         for (i = 0; i < hw->num_rx_queues; i++) {
959                 vmxnet3_rx_queue_t *rxq = dev->data->rx_queues[i];
960
961                 for (j = 0; j < VMXNET3_RX_CMDRING_SIZE; j++) {
962                         /* Post enough buffers to fill the whole ring */
963                         ret = vmxnet3_post_rx_bufs(rxq, j);
964                         if (ret <= 0) {
965                                 PMD_INIT_LOG(ERR, "ERROR: Posting Rxq: %d buffers ring: %d", i, j);
966                                 return -ret;
967                         }
968                         /* Update the device with next2fill so the newly posted mbufs can receive incoming packets */
969                         if (unlikely(rxq->shared->ctrl.updateRxProd)) {
970                                 VMXNET3_WRITE_BAR0_REG(hw, rxprod_reg[j] + (rxq->queue_id * VMXNET3_REG_ALIGN),
971                                                        rxq->cmd_ring[j].next2fill);
972                         }
973                 }
974                 rxq->stopped = FALSE;
975         }
976
977         for (i = 0; i < dev->data->nb_tx_queues; i++) {
978                 struct vmxnet3_tx_queue *txq = dev->data->tx_queues[i];
979
980                 txq->stopped = FALSE;
981         }
982
983         return 0;
984 }
985
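/* Default 40-byte Toeplitz RSS key, used when the application does not supply one. */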
986 static uint8_t rss_intel_key[40] = {
987         0x6D, 0x5A, 0x56, 0xDA, 0x25, 0x5B, 0x0E, 0xC2,
988         0x41, 0x67, 0x25, 0x3D, 0x43, 0xA3, 0x8F, 0xB0,
989         0xD0, 0xCA, 0x2B, 0xCB, 0xAE, 0x7B, 0x30, 0xB4,
990         0x77, 0xCB, 0x2D, 0xA3, 0x80, 0x30, 0xF2, 0x0C,
991         0x6A, 0x42, 0xB7, 0x3B, 0xBE, 0xAC, 0x01, 0xFA,
992 };
993
994 /*
995  * Configure RSS feature
996  */
997 int
998 vmxnet3_rss_configure(struct rte_eth_dev *dev)
999 {
1000         struct vmxnet3_hw *hw = dev->data->dev_private;
1001         struct VMXNET3_RSSConf *dev_rss_conf;
1002         struct rte_eth_rss_conf *port_rss_conf;
1003         uint64_t rss_hf;
1004         uint8_t i, j;
1005
1006         PMD_INIT_FUNC_TRACE();
1007
1008         dev_rss_conf = hw->rss_conf;
1009         port_rss_conf = &dev->data->dev_conf.rx_adv_conf.rss_conf;
1010
1011         /* loading hashFunc */
1012         dev_rss_conf->hashFunc = VMXNET3_RSS_HASH_FUNC_TOEPLITZ;
1013         /* loading hashKeySize */
1014         dev_rss_conf->hashKeySize = VMXNET3_RSS_MAX_KEY_SIZE;
1015         /* loading indTableSize : Must not exceed VMXNET3_RSS_MAX_IND_TABLE_SIZE (128)*/
1016         dev_rss_conf->indTableSize = (uint16_t)(hw->num_rx_queues * 4);
1017
1018         if (port_rss_conf->rss_key == NULL) {
1019                 /* Default hash key */
1020                 port_rss_conf->rss_key = rss_intel_key;
1021         }
1022
1023         /* loading hashKey */
1024         memcpy(&dev_rss_conf->hashKey[0], port_rss_conf->rss_key, dev_rss_conf->hashKeySize);
1025
1026         /* loading indTable */
1027         for (i = 0, j = 0; i < dev_rss_conf->indTableSize; i++, j++) {
1028                 if (j == dev->data->nb_rx_queues)
1029                         j = 0;
1030                 dev_rss_conf->indTable[i] = j;
1031         }
1032
1033         /* loading hashType */
1034         dev_rss_conf->hashType = 0;
1035         rss_hf = port_rss_conf->rss_hf & VMXNET3_RSS_OFFLOAD_ALL;
1036         if (rss_hf & ETH_RSS_IPV4)
1037                 dev_rss_conf->hashType |= VMXNET3_RSS_HASH_TYPE_IPV4;
1038         if (rss_hf & ETH_RSS_NONFRAG_IPV4_TCP)
1039                 dev_rss_conf->hashType |= VMXNET3_RSS_HASH_TYPE_TCP_IPV4;
1040         if (rss_hf & ETH_RSS_IPV6)
1041                 dev_rss_conf->hashType |= VMXNET3_RSS_HASH_TYPE_IPV6;
1042         if (rss_hf & ETH_RSS_NONFRAG_IPV6_TCP)
1043                 dev_rss_conf->hashType |= VMXNET3_RSS_HASH_TYPE_TCP_IPV6;
1044
1045         return VMXNET3_SUCCESS;
1046 }
1047
1048 /*
1049  * Configure VLAN Filter feature
1050  */
1051 int
1052 vmxnet3_vlan_configure(struct rte_eth_dev *dev)
1053 {
1054         uint8_t i;
1055         struct vmxnet3_hw *hw = dev->data->dev_private;
1056         uint32_t *vf_table = hw->shared->devRead.rxFilterConf.vfTable;
1057
1058         PMD_INIT_FUNC_TRACE();
1059
1060         /* Clear the whole VLAN filter table */
1061         for (i = 0; i < VMXNET3_VFT_SIZE; i++) {
1062                 /* Filter all vlan tags out by default */
1063                 vf_table[i] = 0;
1064                 /* To-Do: Provide another routine in dev_ops for user config */
1065
1066                 PMD_INIT_LOG(DEBUG, "Registering VLAN portid: %"PRIu8" tag %u",
1067                                         dev->data->port_id, vf_table[i]);
1068         }
1069
1070         return VMXNET3_SUCCESS;
1071 }