vmxnet3: add vlan Tx offload
[dpdk.git] / lib / librte_pmd_vmxnet3 / vmxnet3_rxtx.c
/*-
 *   BSD LICENSE
 *
 *   Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
 *   All rights reserved.
 *
 *   Redistribution and use in source and binary forms, with or without
 *   modification, are permitted provided that the following conditions
 *   are met:
 *
 *     * Redistributions of source code must retain the above copyright
 *       notice, this list of conditions and the following disclaimer.
 *     * Redistributions in binary form must reproduce the above copyright
 *       notice, this list of conditions and the following disclaimer in
 *       the documentation and/or other materials provided with the
 *       distribution.
 *     * Neither the name of Intel Corporation nor the names of its
 *       contributors may be used to endorse or promote products derived
 *       from this software without specific prior written permission.
 *
 *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

#include <sys/queue.h>

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <errno.h>
#include <stdint.h>
#include <stdarg.h>
#include <unistd.h>
#include <inttypes.h>

#include <rte_byteorder.h>
#include <rte_common.h>
#include <rte_cycles.h>
#include <rte_log.h>
#include <rte_debug.h>
#include <rte_interrupts.h>
#include <rte_pci.h>
#include <rte_memory.h>
#include <rte_memzone.h>
#include <rte_launch.h>
#include <rte_tailq.h>
#include <rte_eal.h>
#include <rte_per_lcore.h>
#include <rte_lcore.h>
#include <rte_atomic.h>
#include <rte_branch_prediction.h>
#include <rte_ring.h>
#include <rte_mempool.h>
#include <rte_malloc.h>
#include <rte_mbuf.h>
#include <rte_ether.h>
#include <rte_ethdev.h>
#include <rte_prefetch.h>
#include <rte_udp.h>
#include <rte_tcp.h>
#include <rte_sctp.h>
#include <rte_string_fns.h>
#include <rte_errno.h>

#include "vmxnet3/vmxnet3_defs.h"
#include "vmxnet3_ring.h"

#include "vmxnet3_logs.h"
#include "vmxnet3_ethdev.h"


#define RTE_MBUF_DATA_DMA_ADDR(mb) \
        (uint64_t) ((mb)->buf_physaddr + (mb)->data_off)

#define RTE_MBUF_DATA_DMA_ADDR_DEFAULT(mb) \
        (uint64_t) ((mb)->buf_physaddr + RTE_PKTMBUF_HEADROOM)

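/* BAR0 producer-index registers, one for each hardware Rx command ring */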
static uint32_t rxprod_reg[2] = {VMXNET3_REG_RXPROD, VMXNET3_REG_RXPROD2};

static inline int vmxnet3_post_rx_bufs(vmxnet3_rx_queue_t *, uint8_t);
static inline void vmxnet3_tq_tx_complete(vmxnet3_tx_queue_t *);
#ifdef RTE_LIBRTE_VMXNET3_DEBUG_DRIVER_NOT_USED
static void vmxnet3_rxq_dump(struct vmxnet3_rx_queue *);
static void vmxnet3_txq_dump(struct vmxnet3_tx_queue *);
#endif

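/* Allocate an mbuf straight from the pool (raw allocation, no pktmbuf reset) */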
static inline struct rte_mbuf *
rte_rxmbuf_alloc(struct rte_mempool *mp)
{
        struct rte_mbuf *m;

        m = __rte_mbuf_raw_alloc(mp);
        __rte_mbuf_sanity_check_raw(m, 0);
        return m;
}

#ifdef RTE_LIBRTE_VMXNET3_DEBUG_DRIVER_NOT_USED
static void
vmxnet3_rxq_dump(struct vmxnet3_rx_queue *rxq)
{
        uint32_t avail = 0;

        if (rxq == NULL)
                return;

        PMD_RX_LOG(DEBUG,
                   "RXQ: cmd0 base : 0x%p cmd1 base : 0x%p comp ring base : 0x%p.",
                   rxq->cmd_ring[0].base, rxq->cmd_ring[1].base, rxq->comp_ring.base);
        PMD_RX_LOG(DEBUG,
                   "RXQ: cmd0 basePA : 0x%lx cmd1 basePA : 0x%lx comp ring basePA : 0x%lx.",
                   (unsigned long)rxq->cmd_ring[0].basePA,
                   (unsigned long)rxq->cmd_ring[1].basePA,
                   (unsigned long)rxq->comp_ring.basePA);

        avail = vmxnet3_cmd_ring_desc_avail(&rxq->cmd_ring[0]);
        PMD_RX_LOG(DEBUG,
                   "RXQ:cmd0: size=%u; free=%u; next2proc=%u; queued=%u",
                   (uint32_t)rxq->cmd_ring[0].size, avail,
                   rxq->comp_ring.next2proc,
                   rxq->cmd_ring[0].size - avail);

        avail = vmxnet3_cmd_ring_desc_avail(&rxq->cmd_ring[1]);
        PMD_RX_LOG(DEBUG, "RXQ:cmd1 size=%u; free=%u; next2proc=%u; queued=%u",
                   (uint32_t)rxq->cmd_ring[1].size, avail, rxq->comp_ring.next2proc,
                   rxq->cmd_ring[1].size - avail);
}

static void
vmxnet3_txq_dump(struct vmxnet3_tx_queue *txq)
{
        uint32_t avail = 0;

        if (txq == NULL)
                return;

        PMD_TX_LOG(DEBUG, "TXQ: cmd base : 0x%p comp ring base : 0x%p.",
                   txq->cmd_ring.base, txq->comp_ring.base);
        PMD_TX_LOG(DEBUG, "TXQ: cmd basePA : 0x%lx comp ring basePA : 0x%lx.",
                   (unsigned long)txq->cmd_ring.basePA,
                   (unsigned long)txq->comp_ring.basePA);

        avail = vmxnet3_cmd_ring_desc_avail(&txq->cmd_ring);
        PMD_TX_LOG(DEBUG, "TXQ: size=%u; free=%u; next2proc=%u; queued=%u",
                   (uint32_t)txq->cmd_ring.size, avail,
                   txq->comp_ring.next2proc, txq->cmd_ring.size - avail);
}
#endif

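/* Free any mbufs still attached to a command ring and release its buf_info array */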
static inline void
vmxnet3_cmd_ring_release(vmxnet3_cmd_ring_t *ring)
{
        while (ring->next2comp != ring->next2fill) {
                /* No need to worry about tx desc ownership, device is quiesced by now. */
                vmxnet3_buf_info_t *buf_info = ring->buf_info + ring->next2comp;

                if (buf_info->m) {
                        rte_pktmbuf_free(buf_info->m);
                        buf_info->m = NULL;
                        buf_info->bufPA = 0;
                        buf_info->len = 0;
                }
                vmxnet3_cmd_ring_adv_next2comp(ring);
        }
        rte_free(ring->buf_info);
        ring->buf_info = NULL;
}

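/* ethdev queue-release callbacks: free the command ring(s) of a Tx/Rx queue */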
void
vmxnet3_dev_tx_queue_release(void *txq)
{
        vmxnet3_tx_queue_t *tq = txq;

        if (txq != NULL) {
                /* Release the cmd_ring */
                vmxnet3_cmd_ring_release(&tq->cmd_ring);
        }
}

void
vmxnet3_dev_rx_queue_release(void *rxq)
{
        int i;
        vmxnet3_rx_queue_t *rq = rxq;

        if (rxq != NULL) {
                /* Release both the cmd_rings */
                for (i = 0; i < VMXNET3_RX_CMDRING_SIZE; i++)
                        vmxnet3_cmd_ring_release(&rq->cmd_ring[i]);
        }
}

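/* Stop all Tx/Rx queues of the device and release their command rings */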
void
vmxnet3_dev_clear_queues(struct rte_eth_dev *dev)
{
        unsigned i;

        PMD_INIT_FUNC_TRACE();

        for (i = 0; i < dev->data->nb_tx_queues; i++) {
                struct vmxnet3_tx_queue *txq = dev->data->tx_queues[i];

                if (txq != NULL) {
                        txq->stopped = TRUE;
                        vmxnet3_dev_tx_queue_release(txq);
                }
        }

        for (i = 0; i < dev->data->nb_rx_queues; i++) {
                struct vmxnet3_rx_queue *rxq = dev->data->rx_queues[i];

                if (rxq != NULL) {
                        rxq->stopped = TRUE;
                        vmxnet3_dev_rx_queue_release(rxq);
                }
        }
}

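/*
 * Walk the Tx completion ring while the generation bits match, freeing the
 * mbuf of each completed descriptor and advancing the command ring.
 */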
static inline void
vmxnet3_tq_tx_complete(vmxnet3_tx_queue_t *txq)
{
        int completed = 0;
        struct rte_mbuf *mbuf;
        vmxnet3_comp_ring_t *comp_ring = &txq->comp_ring;
        struct Vmxnet3_TxCompDesc *tcd = (struct Vmxnet3_TxCompDesc *)
                (comp_ring->base + comp_ring->next2proc);

        while (tcd->gen == comp_ring->gen) {

                /* Release cmd_ring descriptor and free mbuf */
#ifdef RTE_LIBRTE_VMXNET3_DEBUG_DRIVER
                VMXNET3_ASSERT(txq->cmd_ring.base[tcd->txdIdx].txd.eop == 1);
#endif
                mbuf = txq->cmd_ring.buf_info[tcd->txdIdx].m;
                if (unlikely(mbuf == NULL))
                        rte_panic("EOP desc does not point to a valid mbuf");
                else
                        rte_pktmbuf_free(mbuf);

                txq->cmd_ring.buf_info[tcd->txdIdx].m = NULL;
                /* Mark the txd for which tcd was generated as completed */
                vmxnet3_cmd_ring_adv_next2comp(&txq->cmd_ring);

                vmxnet3_comp_ring_adv_next2proc(comp_ring);
                tcd = (struct Vmxnet3_TxCompDesc *)(comp_ring->base +
                                                    comp_ring->next2proc);
                completed++;
        }

        PMD_TX_LOG(DEBUG, "Processed %d tx comps & command descs.", completed);
}

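/*
 * Transmit a burst of packets. Completed descriptors are reclaimed first;
 * each packet then gets a single Tx descriptor (multi-segment mbufs are
 * dropped) and the TXPROD doorbell is written once txNumDeferred reaches
 * the threshold advertised by the device.
 */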
uint16_t
vmxnet3_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts,
                  uint16_t nb_pkts)
{
        uint16_t nb_tx;
        Vmxnet3_TxDesc *txd = NULL;
        vmxnet3_buf_info_t *tbi = NULL;
        struct vmxnet3_hw *hw;
        struct rte_mbuf *txm;
        vmxnet3_tx_queue_t *txq = tx_queue;

        hw = txq->hw;

        if (txq->stopped) {
                PMD_TX_LOG(DEBUG, "Tx queue is stopped.");
                return 0;
        }

        /* Free up the comp_descriptors aggressively */
        vmxnet3_tq_tx_complete(txq);

        nb_tx = 0;
        while (nb_tx < nb_pkts) {

                if (vmxnet3_cmd_ring_desc_avail(&txq->cmd_ring)) {

                        txm = tx_pkts[nb_tx];
                        /* Multi-segment packets are not supported yet; drop them */
                        if (txm->nb_segs != 1) {
                                PMD_TX_LOG(DEBUG, "Multi-segment packets not supported yet, drop!");
                                rte_pktmbuf_free(tx_pkts[nb_tx]);
                                txq->stats.drop_total++;

                                nb_tx++;
                                continue;
                        }

                        /* The MTU does not include the Ethernet header, so allow for it here */
                        if (txm->data_len > (hw->cur_mtu + ETHER_HDR_LEN)) {
                                PMD_TX_LOG(DEBUG, "Packet data_len higher than MTU");
                                rte_pktmbuf_free(tx_pkts[nb_tx]);
                                txq->stats.drop_total++;

                                nb_tx++;
                                continue;
                        }

                        txd = (Vmxnet3_TxDesc *)(txq->cmd_ring.base + txq->cmd_ring.next2fill);

                        /* Fill the tx descriptor */
                        tbi = txq->cmd_ring.buf_info + txq->cmd_ring.next2fill;
                        tbi->bufPA = RTE_MBUF_DATA_DMA_ADDR(txm);
                        txd->addr = tbi->bufPA;
                        txd->len = txm->data_len;

                        /* Mark the last descriptor as End of Packet. */
                        txd->cq = 1;
                        txd->eop = 1;

                        /* Add VLAN tag if requested */
                        if (txm->ol_flags & PKT_TX_VLAN_PKT) {
                                txd->ti = 1;
                                txd->tci = rte_cpu_to_le_16(txm->vlan_tci);
                        }

                        /* Record current mbuf for freeing it later in tx complete */
#ifdef RTE_LIBRTE_VMXNET3_DEBUG_DRIVER
                        VMXNET3_ASSERT(txm);
#endif
                        tbi->m = txm;

                        /* Set the offloading mode to default */
                        txd->hlen = 0;
                        txd->om = VMXNET3_OM_NONE;
                        txd->msscof = 0;

                        /* Finally, flip the GEN bit of the SOP desc */
                        txd->gen = txq->cmd_ring.gen;
                        txq->shared->ctrl.txNumDeferred++;

                        /* move to the next2fill descriptor */
                        vmxnet3_cmd_ring_adv_next2fill(&txq->cmd_ring);
                        nb_tx++;

                } else {
                        PMD_TX_LOG(DEBUG, "No free tx cmd desc(s)");
                        txq->stats.drop_total += (nb_pkts - nb_tx);
                        break;
                }
        }

        PMD_TX_LOG(DEBUG, "vmxnet3 txThreshold: %u", txq->shared->ctrl.txThreshold);

        if (txq->shared->ctrl.txNumDeferred >= txq->shared->ctrl.txThreshold) {

                txq->shared->ctrl.txNumDeferred = 0;
                /* Notify vSwitch that packets are available. */
                VMXNET3_WRITE_BAR0_REG(hw, (VMXNET3_REG_TXPROD + txq->queue_id * VMXNET3_REG_ALIGN),
                                       txq->cmd_ring.next2fill);
        }

        return nb_tx;
}

/*
 *  Allocates mbufs and posts Rx descriptors with the buffer details so that
 *  the device can receive packets into those buffers.
 *      Ring layout:
 *      Of the two rings, the 1st ring contains buffers of type 0 and type 1.
 *      bufs_per_pkt is set such that for non-LRO cases all the buffers required
 *      by a frame fit in the 1st ring (1st buf of type 0, the rest of type 1).
 *      The 2nd ring contains buffers of type 1 only and is mostly used
 *      only for LRO.
 */
static inline int
vmxnet3_post_rx_bufs(vmxnet3_rx_queue_t *rxq, uint8_t ring_id)
{
        int err = 0;
        uint32_t i = 0, val = 0;
        struct vmxnet3_cmd_ring *ring = &rxq->cmd_ring[ring_id];

        while (vmxnet3_cmd_ring_desc_avail(ring) > 0) {
                struct Vmxnet3_RxDesc *rxd;
                struct rte_mbuf *mbuf;
                vmxnet3_buf_info_t *buf_info = &ring->buf_info[ring->next2fill];

                rxd = (struct Vmxnet3_RxDesc *)(ring->base + ring->next2fill);

                if (ring->rid == 0) {
                        /* Usually: One HEAD type buf per packet
                         * val = (ring->next2fill % rxq->hw->bufs_per_pkt) ?
                         * VMXNET3_RXD_BTYPE_BODY : VMXNET3_RXD_BTYPE_HEAD;
                         */

                        /* We use single packet buffer so all heads here */
                        val = VMXNET3_RXD_BTYPE_HEAD;
                } else {
                        /* All BODY type buffers for the 2nd ring, which is not used by ESXi at all */
                        val = VMXNET3_RXD_BTYPE_BODY;
                }

                /* Allocate blank mbuf for the current Rx Descriptor */
                mbuf = rte_rxmbuf_alloc(rxq->mp);
                if (mbuf == NULL) {
                        PMD_RX_LOG(ERR, "Error allocating mbuf in %s", __func__);
                        rxq->stats.rx_buf_alloc_failure++;
                        err = ENOMEM;
                        break;
                }

                /*
                 * Load the mbuf pointer into buf_info[next2fill];
                 * the buf_info structure is the equivalent of the virtio virtqueue cookie.
                 */
                buf_info->m = mbuf;
                buf_info->len = (uint16_t)(mbuf->buf_len -
                                           RTE_PKTMBUF_HEADROOM);
                buf_info->bufPA = RTE_MBUF_DATA_DMA_ADDR_DEFAULT(mbuf);

                /* Load Rx Descriptor with the buffer's GPA */
                rxd->addr = buf_info->bufPA;

                /* After this point rxd->addr MUST not be NULL */
                rxd->btype = val;
                rxd->len = buf_info->len;
                /* Flip gen bit at the end to change ownership */
                rxd->gen = ring->gen;

                vmxnet3_cmd_ring_adv_next2fill(ring);
                i++;
        }

        /* Return an error only if no buffers could be posted at all */
        if (vmxnet3_cmd_ring_desc_avail(ring) >= (ring->size - 1))
                return -err;
        else
                return i;
}

/*
 * Process the Rx Completion Ring of the given vmxnet3_rx_queue
 * for nb_pkts burst and return the number of packets received
 */
uint16_t
vmxnet3_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t nb_pkts)
{
        uint16_t nb_rx;
        uint32_t nb_rxd, idx;
        uint8_t ring_idx;
        vmxnet3_rx_queue_t *rxq;
        Vmxnet3_RxCompDesc *rcd;
        vmxnet3_buf_info_t *rbi;
        Vmxnet3_RxDesc *rxd;
        struct rte_mbuf *rxm = NULL;
        struct vmxnet3_hw *hw;

        nb_rx = 0;
        ring_idx = 0;
        nb_rxd = 0;
        idx = 0;

        rxq = rx_queue;
        hw = rxq->hw;

        rcd = &rxq->comp_ring.base[rxq->comp_ring.next2proc].rcd;

        if (rxq->stopped) {
                PMD_RX_LOG(DEBUG, "Rx queue is stopped.");
                return 0;
        }

        while (rcd->gen == rxq->comp_ring.gen) {

                if (nb_rx >= nb_pkts)
                        break;
                idx = rcd->rxdIdx;
                ring_idx = (uint8_t)((rcd->rqID == rxq->qid1) ? 0 : 1);
                rxd = (Vmxnet3_RxDesc *)rxq->cmd_ring[ring_idx].base + idx;
                rbi = rxq->cmd_ring[ring_idx].buf_info + idx;

                if (rcd->sop != 1 || rcd->eop != 1) {
                        rte_pktmbuf_free_seg(rbi->m);

                        PMD_RX_LOG(DEBUG, "Packet spread across multiple buffers");
                        goto rcd_done;

                } else {

                        PMD_RX_LOG(DEBUG, "rxd idx: %d ring idx: %d.", idx, ring_idx);

#ifdef RTE_LIBRTE_VMXNET3_DEBUG_DRIVER
                        VMXNET3_ASSERT(rcd->len <= rxd->len);
                        VMXNET3_ASSERT(rbi->m);
#endif
                        if (rcd->len == 0) {
                                PMD_RX_LOG(DEBUG, "Rx buf was skipped. rxring[%d][%d]",
                                           ring_idx, idx);
#ifdef RTE_LIBRTE_VMXNET3_DEBUG_DRIVER
                                VMXNET3_ASSERT(rcd->sop && rcd->eop);
#endif
                                rte_pktmbuf_free_seg(rbi->m);

                                goto rcd_done;
                        }

                        /* Assuming a packet arrives in a single packet buffer */
                        if (rxd->btype != VMXNET3_RXD_BTYPE_HEAD) {
                                PMD_RX_LOG(DEBUG,
                                           "Alert : Misbehaving device, incorrect "
                                           "buffer type used. Packet dropped.");
                                rte_pktmbuf_free_seg(rbi->m);
                                goto rcd_done;
                        }
#ifdef RTE_LIBRTE_VMXNET3_DEBUG_DRIVER
                        VMXNET3_ASSERT(rxd->btype == VMXNET3_RXD_BTYPE_HEAD);
#endif
                        /* Get the packet buffer pointer from buf_info */
                        rxm = rbi->m;

                        /* Clear descriptor associated buf_info to be reused */
                        rbi->m = NULL;
                        rbi->bufPA = 0;

                        /* Update the index that we received a packet */
                        rxq->cmd_ring[ring_idx].next2comp = idx;

                        /* For RCD with EOP set, check if there is frame error */
                        if (rcd->err) {
                                rxq->stats.drop_total++;
                                rxq->stats.drop_err++;

                                if (!rcd->fcs) {
                                        rxq->stats.drop_fcs++;
                                        PMD_RX_LOG(ERR, "Recv packet dropped due to frame err.");
                                }
                                PMD_RX_LOG(ERR, "Error in received packet rcd#:%d rxd:%d",
                                           (int)(rcd - (struct Vmxnet3_RxCompDesc *)
                                                 rxq->comp_ring.base), rcd->rxdIdx);
                                rte_pktmbuf_free_seg(rxm);

                                goto rcd_done;
                        }

                        /* Check for hardware stripped VLAN tag */
                        if (rcd->ts) {
                                PMD_RX_LOG(DEBUG, "Received packet with vlan ID: %d.",
                                           rcd->tci);
                                rxm->ol_flags = PKT_RX_VLAN_PKT;
#ifdef RTE_LIBRTE_VMXNET3_DEBUG_DRIVER
                                VMXNET3_ASSERT(rxm &&
                                               rte_pktmbuf_mtod(rxm, void *));
#endif
                                /* Copy vlan tag in packet buffer */
                                rxm->vlan_tci = rte_le_to_cpu_16((uint16_t)rcd->tci);
                        } else {
                                rxm->ol_flags = 0;
                                rxm->vlan_tci = 0;
                        }

                        /* Initialize newly received packet buffer */
                        rxm->port = rxq->port_id;
                        rxm->nb_segs = 1;
                        rxm->next = NULL;
                        rxm->pkt_len = (uint16_t)rcd->len;
                        rxm->data_len = (uint16_t)rcd->len;
                        rxm->data_off = RTE_PKTMBUF_HEADROOM;

                        rx_pkts[nb_rx++] = rxm;
rcd_done:
                        rxq->cmd_ring[ring_idx].next2comp = idx;
                        VMXNET3_INC_RING_IDX_ONLY(rxq->cmd_ring[ring_idx].next2comp, rxq->cmd_ring[ring_idx].size);

                        /* It's time to allocate some new buf and renew descriptors */
                        vmxnet3_post_rx_bufs(rxq, ring_idx);
                        if (unlikely(rxq->shared->ctrl.updateRxProd)) {
                                VMXNET3_WRITE_BAR0_REG(hw, rxprod_reg[ring_idx] + (rxq->queue_id * VMXNET3_REG_ALIGN),
                                                       rxq->cmd_ring[ring_idx].next2fill);
                        }

                        /* Advance to the next descriptor in comp_ring */
                        vmxnet3_comp_ring_adv_next2proc(&rxq->comp_ring);

                        rcd = &rxq->comp_ring.base[rxq->comp_ring.next2proc].rcd;
                        nb_rxd++;
                        if (nb_rxd > rxq->cmd_ring[0].size) {
                                PMD_RX_LOG(ERR,
                                           "Used up quota of receiving packets,"
                                           " relinquish control.");
                                break;
                        }
                }
        }

        return nb_rx;
}

/*
 * Create memzone for device rings. malloc can't be used as the physical address is
 * needed. If the memzone is already created, then this function returns a ptr
 * to the old one.
 */
static const struct rte_memzone *
ring_dma_zone_reserve(struct rte_eth_dev *dev, const char *ring_name,
                      uint16_t queue_id, uint32_t ring_size, int socket_id)
{
        char z_name[RTE_MEMZONE_NAMESIZE];
        const struct rte_memzone *mz;

        snprintf(z_name, sizeof(z_name), "%s_%s_%d_%d",
                        dev->driver->pci_drv.name, ring_name,
                        dev->data->port_id, queue_id);

        mz = rte_memzone_lookup(z_name);
        if (mz)
                return mz;

        return rte_memzone_reserve_aligned(z_name, ring_size,
                        socket_id, 0, VMXNET3_RING_BA_ALIGN);
}

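/*
 * Set up a Tx queue: allocate the queue structure, reserve DMA-able memory
 * for the command and completion rings, and initialize the ring state.
 */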
int
vmxnet3_dev_tx_queue_setup(struct rte_eth_dev *dev,
                           uint16_t queue_idx,
                           uint16_t nb_desc,
                           unsigned int socket_id,
                           __attribute__((unused)) const struct rte_eth_txconf *tx_conf)
{
        struct vmxnet3_hw     *hw = dev->data->dev_private;
        const struct rte_memzone *mz;
        struct vmxnet3_tx_queue *txq;
        struct vmxnet3_cmd_ring *ring;
        struct vmxnet3_comp_ring *comp_ring;
        int size;

        PMD_INIT_FUNC_TRACE();

        if ((tx_conf->txq_flags & ETH_TXQ_FLAGS_NOMULTSEGS) !=
            ETH_TXQ_FLAGS_NOMULTSEGS) {
                PMD_INIT_LOG(ERR, "TX multi-segment packets not supported yet");
                return -EINVAL;
        }

        if ((tx_conf->txq_flags & ETH_TXQ_FLAGS_NOOFFLOADS) !=
            ETH_TXQ_FLAGS_NOOFFLOADS) {
                PMD_INIT_LOG(ERR, "TX offload functions not supported yet");
                return -EINVAL;
        }

        txq = rte_zmalloc("ethdev_tx_queue", sizeof(struct vmxnet3_tx_queue), CACHE_LINE_SIZE);
        if (txq == NULL) {
                PMD_INIT_LOG(ERR, "Can not allocate tx queue structure");
                return -ENOMEM;
        }

        txq->queue_id = queue_idx;
        txq->port_id = dev->data->port_id;
        txq->shared = &hw->tqd_start[queue_idx];
        txq->hw = hw;
        txq->qid = queue_idx;
        txq->stopped = TRUE;

        ring = &txq->cmd_ring;
        comp_ring = &txq->comp_ring;

        /* Tx vmxnet ring length should be between 512 and 4096 */
        if (nb_desc < VMXNET3_DEF_TX_RING_SIZE) {
                PMD_INIT_LOG(ERR, "VMXNET3 Tx Ring Size Min: %u",
                             VMXNET3_DEF_TX_RING_SIZE);
                return -EINVAL;
        } else if (nb_desc > VMXNET3_TX_RING_MAX_SIZE) {
                PMD_INIT_LOG(ERR, "VMXNET3 Tx Ring Size Max: %u",
                             VMXNET3_TX_RING_MAX_SIZE);
                return -EINVAL;
        } else {
                ring->size = nb_desc;
                ring->size &= ~VMXNET3_RING_SIZE_MASK;
        }
        comp_ring->size = ring->size;

        /* Tx vmxnet rings structure initialization */
        ring->next2fill = 0;
        ring->next2comp = 0;
        ring->gen = VMXNET3_INIT_GEN;
        comp_ring->next2proc = 0;
        comp_ring->gen = VMXNET3_INIT_GEN;

        size = sizeof(struct Vmxnet3_TxDesc) * ring->size;
        size += sizeof(struct Vmxnet3_TxCompDesc) * comp_ring->size;

        mz = ring_dma_zone_reserve(dev, "txdesc", queue_idx, size, socket_id);
        if (mz == NULL) {
                PMD_INIT_LOG(ERR, "ERROR: Creating queue descriptors zone");
                return -ENOMEM;
        }
        memset(mz->addr, 0, mz->len);

        /* cmd_ring initialization */
        ring->base = mz->addr;
        ring->basePA = mz->phys_addr;

        /* comp_ring initialization */
        comp_ring->base = ring->base + ring->size;
        comp_ring->basePA = ring->basePA +
                (sizeof(struct Vmxnet3_TxDesc) * ring->size);

        /* cmd_ring0 buf_info allocation */
        ring->buf_info = rte_zmalloc("tx_ring_buf_info",
                                     ring->size * sizeof(vmxnet3_buf_info_t), CACHE_LINE_SIZE);
        if (ring->buf_info == NULL) {
                PMD_INIT_LOG(ERR, "ERROR: Creating tx_buf_info structure");
                return -ENOMEM;
        }

        /* Update the data portion with txq */
        dev->data->tx_queues[queue_idx] = txq;

        return 0;
}

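/*
 * Set up an Rx queue: allocate the queue structure, reserve DMA-able memory
 * for both command rings and the completion ring, and initialize ring state.
 */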
int
vmxnet3_dev_rx_queue_setup(struct rte_eth_dev *dev,
                           uint16_t queue_idx,
                           uint16_t nb_desc,
                           unsigned int socket_id,
                           __attribute__((unused)) const struct rte_eth_rxconf *rx_conf,
                           struct rte_mempool *mp)
{
        const struct rte_memzone *mz;
        struct vmxnet3_rx_queue *rxq;
        struct vmxnet3_hw     *hw = dev->data->dev_private;
        struct vmxnet3_cmd_ring *ring0, *ring1, *ring;
        struct vmxnet3_comp_ring *comp_ring;
        int size;
        uint8_t i;
        char mem_name[32];
        uint16_t buf_size;
        struct rte_pktmbuf_pool_private *mbp_priv;

        PMD_INIT_FUNC_TRACE();

        mbp_priv = (struct rte_pktmbuf_pool_private *)
                rte_mempool_get_priv(mp);
        buf_size = (uint16_t) (mbp_priv->mbuf_data_room_size -
                               RTE_PKTMBUF_HEADROOM);

        if (dev->data->dev_conf.rxmode.max_rx_pkt_len > buf_size) {
                PMD_INIT_LOG(ERR, "buf_size = %u, max_pkt_len = %u, "
                             "VMXNET3 doesn't support scatter packets yet",
                             buf_size, dev->data->dev_conf.rxmode.max_rx_pkt_len);
                return -EINVAL;
        }

        rxq = rte_zmalloc("ethdev_rx_queue", sizeof(struct vmxnet3_rx_queue), CACHE_LINE_SIZE);
        if (rxq == NULL) {
                PMD_INIT_LOG(ERR, "Can not allocate rx queue structure");
                return -ENOMEM;
        }

        rxq->mp = mp;
        rxq->queue_id = queue_idx;
        rxq->port_id = dev->data->port_id;
        rxq->shared = &hw->rqd_start[queue_idx];
        rxq->hw = hw;
        rxq->qid1 = queue_idx;
        rxq->qid2 = queue_idx + hw->num_rx_queues;
        rxq->stopped = TRUE;

        ring0 = &rxq->cmd_ring[0];
        ring1 = &rxq->cmd_ring[1];
        comp_ring = &rxq->comp_ring;

        /* Rx vmxnet rings length should be between 256 and 4096 */
        if (nb_desc < VMXNET3_DEF_RX_RING_SIZE) {
                PMD_INIT_LOG(ERR, "VMXNET3 Rx Ring Size Min: 256");
                return -EINVAL;
        } else if (nb_desc > VMXNET3_RX_RING_MAX_SIZE) {
                PMD_INIT_LOG(ERR, "VMXNET3 Rx Ring Size Max: 4096");
                return -EINVAL;
        } else {
                ring0->size = nb_desc;
                ring0->size &= ~VMXNET3_RING_SIZE_MASK;
                ring1->size = ring0->size;
        }

        comp_ring->size = ring0->size + ring1->size;

        /* Rx vmxnet rings structure initialization */
        ring0->next2fill = 0;
        ring1->next2fill = 0;
        ring0->next2comp = 0;
        ring1->next2comp = 0;
        ring0->gen = VMXNET3_INIT_GEN;
        ring1->gen = VMXNET3_INIT_GEN;
        comp_ring->next2proc = 0;
        comp_ring->gen = VMXNET3_INIT_GEN;

        size = sizeof(struct Vmxnet3_RxDesc) * (ring0->size + ring1->size);
        size += sizeof(struct Vmxnet3_RxCompDesc) * comp_ring->size;

        mz = ring_dma_zone_reserve(dev, "rxdesc", queue_idx, size, socket_id);
        if (mz == NULL) {
                PMD_INIT_LOG(ERR, "ERROR: Creating queue descriptors zone");
                return -ENOMEM;
        }
        memset(mz->addr, 0, mz->len);

        /* cmd_ring0 initialization */
        ring0->base = mz->addr;
        ring0->basePA = mz->phys_addr;

        /* cmd_ring1 initialization */
        ring1->base = ring0->base + ring0->size;
        ring1->basePA = ring0->basePA + sizeof(struct Vmxnet3_RxDesc) * ring0->size;

        /* comp_ring initialization */
        comp_ring->base = ring1->base + ring1->size;
        comp_ring->basePA = ring1->basePA + sizeof(struct Vmxnet3_RxDesc) *
                ring1->size;

        /* cmd_ring0-cmd_ring1 buf_info allocation */
        for (i = 0; i < VMXNET3_RX_CMDRING_SIZE; i++) {

                ring = &rxq->cmd_ring[i];
                ring->rid = i;
                snprintf(mem_name, sizeof(mem_name), "rx_ring_%d_buf_info", i);

                ring->buf_info = rte_zmalloc(mem_name, ring->size * sizeof(vmxnet3_buf_info_t), CACHE_LINE_SIZE);
                if (ring->buf_info == NULL) {
                        PMD_INIT_LOG(ERR, "ERROR: Creating rx_buf_info structure");
                        return -ENOMEM;
                }
        }

        /* Update the data portion with rxq */
        dev->data->rx_queues[queue_idx] = rxq;

        return 0;
}

/*
 * Initializes Receive Unit
 * Load mbufs in rx queue in advance
 */
int
vmxnet3_dev_rxtx_init(struct rte_eth_dev *dev)
{
        struct vmxnet3_hw *hw = dev->data->dev_private;

        int i, ret;
        uint8_t j;

        PMD_INIT_FUNC_TRACE();

        for (i = 0; i < hw->num_rx_queues; i++) {
                vmxnet3_rx_queue_t *rxq = dev->data->rx_queues[i];

                for (j = 0; j < VMXNET3_RX_CMDRING_SIZE; j++) {
                        /* Post as many Rx buffers as the ring can hold */
                        ret = vmxnet3_post_rx_bufs(rxq, j);
                        if (ret <= 0) {
                                PMD_INIT_LOG(ERR, "ERROR: Posting Rxq: %d buffers ring: %d", i, j);
                                return -ret;
                        }
                        /* Update the device with next2fill so it can fill the mbufs for incoming packets */
                        if (unlikely(rxq->shared->ctrl.updateRxProd)) {
                                VMXNET3_WRITE_BAR0_REG(hw, rxprod_reg[j] + (rxq->queue_id * VMXNET3_REG_ALIGN),
                                                       rxq->cmd_ring[j].next2fill);
                        }
                }
                rxq->stopped = FALSE;
        }

        for (i = 0; i < dev->data->nb_tx_queues; i++) {
                struct vmxnet3_tx_queue *txq = dev->data->tx_queues[i];

                txq->stopped = FALSE;
        }

        return 0;
}

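/* Default Toeplitz RSS hash key, used when the application does not supply one */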
static uint8_t rss_intel_key[40] = {
        0x6D, 0x5A, 0x56, 0xDA, 0x25, 0x5B, 0x0E, 0xC2,
        0x41, 0x67, 0x25, 0x3D, 0x43, 0xA3, 0x8F, 0xB0,
        0xD0, 0xCA, 0x2B, 0xCB, 0xAE, 0x7B, 0x30, 0xB4,
        0x77, 0xCB, 0x2D, 0xA3, 0x80, 0x30, 0xF2, 0x0C,
        0x6A, 0x42, 0xB7, 0x3B, 0xBE, 0xAC, 0x01, 0xFA,
};

/*
 * Configure RSS feature
 */
int
vmxnet3_rss_configure(struct rte_eth_dev *dev)
{
#define VMXNET3_RSS_OFFLOAD_ALL ( \
                ETH_RSS_IPV4 | \
                ETH_RSS_IPV4_TCP | \
                ETH_RSS_IPV6 | \
                ETH_RSS_IPV6_TCP)

        struct vmxnet3_hw *hw = dev->data->dev_private;
        struct VMXNET3_RSSConf *dev_rss_conf;
        struct rte_eth_rss_conf *port_rss_conf;
        uint64_t rss_hf;
        uint8_t i, j;

        PMD_INIT_FUNC_TRACE();

        dev_rss_conf = hw->rss_conf;
        port_rss_conf = &dev->data->dev_conf.rx_adv_conf.rss_conf;

        /* loading hashFunc */
        dev_rss_conf->hashFunc = VMXNET3_RSS_HASH_FUNC_TOEPLITZ;
        /* loading hashKeySize */
        dev_rss_conf->hashKeySize = VMXNET3_RSS_MAX_KEY_SIZE;
        /* loading indTableSize: must not exceed VMXNET3_RSS_MAX_IND_TABLE_SIZE (128) */
        dev_rss_conf->indTableSize = (uint16_t)(hw->num_rx_queues * 4);

        if (port_rss_conf->rss_key == NULL) {
                /* Default hash key */
                port_rss_conf->rss_key = rss_intel_key;
        }

        /* loading hashKey */
        memcpy(&dev_rss_conf->hashKey[0], port_rss_conf->rss_key, dev_rss_conf->hashKeySize);

        /* loading indTable */
        for (i = 0, j = 0; i < dev_rss_conf->indTableSize; i++, j++) {
                if (j == dev->data->nb_rx_queues)
                        j = 0;
                dev_rss_conf->indTable[i] = j;
        }

        /* loading hashType */
        dev_rss_conf->hashType = 0;
        rss_hf = port_rss_conf->rss_hf & VMXNET3_RSS_OFFLOAD_ALL;
        if (rss_hf & ETH_RSS_IPV4)
                dev_rss_conf->hashType |= VMXNET3_RSS_HASH_TYPE_IPV4;
        if (rss_hf & ETH_RSS_IPV4_TCP)
                dev_rss_conf->hashType |= VMXNET3_RSS_HASH_TYPE_TCP_IPV4;
        if (rss_hf & ETH_RSS_IPV6)
                dev_rss_conf->hashType |= VMXNET3_RSS_HASH_TYPE_IPV6;
        if (rss_hf & ETH_RSS_IPV6_TCP)
                dev_rss_conf->hashType |= VMXNET3_RSS_HASH_TYPE_TCP_IPV6;

        return VMXNET3_SUCCESS;
}

/*
 * Configure VLAN Filter feature
 */
int
vmxnet3_vlan_configure(struct rte_eth_dev *dev)
{
        uint8_t i;
        struct vmxnet3_hw *hw = dev->data->dev_private;
        uint32_t *vf_table = hw->shared->devRead.rxFilterConf.vfTable;

        PMD_INIT_FUNC_TRACE();

        /* Initialize the VLAN filter table */
        for (i = 0; i < VMXNET3_VFT_SIZE; i++) {
                /* Filter all vlan tags out by default */
                vf_table[i] = 0;
                /* To-Do: Provide another routine in dev_ops for user config */

                PMD_INIT_LOG(DEBUG, "Registering VLAN portid: %"PRIu8" tag %u",
                                        dev->data->port_id, vf_table[i]);
        }

        return VMXNET3_SUCCESS;
}