vmxnet3: enable for FreeBSD
dpdk.git: lib/librte_pmd_vmxnet3/vmxnet3_rxtx.c
/*-
 *   BSD LICENSE
 *
 *   Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
 *   All rights reserved.
 *
 *   Redistribution and use in source and binary forms, with or without
 *   modification, are permitted provided that the following conditions
 *   are met:
 *
 *     * Redistributions of source code must retain the above copyright
 *       notice, this list of conditions and the following disclaimer.
 *     * Redistributions in binary form must reproduce the above copyright
 *       notice, this list of conditions and the following disclaimer in
 *       the documentation and/or other materials provided with the
 *       distribution.
 *     * Neither the name of Intel Corporation nor the names of its
 *       contributors may be used to endorse or promote products derived
 *       from this software without specific prior written permission.
 *
 *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

#include <sys/queue.h>

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <errno.h>
#include <stdint.h>
#include <stdarg.h>
#include <unistd.h>
#include <inttypes.h>

#include <rte_byteorder.h>
#include <rte_common.h>
#include <rte_cycles.h>
#include <rte_log.h>
#include <rte_debug.h>
#include <rte_interrupts.h>
#include <rte_pci.h>
#include <rte_memory.h>
#include <rte_memzone.h>
#include <rte_launch.h>
#include <rte_tailq.h>
#include <rte_eal.h>
#include <rte_per_lcore.h>
#include <rte_lcore.h>
#include <rte_atomic.h>
#include <rte_branch_prediction.h>
#include <rte_ring.h>
#include <rte_mempool.h>
#include <rte_malloc.h>
#include <rte_mbuf.h>
#include <rte_ether.h>
#include <rte_ethdev.h>
#include <rte_prefetch.h>
#include <rte_udp.h>
#include <rte_tcp.h>
#include <rte_sctp.h>
#include <rte_string_fns.h>
#include <rte_errno.h>

#include "vmxnet3/vmxnet3_defs.h"
#include "vmxnet3_ring.h"

#include "vmxnet3_logs.h"
#include "vmxnet3_ethdev.h"

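/*
 * Helpers to derive the guest-physical (DMA) address written into vmxnet3
 * descriptors for an mbuf: the buffer's physical address plus either the
 * current data offset or the default headroom offset.
 */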
#define RTE_MBUF_DATA_DMA_ADDR(mb) \
        (uint64_t) ((mb)->buf_physaddr + (uint64_t)((char *)((mb)->pkt.data) - \
        (char *)(mb)->buf_addr))

#define RTE_MBUF_DATA_DMA_ADDR_DEFAULT(mb) \
        (uint64_t) ((mb)->buf_physaddr + RTE_PKTMBUF_HEADROOM)

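/*
 * Rx producer-index registers, indexed by command ring id:
 * ring 0 uses VMXNET3_REG_RXPROD, ring 1 uses VMXNET3_REG_RXPROD2.
 */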
static uint32_t rxprod_reg[2] = {VMXNET3_REG_RXPROD, VMXNET3_REG_RXPROD2};

static inline int vmxnet3_post_rx_bufs(vmxnet3_rx_queue_t *, uint8_t);
static inline void vmxnet3_tq_tx_complete(vmxnet3_tx_queue_t *);
#ifdef RTE_LIBRTE_VMXNET3_DEBUG_DRIVER_NOT_USED
static void vmxnet3_rxq_dump(struct vmxnet3_rx_queue *);
static void vmxnet3_txq_dump(struct vmxnet3_tx_queue *);
#endif

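/*
 * Allocate a raw mbuf from the queue's mempool; used when (re)filling the
 * Rx command rings below.
 */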
static inline struct rte_mbuf *
rte_rxmbuf_alloc(struct rte_mempool *mp)
{
        struct rte_mbuf *m;

        m = __rte_mbuf_raw_alloc(mp);
        __rte_mbuf_sanity_check_raw(m, RTE_MBUF_PKT, 0);
        return (m);
}

#ifdef RTE_LIBRTE_VMXNET3_DEBUG_DRIVER_NOT_USED
static void
vmxnet3_rxq_dump(struct vmxnet3_rx_queue *rxq)
{
        uint32_t avail = 0;
        if (rxq == NULL)
                return;

        PMD_RX_LOG(DEBUG, "RXQ: cmd0 base : 0x%p cmd1 base : 0x%p comp ring base : 0x%p.\n",
                   rxq->cmd_ring[0].base, rxq->cmd_ring[1].base, rxq->comp_ring.base);
        PMD_RX_LOG(DEBUG, "RXQ: cmd0 basePA : 0x%lx cmd1 basePA : 0x%lx comp ring basePA : 0x%lx.\n",
                   (unsigned long)rxq->cmd_ring[0].basePA, (unsigned long)rxq->cmd_ring[1].basePA,
                   (unsigned long)rxq->comp_ring.basePA);

        avail = vmxnet3_cmd_ring_desc_avail(&rxq->cmd_ring[0]);
        PMD_RX_LOG(DEBUG, "RXQ:cmd0: size=%u; free=%u; next2proc=%u; queued=%u\n",
                   (uint32_t)rxq->cmd_ring[0].size, avail, rxq->comp_ring.next2proc,
                   rxq->cmd_ring[0].size - avail);

        avail = vmxnet3_cmd_ring_desc_avail(&rxq->cmd_ring[1]);
        PMD_RX_LOG(DEBUG, "RXQ:cmd1: size=%u; free=%u; next2proc=%u; queued=%u\n",
                   (uint32_t)rxq->cmd_ring[1].size, avail, rxq->comp_ring.next2proc,
                   rxq->cmd_ring[1].size - avail);
}

static void
vmxnet3_txq_dump(struct vmxnet3_tx_queue *txq)
{
        uint32_t avail = 0;
        if (txq == NULL)
                return;

        PMD_TX_LOG(DEBUG, "TXQ: cmd base : 0x%p comp ring base : 0x%p.\n",
                   txq->cmd_ring.base, txq->comp_ring.base);
        PMD_TX_LOG(DEBUG, "TXQ: cmd basePA : 0x%lx comp ring basePA : 0x%lx.\n",
                   (unsigned long)txq->cmd_ring.basePA, (unsigned long)txq->comp_ring.basePA);

        avail = vmxnet3_cmd_ring_desc_avail(&txq->cmd_ring);
        PMD_TX_LOG(DEBUG, "TXQ: size=%u; free=%u; next2proc=%u; queued=%u\n",
                   (uint32_t)txq->cmd_ring.size, avail,
                   txq->comp_ring.next2proc, txq->cmd_ring.size - avail);
}
#endif

static inline void
vmxnet3_cmd_ring_release(vmxnet3_cmd_ring_t *ring)
{
        while (ring->next2comp != ring->next2fill) {
                /* No need to worry about tx desc ownership, device is quiesced by now. */
                vmxnet3_buf_info_t *buf_info = ring->buf_info + ring->next2comp;

                if (buf_info->m) {
                        rte_pktmbuf_free(buf_info->m);
                        buf_info->m = NULL;
                        buf_info->bufPA = 0;
                        buf_info->len = 0;
                }
                vmxnet3_cmd_ring_adv_next2comp(ring);
        }
        rte_free(ring->buf_info);
}

void
vmxnet3_dev_tx_queue_release(void *txq)
{
        vmxnet3_tx_queue_t *tq = txq;

        if (txq != NULL) {
                /* Release the cmd_ring */
                vmxnet3_cmd_ring_release(&tq->cmd_ring);
        }
}

void
vmxnet3_dev_rx_queue_release(void *rxq)
{
        int i;
        vmxnet3_rx_queue_t *rq = rxq;

        if (rxq != NULL) {
                /* Release both the cmd_rings */
                for (i = 0; i < VMXNET3_RX_CMDRING_SIZE; i++)
                        vmxnet3_cmd_ring_release(&rq->cmd_ring[i]);
        }
}

void
vmxnet3_dev_clear_queues(struct rte_eth_dev *dev)
{
        unsigned i;

        PMD_INIT_FUNC_TRACE();

        for (i = 0; i < dev->data->nb_tx_queues; i++) {
                struct vmxnet3_tx_queue *txq = dev->data->tx_queues[i];
                if (txq != NULL) {
                        txq->stopped = TRUE;
                        vmxnet3_dev_tx_queue_release(txq);
                }
        }

        for (i = 0; i < dev->data->nb_rx_queues; i++) {
                struct vmxnet3_rx_queue *rxq = dev->data->rx_queues[i];
                if (rxq != NULL) {
                        rxq->stopped = TRUE;
                        vmxnet3_dev_rx_queue_release(rxq);
                }
        }
}

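/*
 * Walk the Tx completion ring: for every completion descriptor owned by the
 * driver (generation bit matches), free the transmitted mbuf and release the
 * corresponding Tx command descriptor.
 */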
static inline void
vmxnet3_tq_tx_complete(vmxnet3_tx_queue_t *txq)
{
        int completed = 0;
        struct rte_mbuf *mbuf;
        vmxnet3_comp_ring_t *comp_ring = &txq->comp_ring;
        struct Vmxnet3_TxCompDesc *tcd = (struct Vmxnet3_TxCompDesc *)
                                         (comp_ring->base + comp_ring->next2proc);

        while (tcd->gen == comp_ring->gen) {
                /* Release cmd_ring descriptor and free mbuf */
#ifdef RTE_LIBRTE_VMXNET3_DEBUG_DRIVER
                VMXNET3_ASSERT(txq->cmd_ring.base[tcd->txdIdx].txd.eop == 1);
#endif
                mbuf = txq->cmd_ring.buf_info[tcd->txdIdx].m;
                if (unlikely(mbuf == NULL))
                        rte_panic("EOP desc does not point to a valid mbuf");
                else
                        rte_pktmbuf_free(mbuf);

                txq->cmd_ring.buf_info[tcd->txdIdx].m = NULL;
                /* Mark the txd for which tcd was generated as completed */
                vmxnet3_cmd_ring_adv_next2comp(&txq->cmd_ring);

                vmxnet3_comp_ring_adv_next2proc(comp_ring);
                tcd = (struct Vmxnet3_TxCompDesc *)(comp_ring->base +
                                                    comp_ring->next2proc);
                completed++;
        }

        PMD_TX_LOG(DEBUG, "Processed %d tx comps & command descs.\n", completed);
}

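/*
 * Transmit a burst of packets. Each packet must fit in a single Tx command
 * descriptor (multi-segment mbufs are dropped). The Tx producer register is
 * written once txNumDeferred reaches the txThreshold advertised by the device.
 */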
uint16_t
vmxnet3_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts,
                  uint16_t nb_pkts)
{
        uint16_t nb_tx;
        Vmxnet3_TxDesc *txd = NULL;
        vmxnet3_buf_info_t *tbi = NULL;
        struct vmxnet3_hw *hw;
        struct rte_mbuf *txm;
        vmxnet3_tx_queue_t *txq = tx_queue;

        hw = txq->hw;

        if (txq->stopped) {
                PMD_TX_LOG(DEBUG, "Tx queue is stopped.\n");
                return 0;
        }

        /* Free up the comp_descriptors aggressively */
        vmxnet3_tq_tx_complete(txq);

        nb_tx = 0;
        while (nb_tx < nb_pkts) {
                if (vmxnet3_cmd_ring_desc_avail(&txq->cmd_ring)) {
                        txm = tx_pkts[nb_tx];
                        /* Multi-segment (scatter) packets are not supported yet; drop them */
                        if (txm->pkt.nb_segs != 1) {
                                PMD_TX_LOG(DEBUG, "Multi-segment packets not supported yet, dropping packet\n");
                                rte_pktmbuf_free(tx_pkts[nb_tx]);
                                txq->stats.drop_total++;

                                nb_tx++;
                                continue;
                        }

                        /* cur_mtu does not include the Ethernet header, so add it for the check */
                        if (txm->pkt.data_len > (hw->cur_mtu + ETHER_HDR_LEN)) {
                                PMD_TX_LOG(DEBUG, "Packet data_len higher than MTU\n");
                                rte_pktmbuf_free(tx_pkts[nb_tx]);
                                txq->stats.drop_total++;

                                nb_tx++;
                                continue;
                        }

                        txd = (Vmxnet3_TxDesc *)(txq->cmd_ring.base + txq->cmd_ring.next2fill);

                        /* Fill the tx descriptor */
                        tbi = txq->cmd_ring.buf_info + txq->cmd_ring.next2fill;
                        tbi->bufPA = RTE_MBUF_DATA_DMA_ADDR(txm);
                        txd->addr = tbi->bufPA;
                        txd->len = txm->pkt.data_len;

                        /* Mark the last descriptor as End of Packet. */
                        txd->cq = 1;
                        txd->eop = 1;

                        /* Record current mbuf for freeing it later in tx complete */
#ifdef RTE_LIBRTE_VMXNET3_DEBUG_DRIVER
                        VMXNET3_ASSERT(txm);
#endif
                        tbi->m = txm;

                        /* Set the offloading mode to default */
                        txd->hlen = 0;
                        txd->om = VMXNET3_OM_NONE;
                        txd->msscof = 0;

                        /* finally flip the GEN bit of the SOP desc */
                        txd->gen = txq->cmd_ring.gen;
                        txq->shared->ctrl.txNumDeferred++;

                        /* move to the next2fill descriptor */
                        vmxnet3_cmd_ring_adv_next2fill(&txq->cmd_ring);
                        nb_tx++;
                } else {
                        PMD_TX_LOG(DEBUG, "No free tx cmd desc(s)\n");
                        txq->stats.drop_total += (nb_pkts - nb_tx);
                        break;
                }
        }

        PMD_TX_LOG(DEBUG, "vmxnet3 txThreshold: %u", txq->shared->ctrl.txThreshold);

        if (txq->shared->ctrl.txNumDeferred >= txq->shared->ctrl.txThreshold) {
                txq->shared->ctrl.txNumDeferred = 0;
                /* Notify vSwitch that packets are available. */
                VMXNET3_WRITE_BAR0_REG(hw, (VMXNET3_REG_TXPROD + txq->queue_id * VMXNET3_REG_ALIGN),
                                       txq->cmd_ring.next2fill);
        }

        return (nb_tx);
}

/*
 * Allocate mbufs and post Rx descriptors with the buffer details so that the
 * device can receive packets into those buffers.
 * Ring layout:
 *      Of the two command rings, the 1st ring holds buffers of type 0 (HEAD)
 *      and type 1 (BODY). bufs_per_pkt is set such that, in the non-LRO case,
 *      all buffers required by a frame fit in the 1st ring (the first buffer
 *      of type HEAD, the rest of type BODY).
 *      The 2nd ring holds BODY buffers only and is used mostly for LRO.
 */
static inline int
vmxnet3_post_rx_bufs(vmxnet3_rx_queue_t *rxq, uint8_t ring_id)
{
        int err = 0;
        uint32_t i = 0, val = 0;
        struct vmxnet3_cmd_ring *ring = &rxq->cmd_ring[ring_id];

        while (vmxnet3_cmd_ring_desc_avail(ring) > 0) {
                struct Vmxnet3_RxDesc *rxd;
                struct rte_mbuf *mbuf;
                vmxnet3_buf_info_t *buf_info = &ring->buf_info[ring->next2fill];

                rxd = (struct Vmxnet3_RxDesc *)(ring->base + ring->next2fill);

                if (ring->rid == 0) {
                        /* Usually: one HEAD type buffer per packet
                         * val = (ring->next2fill % rxq->hw->bufs_per_pkt) ?
                         * VMXNET3_RXD_BTYPE_BODY : VMXNET3_RXD_BTYPE_HEAD;
                         */

                        /* We use a single packet buffer, so all descriptors here are HEAD */
                        val = VMXNET3_RXD_BTYPE_HEAD;
                } else {
                        /* All BODY type buffers for the 2nd ring, which ESXi does not use in this mode */
                        val = VMXNET3_RXD_BTYPE_BODY;
                }

                /* Allocate a blank mbuf for the current Rx descriptor */
                mbuf = rte_rxmbuf_alloc(rxq->mp);
                if (mbuf == NULL) {
                        PMD_RX_LOG(ERR, "Error allocating mbuf in %s\n", __func__);
                        rxq->stats.rx_buf_alloc_failure++;
                        err = ENOMEM;
                        break;
                }

                /*
                 * Load the mbuf pointer into buf_info[next2fill].
                 * The buf_info structure is equivalent to the cookie of a virtio virtqueue.
                 */
                buf_info->m = mbuf;
                buf_info->len = (uint16_t)(mbuf->buf_len -
                        RTE_PKTMBUF_HEADROOM);
                buf_info->bufPA = RTE_MBUF_DATA_DMA_ADDR_DEFAULT(mbuf);

                /* Load the Rx descriptor with the buffer's GPA */
                rxd->addr = buf_info->bufPA;

                /* After this point rxd->addr MUST not be NULL */
                rxd->btype = val;
                rxd->len = buf_info->len;
                /* Flip the gen bit at the end to change ownership */
                rxd->gen = ring->gen;

                vmxnet3_cmd_ring_adv_next2fill(ring);
                i++;
        }

        /* Return an error only if no buffers are currently posted */
        if (vmxnet3_cmd_ring_desc_avail(ring) >= (ring->size - 1))
                return -err;
        else
                return i;
}

/*
 * Process the Rx completion ring of the given vmxnet3_rx_queue
 * for up to nb_pkts packets and return the number of packets received.
 */
uint16_t
vmxnet3_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t nb_pkts)
{
        uint16_t nb_rx;
        uint32_t nb_rxd, idx;
        uint8_t ring_idx;
        vmxnet3_rx_queue_t *rxq;
        Vmxnet3_RxCompDesc *rcd;
        vmxnet3_buf_info_t *rbi;
        Vmxnet3_RxDesc *rxd;
        struct rte_mbuf *rxm = NULL;
        struct vmxnet3_hw *hw;

        nb_rx = 0;
        ring_idx = 0;
        nb_rxd = 0;
        idx = 0;

        rxq = rx_queue;
        hw = rxq->hw;

        rcd = &rxq->comp_ring.base[rxq->comp_ring.next2proc].rcd;

        if (rxq->stopped) {
                PMD_RX_LOG(DEBUG, "Rx queue is stopped.\n");
                return 0;
        }

        while (rcd->gen == rxq->comp_ring.gen) {
                if (nb_rx >= nb_pkts)
                        break;

                idx = rcd->rxdIdx;
                ring_idx = (uint8_t)((rcd->rqID == rxq->qid1) ? 0 : 1);
                rxd = (Vmxnet3_RxDesc *)rxq->cmd_ring[ring_idx].base + idx;
                rbi = rxq->cmd_ring[ring_idx].buf_info + idx;

                if (rcd->sop != 1 || rcd->eop != 1) {
                        rte_pktmbuf_free_seg(rbi->m);
                        PMD_RX_LOG(DEBUG, "Packet spread across multiple buffers\n");
                        goto rcd_done;
                } else {
                        PMD_RX_LOG(DEBUG, "rxd idx: %d ring idx: %d.\n", idx, ring_idx);

#ifdef RTE_LIBRTE_VMXNET3_DEBUG_DRIVER
                        VMXNET3_ASSERT(rcd->len <= rxd->len);
                        VMXNET3_ASSERT(rbi->m);
#endif
                        if (rcd->len == 0) {
                                PMD_RX_LOG(DEBUG, "Rx buf was skipped. rxring[%d][%d]\n",
                                           ring_idx, idx);
#ifdef RTE_LIBRTE_VMXNET3_DEBUG_DRIVER
                                VMXNET3_ASSERT(rcd->sop && rcd->eop);
#endif
                                rte_pktmbuf_free_seg(rbi->m);
                                goto rcd_done;
                        }

                        /* Assuming a packet comes in a single packet buffer */
                        if (rxd->btype != VMXNET3_RXD_BTYPE_HEAD) {
                                PMD_RX_LOG(DEBUG, "Alert: misbehaving device, incorrect "
                                           "buffer type used. Packet dropped.\n");
                                rte_pktmbuf_free_seg(rbi->m);
                                goto rcd_done;
                        }
#ifdef RTE_LIBRTE_VMXNET3_DEBUG_DRIVER
                        VMXNET3_ASSERT(rxd->btype == VMXNET3_RXD_BTYPE_HEAD);
#endif
                        /* Get the packet buffer pointer from buf_info */
                        rxm = rbi->m;

                        /* Clear the descriptor's buf_info so that it can be reused */
                        rbi->m = NULL;
                        rbi->bufPA = 0;

                        /* Update the index at which we received a packet */
                        rxq->cmd_ring[ring_idx].next2comp = idx;

                        /* For an RCD with EOP set, check if there is a frame error */
                        if (rcd->err) {
                                rxq->stats.drop_total++;
                                rxq->stats.drop_err++;

                                if (!rcd->fcs) {
                                        rxq->stats.drop_fcs++;
                                        PMD_RX_LOG(ERR, "Recv packet dropped due to frame err.\n");
                                }
                                PMD_RX_LOG(ERR, "Error in received packet rcd#:%d rxd:%d\n",
                                           (int)(rcd - (struct Vmxnet3_RxCompDesc *)
                                                 rxq->comp_ring.base), rcd->rxdIdx);
                                rte_pktmbuf_free_seg(rxm);
                                goto rcd_done;
                        }

                        /* Check for a hardware-stripped VLAN tag */
                        if (rcd->ts) {
                                PMD_RX_LOG(ERR, "Received packet with vlan ID: %d.\n",
                                           rcd->tci);
                                rxm->ol_flags = PKT_RX_VLAN_PKT;
#ifdef RTE_LIBRTE_VMXNET3_DEBUG_DRIVER
                                VMXNET3_ASSERT(rxm &&
                                               rte_pktmbuf_mtod(rxm, void *));
#endif
                                /* Copy the stripped VLAN tag into the packet buffer */
                                rxm->pkt.vlan_macip.f.vlan_tci =
                                        rte_le_to_cpu_16((uint16_t)rcd->tci);
                        } else {
                                rxm->ol_flags = 0;
                                rxm->pkt.vlan_macip.f.vlan_tci = 0;
                        }

                        /* Initialize the newly received packet buffer */
                        rxm->pkt.in_port = rxq->port_id;
                        rxm->pkt.nb_segs = 1;
                        rxm->pkt.next = NULL;
                        rxm->pkt.pkt_len = (uint16_t)rcd->len;
                        rxm->pkt.data_len = (uint16_t)rcd->len;
                        rxm->pkt.data = (char *)rxm->buf_addr + RTE_PKTMBUF_HEADROOM;

                        rx_pkts[nb_rx++] = rxm;

rcd_done:
                        rxq->cmd_ring[ring_idx].next2comp = idx;
                        VMXNET3_INC_RING_IDX_ONLY(rxq->cmd_ring[ring_idx].next2comp, rxq->cmd_ring[ring_idx].size);

                        /* It's time to allocate new buffers and renew descriptors */
                        vmxnet3_post_rx_bufs(rxq, ring_idx);
                        if (unlikely(rxq->shared->ctrl.updateRxProd)) {
                                VMXNET3_WRITE_BAR0_REG(hw, rxprod_reg[ring_idx] + (rxq->queue_id * VMXNET3_REG_ALIGN),
                                                       rxq->cmd_ring[ring_idx].next2fill);
                        }

                        /* Advance to the next descriptor in the completion ring */
                        vmxnet3_comp_ring_adv_next2proc(&rxq->comp_ring);

                        rcd = &rxq->comp_ring.base[rxq->comp_ring.next2proc].rcd;
                        nb_rxd++;
                        if (nb_rxd > rxq->cmd_ring[0].size) {
                                PMD_RX_LOG(ERR, "Used up quota of receiving packets,"
                                           " relinquish control.\n");
                                break;
                        }
                }
        }

        return (nb_rx);
}

/*
 * Create a memzone for the device rings. malloc cannot be used here because
 * the physical address of the memory is needed. If the memzone already
 * exists, this function returns a pointer to the existing one.
 */
static const struct rte_memzone *
ring_dma_zone_reserve(struct rte_eth_dev *dev, const char *ring_name,
                      uint16_t queue_id, uint32_t ring_size, int socket_id)
{
        char z_name[RTE_MEMZONE_NAMESIZE];
        const struct rte_memzone *mz;

        snprintf(z_name, sizeof(z_name), "%s_%s_%d_%d",
                 dev->driver->pci_drv.name, ring_name,
                 dev->data->port_id, queue_id);

        mz = rte_memzone_lookup(z_name);
        if (mz)
                return mz;

        return rte_memzone_reserve_aligned(z_name, ring_size,
                                           socket_id, 0, VMXNET3_RING_BA_ALIGN);
}

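/*
 * Set up a Tx queue: validate the requested ring size (512-4096 descriptors),
 * allocate the queue structure and its buf_info array, and carve the command
 * ring and completion ring out of a single DMA-able memzone.
 */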
int
vmxnet3_dev_tx_queue_setup(struct rte_eth_dev *dev,
                           uint16_t queue_idx,
                           uint16_t nb_desc,
                           unsigned int socket_id,
                           const struct rte_eth_txconf *tx_conf)
{
        const struct rte_memzone *mz;
        struct vmxnet3_tx_queue *txq;
        struct vmxnet3_hw *hw;
        struct vmxnet3_cmd_ring *ring;
        struct vmxnet3_comp_ring *comp_ring;
        int size;

        PMD_INIT_FUNC_TRACE();
        hw = VMXNET3_DEV_PRIVATE_TO_HW(dev->data->dev_private);

        if ((tx_conf->txq_flags & ETH_TXQ_FLAGS_NOMULTSEGS) !=
            ETH_TXQ_FLAGS_NOMULTSEGS) {
                PMD_INIT_LOG(ERR, "TX multi-segment packets are not supported yet\n");
                return (-EINVAL);
        }

        if ((tx_conf->txq_flags & ETH_TXQ_FLAGS_NOOFFLOADS) !=
            ETH_TXQ_FLAGS_NOOFFLOADS) {
                PMD_INIT_LOG(ERR, "TX offloads are not supported yet\n");
                return (-EINVAL);
        }

        txq = rte_zmalloc("ethdev_tx_queue", sizeof(struct vmxnet3_tx_queue), CACHE_LINE_SIZE);
        if (txq == NULL) {
                PMD_INIT_LOG(ERR, "Cannot allocate tx queue structure\n");
                return (-ENOMEM);
        }

        txq->queue_id = queue_idx;
        txq->port_id = dev->data->port_id;
        txq->shared = &hw->tqd_start[queue_idx];
        txq->hw = hw;
        txq->qid = queue_idx;
        txq->stopped = TRUE;

        ring = &txq->cmd_ring;
        comp_ring = &txq->comp_ring;

        /* Tx vmxnet ring length should be between 512 and 4096 */
        if (nb_desc < VMXNET3_DEF_TX_RING_SIZE) {
                PMD_INIT_LOG(ERR, "VMXNET3 Tx Ring Size Min: %u\n",
                             VMXNET3_DEF_TX_RING_SIZE);
                return -EINVAL;
        } else if (nb_desc > VMXNET3_TX_RING_MAX_SIZE) {
                PMD_INIT_LOG(ERR, "VMXNET3 Tx Ring Size Max: %u\n",
                             VMXNET3_TX_RING_MAX_SIZE);
                return -EINVAL;
        } else {
                ring->size = nb_desc;
                ring->size &= ~VMXNET3_RING_SIZE_MASK;
        }
        comp_ring->size = ring->size;

        /* Tx vmxnet rings structure initialization */
        ring->next2fill = 0;
        ring->next2comp = 0;
        ring->gen = VMXNET3_INIT_GEN;
        comp_ring->next2proc = 0;
        comp_ring->gen = VMXNET3_INIT_GEN;

        size = sizeof(struct Vmxnet3_TxDesc) * ring->size;
        size += sizeof(struct Vmxnet3_TxCompDesc) * comp_ring->size;

        mz = ring_dma_zone_reserve(dev, "txdesc", queue_idx, size, socket_id);
        if (mz == NULL) {
                PMD_INIT_LOG(ERR, "ERROR: Creating queue descriptors zone\n");
                return (-ENOMEM);
        }
        memset(mz->addr, 0, mz->len);

        /* cmd_ring initialization */
        ring->base = mz->addr;
        ring->basePA = mz->phys_addr;

        /* comp_ring initialization */
        comp_ring->base = ring->base + ring->size;
        comp_ring->basePA = ring->basePA +
                            (sizeof(struct Vmxnet3_TxDesc) * ring->size);

        /* cmd_ring buf_info allocation */
        ring->buf_info = rte_zmalloc("tx_ring_buf_info",
                                     ring->size * sizeof(vmxnet3_buf_info_t), CACHE_LINE_SIZE);
        if (ring->buf_info == NULL) {
                PMD_INIT_LOG(ERR, "ERROR: Creating tx_buf_info structure\n");
                return (-ENOMEM);
        }

        /* Update the data portion with txq */
        dev->data->tx_queues[queue_idx] = txq;

        return 0;
}

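/*
 * Set up an Rx queue: check that the mempool's buffers can hold the largest
 * expected frame (no scatter support yet), validate the ring size, and lay
 * out both command rings plus the completion ring in one DMA-able memzone.
 */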
int
vmxnet3_dev_rx_queue_setup(struct rte_eth_dev *dev,
                           uint16_t queue_idx,
                           uint16_t nb_desc,
                           unsigned int socket_id,
                           __attribute__((unused)) const struct rte_eth_rxconf *rx_conf,
                           struct rte_mempool *mp)
{
        const struct rte_memzone *mz;
        struct vmxnet3_rx_queue *rxq;
        struct vmxnet3_hw *hw;
        struct vmxnet3_cmd_ring *ring0, *ring1, *ring;
        struct vmxnet3_comp_ring *comp_ring;
        int size;
        uint8_t i;
        char mem_name[32];
        uint16_t buf_size;
        struct rte_pktmbuf_pool_private *mbp_priv;

        PMD_INIT_FUNC_TRACE();
        hw = VMXNET3_DEV_PRIVATE_TO_HW(dev->data->dev_private);

        mbp_priv = (struct rte_pktmbuf_pool_private *)rte_mempool_get_priv(mp);
        buf_size = (uint16_t)(mbp_priv->mbuf_data_room_size -
                              RTE_PKTMBUF_HEADROOM);

        if (dev->data->dev_conf.rxmode.max_rx_pkt_len > buf_size) {
                PMD_INIT_LOG(ERR, "buf_size = %u, max_pkt_len = %u, "
                             "VMXNET3 doesn't support scatter packets yet\n",
                             buf_size, dev->data->dev_conf.rxmode.max_rx_pkt_len);
                return (-EINVAL);
        }

        rxq = rte_zmalloc("ethdev_rx_queue", sizeof(struct vmxnet3_rx_queue), CACHE_LINE_SIZE);
        if (rxq == NULL) {
                PMD_INIT_LOG(ERR, "Cannot allocate rx queue structure\n");
                return (-ENOMEM);
        }

        rxq->mp = mp;
        rxq->queue_id = queue_idx;
        rxq->port_id = dev->data->port_id;
        rxq->shared = &hw->rqd_start[queue_idx];
        rxq->hw = hw;
        rxq->qid1 = queue_idx;
        rxq->qid2 = queue_idx + hw->num_rx_queues;
        rxq->stopped = TRUE;

        ring0 = &rxq->cmd_ring[0];
        ring1 = &rxq->cmd_ring[1];
        comp_ring = &rxq->comp_ring;

        /* Rx vmxnet rings length should be between 256 and 4096 */
        if (nb_desc < VMXNET3_DEF_RX_RING_SIZE) {
                PMD_INIT_LOG(ERR, "VMXNET3 Rx Ring Size Min: 256\n");
                return -EINVAL;
        } else if (nb_desc > VMXNET3_RX_RING_MAX_SIZE) {
                PMD_INIT_LOG(ERR, "VMXNET3 Rx Ring Size Max: 4096\n");
                return -EINVAL;
        } else {
                ring0->size = nb_desc;
                ring0->size &= ~VMXNET3_RING_SIZE_MASK;
                ring1->size = ring0->size;
        }

        comp_ring->size = ring0->size + ring1->size;

        /* Rx vmxnet rings structure initialization */
        ring0->next2fill = 0;
        ring1->next2fill = 0;
        ring0->next2comp = 0;
        ring1->next2comp = 0;
        ring0->gen = VMXNET3_INIT_GEN;
        ring1->gen = VMXNET3_INIT_GEN;
        comp_ring->next2proc = 0;
        comp_ring->gen = VMXNET3_INIT_GEN;

        size = sizeof(struct Vmxnet3_RxDesc) * (ring0->size + ring1->size);
        size += sizeof(struct Vmxnet3_RxCompDesc) * comp_ring->size;

        mz = ring_dma_zone_reserve(dev, "rxdesc", queue_idx, size, socket_id);
        if (mz == NULL) {
                PMD_INIT_LOG(ERR, "ERROR: Creating queue descriptors zone\n");
                return (-ENOMEM);
        }
        memset(mz->addr, 0, mz->len);

        /* cmd_ring0 initialization */
        ring0->base = mz->addr;
        ring0->basePA = mz->phys_addr;

        /* cmd_ring1 initialization */
        ring1->base = ring0->base + ring0->size;
        ring1->basePA = ring0->basePA + sizeof(struct Vmxnet3_RxDesc) * ring0->size;

        /* comp_ring initialization */
        comp_ring->base = ring1->base + ring1->size;
        comp_ring->basePA = ring1->basePA + sizeof(struct Vmxnet3_RxDesc) *
                            ring1->size;

        /* cmd_ring0-cmd_ring1 buf_info allocation */
        for (i = 0; i < VMXNET3_RX_CMDRING_SIZE; i++) {
                ring = &rxq->cmd_ring[i];
                ring->rid = i;
                snprintf(mem_name, sizeof(mem_name), "rx_ring_%d_buf_info", i);

                ring->buf_info = rte_zmalloc(mem_name, ring->size * sizeof(vmxnet3_buf_info_t), CACHE_LINE_SIZE);
                if (ring->buf_info == NULL) {
                        PMD_INIT_LOG(ERR, "ERROR: Creating rx_buf_info structure\n");
                        return (-ENOMEM);
                }
        }

        /* Update the data portion with rxq */
        dev->data->rx_queues[queue_idx] = rxq;

        return 0;
}

/*
 * Initialize the receive unit and load mbufs into the rx queues in advance.
 */
int
vmxnet3_dev_rxtx_init(struct rte_eth_dev *dev)
{
        struct vmxnet3_hw *hw;
        int i, ret;
        uint8_t j;

        PMD_INIT_FUNC_TRACE();
        hw = VMXNET3_DEV_PRIVATE_TO_HW(dev->data->dev_private);

        for (i = 0; i < hw->num_rx_queues; i++) {
                vmxnet3_rx_queue_t *rxq = dev->data->rx_queues[i];

                for (j = 0; j < VMXNET3_RX_CMDRING_SIZE; j++) {
                        /* Post as many buffers as the command ring can take */
                        ret = vmxnet3_post_rx_bufs(rxq, j);
                        if (ret <= 0) {
                                PMD_INIT_LOG(ERR, "ERROR: Posting Rxq: %d buffers ring: %d\n", i, j);
                                return (-ret);
                        }
                        /* Update the device with next2fill so it can use the newly posted mbufs */
                        if (unlikely(rxq->shared->ctrl.updateRxProd)) {
                                VMXNET3_WRITE_BAR0_REG(hw, rxprod_reg[j] + (rxq->queue_id * VMXNET3_REG_ALIGN),
                                                       rxq->cmd_ring[j].next2fill);
                        }
                }
                rxq->stopped = FALSE;
        }

        for (i = 0; i < dev->data->nb_tx_queues; i++) {
                struct vmxnet3_tx_queue *txq = dev->data->tx_queues[i];

                txq->stopped = FALSE;
        }

        return 0;
}

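/*
 * Default 40-byte Toeplitz RSS hash key, used when the application does not
 * supply one in rx_adv_conf.rss_conf.
 */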
static uint8_t rss_intel_key[40] = {
        0x6D, 0x5A, 0x56, 0xDA, 0x25, 0x5B, 0x0E, 0xC2,
        0x41, 0x67, 0x25, 0x3D, 0x43, 0xA3, 0x8F, 0xB0,
        0xD0, 0xCA, 0x2B, 0xCB, 0xAE, 0x7B, 0x30, 0xB4,
        0x77, 0xCB, 0x2D, 0xA3, 0x80, 0x30, 0xF2, 0x0C,
        0x6A, 0x42, 0xB7, 0x3B, 0xBE, 0xAC, 0x01, 0xFA,
};

/*
 * Configure the RSS feature
 */
int
vmxnet3_rss_configure(struct rte_eth_dev *dev)
{
#define VMXNET3_RSS_OFFLOAD_ALL ( \
                ETH_RSS_IPV4 | \
                ETH_RSS_IPV4_TCP | \
                ETH_RSS_IPV6 | \
                ETH_RSS_IPV6_TCP)

        struct vmxnet3_hw *hw;
        struct VMXNET3_RSSConf *dev_rss_conf;
        struct rte_eth_rss_conf *port_rss_conf;
        uint64_t rss_hf;
        uint8_t i, j;

        PMD_INIT_FUNC_TRACE();
        hw = VMXNET3_DEV_PRIVATE_TO_HW(dev->data->dev_private);
        dev_rss_conf = hw->rss_conf;
        port_rss_conf = &dev->data->dev_conf.rx_adv_conf.rss_conf;

        /* loading hashFunc */
        dev_rss_conf->hashFunc = VMXNET3_RSS_HASH_FUNC_TOEPLITZ;
        /* loading hashKeySize */
        dev_rss_conf->hashKeySize = VMXNET3_RSS_MAX_KEY_SIZE;
        /* loading indTableSize: must not exceed VMXNET3_RSS_MAX_IND_TABLE_SIZE (128) */
        dev_rss_conf->indTableSize = (uint16_t)(hw->num_rx_queues * 4);

        if (port_rss_conf->rss_key == NULL) {
                /* Default hash key */
                port_rss_conf->rss_key = rss_intel_key;
        }

        /* loading hashKey */
        memcpy(&dev_rss_conf->hashKey[0], port_rss_conf->rss_key, dev_rss_conf->hashKeySize);

        /* loading indTable */
        for (i = 0, j = 0; i < dev_rss_conf->indTableSize; i++, j++) {
                if (j == dev->data->nb_rx_queues)
                        j = 0;
                dev_rss_conf->indTable[i] = j;
        }

        /* loading hashType */
        dev_rss_conf->hashType = 0;
        rss_hf = port_rss_conf->rss_hf & VMXNET3_RSS_OFFLOAD_ALL;
        if (rss_hf & ETH_RSS_IPV4)
                dev_rss_conf->hashType |= VMXNET3_RSS_HASH_TYPE_IPV4;
        if (rss_hf & ETH_RSS_IPV4_TCP)
                dev_rss_conf->hashType |= VMXNET3_RSS_HASH_TYPE_TCP_IPV4;
        if (rss_hf & ETH_RSS_IPV6)
                dev_rss_conf->hashType |= VMXNET3_RSS_HASH_TYPE_IPV6;
        if (rss_hf & ETH_RSS_IPV6_TCP)
                dev_rss_conf->hashType |= VMXNET3_RSS_HASH_TYPE_TCP_IPV6;

        return VMXNET3_SUCCESS;
}

/*
 * Configure the VLAN filter feature
 */
int
vmxnet3_vlan_configure(struct rte_eth_dev *dev)
{
        uint8_t i;
        struct vmxnet3_hw *hw = VMXNET3_DEV_PRIVATE_TO_HW(dev->data->dev_private);
        uint32_t *vf_table = hw->shared->devRead.rxFilterConf.vfTable;

        PMD_INIT_FUNC_TRACE();

        /* Clear the whole VLAN filter table: filter all VLAN tags out by default */
        for (i = 0; i < VMXNET3_VFT_SIZE; i++) {
                vf_table[i] = 0;
                /* To-Do: Provide another routine in dev_ops for user config */

                PMD_INIT_LOG(DEBUG, "Registering VLAN portid: %"PRIu8" tag %u\n",
                             dev->data->port_id, vf_table[i]);
        }

        return VMXNET3_SUCCESS;
}