lib/librte_pmd_vmxnet3/vmxnet3_rxtx.c
1 /*-
2  *   BSD LICENSE
3  *
4  *   Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
5  *   All rights reserved.
6  *
7  *   Redistribution and use in source and binary forms, with or without
8  *   modification, are permitted provided that the following conditions
9  *   are met:
10  *
11  *     * Redistributions of source code must retain the above copyright
12  *       notice, this list of conditions and the following disclaimer.
13  *     * Redistributions in binary form must reproduce the above copyright
14  *       notice, this list of conditions and the following disclaimer in
15  *       the documentation and/or other materials provided with the
16  *       distribution.
17  *     * Neither the name of Intel Corporation nor the names of its
18  *       contributors may be used to endorse or promote products derived
19  *       from this software without specific prior written permission.
20  *
21  *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
22  *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
23  *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
24  *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
25  *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
26  *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
27  *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
28  *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
29  *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
30  *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
31  *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
32  */
33
34 #include <sys/queue.h>
35
36 #include <endian.h>
37 #include <stdio.h>
38 #include <stdlib.h>
39 #include <string.h>
40 #include <errno.h>
41 #include <stdint.h>
42 #include <stdarg.h>
43 #include <unistd.h>
44 #include <inttypes.h>
45
46 #include <rte_byteorder.h>
47 #include <rte_common.h>
48 #include <rte_cycles.h>
49 #include <rte_log.h>
50 #include <rte_debug.h>
51 #include <rte_interrupts.h>
52 #include <rte_pci.h>
53 #include <rte_memory.h>
54 #include <rte_memzone.h>
55 #include <rte_launch.h>
56 #include <rte_tailq.h>
57 #include <rte_eal.h>
58 #include <rte_per_lcore.h>
59 #include <rte_lcore.h>
60 #include <rte_atomic.h>
61 #include <rte_branch_prediction.h>
62 #include <rte_ring.h>
63 #include <rte_mempool.h>
64 #include <rte_malloc.h>
65 #include <rte_mbuf.h>
66 #include <rte_ether.h>
67 #include <rte_ethdev.h>
68 #include <rte_prefetch.h>
69 #include <rte_udp.h>
70 #include <rte_tcp.h>
71 #include <rte_sctp.h>
72 #include <rte_string_fns.h>
73 #include <rte_errno.h>
74
75 #include "vmxnet3/vmxnet3_defs.h"
76 #include "vmxnet3_ring.h"
77
78 #include "vmxnet3_logs.h"
79 #include "vmxnet3_ethdev.h"
80
81
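/*
 * DMA address helpers: the device needs guest-physical addresses, so these
 * macros translate an mbuf's current data pointer (or its default
 * post-headroom position) into buf_physaddr plus the matching offset.
 */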
82 #define RTE_MBUF_DATA_DMA_ADDR(mb) \
83         (uint64_t) ((mb)->buf_physaddr + (uint64_t)((char *)((mb)->pkt.data) - \
84         (char *)(mb)->buf_addr))
85
86 #define RTE_MBUF_DATA_DMA_ADDR_DEFAULT(mb) \
87         (uint64_t) ((mb)->buf_physaddr + RTE_PKTMBUF_HEADROOM)
88
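/*
 * BAR0 producer-index ("doorbell") registers for the two Rx command rings,
 * indexed by ring id; written whenever fresh buffers are posted.
 */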
89 static uint32_t rxprod_reg[2] = {VMXNET3_REG_RXPROD, VMXNET3_REG_RXPROD2};
90
91 static inline int vmxnet3_post_rx_bufs(vmxnet3_rx_queue_t *, uint8_t);
92 static inline void vmxnet3_tq_tx_complete(vmxnet3_tx_queue_t *);
93 #ifdef RTE_LIBRTE_VMXNET3_DEBUG_DRIVER_NOT_USED
94 static void vmxnet3_rxq_dump(struct vmxnet3_rx_queue *);
95 static void vmxnet3_txq_dump(struct vmxnet3_tx_queue *);
96 #endif
97
98 static inline struct rte_mbuf *
99 rte_rxmbuf_alloc(struct rte_mempool *mp)
100 {
101         struct rte_mbuf *m;
102
103         m = __rte_mbuf_raw_alloc(mp);
104         __rte_mbuf_sanity_check_raw(m, RTE_MBUF_PKT, 0);
105         return (m);
106 }
107
108 #ifdef RTE_LIBRTE_VMXNET3_DEBUG_DRIVER_NOT_USED
109 static void
110 vmxnet3_rxq_dump(struct vmxnet3_rx_queue *rxq)
111 {
112         uint32_t avail = 0;
113         if (rxq == NULL)
114                 return;
115
116         PMD_RX_LOG(DEBUG, "RXQ: cmd0 base : 0x%p cmd1 base : 0x%p comp ring base : 0x%p.\n",
117                         rxq->cmd_ring[0].base, rxq->cmd_ring[1].base, rxq->comp_ring.base);
118         PMD_RX_LOG(DEBUG, "RXQ: cmd0 basePA : 0x%lx cmd1 basePA : 0x%lx comp ring basePA : 0x%lx.\n",
119                                 (unsigned long)rxq->cmd_ring[0].basePA, (unsigned long)rxq->cmd_ring[1].basePA,
120                         (unsigned long)rxq->comp_ring.basePA);
121
122         avail = vmxnet3_cmd_ring_desc_avail(&rxq->cmd_ring[0]);
123         PMD_RX_LOG(DEBUG, "RXQ:cmd0: size=%u; free=%u; next2proc=%u; queued=%u\n",
124                     (uint32_t)rxq->cmd_ring[0].size, avail, rxq->comp_ring.next2proc,
125                     rxq->cmd_ring[0].size - avail);
126
127         avail = vmxnet3_cmd_ring_desc_avail(&rxq->cmd_ring[1]);
128         PMD_RX_LOG(DEBUG, "RXQ:cmd1 size=%u; free=%u; next2proc=%u; queued=%u\n",
129                         (uint32_t)rxq->cmd_ring[1].size, avail, rxq->comp_ring.next2proc,
130                         rxq->cmd_ring[1].size - avail);
131
132 }
133
134 static void
135 vmxnet3_txq_dump(struct vmxnet3_tx_queue *txq)
136 {
137         uint32_t avail = 0;
138         if (txq == NULL)
139                 return;
140
141         PMD_TX_LOG(DEBUG, "TXQ: cmd base : 0x%p comp ring base : 0x%p.\n",
142                                 txq->cmd_ring.base, txq->comp_ring.base);
143         PMD_TX_LOG(DEBUG, "TXQ: cmd basePA : 0x%lx comp ring basePA : 0x%lx.\n",
144                                 (unsigned long)txq->cmd_ring.basePA, (unsigned long)txq->comp_ring.basePA);
145
146         avail = vmxnet3_cmd_ring_desc_avail(&txq->cmd_ring);
147         PMD_TX_LOG(DEBUG, "TXQ: size=%u; free=%u; next2proc=%u; queued=%u\n",
148                         (uint32_t)txq->cmd_ring.size, avail,
149                         txq->comp_ring.next2proc, txq->cmd_ring.size - avail);
150 }
151 #endif
152
153 static inline void
154 vmxnet3_cmd_ring_release(vmxnet3_cmd_ring_t *ring)
155 {
156         while (ring->next2comp != ring->next2fill) {
157                 /* No need to worry about tx desc ownership, device is quiesced by now. */
158                 vmxnet3_buf_info_t *buf_info = ring->buf_info + ring->next2comp;
159                 if(buf_info->m) {
160                         rte_pktmbuf_free(buf_info->m);
161                         buf_info->m = NULL;
162                         buf_info->bufPA = 0;
163                         buf_info->len = 0;
164                 }
165                 vmxnet3_cmd_ring_adv_next2comp(ring);
166         }
167         rte_free(ring->buf_info);
168 }
169
170 void
171 vmxnet3_dev_tx_queue_release(void *txq)
172 {
173         vmxnet3_tx_queue_t *tq = txq;
174         if (txq != NULL) {
175                 /* Release the cmd_ring */
176                 vmxnet3_cmd_ring_release(&tq->cmd_ring);
177         }
178 }
179
180 void
181 vmxnet3_dev_rx_queue_release(void *rxq)
182 {
183         int i;
184         vmxnet3_rx_queue_t *rq = rxq;
185         if (rxq != NULL) {
186                 /* Release both the cmd_rings */
187                 for (i = 0; i < VMXNET3_RX_CMDRING_SIZE; i++)
188                         vmxnet3_cmd_ring_release(&rq->cmd_ring[i]);
189         }
190 }
191
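/*
 * Called when the port is stopped or closed: mark every queue as stopped and
 * free the mbufs still held in the command rings.
 */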
192 void
193 vmxnet3_dev_clear_queues(struct rte_eth_dev *dev)
194 {
195         unsigned i;
196
197         PMD_INIT_FUNC_TRACE();
198
199         for (i = 0; i < dev->data->nb_tx_queues; i++) {
200                 struct vmxnet3_tx_queue *txq = dev->data->tx_queues[i];
201                 if (txq != NULL) {
202                         txq->stopped = TRUE;
203                         vmxnet3_dev_tx_queue_release(txq);
204                 }
205         }
206
207         for (i = 0; i < dev->data->nb_rx_queues; i++) {
208                 struct vmxnet3_rx_queue *rxq = dev->data->rx_queues[i];
209                 if(rxq != NULL) {
210                         rxq->stopped = TRUE;
211                         vmxnet3_dev_rx_queue_release(rxq);
212                 }
213         }
214 }
215
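/*
 * Walk the Tx completion ring while the descriptor generation bit matches the
 * ring's current generation, free the mbuf attached to each completed Tx
 * descriptor, and advance next2comp/next2proc accordingly.
 */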
216 static inline void
217 vmxnet3_tq_tx_complete(vmxnet3_tx_queue_t *txq)
218 {
219    int completed = 0;
220    struct rte_mbuf *mbuf;
221    vmxnet3_comp_ring_t *comp_ring = &txq->comp_ring;
222    struct Vmxnet3_TxCompDesc *tcd = (struct Vmxnet3_TxCompDesc *)
223                                     (comp_ring->base + comp_ring->next2proc);
224
225    while (tcd->gen == comp_ring->gen) {
226
227            /* Release cmd_ring descriptor and free mbuf */
228 #ifdef RTE_LIBRTE_VMXNET3_DEBUG_DRIVER
229             VMXNET3_ASSERT(txq->cmd_ring.base[tcd->txdIdx].txd.eop == 1);
230 #endif
231             mbuf = txq->cmd_ring.buf_info[tcd->txdIdx].m;
232                 if (unlikely(mbuf == NULL))
233                         rte_panic("EOP desc does not point to a valid mbuf");
234                 else
235                         rte_pktmbuf_free(mbuf);
236
237
238                 txq->cmd_ring.buf_info[tcd->txdIdx].m = NULL;
239                 /* Mark the txd for which tcd was generated as completed */
240                 vmxnet3_cmd_ring_adv_next2comp(&txq->cmd_ring);
241
242                 vmxnet3_comp_ring_adv_next2proc(comp_ring);
243                 tcd = (struct Vmxnet3_TxCompDesc *)(comp_ring->base +
244                                                                                   comp_ring->next2proc);
245                 completed++;
246    }
247
248    PMD_TX_LOG(DEBUG, "Processed %d tx comps & command descs.\n", completed);
249 }
250
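/*
 * Transmit burst function of the PMD. Each packet consumes exactly one Tx
 * descriptor (multi-segment mbufs are dropped), the gen bit is flipped last
 * so the device only ever sees a fully written descriptor, and the TXPROD
 * doorbell is written once txNumDeferred reaches the threshold advertised in
 * the shared queue descriptor.
 *
 * Illustrative caller (sketch only, assuming the standard ethdev burst API):
 *
 *     struct rte_mbuf *pkts[32];
 *     uint16_t sent = rte_eth_tx_burst(port_id, queue_id, pkts, nb);
 *
 * rte_eth_tx_burst() dispatches here once the vmxnet3 PMD is bound to the
 * port.
 */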
251 uint16_t
252 vmxnet3_xmit_pkts( void *tx_queue, struct rte_mbuf **tx_pkts,
253                 uint16_t nb_pkts)
254 {
255         uint16_t nb_tx;
256         Vmxnet3_TxDesc *txd = NULL;
257         vmxnet3_buf_info_t *tbi = NULL;
258         struct vmxnet3_hw *hw;
259         struct rte_mbuf *txm;
260         vmxnet3_tx_queue_t *txq = tx_queue;
261
262         hw = txq->hw;
263
264         if(txq->stopped) {
265                 PMD_TX_LOG(DEBUG, "Tx queue is stopped.\n");
266                 return 0;
267         }
268
269         /* Free up the comp_descriptors aggressively */
270         vmxnet3_tq_tx_complete(txq);
271
272         nb_tx = 0;
273         while(nb_tx < nb_pkts) {
274
275                 if(vmxnet3_cmd_ring_desc_avail(&txq->cmd_ring)) {
276
277                         txm = tx_pkts[nb_tx];
278                         /* Multi-segment (scatter) packets are not supported yet; drop them */
279                         if (txm->pkt.nb_segs != 1) {
280                                 PMD_TX_LOG(DEBUG, "Multi-segment packets not supported yet, dropping\n");
281                                 rte_pktmbuf_free(tx_pkts[nb_tx]);
282                                 txq->stats.drop_total++;
283
284                                 nb_tx++;
285                                 continue;
286                         }
287
288                         /* cur_mtu excludes the Ethernet header, so the largest legal frame is MTU + ETHER_HDR_LEN */
289                         if (txm->pkt.data_len > (hw->cur_mtu + ETHER_HDR_LEN)) {
290                                 PMD_TX_LOG(DEBUG, "Packet data_len exceeds MTU plus Ethernet header, dropping\n");
291                                 rte_pktmbuf_free(tx_pkts[nb_tx]);
292                                 txq->stats.drop_total++;
293
294                                 nb_tx++;
295                                 continue;
296                         }
297
298                         txd = (Vmxnet3_TxDesc *)(txq->cmd_ring.base + txq->cmd_ring.next2fill);
299
300                         /* Fill the tx descriptor */
301                         tbi = txq->cmd_ring.buf_info + txq->cmd_ring.next2fill;
302                         tbi->bufPA = RTE_MBUF_DATA_DMA_ADDR(txm);
303                         txd->addr = tbi->bufPA;
304                         txd->len = txm->pkt.data_len;
305
306                         /* Mark the last descriptor as End of Packet. */
307                         txd->cq = 1;
308                         txd->eop = 1;
309
310                         /* Record current mbuf for freeing it later in tx complete */
311 #ifdef RTE_LIBRTE_VMXNET3_DEBUG_DRIVER
312                         VMXNET3_ASSERT(txm);
313 #endif
314                         tbi->m = txm;
315
316                         /* Set the offloading mode to default */
317                         txd->hlen = 0;
318                         txd->om = VMXNET3_OM_NONE;
319                         txd->msscof = 0;
320
321                         /* finally flip the GEN bit of the SOP desc  */
322                         txd->gen = txq->cmd_ring.gen;
323                         txq->shared->ctrl.txNumDeferred++;
324
325                         /* move to the next2fill descriptor */
326                         vmxnet3_cmd_ring_adv_next2fill(&txq->cmd_ring);
327                         nb_tx++;
328
329                 } else {
330                         PMD_TX_LOG(DEBUG, "No free tx cmd desc(s)\n");
331                         txq->stats.drop_total += (nb_pkts - nb_tx);
332                         break;
333                 }
334         }
335
336         PMD_TX_LOG(DEBUG, "vmxnet3 txThreshold: %u", txq->shared->ctrl.txThreshold);
337
338         if (txq->shared->ctrl.txNumDeferred >= txq->shared->ctrl.txThreshold) {
339
340                 txq->shared->ctrl.txNumDeferred = 0;
341                 /* Notify vSwitch that packets are available. */
342                 VMXNET3_WRITE_BAR0_REG(hw, (VMXNET3_REG_TXPROD + txq->queue_id * VMXNET3_REG_ALIGN),
343                                 txq->cmd_ring.next2fill);
344         }
345
346         return (nb_tx);
347 }
348
349 /*
350  *  Allocates mbufs and clusters. Post rx descriptors with buffer details
351  *  so that device can receive packets in those buffers.
352  *      Ring layout:
353  *      Among the two rings, 1st ring contains buffers of type 0 and type1.
354  *      Among the two rings, the 1st ring contains buffers of type 0 and type 1.
355  *      bufs_per_pkt is set such that, for non-LRO cases, all the buffers required
356  *      by a frame fit in the 1st ring (1st buf of type 0 and the rest of type 1).
357  *      The 2nd ring contains only type 1 buffers and is used mostly
358  *      for LRO.
359  */
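/*
 * Returns the number of descriptors that were filled, or a negated errno if
 * no buffer could be posted at all.
 */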
360 static inline int
361 vmxnet3_post_rx_bufs(vmxnet3_rx_queue_t* rxq, uint8_t ring_id)
362 {
363    int err = 0;
364    uint32_t i = 0, val = 0;
365    struct vmxnet3_cmd_ring *ring = &rxq->cmd_ring[ring_id];
366
367    while (vmxnet3_cmd_ring_desc_avail(ring) > 0) {
368
369                 struct Vmxnet3_RxDesc *rxd;
370                 struct rte_mbuf *mbuf;
371                 vmxnet3_buf_info_t *buf_info = &ring->buf_info[ring->next2fill];
372                 rxd = (struct Vmxnet3_RxDesc *)(ring->base + ring->next2fill);
373
374                 if (ring->rid == 0) {
375                          /* Usually: One HEAD type buf per packet
376                            * val = (ring->next2fill % rxq->hw->bufs_per_pkt) ?
377                            * VMXNET3_RXD_BTYPE_BODY : VMXNET3_RXD_BTYPE_HEAD;
378                            */
379
380                         /* We use single packet buffer so all heads here */
381                         val = VMXNET3_RXD_BTYPE_HEAD;
382                 } else {
383                         /* All BODY type buffers for the 2nd ring, which is not used at all by ESXi */
384                         val = VMXNET3_RXD_BTYPE_BODY;
385                 }
386
387                 /* Allocate blank mbuf for the current Rx Descriptor */
388                 mbuf = rte_rxmbuf_alloc(rxq->mp);
389                 if (mbuf == NULL) {
390                         PMD_RX_LOG(ERR, "Error allocating mbuf in %s\n", __func__);
391                         rxq->stats.rx_buf_alloc_failure++;
392                         err = ENOMEM;
393                         break;
394                 }
395
396                 /*
397                  * Load the mbuf pointer into buf_info[ring->next2fill];
398                  * the buf_info structure plays the same role as the cookie in a virtio virtqueue.
399                  */
400                 buf_info->m = mbuf;
401                 buf_info->len = (uint16_t)(mbuf->buf_len -
402                         RTE_PKTMBUF_HEADROOM);
403                 buf_info->bufPA = RTE_MBUF_DATA_DMA_ADDR_DEFAULT(mbuf);
404
405                 /* Load Rx Descriptor with the buffer's GPA */
406                 rxd->addr = buf_info->bufPA;
407
408                 /* After this point rxd->addr MUST not be NULL */
409                 rxd->btype = val;
410                 rxd->len = buf_info->len;
411                 /* Flip gen bit at the end to change ownership */
412                 rxd->gen = ring->gen;
413
414                 vmxnet3_cmd_ring_adv_next2fill(ring);
415                 i++;
416    }
417
418    /* Return error only if no buffers are posted at present */
419    if (vmxnet3_cmd_ring_desc_avail(ring) >= (ring->size -1))
420       return -err;
421    else
422       return i;
423 }
424
425 /*
426  * Process the Rx Completion Ring of given vmxnet3_rx_queue
427  * for nb_pkts burst and return the number of packets received
428  */
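/*
 * Illustrative caller (sketch only, assuming the standard ethdev burst API):
 *
 *     struct rte_mbuf *pkts[32];
 *     uint16_t n = rte_eth_rx_burst(port_id, queue_id, pkts, 32);
 *
 * For every completion descriptor whose gen bit matches, the mbuf posted on
 * the matching command ring is handed to the caller, a replacement buffer is
 * posted, and RXPROD is rung if the device requested producer updates.
 */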
429 uint16_t
430 vmxnet3_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t nb_pkts)
431 {
432         uint16_t nb_rx;
433         uint32_t nb_rxd, idx;
434         uint8_t ring_idx;
435         vmxnet3_rx_queue_t *rxq;
436         Vmxnet3_RxCompDesc *rcd;
437         vmxnet3_buf_info_t *rbi;
438         Vmxnet3_RxDesc *rxd;
439         struct rte_mbuf *rxm = NULL;
440         struct vmxnet3_hw *hw;
441
442         nb_rx = 0;
443         ring_idx = 0;
444         nb_rxd = 0;
445         idx = 0;
446
447         rxq = rx_queue;
448         hw = rxq->hw;
449
450         rcd = &rxq->comp_ring.base[rxq->comp_ring.next2proc].rcd;
451
452         if(rxq->stopped) {
453                 PMD_RX_LOG(DEBUG, "Rx queue is stopped.\n");
454                 return 0;
455         }
456
457         while (rcd->gen == rxq->comp_ring.gen) {
458
459                 if(nb_rx >= nb_pkts)
460                         break;
461                 idx = rcd->rxdIdx;
462                 ring_idx = (uint8_t)((rcd->rqID == rxq->qid1) ? 0 : 1);
463                 rxd = (Vmxnet3_RxDesc *)rxq->cmd_ring[ring_idx].base + idx;
464                 rbi = rxq->cmd_ring[ring_idx].buf_info + idx;
465
466                 if (rcd->sop != 1 || rcd->eop != 1) {
467                         rte_pktmbuf_free_seg(rbi->m);
468
469                         PMD_RX_LOG(DEBUG, "Packet spread across multiple buffers\n");
470                         goto rcd_done;
471
472                 } else {
473
474                         PMD_RX_LOG(DEBUG, "rxd idx: %d ring idx: %d.\n", idx, ring_idx);
475
476 #ifdef RTE_LIBRTE_VMXNET3_DEBUG_DRIVER
477                         VMXNET3_ASSERT(rcd->len <= rxd->len);
478                         VMXNET3_ASSERT(rbi->m);
479 #endif
480                         if (rcd->len == 0) {
481                                 PMD_RX_LOG(DEBUG, "Rx buf was skipped. rxring[%d][%d]\n",
482                                                          ring_idx, idx);
483 #ifdef RTE_LIBRTE_VMXNET3_DEBUG_DRIVER
484                                 VMXNET3_ASSERT(rcd->sop && rcd->eop);
485 #endif
486                                 rte_pktmbuf_free_seg(rbi->m);
487
488                                 goto rcd_done;
489                         }
490
491                         /* Assuming a packet is coming in a single packet buffer */
492                         if (rxd->btype != VMXNET3_RXD_BTYPE_HEAD) {
493                                 PMD_RX_LOG(DEBUG, "Alert: misbehaving device, incorrect "
494                                                   "buffer type used. Packet dropped.\n");
495                                 rte_pktmbuf_free_seg(rbi->m);
496                                 goto rcd_done;
497                         }
498 #ifdef RTE_LIBRTE_VMXNET3_DEBUG_DRIVER
499                         VMXNET3_ASSERT(rxd->btype == VMXNET3_RXD_BTYPE_HEAD);
500 #endif
501                         /* Get the packet buffer pointer from buf_info */
502                         rxm = rbi->m;
503
504                         /* Clear descriptor associated buf_info to be reused */
505                         rbi->m = NULL;
506                         rbi->bufPA = 0;
507
508                         /* Update the index that we received a packet */
509                         rxq->cmd_ring[ring_idx].next2comp = idx;
510
511                         /* For RCD with EOP set, check if there is frame error */
512                         if (rcd->err) {
513                                 rxq->stats.drop_total++;
514                                 rxq->stats.drop_err++;
515
516                                 if(!rcd->fcs) {
517                                         rxq->stats.drop_fcs++;
518                                         PMD_RX_LOG(ERR, "Recv packet dropped due to frame err.\n");
519                                 }
520                                 PMD_RX_LOG(ERR, "Error in received packet rcd#:%d rxd:%d\n",
521                                                  (int)(rcd - (struct Vmxnet3_RxCompDesc *)
522                                                            rxq->comp_ring.base), rcd->rxdIdx);
523                                 rte_pktmbuf_free_seg(rxm);
524
525                                 goto rcd_done;
526                         }
527
528                         /* Check for hardware stripped VLAN tag */
529                         if (rcd->ts) {
530
531                                 PMD_RX_LOG(DEBUG, "Received packet with vlan ID: %d.\n",
532                                                  rcd->tci);
533                                 rxm->ol_flags = PKT_RX_VLAN_PKT;
534
535 #ifdef RTE_LIBRTE_VMXNET3_DEBUG_DRIVER
536                                 VMXNET3_ASSERT(rxm &&
537                                         rte_pktmbuf_mtod(rxm, void *));
538 #endif
539                                 /* Copy the stripped VLAN tag into the mbuf's vlan_tci field */
540                                 rxm->pkt.vlan_macip.f.vlan_tci =
541                                         rte_le_to_cpu_16((uint16_t)rcd->tci);
542
543                         } else
544                                 rxm->ol_flags = 0;
545
546                         /* Initialize newly received packet buffer */
547                         rxm->pkt.in_port = rxq->port_id;
548                         rxm->pkt.nb_segs = 1;
549                         rxm->pkt.next = NULL;
550                         rxm->pkt.pkt_len = (uint16_t)rcd->len;
551                         rxm->pkt.data_len = (uint16_t)rcd->len;
552                         if (!rcd->ts)   /* keep the VLAN tag copied above when one was stripped */
553                                 rxm->pkt.vlan_macip.f.vlan_tci = 0;
554                         rxm->pkt.data = (char *)rxm->buf_addr + RTE_PKTMBUF_HEADROOM;
555
556                         rx_pkts[nb_rx++] = rxm;
557
558 rcd_done:
559                         rxq->cmd_ring[ring_idx].next2comp = idx;
560                         VMXNET3_INC_RING_IDX_ONLY(rxq->cmd_ring[ring_idx].next2comp, rxq->cmd_ring[ring_idx].size);
561
562                         /* It's time to allocate some new buf and renew descriptors */
563                         vmxnet3_post_rx_bufs(rxq, ring_idx);
564                         if (unlikely(rxq->shared->ctrl.updateRxProd)) {
565                                 VMXNET3_WRITE_BAR0_REG(hw, rxprod_reg[ring_idx] + (rxq->queue_id * VMXNET3_REG_ALIGN),
566                                                                   rxq->cmd_ring[ring_idx].next2fill);
567                         }
568
569                         /* Advance to the next descriptor in comp_ring */
570                         vmxnet3_comp_ring_adv_next2proc(&rxq->comp_ring);
571
572                         rcd = &rxq->comp_ring.base[rxq->comp_ring.next2proc].rcd;
573                         nb_rxd++;
574                         if (nb_rxd > rxq->cmd_ring[0].size) {
575                                 PMD_RX_LOG(ERR, "Used up quota of receiving packets,"
576                                                  " relinquishing control.\n");
577                                 break;
578                         }
579                 }
580         }
581
582         return (nb_rx);
583 }
584
585 /*
586  * Create memzone for device rings. malloc can't be used as the physical address is
587  * needed. If the memzone is already created, then this function returns a ptr
588  * to the old one.
589  */
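/*
 * The memzone name encodes driver name, ring name, port id and queue id, so a
 * later call for the same queue finds and reuses the zone reserved earlier.
 */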
590 static const struct rte_memzone *
591 ring_dma_zone_reserve(struct rte_eth_dev *dev, const char *ring_name,
592                       uint16_t queue_id, uint32_t ring_size, int socket_id)
593 {
594         char z_name[RTE_MEMZONE_NAMESIZE];
595         const struct rte_memzone *mz;
596
597         snprintf(z_name, sizeof(z_name), "%s_%s_%d_%d",
598                         dev->driver->pci_drv.name, ring_name,
599                         dev->data->port_id, queue_id);
600
601         mz = rte_memzone_lookup(z_name);
602         if (mz)
603                 return mz;
604
605         return rte_memzone_reserve_aligned(z_name, ring_size,
606                         socket_id, 0, VMXNET3_RING_BA_ALIGN);
607 }
608
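/*
 * Tx queue setup: rejects multi-segment and offload configurations, clamps
 * the ring size to the supported range, and reserves a single memzone that
 * holds the command ring followed by the completion ring.
 */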
609 int
610 vmxnet3_dev_tx_queue_setup(struct rte_eth_dev *dev,
611                          uint16_t queue_idx,
612                          uint16_t nb_desc,
613                          unsigned int socket_id,
614                          __attribute__((unused)) const struct rte_eth_txconf *tx_conf)
615 {
616         const struct rte_memzone *mz;
617         struct vmxnet3_tx_queue *txq;
618         struct vmxnet3_hw     *hw;
619     struct vmxnet3_cmd_ring *ring;
620     struct vmxnet3_comp_ring *comp_ring;
621     int size;
622
623         PMD_INIT_FUNC_TRACE();
624         hw = VMXNET3_DEV_PRIVATE_TO_HW(dev->data->dev_private);
625
626         if ((tx_conf->txq_flags & ETH_TXQ_FLAGS_NOMULTSEGS) !=
627                 ETH_TXQ_FLAGS_NOMULTSEGS) {
628                 PMD_INIT_LOG(ERR, "Multi-segment TX is not supported yet\n");
629                 return (-EINVAL);
630         }
631
632         if ((tx_conf->txq_flags & ETH_TXQ_FLAGS_NOOFFLOADS) !=
633                 ETH_TXQ_FLAGS_NOOFFLOADS) {
634                 PMD_INIT_LOG(ERR, "TX offloads are not supported yet\n");
635                 return (-EINVAL);
636         }
637
638         txq = rte_zmalloc("ethdev_tx_queue", sizeof(struct vmxnet3_tx_queue), CACHE_LINE_SIZE);
639         if (txq == NULL) {
640                 PMD_INIT_LOG(ERR, "Cannot allocate tx queue structure\n");
641                 return (-ENOMEM);
642         }
643
644         txq->queue_id = queue_idx;
645         txq->port_id = dev->data->port_id;
646         txq->shared = &hw->tqd_start[queue_idx];
647     txq->hw = hw;
648     txq->qid = queue_idx;
649     txq->stopped = TRUE;
650
651     ring = &txq->cmd_ring;
652     comp_ring = &txq->comp_ring;
653
654     /* Tx vmxnet ring length should be between 512-4096 */
655     if (nb_desc < VMXNET3_DEF_TX_RING_SIZE) {
656                 PMD_INIT_LOG(ERR, "VMXNET3 Tx Ring Size Min: %u\n",
657                                         VMXNET3_DEF_TX_RING_SIZE);
658                 return -EINVAL;
659         } else if (nb_desc > VMXNET3_TX_RING_MAX_SIZE) {
660                 PMD_INIT_LOG(ERR, "VMXNET3 Tx Ring Size Max: %u\n",
661                                         VMXNET3_TX_RING_MAX_SIZE);
662                 return -EINVAL;
663     } else {
664                 ring->size = nb_desc;
665                 ring->size &= ~VMXNET3_RING_SIZE_MASK;
666     }
667     comp_ring->size = ring->size;
668
669     /* Tx vmxnet rings structure initialization*/
670     ring->next2fill = 0;
671     ring->next2comp = 0;
672     ring->gen = VMXNET3_INIT_GEN;
673     comp_ring->next2proc = 0;
674     comp_ring->gen = VMXNET3_INIT_GEN;
675
676     size = sizeof(struct Vmxnet3_TxDesc) * ring->size;
677     size += sizeof(struct Vmxnet3_TxCompDesc) * comp_ring->size;
678
679     mz = ring_dma_zone_reserve(dev, "txdesc", queue_idx, size, socket_id);
680         if (mz == NULL) {
681                 PMD_INIT_LOG(ERR, "ERROR: Creating queue descriptors zone\n");
682                 return (-ENOMEM);
683         }
684         memset(mz->addr, 0, mz->len);
685
686         /* cmd_ring initialization */
687         ring->base = mz->addr;
688         ring->basePA = mz->phys_addr;
689
690         /* comp_ring initialization */
691     comp_ring->base = ring->base + ring->size;
692     comp_ring->basePA = ring->basePA +
693                                 (sizeof(struct Vmxnet3_TxDesc) * ring->size);
694
695     /* cmd_ring0 buf_info allocation */
696         ring->buf_info = rte_zmalloc("tx_ring_buf_info",
697                                 ring->size * sizeof(vmxnet3_buf_info_t), CACHE_LINE_SIZE);
698         if (ring->buf_info == NULL) {
699                 PMD_INIT_LOG(ERR, "ERROR: Creating tx_buf_info structure\n");
700                 return (-ENOMEM);
701         }
702
703         /* Update the data portion with txq */
704         dev->data->tx_queues[queue_idx] = txq;
705
706         return 0;
707 }
708
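/*
 * Rx queue setup: verifies that max_rx_pkt_len fits into a single mbuf (no
 * scatter support), then lays out both command rings and the completion ring
 * back to back in one memzone (ring0 | ring1 | comp ring).
 */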
709 int
710 vmxnet3_dev_rx_queue_setup(struct rte_eth_dev *dev,
711                          uint16_t queue_idx,
712                          uint16_t nb_desc,
713                          unsigned int socket_id,
714                          __attribute__((unused)) const struct rte_eth_rxconf *rx_conf,
715                          struct rte_mempool *mp)
716 {
717         const struct rte_memzone *mz;
718         struct vmxnet3_rx_queue *rxq;
719         struct vmxnet3_hw     *hw;
720         struct vmxnet3_cmd_ring *ring0, *ring1, *ring;
721         struct vmxnet3_comp_ring *comp_ring;
722         int size;
723         uint8_t i;
724         char mem_name[32];
725         uint16_t buf_size;
726         struct rte_pktmbuf_pool_private *mbp_priv;
727
728         PMD_INIT_FUNC_TRACE();
729         hw = VMXNET3_DEV_PRIVATE_TO_HW(dev->data->dev_private);
730
731         mbp_priv = (struct rte_pktmbuf_pool_private *)
732                                 rte_mempool_get_priv(mp);
733         buf_size = (uint16_t) (mbp_priv->mbuf_data_room_size -
734                                    RTE_PKTMBUF_HEADROOM);
735
736         if (dev->data->dev_conf.rxmode.max_rx_pkt_len > buf_size) {
737                 PMD_INIT_LOG(ERR, "buf_size = %u, max_pkt_len = %u, "
738                                 "VMXNET3 doesn't support scatter packets yet\n",
739                                 buf_size, dev->data->dev_conf.rxmode.max_rx_pkt_len);
740                 return (-EINVAL);
741         }
742
743         rxq = rte_zmalloc("ethdev_rx_queue", sizeof(struct vmxnet3_rx_queue), CACHE_LINE_SIZE);
744         if (rxq == NULL) {
745                 PMD_INIT_LOG(ERR, "Cannot allocate rx queue structure\n");
746                 return (-ENOMEM);
747         }
748
749         rxq->mp = mp;
750         rxq->queue_id = queue_idx;
751         rxq->port_id = dev->data->port_id;
752         rxq->shared = &hw->rqd_start[queue_idx];
753         rxq->hw = hw;
754         rxq->qid1 = queue_idx;
755         rxq->qid2 = queue_idx + hw->num_rx_queues;
756         rxq->stopped = TRUE;
757
758         ring0 = &rxq->cmd_ring[0];
759         ring1 = &rxq->cmd_ring[1];
760         comp_ring = &rxq->comp_ring;
761
762         /* Rx vmxnet rings length should be between 256-4096 */
763         if(nb_desc < VMXNET3_DEF_RX_RING_SIZE) {
764                 PMD_INIT_LOG(ERR, "VMXNET3 Rx Ring Size Min: 256\n");
765                 return -EINVAL;
766         } else if(nb_desc > VMXNET3_RX_RING_MAX_SIZE) {
767                 PMD_INIT_LOG(ERR, "VMXNET3 Rx Ring Size Max: 4096\n");
768                 return -EINVAL;
769         } else {
770                 ring0->size = nb_desc;
771                 ring0->size &= ~VMXNET3_RING_SIZE_MASK;
772                 ring1->size = ring0->size;
773         }
774
775         comp_ring->size = ring0->size + ring1->size;
776
777         /* Rx vmxnet rings structure initialization */
778         ring0->next2fill = 0;
779         ring1->next2fill = 0;
780         ring0->next2comp = 0;
781         ring1->next2comp = 0;
782         ring0->gen = VMXNET3_INIT_GEN;
783         ring1->gen = VMXNET3_INIT_GEN;
784         comp_ring->next2proc = 0;
785         comp_ring->gen = VMXNET3_INIT_GEN;
786
787         size = sizeof(struct Vmxnet3_RxDesc) * (ring0->size + ring1->size);
788         size += sizeof(struct Vmxnet3_RxCompDesc) * comp_ring->size;
789
790         mz = ring_dma_zone_reserve(dev, "rxdesc", queue_idx, size, socket_id);
791         if (mz == NULL) {
792                 PMD_INIT_LOG(ERR, "ERROR: Creating queue descriptors zone\n");
793                 return (-ENOMEM);
794         }
795         memset(mz->addr, 0, mz->len);
796
797         /* cmd_ring0 initialization */
798         ring0->base = mz->addr;
799         ring0->basePA = mz->phys_addr;
800
801         /* cmd_ring1 initialization */
802         ring1->base = ring0->base + ring0->size;
803         ring1->basePA = ring0->basePA + sizeof(struct Vmxnet3_RxDesc) * ring0->size;
804
805         /* comp_ring initialization */
806         comp_ring->base = ring1->base +  ring1->size;
807         comp_ring->basePA = ring1->basePA + sizeof(struct Vmxnet3_RxDesc) *
808                                            ring1->size;
809
810         /* cmd_ring0-cmd_ring1 buf_info allocation */
811         for(i = 0; i < VMXNET3_RX_CMDRING_SIZE; i++) {
812
813           ring = &rxq->cmd_ring[i];
814           ring->rid = i;
815           snprintf(mem_name, sizeof(mem_name), "rx_ring_%d_buf_info", i);
816
817           ring->buf_info = rte_zmalloc(mem_name, ring->size * sizeof(vmxnet3_buf_info_t), CACHE_LINE_SIZE);
818           if (ring->buf_info == NULL) {
819                   PMD_INIT_LOG(ERR, "ERROR: Creating rx_buf_info structure\n");
820                   return (-ENOMEM);
821           }
822         }
823
824     /* Update the data portion with rxq */
825     dev->data->rx_queues[queue_idx] = rxq;
826
827         return 0;
828 }
829
830 /*
831  * Initializes Receive Unit
832  * Load mbufs in rx queue in advance
833  */
834 int
835 vmxnet3_dev_rxtx_init(struct rte_eth_dev *dev)
836 {
837         struct vmxnet3_hw *hw;
838         int i, ret;
839         uint8_t j;
840
841         PMD_INIT_FUNC_TRACE();
842         hw = VMXNET3_DEV_PRIVATE_TO_HW(dev->data->dev_private);
843
844         for (i = 0; i < hw->num_rx_queues; i++) {
845
846                 vmxnet3_rx_queue_t *rxq = dev->data->rx_queues[i];
847                 for(j = 0;j < VMXNET3_RX_CMDRING_SIZE;j++) {
848                         /* Post as many buffers as the command ring can hold */
849                         ret = vmxnet3_post_rx_bufs(rxq, j);
850                         if (ret <= 0) {
851                           PMD_INIT_LOG(ERR, "ERROR: Posting buffers to Rxq: %d ring: %d failed\n", i, j);
852                           return (-ret);
853                         }
854                         /* Tell the device about next2fill so the freshly posted mbufs can receive packets */
855                         if (unlikely(rxq->shared->ctrl.updateRxProd)) {
856                                 VMXNET3_WRITE_BAR0_REG(hw, rxprod_reg[j] + (rxq->queue_id * VMXNET3_REG_ALIGN),
857                                                 rxq->cmd_ring[j].next2fill);
858                         }
859                 }
860                 rxq->stopped = FALSE;
861         }
862
863         for (i = 0; i < dev->data->nb_tx_queues; i++) {
864                 struct vmxnet3_tx_queue *txq = dev->data->tx_queues[i];
865                 txq->stopped = FALSE;
866         }
867
868         return 0;
869 }
870
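/*
 * Default 40-byte Toeplitz RSS key (the key commonly used as the default by
 * the Intel PMDs); applied when the application does not supply its own key.
 */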
871 static uint8_t rss_intel_key[40] = {
872         0x6D, 0x5A, 0x56, 0xDA, 0x25, 0x5B, 0x0E, 0xC2,
873         0x41, 0x67, 0x25, 0x3D, 0x43, 0xA3, 0x8F, 0xB0,
874         0xD0, 0xCA, 0x2B, 0xCB, 0xAE, 0x7B, 0x30, 0xB4,
875         0x77, 0xCB, 0x2D, 0xA3, 0x80, 0x30, 0xF2, 0x0C,
876         0x6A, 0x42, 0xB7, 0x3B, 0xBE, 0xAC, 0x01, 0xFA,
877 };
878
879 /*
880  * Configure RSS feature
881  */
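/*
 * Illustrative application-side RSS configuration (sketch only, not part of
 * the PMD; field names follow the ethdev API of this DPDK release):
 *
 *     struct rte_eth_conf conf = {
 *             .rxmode = { .mq_mode = ETH_MQ_RX_RSS },
 *             .rx_adv_conf.rss_conf = {
 *                     .rss_key = NULL,  // fall back to the default key above
 *                     .rss_hf = ETH_RSS_IPV4 | ETH_RSS_IPV4_TCP,
 *             },
 *     };
 *     rte_eth_dev_configure(port_id, nb_rxq, nb_txq, &conf);
 *
 * vmxnet3_rss_configure() then translates rss_hf into the device hashType and
 * spreads the indirection table round-robin over the configured Rx queues.
 */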
882 int
883 vmxnet3_rss_configure(struct rte_eth_dev *dev)
884 {
885 #define VMXNET3_RSS_OFFLOAD_ALL ( \
886                 ETH_RSS_IPV4 | \
887                 ETH_RSS_IPV4_TCP | \
888                 ETH_RSS_IPV6 | \
889                 ETH_RSS_IPV6_TCP)
890
891         struct vmxnet3_hw *hw;
892         struct VMXNET3_RSSConf *dev_rss_conf;
893         struct rte_eth_rss_conf *port_rss_conf;
894         uint64_t rss_hf;
895         uint8_t i, j;
896
897         PMD_INIT_FUNC_TRACE();
898         hw = VMXNET3_DEV_PRIVATE_TO_HW(dev->data->dev_private);
899         dev_rss_conf = hw->rss_conf;
900         port_rss_conf = &dev->data->dev_conf.rx_adv_conf.rss_conf;
901
902         /* loading hashFunc */
903         dev_rss_conf->hashFunc = VMXNET3_RSS_HASH_FUNC_TOEPLITZ;
904         /* loading hashKeySize */
905         dev_rss_conf->hashKeySize = VMXNET3_RSS_MAX_KEY_SIZE;
906         /* loading indTableSize : Must not exceed VMXNET3_RSS_MAX_IND_TABLE_SIZE (128)*/
907         dev_rss_conf->indTableSize = (uint16_t)(hw->num_rx_queues * 4);
908
909         if (port_rss_conf->rss_key == NULL) {
910                 /* Default hash key */
911                 port_rss_conf->rss_key = rss_intel_key;
912         }
913
914         /* loading hashKey */
915         memcpy(&dev_rss_conf->hashKey[0], port_rss_conf->rss_key, dev_rss_conf->hashKeySize);
916
917         /* loading indTable */
918         for (i = 0, j = 0; i < dev_rss_conf->indTableSize; i++, j++) {
919                 if (j == dev->data->nb_rx_queues)
920                         j = 0;
921                 dev_rss_conf->indTable[i] = j;
922         }
923
924         /* loading hashType */
925         dev_rss_conf->hashType = 0;
926         rss_hf = port_rss_conf->rss_hf & VMXNET3_RSS_OFFLOAD_ALL;
927         if (rss_hf & ETH_RSS_IPV4)
928                 dev_rss_conf->hashType |= VMXNET3_RSS_HASH_TYPE_IPV4;
929         if (rss_hf & ETH_RSS_IPV4_TCP)
930                 dev_rss_conf->hashType |= VMXNET3_RSS_HASH_TYPE_TCP_IPV4;
931         if (rss_hf & ETH_RSS_IPV6)
932                 dev_rss_conf->hashType |= VMXNET3_RSS_HASH_TYPE_IPV6;
933         if (rss_hf & ETH_RSS_IPV6_TCP)
934                 dev_rss_conf->hashType |= VMXNET3_RSS_HASH_TYPE_TCP_IPV6;
935
936         return VMXNET3_SUCCESS;
937 }
938
939 /*
940  * Configure VLAN Filter feature
941  */
942 int
943 vmxnet3_vlan_configure(struct rte_eth_dev *dev)
944 {
945         uint8_t i;
946         struct vmxnet3_hw *hw = VMXNET3_DEV_PRIVATE_TO_HW(dev->data->dev_private);
947         uint32_t *vf_table = hw->shared->devRead.rxFilterConf.vfTable;
948
949         PMD_INIT_FUNC_TRACE();
950
951         /* Set up the VLAN filter table; filter everything out by default */
952         for (i = 0; i < VMXNET3_VFT_SIZE; i++) {
953                 /* Filter all vlan tags out by default */
954                 vf_table[i] = 0;
955                 /* To-Do: Provide another routine in dev_ops for user config */
956
957                 PMD_INIT_LOG(DEBUG, "Registering VLAN portid: %"PRIu8" tag %u\n",
958                                         dev->data->port_id, vf_table[i]);
959         }
960
961         return VMXNET3_SUCCESS;
962 }