vmxnet3: fix double spacing of log messages
[dpdk.git] lib/librte_pmd_vmxnet3/vmxnet3_rxtx.c
1 /*-
2  *   BSD LICENSE
3  *
4  *   Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
5  *   All rights reserved.
6  *
7  *   Redistribution and use in source and binary forms, with or without
8  *   modification, are permitted provided that the following conditions
9  *   are met:
10  *
11  *     * Redistributions of source code must retain the above copyright
12  *       notice, this list of conditions and the following disclaimer.
13  *     * Redistributions in binary form must reproduce the above copyright
14  *       notice, this list of conditions and the following disclaimer in
15  *       the documentation and/or other materials provided with the
16  *       distribution.
17  *     * Neither the name of Intel Corporation nor the names of its
18  *       contributors may be used to endorse or promote products derived
19  *       from this software without specific prior written permission.
20  *
21  *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
22  *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
23  *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
24  *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
25  *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
26  *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
27  *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
28  *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
29  *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
30  *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
31  *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
32  */
33
34 #include <sys/queue.h>
35
36 #include <stdio.h>
37 #include <stdlib.h>
38 #include <string.h>
39 #include <errno.h>
40 #include <stdint.h>
41 #include <stdarg.h>
42 #include <unistd.h>
43 #include <inttypes.h>
44
45 #include <rte_byteorder.h>
46 #include <rte_common.h>
47 #include <rte_cycles.h>
48 #include <rte_log.h>
49 #include <rte_debug.h>
50 #include <rte_interrupts.h>
51 #include <rte_pci.h>
52 #include <rte_memory.h>
53 #include <rte_memzone.h>
54 #include <rte_launch.h>
55 #include <rte_tailq.h>
56 #include <rte_eal.h>
57 #include <rte_per_lcore.h>
58 #include <rte_lcore.h>
59 #include <rte_atomic.h>
60 #include <rte_branch_prediction.h>
61 #include <rte_ring.h>
62 #include <rte_mempool.h>
63 #include <rte_malloc.h>
64 #include <rte_mbuf.h>
65 #include <rte_ether.h>
66 #include <rte_ethdev.h>
67 #include <rte_prefetch.h>
68 #include <rte_udp.h>
69 #include <rte_tcp.h>
70 #include <rte_sctp.h>
71 #include <rte_string_fns.h>
72 #include <rte_errno.h>
73
74 #include "vmxnet3/vmxnet3_defs.h"
75 #include "vmxnet3_ring.h"
76
77 #include "vmxnet3_logs.h"
78 #include "vmxnet3_ethdev.h"
79
80
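/*
 * DMA address helpers: physical address of the mbuf's current data pointer,
 * and of the default data start (buf_addr + RTE_PKTMBUF_HEADROOM) for a
 * freshly allocated mbuf.
 */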
81 #define RTE_MBUF_DATA_DMA_ADDR(mb) \
82         (uint64_t) ((mb)->buf_physaddr + (uint64_t)((char *)((mb)->pkt.data) - \
83         (char *)(mb)->buf_addr))
84
85 #define RTE_MBUF_DATA_DMA_ADDR_DEFAULT(mb) \
86         (uint64_t) ((mb)->buf_physaddr + RTE_PKTMBUF_HEADROOM)
87
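/* BAR0 Rx producer registers, indexed by Rx command ring id */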
88 static uint32_t rxprod_reg[2] = {VMXNET3_REG_RXPROD, VMXNET3_REG_RXPROD2};
89
90 static inline int vmxnet3_post_rx_bufs(vmxnet3_rx_queue_t *, uint8_t);
91 static inline void vmxnet3_tq_tx_complete(vmxnet3_tx_queue_t *);
92 #ifdef RTE_LIBRTE_VMXNET3_DEBUG_DRIVER_NOT_USED
93 static void vmxnet3_rxq_dump(struct vmxnet3_rx_queue *);
94 static void vmxnet3_txq_dump(struct vmxnet3_tx_queue *);
95 #endif
96
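/* Allocate a raw mbuf from the pool; the Rx path fills in its packet fields */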
97 static inline struct rte_mbuf *
98 rte_rxmbuf_alloc(struct rte_mempool *mp)
99 {
100         struct rte_mbuf *m;
101
102         m = __rte_mbuf_raw_alloc(mp);
103         __rte_mbuf_sanity_check_raw(m, RTE_MBUF_PKT, 0);
104         return m;
105 }
106
107 #ifdef RTE_LIBRTE_VMXNET3_DEBUG_DRIVER_NOT_USED
108 static void
109 vmxnet3_rxq_dump(struct vmxnet3_rx_queue *rxq)
110 {
111         uint32_t avail = 0;
112
113         if (rxq == NULL)
114                 return;
115
116         PMD_RX_LOG(DEBUG,
117                    "RXQ: cmd0 base : 0x%p cmd1 base : 0x%p comp ring base : 0x%p.",
118                    rxq->cmd_ring[0].base, rxq->cmd_ring[1].base, rxq->comp_ring.base);
119         PMD_RX_LOG(DEBUG,
120                    "RXQ: cmd0 basePA : 0x%lx cmd1 basePA : 0x%lx comp ring basePA : 0x%lx.",
121                    (unsigned long)rxq->cmd_ring[0].basePA,
122                    (unsigned long)rxq->cmd_ring[1].basePA,
123                    (unsigned long)rxq->comp_ring.basePA);
124
125         avail = vmxnet3_cmd_ring_desc_avail(&rxq->cmd_ring[0]);
126         PMD_RX_LOG(DEBUG,
127                    "RXQ:cmd0: size=%u; free=%u; next2proc=%u; queued=%u",
128                    (uint32_t)rxq->cmd_ring[0].size, avail,
129                    rxq->comp_ring.next2proc,
130                    rxq->cmd_ring[0].size - avail);
131
132         avail = vmxnet3_cmd_ring_desc_avail(&rxq->cmd_ring[1]);
133         PMD_RX_LOG(DEBUG, "RXQ:cmd1 size=%u; free=%u; next2proc=%u; queued=%u",
134                    (uint32_t)rxq->cmd_ring[1].size, avail, rxq->comp_ring.next2proc,
135                    rxq->cmd_ring[1].size - avail);
136
137 }
138
139 static void
140 vmxnet3_txq_dump(struct vmxnet3_tx_queue *txq)
141 {
142         uint32_t avail = 0;
143
144         if (txq == NULL)
145                 return;
146
147         PMD_TX_LOG(DEBUG, "TXQ: cmd base : 0x%p comp ring base : 0x%p.",
148                    txq->cmd_ring.base, txq->comp_ring.base);
149         PMD_TX_LOG(DEBUG, "TXQ: cmd basePA : 0x%lx comp ring basePA : 0x%lx.",
150                    (unsigned long)txq->cmd_ring.basePA,
151                    (unsigned long)txq->comp_ring.basePA);
152
153         avail = vmxnet3_cmd_ring_desc_avail(&txq->cmd_ring);
154         PMD_TX_LOG(DEBUG, "TXQ: size=%u; free=%u; next2proc=%u; queued=%u",
155                    (uint32_t)txq->cmd_ring.size, avail,
156                    txq->comp_ring.next2proc, txq->cmd_ring.size - avail);
157 }
158 #endif
159
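/* Free any mbufs still attached to a command ring and release its buf_info array */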
160 static inline void
161 vmxnet3_cmd_ring_release(vmxnet3_cmd_ring_t *ring)
162 {
163         while (ring->next2comp != ring->next2fill) {
164                 /* No need to worry about tx desc ownership, device is quiesced by now. */
165                 vmxnet3_buf_info_t *buf_info = ring->buf_info + ring->next2comp;
166
167                 if (buf_info->m) {
168                         rte_pktmbuf_free(buf_info->m);
169                         buf_info->m = NULL;
170                         buf_info->bufPA = 0;
171                         buf_info->len = 0;
172                 }
173                 vmxnet3_cmd_ring_adv_next2comp(ring);
174         }
175         rte_free(ring->buf_info);
176 }
177
178 void
179 vmxnet3_dev_tx_queue_release(void *txq)
180 {
181         vmxnet3_tx_queue_t *tq = txq;
182
183         if (txq != NULL) {
184                 /* Release the cmd_ring */
185                 vmxnet3_cmd_ring_release(&tq->cmd_ring);
186         }
187 }
188
189 void
190 vmxnet3_dev_rx_queue_release(void *rxq)
191 {
192         int i;
193         vmxnet3_rx_queue_t *rq = rxq;
194
195         if (rxq != NULL) {
196                 /* Release both the cmd_rings */
197                 for (i = 0; i < VMXNET3_RX_CMDRING_SIZE; i++)
198                         vmxnet3_cmd_ring_release(&rq->cmd_ring[i]);
199         }
200 }
201
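/* Stop all Rx/Tx queues of the device and release their buffers */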
202 void
203 vmxnet3_dev_clear_queues(struct rte_eth_dev *dev)
204 {
205         unsigned i;
206
207         PMD_INIT_FUNC_TRACE();
208
209         for (i = 0; i < dev->data->nb_tx_queues; i++) {
210                 struct vmxnet3_tx_queue *txq = dev->data->tx_queues[i];
211
212                 if (txq != NULL) {
213                         txq->stopped = TRUE;
214                         vmxnet3_dev_tx_queue_release(txq);
215                 }
216         }
217
218         for (i = 0; i < dev->data->nb_rx_queues; i++) {
219                 struct vmxnet3_rx_queue *rxq = dev->data->rx_queues[i];
220
221                 if (rxq != NULL) {
222                         rxq->stopped = TRUE;
223                         vmxnet3_dev_rx_queue_release(rxq);
224                 }
225         }
226 }
227
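/*
 * Reclaim transmitted descriptors: walk the Tx completion ring while the
 * generation bit matches and free the corresponding mbufs.
 */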
228 static inline void
229 vmxnet3_tq_tx_complete(vmxnet3_tx_queue_t *txq)
230 {
231         int completed = 0;
232         struct rte_mbuf *mbuf;
233         vmxnet3_comp_ring_t *comp_ring = &txq->comp_ring;
234         struct Vmxnet3_TxCompDesc *tcd = (struct Vmxnet3_TxCompDesc *)
235                 (comp_ring->base + comp_ring->next2proc);
236
237         while (tcd->gen == comp_ring->gen) {
238
239                 /* Release cmd_ring descriptor and free mbuf */
240 #ifdef RTE_LIBRTE_VMXNET3_DEBUG_DRIVER
241                 VMXNET3_ASSERT(txq->cmd_ring.base[tcd->txdIdx].txd.eop == 1);
242 #endif
243                 mbuf = txq->cmd_ring.buf_info[tcd->txdIdx].m;
244                 if (unlikely(mbuf == NULL))
245                         rte_panic("EOP desc does not point to a valid mbuf");
246                 else
247                         rte_pktmbuf_free(mbuf);
248
249
250                 txq->cmd_ring.buf_info[tcd->txdIdx].m = NULL;
251                 /* Mark the txd for which tcd was generated as completed */
252                 vmxnet3_cmd_ring_adv_next2comp(&txq->cmd_ring);
253
254                 vmxnet3_comp_ring_adv_next2proc(comp_ring);
255                 tcd = (struct Vmxnet3_TxCompDesc *)(comp_ring->base +
256                                                     comp_ring->next2proc);
257                 completed++;
258         }
259
260         PMD_TX_LOG(DEBUG, "Processed %d tx comps & command descs.", completed);
261 }
262
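/*
 * Transmit a burst of packets. Only single-segment packets are supported;
 * the Tx producer register is written once txNumDeferred reaches txThreshold.
 */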
263 uint16_t
264 vmxnet3_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts,
265                   uint16_t nb_pkts)
266 {
267         uint16_t nb_tx;
268         Vmxnet3_TxDesc *txd = NULL;
269         vmxnet3_buf_info_t *tbi = NULL;
270         struct vmxnet3_hw *hw;
271         struct rte_mbuf *txm;
272         vmxnet3_tx_queue_t *txq = tx_queue;
273
274         hw = txq->hw;
275
276         if (txq->stopped) {
277                 PMD_TX_LOG(DEBUG, "Tx queue is stopped.");
278                 return 0;
279         }
280
281         /* Free up the comp_descriptors aggressively */
282         vmxnet3_tq_tx_complete(txq);
283
284         nb_tx = 0;
285         while (nb_tx < nb_pkts) {
286
287                 if (vmxnet3_cmd_ring_desc_avail(&txq->cmd_ring)) {
288
289                         txm = tx_pkts[nb_tx];
290                         /* Multi-segment (scatter) packets are not supported yet; drop them */
291                         if (txm->pkt.nb_segs != 1) {
292                                 PMD_TX_LOG(DEBUG, "Multi-segment packets are not supported yet, dropping!");
293                                 rte_pktmbuf_free(tx_pkts[nb_tx]);
294                                 txq->stats.drop_total++;
295
296                                 nb_tx++;
297                                 continue;
298                         }
299
300                         /* The MTU does not include the Ethernet header, so allow for it here */
301                         if (txm->pkt.data_len > (hw->cur_mtu + ETHER_HDR_LEN)) {
302                                 PMD_TX_LOG(DEBUG, "Packet data_len higher than MTU");
303                                 rte_pktmbuf_free(tx_pkts[nb_tx]);
304                                 txq->stats.drop_total++;
305
306                                 nb_tx++;
307                                 continue;
308                         }
309
310                         txd = (Vmxnet3_TxDesc *)(txq->cmd_ring.base + txq->cmd_ring.next2fill);
311
312                         /* Fill the tx descriptor */
313                         tbi = txq->cmd_ring.buf_info + txq->cmd_ring.next2fill;
314                         tbi->bufPA = RTE_MBUF_DATA_DMA_ADDR(txm);
315                         txd->addr = tbi->bufPA;
316                         txd->len = txm->pkt.data_len;
317
318                         /* Mark the last descriptor as End of Packet. */
319                         txd->cq = 1;
320                         txd->eop = 1;
321
322                         /* Record current mbuf for freeing it later in tx complete */
323 #ifdef RTE_LIBRTE_VMXNET3_DEBUG_DRIVER
324                         VMXNET3_ASSERT(txm);
325 #endif
326                         tbi->m = txm;
327
328                         /* Set the offloading mode to default */
329                         txd->hlen = 0;
330                         txd->om = VMXNET3_OM_NONE;
331                         txd->msscof = 0;
332
333                         /* Finally, flip the GEN bit of the SOP descriptor */
334                         txd->gen = txq->cmd_ring.gen;
335                         txq->shared->ctrl.txNumDeferred++;
336
337                         /* move to the next2fill descriptor */
338                         vmxnet3_cmd_ring_adv_next2fill(&txq->cmd_ring);
339                         nb_tx++;
340
341                 } else {
342                         PMD_TX_LOG(DEBUG, "No free tx cmd desc(s)");
343                         txq->stats.drop_total += (nb_pkts - nb_tx);
344                         break;
345                 }
346         }
347
348         PMD_TX_LOG(DEBUG, "vmxnet3 txThreshold: %u", txq->shared->ctrl.txThreshold);
349
350         if (txq->shared->ctrl.txNumDeferred >= txq->shared->ctrl.txThreshold) {
351
352                 txq->shared->ctrl.txNumDeferred = 0;
353                 /* Notify vSwitch that packets are available. */
354                 VMXNET3_WRITE_BAR0_REG(hw, (VMXNET3_REG_TXPROD + txq->queue_id * VMXNET3_REG_ALIGN),
355                                        txq->cmd_ring.next2fill);
356         }
357
358         return nb_tx;
359 }
360
361 /*
362  *  Allocates mbufs and clusters, and posts Rx descriptors with the buffer
363  *  details so that the device can receive packets into those buffers.
364  *      Ring layout:
365  *      Of the two rings, the 1st ring contains buffers of type 0 and type 1.
366  *      bufs_per_pkt is set such that for non-LRO cases all the buffers required
367  *      by a frame fit in the 1st ring (1st buf of type 0, the rest of type 1).
368  *      The 2nd ring contains buffers of type 1 alone; it is used mostly
369  *      for LRO.
370  *
371  */
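/* Return value: the number of descriptors filled, or -err if (almost) none could be posted. */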
372 static inline int
373 vmxnet3_post_rx_bufs(vmxnet3_rx_queue_t *rxq, uint8_t ring_id)
374 {
375         int err = 0;
376         uint32_t i = 0, val = 0;
377         struct vmxnet3_cmd_ring *ring = &rxq->cmd_ring[ring_id];
378
379         while (vmxnet3_cmd_ring_desc_avail(ring) > 0) {
380                 struct Vmxnet3_RxDesc *rxd;
381                 struct rte_mbuf *mbuf;
382                 vmxnet3_buf_info_t *buf_info = &ring->buf_info[ring->next2fill];
383
384                 rxd = (struct Vmxnet3_RxDesc *)(ring->base + ring->next2fill);
385
386                 if (ring->rid == 0) {
387                         /* Usually: One HEAD type buf per packet
388                          * val = (ring->next2fill % rxq->hw->bufs_per_pkt) ?
389                          * VMXNET3_RXD_BTYPE_BODY : VMXNET3_RXD_BTYPE_HEAD;
390                          */
391
392                         /* We use a single buffer per packet, so all descriptors here are HEAD type */
393                         val = VMXNET3_RXD_BTYPE_HEAD;
394                 } else {
395                         /* All BODY type buffers for the 2nd ring, which ESXi does not use at all */
396                         val = VMXNET3_RXD_BTYPE_BODY;
397                 }
398
399                 /* Allocate blank mbuf for the current Rx Descriptor */
400                 mbuf = rte_rxmbuf_alloc(rxq->mp);
401                 if (mbuf == NULL) {
402                         PMD_RX_LOG(ERR, "Error allocating mbuf in %s", __func__);
403                         rxq->stats.rx_buf_alloc_failure++;
404                         err = ENOMEM;
405                         break;
406                 }
407
408                 /*
409                  * Load the mbuf pointer into buf_info[next2fill];
410                  * the buf_info structure is the equivalent of the virtio-virtqueue cookie.
411                  */
412                 buf_info->m = mbuf;
413                 buf_info->len = (uint16_t)(mbuf->buf_len -
414                                            RTE_PKTMBUF_HEADROOM);
415                 buf_info->bufPA = RTE_MBUF_DATA_DMA_ADDR_DEFAULT(mbuf);
416
417                 /* Load Rx Descriptor with the buffer's GPA */
418                 rxd->addr = buf_info->bufPA;
419
420                 /* After this point rxd->addr MUST not be NULL */
421                 rxd->btype = val;
422                 rxd->len = buf_info->len;
423                 /* Flip gen bit at the end to change ownership */
424                 rxd->gen = ring->gen;
425
426                 vmxnet3_cmd_ring_adv_next2fill(ring);
427                 i++;
428         }
429
430         /* Return error only if no buffers are posted at present */
431         if (vmxnet3_cmd_ring_desc_avail(ring) >= (ring->size - 1))
432                 return -err;
433         else
434                 return i;
435 }
436
437 /*
438  * Process the Rx Completion Ring of given vmxnet3_rx_queue
439  * for nb_pkts burst and return the number of packets received
440  */
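/* Packets spanning multiple Rx buffers (SOP != EOP) are not supported and are dropped. */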
441 uint16_t
442 vmxnet3_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t nb_pkts)
443 {
444         uint16_t nb_rx;
445         uint32_t nb_rxd, idx;
446         uint8_t ring_idx;
447         vmxnet3_rx_queue_t *rxq;
448         Vmxnet3_RxCompDesc *rcd;
449         vmxnet3_buf_info_t *rbi;
450         Vmxnet3_RxDesc *rxd;
451         struct rte_mbuf *rxm = NULL;
452         struct vmxnet3_hw *hw;
453
454         nb_rx = 0;
455         ring_idx = 0;
456         nb_rxd = 0;
457         idx = 0;
458
459         rxq = rx_queue;
460         hw = rxq->hw;
461
462         rcd = &rxq->comp_ring.base[rxq->comp_ring.next2proc].rcd;
463
464         if (rxq->stopped) {
465                 PMD_RX_LOG(DEBUG, "Rx queue is stopped.");
466                 return 0;
467         }
468
469         while (rcd->gen == rxq->comp_ring.gen) {
470
471                 if (nb_rx >= nb_pkts)
472                         break;
473                 idx = rcd->rxdIdx;
474                 ring_idx = (uint8_t)((rcd->rqID == rxq->qid1) ? 0 : 1);
475                 rxd = (Vmxnet3_RxDesc *)rxq->cmd_ring[ring_idx].base + idx;
476                 rbi = rxq->cmd_ring[ring_idx].buf_info + idx;
477
478                 if (rcd->sop != 1 || rcd->eop != 1) {
479                         rte_pktmbuf_free_seg(rbi->m);
480
481                         PMD_RX_LOG(DEBUG, "Packet spread across multiple buffers");
482                         goto rcd_done;
483
484                 } else {
485
486                         PMD_RX_LOG(DEBUG, "rxd idx: %d ring idx: %d.", idx, ring_idx);
487
488 #ifdef RTE_LIBRTE_VMXNET3_DEBUG_DRIVER
489                         VMXNET3_ASSERT(rcd->len <= rxd->len);
490                         VMXNET3_ASSERT(rbi->m);
491 #endif
492                         if (rcd->len == 0) {
493                                 PMD_RX_LOG(DEBUG, "Rx buf was skipped. rxring[%d][%d]",
494                                            ring_idx, idx);
495 #ifdef RTE_LIBRTE_VMXNET3_DEBUG_DRIVER
496                                 VMXNET3_ASSERT(rcd->sop && rcd->eop);
497 #endif
498                                 rte_pktmbuf_free_seg(rbi->m);
499
500                                 goto rcd_done;
501                         }
502
503                         /* Assuming a packet is coming in a single packet buffer */
504                         if (rxd->btype != VMXNET3_RXD_BTYPE_HEAD) {
505                                 PMD_RX_LOG(DEBUG,
506                                            "Alert: Misbehaving device, incorrect "
507                                            "buffer type used. Packet dropped.");
508                                 rte_pktmbuf_free_seg(rbi->m);
509                                 goto rcd_done;
510                         }
511 #ifdef RTE_LIBRTE_VMXNET3_DEBUG_DRIVER
512                         VMXNET3_ASSERT(rxd->btype == VMXNET3_RXD_BTYPE_HEAD);
513 #endif
514                         /* Get the packet buffer pointer from buf_info */
515                         rxm = rbi->m;
516
517                         /* Clear descriptor associated buf_info to be reused */
518                         rbi->m = NULL;
519                         rbi->bufPA = 0;
520
521                         /* Update the index that we received a packet */
522                         rxq->cmd_ring[ring_idx].next2comp = idx;
523
524                         /* For RCD with EOP set, check if there is frame error */
525                         if (rcd->err) {
526                                 rxq->stats.drop_total++;
527                                 rxq->stats.drop_err++;
528
529                                 if (!rcd->fcs) {
530                                         rxq->stats.drop_fcs++;
531                                         PMD_RX_LOG(ERR, "Recv packet dropped due to frame err.");
532                                 }
533                                 PMD_RX_LOG(ERR, "Error in received packet rcd#:%d rxd:%d",
534                                            (int)(rcd - (struct Vmxnet3_RxCompDesc *)
535                                                  rxq->comp_ring.base), rcd->rxdIdx);
536                                 rte_pktmbuf_free_seg(rxm);
537
538                                 goto rcd_done;
539                         }
540
541                         /* Check for hardware stripped VLAN tag */
542                         if (rcd->ts) {
543
544                                 PMD_RX_LOG(DEBUG, "Received packet with vlan ID: %d.",
545                                            rcd->tci);
546                                 rxm->ol_flags = PKT_RX_VLAN_PKT;
547
548 #ifdef RTE_LIBRTE_VMXNET3_DEBUG_DRIVER
549                                 VMXNET3_ASSERT(rxm &&
550                                                rte_pktmbuf_mtod(rxm, void *));
551 #endif
552                                 /* Copy vlan tag in packet buffer */
553                                 rxm->pkt.vlan_macip.f.vlan_tci =
554                                         rte_le_to_cpu_16((uint16_t)rcd->tci);
555
556                         } else {
557                                 rxm->ol_flags = 0;
558                                 rxm->pkt.vlan_macip.f.vlan_tci = 0;
559                         }
560
561                         /* Initialize newly received packet buffer */
562                         rxm->pkt.in_port = rxq->port_id;
563                         rxm->pkt.nb_segs = 1;
564                         rxm->pkt.next = NULL;
565                         rxm->pkt.pkt_len = (uint16_t)rcd->len;
566                         rxm->pkt.data_len = (uint16_t)rcd->len;
567                         rxm->pkt.data = (char *)rxm->buf_addr + RTE_PKTMBUF_HEADROOM;
568
569                         rx_pkts[nb_rx++] = rxm;
570
571 rcd_done:
572                         rxq->cmd_ring[ring_idx].next2comp = idx;
573                         VMXNET3_INC_RING_IDX_ONLY(rxq->cmd_ring[ring_idx].next2comp, rxq->cmd_ring[ring_idx].size);
574
575                         /* It's time to allocate some new buf and renew descriptors */
576                         vmxnet3_post_rx_bufs(rxq, ring_idx);
577                         if (unlikely(rxq->shared->ctrl.updateRxProd)) {
578                                 VMXNET3_WRITE_BAR0_REG(hw, rxprod_reg[ring_idx] + (rxq->queue_id * VMXNET3_REG_ALIGN),
579                                                        rxq->cmd_ring[ring_idx].next2fill);
580                         }
581
582                         /* Advance to the next descriptor in comp_ring */
583                         vmxnet3_comp_ring_adv_next2proc(&rxq->comp_ring);
584
585                         rcd = &rxq->comp_ring.base[rxq->comp_ring.next2proc].rcd;
586                         nb_rxd++;
587                         if (nb_rxd > rxq->cmd_ring[0].size) {
588                                 PMD_RX_LOG(ERR,
589                                            "Used up quota of receiving packets,"
590                                            " relinquish control.");
591                                 break;
592                         }
593                 }
594         }
595
596         return nb_rx;
597 }
598
599 /*
600  * Create memzone for device rings. malloc can't be used as the physical address is
601  * needed. If the memzone is already created, then this function returns a ptr
602  * to the old one.
603  */
604 static const struct rte_memzone *
605 ring_dma_zone_reserve(struct rte_eth_dev *dev, const char *ring_name,
606                       uint16_t queue_id, uint32_t ring_size, int socket_id)
607 {
608         char z_name[RTE_MEMZONE_NAMESIZE];
609         const struct rte_memzone *mz;
610
611         snprintf(z_name, sizeof(z_name), "%s_%s_%d_%d",
612                         dev->driver->pci_drv.name, ring_name,
613                         dev->data->port_id, queue_id);
614
615         mz = rte_memzone_lookup(z_name);
616         if (mz)
617                 return mz;
618
619         return rte_memzone_reserve_aligned(z_name, ring_size,
620                         socket_id, 0, VMXNET3_RING_BA_ALIGN);
621 }
622
623 int
624 vmxnet3_dev_tx_queue_setup(struct rte_eth_dev *dev,
625                            uint16_t queue_idx,
626                            uint16_t nb_desc,
627                            unsigned int socket_id,
628                            __attribute__((unused)) const struct rte_eth_txconf *tx_conf)
629 {
630         const struct rte_memzone *mz;
631         struct vmxnet3_tx_queue *txq;
632         struct vmxnet3_hw     *hw;
633         struct vmxnet3_cmd_ring *ring;
634         struct vmxnet3_comp_ring *comp_ring;
635         int size;
636
637         PMD_INIT_FUNC_TRACE();
638         hw = VMXNET3_DEV_PRIVATE_TO_HW(dev->data->dev_private);
639
640         if ((tx_conf->txq_flags & ETH_TXQ_FLAGS_NOMULTSEGS) !=
641             ETH_TXQ_FLAGS_NOMULTSEGS) {
642                 PMD_INIT_LOG(ERR, "Multi-segment TX is not supported yet");
643                 return -EINVAL;
644         }
645
646         if ((tx_conf->txq_flags & ETH_TXQ_FLAGS_NOOFFLOADS) !=
647             ETH_TXQ_FLAGS_NOOFFLOADS) {
648                 PMD_INIT_LOG(ERR, "TX offloads are not supported yet");
649                 return -EINVAL;
650         }
651
652         txq = rte_zmalloc("ethdev_tx_queue", sizeof(struct vmxnet3_tx_queue), CACHE_LINE_SIZE);
653         if (txq == NULL) {
654                 PMD_INIT_LOG(ERR, "Can not allocate tx queue structure");
655                 return -ENOMEM;
656         }
657
658         txq->queue_id = queue_idx;
659         txq->port_id = dev->data->port_id;
660         txq->shared = &hw->tqd_start[queue_idx];
661         txq->hw = hw;
662         txq->qid = queue_idx;
663         txq->stopped = TRUE;
664
665         ring = &txq->cmd_ring;
666         comp_ring = &txq->comp_ring;
667
668         /* Tx vmxnet ring length should be between 512 and 4096 */
669         if (nb_desc < VMXNET3_DEF_TX_RING_SIZE) {
670                 PMD_INIT_LOG(ERR, "VMXNET3 Tx Ring Size Min: %u",
671                              VMXNET3_DEF_TX_RING_SIZE);
672                 return -EINVAL;
673         } else if (nb_desc > VMXNET3_TX_RING_MAX_SIZE) {
674                 PMD_INIT_LOG(ERR, "VMXNET3 Tx Ring Size Max: %u",
675                              VMXNET3_TX_RING_MAX_SIZE);
676                 return -EINVAL;
677         } else {
678                 ring->size = nb_desc;
679                 ring->size &= ~VMXNET3_RING_SIZE_MASK;
680         }
681         comp_ring->size = ring->size;
682
683         /* Tx vmxnet rings structure initialization*/
684         ring->next2fill = 0;
685         ring->next2comp = 0;
686         ring->gen = VMXNET3_INIT_GEN;
687         comp_ring->next2proc = 0;
688         comp_ring->gen = VMXNET3_INIT_GEN;
689
690         size = sizeof(struct Vmxnet3_TxDesc) * ring->size;
691         size += sizeof(struct Vmxnet3_TxCompDesc) * comp_ring->size;
692
693         mz = ring_dma_zone_reserve(dev, "txdesc", queue_idx, size, socket_id);
694         if (mz == NULL) {
695                 PMD_INIT_LOG(ERR, "Failed to reserve memzone for queue descriptors");
696                 return -ENOMEM;
697         }
698         memset(mz->addr, 0, mz->len);
699
700         /* cmd_ring initialization */
701         ring->base = mz->addr;
702         ring->basePA = mz->phys_addr;
703
704         /* comp_ring initialization */
705         comp_ring->base = ring->base + ring->size;
706         comp_ring->basePA = ring->basePA +
707                 (sizeof(struct Vmxnet3_TxDesc) * ring->size);
708
709         /* cmd_ring0 buf_info allocation */
710         ring->buf_info = rte_zmalloc("tx_ring_buf_info",
711                                      ring->size * sizeof(vmxnet3_buf_info_t), CACHE_LINE_SIZE);
712         if (ring->buf_info == NULL) {
713                 PMD_INIT_LOG(ERR, "Failed to allocate tx_buf_info structure");
714                 return -ENOMEM;
715         }
716
717         /* Update the data portion with txq */
718         dev->data->tx_queues[queue_idx] = txq;
719
720         return 0;
721 }
722
723 int
724 vmxnet3_dev_rx_queue_setup(struct rte_eth_dev *dev,
725                            uint16_t queue_idx,
726                            uint16_t nb_desc,
727                            unsigned int socket_id,
728                            __attribute__((unused)) const struct rte_eth_rxconf *rx_conf,
729                            struct rte_mempool *mp)
730 {
731         const struct rte_memzone *mz;
732         struct vmxnet3_rx_queue *rxq;
733         struct vmxnet3_hw     *hw;
734         struct vmxnet3_cmd_ring *ring0, *ring1, *ring;
735         struct vmxnet3_comp_ring *comp_ring;
736         int size;
737         uint8_t i;
738         char mem_name[32];
739         uint16_t buf_size;
740         struct rte_pktmbuf_pool_private *mbp_priv;
741
742         PMD_INIT_FUNC_TRACE();
743         hw = VMXNET3_DEV_PRIVATE_TO_HW(dev->data->dev_private);
744
745         mbp_priv = (struct rte_pktmbuf_pool_private *)
746                 rte_mempool_get_priv(mp);
747         buf_size = (uint16_t) (mbp_priv->mbuf_data_room_size -
748                                RTE_PKTMBUF_HEADROOM);
749
750         if (dev->data->dev_conf.rxmode.max_rx_pkt_len > buf_size) {
751                 PMD_INIT_LOG(ERR, "buf_size = %u, max_pkt_len = %u, "
752                              "VMXNET3 doesn't support scatter packets yet",
753                              buf_size, dev->data->dev_conf.rxmode.max_rx_pkt_len);
754                 return -EINVAL;
755         }
756
757         rxq = rte_zmalloc("ethdev_rx_queue", sizeof(struct vmxnet3_rx_queue), CACHE_LINE_SIZE);
758         if (rxq == NULL) {
759                 PMD_INIT_LOG(ERR, "Can not allocate rx queue structure");
760                 return -ENOMEM;
761         }
762
763         rxq->mp = mp;
764         rxq->queue_id = queue_idx;
765         rxq->port_id = dev->data->port_id;
766         rxq->shared = &hw->rqd_start[queue_idx];
767         rxq->hw = hw;
768         rxq->qid1 = queue_idx;
769         rxq->qid2 = queue_idx + hw->num_rx_queues;
770         rxq->stopped = TRUE;
771
772         ring0 = &rxq->cmd_ring[0];
773         ring1 = &rxq->cmd_ring[1];
774         comp_ring = &rxq->comp_ring;
775
776         /* Rx vmxnet ring lengths should be between 256 and 4096 */
777         if (nb_desc < VMXNET3_DEF_RX_RING_SIZE) {
778                 PMD_INIT_LOG(ERR, "VMXNET3 Rx Ring Size Min: 256");
779                 return -EINVAL;
780         } else if (nb_desc > VMXNET3_RX_RING_MAX_SIZE) {
781                 PMD_INIT_LOG(ERR, "VMXNET3 Rx Ring Size Max: 4096");
782                 return -EINVAL;
783         } else {
784                 ring0->size = nb_desc;
785                 ring0->size &= ~VMXNET3_RING_SIZE_MASK;
786                 ring1->size = ring0->size;
787         }
788
789         comp_ring->size = ring0->size + ring1->size;
790
791         /* Rx vmxnet rings structure initialization */
792         ring0->next2fill = 0;
793         ring1->next2fill = 0;
794         ring0->next2comp = 0;
795         ring1->next2comp = 0;
796         ring0->gen = VMXNET3_INIT_GEN;
797         ring1->gen = VMXNET3_INIT_GEN;
798         comp_ring->next2proc = 0;
799         comp_ring->gen = VMXNET3_INIT_GEN;
800
801         size = sizeof(struct Vmxnet3_RxDesc) * (ring0->size + ring1->size);
802         size += sizeof(struct Vmxnet3_RxCompDesc) * comp_ring->size;
803
804         mz = ring_dma_zone_reserve(dev, "rxdesc", queue_idx, size, socket_id);
805         if (mz == NULL) {
806                 PMD_INIT_LOG(ERR, "Failed to reserve memzone for queue descriptors");
807                 return -ENOMEM;
808         }
809         memset(mz->addr, 0, mz->len);
810
811         /* cmd_ring0 initialization */
812         ring0->base = mz->addr;
813         ring0->basePA = mz->phys_addr;
814
815         /* cmd_ring1 initialization */
816         ring1->base = ring0->base + ring0->size;
817         ring1->basePA = ring0->basePA + sizeof(struct Vmxnet3_RxDesc) * ring0->size;
818
819         /* comp_ring initialization */
820         comp_ring->base = ring1->base +  ring1->size;
821         comp_ring->basePA = ring1->basePA + sizeof(struct Vmxnet3_RxDesc) *
822                 ring1->size;
823
824         /* cmd_ring0-cmd_ring1 buf_info allocation */
825         for (i = 0; i < VMXNET3_RX_CMDRING_SIZE; i++) {
826
827                 ring = &rxq->cmd_ring[i];
828                 ring->rid = i;
829                 snprintf(mem_name, sizeof(mem_name), "rx_ring_%d_buf_info", i);
830
831                 ring->buf_info = rte_zmalloc(mem_name, ring->size * sizeof(vmxnet3_buf_info_t), CACHE_LINE_SIZE);
832                 if (ring->buf_info == NULL) {
833                         PMD_INIT_LOG(ERR, "Failed to allocate rx_buf_info structure");
834                         return -ENOMEM;
835                 }
836         }
837
838         /* Update the data portion with rxq */
839         dev->data->rx_queues[queue_idx] = rxq;
840
841         return 0;
842 }
843
844 /*
845  * Initializes Receive Unit
846  * Load mbufs in rx queue in advance
847  */
848 int
849 vmxnet3_dev_rxtx_init(struct rte_eth_dev *dev)
850 {
851         struct vmxnet3_hw *hw;
852         int i, ret;
853         uint8_t j;
854
855         PMD_INIT_FUNC_TRACE();
856         hw = VMXNET3_DEV_PRIVATE_TO_HW(dev->data->dev_private);
857
858         for (i = 0; i < hw->num_rx_queues; i++) {
859                 vmxnet3_rx_queue_t *rxq = dev->data->rx_queues[i];
860
861                 for (j = 0; j < VMXNET3_RX_CMDRING_SIZE; j++) {
862                         /* Fill the ring completely with fresh mbufs */
863                         ret = vmxnet3_post_rx_bufs(rxq, j);
864                         if (ret <= 0) {
865                                 PMD_INIT_LOG(ERR, "Failed to post buffers for Rxq: %d ring: %d", i, j);
866                                 return -ret;
867                         }
868                         /* Update the device with next2fill so it can use the newly posted mbufs for incoming packets */
869                         if (unlikely(rxq->shared->ctrl.updateRxProd)) {
870                                 VMXNET3_WRITE_BAR0_REG(hw, rxprod_reg[j] + (rxq->queue_id * VMXNET3_REG_ALIGN),
871                                                        rxq->cmd_ring[j].next2fill);
872                         }
873                 }
874                 rxq->stopped = FALSE;
875         }
876
877         for (i = 0; i < dev->data->nb_tx_queues; i++) {
878                 struct vmxnet3_tx_queue *txq = dev->data->tx_queues[i];
879
880                 txq->stopped = FALSE;
881         }
882
883         return 0;
884 }
885
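/* Default 40-byte Toeplitz RSS hash key (the same default key used by Intel PMDs) */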
886 static uint8_t rss_intel_key[40] = {
887         0x6D, 0x5A, 0x56, 0xDA, 0x25, 0x5B, 0x0E, 0xC2,
888         0x41, 0x67, 0x25, 0x3D, 0x43, 0xA3, 0x8F, 0xB0,
889         0xD0, 0xCA, 0x2B, 0xCB, 0xAE, 0x7B, 0x30, 0xB4,
890         0x77, 0xCB, 0x2D, 0xA3, 0x80, 0x30, 0xF2, 0x0C,
891         0x6A, 0x42, 0xB7, 0x3B, 0xBE, 0xAC, 0x01, 0xFA,
892 };
893
894 /*
895  * Configure RSS feature
896  */
897 int
898 vmxnet3_rss_configure(struct rte_eth_dev *dev)
899 {
900 #define VMXNET3_RSS_OFFLOAD_ALL ( \
901                 ETH_RSS_IPV4 | \
902                 ETH_RSS_IPV4_TCP | \
903                 ETH_RSS_IPV6 | \
904                 ETH_RSS_IPV6_TCP)
905
906         struct vmxnet3_hw *hw;
907         struct VMXNET3_RSSConf *dev_rss_conf;
908         struct rte_eth_rss_conf *port_rss_conf;
909         uint64_t rss_hf;
910         uint8_t i, j;
911
912         PMD_INIT_FUNC_TRACE();
913         hw = VMXNET3_DEV_PRIVATE_TO_HW(dev->data->dev_private);
914         dev_rss_conf = hw->rss_conf;
915         port_rss_conf = &dev->data->dev_conf.rx_adv_conf.rss_conf;
916
917         /* loading hashFunc */
918         dev_rss_conf->hashFunc = VMXNET3_RSS_HASH_FUNC_TOEPLITZ;
919         /* loading hashKeySize */
920         dev_rss_conf->hashKeySize = VMXNET3_RSS_MAX_KEY_SIZE;
921         /* loading indTableSize: must not exceed VMXNET3_RSS_MAX_IND_TABLE_SIZE (128) */
922         dev_rss_conf->indTableSize = (uint16_t)(hw->num_rx_queues * 4);
923
924         if (port_rss_conf->rss_key == NULL) {
925                 /* Default hash key */
926                 port_rss_conf->rss_key = rss_intel_key;
927         }
928
929         /* loading hashKey */
930         memcpy(&dev_rss_conf->hashKey[0], port_rss_conf->rss_key, dev_rss_conf->hashKeySize);
931
932         /* loading indTable */
933         for (i = 0, j = 0; i < dev_rss_conf->indTableSize; i++, j++) {
934                 if (j == dev->data->nb_rx_queues)
935                         j = 0;
936                 dev_rss_conf->indTable[i] = j;
937         }
938
939         /* loading hashType */
940         dev_rss_conf->hashType = 0;
941         rss_hf = port_rss_conf->rss_hf & VMXNET3_RSS_OFFLOAD_ALL;
942         if (rss_hf & ETH_RSS_IPV4)
943                 dev_rss_conf->hashType |= VMXNET3_RSS_HASH_TYPE_IPV4;
944         if (rss_hf & ETH_RSS_IPV4_TCP)
945                 dev_rss_conf->hashType |= VMXNET3_RSS_HASH_TYPE_TCP_IPV4;
946         if (rss_hf & ETH_RSS_IPV6)
947                 dev_rss_conf->hashType |= VMXNET3_RSS_HASH_TYPE_IPV6;
948         if (rss_hf & ETH_RSS_IPV6_TCP)
949                 dev_rss_conf->hashType |= VMXNET3_RSS_HASH_TYPE_TCP_IPV6;
950
951         return VMXNET3_SUCCESS;
952 }
953
954 /*
955  * Configure VLAN Filter feature
956  */
957 int
958 vmxnet3_vlan_configure(struct rte_eth_dev *dev)
959 {
960         uint8_t i;
961         struct vmxnet3_hw *hw = VMXNET3_DEV_PRIVATE_TO_HW(dev->data->dev_private);
962         uint32_t *vf_table = hw->shared->devRead.rxFilterConf.vfTable;
963
964         PMD_INIT_FUNC_TRACE();
965
966         /* Clear the VLAN filter table */
967         for (i = 0; i < VMXNET3_VFT_SIZE; i++) {
968                 /* Filter all VLAN tags out by default */
969                 vf_table[i] = 0;
970                 /* To-Do: Provide another routine in dev_ops for user config */
971
972                 PMD_INIT_LOG(DEBUG, "Registering VLAN portid: %"PRIu8" tag %u",
973                                         dev->data->port_id, vf_table[i]);
974         }
975
976         return VMXNET3_SUCCESS;
977 }