/*-
 *   BSD LICENSE
 *
 *   Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
 *   All rights reserved.
 *
 *   Redistribution and use in source and binary forms, with or without
 *   modification, are permitted provided that the following conditions
 *   are met:
 *
 *     * Redistributions of source code must retain the above copyright
 *       notice, this list of conditions and the following disclaimer.
 *     * Redistributions in binary form must reproduce the above copyright
 *       notice, this list of conditions and the following disclaimer in
 *       the documentation and/or other materials provided with the
 *       distribution.
 *     * Neither the name of Intel Corporation nor the names of its
 *       contributors may be used to endorse or promote products derived
 *       from this software without specific prior written permission.
 *
 *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

#include <sys/queue.h>

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <errno.h>
#include <stdint.h>
#include <stdarg.h>
#include <unistd.h>
#include <inttypes.h>

#include <rte_byteorder.h>
#include <rte_common.h>
#include <rte_cycles.h>
#include <rte_log.h>
#include <rte_debug.h>
#include <rte_interrupts.h>
#include <rte_pci.h>
#include <rte_memory.h>
#include <rte_memzone.h>
#include <rte_launch.h>
#include <rte_tailq.h>
#include <rte_eal.h>
#include <rte_per_lcore.h>
#include <rte_lcore.h>
#include <rte_atomic.h>
#include <rte_branch_prediction.h>
#include <rte_ring.h>
#include <rte_mempool.h>
#include <rte_malloc.h>
#include <rte_mbuf.h>
#include <rte_ether.h>
#include <rte_ethdev.h>
#include <rte_prefetch.h>
#include <rte_udp.h>
#include <rte_tcp.h>
#include <rte_sctp.h>
#include <rte_string_fns.h>
#include <rte_errno.h>

#include "vmxnet3/vmxnet3_defs.h"
#include "vmxnet3_ring.h"

#include "vmxnet3_logs.h"
#include "vmxnet3_ethdev.h"

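/*
 * DMA address helpers: the bus address the device should use for an mbuf's
 * data is buf_physaddr plus the offset of the data within the buffer
 * (the current data pointer, or the default headroom for a fresh mbuf).
 */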
#define RTE_MBUF_DATA_DMA_ADDR(mb) \
        (uint64_t) ((mb)->buf_physaddr + (uint64_t)((char *)((mb)->data) - \
        (char *)(mb)->buf_addr))

#define RTE_MBUF_DATA_DMA_ADDR_DEFAULT(mb) \
        (uint64_t) ((mb)->buf_physaddr + RTE_PKTMBUF_HEADROOM)

static uint32_t rxprod_reg[2] = {VMXNET3_REG_RXPROD, VMXNET3_REG_RXPROD2};

static inline int vmxnet3_post_rx_bufs(vmxnet3_rx_queue_t *, uint8_t);
static inline void vmxnet3_tq_tx_complete(vmxnet3_tx_queue_t *);
#ifdef RTE_LIBRTE_VMXNET3_DEBUG_DRIVER_NOT_USED
static void vmxnet3_rxq_dump(struct vmxnet3_rx_queue *);
static void vmxnet3_txq_dump(struct vmxnet3_tx_queue *);
#endif

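/* Allocate a raw mbuf from the pool; the caller is responsible for initializing its fields. */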
static inline struct rte_mbuf *
rte_rxmbuf_alloc(struct rte_mempool *mp)
{
        struct rte_mbuf *m;

        m = __rte_mbuf_raw_alloc(mp);
        __rte_mbuf_sanity_check_raw(m, 0);
        return m;
}

#ifdef RTE_LIBRTE_VMXNET3_DEBUG_DRIVER_NOT_USED
static void
vmxnet3_rxq_dump(struct vmxnet3_rx_queue *rxq)
{
        uint32_t avail = 0;

        if (rxq == NULL)
                return;

        PMD_RX_LOG(DEBUG,
                   "RXQ: cmd0 base : 0x%p cmd1 base : 0x%p comp ring base : 0x%p.",
                   rxq->cmd_ring[0].base, rxq->cmd_ring[1].base, rxq->comp_ring.base);
        PMD_RX_LOG(DEBUG,
                   "RXQ: cmd0 basePA : 0x%lx cmd1 basePA : 0x%lx comp ring basePA : 0x%lx.",
                   (unsigned long)rxq->cmd_ring[0].basePA,
                   (unsigned long)rxq->cmd_ring[1].basePA,
                   (unsigned long)rxq->comp_ring.basePA);

        avail = vmxnet3_cmd_ring_desc_avail(&rxq->cmd_ring[0]);
        PMD_RX_LOG(DEBUG,
                   "RXQ:cmd0: size=%u; free=%u; next2proc=%u; queued=%u",
                   (uint32_t)rxq->cmd_ring[0].size, avail,
                   rxq->comp_ring.next2proc,
                   rxq->cmd_ring[0].size - avail);

        avail = vmxnet3_cmd_ring_desc_avail(&rxq->cmd_ring[1]);
        PMD_RX_LOG(DEBUG, "RXQ:cmd1 size=%u; free=%u; next2proc=%u; queued=%u",
                   (uint32_t)rxq->cmd_ring[1].size, avail, rxq->comp_ring.next2proc,
                   rxq->cmd_ring[1].size - avail);
}

static void
vmxnet3_txq_dump(struct vmxnet3_tx_queue *txq)
{
        uint32_t avail = 0;

        if (txq == NULL)
                return;

        PMD_TX_LOG(DEBUG, "TXQ: cmd base : 0x%p comp ring base : 0x%p.",
                   txq->cmd_ring.base, txq->comp_ring.base);
        PMD_TX_LOG(DEBUG, "TXQ: cmd basePA : 0x%lx comp ring basePA : 0x%lx.",
                   (unsigned long)txq->cmd_ring.basePA,
                   (unsigned long)txq->comp_ring.basePA);

        avail = vmxnet3_cmd_ring_desc_avail(&txq->cmd_ring);
        PMD_TX_LOG(DEBUG, "TXQ: size=%u; free=%u; next2proc=%u; queued=%u",
                   (uint32_t)txq->cmd_ring.size, avail,
                   txq->comp_ring.next2proc, txq->cmd_ring.size - avail);
}
#endif

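/*
 * Free every mbuf still attached to a command ring (all entries between
 * next2comp and next2fill) and release the ring's buf_info array.
 */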
static inline void
vmxnet3_cmd_ring_release(vmxnet3_cmd_ring_t *ring)
{
        while (ring->next2comp != ring->next2fill) {
                /* No need to worry about tx desc ownership, device is quiesced by now. */
                vmxnet3_buf_info_t *buf_info = ring->buf_info + ring->next2comp;

                if (buf_info->m) {
                        rte_pktmbuf_free(buf_info->m);
                        buf_info->m = NULL;
                        buf_info->bufPA = 0;
                        buf_info->len = 0;
                }
                vmxnet3_cmd_ring_adv_next2comp(ring);
        }
        rte_free(ring->buf_info);
        ring->buf_info = NULL;
}

void
vmxnet3_dev_tx_queue_release(void *txq)
{
        vmxnet3_tx_queue_t *tq = txq;

        if (txq != NULL) {
                /* Release the cmd_ring */
                vmxnet3_cmd_ring_release(&tq->cmd_ring);
        }
}

void
vmxnet3_dev_rx_queue_release(void *rxq)
{
        int i;
        vmxnet3_rx_queue_t *rq = rxq;

        if (rxq != NULL) {
                /* Release both the cmd_rings */
                for (i = 0; i < VMXNET3_RX_CMDRING_SIZE; i++)
                        vmxnet3_cmd_ring_release(&rq->cmd_ring[i]);
        }
}

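/* Stop all Tx/Rx queues of the port and release their rings. */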
void
vmxnet3_dev_clear_queues(struct rte_eth_dev *dev)
{
        unsigned i;

        PMD_INIT_FUNC_TRACE();

        for (i = 0; i < dev->data->nb_tx_queues; i++) {
                struct vmxnet3_tx_queue *txq = dev->data->tx_queues[i];

                if (txq != NULL) {
                        txq->stopped = TRUE;
                        vmxnet3_dev_tx_queue_release(txq);
                }
        }

        for (i = 0; i < dev->data->nb_rx_queues; i++) {
                struct vmxnet3_rx_queue *rxq = dev->data->rx_queues[i];

                if (rxq != NULL) {
                        rxq->stopped = TRUE;
                        vmxnet3_dev_rx_queue_release(rxq);
                }
        }
}

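/*
 * Walk the Tx completion ring while its generation bit matches, freeing the
 * transmitted mbufs and returning their command descriptors to the ring.
 */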
static inline void
vmxnet3_tq_tx_complete(vmxnet3_tx_queue_t *txq)
{
        int completed = 0;
        struct rte_mbuf *mbuf;
        vmxnet3_comp_ring_t *comp_ring = &txq->comp_ring;
        struct Vmxnet3_TxCompDesc *tcd = (struct Vmxnet3_TxCompDesc *)
                (comp_ring->base + comp_ring->next2proc);

        while (tcd->gen == comp_ring->gen) {
                /* Release cmd_ring descriptor and free mbuf */
#ifdef RTE_LIBRTE_VMXNET3_DEBUG_DRIVER
                VMXNET3_ASSERT(txq->cmd_ring.base[tcd->txdIdx].txd.eop == 1);
#endif
                mbuf = txq->cmd_ring.buf_info[tcd->txdIdx].m;
                if (unlikely(mbuf == NULL))
                        rte_panic("EOP desc does not point to a valid mbuf");
                else
                        rte_pktmbuf_free(mbuf);

                txq->cmd_ring.buf_info[tcd->txdIdx].m = NULL;
                /* Mark the txd for which tcd was generated as completed */
                vmxnet3_cmd_ring_adv_next2comp(&txq->cmd_ring);

                vmxnet3_comp_ring_adv_next2proc(comp_ring);
                tcd = (struct Vmxnet3_TxCompDesc *)(comp_ring->base +
                                                    comp_ring->next2proc);
                completed++;
        }

        PMD_TX_LOG(DEBUG, "Processed %d tx comps & command descs.", completed);
}

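/*
 * Burst transmit: one command descriptor per packet (multi-segment mbufs and
 * offloads are not supported yet). The TXPROD doorbell is only written once
 * txNumDeferred reaches the threshold advertised by the device.
 */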
uint16_t
vmxnet3_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts,
                  uint16_t nb_pkts)
{
        uint16_t nb_tx;
        Vmxnet3_TxDesc *txd = NULL;
        vmxnet3_buf_info_t *tbi = NULL;
        struct vmxnet3_hw *hw;
        struct rte_mbuf *txm;
        vmxnet3_tx_queue_t *txq = tx_queue;

        hw = txq->hw;

        if (txq->stopped) {
                PMD_TX_LOG(DEBUG, "Tx queue is stopped.");
                return 0;
        }

        /* Free up the comp_descriptors aggressively */
        vmxnet3_tq_tx_complete(txq);

        nb_tx = 0;
        while (nb_tx < nb_pkts) {
                if (vmxnet3_cmd_ring_desc_avail(&txq->cmd_ring)) {
                        txm = tx_pkts[nb_tx];

                        /* Multi-segment (scatter) packets are not supported yet; drop them */
                        if (txm->nb_segs != 1) {
                                PMD_TX_LOG(DEBUG, "Multi-segment packets not supported yet, dropping packet");
                                rte_pktmbuf_free(tx_pkts[nb_tx]);
                                txq->stats.drop_total++;

                                nb_tx++;
                                continue;
                        }

                        /* The MTU does not include the Ethernet header, so allow for it here */
                        if (txm->data_len > (hw->cur_mtu + ETHER_HDR_LEN)) {
                                PMD_TX_LOG(DEBUG, "Packet data_len higher than MTU");
                                rte_pktmbuf_free(tx_pkts[nb_tx]);
                                txq->stats.drop_total++;

                                nb_tx++;
                                continue;
                        }

                        txd = (Vmxnet3_TxDesc *)(txq->cmd_ring.base + txq->cmd_ring.next2fill);

                        /* Fill the tx descriptor */
                        tbi = txq->cmd_ring.buf_info + txq->cmd_ring.next2fill;
                        tbi->bufPA = RTE_MBUF_DATA_DMA_ADDR(txm);
                        txd->addr = tbi->bufPA;
                        txd->len = txm->data_len;

                        /* Mark the last descriptor as End of Packet. */
                        txd->cq = 1;
                        txd->eop = 1;

                        /* Record the current mbuf so it can be freed later in tx complete */
#ifdef RTE_LIBRTE_VMXNET3_DEBUG_DRIVER
                        VMXNET3_ASSERT(txm);
#endif
                        tbi->m = txm;

                        /* Set the offloading mode to default */
                        txd->hlen = 0;
                        txd->om = VMXNET3_OM_NONE;
                        txd->msscof = 0;

                        /* Finally, flip the GEN bit of the SOP descriptor */
                        txd->gen = txq->cmd_ring.gen;
                        txq->shared->ctrl.txNumDeferred++;

                        /* Move to the next2fill descriptor */
                        vmxnet3_cmd_ring_adv_next2fill(&txq->cmd_ring);
                        nb_tx++;
                } else {
                        PMD_TX_LOG(DEBUG, "No free tx cmd desc(s)");
                        txq->stats.drop_total += (nb_pkts - nb_tx);
                        break;
                }
        }

        PMD_TX_LOG(DEBUG, "vmxnet3 txThreshold: %u", txq->shared->ctrl.txThreshold);

        if (txq->shared->ctrl.txNumDeferred >= txq->shared->ctrl.txThreshold) {
                txq->shared->ctrl.txNumDeferred = 0;
                /* Notify the vSwitch that packets are available. */
                VMXNET3_WRITE_BAR0_REG(hw, (VMXNET3_REG_TXPROD + txq->queue_id * VMXNET3_REG_ALIGN),
                                       txq->cmd_ring.next2fill);
        }

        return nb_tx;
}

/*
 *  Allocate mbufs and post rx descriptors with the buffer details so that
 *  the device can receive packets into those buffers.
 *      Ring layout:
 *      Of the two rings, the 1st ring contains buffers of type 0 and type 1.
 *      bufs_per_pkt is set such that for non-LRO cases all the buffers required
 *      by a frame fit in the 1st ring (1st buf of type 0, rest of type 1).
 *      The 2nd ring contains buffers of type 1 only and is mostly used for LRO.
 */
static inline int
vmxnet3_post_rx_bufs(vmxnet3_rx_queue_t *rxq, uint8_t ring_id)
{
        int err = 0;
        uint32_t i = 0, val = 0;
        struct vmxnet3_cmd_ring *ring = &rxq->cmd_ring[ring_id];

        while (vmxnet3_cmd_ring_desc_avail(ring) > 0) {
                struct Vmxnet3_RxDesc *rxd;
                struct rte_mbuf *mbuf;
                vmxnet3_buf_info_t *buf_info = &ring->buf_info[ring->next2fill];

                rxd = (struct Vmxnet3_RxDesc *)(ring->base + ring->next2fill);

                if (ring->rid == 0) {
                        /* Usually: One HEAD type buf per packet
                         * val = (ring->next2fill % rxq->hw->bufs_per_pkt) ?
                         * VMXNET3_RXD_BTYPE_BODY : VMXNET3_RXD_BTYPE_HEAD;
                         */

                        /* We use a single buffer per packet, so every descriptor here is HEAD type */
                        val = VMXNET3_RXD_BTYPE_HEAD;
                } else {
                        /* All BODY type buffers for the 2nd ring, which ESXi does not use at all */
                        val = VMXNET3_RXD_BTYPE_BODY;
                }

                /* Allocate a blank mbuf for the current Rx descriptor */
                mbuf = rte_rxmbuf_alloc(rxq->mp);
                if (mbuf == NULL) {
                        PMD_RX_LOG(ERR, "Error allocating mbuf in %s", __func__);
                        rxq->stats.rx_buf_alloc_failure++;
                        err = ENOMEM;
                        break;
                }

                /*
                 * Load the mbuf pointer into the buf_info entry for this descriptor;
                 * buf_info plays the role of the virtio virtqueue cookie.
                 */
                buf_info->m = mbuf;
                buf_info->len = (uint16_t)(mbuf->buf_len -
                                           RTE_PKTMBUF_HEADROOM);
                buf_info->bufPA = RTE_MBUF_DATA_DMA_ADDR_DEFAULT(mbuf);

                /* Load the Rx descriptor with the buffer's GPA */
                rxd->addr = buf_info->bufPA;

                /* After this point rxd->addr MUST not be NULL */
                rxd->btype = val;
                rxd->len = buf_info->len;
                /* Flip the gen bit at the end to pass ownership to the device */
                rxd->gen = ring->gen;

                vmxnet3_cmd_ring_adv_next2fill(ring);
                i++;
        }

        /* Return an error only if no buffers are posted at present */
        if (vmxnet3_cmd_ring_desc_avail(ring) >= (ring->size - 1))
                return -err;
        else
                return i;
}

/*
 * Process the Rx completion ring of the given vmxnet3_rx_queue
 * for an nb_pkts burst and return the number of packets received.
 */
uint16_t
vmxnet3_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t nb_pkts)
{
        uint16_t nb_rx;
        uint32_t nb_rxd, idx;
        uint8_t ring_idx;
        vmxnet3_rx_queue_t *rxq;
        Vmxnet3_RxCompDesc *rcd;
        vmxnet3_buf_info_t *rbi;
        Vmxnet3_RxDesc *rxd;
        struct rte_mbuf *rxm = NULL;
        struct vmxnet3_hw *hw;

        nb_rx = 0;
        ring_idx = 0;
        nb_rxd = 0;
        idx = 0;

        rxq = rx_queue;
        hw = rxq->hw;

        rcd = &rxq->comp_ring.base[rxq->comp_ring.next2proc].rcd;

        if (rxq->stopped) {
                PMD_RX_LOG(DEBUG, "Rx queue is stopped.");
                return 0;
        }

        while (rcd->gen == rxq->comp_ring.gen) {
                if (nb_rx >= nb_pkts)
                        break;
                idx = rcd->rxdIdx;
                ring_idx = (uint8_t)((rcd->rqID == rxq->qid1) ? 0 : 1);
                rxd = (Vmxnet3_RxDesc *)rxq->cmd_ring[ring_idx].base + idx;
                rbi = rxq->cmd_ring[ring_idx].buf_info + idx;

                if (rcd->sop != 1 || rcd->eop != 1) {
                        rte_pktmbuf_free_seg(rbi->m);

                        PMD_RX_LOG(DEBUG, "Packet spread across multiple buffers");
                        goto rcd_done;
                } else {
                        PMD_RX_LOG(DEBUG, "rxd idx: %d ring idx: %d.", idx, ring_idx);

#ifdef RTE_LIBRTE_VMXNET3_DEBUG_DRIVER
                        VMXNET3_ASSERT(rcd->len <= rxd->len);
                        VMXNET3_ASSERT(rbi->m);
#endif
                        if (rcd->len == 0) {
                                PMD_RX_LOG(DEBUG, "Rx buf was skipped. rxring[%d][%d]",
                                           ring_idx, idx);
#ifdef RTE_LIBRTE_VMXNET3_DEBUG_DRIVER
                                VMXNET3_ASSERT(rcd->sop && rcd->eop);
#endif
                                rte_pktmbuf_free_seg(rbi->m);

                                goto rcd_done;
                        }

                        /* We expect each packet to arrive in a single buffer */
                        if (rxd->btype != VMXNET3_RXD_BTYPE_HEAD) {
                                PMD_RX_LOG(DEBUG,
                                           "Alert: Misbehaving device, incorrect "
                                           "buffer type used. Packet dropped.");
                                rte_pktmbuf_free_seg(rbi->m);
                                goto rcd_done;
                        }
#ifdef RTE_LIBRTE_VMXNET3_DEBUG_DRIVER
                        VMXNET3_ASSERT(rxd->btype == VMXNET3_RXD_BTYPE_HEAD);
#endif
                        /* Get the packet buffer pointer from buf_info */
                        rxm = rbi->m;

                        /* Clear the descriptor's buf_info so it can be reused */
                        rbi->m = NULL;
                        rbi->bufPA = 0;

                        /* Update the index at which we received a packet */
                        rxq->cmd_ring[ring_idx].next2comp = idx;

                        /* For an RCD with EOP set, check whether there is a frame error */
                        if (rcd->err) {
                                rxq->stats.drop_total++;
                                rxq->stats.drop_err++;

                                if (!rcd->fcs) {
                                        rxq->stats.drop_fcs++;
                                        PMD_RX_LOG(ERR, "Recv packet dropped due to frame err.");
                                }
                                PMD_RX_LOG(ERR, "Error in received packet rcd#:%d rxd:%d",
                                           (int)(rcd - (struct Vmxnet3_RxCompDesc *)
                                                 rxq->comp_ring.base), rcd->rxdIdx);
                                rte_pktmbuf_free_seg(rxm);

                                goto rcd_done;
                        }

                        /* Check for a hardware-stripped VLAN tag */
                        if (rcd->ts) {
                                PMD_RX_LOG(DEBUG, "Received packet with vlan ID: %d.",
                                           rcd->tci);
                                rxm->ol_flags = PKT_RX_VLAN_PKT;

#ifdef RTE_LIBRTE_VMXNET3_DEBUG_DRIVER
                                VMXNET3_ASSERT(rxm &&
                                               rte_pktmbuf_mtod(rxm, void *));
#endif
                                /* Copy the vlan tag into the packet buffer */
                                rxm->vlan_tci = rte_le_to_cpu_16(
                                                (uint16_t)rcd->tci);
                        } else {
                                rxm->ol_flags = 0;
                                rxm->vlan_tci = 0;
                        }

                        /* Initialize newly received packet buffer */
                        rxm->port = rxq->port_id;
                        rxm->nb_segs = 1;
                        rxm->next = NULL;
                        rxm->pkt_len = (uint16_t)rcd->len;
                        rxm->data_len = (uint16_t)rcd->len;
                        rxm->data = (char *)rxm->buf_addr + RTE_PKTMBUF_HEADROOM;

                        rx_pkts[nb_rx++] = rxm;

rcd_done:
                        rxq->cmd_ring[ring_idx].next2comp = idx;
                        VMXNET3_INC_RING_IDX_ONLY(rxq->cmd_ring[ring_idx].next2comp, rxq->cmd_ring[ring_idx].size);

                        /* It's time to allocate some new buf and renew descriptors */
                        vmxnet3_post_rx_bufs(rxq, ring_idx);
                        if (unlikely(rxq->shared->ctrl.updateRxProd)) {
                                VMXNET3_WRITE_BAR0_REG(hw, rxprod_reg[ring_idx] + (rxq->queue_id * VMXNET3_REG_ALIGN),
                                                       rxq->cmd_ring[ring_idx].next2fill);
                        }

                        /* Advance to the next descriptor in comp_ring */
                        vmxnet3_comp_ring_adv_next2proc(&rxq->comp_ring);

                        rcd = &rxq->comp_ring.base[rxq->comp_ring.next2proc].rcd;
                        nb_rxd++;
                        if (nb_rxd > rxq->cmd_ring[0].size) {
                                PMD_RX_LOG(ERR,
                                           "Used up quota of receiving packets,"
                                           " relinquish control.");
                                break;
                        }
                }
        }

        return nb_rx;
}

/*
 * Create memzone for device rings. malloc can't be used as the physical address is
 * needed. If the memzone is already created, then this function returns a ptr
 * to the old one.
 */
static const struct rte_memzone *
ring_dma_zone_reserve(struct rte_eth_dev *dev, const char *ring_name,
                      uint16_t queue_id, uint32_t ring_size, int socket_id)
{
        char z_name[RTE_MEMZONE_NAMESIZE];
        const struct rte_memzone *mz;

        snprintf(z_name, sizeof(z_name), "%s_%s_%d_%d",
                        dev->driver->pci_drv.name, ring_name,
                        dev->data->port_id, queue_id);

        mz = rte_memzone_lookup(z_name);
        if (mz)
                return mz;

        return rte_memzone_reserve_aligned(z_name, ring_size,
                        socket_id, 0, VMXNET3_RING_BA_ALIGN);
}

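/*
 * Set up a Tx queue: enforce the no-multi-segment/no-offload txq_flags,
 * size the command and completion rings, and carve both out of a single
 * DMA-able memzone.
 */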
int
vmxnet3_dev_tx_queue_setup(struct rte_eth_dev *dev,
                           uint16_t queue_idx,
                           uint16_t nb_desc,
                           unsigned int socket_id,
                           __attribute__((unused)) const struct rte_eth_txconf *tx_conf)
{
        struct vmxnet3_hw     *hw = dev->data->dev_private;
        const struct rte_memzone *mz;
        struct vmxnet3_tx_queue *txq;
        struct vmxnet3_cmd_ring *ring;
        struct vmxnet3_comp_ring *comp_ring;
        int size;

        PMD_INIT_FUNC_TRACE();

        if ((tx_conf->txq_flags & ETH_TXQ_FLAGS_NOMULTSEGS) !=
            ETH_TXQ_FLAGS_NOMULTSEGS) {
                PMD_INIT_LOG(ERR, "Multi-segment TX is not supported yet");
                return -EINVAL;
        }

        if ((tx_conf->txq_flags & ETH_TXQ_FLAGS_NOOFFLOADS) !=
            ETH_TXQ_FLAGS_NOOFFLOADS) {
                PMD_INIT_LOG(ERR, "TX offloads are not supported yet");
                return -EINVAL;
        }

        txq = rte_zmalloc("ethdev_tx_queue", sizeof(struct vmxnet3_tx_queue), CACHE_LINE_SIZE);
        if (txq == NULL) {
                PMD_INIT_LOG(ERR, "Cannot allocate tx queue structure");
                return -ENOMEM;
        }

        txq->queue_id = queue_idx;
        txq->port_id = dev->data->port_id;
        txq->shared = &hw->tqd_start[queue_idx];
        txq->hw = hw;
        txq->qid = queue_idx;
        txq->stopped = TRUE;

        ring = &txq->cmd_ring;
        comp_ring = &txq->comp_ring;

        /* Tx vmxnet ring length should be between 512 and 4096 */
        if (nb_desc < VMXNET3_DEF_TX_RING_SIZE) {
                PMD_INIT_LOG(ERR, "VMXNET3 Tx Ring Size Min: %u",
                             VMXNET3_DEF_TX_RING_SIZE);
                return -EINVAL;
        } else if (nb_desc > VMXNET3_TX_RING_MAX_SIZE) {
                PMD_INIT_LOG(ERR, "VMXNET3 Tx Ring Size Max: %u",
                             VMXNET3_TX_RING_MAX_SIZE);
                return -EINVAL;
        } else {
                ring->size = nb_desc;
                ring->size &= ~VMXNET3_RING_SIZE_MASK;
        }
        comp_ring->size = ring->size;

        /* Tx vmxnet rings structure initialization */
        ring->next2fill = 0;
        ring->next2comp = 0;
        ring->gen = VMXNET3_INIT_GEN;
        comp_ring->next2proc = 0;
        comp_ring->gen = VMXNET3_INIT_GEN;

        size = sizeof(struct Vmxnet3_TxDesc) * ring->size;
        size += sizeof(struct Vmxnet3_TxCompDesc) * comp_ring->size;

        mz = ring_dma_zone_reserve(dev, "txdesc", queue_idx, size, socket_id);
        if (mz == NULL) {
                PMD_INIT_LOG(ERR, "ERROR: Creating queue descriptors zone");
                return -ENOMEM;
        }
        memset(mz->addr, 0, mz->len);

        /* cmd_ring initialization */
        ring->base = mz->addr;
        ring->basePA = mz->phys_addr;

        /* comp_ring initialization */
        comp_ring->base = ring->base + ring->size;
        comp_ring->basePA = ring->basePA +
                (sizeof(struct Vmxnet3_TxDesc) * ring->size);

        /* cmd_ring0 buf_info allocation */
        ring->buf_info = rte_zmalloc("tx_ring_buf_info",
                                     ring->size * sizeof(vmxnet3_buf_info_t), CACHE_LINE_SIZE);
        if (ring->buf_info == NULL) {
                PMD_INIT_LOG(ERR, "ERROR: Creating tx_buf_info structure");
                return -ENOMEM;
        }

        /* Update the data portion with txq */
        dev->data->tx_queues[queue_idx] = txq;

        return 0;
}

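/*
 * Set up an Rx queue: both command rings and the completion ring share one
 * DMA-able memzone. Scatter Rx is not supported, so the mempool's data room
 * must be large enough for max_rx_pkt_len.
 */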
int
vmxnet3_dev_rx_queue_setup(struct rte_eth_dev *dev,
                           uint16_t queue_idx,
                           uint16_t nb_desc,
                           unsigned int socket_id,
                           __attribute__((unused)) const struct rte_eth_rxconf *rx_conf,
                           struct rte_mempool *mp)
{
        const struct rte_memzone *mz;
        struct vmxnet3_rx_queue *rxq;
        struct vmxnet3_hw     *hw = dev->data->dev_private;
        struct vmxnet3_cmd_ring *ring0, *ring1, *ring;
        struct vmxnet3_comp_ring *comp_ring;
        int size;
        uint8_t i;
        char mem_name[32];
        uint16_t buf_size;
        struct rte_pktmbuf_pool_private *mbp_priv;

        PMD_INIT_FUNC_TRACE();

        mbp_priv = (struct rte_pktmbuf_pool_private *)
                rte_mempool_get_priv(mp);
        buf_size = (uint16_t) (mbp_priv->mbuf_data_room_size -
                               RTE_PKTMBUF_HEADROOM);

        if (dev->data->dev_conf.rxmode.max_rx_pkt_len > buf_size) {
                PMD_INIT_LOG(ERR, "buf_size = %u, max_pkt_len = %u, "
                             "VMXNET3 doesn't support scatter packets yet",
                             buf_size, dev->data->dev_conf.rxmode.max_rx_pkt_len);
                return -EINVAL;
        }

        rxq = rte_zmalloc("ethdev_rx_queue", sizeof(struct vmxnet3_rx_queue), CACHE_LINE_SIZE);
        if (rxq == NULL) {
                PMD_INIT_LOG(ERR, "Cannot allocate rx queue structure");
                return -ENOMEM;
        }

        rxq->mp = mp;
        rxq->queue_id = queue_idx;
        rxq->port_id = dev->data->port_id;
        rxq->shared = &hw->rqd_start[queue_idx];
        rxq->hw = hw;
        rxq->qid1 = queue_idx;
        rxq->qid2 = queue_idx + hw->num_rx_queues;
        rxq->stopped = TRUE;

        ring0 = &rxq->cmd_ring[0];
        ring1 = &rxq->cmd_ring[1];
        comp_ring = &rxq->comp_ring;

        /* Rx vmxnet ring lengths should be between 256 and 4096 */
        if (nb_desc < VMXNET3_DEF_RX_RING_SIZE) {
                PMD_INIT_LOG(ERR, "VMXNET3 Rx Ring Size Min: 256");
                return -EINVAL;
        } else if (nb_desc > VMXNET3_RX_RING_MAX_SIZE) {
                PMD_INIT_LOG(ERR, "VMXNET3 Rx Ring Size Max: 4096");
                return -EINVAL;
        } else {
                ring0->size = nb_desc;
                ring0->size &= ~VMXNET3_RING_SIZE_MASK;
                ring1->size = ring0->size;
        }

        comp_ring->size = ring0->size + ring1->size;

        /* Rx vmxnet rings structure initialization */
        ring0->next2fill = 0;
        ring1->next2fill = 0;
        ring0->next2comp = 0;
        ring1->next2comp = 0;
        ring0->gen = VMXNET3_INIT_GEN;
        ring1->gen = VMXNET3_INIT_GEN;
        comp_ring->next2proc = 0;
        comp_ring->gen = VMXNET3_INIT_GEN;

        size = sizeof(struct Vmxnet3_RxDesc) * (ring0->size + ring1->size);
        size += sizeof(struct Vmxnet3_RxCompDesc) * comp_ring->size;

        mz = ring_dma_zone_reserve(dev, "rxdesc", queue_idx, size, socket_id);
        if (mz == NULL) {
                PMD_INIT_LOG(ERR, "ERROR: Creating queue descriptors zone");
                return -ENOMEM;
        }
        memset(mz->addr, 0, mz->len);

        /* cmd_ring0 initialization */
        ring0->base = mz->addr;
        ring0->basePA = mz->phys_addr;

        /* cmd_ring1 initialization */
        ring1->base = ring0->base + ring0->size;
        ring1->basePA = ring0->basePA + sizeof(struct Vmxnet3_RxDesc) * ring0->size;

        /* comp_ring initialization */
        comp_ring->base = ring1->base + ring1->size;
        comp_ring->basePA = ring1->basePA + sizeof(struct Vmxnet3_RxDesc) *
                ring1->size;

        /* cmd_ring0-cmd_ring1 buf_info allocation */
        for (i = 0; i < VMXNET3_RX_CMDRING_SIZE; i++) {
                ring = &rxq->cmd_ring[i];
                ring->rid = i;
                snprintf(mem_name, sizeof(mem_name), "rx_ring_%d_buf_info", i);

                ring->buf_info = rte_zmalloc(mem_name, ring->size * sizeof(vmxnet3_buf_info_t), CACHE_LINE_SIZE);
                if (ring->buf_info == NULL) {
                        PMD_INIT_LOG(ERR, "ERROR: Creating rx_buf_info structure");
                        return -ENOMEM;
                }
        }

        /* Update the data portion with rxq */
        dev->data->rx_queues[queue_idx] = rxq;

        return 0;
}

/*
 * Initializes the receive unit:
 * loads mbufs into the rx queues in advance.
 */
int
vmxnet3_dev_rxtx_init(struct rte_eth_dev *dev)
{
        struct vmxnet3_hw *hw = dev->data->dev_private;

        int i, ret;
        uint8_t j;

        PMD_INIT_FUNC_TRACE();

        for (i = 0; i < hw->num_rx_queues; i++) {
                vmxnet3_rx_queue_t *rxq = dev->data->rx_queues[i];

                for (j = 0; j < VMXNET3_RX_CMDRING_SIZE; j++) {
                        /* Post as many Rx buffers as the ring can hold */
                        ret = vmxnet3_post_rx_bufs(rxq, j);
                        if (ret <= 0) {
                                PMD_INIT_LOG(ERR, "ERROR: Posting Rxq: %d buffers ring: %d", i, j);
                                return -ret;
                        }
                        /* Update the device with next2fill so it can use the newly posted mbufs for incoming packets */
                        if (unlikely(rxq->shared->ctrl.updateRxProd)) {
                                VMXNET3_WRITE_BAR0_REG(hw, rxprod_reg[j] + (rxq->queue_id * VMXNET3_REG_ALIGN),
                                                       rxq->cmd_ring[j].next2fill);
                        }
                }
                rxq->stopped = FALSE;
        }

        for (i = 0; i < dev->data->nb_tx_queues; i++) {
                struct vmxnet3_tx_queue *txq = dev->data->tx_queues[i];

                txq->stopped = FALSE;
        }

        return 0;
}

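/* Default Toeplitz RSS hash key, the same well-known key also used by other Intel PMDs. */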
static uint8_t rss_intel_key[40] = {
        0x6D, 0x5A, 0x56, 0xDA, 0x25, 0x5B, 0x0E, 0xC2,
        0x41, 0x67, 0x25, 0x3D, 0x43, 0xA3, 0x8F, 0xB0,
        0xD0, 0xCA, 0x2B, 0xCB, 0xAE, 0x7B, 0x30, 0xB4,
        0x77, 0xCB, 0x2D, 0xA3, 0x80, 0x30, 0xF2, 0x0C,
        0x6A, 0x42, 0xB7, 0x3B, 0xBE, 0xAC, 0x01, 0xFA,
};

/*
 * Configure RSS feature
 */
int
vmxnet3_rss_configure(struct rte_eth_dev *dev)
{
#define VMXNET3_RSS_OFFLOAD_ALL ( \
                ETH_RSS_IPV4 | \
                ETH_RSS_IPV4_TCP | \
                ETH_RSS_IPV6 | \
                ETH_RSS_IPV6_TCP)

        struct vmxnet3_hw *hw = dev->data->dev_private;
        struct VMXNET3_RSSConf *dev_rss_conf;
        struct rte_eth_rss_conf *port_rss_conf;
        uint64_t rss_hf;
        uint8_t i, j;

        PMD_INIT_FUNC_TRACE();

        dev_rss_conf = hw->rss_conf;
        port_rss_conf = &dev->data->dev_conf.rx_adv_conf.rss_conf;

        /* loading hashFunc */
        dev_rss_conf->hashFunc = VMXNET3_RSS_HASH_FUNC_TOEPLITZ;
        /* loading hashKeySize */
        dev_rss_conf->hashKeySize = VMXNET3_RSS_MAX_KEY_SIZE;
        /* loading indTableSize: must not exceed VMXNET3_RSS_MAX_IND_TABLE_SIZE (128) */
        dev_rss_conf->indTableSize = (uint16_t)(hw->num_rx_queues * 4);

        if (port_rss_conf->rss_key == NULL) {
                /* Default hash key */
                port_rss_conf->rss_key = rss_intel_key;
        }

        /* loading hashKey */
        memcpy(&dev_rss_conf->hashKey[0], port_rss_conf->rss_key, dev_rss_conf->hashKeySize);

        /* loading indTable */
        for (i = 0, j = 0; i < dev_rss_conf->indTableSize; i++, j++) {
                if (j == dev->data->nb_rx_queues)
                        j = 0;
                dev_rss_conf->indTable[i] = j;
        }

        /* loading hashType */
        dev_rss_conf->hashType = 0;
        rss_hf = port_rss_conf->rss_hf & VMXNET3_RSS_OFFLOAD_ALL;
        if (rss_hf & ETH_RSS_IPV4)
                dev_rss_conf->hashType |= VMXNET3_RSS_HASH_TYPE_IPV4;
        if (rss_hf & ETH_RSS_IPV4_TCP)
                dev_rss_conf->hashType |= VMXNET3_RSS_HASH_TYPE_TCP_IPV4;
        if (rss_hf & ETH_RSS_IPV6)
                dev_rss_conf->hashType |= VMXNET3_RSS_HASH_TYPE_IPV6;
        if (rss_hf & ETH_RSS_IPV6_TCP)
                dev_rss_conf->hashType |= VMXNET3_RSS_HASH_TYPE_TCP_IPV6;

        return VMXNET3_SUCCESS;
}

/*
 * Configure VLAN Filter feature
 */
int
vmxnet3_vlan_configure(struct rte_eth_dev *dev)
{
        uint8_t i;
        struct vmxnet3_hw *hw = dev->data->dev_private;
        uint32_t *vf_table = hw->shared->devRead.rxFilterConf.vfTable;

        PMD_INIT_FUNC_TRACE();

        /* Start with an empty VLAN filter table */
        for (i = 0; i < VMXNET3_VFT_SIZE; i++) {
                /* Filter all vlan tags out by default */
                vf_table[i] = 0;
                /* To-Do: Provide another routine in dev_ops for user config */

                PMD_INIT_LOG(DEBUG, "Registering VLAN portid: %"PRIu8" tag %u",
                                        dev->data->port_id, vf_table[i]);
        }

        return VMXNET3_SUCCESS;
}