[dpdk.git] / lib / librte_pmd_vmxnet3 / vmxnet3_rxtx.c
1 /*-
2  *   BSD LICENSE
3  *
4  *   Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
5  *   All rights reserved.
6  *
7  *   Redistribution and use in source and binary forms, with or without
8  *   modification, are permitted provided that the following conditions
9  *   are met:
10  *
11  *     * Redistributions of source code must retain the above copyright
12  *       notice, this list of conditions and the following disclaimer.
13  *     * Redistributions in binary form must reproduce the above copyright
14  *       notice, this list of conditions and the following disclaimer in
15  *       the documentation and/or other materials provided with the
16  *       distribution.
17  *     * Neither the name of Intel Corporation nor the names of its
18  *       contributors may be used to endorse or promote products derived
19  *       from this software without specific prior written permission.
20  *
21  *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
22  *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
23  *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
24  *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
25  *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
26  *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
27  *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
28  *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
29  *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
30  *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
31  *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
32  */
33
34 #include <sys/queue.h>
35
36 #include <stdio.h>
37 #include <stdlib.h>
38 #include <string.h>
39 #include <errno.h>
40 #include <stdint.h>
41 #include <stdarg.h>
42 #include <unistd.h>
43 #include <inttypes.h>
44
45 #include <rte_byteorder.h>
46 #include <rte_common.h>
47 #include <rte_cycles.h>
48 #include <rte_log.h>
49 #include <rte_debug.h>
50 #include <rte_interrupts.h>
51 #include <rte_pci.h>
52 #include <rte_memory.h>
53 #include <rte_memzone.h>
54 #include <rte_launch.h>
55 #include <rte_tailq.h>
56 #include <rte_eal.h>
57 #include <rte_per_lcore.h>
58 #include <rte_lcore.h>
59 #include <rte_atomic.h>
60 #include <rte_branch_prediction.h>
61 #include <rte_ring.h>
62 #include <rte_mempool.h>
63 #include <rte_malloc.h>
64 #include <rte_mbuf.h>
65 #include <rte_ether.h>
66 #include <rte_ethdev.h>
67 #include <rte_prefetch.h>
68 #include <rte_ip.h>
69 #include <rte_udp.h>
70 #include <rte_tcp.h>
71 #include <rte_sctp.h>
72 #include <rte_string_fns.h>
73 #include <rte_errno.h>
74
75 #include "vmxnet3/vmxnet3_defs.h"
76 #include "vmxnet3_ring.h"
77
78 #include "vmxnet3_logs.h"
79 #include "vmxnet3_ethdev.h"
80
81
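/*
 * Helper macros for the DMA (bus) address of an mbuf's data area:
 * RTE_MBUF_DATA_DMA_ADDR uses the mbuf's current data_off, while the
 * _DEFAULT variant assumes the data starts right after the fixed headroom
 * and is used when posting freshly allocated, empty Rx buffers.
 */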
82 #define RTE_MBUF_DATA_DMA_ADDR(mb) \
83         (uint64_t) ((mb)->buf_physaddr + (mb)->data_off)
84
85 #define RTE_MBUF_DATA_DMA_ADDR_DEFAULT(mb) \
86         (uint64_t) ((mb)->buf_physaddr + RTE_PKTMBUF_HEADROOM)
87
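/* BAR0 producer-index registers for Rx command ring 0 and ring 1, indexed by ring id */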
88 static uint32_t rxprod_reg[2] = {VMXNET3_REG_RXPROD, VMXNET3_REG_RXPROD2};
89
90 static inline int vmxnet3_post_rx_bufs(vmxnet3_rx_queue_t* , uint8_t);
91 static inline void vmxnet3_tq_tx_complete(vmxnet3_tx_queue_t *);
92 #ifdef RTE_LIBRTE_VMXNET3_DEBUG_DRIVER_NOT_USED
93 static void vmxnet3_rxq_dump(struct vmxnet3_rx_queue *);
94 static void vmxnet3_txq_dump(struct vmxnet3_tx_queue *);
95 #endif
96
97 static inline struct rte_mbuf *
98 rte_rxmbuf_alloc(struct rte_mempool *mp)
99 {
100         struct rte_mbuf *m;
101
102         m = __rte_mbuf_raw_alloc(mp);
103         __rte_mbuf_sanity_check_raw(m, 0);
104         return m;
105 }
106
107 #ifdef RTE_LIBRTE_VMXNET3_DEBUG_DRIVER_NOT_USED
108 static void
109 vmxnet3_rxq_dump(struct vmxnet3_rx_queue *rxq)
110 {
111         uint32_t avail = 0;
112
113         if (rxq == NULL)
114                 return;
115
116         PMD_RX_LOG(DEBUG,
117                    "RXQ: cmd0 base : 0x%p cmd1 base : 0x%p comp ring base : 0x%p.",
118                    rxq->cmd_ring[0].base, rxq->cmd_ring[1].base, rxq->comp_ring.base);
119         PMD_RX_LOG(DEBUG,
120                    "RXQ: cmd0 basePA : 0x%lx cmd1 basePA : 0x%lx comp ring basePA : 0x%lx.",
121                    (unsigned long)rxq->cmd_ring[0].basePA,
122                    (unsigned long)rxq->cmd_ring[1].basePA,
123                    (unsigned long)rxq->comp_ring.basePA);
124
125         avail = vmxnet3_cmd_ring_desc_avail(&rxq->cmd_ring[0]);
126         PMD_RX_LOG(DEBUG,
127                    "RXQ:cmd0: size=%u; free=%u; next2proc=%u; queued=%u",
128                    (uint32_t)rxq->cmd_ring[0].size, avail,
129                    rxq->comp_ring.next2proc,
130                    rxq->cmd_ring[0].size - avail);
131
132         avail = vmxnet3_cmd_ring_desc_avail(&rxq->cmd_ring[1]);
133         PMD_RX_LOG(DEBUG, "RXQ:cmd1 size=%u; free=%u; next2proc=%u; queued=%u",
134                    (uint32_t)rxq->cmd_ring[1].size, avail, rxq->comp_ring.next2proc,
135                    rxq->cmd_ring[1].size - avail);
136
137 }
138
139 static void
140 vmxnet3_txq_dump(struct vmxnet3_tx_queue *txq)
141 {
142         uint32_t avail = 0;
143
144         if (txq == NULL)
145                 return;
146
147         PMD_TX_LOG(DEBUG, "TXQ: cmd base : 0x%p comp ring base : 0x%p.",
148                    txq->cmd_ring.base, txq->comp_ring.base);
149         PMD_TX_LOG(DEBUG, "TXQ: cmd basePA : 0x%lx comp ring basePA : 0x%lx.",
150                    (unsigned long)txq->cmd_ring.basePA,
151                    (unsigned long)txq->comp_ring.basePA);
152
153         avail = vmxnet3_cmd_ring_desc_avail(&txq->cmd_ring);
154         PMD_TX_LOG(DEBUG, "TXQ: size=%u; free=%u; next2proc=%u; queued=%u",
155                    (uint32_t)txq->cmd_ring.size, avail,
156                    txq->comp_ring.next2proc, txq->cmd_ring.size - avail);
157 }
158 #endif
159
160 static inline void
161 vmxnet3_cmd_ring_release_mbufs(vmxnet3_cmd_ring_t *ring)
162 {
163         while (ring->next2comp != ring->next2fill) {
164                 /* No need to worry about tx desc ownership, device is quiesced by now. */
165                 vmxnet3_buf_info_t *buf_info = ring->buf_info + ring->next2comp;
166
167                 if (buf_info->m) {
168                         rte_pktmbuf_free(buf_info->m);
169                         buf_info->m = NULL;
170                         buf_info->bufPA = 0;
171                         buf_info->len = 0;
172                 }
173                 vmxnet3_cmd_ring_adv_next2comp(ring);
174         }
175 }
176
177 static void
178 vmxnet3_cmd_ring_release(vmxnet3_cmd_ring_t *ring)
179 {
180         vmxnet3_cmd_ring_release_mbufs(ring);
181         rte_free(ring->buf_info);
182         ring->buf_info = NULL;
183 }
184
185
186 void
187 vmxnet3_dev_tx_queue_release(void *txq)
188 {
189         vmxnet3_tx_queue_t *tq = txq;
190
191         if (tq != NULL) {
192                 /* Release the cmd_ring */
193                 vmxnet3_cmd_ring_release(&tq->cmd_ring);
194         }
195 }
196
197 void
198 vmxnet3_dev_rx_queue_release(void *rxq)
199 {
200         int i;
201         vmxnet3_rx_queue_t *rq = rxq;
202
203         if (rq != NULL) {
204                 /* Release both the cmd_rings */
205                 for (i = 0; i < VMXNET3_RX_CMDRING_SIZE; i++)
206                         vmxnet3_cmd_ring_release(&rq->cmd_ring[i]);
207         }
208 }
209
210 static void
211 vmxnet3_dev_tx_queue_reset(void *txq)
212 {
213         vmxnet3_tx_queue_t *tq = txq;
214         struct vmxnet3_cmd_ring *ring = &tq->cmd_ring;
215         struct vmxnet3_comp_ring *comp_ring = &tq->comp_ring;
216         int size;
217
218         if (tq != NULL) {
219                 /* Release the cmd_ring mbufs */
220                 vmxnet3_cmd_ring_release_mbufs(&tq->cmd_ring);
221         }
222
223         /* Tx vmxnet rings structure initialization */
224         ring->next2fill = 0;
225         ring->next2comp = 0;
226         ring->gen = VMXNET3_INIT_GEN;
227         comp_ring->next2proc = 0;
228         comp_ring->gen = VMXNET3_INIT_GEN;
229
230         size = sizeof(struct Vmxnet3_TxDesc) * ring->size;
231         size += sizeof(struct Vmxnet3_TxCompDesc) * comp_ring->size;
232
233         memset(ring->base, 0, size);
234 }
235
236 static void
237 vmxnet3_dev_rx_queue_reset(void *rxq)
238 {
239         int i;
240         vmxnet3_rx_queue_t *rq = rxq;
241         struct vmxnet3_cmd_ring *ring0, *ring1;
242         struct vmxnet3_comp_ring *comp_ring;
243         int size;
244
245         if (rq != NULL) {
246                 /* Release both the cmd_rings mbufs */
247                 for (i = 0; i < VMXNET3_RX_CMDRING_SIZE; i++)
248                         vmxnet3_cmd_ring_release_mbufs(&rq->cmd_ring[i]);
249         }
250
251         ring0 = &rq->cmd_ring[0];
252         ring1 = &rq->cmd_ring[1];
253         comp_ring = &rq->comp_ring;
254
255         /* Rx vmxnet rings structure initialization */
256         ring0->next2fill = 0;
257         ring1->next2fill = 0;
258         ring0->next2comp = 0;
259         ring1->next2comp = 0;
260         ring0->gen = VMXNET3_INIT_GEN;
261         ring1->gen = VMXNET3_INIT_GEN;
262         comp_ring->next2proc = 0;
263         comp_ring->gen = VMXNET3_INIT_GEN;
264
265         size = sizeof(struct Vmxnet3_RxDesc) * (ring0->size + ring1->size);
266         size += sizeof(struct Vmxnet3_RxCompDesc) * comp_ring->size;
267
268         memset(ring0->base, 0, size);
269 }
270
271 void
272 vmxnet3_dev_clear_queues(struct rte_eth_dev *dev)
273 {
274         unsigned i;
275
276         PMD_INIT_FUNC_TRACE();
277
278         for (i = 0; i < dev->data->nb_tx_queues; i++) {
279                 struct vmxnet3_tx_queue *txq = dev->data->tx_queues[i];
280
281                 if (txq != NULL) {
282                         txq->stopped = TRUE;
283                         vmxnet3_dev_tx_queue_reset(txq);
284                 }
285         }
286
287         for (i = 0; i < dev->data->nb_rx_queues; i++) {
288                 struct vmxnet3_rx_queue *rxq = dev->data->rx_queues[i];
289
290                 if (rxq != NULL) {
291                         rxq->stopped = TRUE;
292                         vmxnet3_dev_rx_queue_reset(rxq);
293                 }
294         }
295 }
296
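/*
 * Completion descriptors are recognised via the generation bit: the device
 * writes each Vmxnet3_TxCompDesc with the current "gen" value, and the value
 * the driver expects flips every time next2proc wraps, so an entry whose gen
 * matches comp_ring->gen has not been processed yet.
 */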
297 static inline void
298 vmxnet3_tq_tx_complete(vmxnet3_tx_queue_t *txq)
299 {
300         int completed = 0;
301         struct rte_mbuf *mbuf;
302         vmxnet3_comp_ring_t *comp_ring = &txq->comp_ring;
303         struct Vmxnet3_TxCompDesc *tcd = (struct Vmxnet3_TxCompDesc *)
304                 (comp_ring->base + comp_ring->next2proc);
305
306         while (tcd->gen == comp_ring->gen) {
307
308                 /* Release cmd_ring descriptor and free mbuf */
309 #ifdef RTE_LIBRTE_VMXNET3_DEBUG_DRIVER
310                 VMXNET3_ASSERT(txq->cmd_ring.base[tcd->txdIdx].txd.eop == 1);
311 #endif
312                 mbuf = txq->cmd_ring.buf_info[tcd->txdIdx].m;
313                 if (unlikely(mbuf == NULL))
314                         rte_panic("EOP desc does not point to a valid mbuf");
315                 else
316                         rte_pktmbuf_free(mbuf);
317
318
319                 txq->cmd_ring.buf_info[tcd->txdIdx].m = NULL;
320                 /* Mark the txd for which tcd was generated as completed */
321                 vmxnet3_cmd_ring_adv_next2comp(&txq->cmd_ring);
322
323                 vmxnet3_comp_ring_adv_next2proc(comp_ring);
324                 tcd = (struct Vmxnet3_TxCompDesc *)(comp_ring->base +
325                                                     comp_ring->next2proc);
326                 completed++;
327         }
328
329         PMD_TX_LOG(DEBUG, "Processed %d tx comps & command descs.", completed);
330 }
331
332 uint16_t
333 vmxnet3_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts,
334                   uint16_t nb_pkts)
335 {
336         uint16_t nb_tx;
337         Vmxnet3_TxDesc *txd = NULL;
338         vmxnet3_buf_info_t *tbi = NULL;
339         struct vmxnet3_hw *hw;
340         struct rte_mbuf *txm;
341         vmxnet3_tx_queue_t *txq = tx_queue;
342
343         hw = txq->hw;
344
345         if (txq->stopped) {
346                 PMD_TX_LOG(DEBUG, "Tx queue is stopped.");
347                 return 0;
348         }
349
350         /* Free up the comp_descriptors aggressively */
351         vmxnet3_tq_tx_complete(txq);
352
353         nb_tx = 0;
354         while (nb_tx < nb_pkts) {
355
356                 if (vmxnet3_cmd_ring_desc_avail(&txq->cmd_ring)) {
357
358                         txm = tx_pkts[nb_tx];
359                         /* Multi-segment (scatter) packets are not supported yet; drop them */
360                         if (txm->nb_segs != 1) {
361                                 PMD_TX_LOG(DEBUG, "Multi-segment packets are not supported yet, dropping!");
362                                 rte_pktmbuf_free(tx_pkts[nb_tx]);
363                                 txq->stats.drop_total++;
364
365                                 nb_tx++;
366                                 continue;
367                         }
368
369                         /* The MTU does not include the Ethernet header, so allow for it here */
370                         if (txm->data_len > (hw->cur_mtu + ETHER_HDR_LEN)) {
371                                 PMD_TX_LOG(DEBUG, "Packet data_len higher than MTU");
372                                 rte_pktmbuf_free(tx_pkts[nb_tx]);
373                                 txq->stats.drop_total++;
374
375                                 nb_tx++;
376                                 continue;
377                         }
378
379                         txd = (Vmxnet3_TxDesc *)(txq->cmd_ring.base + txq->cmd_ring.next2fill);
380
381                         /* Fill the tx descriptor */
382                         tbi = txq->cmd_ring.buf_info + txq->cmd_ring.next2fill;
383                         tbi->bufPA = RTE_MBUF_DATA_DMA_ADDR(txm);
384                         txd->addr = tbi->bufPA;
385                         txd->len = txm->data_len;
386
387                         /* Mark the last descriptor as End of Packet. */
388                         txd->cq = 1;
389                         txd->eop = 1;
390
391                         /* Add VLAN tag if requested */
392                         if (txm->ol_flags & PKT_TX_VLAN_PKT) {
393                                 txd->ti = 1;
394                                 txd->tci = rte_cpu_to_le_16(txm->vlan_tci);
395                         }
396
397                         /* Record current mbuf for freeing it later in tx complete */
398 #ifdef RTE_LIBRTE_VMXNET3_DEBUG_DRIVER
399                         VMXNET3_ASSERT(txm);
400 #endif
401                         tbi->m = txm;
402
403                         /* Set the offloading mode to default */
404                         txd->hlen = 0;
405                         txd->om = VMXNET3_OM_NONE;
406                         txd->msscof = 0;
407
408                         /* finally flip the GEN bit of the SOP desc  */
409                         txd->gen = txq->cmd_ring.gen;
410                         txq->shared->ctrl.txNumDeferred++;
411
412                         /* move to the next2fill descriptor */
413                         vmxnet3_cmd_ring_adv_next2fill(&txq->cmd_ring);
414                         nb_tx++;
415
416                 } else {
417                         PMD_TX_LOG(DEBUG, "No free tx cmd desc(s)");
418                         txq->stats.drop_total += (nb_pkts - nb_tx);
419                         break;
420                 }
421         }
422
423         PMD_TX_LOG(DEBUG, "vmxnet3 txThreshold: %u", txq->shared->ctrl.txThreshold);
424
425         if (txq->shared->ctrl.txNumDeferred >= txq->shared->ctrl.txThreshold) {
426
427                 txq->shared->ctrl.txNumDeferred = 0;
428                 /* Notify vSwitch that packets are available. */
429                 VMXNET3_WRITE_BAR0_REG(hw, (VMXNET3_REG_TXPROD + txq->queue_id * VMXNET3_REG_ALIGN),
430                                        txq->cmd_ring.next2fill);
431         }
432
433         return nb_tx;
434 }
435
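/*
 * Illustrative sketch (not part of the driver): applications never call
 * vmxnet3_xmit_pkts() directly; they go through the generic ethdev burst API,
 * which dispatches here for vmxnet3 ports. Port/queue ids below are example
 * assumptions only.
 */
#if 0   /* example only, not compiled */
static void
example_tx_burst(struct rte_mbuf **pkts, uint16_t n)
{
        uint16_t sent = rte_eth_tx_burst(0 /* port */, 0 /* queue */, pkts, n);

        /* mbufs not accepted by the driver remain owned by the caller */
        while (sent < n)
                rte_pktmbuf_free(pkts[sent++]);
}
#endif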
436 /*
437  *  Allocates mbufs and clusters. Post rx descriptors with buffer details
438  *  so that device can receive packets in those buffers.
439  *      Ring layout:
440  *      Among the two rings, 1st ring contains buffers of type 0 and type1.
441  *      bufs_per_pkt is set such that for non-LRO cases all the buffers required
442  *      by a frame will fit in 1st ring (1st buf of type0 and rest of type1).
443  *      The 2nd ring contains buffers of type 1 alone and is mostly used
444  *      only for LRO.
445  *
446  */
447 static inline int
448 vmxnet3_post_rx_bufs(vmxnet3_rx_queue_t *rxq, uint8_t ring_id)
449 {
450         int err = 0;
451         uint32_t i = 0, val = 0;
452         struct vmxnet3_cmd_ring *ring = &rxq->cmd_ring[ring_id];
453
454         if (ring_id == 0) {
455                 /* Usually: One HEAD type buf per packet
456                  * val = (ring->next2fill % rxq->hw->bufs_per_pkt) ?
457                  * VMXNET3_RXD_BTYPE_BODY : VMXNET3_RXD_BTYPE_HEAD;
458                  */
459
460                 /* We use single packet buffer so all heads here */
461                 val = VMXNET3_RXD_BTYPE_HEAD;
462         } else {
463                 /* All BODY type buffers for 2nd ring */
464                 val = VMXNET3_RXD_BTYPE_BODY;
465         }
466
467         while (vmxnet3_cmd_ring_desc_avail(ring) > 0) {
468                 struct Vmxnet3_RxDesc *rxd;
469                 struct rte_mbuf *mbuf;
470                 vmxnet3_buf_info_t *buf_info = &ring->buf_info[ring->next2fill];
471
472                 rxd = (struct Vmxnet3_RxDesc *)(ring->base + ring->next2fill);
473
474                 /* Allocate blank mbuf for the current Rx Descriptor */
475                 mbuf = rte_rxmbuf_alloc(rxq->mp);
476                 if (unlikely(mbuf == NULL)) {
477                         PMD_RX_LOG(ERR, "Error allocating mbuf in %s", __func__);
478                         rxq->stats.rx_buf_alloc_failure++;
479                         err = ENOMEM;
480                         break;
481                 }
482
483                 /*
484                  * Store the mbuf pointer in buf_info[next2fill];
485                  * the buf_info entry plays the same role as a cookie in a virtio virtqueue
486                  */
487                 buf_info->m = mbuf;
488                 buf_info->len = (uint16_t)(mbuf->buf_len -
489                                            RTE_PKTMBUF_HEADROOM);
490                 buf_info->bufPA = RTE_MBUF_DATA_DMA_ADDR_DEFAULT(mbuf);
491
492                 /* Load Rx Descriptor with the buffer's GPA */
493                 rxd->addr = buf_info->bufPA;
494
495                 /* After this point rxd->addr MUST not be NULL */
496                 rxd->btype = val;
497                 rxd->len = buf_info->len;
498                 /* Flip gen bit at the end to change ownership */
499                 rxd->gen = ring->gen;
500
501                 vmxnet3_cmd_ring_adv_next2fill(ring);
502                 i++;
503         }
504
505         /* Return error only if no buffers are posted at present */
506         if (vmxnet3_cmd_ring_desc_avail(ring) >= (ring->size - 1))
507                 return -err;
508         else
509                 return i;
510 }
511
512 /*
513  * Process the Rx Completion Ring of given vmxnet3_rx_queue
514  * for nb_pkts burst and return the number of packets received
515  */
516 uint16_t
517 vmxnet3_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t nb_pkts)
518 {
519         uint16_t nb_rx;
520         uint32_t nb_rxd, idx;
521         uint8_t ring_idx;
522         vmxnet3_rx_queue_t *rxq;
523         Vmxnet3_RxCompDesc *rcd;
524         vmxnet3_buf_info_t *rbi;
525         Vmxnet3_RxDesc *rxd;
526         struct rte_mbuf *rxm = NULL;
527         struct vmxnet3_hw *hw;
528
529         nb_rx = 0;
530         ring_idx = 0;
531         nb_rxd = 0;
532         idx = 0;
533
534         rxq = rx_queue;
535         hw = rxq->hw;
536
537         rcd = &rxq->comp_ring.base[rxq->comp_ring.next2proc].rcd;
538
539         if (unlikely(rxq->stopped)) {
540                 PMD_RX_LOG(DEBUG, "Rx queue is stopped.");
541                 return 0;
542         }
543
544         while (rcd->gen == rxq->comp_ring.gen) {
545                 if (nb_rx >= nb_pkts)
546                         break;
547
548                 idx = rcd->rxdIdx;
549                 ring_idx = (uint8_t)((rcd->rqID == rxq->qid1) ? 0 : 1);
550                 rxd = (Vmxnet3_RxDesc *)rxq->cmd_ring[ring_idx].base + idx;
551                 rbi = rxq->cmd_ring[ring_idx].buf_info + idx;
552
553                 if (unlikely(rcd->sop != 1 || rcd->eop != 1)) {
554                         rte_pktmbuf_free_seg(rbi->m);
555                         PMD_RX_LOG(DEBUG, "Packet spread across multiple buffers.");
556                         goto rcd_done;
557                 }
558
559                 PMD_RX_LOG(DEBUG, "rxd idx: %d ring idx: %d.", idx, ring_idx);
560
561 #ifdef RTE_LIBRTE_VMXNET3_DEBUG_DRIVER
562                 VMXNET3_ASSERT(rcd->len <= rxd->len);
563                 VMXNET3_ASSERT(rbi->m);
564 #endif
565                 if (unlikely(rcd->len == 0)) {
566                         PMD_RX_LOG(DEBUG, "Rx buf was skipped. rxring[%d][%d]",
567                                    ring_idx, idx);
568 #ifdef RTE_LIBRTE_VMXNET3_DEBUG_DRIVER
569                         VMXNET3_ASSERT(rcd->sop && rcd->eop);
570 #endif
571                         rte_pktmbuf_free_seg(rbi->m);
572                         goto rcd_done;
573                 }
574
575                 /* Assuming a packet is coming in a single packet buffer */
576                 if (unlikely(rxd->btype != VMXNET3_RXD_BTYPE_HEAD)) {
577                         PMD_RX_LOG(DEBUG,
578                                    "Alert: misbehaving device, incorrect "
579                                    "buffer type used. Packet dropped.");
580                         rte_pktmbuf_free_seg(rbi->m);
581                         goto rcd_done;
582                 }
583 #ifdef RTE_LIBRTE_VMXNET3_DEBUG_DRIVER
584                 VMXNET3_ASSERT(rxd->btype == VMXNET3_RXD_BTYPE_HEAD);
585 #endif
586                 /* Get the packet buffer pointer from buf_info */
587                 rxm = rbi->m;
588
589                 /* Clear descriptor associated buf_info to be reused */
590                 rbi->m = NULL;
591                 rbi->bufPA = 0;
592
593                 /* Record the descriptor index at which this packet was received */
594                 rxq->cmd_ring[ring_idx].next2comp = idx;
595
596                 /* For RCD with EOP set, check if there is frame error */
597                 if (unlikely(rcd->err)) {
598                         rxq->stats.drop_total++;
599                         rxq->stats.drop_err++;
600
601                         if (!rcd->fcs) {
602                                 rxq->stats.drop_fcs++;
603                                 PMD_RX_LOG(ERR, "Recv packet dropped due to frame err.");
604                         }
605                         PMD_RX_LOG(ERR, "Error in received packet rcd#:%d rxd:%d",
606                                    (int)(rcd - (struct Vmxnet3_RxCompDesc *)
607                                          rxq->comp_ring.base), rcd->rxdIdx);
608                         rte_pktmbuf_free_seg(rxm);
609                         goto rcd_done;
610                 }
611
612                 /* Check for hardware stripped VLAN tag */
613                 if (rcd->ts) {
614                         PMD_RX_LOG(DEBUG, "Received packet with vlan ID: %d.",
615                                    rcd->tci);
616                         rxm->ol_flags = PKT_RX_VLAN_PKT;
617                         /* Copy vlan tag in packet buffer */
618                         rxm->vlan_tci = rte_le_to_cpu_16((uint16_t)rcd->tci);
619                 } else {
620                         rxm->ol_flags = 0;
621                         rxm->vlan_tci = 0;
622                 }
623
624                 /* Initialize newly received packet buffer */
625                 rxm->port = rxq->port_id;
626                 rxm->nb_segs = 1;
627                 rxm->next = NULL;
628                 rxm->pkt_len = (uint16_t)rcd->len;
629                 rxm->data_len = (uint16_t)rcd->len;
630                 rxm->data_off = RTE_PKTMBUF_HEADROOM;
631
632                 /* Check packet type, checksum errors, etc. Only IPv4 is supported for now. */
633                 if (rcd->v4) {
634                         struct ether_hdr *eth = rte_pktmbuf_mtod(rxm, struct ether_hdr *);
635                         struct ipv4_hdr *ip = (struct ipv4_hdr *)(eth + 1);
636
637                         if (((ip->version_ihl & 0xf) << 2) > (int)sizeof(struct ipv4_hdr))
638                                 rxm->ol_flags |= PKT_RX_IPV4_HDR_EXT;
639                         else
640                                 rxm->ol_flags |= PKT_RX_IPV4_HDR;
641
642                         if (!rcd->cnc) {
643                                 if (!rcd->ipc)
644                                         rxm->ol_flags |= PKT_RX_IP_CKSUM_BAD;
645
646                                 if ((rcd->tcp || rcd->udp) && !rcd->tuc)
647                                         rxm->ol_flags |= PKT_RX_L4_CKSUM_BAD;
648                         }
649                 }
650
651                 rx_pkts[nb_rx++] = rxm;
652 rcd_done:
653                 rxq->cmd_ring[ring_idx].next2comp = idx;
654                 VMXNET3_INC_RING_IDX_ONLY(rxq->cmd_ring[ring_idx].next2comp, rxq->cmd_ring[ring_idx].size);
655
656                 /* Refill the ring: allocate new mbufs and re-post Rx descriptors */
657                 vmxnet3_post_rx_bufs(rxq, ring_idx);
658                 if (unlikely(rxq->shared->ctrl.updateRxProd)) {
659                         VMXNET3_WRITE_BAR0_REG(hw, rxprod_reg[ring_idx] + (rxq->queue_id * VMXNET3_REG_ALIGN),
660                                                rxq->cmd_ring[ring_idx].next2fill);
661                 }
662
663                 /* Advance to the next descriptor in comp_ring */
664                 vmxnet3_comp_ring_adv_next2proc(&rxq->comp_ring);
665
666                 rcd = &rxq->comp_ring.base[rxq->comp_ring.next2proc].rcd;
667                 nb_rxd++;
668                 if (nb_rxd > rxq->cmd_ring[0].size) {
669                         PMD_RX_LOG(ERR,
670                                    "Used up the receive quota,"
671                                    " relinquishing control.");
672                         break;
673                 }
674         }
675
676         return nb_rx;
677 }
678
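/*
 * Illustrative sketch (not part of the driver): the receive path as seen from
 * an application, via the generic burst API that dispatches to
 * vmxnet3_recv_pkts(). Burst size and ids are example assumptions only.
 */
#if 0   /* example only, not compiled */
static void
example_rx_poll(void)
{
        struct rte_mbuf *pkts[32];
        uint16_t i, nb = rte_eth_rx_burst(0 /* port */, 0 /* queue */, pkts, 32);

        for (i = 0; i < nb; i++)
                rte_pktmbuf_free(pkts[i]);      /* an application would process the packet instead */
}
#endif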
679 /*
680  * Create memzone for device rings. malloc can't be used as the physical address is
681  * needed. If the memzone is already created, then this function returns a ptr
682  * to the old one.
683  */
684 static const struct rte_memzone *
685 ring_dma_zone_reserve(struct rte_eth_dev *dev, const char *ring_name,
686                       uint16_t queue_id, uint32_t ring_size, int socket_id)
687 {
688         char z_name[RTE_MEMZONE_NAMESIZE];
689         const struct rte_memzone *mz;
690
691         snprintf(z_name, sizeof(z_name), "%s_%s_%d_%d",
692                         dev->driver->pci_drv.name, ring_name,
693                         dev->data->port_id, queue_id);
694
695         mz = rte_memzone_lookup(z_name);
696         if (mz)
697                 return mz;
698
699         return rte_memzone_reserve_aligned(z_name, ring_size,
700                         socket_id, 0, VMXNET3_RING_BA_ALIGN);
701 }
702
703 int
704 vmxnet3_dev_tx_queue_setup(struct rte_eth_dev *dev,
705                            uint16_t queue_idx,
706                            uint16_t nb_desc,
707                            unsigned int socket_id,
708                            __attribute__((unused)) const struct rte_eth_txconf *tx_conf)
709 {
710         struct vmxnet3_hw     *hw = dev->data->dev_private;
711         const struct rte_memzone *mz;
712         struct vmxnet3_tx_queue *txq;
713         struct vmxnet3_cmd_ring *ring;
714         struct vmxnet3_comp_ring *comp_ring;
715         int size;
716
717         PMD_INIT_FUNC_TRACE();
718
719         if ((tx_conf->txq_flags & ETH_TXQ_FLAGS_NOMULTSEGS) !=
720             ETH_TXQ_FLAGS_NOMULTSEGS) {
721                 PMD_INIT_LOG(ERR, "Multi-segment TX is not supported yet");
722                 return -EINVAL;
723         }
724
725         if ((tx_conf->txq_flags & ETH_TXQ_FLAGS_NOOFFLOADS) !=
726             ETH_TXQ_FLAGS_NOOFFLOADS) {
727                 PMD_INIT_LOG(ERR, "TX offloads are not supported yet");
728                 return -EINVAL;
729         }
730
731         txq = rte_zmalloc("ethdev_tx_queue", sizeof(struct vmxnet3_tx_queue), CACHE_LINE_SIZE);
732         if (txq == NULL) {
733                 PMD_INIT_LOG(ERR, "Can not allocate tx queue structure");
734                 return -ENOMEM;
735         }
736
737         txq->queue_id = queue_idx;
738         txq->port_id = dev->data->port_id;
739         txq->shared = &hw->tqd_start[queue_idx];
740         txq->hw = hw;
741         txq->qid = queue_idx;
742         txq->stopped = TRUE;
743
744         ring = &txq->cmd_ring;
745         comp_ring = &txq->comp_ring;
746
747         /* Tx vmxnet ring length should be between 512-4096 */
748         if (nb_desc < VMXNET3_DEF_TX_RING_SIZE) {
749                 PMD_INIT_LOG(ERR, "VMXNET3 Tx Ring Size Min: %u",
750                              VMXNET3_DEF_TX_RING_SIZE);
751                 return -EINVAL;
752         } else if (nb_desc > VMXNET3_TX_RING_MAX_SIZE) {
753                 PMD_INIT_LOG(ERR, "VMXNET3 Tx Ring Size Max: %u",
754                              VMXNET3_TX_RING_MAX_SIZE);
755                 return -EINVAL;
756         } else {
757                 ring->size = nb_desc;
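                /* round the ring size down to the alignment the device requires */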
758                 ring->size &= ~VMXNET3_RING_SIZE_MASK;
759         }
760         comp_ring->size = ring->size;
761
762         /* Tx vmxnet rings structure initialization */
763         ring->next2fill = 0;
764         ring->next2comp = 0;
765         ring->gen = VMXNET3_INIT_GEN;
766         comp_ring->next2proc = 0;
767         comp_ring->gen = VMXNET3_INIT_GEN;
768
769         size = sizeof(struct Vmxnet3_TxDesc) * ring->size;
770         size += sizeof(struct Vmxnet3_TxCompDesc) * comp_ring->size;
771
772         mz = ring_dma_zone_reserve(dev, "txdesc", queue_idx, size, socket_id);
773         if (mz == NULL) {
774                 PMD_INIT_LOG(ERR, "ERROR: Creating queue descriptors zone");
775                 return -ENOMEM;
776         }
777         memset(mz->addr, 0, mz->len);
778
779         /* cmd_ring initialization */
780         ring->base = mz->addr;
781         ring->basePA = mz->phys_addr;
782
783         /* comp_ring initialization */
784         comp_ring->base = ring->base + ring->size;
785         comp_ring->basePA = ring->basePA +
786                 (sizeof(struct Vmxnet3_TxDesc) * ring->size);
787
788         /* cmd_ring0 buf_info allocation */
789         ring->buf_info = rte_zmalloc("tx_ring_buf_info",
790                                      ring->size * sizeof(vmxnet3_buf_info_t), CACHE_LINE_SIZE);
791         if (ring->buf_info == NULL) {
792                 PMD_INIT_LOG(ERR, "ERROR: Creating tx_buf_info structure");
793                 return -ENOMEM;
794         }
795
796         /* Update the data portion with txq */
797         dev->data->tx_queues[queue_idx] = txq;
798
799         return 0;
800 }
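/*
 * Illustrative sketch (not part of the driver): because of the checks above,
 * an application must pass txq_flags that disable multi-segment mbufs and TX
 * offloads when setting up a vmxnet3 TX queue. Ids and descriptor count are
 * example assumptions only.
 */
#if 0   /* example only, not compiled */
static int
example_tx_queue_setup(uint8_t port_id)
{
        struct rte_eth_txconf txconf = {
                .txq_flags = ETH_TXQ_FLAGS_NOMULTSEGS | ETH_TXQ_FLAGS_NOOFFLOADS,
        };

        return rte_eth_tx_queue_setup(port_id, 0, 512, rte_socket_id(), &txconf);
}
#endif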
801
802 int
803 vmxnet3_dev_rx_queue_setup(struct rte_eth_dev *dev,
804                            uint16_t queue_idx,
805                            uint16_t nb_desc,
806                            unsigned int socket_id,
807                            __attribute__((unused)) const struct rte_eth_rxconf *rx_conf,
808                            struct rte_mempool *mp)
809 {
810         const struct rte_memzone *mz;
811         struct vmxnet3_rx_queue *rxq;
812         struct vmxnet3_hw     *hw = dev->data->dev_private;
813         struct vmxnet3_cmd_ring *ring0, *ring1, *ring;
814         struct vmxnet3_comp_ring *comp_ring;
815         int size;
816         uint8_t i;
817         char mem_name[32];
818         uint16_t buf_size;
819         struct rte_pktmbuf_pool_private *mbp_priv;
820
821         PMD_INIT_FUNC_TRACE();
822
823         mbp_priv = (struct rte_pktmbuf_pool_private *)
824                 rte_mempool_get_priv(mp);
825         buf_size = (uint16_t) (mbp_priv->mbuf_data_room_size -
826                                RTE_PKTMBUF_HEADROOM);
827
828         if (dev->data->dev_conf.rxmode.max_rx_pkt_len > buf_size) {
829                 PMD_INIT_LOG(ERR, "buf_size = %u, max_pkt_len = %u, "
830                              "VMXNET3 doesn't support scatter packets yet",
831                              buf_size, dev->data->dev_conf.rxmode.max_rx_pkt_len);
832                 return -EINVAL;
833         }
834
835         rxq = rte_zmalloc("ethdev_rx_queue", sizeof(struct vmxnet3_rx_queue), CACHE_LINE_SIZE);
836         if (rxq == NULL) {
837                 PMD_INIT_LOG(ERR, "Can not allocate rx queue structure");
838                 return -ENOMEM;
839         }
840
841         rxq->mp = mp;
842         rxq->queue_id = queue_idx;
843         rxq->port_id = dev->data->port_id;
844         rxq->shared = &hw->rqd_start[queue_idx];
845         rxq->hw = hw;
846         rxq->qid1 = queue_idx;
847         rxq->qid2 = queue_idx + hw->num_rx_queues;
848         rxq->stopped = TRUE;
849
850         ring0 = &rxq->cmd_ring[0];
851         ring1 = &rxq->cmd_ring[1];
852         comp_ring = &rxq->comp_ring;
853
854         /* Rx vmxnet rings length should be between 256-4096 */
855         if (nb_desc < VMXNET3_DEF_RX_RING_SIZE) {
856                 PMD_INIT_LOG(ERR, "VMXNET3 Rx Ring Size Min: 256");
857                 return -EINVAL;
858         } else if (nb_desc > VMXNET3_RX_RING_MAX_SIZE) {
859                 PMD_INIT_LOG(ERR, "VMXNET3 Rx Ring Size Max: 4096");
860                 return -EINVAL;
861         } else {
862                 ring0->size = nb_desc;
863                 ring0->size &= ~VMXNET3_RING_SIZE_MASK;
864                 ring1->size = ring0->size;
865         }
866
867         comp_ring->size = ring0->size + ring1->size;
868
869         /* Rx vmxnet rings structure initialization */
870         ring0->next2fill = 0;
871         ring1->next2fill = 0;
872         ring0->next2comp = 0;
873         ring1->next2comp = 0;
874         ring0->gen = VMXNET3_INIT_GEN;
875         ring1->gen = VMXNET3_INIT_GEN;
876         comp_ring->next2proc = 0;
877         comp_ring->gen = VMXNET3_INIT_GEN;
878
879         size = sizeof(struct Vmxnet3_RxDesc) * (ring0->size + ring1->size);
880         size += sizeof(struct Vmxnet3_RxCompDesc) * comp_ring->size;
881
882         mz = ring_dma_zone_reserve(dev, "rxdesc", queue_idx, size, socket_id);
883         if (mz == NULL) {
884                 PMD_INIT_LOG(ERR, "ERROR: Creating queue descriptors zone");
885                 return -ENOMEM;
886         }
887         memset(mz->addr, 0, mz->len);
888
889         /* cmd_ring0 initialization */
890         ring0->base = mz->addr;
891         ring0->basePA = mz->phys_addr;
892
893         /* cmd_ring1 initialization */
894         ring1->base = ring0->base + ring0->size;
895         ring1->basePA = ring0->basePA + sizeof(struct Vmxnet3_RxDesc) * ring0->size;
896
897         /* comp_ring initialization */
898         comp_ring->base = ring1->base +  ring1->size;
899         comp_ring->basePA = ring1->basePA + sizeof(struct Vmxnet3_RxDesc) *
900                 ring1->size;
901
902         /* cmd_ring0-cmd_ring1 buf_info allocation */
903         for (i = 0; i < VMXNET3_RX_CMDRING_SIZE; i++) {
904
905                 ring = &rxq->cmd_ring[i];
906                 ring->rid = i;
907                 snprintf(mem_name, sizeof(mem_name), "rx_ring_%d_buf_info", i);
908
909                 ring->buf_info = rte_zmalloc(mem_name, ring->size * sizeof(vmxnet3_buf_info_t), CACHE_LINE_SIZE);
910                 if (ring->buf_info == NULL) {
911                         PMD_INIT_LOG(ERR, "ERROR: Creating rx_buf_info structure");
912                         return -ENOMEM;
913                 }
914         }
915
916         /* Update the data portion with rxq */
917         dev->data->rx_queues[queue_idx] = rxq;
918
919         return 0;
920 }
921
922 /*
923  * Initializes Receive Unit
924  * Load mbufs in rx queue in advance
925  */
926 int
927 vmxnet3_dev_rxtx_init(struct rte_eth_dev *dev)
928 {
929         struct vmxnet3_hw *hw = dev->data->dev_private;
930
931         int i, ret;
932         uint8_t j;
933
934         PMD_INIT_FUNC_TRACE();
935
936         for (i = 0; i < hw->num_rx_queues; i++) {
937                 vmxnet3_rx_queue_t *rxq = dev->data->rx_queues[i];
938
939                 for (j = 0; j < VMXNET3_RX_CMDRING_SIZE; j++) {
940                         /* Post as many Rx buffers as the ring can hold */
941                         ret = vmxnet3_post_rx_bufs(rxq, j);
942                         if (ret <= 0) {
943                                 PMD_INIT_LOG(ERR, "ERROR: Posting Rxq: %d buffers ring: %d", i, j);
944                                 return -ret;
945                         }
946                         /* Update the device with next2fill so it can use the newly posted buffers */
947                         if (unlikely(rxq->shared->ctrl.updateRxProd)) {
948                                 VMXNET3_WRITE_BAR0_REG(hw, rxprod_reg[j] + (rxq->queue_id * VMXNET3_REG_ALIGN),
949                                                        rxq->cmd_ring[j].next2fill);
950                         }
951                 }
952                 rxq->stopped = FALSE;
953         }
954
955         for (i = 0; i < dev->data->nb_tx_queues; i++) {
956                 struct vmxnet3_tx_queue *txq = dev->data->tx_queues[i];
957
958                 txq->stopped = FALSE;
959         }
960
961         return 0;
962 }
963
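/* Default 40-byte Toeplitz hash key (the same default key used by other DPDK PMDs) */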
964 static uint8_t rss_intel_key[40] = {
965         0x6D, 0x5A, 0x56, 0xDA, 0x25, 0x5B, 0x0E, 0xC2,
966         0x41, 0x67, 0x25, 0x3D, 0x43, 0xA3, 0x8F, 0xB0,
967         0xD0, 0xCA, 0x2B, 0xCB, 0xAE, 0x7B, 0x30, 0xB4,
968         0x77, 0xCB, 0x2D, 0xA3, 0x80, 0x30, 0xF2, 0x0C,
969         0x6A, 0x42, 0xB7, 0x3B, 0xBE, 0xAC, 0x01, 0xFA,
970 };
971
972 /*
973  * Configure RSS feature
974  */
975 int
976 vmxnet3_rss_configure(struct rte_eth_dev *dev)
977 {
978 #define VMXNET3_RSS_OFFLOAD_ALL ( \
979                 ETH_RSS_IPV4 | \
980                 ETH_RSS_IPV4_TCP | \
981                 ETH_RSS_IPV6 | \
982                 ETH_RSS_IPV6_TCP)
983
984         struct vmxnet3_hw *hw = dev->data->dev_private;
985         struct VMXNET3_RSSConf *dev_rss_conf;
986         struct rte_eth_rss_conf *port_rss_conf;
987         uint64_t rss_hf;
988         uint8_t i, j;
989
990         PMD_INIT_FUNC_TRACE();
991
992         dev_rss_conf = hw->rss_conf;
993         port_rss_conf = &dev->data->dev_conf.rx_adv_conf.rss_conf;
994
995         /* loading hashFunc */
996         dev_rss_conf->hashFunc = VMXNET3_RSS_HASH_FUNC_TOEPLITZ;
997         /* loading hashKeySize */
998         dev_rss_conf->hashKeySize = VMXNET3_RSS_MAX_KEY_SIZE;
999         /* loading indTableSize : Must not exceed VMXNET3_RSS_MAX_IND_TABLE_SIZE (128)*/
1000         dev_rss_conf->indTableSize = (uint16_t)(hw->num_rx_queues * 4);
1001
1002         if (port_rss_conf->rss_key == NULL) {
1003                 /* Default hash key */
1004                 port_rss_conf->rss_key = rss_intel_key;
1005         }
1006
1007         /* loading hashKey */
1008         memcpy(&dev_rss_conf->hashKey[0], port_rss_conf->rss_key, dev_rss_conf->hashKeySize);
1009
1010         /* loading indTable */
1011         for (i = 0, j = 0; i < dev_rss_conf->indTableSize; i++, j++) {
1012                 if (j == dev->data->nb_rx_queues)
1013                         j = 0;
1014                 dev_rss_conf->indTable[i] = j;
1015         }
1016
1017         /* loading hashType */
1018         dev_rss_conf->hashType = 0;
1019         rss_hf = port_rss_conf->rss_hf & VMXNET3_RSS_OFFLOAD_ALL;
1020         if (rss_hf & ETH_RSS_IPV4)
1021                 dev_rss_conf->hashType |= VMXNET3_RSS_HASH_TYPE_IPV4;
1022         if (rss_hf & ETH_RSS_IPV4_TCP)
1023                 dev_rss_conf->hashType |= VMXNET3_RSS_HASH_TYPE_TCP_IPV4;
1024         if (rss_hf & ETH_RSS_IPV6)
1025                 dev_rss_conf->hashType |= VMXNET3_RSS_HASH_TYPE_IPV6;
1026         if (rss_hf & ETH_RSS_IPV6_TCP)
1027                 dev_rss_conf->hashType |= VMXNET3_RSS_HASH_TYPE_TCP_IPV6;
1028
1029         return VMXNET3_SUCCESS;
1030 }
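/*
 * Illustrative sketch (not part of the driver): vmxnet3_rss_configure() picks
 * up the RSS settings the application supplies in the port configuration,
 * e.g. something like the following (values are example assumptions only).
 */
#if 0   /* example only, not compiled */
static const struct rte_eth_conf example_port_conf = {
        .rxmode = {
                .mq_mode = ETH_MQ_RX_RSS,
        },
        .rx_adv_conf = {
                .rss_conf = {
                        .rss_key = NULL,        /* NULL selects rss_intel_key above */
                        .rss_hf = ETH_RSS_IPV4 | ETH_RSS_IPV4_TCP |
                                  ETH_RSS_IPV6 | ETH_RSS_IPV6_TCP,
                },
        },
};
#endif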
1031
1032 /*
1033  * Configure VLAN Filter feature
1034  */
1035 int
1036 vmxnet3_vlan_configure(struct rte_eth_dev *dev)
1037 {
1038         uint8_t i;
1039         struct vmxnet3_hw *hw = dev->data->dev_private;
1040         uint32_t *vf_table = hw->shared->devRead.rxFilterConf.vfTable;
1041
1042         PMD_INIT_FUNC_TRACE();
1043
1044         /* Initialize the VLAN filter table */
1045         for (i = 0; i < VMXNET3_VFT_SIZE; i++) {
1046                 /* Filter all vlan tags out by default */
1047                 vf_table[i] = 0;
1048                 /* To-Do: Provide another routine in dev_ops for user config */
1049
1050                 PMD_INIT_LOG(DEBUG, "Registering VLAN portid: %"PRIu8" tag %u",
1051                                         dev->data->port_id, vf_table[i]);
1052         }
1053
1054         return VMXNET3_SUCCESS;
1055 }