vmxnet3: add Rx check offloads
lib/librte_pmd_vmxnet3/vmxnet3_rxtx.c
1 /*-
2  *   BSD LICENSE
3  *
4  *   Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
5  *   All rights reserved.
6  *
7  *   Redistribution and use in source and binary forms, with or without
8  *   modification, are permitted provided that the following conditions
9  *   are met:
10  *
11  *     * Redistributions of source code must retain the above copyright
12  *       notice, this list of conditions and the following disclaimer.
13  *     * Redistributions in binary form must reproduce the above copyright
14  *       notice, this list of conditions and the following disclaimer in
15  *       the documentation and/or other materials provided with the
16  *       distribution.
17  *     * Neither the name of Intel Corporation nor the names of its
18  *       contributors may be used to endorse or promote products derived
19  *       from this software without specific prior written permission.
20  *
21  *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
22  *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
23  *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
24  *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
25  *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
26  *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
27  *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
28  *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
29  *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
30  *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
31  *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
32  */
33
34 #include <sys/queue.h>
35
36 #include <stdio.h>
37 #include <stdlib.h>
38 #include <string.h>
39 #include <errno.h>
40 #include <stdint.h>
41 #include <stdarg.h>
42 #include <unistd.h>
43 #include <inttypes.h>
44
45 #include <rte_byteorder.h>
46 #include <rte_common.h>
47 #include <rte_cycles.h>
48 #include <rte_log.h>
49 #include <rte_debug.h>
50 #include <rte_interrupts.h>
51 #include <rte_pci.h>
52 #include <rte_memory.h>
53 #include <rte_memzone.h>
54 #include <rte_launch.h>
55 #include <rte_tailq.h>
56 #include <rte_eal.h>
57 #include <rte_per_lcore.h>
58 #include <rte_lcore.h>
59 #include <rte_atomic.h>
60 #include <rte_branch_prediction.h>
61 #include <rte_ring.h>
62 #include <rte_mempool.h>
63 #include <rte_malloc.h>
64 #include <rte_mbuf.h>
65 #include <rte_ether.h>
66 #include <rte_ethdev.h>
67 #include <rte_prefetch.h>
68 #include <rte_ip.h>
69 #include <rte_udp.h>
70 #include <rte_tcp.h>
71 #include <rte_sctp.h>
72 #include <rte_string_fns.h>
73 #include <rte_errno.h>
74
75 #include "vmxnet3/vmxnet3_defs.h"
76 #include "vmxnet3_ring.h"
77
78 #include "vmxnet3_logs.h"
79 #include "vmxnet3_ethdev.h"
80
81
82 #define RTE_MBUF_DATA_DMA_ADDR(mb) \
83         (uint64_t) ((mb)->buf_physaddr + (mb)->data_off)
84
85 #define RTE_MBUF_DATA_DMA_ADDR_DEFAULT(mb) \
86         (uint64_t) ((mb)->buf_physaddr + RTE_PKTMBUF_HEADROOM)
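
/*
 * Both helpers compute the bus address programmed into a descriptor:
 * buf_physaddr is the physical address of the mbuf's data buffer, and
 * data_off (or the fixed RTE_PKTMBUF_HEADROOM, for freshly allocated buffers
 * whose data_off has not been set yet) is the offset of the packet data
 * within that buffer.
 */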
87
88 static uint32_t rxprod_reg[2] = {VMXNET3_REG_RXPROD, VMXNET3_REG_RXPROD2};
89
90 static inline int vmxnet3_post_rx_bufs(vmxnet3_rx_queue_t* , uint8_t);
91 static inline void vmxnet3_tq_tx_complete(vmxnet3_tx_queue_t *);
92 #ifdef RTE_LIBRTE_VMXNET3_DEBUG_DRIVER_NOT_USED
93 static void vmxnet3_rxq_dump(struct vmxnet3_rx_queue *);
94 static void vmxnet3_txq_dump(struct vmxnet3_tx_queue *);
95 #endif
96
97 static inline struct rte_mbuf *
98 rte_rxmbuf_alloc(struct rte_mempool *mp)
99 {
100         struct rte_mbuf *m;
101
102         m = __rte_mbuf_raw_alloc(mp);
103         __rte_mbuf_sanity_check_raw(m, 0);
104         return m;
105 }
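
/*
 * Raw allocation skips the field reset done by rte_pktmbuf_alloc(); that is
 * safe here because the Rx path below fills in data_off, nb_segs, next and
 * the length fields itself once a completion arrives, so the extra reset
 * would only cost cycles in the hot path.
 */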
106
107 #ifdef RTE_LIBRTE_VMXNET3_DEBUG_DRIVER_NOT_USED
108 static void
109 vmxnet3_rxq_dump(struct vmxnet3_rx_queue *rxq)
110 {
111         uint32_t avail = 0;
112
113         if (rxq == NULL)
114                 return;
115
116         PMD_RX_LOG(DEBUG,
117                    "RXQ: cmd0 base : 0x%p cmd1 base : 0x%p comp ring base : 0x%p.",
118                    rxq->cmd_ring[0].base, rxq->cmd_ring[1].base, rxq->comp_ring.base);
119         PMD_RX_LOG(DEBUG,
120                    "RXQ: cmd0 basePA : 0x%lx cmd1 basePA : 0x%lx comp ring basePA : 0x%lx.",
121                    (unsigned long)rxq->cmd_ring[0].basePA,
122                    (unsigned long)rxq->cmd_ring[1].basePA,
123                    (unsigned long)rxq->comp_ring.basePA);
124
125         avail = vmxnet3_cmd_ring_desc_avail(&rxq->cmd_ring[0]);
126         PMD_RX_LOG(DEBUG,
127                    "RXQ:cmd0: size=%u; free=%u; next2proc=%u; queued=%u",
128                    (uint32_t)rxq->cmd_ring[0].size, avail,
129                    rxq->comp_ring.next2proc,
130                    rxq->cmd_ring[0].size - avail);
131
132         avail = vmxnet3_cmd_ring_desc_avail(&rxq->cmd_ring[1]);
133         PMD_RX_LOG(DEBUG, "RXQ:cmd1 size=%u; free=%u; next2proc=%u; queued=%u",
134                    (uint32_t)rxq->cmd_ring[1].size, avail, rxq->comp_ring.next2proc,
135                    rxq->cmd_ring[1].size - avail);
136
137 }
138
139 static void
140 vmxnet3_txq_dump(struct vmxnet3_tx_queue *txq)
141 {
142         uint32_t avail = 0;
143
144         if (txq == NULL)
145                 return;
146
147         PMD_TX_LOG(DEBUG, "TXQ: cmd base : 0x%p comp ring base : 0x%p.",
148                    txq->cmd_ring.base, txq->comp_ring.base);
149         PMD_TX_LOG(DEBUG, "TXQ: cmd basePA : 0x%lx comp ring basePA : 0x%lx.",
150                    (unsigned long)txq->cmd_ring.basePA,
151                    (unsigned long)txq->comp_ring.basePA);
152
153         avail = vmxnet3_cmd_ring_desc_avail(&txq->cmd_ring);
154         PMD_TX_LOG(DEBUG, "TXQ: size=%u; free=%u; next2proc=%u; queued=%u",
155                    (uint32_t)txq->cmd_ring.size, avail,
156                    txq->comp_ring.next2proc, txq->cmd_ring.size - avail);
157 }
158 #endif
159
160 static inline void
161 vmxnet3_cmd_ring_release_mbufs(vmxnet3_cmd_ring_t *ring)
162 {
163         while (ring->next2comp != ring->next2fill) {
164                 /* No need to worry about tx desc ownership, device is quiesced by now. */
165                 vmxnet3_buf_info_t *buf_info = ring->buf_info + ring->next2comp;
166
167                 if (buf_info->m) {
168                         rte_pktmbuf_free(buf_info->m);
169                         buf_info->m = NULL;
170                         buf_info->bufPA = 0;
171                         buf_info->len = 0;
172                 }
173                 vmxnet3_cmd_ring_adv_next2comp(ring);
174         }
175 }
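
/*
 * The [next2comp, next2fill) window is exactly the set of descriptors that
 * were handed to the device but not yet completed, so walking that window is
 * enough to free every outstanding mbuf once the device has been quiesced.
 */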
176
177 static void
178 vmxnet3_cmd_ring_release(vmxnet3_cmd_ring_t *ring)
179 {
180         vmxnet3_cmd_ring_release_mbufs(ring);
181         rte_free(ring->buf_info);
182         ring->buf_info = NULL;
183 }
184
185
186 void
187 vmxnet3_dev_tx_queue_release(void *txq)
188 {
189         vmxnet3_tx_queue_t *tq = txq;
190
191         if (tq != NULL) {
192                 /* Release the cmd_ring */
193                 vmxnet3_cmd_ring_release(&tq->cmd_ring);
194         }
195 }
196
197 void
198 vmxnet3_dev_rx_queue_release(void *rxq)
199 {
200         int i;
201         vmxnet3_rx_queue_t *rq = rxq;
202
203         if (rq != NULL) {
204                 /* Release both the cmd_rings */
205                 for (i = 0; i < VMXNET3_RX_CMDRING_SIZE; i++)
206                         vmxnet3_cmd_ring_release(&rq->cmd_ring[i]);
207         }
208 }
209
210 static void
211 vmxnet3_dev_tx_queue_reset(void *txq)
212 {
213         vmxnet3_tx_queue_t *tq = txq;
214         struct vmxnet3_cmd_ring *ring = &tq->cmd_ring;
215         struct vmxnet3_comp_ring *comp_ring = &tq->comp_ring;
216         int size;
217
218         if (tq != NULL) {
219                 /* Release the cmd_ring mbufs */
220                 vmxnet3_cmd_ring_release_mbufs(&tq->cmd_ring);
221         }
222
223         /* Tx vmxnet rings structure initialization */
224         ring->next2fill = 0;
225         ring->next2comp = 0;
226         ring->gen = VMXNET3_INIT_GEN;
227         comp_ring->next2proc = 0;
228         comp_ring->gen = VMXNET3_INIT_GEN;
229
230         size = sizeof(struct Vmxnet3_TxDesc) * ring->size;
231         size += sizeof(struct Vmxnet3_TxCompDesc) * comp_ring->size;
232
233         memset(ring->base, 0, size);
234 }
235
236 static void
237 vmxnet3_dev_rx_queue_reset(void *rxq)
238 {
239         int i;
240         vmxnet3_rx_queue_t *rq = rxq;
241         struct vmxnet3_cmd_ring *ring0, *ring1;
242         struct vmxnet3_comp_ring *comp_ring;
243         int size;
244
245         if (rq != NULL) {
246                 /* Release both the cmd_rings mbufs */
247                 for (i = 0; i < VMXNET3_RX_CMDRING_SIZE; i++)
248                         vmxnet3_cmd_ring_release_mbufs(&rq->cmd_ring[i]);
249         }
250
251         ring0 = &rq->cmd_ring[0];
252         ring1 = &rq->cmd_ring[1];
253         comp_ring = &rq->comp_ring;
254
255         /* Rx vmxnet rings structure initialization */
256         ring0->next2fill = 0;
257         ring1->next2fill = 0;
258         ring0->next2comp = 0;
259         ring1->next2comp = 0;
260         ring0->gen = VMXNET3_INIT_GEN;
261         ring1->gen = VMXNET3_INIT_GEN;
262         comp_ring->next2proc = 0;
263         comp_ring->gen = VMXNET3_INIT_GEN;
264
265         size = sizeof(struct Vmxnet3_RxDesc) * (ring0->size + ring1->size);
266         size += sizeof(struct Vmxnet3_RxCompDesc) * comp_ring->size;
267
268         memset(ring0->base, 0, size);
269 }
270
271 void
272 vmxnet3_dev_clear_queues(struct rte_eth_dev *dev)
273 {
274         unsigned i;
275
276         PMD_INIT_FUNC_TRACE();
277
278         for (i = 0; i < dev->data->nb_tx_queues; i++) {
279                 struct vmxnet3_tx_queue *txq = dev->data->tx_queues[i];
280
281                 if (txq != NULL) {
282                         txq->stopped = TRUE;
283                         vmxnet3_dev_tx_queue_reset(txq);
284                 }
285         }
286
287         for (i = 0; i < dev->data->nb_rx_queues; i++) {
288                 struct vmxnet3_rx_queue *rxq = dev->data->rx_queues[i];
289
290                 if (rxq != NULL) {
291                         rxq->stopped = TRUE;
292                         vmxnet3_dev_rx_queue_reset(rxq);
293                 }
294         }
295 }
296
297 static inline void
298 vmxnet3_tq_tx_complete(vmxnet3_tx_queue_t *txq)
299 {
300         int completed = 0;
301         struct rte_mbuf *mbuf;
302         vmxnet3_comp_ring_t *comp_ring = &txq->comp_ring;
303         struct Vmxnet3_TxCompDesc *tcd = (struct Vmxnet3_TxCompDesc *)
304                 (comp_ring->base + comp_ring->next2proc);
305
306         while (tcd->gen == comp_ring->gen) {
307
308                 /* Release cmd_ring descriptor and free mbuf */
309 #ifdef RTE_LIBRTE_VMXNET3_DEBUG_DRIVER
310                 VMXNET3_ASSERT(txq->cmd_ring.base[tcd->txdIdx].txd.eop == 1);
311 #endif
312                 mbuf = txq->cmd_ring.buf_info[tcd->txdIdx].m;
313                 if (unlikely(mbuf == NULL))
314                         rte_panic("EOP desc does not point to a valid mbuf");
315                 else
316                         rte_pktmbuf_free(mbuf);
317
318
319                 txq->cmd_ring.buf_info[tcd->txdIdx].m = NULL;
320                 /* Mark the txd for which tcd was generated as completed */
321                 vmxnet3_cmd_ring_adv_next2comp(&txq->cmd_ring);
322
323                 vmxnet3_comp_ring_adv_next2proc(comp_ring);
324                 tcd = (struct Vmxnet3_TxCompDesc *)(comp_ring->base +
325                                                     comp_ring->next2proc);
326                 completed++;
327         }
328
329         PMD_TX_LOG(DEBUG, "Processed %d tx comps & command descs.", completed);
330 }
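
/*
 * Completion ownership is tracked with the gen (generation) bit instead of a
 * separate "done" flag: the device writes each completion descriptor with the
 * generation it currently owns, and vmxnet3_comp_ring_adv_next2proc() flips
 * comp_ring->gen whenever next2proc wraps, so "tcd->gen == comp_ring->gen"
 * holds exactly for the entries written by the device since the driver's
 * last pass over the ring.
 */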
331
332 uint16_t
333 vmxnet3_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts,
334                   uint16_t nb_pkts)
335 {
336         uint16_t nb_tx;
337         Vmxnet3_TxDesc *txd = NULL;
338         vmxnet3_buf_info_t *tbi = NULL;
339         struct vmxnet3_hw *hw;
340         struct rte_mbuf *txm;
341         vmxnet3_tx_queue_t *txq = tx_queue;
342
343         hw = txq->hw;
344
345         if (txq->stopped) {
346                 PMD_TX_LOG(DEBUG, "Tx queue is stopped.");
347                 return 0;
348         }
349
350         /* Free up the comp_descriptors aggressively */
351         vmxnet3_tq_tx_complete(txq);
352
353         nb_tx = 0;
354         while (nb_tx < nb_pkts) {
355
356                 if (vmxnet3_cmd_ring_desc_avail(&txq->cmd_ring)) {
357
358                         txm = tx_pkts[nb_tx];
359                         /* Multi-segment (scatter) packets are not supported yet; drop them */
360                         if (txm->nb_segs != 1) {
361                                 PMD_TX_LOG(DEBUG, "Don't support scatter packets yet, drop!");
362                                 rte_pktmbuf_free(tx_pkts[nb_tx]);
363                                 txq->stats.drop_total++;
364
365                                 nb_tx++;
366                                 continue;
367                         }
368
369                         /* cur_mtu does not include the Ethernet header length */
370                         if (txm->data_len > (hw->cur_mtu + ETHER_HDR_LEN)) {
371                                 PMD_TX_LOG(DEBUG, "Packet data_len higher than MTU");
372                                 rte_pktmbuf_free(tx_pkts[nb_tx]);
373                                 txq->stats.drop_total++;
374
375                                 nb_tx++;
376                                 continue;
377                         }
378
379                         txd = (Vmxnet3_TxDesc *)(txq->cmd_ring.base + txq->cmd_ring.next2fill);
380
381                         /* Fill the tx descriptor */
382                         tbi = txq->cmd_ring.buf_info + txq->cmd_ring.next2fill;
383                         tbi->bufPA = RTE_MBUF_DATA_DMA_ADDR(txm);
384                         txd->addr = tbi->bufPA;
385                         txd->len = txm->data_len;
386
387                         /* Mark the last descriptor as End of Packet. */
388                         txd->cq = 1;
389                         txd->eop = 1;
390
391                         /* Add VLAN tag if requested */
392                         if (txm->ol_flags & PKT_TX_VLAN_PKT) {
393                                 txd->ti = 1;
394                                 txd->tci = rte_cpu_to_le_16(txm->vlan_tci);
395                         }
396
397                         /* Record current mbuf for freeing it later in tx complete */
398 #ifdef RTE_LIBRTE_VMXNET3_DEBUG_DRIVER
399                         VMXNET3_ASSERT(txm);
400 #endif
401                         tbi->m = txm;
402
403                         /* Set the offloading mode to default */
404                         txd->hlen = 0;
405                         txd->om = VMXNET3_OM_NONE;
406                         txd->msscof = 0;
407
408                         /* finally flip the GEN bit of the SOP desc  */
409                         txd->gen = txq->cmd_ring.gen;
410                         txq->shared->ctrl.txNumDeferred++;
411
412                         /* move to the next2fill descriptor */
413                         vmxnet3_cmd_ring_adv_next2fill(&txq->cmd_ring);
414                         nb_tx++;
415
416                 } else {
417                         PMD_TX_LOG(DEBUG, "No free tx cmd desc(s)");
418                         txq->stats.drop_total += (nb_pkts - nb_tx);
419                         break;
420                 }
421         }
422
423         PMD_TX_LOG(DEBUG, "vmxnet3 txThreshold: %u", txq->shared->ctrl.txThreshold);
424
425         if (txq->shared->ctrl.txNumDeferred >= txq->shared->ctrl.txThreshold) {
426
427                 txq->shared->ctrl.txNumDeferred = 0;
428                 /* Notify vSwitch that packets are available. */
429                 VMXNET3_WRITE_BAR0_REG(hw, (VMXNET3_REG_TXPROD + txq->queue_id * VMXNET3_REG_ALIGN),
430                                        txq->cmd_ring.next2fill);
431         }
432
433         return nb_tx;
434 }
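
/*
 * Doorbell batching: txNumDeferred counts descriptors queued since the
 * producer index was last written to VMXNET3_REG_TXPROD, and the register
 * write above is only issued once the txThreshold value from the shared
 * queue control area is reached. This keeps the number of comparatively
 * expensive BAR0 writes per burst low, at the price of deferring the
 * notification for the tail of a burst until a later call.
 */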
435
436 /*
437  *  Allocates mbufs and clusters. Post rx descriptors with buffer details
438  *  so that device can receive packets in those buffers.
439  *      Ring layout:
440  *      Among the two rings, 1st ring contains buffers of type 0 and type1.
441  *      bufs_per_pkt is set such that for non-LRO cases all the buffers required
442  *      by a frame will fit in 1st ring (1st buf of type0 and rest of type1).
443  *      2nd ring contains buffers of type 1 alone; it is mostly used
444  *      only for LRO.
445  *
446  */
447 static inline int
448 vmxnet3_post_rx_bufs(vmxnet3_rx_queue_t *rxq, uint8_t ring_id)
449 {
450         int err = 0;
451         uint32_t i = 0, val = 0;
452         struct vmxnet3_cmd_ring *ring = &rxq->cmd_ring[ring_id];
453
454         while (vmxnet3_cmd_ring_desc_avail(ring) > 0) {
455                 struct Vmxnet3_RxDesc *rxd;
456                 struct rte_mbuf *mbuf;
457                 vmxnet3_buf_info_t *buf_info = &ring->buf_info[ring->next2fill];
458
459                 rxd = (struct Vmxnet3_RxDesc *)(ring->base + ring->next2fill);
460
461                 if (ring->rid == 0) {
462                         /* Usually: One HEAD type buf per packet
463                          * val = (ring->next2fill % rxq->hw->bufs_per_pkt) ?
464                          * VMXNET3_RXD_BTYPE_BODY : VMXNET3_RXD_BTYPE_HEAD;
465                          */
466
467                         /* We use single packet buffer so all heads here */
468                         val = VMXNET3_RXD_BTYPE_HEAD;
469                 } else {
470                         /* All BODY type buffers for the 2nd ring, which ESXi does not use at all */
471                         val = VMXNET3_RXD_BTYPE_BODY;
472                 }
473
474                 /* Allocate blank mbuf for the current Rx Descriptor */
475                 mbuf = rte_rxmbuf_alloc(rxq->mp);
476                 if (mbuf == NULL) {
477                         PMD_RX_LOG(ERR, "Error allocating mbuf in %s", __func__);
478                         rxq->stats.rx_buf_alloc_failure++;
479                         err = ENOMEM;
480                         break;
481                 }
482
483                 /*
484                  * Load the mbuf pointer into the buf_info entry for this descriptor;
485                  * buf_info structure is equivalent to cookie for virtio-virtqueue
486                  */
487                 buf_info->m = mbuf;
488                 buf_info->len = (uint16_t)(mbuf->buf_len -
489                                            RTE_PKTMBUF_HEADROOM);
490                 buf_info->bufPA = RTE_MBUF_DATA_DMA_ADDR_DEFAULT(mbuf);
491
492                 /* Load Rx Descriptor with the buffer's GPA */
493                 rxd->addr = buf_info->bufPA;
494
495                 /* After this point rxd->addr MUST not be NULL */
496                 rxd->btype = val;
497                 rxd->len = buf_info->len;
498                 /* Flip gen bit at the end to change ownership */
499                 rxd->gen = ring->gen;
500
501                 vmxnet3_cmd_ring_adv_next2fill(ring);
502                 i++;
503         }
504
505         /* Return error only if no buffers are posted at present */
506         if (vmxnet3_cmd_ring_desc_avail(ring) >= (ring->size - 1))
507                 return -err;
508         else
509                 return i;
510 }
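
/*
 * Caller contract (see vmxnet3_dev_rxtx_init() and vmxnet3_recv_pkts()): a
 * positive return is the number of buffers actually posted, while 0 or a
 * negative errno (typically -ENOMEM when the mempool is empty) means nothing
 * could be posted. Only the init path treats that as fatal; the Rx burst
 * path simply retries on the next poll.
 */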
511
512 /*
513  * Process the Rx Completion Ring of given vmxnet3_rx_queue
514  * for nb_pkts burst and return the number of packets received
515  */
516 uint16_t
517 vmxnet3_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t nb_pkts)
518 {
519         uint16_t nb_rx;
520         uint32_t nb_rxd, idx;
521         uint8_t ring_idx;
522         vmxnet3_rx_queue_t *rxq;
523         Vmxnet3_RxCompDesc *rcd;
524         vmxnet3_buf_info_t *rbi;
525         Vmxnet3_RxDesc *rxd;
526         struct rte_mbuf *rxm = NULL;
527         struct vmxnet3_hw *hw;
528
529         nb_rx = 0;
530         ring_idx = 0;
531         nb_rxd = 0;
532         idx = 0;
533
534         rxq = rx_queue;
535         hw = rxq->hw;
536
537         rcd = &rxq->comp_ring.base[rxq->comp_ring.next2proc].rcd;
538
539         if (rxq->stopped) {
540                 PMD_RX_LOG(DEBUG, "Rx queue is stopped.");
541                 return 0;
542         }
543
544         while (rcd->gen == rxq->comp_ring.gen) {
545
546                 if (nb_rx >= nb_pkts)
547                         break;
548                 idx = rcd->rxdIdx;
549                 ring_idx = (uint8_t)((rcd->rqID == rxq->qid1) ? 0 : 1);
550                 rxd = (Vmxnet3_RxDesc *)rxq->cmd_ring[ring_idx].base + idx;
551                 rbi = rxq->cmd_ring[ring_idx].buf_info + idx;
552
553                 if (rcd->sop != 1 || rcd->eop != 1) {
554                         rte_pktmbuf_free_seg(rbi->m);
555
556                         PMD_RX_LOG(DEBUG, "Packet spread across multiple buffers\n");
557                         goto rcd_done;
558
559                 } else {
560
561                         PMD_RX_LOG(DEBUG, "rxd idx: %d ring idx: %d.", idx, ring_idx);
562
563 #ifdef RTE_LIBRTE_VMXNET3_DEBUG_DRIVER
564                         VMXNET3_ASSERT(rcd->len <= rxd->len);
565                         VMXNET3_ASSERT(rbi->m);
566 #endif
567                         if (rcd->len == 0) {
568                                 PMD_RX_LOG(DEBUG, "Rx buf was skipped. rxring[%d][%d]\n",
569                                            ring_idx, idx);
570 #ifdef RTE_LIBRTE_VMXNET3_DEBUG_DRIVER
571                                 VMXNET3_ASSERT(rcd->sop && rcd->eop);
572 #endif
573                                 rte_pktmbuf_free_seg(rbi->m);
574
575                                 goto rcd_done;
576                         }
577
578                         /* Assuming a packet is coming in a single packet buffer */
579                         if (rxd->btype != VMXNET3_RXD_BTYPE_HEAD) {
580                                 PMD_RX_LOG(DEBUG,
581                                            "Alert: misbehaving device, incorrect"
582                                            " buffer type used. Packet dropped.");
583                                 rte_pktmbuf_free_seg(rbi->m);
584                                 goto rcd_done;
585                         }
586 #ifdef RTE_LIBRTE_VMXNET3_DEBUG_DRIVER
587                         VMXNET3_ASSERT(rxd->btype == VMXNET3_RXD_BTYPE_HEAD);
588 #endif
589                         /* Get the packet buffer pointer from buf_info */
590                         rxm = rbi->m;
591
592                         /* Clear descriptor associated buf_info to be reused */
593                         rbi->m = NULL;
594                         rbi->bufPA = 0;
595
596                         /* Update the index that we received a packet */
597                         rxq->cmd_ring[ring_idx].next2comp = idx;
598
599                         /* For RCD with EOP set, check if there is frame error */
600                         if (rcd->err) {
601                                 rxq->stats.drop_total++;
602                                 rxq->stats.drop_err++;
603
604                                 if (!rcd->fcs) {
605                                         rxq->stats.drop_fcs++;
606                                         PMD_RX_LOG(ERR, "Recv packet dropped due to frame err.");
607                                 }
608                                 PMD_RX_LOG(ERR, "Error in received packet rcd#:%d rxd:%d",
609                                            (int)(rcd - (struct Vmxnet3_RxCompDesc *)
610                                                  rxq->comp_ring.base), rcd->rxdIdx);
611                                 rte_pktmbuf_free_seg(rxm);
612
613                                 goto rcd_done;
614                         }
615
616                         /* Check for hardware stripped VLAN tag */
617                         if (rcd->ts) {
618                                 PMD_RX_LOG(DEBUG, "Received packet with vlan ID: %d.",
619                                            rcd->tci);
620                                 rxm->ol_flags = PKT_RX_VLAN_PKT;
621 #ifdef RTE_LIBRTE_VMXNET3_DEBUG_DRIVER
622                                 VMXNET3_ASSERT(rxm &&
623                                                rte_pktmbuf_mtod(rxm, void *));
624 #endif
625                                 /* Copy vlan tag in packet buffer */
626                                 rxm->vlan_tci = rte_le_to_cpu_16((uint16_t)rcd->tci);
627                         } else {
628                                 rxm->ol_flags = 0;
629                                 rxm->vlan_tci = 0;
630                         }
631
632                         /* Initialize newly received packet buffer */
633                         rxm->port = rxq->port_id;
634                         rxm->nb_segs = 1;
635                         rxm->next = NULL;
636                         rxm->pkt_len = (uint16_t)rcd->len;
637                         rxm->data_len = (uint16_t)rcd->len;
639                         rxm->data_off = RTE_PKTMBUF_HEADROOM;
640
641                         /* Check packet type and Rx checksum errors; only IPv4 is supported so far. */
642                         if (rcd->v4) {
643                                 struct ether_hdr *eth = rte_pktmbuf_mtod(rxm, struct ether_hdr *);
644                                 struct ipv4_hdr *ip = (struct ipv4_hdr *)(eth + 1);
645
646                                 if (((ip->version_ihl & 0xf) << 2) > (int)sizeof(struct ipv4_hdr))
647                                         rxm->ol_flags |= PKT_RX_IPV4_HDR_EXT;
648                                 else
649                                         rxm->ol_flags |= PKT_RX_IPV4_HDR;
650
651                                 if (!rcd->cnc) {
652                                         if (!rcd->ipc)
653                                                 rxm->ol_flags |= PKT_RX_IP_CKSUM_BAD;
654
655                                         if ((rcd->tcp || rcd->udp) && !rcd->tuc)
656                                                 rxm->ol_flags |= PKT_RX_L4_CKSUM_BAD;
657                                 }
658                         }
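                        /*
                         * Flag semantics here follow the usual vmxnet3
                         * completion-descriptor convention (as in the Linux
                         * driver): cnc set means no checksum was calculated
                         * at all, while ipc/tuc report a good IPv4 or TCP/UDP
                         * checksum respectively, so the *_CKSUM_BAD flags are
                         * only raised when the device actually checked and
                         * found the checksum wrong.
                         */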
659
660                         rx_pkts[nb_rx++] = rxm;
661 rcd_done:
662                         rxq->cmd_ring[ring_idx].next2comp = idx;
663                         VMXNET3_INC_RING_IDX_ONLY(rxq->cmd_ring[ring_idx].next2comp, rxq->cmd_ring[ring_idx].size);
664
665                         /* Allocate new buffers and repost the descriptors */
666                         vmxnet3_post_rx_bufs(rxq, ring_idx);
667                         if (unlikely(rxq->shared->ctrl.updateRxProd)) {
668                                 VMXNET3_WRITE_BAR0_REG(hw, rxprod_reg[ring_idx] + (rxq->queue_id * VMXNET3_REG_ALIGN),
669                                                        rxq->cmd_ring[ring_idx].next2fill);
670                         }
671
672                         /* Advance to the next descriptor in comp_ring */
673                         vmxnet3_comp_ring_adv_next2proc(&rxq->comp_ring);
674
675                         rcd = &rxq->comp_ring.base[rxq->comp_ring.next2proc].rcd;
676                         nb_rxd++;
677                         if (nb_rxd > rxq->cmd_ring[0].size) {
678                                 PMD_RX_LOG(ERR,
679                                            "Used up quota of receiving packets,"
680                                            " relinquish control.");
681                                 break;
682                         }
683                 }
684         }
685
686         return nb_rx;
687 }
688
689 /*
690  * Create memzone for device rings. malloc can't be used as the physical address is
691  * needed. If the memzone is already created, then this function returns a ptr
692  * to the old one.
693  */
694 static const struct rte_memzone *
695 ring_dma_zone_reserve(struct rte_eth_dev *dev, const char *ring_name,
696                       uint16_t queue_id, uint32_t ring_size, int socket_id)
697 {
698         char z_name[RTE_MEMZONE_NAMESIZE];
699         const struct rte_memzone *mz;
700
701         snprintf(z_name, sizeof(z_name), "%s_%s_%d_%d",
702                         dev->driver->pci_drv.name, ring_name,
703                         dev->data->port_id, queue_id);
704
705         mz = rte_memzone_lookup(z_name);
706         if (mz)
707                 return mz;
708
709         return rte_memzone_reserve_aligned(z_name, ring_size,
710                         socket_id, 0, VMXNET3_RING_BA_ALIGN);
711 }
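
/*
 * The memzone name encodes driver name, ring name, port and queue id
 * ("<driver>_<ring>_<port>_<queue>"), so every ring gets its own physically
 * contiguous zone, and a later reconfiguration that calls this again simply
 * reuses the zone found by rte_memzone_lookup() instead of leaking a new one.
 */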
712
713 int
714 vmxnet3_dev_tx_queue_setup(struct rte_eth_dev *dev,
715                            uint16_t queue_idx,
716                            uint16_t nb_desc,
717                            unsigned int socket_id,
718                            __attribute__((unused)) const struct rte_eth_txconf *tx_conf)
719 {
720         struct vmxnet3_hw     *hw = dev->data->dev_private;
721         const struct rte_memzone *mz;
722         struct vmxnet3_tx_queue *txq;
723         struct vmxnet3_cmd_ring *ring;
724         struct vmxnet3_comp_ring *comp_ring;
725         int size;
726
727         PMD_INIT_FUNC_TRACE();
728
729         if ((tx_conf->txq_flags & ETH_TXQ_FLAGS_NOMULTSEGS) !=
730             ETH_TXQ_FLAGS_NOMULTSEGS) {
731                 PMD_INIT_LOG(ERR, "TX multi-segment is not supported yet");
732                 return -EINVAL;
733         }
734
735         if ((tx_conf->txq_flags & ETH_TXQ_FLAGS_NOOFFLOADS) !=
736             ETH_TXQ_FLAGS_NOOFFLOADS) {
737                 PMD_INIT_LOG(ERR, "TX offloads are not supported yet");
738                 return -EINVAL;
739         }
740
741         txq = rte_zmalloc("ethdev_tx_queue", sizeof(struct vmxnet3_tx_queue), CACHE_LINE_SIZE);
742         if (txq == NULL) {
743                 PMD_INIT_LOG(ERR, "Can not allocate tx queue structure");
744                 return -ENOMEM;
745         }
746
747         txq->queue_id = queue_idx;
748         txq->port_id = dev->data->port_id;
749         txq->shared = &hw->tqd_start[queue_idx];
750         txq->hw = hw;
751         txq->qid = queue_idx;
752         txq->stopped = TRUE;
753
754         ring = &txq->cmd_ring;
755         comp_ring = &txq->comp_ring;
756
757         /* Tx vmxnet ring length should be between 512-4096 */
758         if (nb_desc < VMXNET3_DEF_TX_RING_SIZE) {
759                 PMD_INIT_LOG(ERR, "VMXNET3 Tx Ring Size Min: %u",
760                              VMXNET3_DEF_TX_RING_SIZE);
761                 return -EINVAL;
762         } else if (nb_desc > VMXNET3_TX_RING_MAX_SIZE) {
763                 PMD_INIT_LOG(ERR, "VMXNET3 Tx Ring Size Max: %u",
764                              VMXNET3_TX_RING_MAX_SIZE);
765                 return -EINVAL;
766         } else {
767                 ring->size = nb_desc;
768                 ring->size &= ~VMXNET3_RING_SIZE_MASK;
769         }
770         comp_ring->size = ring->size;
771
772         /* Tx vmxnet rings structure initialization */
773         ring->next2fill = 0;
774         ring->next2comp = 0;
775         ring->gen = VMXNET3_INIT_GEN;
776         comp_ring->next2proc = 0;
777         comp_ring->gen = VMXNET3_INIT_GEN;
778
779         size = sizeof(struct Vmxnet3_TxDesc) * ring->size;
780         size += sizeof(struct Vmxnet3_TxCompDesc) * comp_ring->size;
781
782         mz = ring_dma_zone_reserve(dev, "txdesc", queue_idx, size, socket_id);
783         if (mz == NULL) {
784                 PMD_INIT_LOG(ERR, "ERROR: Creating queue descriptors zone");
785                 return -ENOMEM;
786         }
787         memset(mz->addr, 0, mz->len);
788
789         /* cmd_ring initialization */
790         ring->base = mz->addr;
791         ring->basePA = mz->phys_addr;
792
793         /* comp_ring initialization */
794         comp_ring->base = ring->base + ring->size;
795         comp_ring->basePA = ring->basePA +
796                 (sizeof(struct Vmxnet3_TxDesc) * ring->size);
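        /*
         * Layout of the "txdesc" zone reserved above: the Tx command ring is
         * immediately followed by its completion ring, so a single contiguous
         * allocation backs both and the completion ring's base/basePA are
         * simply offsets past ring->size Vmxnet3_TxDesc entries.
         */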
797
798         /* cmd_ring0 buf_info allocation */
799         ring->buf_info = rte_zmalloc("tx_ring_buf_info",
800                                      ring->size * sizeof(vmxnet3_buf_info_t), CACHE_LINE_SIZE);
801         if (ring->buf_info == NULL) {
802                 PMD_INIT_LOG(ERR, "ERROR: Creating tx_buf_info structure");
803                 return -ENOMEM;
804         }
805
806         /* Update the data portion with txq */
807         dev->data->tx_queues[queue_idx] = txq;
808
809         return 0;
810 }
811
812 int
813 vmxnet3_dev_rx_queue_setup(struct rte_eth_dev *dev,
814                            uint16_t queue_idx,
815                            uint16_t nb_desc,
816                            unsigned int socket_id,
817                            __attribute__((unused)) const struct rte_eth_rxconf *rx_conf,
818                            struct rte_mempool *mp)
819 {
820         const struct rte_memzone *mz;
821         struct vmxnet3_rx_queue *rxq;
822         struct vmxnet3_hw     *hw = dev->data->dev_private;
823         struct vmxnet3_cmd_ring *ring0, *ring1, *ring;
824         struct vmxnet3_comp_ring *comp_ring;
825         int size;
826         uint8_t i;
827         char mem_name[32];
828         uint16_t buf_size;
829         struct rte_pktmbuf_pool_private *mbp_priv;
830
831         PMD_INIT_FUNC_TRACE();
832
833         mbp_priv = (struct rte_pktmbuf_pool_private *)
834                 rte_mempool_get_priv(mp);
835         buf_size = (uint16_t) (mbp_priv->mbuf_data_room_size -
836                                RTE_PKTMBUF_HEADROOM);
837
838         if (dev->data->dev_conf.rxmode.max_rx_pkt_len > buf_size) {
839                 PMD_INIT_LOG(ERR, "buf_size = %u, max_pkt_len = %u, "
840                              "VMXNET3 doesn't support scatter packets yet",
841                              buf_size, dev->data->dev_conf.rxmode.max_rx_pkt_len);
842                 return -EINVAL;
843         }
844
845         rxq = rte_zmalloc("ethdev_rx_queue", sizeof(struct vmxnet3_rx_queue), CACHE_LINE_SIZE);
846         if (rxq == NULL) {
847                 PMD_INIT_LOG(ERR, "Can not allocate rx queue structure");
848                 return -ENOMEM;
849         }
850
851         rxq->mp = mp;
852         rxq->queue_id = queue_idx;
853         rxq->port_id = dev->data->port_id;
854         rxq->shared = &hw->rqd_start[queue_idx];
855         rxq->hw = hw;
856         rxq->qid1 = queue_idx;
857         rxq->qid2 = queue_idx + hw->num_rx_queues;
858         rxq->stopped = TRUE;
859
860         ring0 = &rxq->cmd_ring[0];
861         ring1 = &rxq->cmd_ring[1];
862         comp_ring = &rxq->comp_ring;
863
864         /* Rx vmxnet rings length should be between 256-4096 */
865         if (nb_desc < VMXNET3_DEF_RX_RING_SIZE) {
866                 PMD_INIT_LOG(ERR, "VMXNET3 Rx Ring Size Min: 256");
867                 return -EINVAL;
868         } else if (nb_desc > VMXNET3_RX_RING_MAX_SIZE) {
869                 PMD_INIT_LOG(ERR, "VMXNET3 Rx Ring Size Max: 4096");
870                 return -EINVAL;
871         } else {
872                 ring0->size = nb_desc;
873                 ring0->size &= ~VMXNET3_RING_SIZE_MASK;
874                 ring1->size = ring0->size;
875         }
876
877         comp_ring->size = ring0->size + ring1->size;
878
879         /* Rx vmxnet rings structure initialization */
880         ring0->next2fill = 0;
881         ring1->next2fill = 0;
882         ring0->next2comp = 0;
883         ring1->next2comp = 0;
884         ring0->gen = VMXNET3_INIT_GEN;
885         ring1->gen = VMXNET3_INIT_GEN;
886         comp_ring->next2proc = 0;
887         comp_ring->gen = VMXNET3_INIT_GEN;
888
889         size = sizeof(struct Vmxnet3_RxDesc) * (ring0->size + ring1->size);
890         size += sizeof(struct Vmxnet3_RxCompDesc) * comp_ring->size;
891
892         mz = ring_dma_zone_reserve(dev, "rxdesc", queue_idx, size, socket_id);
893         if (mz == NULL) {
894                 PMD_INIT_LOG(ERR, "ERROR: Creating queue descriptors zone");
895                 return -ENOMEM;
896         }
897         memset(mz->addr, 0, mz->len);
898
899         /* cmd_ring0 initialization */
900         ring0->base = mz->addr;
901         ring0->basePA = mz->phys_addr;
902
903         /* cmd_ring1 initialization */
904         ring1->base = ring0->base + ring0->size;
905         ring1->basePA = ring0->basePA + sizeof(struct Vmxnet3_RxDesc) * ring0->size;
906
907         /* comp_ring initialization */
908         comp_ring->base = ring1->base +  ring1->size;
909         comp_ring->basePA = ring1->basePA + sizeof(struct Vmxnet3_RxDesc) *
910                 ring1->size;
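        /*
         * Same scheme as the Tx side: the "rxdesc" zone holds cmd_ring[0],
         * cmd_ring[1] and the completion ring back to back, with each basePA
         * derived by offsetting past the preceding ring's descriptors.
         */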
911
912         /* cmd_ring0-cmd_ring1 buf_info allocation */
913         for (i = 0; i < VMXNET3_RX_CMDRING_SIZE; i++) {
914
915                 ring = &rxq->cmd_ring[i];
916                 ring->rid = i;
917                 snprintf(mem_name, sizeof(mem_name), "rx_ring_%d_buf_info", i);
918
919                 ring->buf_info = rte_zmalloc(mem_name, ring->size * sizeof(vmxnet3_buf_info_t), CACHE_LINE_SIZE);
920                 if (ring->buf_info == NULL) {
921                         PMD_INIT_LOG(ERR, "ERROR: Creating rx_buf_info structure");
922                         return -ENOMEM;
923                 }
924         }
925
926         /* Update the data portion with rxq */
927         dev->data->rx_queues[queue_idx] = rxq;
928
929         return 0;
930 }
931
932 /*
933  * Initializes Receive Unit
934  * Load mbufs in rx queue in advance
935  */
936 int
937 vmxnet3_dev_rxtx_init(struct rte_eth_dev *dev)
938 {
939         struct vmxnet3_hw *hw = dev->data->dev_private;
940
941         int i, ret;
942         uint8_t j;
943
944         PMD_INIT_FUNC_TRACE();
945
946         for (i = 0; i < hw->num_rx_queues; i++) {
947                 vmxnet3_rx_queue_t *rxq = dev->data->rx_queues[i];
948
949                 for (j = 0; j < VMXNET3_RX_CMDRING_SIZE; j++) {
950                         /* vmxnet3_post_rx_bufs() fills the whole ring with fresh mbufs */
951                         ret = vmxnet3_post_rx_bufs(rxq, j);
952                         if (ret <= 0) {
953                                 PMD_INIT_LOG(ERR, "ERROR: Posting Rxq: %d buffers ring: %d", i, j);
954                                 return -ret;
955                         }
956                         /* Tell the device about next2fill so the newly posted mbufs can receive incoming packets */
957                         if (unlikely(rxq->shared->ctrl.updateRxProd)) {
958                                 VMXNET3_WRITE_BAR0_REG(hw, rxprod_reg[j] + (rxq->queue_id * VMXNET3_REG_ALIGN),
959                                                        rxq->cmd_ring[j].next2fill);
960                         }
961                 }
962                 rxq->stopped = FALSE;
963         }
964
965         for (i = 0; i < dev->data->nb_tx_queues; i++) {
966                 struct vmxnet3_tx_queue *txq = dev->data->tx_queues[i];
967
968                 txq->stopped = FALSE;
969         }
970
971         return 0;
972 }
973
974 static uint8_t rss_intel_key[40] = {
975         0x6D, 0x5A, 0x56, 0xDA, 0x25, 0x5B, 0x0E, 0xC2,
976         0x41, 0x67, 0x25, 0x3D, 0x43, 0xA3, 0x8F, 0xB0,
977         0xD0, 0xCA, 0x2B, 0xCB, 0xAE, 0x7B, 0x30, 0xB4,
978         0x77, 0xCB, 0x2D, 0xA3, 0x80, 0x30, 0xF2, 0x0C,
979         0x6A, 0x42, 0xB7, 0x3B, 0xBE, 0xAC, 0x01, 0xFA,
980 };
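
/*
 * Default 40-byte Toeplitz hash key; it is only used below when the
 * application does not supply its own key in rss_conf.rss_key.
 */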
981
982 /*
983  * Configure RSS feature
984  */
985 int
986 vmxnet3_rss_configure(struct rte_eth_dev *dev)
987 {
988 #define VMXNET3_RSS_OFFLOAD_ALL ( \
989                 ETH_RSS_IPV4 | \
990                 ETH_RSS_IPV4_TCP | \
991                 ETH_RSS_IPV6 | \
992                 ETH_RSS_IPV6_TCP)
993
994         struct vmxnet3_hw *hw = dev->data->dev_private;
995         struct VMXNET3_RSSConf *dev_rss_conf;
996         struct rte_eth_rss_conf *port_rss_conf;
997         uint64_t rss_hf;
998         uint8_t i, j;
999
1000         PMD_INIT_FUNC_TRACE();
1001
1002         dev_rss_conf = hw->rss_conf;
1003         port_rss_conf = &dev->data->dev_conf.rx_adv_conf.rss_conf;
1004
1005         /* loading hashFunc */
1006         dev_rss_conf->hashFunc = VMXNET3_RSS_HASH_FUNC_TOEPLITZ;
1007         /* loading hashKeySize */
1008         dev_rss_conf->hashKeySize = VMXNET3_RSS_MAX_KEY_SIZE;
1009         /* loading indTableSize: must not exceed VMXNET3_RSS_MAX_IND_TABLE_SIZE (128) */
1010         dev_rss_conf->indTableSize = (uint16_t)(hw->num_rx_queues * 4);
1011
1012         if (port_rss_conf->rss_key == NULL) {
1013                 /* Default hash key */
1014                 port_rss_conf->rss_key = rss_intel_key;
1015         }
1016
1017         /* loading hashKey */
1018         memcpy(&dev_rss_conf->hashKey[0], port_rss_conf->rss_key, dev_rss_conf->hashKeySize);
1019
1020         /* loading indTable */
1021         for (i = 0, j = 0; i < dev_rss_conf->indTableSize; i++, j++) {
1022                 if (j == dev->data->nb_rx_queues)
1023                         j = 0;
1024                 dev_rss_conf->indTable[i] = j;
1025         }
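        /*
         * Example: with two Rx queues (and hw->num_rx_queues matching
         * nb_rx_queues) indTableSize is 8 and the table becomes
         * {0, 1, 0, 1, 0, 1, 0, 1}, i.e. hash buckets are spread round-robin
         * across the configured queues.
         */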
1026
1027         /* loading hashType */
1028         dev_rss_conf->hashType = 0;
1029         rss_hf = port_rss_conf->rss_hf & VMXNET3_RSS_OFFLOAD_ALL;
1030         if (rss_hf & ETH_RSS_IPV4)
1031                 dev_rss_conf->hashType |= VMXNET3_RSS_HASH_TYPE_IPV4;
1032         if (rss_hf & ETH_RSS_IPV4_TCP)
1033                 dev_rss_conf->hashType |= VMXNET3_RSS_HASH_TYPE_TCP_IPV4;
1034         if (rss_hf & ETH_RSS_IPV6)
1035                 dev_rss_conf->hashType |= VMXNET3_RSS_HASH_TYPE_IPV6;
1036         if (rss_hf & ETH_RSS_IPV6_TCP)
1037                 dev_rss_conf->hashType |= VMXNET3_RSS_HASH_TYPE_TCP_IPV6;
1038
1039         return VMXNET3_SUCCESS;
1040 }
1041
1042 /*
1043  * Configure VLAN Filter feature
1044  */
1045 int
1046 vmxnet3_vlan_configure(struct rte_eth_dev *dev)
1047 {
1048         uint8_t i;
1049         struct vmxnet3_hw *hw = dev->data->dev_private;
1050         uint32_t *vf_table = hw->shared->devRead.rxFilterConf.vfTable;
1051
1052         PMD_INIT_FUNC_TRACE();
1053
1054         /* Set up the VLAN filter table */
1055         for (i = 0; i < VMXNET3_VFT_SIZE; i++) {
1056                 /* Filter all vlan tags out by default */
1057                 vf_table[i] = 0;
1058                 /* To-Do: Provide another routine in dev_ops for user config */
1059
1060                 PMD_INIT_LOG(DEBUG, "Registering VLAN portid: %"PRIu8" tag %u",
1061                                         dev->data->port_id, vf_table[i]);
1062         }
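        /*
         * vfTable is a bitmap with one bit per VLAN ID; clearing every word
         * here filters out all tagged traffic until a later configuration
         * (see the To-Do above) sets individual bits.
         */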
1063
1064         return VMXNET3_SUCCESS;
1065 }