vmxnet3: remove mtu check
[dpdk.git] drivers/net/vmxnet3/vmxnet3_rxtx.c
1 /*-
2  *   BSD LICENSE
3  *
4  *   Copyright(c) 2010-2015 Intel Corporation. All rights reserved.
5  *   All rights reserved.
6  *
7  *   Redistribution and use in source and binary forms, with or without
8  *   modification, are permitted provided that the following conditions
9  *   are met:
10  *
11  *     * Redistributions of source code must retain the above copyright
12  *       notice, this list of conditions and the following disclaimer.
13  *     * Redistributions in binary form must reproduce the above copyright
14  *       notice, this list of conditions and the following disclaimer in
15  *       the documentation and/or other materials provided with the
16  *       distribution.
17  *     * Neither the name of Intel Corporation nor the names of its
18  *       contributors may be used to endorse or promote products derived
19  *       from this software without specific prior written permission.
20  *
21  *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
22  *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
23  *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
24  *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
25  *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
26  *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
27  *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
28  *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
29  *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
30  *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
31  *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
32  */
33
34 #include <sys/queue.h>
35
36 #include <stdio.h>
37 #include <stdlib.h>
38 #include <string.h>
39 #include <errno.h>
40 #include <stdint.h>
41 #include <stdarg.h>
42 #include <unistd.h>
43 #include <inttypes.h>
44
45 #include <rte_byteorder.h>
46 #include <rte_common.h>
47 #include <rte_cycles.h>
48 #include <rte_log.h>
49 #include <rte_debug.h>
50 #include <rte_interrupts.h>
51 #include <rte_pci.h>
52 #include <rte_memory.h>
53 #include <rte_memzone.h>
54 #include <rte_launch.h>
55 #include <rte_eal.h>
56 #include <rte_per_lcore.h>
57 #include <rte_lcore.h>
58 #include <rte_atomic.h>
59 #include <rte_branch_prediction.h>
60 #include <rte_ring.h>
61 #include <rte_mempool.h>
62 #include <rte_malloc.h>
63 #include <rte_mbuf.h>
64 #include <rte_ether.h>
65 #include <rte_ethdev.h>
66 #include <rte_prefetch.h>
67 #include <rte_ip.h>
68 #include <rte_udp.h>
69 #include <rte_tcp.h>
70 #include <rte_sctp.h>
71 #include <rte_string_fns.h>
72 #include <rte_errno.h>
73
74 #include "base/vmxnet3_defs.h"
75 #include "vmxnet3_ring.h"
76
77 #include "vmxnet3_logs.h"
78 #include "vmxnet3_ethdev.h"
79
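/*
 * Map an mbuf to the bus/physical address the device should DMA from:
 * buf_physaddr plus either the current data offset or the default headroom.
 */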
80 #define RTE_MBUF_DATA_DMA_ADDR(mb) \
81         (uint64_t) ((mb)->buf_physaddr + (mb)->data_off)
82
83 #define RTE_MBUF_DATA_DMA_ADDR_DEFAULT(mb) \
84         (uint64_t) ((mb)->buf_physaddr + RTE_PKTMBUF_HEADROOM)
85
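/* Rx producer index registers in BAR0, one per Rx command ring */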
86 static uint32_t rxprod_reg[2] = {VMXNET3_REG_RXPROD, VMXNET3_REG_RXPROD2};
87
88 static inline int vmxnet3_post_rx_bufs(vmxnet3_rx_queue_t *, uint8_t);
89 static inline void vmxnet3_tq_tx_complete(vmxnet3_tx_queue_t *);
90 #ifdef RTE_LIBRTE_VMXNET3_DEBUG_DRIVER_NOT_USED
91 static void vmxnet3_rxq_dump(struct vmxnet3_rx_queue *);
92 static void vmxnet3_txq_dump(struct vmxnet3_tx_queue *);
93 #endif
94
95 static inline struct rte_mbuf *
96 rte_rxmbuf_alloc(struct rte_mempool *mp)
97 {
98         struct rte_mbuf *m;
99
100         m = __rte_mbuf_raw_alloc(mp);
101         __rte_mbuf_sanity_check_raw(m, 0);
102         return m;
103 }
104
105 #ifdef RTE_LIBRTE_VMXNET3_DEBUG_DRIVER_NOT_USED
106 static void
107 vmxnet3_rxq_dump(struct vmxnet3_rx_queue *rxq)
108 {
109         uint32_t avail = 0;
110
111         if (rxq == NULL)
112                 return;
113
114         PMD_RX_LOG(DEBUG,
115                    "RXQ: cmd0 base : 0x%p cmd1 base : 0x%p comp ring base : 0x%p.",
116                    rxq->cmd_ring[0].base, rxq->cmd_ring[1].base, rxq->comp_ring.base);
117         PMD_RX_LOG(DEBUG,
118                    "RXQ: cmd0 basePA : 0x%lx cmd1 basePA : 0x%lx comp ring basePA : 0x%lx.",
119                    (unsigned long)rxq->cmd_ring[0].basePA,
120                    (unsigned long)rxq->cmd_ring[1].basePA,
121                    (unsigned long)rxq->comp_ring.basePA);
122
123         avail = vmxnet3_cmd_ring_desc_avail(&rxq->cmd_ring[0]);
124         PMD_RX_LOG(DEBUG,
125                    "RXQ:cmd0: size=%u; free=%u; next2proc=%u; queued=%u",
126                    (uint32_t)rxq->cmd_ring[0].size, avail,
127                    rxq->comp_ring.next2proc,
128                    rxq->cmd_ring[0].size - avail);
129
130         avail = vmxnet3_cmd_ring_desc_avail(&rxq->cmd_ring[1]);
131         PMD_RX_LOG(DEBUG, "RXQ:cmd1 size=%u; free=%u; next2proc=%u; queued=%u",
132                    (uint32_t)rxq->cmd_ring[1].size, avail, rxq->comp_ring.next2proc,
133                    rxq->cmd_ring[1].size - avail);
134
135 }
136
137 static void
138 vmxnet3_txq_dump(struct vmxnet3_tx_queue *txq)
139 {
140         uint32_t avail = 0;
141
142         if (txq == NULL)
143                 return;
144
145         PMD_TX_LOG(DEBUG, "TXQ: cmd base : 0x%p comp ring base : 0x%p data ring base : 0x%p.",
146                    txq->cmd_ring.base, txq->comp_ring.base, txq->data_ring.base);
147         PMD_TX_LOG(DEBUG, "TXQ: cmd basePA : 0x%lx comp ring basePA : 0x%lx data ring basePA : 0x%lx.",
148                    (unsigned long)txq->cmd_ring.basePA,
149                    (unsigned long)txq->comp_ring.basePA,
150                    (unsigned long)txq->data_ring.basePA);
151
152         avail = vmxnet3_cmd_ring_desc_avail(&txq->cmd_ring);
153         PMD_TX_LOG(DEBUG, "TXQ: size=%u; free=%u; next2proc=%u; queued=%u",
154                    (uint32_t)txq->cmd_ring.size, avail,
155                    txq->comp_ring.next2proc, txq->cmd_ring.size - avail);
156 }
157 #endif
158
159 static inline void
160 vmxnet3_cmd_ring_release_mbufs(vmxnet3_cmd_ring_t *ring)
161 {
162         while (ring->next2comp != ring->next2fill) {
163                 /* No need to worry about tx desc ownership, device is quiesced by now. */
164                 vmxnet3_buf_info_t *buf_info = ring->buf_info + ring->next2comp;
165
166                 if (buf_info->m) {
167                         rte_pktmbuf_free(buf_info->m);
168                         buf_info->m = NULL;
169                         buf_info->bufPA = 0;
170                         buf_info->len = 0;
171                 }
172                 vmxnet3_cmd_ring_adv_next2comp(ring);
173         }
174 }
175
176 static void
177 vmxnet3_cmd_ring_release(vmxnet3_cmd_ring_t *ring)
178 {
179         vmxnet3_cmd_ring_release_mbufs(ring);
180         rte_free(ring->buf_info);
181         ring->buf_info = NULL;
182 }
183
184
185 void
186 vmxnet3_dev_tx_queue_release(void *txq)
187 {
188         vmxnet3_tx_queue_t *tq = txq;
189
190         if (tq != NULL) {
191                 /* Release the cmd_ring */
192                 vmxnet3_cmd_ring_release(&tq->cmd_ring);
193         }
194 }
195
196 void
197 vmxnet3_dev_rx_queue_release(void *rxq)
198 {
199         int i;
200         vmxnet3_rx_queue_t *rq = rxq;
201
202         if (rq != NULL) {
203                 /* Release both the cmd_rings */
204                 for (i = 0; i < VMXNET3_RX_CMDRING_SIZE; i++)
205                         vmxnet3_cmd_ring_release(&rq->cmd_ring[i]);
206         }
207 }
208
209 static void
210 vmxnet3_dev_tx_queue_reset(void *txq)
211 {
212         vmxnet3_tx_queue_t *tq = txq;
213         struct vmxnet3_cmd_ring *ring = &tq->cmd_ring;
214         struct vmxnet3_comp_ring *comp_ring = &tq->comp_ring;
215         struct vmxnet3_data_ring *data_ring = &tq->data_ring;
216         int size;
217
218         if (tq != NULL) {
219                 /* Release the cmd_ring mbufs */
220                 vmxnet3_cmd_ring_release_mbufs(&tq->cmd_ring);
221         }
222
223         /* Tx vmxnet rings structure initialization */
224         ring->next2fill = 0;
225         ring->next2comp = 0;
226         ring->gen = VMXNET3_INIT_GEN;
227         comp_ring->next2proc = 0;
228         comp_ring->gen = VMXNET3_INIT_GEN;
229
230         size = sizeof(struct Vmxnet3_TxDesc) * ring->size;
231         size += sizeof(struct Vmxnet3_TxCompDesc) * comp_ring->size;
232         size += sizeof(struct Vmxnet3_TxDataDesc) * data_ring->size;
233
234         memset(ring->base, 0, size);
235 }
236
237 static void
238 vmxnet3_dev_rx_queue_reset(void *rxq)
239 {
240         int i;
241         vmxnet3_rx_queue_t *rq = rxq;
242         struct vmxnet3_cmd_ring *ring0, *ring1;
243         struct vmxnet3_comp_ring *comp_ring;
244         int size;
245
246         if (rq != NULL) {
247                 /* Release both the cmd_rings mbufs */
248                 for (i = 0; i < VMXNET3_RX_CMDRING_SIZE; i++)
249                         vmxnet3_cmd_ring_release_mbufs(&rq->cmd_ring[i]);
250         }
251
252         ring0 = &rq->cmd_ring[0];
253         ring1 = &rq->cmd_ring[1];
254         comp_ring = &rq->comp_ring;
255
256         /* Rx vmxnet rings structure initialization */
257         ring0->next2fill = 0;
258         ring1->next2fill = 0;
259         ring0->next2comp = 0;
260         ring1->next2comp = 0;
261         ring0->gen = VMXNET3_INIT_GEN;
262         ring1->gen = VMXNET3_INIT_GEN;
263         comp_ring->next2proc = 0;
264         comp_ring->gen = VMXNET3_INIT_GEN;
265
266         size = sizeof(struct Vmxnet3_RxDesc) * (ring0->size + ring1->size);
267         size += sizeof(struct Vmxnet3_RxCompDesc) * comp_ring->size;
268
269         memset(ring0->base, 0, size);
270 }
271
272 void
273 vmxnet3_dev_clear_queues(struct rte_eth_dev *dev)
274 {
275         unsigned i;
276
277         PMD_INIT_FUNC_TRACE();
278
279         for (i = 0; i < dev->data->nb_tx_queues; i++) {
280                 struct vmxnet3_tx_queue *txq = dev->data->tx_queues[i];
281
282                 if (txq != NULL) {
283                         txq->stopped = TRUE;
284                         vmxnet3_dev_tx_queue_reset(txq);
285                 }
286         }
287
288         for (i = 0; i < dev->data->nb_rx_queues; i++) {
289                 struct vmxnet3_rx_queue *rxq = dev->data->rx_queues[i];
290
291                 if (rxq != NULL) {
292                         rxq->stopped = TRUE;
293                         vmxnet3_dev_rx_queue_reset(rxq);
294                 }
295         }
296 }
297
298 static inline void
299 vmxnet3_tq_tx_complete(vmxnet3_tx_queue_t *txq)
300 {
301         int completed = 0;
302         struct rte_mbuf *mbuf;
303         vmxnet3_comp_ring_t *comp_ring = &txq->comp_ring;
304         struct Vmxnet3_TxCompDesc *tcd = (struct Vmxnet3_TxCompDesc *)
305                 (comp_ring->base + comp_ring->next2proc);
306
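        /*
         * A completion descriptor is valid for the driver only while its gen
         * bit matches the ring's current gen; the ring's gen is flipped each
         * time next2proc wraps around.
         */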
307         while (tcd->gen == comp_ring->gen) {
308
309                 /* Release cmd_ring descriptor and free mbuf */
310 #ifdef RTE_LIBRTE_VMXNET3_DEBUG_DRIVER
311                 VMXNET3_ASSERT(txq->cmd_ring.base[tcd->txdIdx].txd.eop == 1);
312 #endif
313                 mbuf = txq->cmd_ring.buf_info[tcd->txdIdx].m;
314                 if (unlikely(mbuf == NULL))
315                         rte_panic("EOP desc does not point to a valid mbuf");
316                 else
317                         rte_pktmbuf_free(mbuf);
318
319
320                 txq->cmd_ring.buf_info[tcd->txdIdx].m = NULL;
321                 /* Mark the txd for which tcd was generated as completed */
322                 vmxnet3_cmd_ring_adv_next2comp(&txq->cmd_ring);
323
324                 vmxnet3_comp_ring_adv_next2proc(comp_ring);
325                 tcd = (struct Vmxnet3_TxCompDesc *)(comp_ring->base +
326                                                     comp_ring->next2proc);
327                 completed++;
328         }
329
330         PMD_TX_LOG(DEBUG, "Processed %d tx comps & command descs.", completed);
331 }
332
333 uint16_t
334 vmxnet3_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts,
335                   uint16_t nb_pkts)
336 {
337         uint16_t nb_tx;
338         Vmxnet3_TxDesc *txd = NULL;
339         vmxnet3_buf_info_t *tbi = NULL;
340         struct vmxnet3_hw *hw;
341         struct rte_mbuf *txm;
342         vmxnet3_tx_queue_t *txq = tx_queue;
343
344         hw = txq->hw;
345
346         if (unlikely(txq->stopped)) {
347                 PMD_TX_LOG(DEBUG, "Tx queue is stopped.");
348                 return 0;
349         }
350
351         /* Free up the comp_descriptors aggressively */
352         vmxnet3_tq_tx_complete(txq);
353
354         nb_tx = 0;
355         while (nb_tx < nb_pkts) {
356
357                 if (vmxnet3_cmd_ring_desc_avail(&txq->cmd_ring)) {
358                         int copy_size = 0;
359
360                         txm = tx_pkts[nb_tx];
361                         /* Multi-segment (scatter) packets are not supported yet; drop them */
362                         if (txm->nb_segs != 1) {
363                                 PMD_TX_LOG(DEBUG, "Multi-segment packets not supported yet, dropping!");
364                                 rte_pktmbuf_free(tx_pkts[nb_tx]);
365                                 txq->stats.drop_total++;
366
367                                 nb_tx++;
368                                 continue;
369                         }
370
371                         txd = (Vmxnet3_TxDesc *)(txq->cmd_ring.base + txq->cmd_ring.next2fill);
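                        /*
                         * Frames up to VMXNET3_HDR_COPY_SIZE bytes are copied
                         * into the per-queue data ring; the descriptor address
                         * is then pointed at that data ring slot rather than
                         * at the mbuf (see the copy_size handling below).
                         */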
372                         if (rte_pktmbuf_pkt_len(txm) <= VMXNET3_HDR_COPY_SIZE) {
373                                 struct Vmxnet3_TxDataDesc *tdd;
374
375                                 tdd = txq->data_ring.base + txq->cmd_ring.next2fill;
376                                 copy_size = rte_pktmbuf_pkt_len(txm);
377                                 rte_memcpy(tdd->data, rte_pktmbuf_mtod(txm, char *), copy_size);
378                         }
379
380                         /* Fill the tx descriptor */
381                         tbi = txq->cmd_ring.buf_info + txq->cmd_ring.next2fill;
382                         tbi->bufPA = RTE_MBUF_DATA_DMA_ADDR(txm);
383                         if (copy_size)
384                                 txd->addr = rte_cpu_to_le_64(txq->data_ring.basePA +
385                                                         txq->cmd_ring.next2fill *
386                                                         sizeof(struct Vmxnet3_TxDataDesc));
387                         else
388                                 txd->addr = tbi->bufPA;
389                         txd->len = txm->data_len;
390
391                         /* Mark the last descriptor as End of Packet. */
392                         txd->cq = 1;
393                         txd->eop = 1;
394
395                         /* Add VLAN tag if requested */
396                         if (txm->ol_flags & PKT_TX_VLAN_PKT) {
397                                 txd->ti = 1;
398                                 txd->tci = rte_cpu_to_le_16(txm->vlan_tci);
399                         }
400
401                         /* Record current mbuf for freeing it later in tx complete */
402 #ifdef RTE_LIBRTE_VMXNET3_DEBUG_DRIVER
403                         VMXNET3_ASSERT(txm);
404 #endif
405                         tbi->m = txm;
406
407                         /* Set the offloading mode to default */
408                         txd->hlen = 0;
409                         txd->om = VMXNET3_OM_NONE;
410                         txd->msscof = 0;
411
412                         /* finally flip the GEN bit of the SOP desc  */
413                         txd->gen = txq->cmd_ring.gen;
414                         txq->shared->ctrl.txNumDeferred++;
415
416                         /* move to the next2fill descriptor */
417                         vmxnet3_cmd_ring_adv_next2fill(&txq->cmd_ring);
418                         nb_tx++;
419
420                 } else {
421                         PMD_TX_LOG(DEBUG, "No free tx cmd desc(s)");
422                         txq->stats.drop_total += (nb_pkts - nb_tx);
423                         break;
424                 }
425         }
426
427         PMD_TX_LOG(DEBUG, "vmxnet3 txThreshold: %u", txq->shared->ctrl.txThreshold);
428
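        /*
         * Doorbell writes are batched: TXPROD is only written once the number
         * of deferred descriptors reaches the txThreshold value from the
         * shared queue control area.
         */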
429         if (txq->shared->ctrl.txNumDeferred >= txq->shared->ctrl.txThreshold) {
430
431                 txq->shared->ctrl.txNumDeferred = 0;
432                 /* Notify vSwitch that packets are available. */
433                 VMXNET3_WRITE_BAR0_REG(hw, (VMXNET3_REG_TXPROD + txq->queue_id * VMXNET3_REG_ALIGN),
434                                        txq->cmd_ring.next2fill);
435         }
436
437         return nb_tx;
438 }
439
440 /*
441  *  Allocates mbufs and clusters, and posts rx descriptors with the buffer
442  *  details so that the device can receive packets into those buffers.
443  *      Ring layout:
444  *      Of the two rings, the 1st ring contains buffers of type 0 and type 1.
445  *      bufs_per_pkt is set such that, in the non-LRO case, all the buffers
446  *      required by a frame fit in the 1st ring (1st buf of type 0, rest of type 1).
447  *      The 2nd ring contains only type 1 buffers and is mostly
448  *      used for LRO.
449  *
450  */
451 static inline int
452 vmxnet3_post_rx_bufs(vmxnet3_rx_queue_t *rxq, uint8_t ring_id)
453 {
454         int err = 0;
455         uint32_t i = 0, val = 0;
456         struct vmxnet3_cmd_ring *ring = &rxq->cmd_ring[ring_id];
457
458         if (ring_id == 0) {
459                 /* Usually: One HEAD type buf per packet
460                  * val = (ring->next2fill % rxq->hw->bufs_per_pkt) ?
461                  * VMXNET3_RXD_BTYPE_BODY : VMXNET3_RXD_BTYPE_HEAD;
462                  */
463
464                 /* We use single packet buffer so all heads here */
465                 val = VMXNET3_RXD_BTYPE_HEAD;
466         } else {
467                 /* All BODY type buffers for 2nd ring */
468                 val = VMXNET3_RXD_BTYPE_BODY;
469         }
470
471         while (vmxnet3_cmd_ring_desc_avail(ring) > 0) {
472                 struct Vmxnet3_RxDesc *rxd;
473                 struct rte_mbuf *mbuf;
474                 vmxnet3_buf_info_t *buf_info = &ring->buf_info[ring->next2fill];
475
476                 rxd = (struct Vmxnet3_RxDesc *)(ring->base + ring->next2fill);
477
478                 /* Allocate blank mbuf for the current Rx Descriptor */
479                 mbuf = rte_rxmbuf_alloc(rxq->mp);
480                 if (unlikely(mbuf == NULL)) {
481                         PMD_RX_LOG(ERR, "Error allocating mbuf in %s", __func__);
482                         rxq->stats.rx_buf_alloc_failure++;
483                         err = ENOMEM;
484                         break;
485                 }
486
487                 /*
488                  * Load the mbuf pointer into buf_info[ring->next2fill];
489                  * the buf_info structure is the equivalent of a virtio virtqueue cookie.
490                  */
491                 buf_info->m = mbuf;
492                 buf_info->len = (uint16_t)(mbuf->buf_len -
493                                            RTE_PKTMBUF_HEADROOM);
494                 buf_info->bufPA = RTE_MBUF_DATA_DMA_ADDR_DEFAULT(mbuf);
495
496                 /* Load Rx Descriptor with the buffer's GPA */
497                 rxd->addr = buf_info->bufPA;
498
499                 /* After this point rxd->addr MUST not be NULL */
500                 rxd->btype = val;
501                 rxd->len = buf_info->len;
502                 /* Flip gen bit at the end to change ownership */
503                 rxd->gen = ring->gen;
504
505                 vmxnet3_cmd_ring_adv_next2fill(ring);
506                 i++;
507         }
508
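        /*
         * vmxnet3_cmd_ring_desc_avail() is at most ring->size - 1, so the
         * check below only triggers when the ring is still empty, i.e. no
         * buffer was posted at all.
         */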
509         /* Return error only if no buffers are posted at present */
510         if (vmxnet3_cmd_ring_desc_avail(ring) >= (ring->size - 1))
511                 return -err;
512         else
513                 return i;
514 }
515
516 /*
517  * Process the Rx Completion Ring of given vmxnet3_rx_queue
518  * for nb_pkts burst and return the number of packets received
519  */
520 uint16_t
521 vmxnet3_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t nb_pkts)
522 {
523         uint16_t nb_rx;
524         uint32_t nb_rxd, idx;
525         uint8_t ring_idx;
526         vmxnet3_rx_queue_t *rxq;
527         Vmxnet3_RxCompDesc *rcd;
528         vmxnet3_buf_info_t *rbi;
529         Vmxnet3_RxDesc *rxd;
530         struct rte_mbuf *rxm = NULL;
531         struct vmxnet3_hw *hw;
532
533         nb_rx = 0;
534         ring_idx = 0;
535         nb_rxd = 0;
536         idx = 0;
537
538         rxq = rx_queue;
539         hw = rxq->hw;
540
541         rcd = &rxq->comp_ring.base[rxq->comp_ring.next2proc].rcd;
542
543         if (unlikely(rxq->stopped)) {
544                 PMD_RX_LOG(DEBUG, "Rx queue is stopped.");
545                 return 0;
546         }
547
548         while (rcd->gen == rxq->comp_ring.gen) {
549                 if (nb_rx >= nb_pkts)
550                         break;
551
552                 idx = rcd->rxdIdx;
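                /* rqID identifies the source command ring: qid1 is ring 0, qid2 is ring 1 */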
553                 ring_idx = (uint8_t)((rcd->rqID == rxq->qid1) ? 0 : 1);
554                 rxd = (Vmxnet3_RxDesc *)rxq->cmd_ring[ring_idx].base + idx;
555                 rbi = rxq->cmd_ring[ring_idx].buf_info + idx;
556
557                 if (unlikely(rcd->sop != 1 || rcd->eop != 1)) {
558                         rte_pktmbuf_free_seg(rbi->m);
559                         PMD_RX_LOG(DEBUG, "Packet spread across multiple buffers\n");
560                         goto rcd_done;
561                 }
562
563                 PMD_RX_LOG(DEBUG, "rxd idx: %d ring idx: %d.", idx, ring_idx);
564
565 #ifdef RTE_LIBRTE_VMXNET3_DEBUG_DRIVER
566                 VMXNET3_ASSERT(rcd->len <= rxd->len);
567                 VMXNET3_ASSERT(rbi->m);
568 #endif
569                 if (unlikely(rcd->len == 0)) {
570                         PMD_RX_LOG(DEBUG, "Rx buf was skipped. rxring[%d][%d]\n",
571                                    ring_idx, idx);
572 #ifdef RTE_LIBRTE_VMXNET3_DEBUG_DRIVER
573                         VMXNET3_ASSERT(rcd->sop && rcd->eop);
574 #endif
575                         rte_pktmbuf_free_seg(rbi->m);
576                         goto rcd_done;
577                 }
578
579                 /* We assume each packet arrives in a single buffer */
580                 if (unlikely(rxd->btype != VMXNET3_RXD_BTYPE_HEAD)) {
581                         PMD_RX_LOG(DEBUG,
582                                    "Alert: misbehaving device, incorrect "
583                                    "buffer type used. Packet dropped.");
584                         rte_pktmbuf_free_seg(rbi->m);
585                         goto rcd_done;
586                 }
587 #ifdef RTE_LIBRTE_VMXNET3_DEBUG_DRIVER
588                 VMXNET3_ASSERT(rxd->btype == VMXNET3_RXD_BTYPE_HEAD);
589 #endif
590                 /* Get the packet buffer pointer from buf_info */
591                 rxm = rbi->m;
592
593                 /* Clear descriptor associated buf_info to be reused */
594                 rbi->m = NULL;
595                 rbi->bufPA = 0;
596
597                 /* Update the index that we received a packet */
598                 rxq->cmd_ring[ring_idx].next2comp = idx;
599
600                 /* For RCD with EOP set, check if there is a frame error */
601                 if (unlikely(rcd->err)) {
602                         rxq->stats.drop_total++;
603                         rxq->stats.drop_err++;
604
605                         if (!rcd->fcs) {
606                                 rxq->stats.drop_fcs++;
607                                 PMD_RX_LOG(ERR, "Recv packet dropped due to frame err.");
608                         }
609                         PMD_RX_LOG(ERR, "Error in received packet rcd#:%d rxd:%d",
610                                    (int)(rcd - (struct Vmxnet3_RxCompDesc *)
611                                          rxq->comp_ring.base), rcd->rxdIdx);
612                         rte_pktmbuf_free_seg(rxm);
613                         goto rcd_done;
614                 }
615
616                 /* Check for hardware stripped VLAN tag */
617                 if (rcd->ts) {
618                         PMD_RX_LOG(DEBUG, "Received packet with vlan ID: %d.",
619                                    rcd->tci);
620                         rxm->ol_flags = PKT_RX_VLAN_PKT;
621                         /* Copy vlan tag in packet buffer */
622                         rxm->vlan_tci = rte_le_to_cpu_16((uint16_t)rcd->tci);
623                 } else {
624                         rxm->ol_flags = 0;
625                         rxm->vlan_tci = 0;
626                 }
627
628                 /* Initialize newly received packet buffer */
629                 rxm->port = rxq->port_id;
630                 rxm->nb_segs = 1;
631                 rxm->next = NULL;
632                 rxm->pkt_len = (uint16_t)rcd->len;
633                 rxm->data_len = (uint16_t)rcd->len;
634                 rxm->data_off = RTE_PKTMBUF_HEADROOM;
635
636                 /* Check packet type, checksum errors, etc.; only IPv4 is supported for now. */
637                 if (rcd->v4) {
638                         struct ether_hdr *eth = rte_pktmbuf_mtod(rxm, struct ether_hdr *);
639                         struct ipv4_hdr *ip = (struct ipv4_hdr *)(eth + 1);
640
641                         if (((ip->version_ihl & 0xf) << 2) > (int)sizeof(struct ipv4_hdr))
642                                 rxm->ol_flags |= PKT_RX_IPV4_HDR_EXT;
643                         else
644                                 rxm->ol_flags |= PKT_RX_IPV4_HDR;
645
646                         if (!rcd->cnc) {
647                                 if (!rcd->ipc)
648                                         rxm->ol_flags |= PKT_RX_IP_CKSUM_BAD;
649
650                                 if ((rcd->tcp || rcd->udp) && !rcd->tuc)
651                                         rxm->ol_flags |= PKT_RX_L4_CKSUM_BAD;
652                         }
653                 }
654
655                 rx_pkts[nb_rx++] = rxm;
656 rcd_done:
657                 rxq->cmd_ring[ring_idx].next2comp = idx;
658                 VMXNET3_INC_RING_IDX_ONLY(rxq->cmd_ring[ring_idx].next2comp, rxq->cmd_ring[ring_idx].size);
659
660                 /* Allocate new buffers and repost the descriptors */
661                 vmxnet3_post_rx_bufs(rxq, ring_idx);
662                 if (unlikely(rxq->shared->ctrl.updateRxProd)) {
663                         VMXNET3_WRITE_BAR0_REG(hw, rxprod_reg[ring_idx] + (rxq->queue_id * VMXNET3_REG_ALIGN),
664                                                rxq->cmd_ring[ring_idx].next2fill);
665                 }
666
667                 /* Advance to the next descriptor in comp_ring */
668                 vmxnet3_comp_ring_adv_next2proc(&rxq->comp_ring);
669
670                 rcd = &rxq->comp_ring.base[rxq->comp_ring.next2proc].rcd;
671                 nb_rxd++;
672                 if (nb_rxd > rxq->cmd_ring[0].size) {
673                         PMD_RX_LOG(ERR,
674                                    "Used up quota of receiving packets,"
675                                    " relinquish control.");
676                         break;
677                 }
678         }
679
680         return nb_rx;
681 }
682
683 /*
684  * Create memzone for device rings. malloc can't be used as the physical address is
685  * needed. If the memzone is already created, then this function returns a ptr
686  * to the old one.
687  */
688 static const struct rte_memzone *
689 ring_dma_zone_reserve(struct rte_eth_dev *dev, const char *ring_name,
690                       uint16_t queue_id, uint32_t ring_size, int socket_id)
691 {
692         char z_name[RTE_MEMZONE_NAMESIZE];
693         const struct rte_memzone *mz;
694
695         snprintf(z_name, sizeof(z_name), "%s_%s_%d_%d",
696                         dev->driver->pci_drv.name, ring_name,
697                         dev->data->port_id, queue_id);
698
699         mz = rte_memzone_lookup(z_name);
700         if (mz)
701                 return mz;
702
703         return rte_memzone_reserve_aligned(z_name, ring_size,
704                         socket_id, 0, VMXNET3_RING_BA_ALIGN);
705 }
706
707 int
708 vmxnet3_dev_tx_queue_setup(struct rte_eth_dev *dev,
709                            uint16_t queue_idx,
710                            uint16_t nb_desc,
711                            unsigned int socket_id,
712                            __attribute__((unused)) const struct rte_eth_txconf *tx_conf)
713 {
714         struct vmxnet3_hw *hw = dev->data->dev_private;
715         const struct rte_memzone *mz;
716         struct vmxnet3_tx_queue *txq;
717         struct vmxnet3_cmd_ring *ring;
718         struct vmxnet3_comp_ring *comp_ring;
719         struct vmxnet3_data_ring *data_ring;
720         int size;
721
722         PMD_INIT_FUNC_TRACE();
723
724         if ((tx_conf->txq_flags & ETH_TXQ_FLAGS_NOMULTSEGS) !=
725             ETH_TXQ_FLAGS_NOMULTSEGS) {
726                 PMD_INIT_LOG(ERR, "TX multi-segment packets not supported yet");
727                 return -EINVAL;
728         }
729
730         if ((tx_conf->txq_flags & ETH_TXQ_FLAGS_NOXSUMS) !=
731             ETH_TXQ_FLAGS_NOXSUMS) {
732                 PMD_INIT_LOG(ERR, "TX checksum offload not supported yet");
733                 return -EINVAL;
734         }
735
736         txq = rte_zmalloc("ethdev_tx_queue", sizeof(struct vmxnet3_tx_queue), RTE_CACHE_LINE_SIZE);
737         if (txq == NULL) {
738                 PMD_INIT_LOG(ERR, "Cannot allocate tx queue structure");
739                 return -ENOMEM;
740         }
741
742         txq->queue_id = queue_idx;
743         txq->port_id = dev->data->port_id;
744         txq->shared = &hw->tqd_start[queue_idx];
745         txq->hw = hw;
746         txq->qid = queue_idx;
747         txq->stopped = TRUE;
748
749         ring = &txq->cmd_ring;
750         comp_ring = &txq->comp_ring;
751         data_ring = &txq->data_ring;
752
753         /* Tx vmxnet ring length should be between 512-4096 */
754         if (nb_desc < VMXNET3_DEF_TX_RING_SIZE) {
755                 PMD_INIT_LOG(ERR, "VMXNET3 Tx Ring Size Min: %u",
756                              VMXNET3_DEF_TX_RING_SIZE);
757                 return -EINVAL;
758         } else if (nb_desc > VMXNET3_TX_RING_MAX_SIZE) {
759                 PMD_INIT_LOG(ERR, "VMXNET3 Tx Ring Size Max: %u",
760                              VMXNET3_TX_RING_MAX_SIZE);
761                 return -EINVAL;
762         } else {
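                /* Round nb_desc down to a multiple of the ring size alignment */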
763                 ring->size = nb_desc;
764                 ring->size &= ~VMXNET3_RING_SIZE_MASK;
765         }
766         comp_ring->size = data_ring->size = ring->size;
767
768         /* Tx vmxnet rings structure initialization */
769         ring->next2fill = 0;
770         ring->next2comp = 0;
771         ring->gen = VMXNET3_INIT_GEN;
772         comp_ring->next2proc = 0;
773         comp_ring->gen = VMXNET3_INIT_GEN;
774
775         size = sizeof(struct Vmxnet3_TxDesc) * ring->size;
776         size += sizeof(struct Vmxnet3_TxCompDesc) * comp_ring->size;
777         size += sizeof(struct Vmxnet3_TxDataDesc) * data_ring->size;
778
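        /*
         * A single contiguous memzone backs the Tx command ring, completion
         * ring and data ring, laid out back to back in that order.
         */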
779         mz = ring_dma_zone_reserve(dev, "txdesc", queue_idx, size, socket_id);
780         if (mz == NULL) {
781                 PMD_INIT_LOG(ERR, "ERROR: Creating queue descriptors zone");
782                 return -ENOMEM;
783         }
784         memset(mz->addr, 0, mz->len);
785
786         /* cmd_ring initialization */
787         ring->base = mz->addr;
788         ring->basePA = mz->phys_addr;
789
790         /* comp_ring initialization */
791         comp_ring->base = ring->base + ring->size;
792         comp_ring->basePA = ring->basePA +
793                 (sizeof(struct Vmxnet3_TxDesc) * ring->size);
794
795         /* data_ring initialization */
796         data_ring->base = (Vmxnet3_TxDataDesc *)(comp_ring->base + comp_ring->size);
797         data_ring->basePA = comp_ring->basePA +
798                         (sizeof(struct Vmxnet3_TxCompDesc) * comp_ring->size);
799
800         /* cmd_ring0 buf_info allocation */
801         ring->buf_info = rte_zmalloc("tx_ring_buf_info",
802                                      ring->size * sizeof(vmxnet3_buf_info_t), RTE_CACHE_LINE_SIZE);
803         if (ring->buf_info == NULL) {
804                 PMD_INIT_LOG(ERR, "ERROR: Creating tx_buf_info structure");
805                 return -ENOMEM;
806         }
807
808         /* Update the data portion with txq */
809         dev->data->tx_queues[queue_idx] = txq;
810
811         return 0;
812 }
813
814 int
815 vmxnet3_dev_rx_queue_setup(struct rte_eth_dev *dev,
816                            uint16_t queue_idx,
817                            uint16_t nb_desc,
818                            unsigned int socket_id,
819                            __attribute__((unused)) const struct rte_eth_rxconf *rx_conf,
820                            struct rte_mempool *mp)
821 {
822         const struct rte_memzone *mz;
823         struct vmxnet3_rx_queue *rxq;
824         struct vmxnet3_hw     *hw = dev->data->dev_private;
825         struct vmxnet3_cmd_ring *ring0, *ring1, *ring;
826         struct vmxnet3_comp_ring *comp_ring;
827         int size;
828         uint8_t i;
829         char mem_name[32];
830         uint16_t buf_size;
831
832         PMD_INIT_FUNC_TRACE();
833
834         buf_size = rte_pktmbuf_data_room_size(mp) -
835                 RTE_PKTMBUF_HEADROOM;
836
837         if (dev->data->dev_conf.rxmode.max_rx_pkt_len > buf_size) {
838                 PMD_INIT_LOG(ERR, "buf_size = %u, max_pkt_len = %u, "
839                              "VMXNET3 doesn't support scatter packets yet",
840                              buf_size, dev->data->dev_conf.rxmode.max_rx_pkt_len);
841                 return -EINVAL;
842         }
843
844         rxq = rte_zmalloc("ethdev_rx_queue", sizeof(struct vmxnet3_rx_queue), RTE_CACHE_LINE_SIZE);
845         if (rxq == NULL) {
846                 PMD_INIT_LOG(ERR, "Cannot allocate rx queue structure");
847                 return -ENOMEM;
848         }
849
850         rxq->mp = mp;
851         rxq->queue_id = queue_idx;
852         rxq->port_id = dev->data->port_id;
853         rxq->shared = &hw->rqd_start[queue_idx];
854         rxq->hw = hw;
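        /*
         * Each Rx queue is backed by two command rings; qid1 and qid2 are the
         * IDs the device reports back in the rqID field of Rx completions.
         */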
855         rxq->qid1 = queue_idx;
856         rxq->qid2 = queue_idx + hw->num_rx_queues;
857         rxq->stopped = TRUE;
858
859         ring0 = &rxq->cmd_ring[0];
860         ring1 = &rxq->cmd_ring[1];
861         comp_ring = &rxq->comp_ring;
862
863         /* Rx vmxnet rings length should be between 256-4096 */
864         if (nb_desc < VMXNET3_DEF_RX_RING_SIZE) {
865                 PMD_INIT_LOG(ERR, "VMXNET3 Rx Ring Size Min: 256");
866                 return -EINVAL;
867         } else if (nb_desc > VMXNET3_RX_RING_MAX_SIZE) {
868                 PMD_INIT_LOG(ERR, "VMXNET3 Rx Ring Size Max: 4096");
869                 return -EINVAL;
870         } else {
871                 ring0->size = nb_desc;
872                 ring0->size &= ~VMXNET3_RING_SIZE_MASK;
873                 ring1->size = ring0->size;
874         }
875
876         comp_ring->size = ring0->size + ring1->size;
877
878         /* Rx vmxnet rings structure initialization */
879         ring0->next2fill = 0;
880         ring1->next2fill = 0;
881         ring0->next2comp = 0;
882         ring1->next2comp = 0;
883         ring0->gen = VMXNET3_INIT_GEN;
884         ring1->gen = VMXNET3_INIT_GEN;
885         comp_ring->next2proc = 0;
886         comp_ring->gen = VMXNET3_INIT_GEN;
887
888         size = sizeof(struct Vmxnet3_RxDesc) * (ring0->size + ring1->size);
889         size += sizeof(struct Vmxnet3_RxCompDesc) * comp_ring->size;
890
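        /*
         * A single contiguous memzone backs cmd ring 0, cmd ring 1 and the
         * completion ring, laid out back to back in that order.
         */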
891         mz = ring_dma_zone_reserve(dev, "rxdesc", queue_idx, size, socket_id);
892         if (mz == NULL) {
893                 PMD_INIT_LOG(ERR, "ERROR: Creating queue descriptors zone");
894                 return -ENOMEM;
895         }
896         memset(mz->addr, 0, mz->len);
897
898         /* cmd_ring0 initialization */
899         ring0->base = mz->addr;
900         ring0->basePA = mz->phys_addr;
901
902         /* cmd_ring1 initialization */
903         ring1->base = ring0->base + ring0->size;
904         ring1->basePA = ring0->basePA + sizeof(struct Vmxnet3_RxDesc) * ring0->size;
905
906         /* comp_ring initialization */
907         comp_ring->base = ring1->base + ring1->size;
908         comp_ring->basePA = ring1->basePA + sizeof(struct Vmxnet3_RxDesc) *
909                 ring1->size;
910
911         /* cmd_ring0-cmd_ring1 buf_info allocation */
912         for (i = 0; i < VMXNET3_RX_CMDRING_SIZE; i++) {
913
914                 ring = &rxq->cmd_ring[i];
915                 ring->rid = i;
916                 snprintf(mem_name, sizeof(mem_name), "rx_ring_%d_buf_info", i);
917
918                 ring->buf_info = rte_zmalloc(mem_name, ring->size * sizeof(vmxnet3_buf_info_t), RTE_CACHE_LINE_SIZE);
919                 if (ring->buf_info == NULL) {
920                         PMD_INIT_LOG(ERR, "ERROR: Creating rx_buf_info structure");
921                         return -ENOMEM;
922                 }
923         }
924
925         /* Update the data portion with rxq */
926         dev->data->rx_queues[queue_idx] = rxq;
927
928         return 0;
929 }
930
931 /*
932  * Initializes Receive Unit
933  * Load mbufs in rx queue in advance
934  */
935 int
936 vmxnet3_dev_rxtx_init(struct rte_eth_dev *dev)
937 {
938         struct vmxnet3_hw *hw = dev->data->dev_private;
939
940         int i, ret;
941         uint8_t j;
942
943         PMD_INIT_FUNC_TRACE();
944
945         for (i = 0; i < hw->num_rx_queues; i++) {
946                 vmxnet3_rx_queue_t *rxq = dev->data->rx_queues[i];
947
948                 for (j = 0; j < VMXNET3_RX_CMDRING_SIZE; j++) {
949                         /* Posting to an empty ring fills the entire ring */
950                         ret = vmxnet3_post_rx_bufs(rxq, j);
951                         if (ret <= 0) {
952                                 PMD_INIT_LOG(ERR, "ERROR: Posting Rxq: %d buffers ring: %d", i, j);
953                                 return -ret;
954                         }
955                         /* Update the device with next2fill so the posted mbufs can be used for incoming packets */
956                         if (unlikely(rxq->shared->ctrl.updateRxProd)) {
957                                 VMXNET3_WRITE_BAR0_REG(hw, rxprod_reg[j] + (rxq->queue_id * VMXNET3_REG_ALIGN),
958                                                        rxq->cmd_ring[j].next2fill);
959                         }
960                 }
961                 rxq->stopped = FALSE;
962         }
963
964         for (i = 0; i < dev->data->nb_tx_queues; i++) {
965                 struct vmxnet3_tx_queue *txq = dev->data->tx_queues[i];
966
967                 txq->stopped = FALSE;
968         }
969
970         return 0;
971 }
972
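/* Default 40-byte Toeplitz RSS key (the commonly used Intel default key) */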
973 static uint8_t rss_intel_key[40] = {
974         0x6D, 0x5A, 0x56, 0xDA, 0x25, 0x5B, 0x0E, 0xC2,
975         0x41, 0x67, 0x25, 0x3D, 0x43, 0xA3, 0x8F, 0xB0,
976         0xD0, 0xCA, 0x2B, 0xCB, 0xAE, 0x7B, 0x30, 0xB4,
977         0x77, 0xCB, 0x2D, 0xA3, 0x80, 0x30, 0xF2, 0x0C,
978         0x6A, 0x42, 0xB7, 0x3B, 0xBE, 0xAC, 0x01, 0xFA,
979 };
980
981 /*
982  * Configure RSS feature
983  */
984 int
985 vmxnet3_rss_configure(struct rte_eth_dev *dev)
986 {
987         struct vmxnet3_hw *hw = dev->data->dev_private;
988         struct VMXNET3_RSSConf *dev_rss_conf;
989         struct rte_eth_rss_conf *port_rss_conf;
990         uint64_t rss_hf;
991         uint8_t i, j;
992
993         PMD_INIT_FUNC_TRACE();
994
995         dev_rss_conf = hw->rss_conf;
996         port_rss_conf = &dev->data->dev_conf.rx_adv_conf.rss_conf;
997
998         /* loading hashFunc */
999         dev_rss_conf->hashFunc = VMXNET3_RSS_HASH_FUNC_TOEPLITZ;
1000         /* loading hashKeySize */
1001         dev_rss_conf->hashKeySize = VMXNET3_RSS_MAX_KEY_SIZE;
1002         /* loading indTableSize: must not exceed VMXNET3_RSS_MAX_IND_TABLE_SIZE (128) */
1003         dev_rss_conf->indTableSize = (uint16_t)(hw->num_rx_queues * 4);
1004
1005         if (port_rss_conf->rss_key == NULL) {
1006                 /* Default hash key */
1007                 port_rss_conf->rss_key = rss_intel_key;
1008         }
1009
1010         /* loading hashKey */
1011         memcpy(&dev_rss_conf->hashKey[0], port_rss_conf->rss_key, dev_rss_conf->hashKeySize);
1012
1013         /* loading indTable: spread entries across the Rx queues round-robin */
1014         for (i = 0, j = 0; i < dev_rss_conf->indTableSize; i++, j++) {
1015                 if (j == dev->data->nb_rx_queues)
1016                         j = 0;
1017                 dev_rss_conf->indTable[i] = j;
1018         }
1019
1020         /* loading hashType */
1021         dev_rss_conf->hashType = 0;
1022         rss_hf = port_rss_conf->rss_hf & VMXNET3_RSS_OFFLOAD_ALL;
1023         if (rss_hf & ETH_RSS_IPV4)
1024                 dev_rss_conf->hashType |= VMXNET3_RSS_HASH_TYPE_IPV4;
1025         if (rss_hf & ETH_RSS_NONFRAG_IPV4_TCP)
1026                 dev_rss_conf->hashType |= VMXNET3_RSS_HASH_TYPE_TCP_IPV4;
1027         if (rss_hf & ETH_RSS_IPV6)
1028                 dev_rss_conf->hashType |= VMXNET3_RSS_HASH_TYPE_IPV6;
1029         if (rss_hf & ETH_RSS_NONFRAG_IPV6_TCP)
1030                 dev_rss_conf->hashType |= VMXNET3_RSS_HASH_TYPE_TCP_IPV6;
1031
1032         return VMXNET3_SUCCESS;
1033 }