add prefix to cache line macros
[dpdk.git] / lib / librte_pmd_vmxnet3 / vmxnet3_rxtx.c
1 /*-
2  *   BSD LICENSE
3  *
4  *   Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
5  *   All rights reserved.
6  *
7  *   Redistribution and use in source and binary forms, with or without
8  *   modification, are permitted provided that the following conditions
9  *   are met:
10  *
11  *     * Redistributions of source code must retain the above copyright
12  *       notice, this list of conditions and the following disclaimer.
13  *     * Redistributions in binary form must reproduce the above copyright
14  *       notice, this list of conditions and the following disclaimer in
15  *       the documentation and/or other materials provided with the
16  *       distribution.
17  *     * Neither the name of Intel Corporation nor the names of its
18  *       contributors may be used to endorse or promote products derived
19  *       from this software without specific prior written permission.
20  *
21  *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
22  *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
23  *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
24  *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
25  *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
26  *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
27  *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
28  *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
29  *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
30  *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
31  *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
32  */
33
34 #include <sys/queue.h>
35
36 #include <stdio.h>
37 #include <stdlib.h>
38 #include <string.h>
39 #include <errno.h>
40 #include <stdint.h>
41 #include <stdarg.h>
42 #include <unistd.h>
43 #include <inttypes.h>
44
45 #include <rte_byteorder.h>
46 #include <rte_common.h>
47 #include <rte_cycles.h>
48 #include <rte_log.h>
49 #include <rte_debug.h>
50 #include <rte_interrupts.h>
51 #include <rte_pci.h>
52 #include <rte_memory.h>
53 #include <rte_memzone.h>
54 #include <rte_launch.h>
55 #include <rte_tailq.h>
56 #include <rte_eal.h>
57 #include <rte_per_lcore.h>
58 #include <rte_lcore.h>
59 #include <rte_atomic.h>
60 #include <rte_branch_prediction.h>
61 #include <rte_ring.h>
62 #include <rte_mempool.h>
63 #include <rte_malloc.h>
64 #include <rte_mbuf.h>
65 #include <rte_ether.h>
66 #include <rte_ethdev.h>
67 #include <rte_prefetch.h>
68 #include <rte_ip.h>
69 #include <rte_udp.h>
70 #include <rte_tcp.h>
71 #include <rte_sctp.h>
72 #include <rte_string_fns.h>
73 #include <rte_errno.h>
74
75 #include "vmxnet3/vmxnet3_defs.h"
76 #include "vmxnet3_ring.h"
77
78 #include "vmxnet3_logs.h"
79 #include "vmxnet3_ethdev.h"
80
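/*
 * Physical (DMA) address of an mbuf's data area: the buffer physical address
 * plus either the current data offset, or the default headroom for freshly
 * allocated mbufs whose data_off has not been adjusted yet.
 */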
81 #define RTE_MBUF_DATA_DMA_ADDR(mb) \
82         (uint64_t) ((mb)->buf_physaddr + (mb)->data_off)
83
84 #define RTE_MBUF_DATA_DMA_ADDR_DEFAULT(mb) \
85         (uint64_t) ((mb)->buf_physaddr + RTE_PKTMBUF_HEADROOM)
86
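/* BAR0 producer-index registers, one per Rx command ring (ring 0 and ring 1) */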
87 static uint32_t rxprod_reg[2] = {VMXNET3_REG_RXPROD, VMXNET3_REG_RXPROD2};
88
89 static inline int vmxnet3_post_rx_bufs(vmxnet3_rx_queue_t* , uint8_t);
90 static inline void vmxnet3_tq_tx_complete(vmxnet3_tx_queue_t *);
91 #ifdef RTE_LIBRTE_VMXNET3_DEBUG_DRIVER_NOT_USED
92 static void vmxnet3_rxq_dump(struct vmxnet3_rx_queue *);
93 static void vmxnet3_txq_dump(struct vmxnet3_tx_queue *);
94 #endif
95
96 static inline struct rte_mbuf *
97 rte_rxmbuf_alloc(struct rte_mempool *mp)
98 {
99         struct rte_mbuf *m;
100
101         m = __rte_mbuf_raw_alloc(mp);
102         __rte_mbuf_sanity_check_raw(m, 0);
103         return m;
104 }
105
106 #ifdef RTE_LIBRTE_VMXNET3_DEBUG_DRIVER_NOT_USED
107 static void
108 vmxnet3_rxq_dump(struct vmxnet3_rx_queue *rxq)
109 {
110         uint32_t avail = 0;
111
112         if (rxq == NULL)
113                 return;
114
115         PMD_RX_LOG(DEBUG,
116                    "RXQ: cmd0 base : 0x%p cmd1 base : 0x%p comp ring base : 0x%p.",
117                    rxq->cmd_ring[0].base, rxq->cmd_ring[1].base, rxq->comp_ring.base);
118         PMD_RX_LOG(DEBUG,
119                    "RXQ: cmd0 basePA : 0x%lx cmd1 basePA : 0x%lx comp ring basePA : 0x%lx.",
120                    (unsigned long)rxq->cmd_ring[0].basePA,
121                    (unsigned long)rxq->cmd_ring[1].basePA,
122                    (unsigned long)rxq->comp_ring.basePA);
123
124         avail = vmxnet3_cmd_ring_desc_avail(&rxq->cmd_ring[0]);
125         PMD_RX_LOG(DEBUG,
126                    "RXQ:cmd0: size=%u; free=%u; next2proc=%u; queued=%u",
127                    (uint32_t)rxq->cmd_ring[0].size, avail,
128                    rxq->comp_ring.next2proc,
129                    rxq->cmd_ring[0].size - avail);
130
131         avail = vmxnet3_cmd_ring_desc_avail(&rxq->cmd_ring[1]);
132         PMD_RX_LOG(DEBUG, "RXQ:cmd1 size=%u; free=%u; next2proc=%u; queued=%u",
133                    (uint32_t)rxq->cmd_ring[1].size, avail, rxq->comp_ring.next2proc,
134                    rxq->cmd_ring[1].size - avail);
135
136 }
137
138 static void
139 vmxnet3_txq_dump(struct vmxnet3_tx_queue *txq)
140 {
141         uint32_t avail = 0;
142
143         if (txq == NULL)
144                 return;
145
146         PMD_TX_LOG(DEBUG, "TXQ: cmd base : 0x%p comp ring base : 0x%p data ring base : 0x%p.",
147                    txq->cmd_ring.base, txq->comp_ring.base, txq->data_ring.base);
148         PMD_TX_LOG(DEBUG, "TXQ: cmd basePA : 0x%lx comp ring basePA : 0x%lx data ring basePA : 0x%lx.",
149                    (unsigned long)txq->cmd_ring.basePA,
150                    (unsigned long)txq->comp_ring.basePA,
151                    (unsigned long)txq->data_ring.basePA);
152
153         avail = vmxnet3_cmd_ring_desc_avail(&txq->cmd_ring);
154         PMD_TX_LOG(DEBUG, "TXQ: size=%u; free=%u; next2proc=%u; queued=%u",
155                    (uint32_t)txq->cmd_ring.size, avail,
156                    txq->comp_ring.next2proc, txq->cmd_ring.size - avail);
157 }
158 #endif
159
160 static inline void
161 vmxnet3_cmd_ring_release_mbufs(vmxnet3_cmd_ring_t *ring)
162 {
163         while (ring->next2comp != ring->next2fill) {
164                 /* No need to worry about tx desc ownership, device is quiesced by now. */
165                 vmxnet3_buf_info_t *buf_info = ring->buf_info + ring->next2comp;
166
167                 if (buf_info->m) {
168                         rte_pktmbuf_free(buf_info->m);
169                         buf_info->m = NULL;
170                         buf_info->bufPA = 0;
171                         buf_info->len = 0;
172                 }
173                 vmxnet3_cmd_ring_adv_next2comp(ring);
174         }
175 }
176
177 static void
178 vmxnet3_cmd_ring_release(vmxnet3_cmd_ring_t *ring)
179 {
180         vmxnet3_cmd_ring_release_mbufs(ring);
181         rte_free(ring->buf_info);
182         ring->buf_info = NULL;
183 }
184
185
186 void
187 vmxnet3_dev_tx_queue_release(void *txq)
188 {
189         vmxnet3_tx_queue_t *tq = txq;
190
191         if (tq != NULL) {
192                 /* Release the cmd_ring */
193                 vmxnet3_cmd_ring_release(&tq->cmd_ring);
194         }
195 }
196
197 void
198 vmxnet3_dev_rx_queue_release(void *rxq)
199 {
200         int i;
201         vmxnet3_rx_queue_t *rq = rxq;
202
203         if (rq != NULL) {
204                 /* Release both the cmd_rings */
205                 for (i = 0; i < VMXNET3_RX_CMDRING_SIZE; i++)
206                         vmxnet3_cmd_ring_release(&rq->cmd_ring[i]);
207         }
208 }
209
210 static void
211 vmxnet3_dev_tx_queue_reset(void *txq)
212 {
213         vmxnet3_tx_queue_t *tq = txq;
214         struct vmxnet3_cmd_ring *ring = &tq->cmd_ring;
215         struct vmxnet3_comp_ring *comp_ring = &tq->comp_ring;
216         struct vmxnet3_data_ring *data_ring = &tq->data_ring;
217         int size;
218
219         if (tq != NULL) {
220                 /* Release the cmd_ring mbufs */
221                 vmxnet3_cmd_ring_release_mbufs(&tq->cmd_ring);
222         }
223
224         /* Tx vmxnet rings structure initialization */
225         ring->next2fill = 0;
226         ring->next2comp = 0;
227         ring->gen = VMXNET3_INIT_GEN;
228         comp_ring->next2proc = 0;
229         comp_ring->gen = VMXNET3_INIT_GEN;
230
231         size = sizeof(struct Vmxnet3_TxDesc) * ring->size;
232         size += sizeof(struct Vmxnet3_TxCompDesc) * comp_ring->size;
233         size += sizeof(struct Vmxnet3_TxDataDesc) * data_ring->size;
234
235         memset(ring->base, 0, size);
236 }
237
238 static void
239 vmxnet3_dev_rx_queue_reset(void *rxq)
240 {
241         int i;
242         vmxnet3_rx_queue_t *rq = rxq;
243         struct vmxnet3_cmd_ring *ring0, *ring1;
244         struct vmxnet3_comp_ring *comp_ring;
245         int size;
246
247         if (rq != NULL) {
248                 /* Release both the cmd_rings mbufs */
249                 for (i = 0; i < VMXNET3_RX_CMDRING_SIZE; i++)
250                         vmxnet3_cmd_ring_release_mbufs(&rq->cmd_ring[i]);
251         }
252
253         ring0 = &rq->cmd_ring[0];
254         ring1 = &rq->cmd_ring[1];
255         comp_ring = &rq->comp_ring;
256
257         /* Rx vmxnet rings structure initialization */
258         ring0->next2fill = 0;
259         ring1->next2fill = 0;
260         ring0->next2comp = 0;
261         ring1->next2comp = 0;
262         ring0->gen = VMXNET3_INIT_GEN;
263         ring1->gen = VMXNET3_INIT_GEN;
264         comp_ring->next2proc = 0;
265         comp_ring->gen = VMXNET3_INIT_GEN;
266
267         size = sizeof(struct Vmxnet3_RxDesc) * (ring0->size + ring1->size);
268         size += sizeof(struct Vmxnet3_RxCompDesc) * comp_ring->size;
269
270         memset(ring0->base, 0, size);
271 }
272
273 void
274 vmxnet3_dev_clear_queues(struct rte_eth_dev *dev)
275 {
276         unsigned i;
277
278         PMD_INIT_FUNC_TRACE();
279
280         for (i = 0; i < dev->data->nb_tx_queues; i++) {
281                 struct vmxnet3_tx_queue *txq = dev->data->tx_queues[i];
282
283                 if (txq != NULL) {
284                         txq->stopped = TRUE;
285                         vmxnet3_dev_tx_queue_reset(txq);
286                 }
287         }
288
289         for (i = 0; i < dev->data->nb_rx_queues; i++) {
290                 struct vmxnet3_rx_queue *rxq = dev->data->rx_queues[i];
291
292                 if (rxq != NULL) {
293                         rxq->stopped = TRUE;
294                         vmxnet3_dev_rx_queue_reset(rxq);
295                 }
296         }
297 }
298
299 static inline void
300 vmxnet3_tq_tx_complete(vmxnet3_tx_queue_t *txq)
301 {
302         int completed = 0;
303         struct rte_mbuf *mbuf;
304         vmxnet3_comp_ring_t *comp_ring = &txq->comp_ring;
305         struct Vmxnet3_TxCompDesc *tcd = (struct Vmxnet3_TxCompDesc *)
306                 (comp_ring->base + comp_ring->next2proc);
307
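        /*
         * A completion descriptor whose gen bit matches the completion ring's
         * current gen has been written back by the device and can be processed.
         */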
308         while (tcd->gen == comp_ring->gen) {
309
310                 /* Release cmd_ring descriptor and free mbuf */
311 #ifdef RTE_LIBRTE_VMXNET3_DEBUG_DRIVER
312                 VMXNET3_ASSERT(txq->cmd_ring.base[tcd->txdIdx].txd.eop == 1);
313 #endif
314                 mbuf = txq->cmd_ring.buf_info[tcd->txdIdx].m;
315                 if (unlikely(mbuf == NULL))
316                         rte_panic("EOP desc does not point to a valid mbuf");
317                 else
318                         rte_pktmbuf_free(mbuf);
319
320
321                 txq->cmd_ring.buf_info[tcd->txdIdx].m = NULL;
322                 /* Mark the txd for which tcd was generated as completed */
323                 vmxnet3_cmd_ring_adv_next2comp(&txq->cmd_ring);
324
325                 vmxnet3_comp_ring_adv_next2proc(comp_ring);
326                 tcd = (struct Vmxnet3_TxCompDesc *)(comp_ring->base +
327                                                     comp_ring->next2proc);
328                 completed++;
329         }
330
331         PMD_TX_LOG(DEBUG, "Processed %d tx comps & command descs.", completed);
332 }
333
334 uint16_t
335 vmxnet3_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts,
336                   uint16_t nb_pkts)
337 {
338         uint16_t nb_tx;
339         Vmxnet3_TxDesc *txd = NULL;
340         vmxnet3_buf_info_t *tbi = NULL;
341         struct vmxnet3_hw *hw;
342         struct rte_mbuf *txm;
343         vmxnet3_tx_queue_t *txq = tx_queue;
344
345         hw = txq->hw;
346
347         if (unlikely(txq->stopped)) {
348                 PMD_TX_LOG(DEBUG, "Tx queue is stopped.");
349                 return 0;
350         }
351
352         /* Free up the comp_descriptors aggressively */
353         vmxnet3_tq_tx_complete(txq);
354
355         nb_tx = 0;
356         while (nb_tx < nb_pkts) {
357
358                 if (vmxnet3_cmd_ring_desc_avail(&txq->cmd_ring)) {
359                         int copy_size = 0;
360
361                         txm = tx_pkts[nb_tx];
362                         /* Multi-segment (scatter) packets are not supported yet; drop them */
363                         if (txm->nb_segs != 1) {
364                                 PMD_TX_LOG(DEBUG, "Multi-segment packets not supported yet, dropping!");
365                                 rte_pktmbuf_free(tx_pkts[nb_tx]);
366                                 txq->stats.drop_total++;
367
368                                 nb_tx++;
369                                 continue;
370                         }
371
372                         /* The MTU check accounts for the Ethernet header length */
373                         if (txm->data_len > (hw->cur_mtu + ETHER_HDR_LEN)) {
374                                 PMD_TX_LOG(DEBUG, "Packet data_len higher than MTU");
375                                 rte_pktmbuf_free(tx_pkts[nb_tx]);
376                                 txq->stats.drop_total++;
377
378                                 nb_tx++;
379                                 continue;
380                         }
381
382                         txd = (Vmxnet3_TxDesc *)(txq->cmd_ring.base + txq->cmd_ring.next2fill);
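                        /*
                         * Small packets (up to VMXNET3_HDR_COPY_SIZE bytes) are
                         * copied into the data ring; the Tx descriptor will then
                         * point at the data ring entry instead of the mbuf.
                         */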
383                         if (rte_pktmbuf_pkt_len(txm) <= VMXNET3_HDR_COPY_SIZE) {
384                                 struct Vmxnet3_TxDataDesc *tdd;
385
386                                 tdd = txq->data_ring.base + txq->cmd_ring.next2fill;
387                                 copy_size = rte_pktmbuf_pkt_len(txm);
388                                 rte_memcpy(tdd->data, rte_pktmbuf_mtod(txm, char *), copy_size);
389                         }
390
391                         /* Fill the tx descriptor */
392                         tbi = txq->cmd_ring.buf_info + txq->cmd_ring.next2fill;
393                         tbi->bufPA = RTE_MBUF_DATA_DMA_ADDR(txm);
394                         if (copy_size)
395                                 txd->addr = rte_cpu_to_le_64(txq->data_ring.basePA +
396                                                         txq->cmd_ring.next2fill *
397                                                         sizeof(struct Vmxnet3_TxDataDesc));
398                         else
399                                 txd->addr = tbi->bufPA;
400                         txd->len = txm->data_len;
401
402                         /* Mark the last descriptor as End of Packet. */
403                         txd->cq = 1;
404                         txd->eop = 1;
405
406                         /* Add VLAN tag if requested */
407                         if (txm->ol_flags & PKT_TX_VLAN_PKT) {
408                                 txd->ti = 1;
409                                 txd->tci = rte_cpu_to_le_16(txm->vlan_tci);
410                         }
411
412                         /* Record the current mbuf so it can be freed during tx completion */
413 #ifdef RTE_LIBRTE_VMXNET3_DEBUG_DRIVER
414                         VMXNET3_ASSERT(txm);
415 #endif
416                         tbi->m = txm;
417
418                         /* Set the offloading mode to default */
419                         txd->hlen = 0;
420                         txd->om = VMXNET3_OM_NONE;
421                         txd->msscof = 0;
422
423                         /* finally flip the GEN bit of the SOP desc  */
424                         txd->gen = txq->cmd_ring.gen;
425                         txq->shared->ctrl.txNumDeferred++;
426
427                         /* move to the next2fill descriptor */
428                         vmxnet3_cmd_ring_adv_next2fill(&txq->cmd_ring);
429                         nb_tx++;
430
431                 } else {
432                         PMD_TX_LOG(DEBUG, "No free tx cmd desc(s)");
433                         txq->stats.drop_total += (nb_pkts - nb_tx);
434                         break;
435                 }
436         }
437
438         PMD_TX_LOG(DEBUG, "vmxnet3 txThreshold: %u", txq->shared->ctrl.txThreshold);
439
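        /*
         * Doorbell writes are batched: the device is notified only once the
         * number of deferred descriptors reaches txThreshold.
         */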
440         if (txq->shared->ctrl.txNumDeferred >= txq->shared->ctrl.txThreshold) {
441
442                 txq->shared->ctrl.txNumDeferred = 0;
443                 /* Notify vSwitch that packets are available. */
444                 VMXNET3_WRITE_BAR0_REG(hw, (VMXNET3_REG_TXPROD + txq->queue_id * VMXNET3_REG_ALIGN),
445                                        txq->cmd_ring.next2fill);
446         }
447
448         return nb_tx;
449 }
450
451 /*
452  *  Allocates mbufs and posts Rx descriptors with the buffer details
453  *  so that the device can receive packets into those buffers.
454  *      Ring layout:
455  *      Among the two rings, the 1st ring contains buffers of type 0 and type 1.
456  *      bufs_per_pkt is set such that for non-LRO cases all the buffers required
457  *      by a frame fit in the 1st ring (1st buf of type 0, the rest of type 1).
458  *      The 2nd ring contains buffers of type 1 alone and is mostly used
459  *      only for LRO.
460  *
461  */
462 static inline int
463 vmxnet3_post_rx_bufs(vmxnet3_rx_queue_t *rxq, uint8_t ring_id)
464 {
465         int err = 0;
466         uint32_t i = 0, val = 0;
467         struct vmxnet3_cmd_ring *ring = &rxq->cmd_ring[ring_id];
468
469         if (ring_id == 0) {
470                 /* Usually: One HEAD type buf per packet
471                  * val = (ring->next2fill % rxq->hw->bufs_per_pkt) ?
472                  * VMXNET3_RXD_BTYPE_BODY : VMXNET3_RXD_BTYPE_HEAD;
473                  */
474
475                 /* We use a single buffer per packet, so all descriptors here are HEAD type */
476                 val = VMXNET3_RXD_BTYPE_HEAD;
477         } else {
478                 /* All BODY type buffers for 2nd ring */
479                 val = VMXNET3_RXD_BTYPE_BODY;
480         }
481
482         while (vmxnet3_cmd_ring_desc_avail(ring) > 0) {
483                 struct Vmxnet3_RxDesc *rxd;
484                 struct rte_mbuf *mbuf;
485                 vmxnet3_buf_info_t *buf_info = &ring->buf_info[ring->next2fill];
486
487                 rxd = (struct Vmxnet3_RxDesc *)(ring->base + ring->next2fill);
488
489                 /* Allocate blank mbuf for the current Rx Descriptor */
490                 mbuf = rte_rxmbuf_alloc(rxq->mp);
491                 if (unlikely(mbuf == NULL)) {
492                         PMD_RX_LOG(ERR, "Error allocating mbuf in %s", __func__);
493                         rxq->stats.rx_buf_alloc_failure++;
494                         err = ENOMEM;
495                         break;
496                 }
497
498                 /*
499                  * Store the mbuf pointer in buf_info for this descriptor;
500                  * buf_info plays the same role as the cookie in a virtio virtqueue.
501                  */
502                 buf_info->m = mbuf;
503                 buf_info->len = (uint16_t)(mbuf->buf_len -
504                                            RTE_PKTMBUF_HEADROOM);
505                 buf_info->bufPA = RTE_MBUF_DATA_DMA_ADDR_DEFAULT(mbuf);
506
507                 /* Load Rx Descriptor with the buffer's GPA */
508                 rxd->addr = buf_info->bufPA;
509
510                 /* After this point rxd->addr MUST not be NULL */
511                 rxd->btype = val;
512                 rxd->len = buf_info->len;
513                 /* Flip gen bit at the end to change ownership */
514                 rxd->gen = ring->gen;
515
516                 vmxnet3_cmd_ring_adv_next2fill(ring);
517                 i++;
518         }
519
520         /* Return error only if no buffers are posted at present */
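        /*
         * An empty ring reports size - 1 free descriptors (one slot stays
         * unused so a full ring can be told apart from an empty one), so the
         * condition below means nothing was posted.
         */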
521         if (vmxnet3_cmd_ring_desc_avail(ring) >= (ring->size - 1))
522                 return -err;
523         else
524                 return i;
525 }
526
527 /*
528  * Process the Rx Completion Ring of given vmxnet3_rx_queue
529  * for nb_pkts burst and return the number of packets received
530  */
531 uint16_t
532 vmxnet3_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t nb_pkts)
533 {
534         uint16_t nb_rx;
535         uint32_t nb_rxd, idx;
536         uint8_t ring_idx;
537         vmxnet3_rx_queue_t *rxq;
538         Vmxnet3_RxCompDesc *rcd;
539         vmxnet3_buf_info_t *rbi;
540         Vmxnet3_RxDesc *rxd;
541         struct rte_mbuf *rxm = NULL;
542         struct vmxnet3_hw *hw;
543
544         nb_rx = 0;
545         ring_idx = 0;
546         nb_rxd = 0;
547         idx = 0;
548
549         rxq = rx_queue;
550         hw = rxq->hw;
551
552         rcd = &rxq->comp_ring.base[rxq->comp_ring.next2proc].rcd;
553
554         if (unlikely(rxq->stopped)) {
555                 PMD_RX_LOG(DEBUG, "Rx queue is stopped.");
556                 return 0;
557         }
558
559         while (rcd->gen == rxq->comp_ring.gen) {
560                 if (nb_rx >= nb_pkts)
561                         break;
562
563                 idx = rcd->rxdIdx;
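                /*
                 * rqID identifies which command ring this completion refers to:
                 * qid1 maps to cmd_ring[0], qid2 maps to cmd_ring[1].
                 */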
564                 ring_idx = (uint8_t)((rcd->rqID == rxq->qid1) ? 0 : 1);
565                 rxd = (Vmxnet3_RxDesc *)rxq->cmd_ring[ring_idx].base + idx;
566                 rbi = rxq->cmd_ring[ring_idx].buf_info + idx;
567
568                 if (unlikely(rcd->sop != 1 || rcd->eop != 1)) {
569                         rte_pktmbuf_free_seg(rbi->m);
570                         PMD_RX_LOG(DEBUG, "Packet spread across multiple buffers\n");
571                         goto rcd_done;
572                 }
573
574                 PMD_RX_LOG(DEBUG, "rxd idx: %d ring idx: %d.", idx, ring_idx);
575
576 #ifdef RTE_LIBRTE_VMXNET3_DEBUG_DRIVER
577                 VMXNET3_ASSERT(rcd->len <= rxd->len);
578                 VMXNET3_ASSERT(rbi->m);
579 #endif
580                 if (unlikely(rcd->len == 0)) {
581                         PMD_RX_LOG(DEBUG, "Rx buf was skipped. rxring[%d][%d]\n",
582                                    ring_idx, idx);
583 #ifdef RTE_LIBRTE_VMXNET3_DEBUG_DRIVER
584                         VMXNET3_ASSERT(rcd->sop && rcd->eop);
585 #endif
586                         rte_pktmbuf_free_seg(rbi->m);
587                         goto rcd_done;
588                 }
589
590                 /* Assume the packet arrives in a single buffer */
591                 if (unlikely(rxd->btype != VMXNET3_RXD_BTYPE_HEAD)) {
592                         PMD_RX_LOG(DEBUG,
593                                    "Alert: misbehaving device, incorrect "
594                                    "buffer type used. Packet dropped.");
595                         rte_pktmbuf_free_seg(rbi->m);
596                         goto rcd_done;
597                 }
598 #ifdef RTE_LIBRTE_VMXNET3_DEBUG_DRIVER
599                 VMXNET3_ASSERT(rxd->btype == VMXNET3_RXD_BTYPE_HEAD);
600 #endif
601                 /* Get the packet buffer pointer from buf_info */
602                 rxm = rbi->m;
603
604                 /* Clear descriptor associated buf_info to be reused */
605                 rbi->m = NULL;
606                 rbi->bufPA = 0;
607
608                 /* Record that a packet was received at this descriptor index */
609                 rxq->cmd_ring[ring_idx].next2comp = idx;
610
611                 /* For an RCD with EOP set, check whether there is a frame error */
612                 if (unlikely(rcd->err)) {
613                         rxq->stats.drop_total++;
614                         rxq->stats.drop_err++;
615
616                         if (!rcd->fcs) {
617                                 rxq->stats.drop_fcs++;
618                                 PMD_RX_LOG(ERR, "Recv packet dropped due to frame err.");
619                         }
620                         PMD_RX_LOG(ERR, "Error in received packet rcd#:%d rxd:%d",
621                                    (int)(rcd - (struct Vmxnet3_RxCompDesc *)
622                                          rxq->comp_ring.base), rcd->rxdIdx);
623                         rte_pktmbuf_free_seg(rxm);
624                         goto rcd_done;
625                 }
626
627                 /* Check for hardware stripped VLAN tag */
628                 if (rcd->ts) {
629                         PMD_RX_LOG(DEBUG, "Received packet with vlan ID: %d.",
630                                    rcd->tci);
631                         rxm->ol_flags = PKT_RX_VLAN_PKT;
632                         /* Copy vlan tag in packet buffer */
633                         rxm->vlan_tci = rte_le_to_cpu_16((uint16_t)rcd->tci);
634                 } else {
635                         rxm->ol_flags = 0;
636                         rxm->vlan_tci = 0;
637                 }
638
639                 /* Initialize newly received packet buffer */
640                 rxm->port = rxq->port_id;
641                 rxm->nb_segs = 1;
642                 rxm->next = NULL;
643                 rxm->pkt_len = (uint16_t)rcd->len;
644                 rxm->data_len = (uint16_t)rcd->len;
645                 rxm->data_off = RTE_PKTMBUF_HEADROOM;
646
647                 /* Check packet type and checksum errors; only IPv4 is supported for now. */
648                 if (rcd->v4) {
649                         struct ether_hdr *eth = rte_pktmbuf_mtod(rxm, struct ether_hdr *);
650                         struct ipv4_hdr *ip = (struct ipv4_hdr *)(eth + 1);
651
652                         if (((ip->version_ihl & 0xf) << 2) > (int)sizeof(struct ipv4_hdr))
653                                 rxm->ol_flags |= PKT_RX_IPV4_HDR_EXT;
654                         else
655                                 rxm->ol_flags |= PKT_RX_IPV4_HDR;
656
657                         if (!rcd->cnc) {
658                                 if (!rcd->ipc)
659                                         rxm->ol_flags |= PKT_RX_IP_CKSUM_BAD;
660
661                                 if ((rcd->tcp || rcd->udp) && !rcd->tuc)
662                                         rxm->ol_flags |= PKT_RX_L4_CKSUM_BAD;
663                         }
664                 }
665
666                 rx_pkts[nb_rx++] = rxm;
667 rcd_done:
668                 rxq->cmd_ring[ring_idx].next2comp = idx;
669                 VMXNET3_INC_RING_IDX_ONLY(rxq->cmd_ring[ring_idx].next2comp, rxq->cmd_ring[ring_idx].size);
670
671                 /* Allocate new buffers and repost the descriptors */
672                 vmxnet3_post_rx_bufs(rxq, ring_idx);
673                 if (unlikely(rxq->shared->ctrl.updateRxProd)) {
674                         VMXNET3_WRITE_BAR0_REG(hw, rxprod_reg[ring_idx] + (rxq->queue_id * VMXNET3_REG_ALIGN),
675                                                rxq->cmd_ring[ring_idx].next2fill);
676                 }
677
678                 /* Advance to the next descriptor in comp_ring */
679                 vmxnet3_comp_ring_adv_next2proc(&rxq->comp_ring);
680
681                 rcd = &rxq->comp_ring.base[rxq->comp_ring.next2proc].rcd;
682                 nb_rxd++;
683                 if (nb_rxd > rxq->cmd_ring[0].size) {
684                         PMD_RX_LOG(ERR,
685                                    "Used up the quota of received packets,"
686                                    " relinquishing control.");
687                         break;
688                 }
689         }
690
691         return nb_rx;
692 }
693
694 /*
695  * Create a memzone for the device rings. malloc can't be used since the
696  * physical address is needed. If the memzone already exists, this function
697  * returns a pointer to the existing one.
698  */
699 static const struct rte_memzone *
700 ring_dma_zone_reserve(struct rte_eth_dev *dev, const char *ring_name,
701                       uint16_t queue_id, uint32_t ring_size, int socket_id)
702 {
703         char z_name[RTE_MEMZONE_NAMESIZE];
704         const struct rte_memzone *mz;
705
706         snprintf(z_name, sizeof(z_name), "%s_%s_%d_%d",
707                         dev->driver->pci_drv.name, ring_name,
708                         dev->data->port_id, queue_id);
709
710         mz = rte_memzone_lookup(z_name);
711         if (mz)
712                 return mz;
713
714         return rte_memzone_reserve_aligned(z_name, ring_size,
715                         socket_id, 0, VMXNET3_RING_BA_ALIGN);
716 }
717
718 int
719 vmxnet3_dev_tx_queue_setup(struct rte_eth_dev *dev,
720                            uint16_t queue_idx,
721                            uint16_t nb_desc,
722                            unsigned int socket_id,
723                            __attribute__((unused)) const struct rte_eth_txconf *tx_conf)
724 {
725         struct vmxnet3_hw *hw = dev->data->dev_private;
726         const struct rte_memzone *mz;
727         struct vmxnet3_tx_queue *txq;
728         struct vmxnet3_cmd_ring *ring;
729         struct vmxnet3_comp_ring *comp_ring;
730         struct vmxnet3_data_ring *data_ring;
731         int size;
732
733         PMD_INIT_FUNC_TRACE();
734
735         if ((tx_conf->txq_flags & ETH_TXQ_FLAGS_NOMULTSEGS) !=
736             ETH_TXQ_FLAGS_NOMULTSEGS) {
737                 PMD_INIT_LOG(ERR, "Multi-segment TX is not supported yet");
738                 return -EINVAL;
739         }
740
741         if ((tx_conf->txq_flags & ETH_TXQ_FLAGS_NOOFFLOADS) !=
742             ETH_TXQ_FLAGS_NOOFFLOADS) {
743                 PMD_INIT_LOG(ERR, "TX offloads are not supported yet");
744                 return -EINVAL;
745         }
746
747         txq = rte_zmalloc("ethdev_tx_queue", sizeof(struct vmxnet3_tx_queue), RTE_CACHE_LINE_SIZE);
748         if (txq == NULL) {
749                 PMD_INIT_LOG(ERR, "Cannot allocate tx queue structure");
750                 return -ENOMEM;
751         }
752
753         txq->queue_id = queue_idx;
754         txq->port_id = dev->data->port_id;
755         txq->shared = &hw->tqd_start[queue_idx];
756         txq->hw = hw;
757         txq->qid = queue_idx;
758         txq->stopped = TRUE;
759
760         ring = &txq->cmd_ring;
761         comp_ring = &txq->comp_ring;
762         data_ring = &txq->data_ring;
763
764         /* Tx vmxnet ring length should be between 512-4096 */
765         if (nb_desc < VMXNET3_DEF_TX_RING_SIZE) {
766                 PMD_INIT_LOG(ERR, "VMXNET3 Tx Ring Size Min: %u",
767                              VMXNET3_DEF_TX_RING_SIZE);
768                 return -EINVAL;
769         } else if (nb_desc > VMXNET3_TX_RING_MAX_SIZE) {
770                 PMD_INIT_LOG(ERR, "VMXNET3 Tx Ring Size Max: %u",
771                              VMXNET3_TX_RING_MAX_SIZE);
772                 return -EINVAL;
773         } else {
774                 ring->size = nb_desc;
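                /* Round the size down to the ring alignment implied by VMXNET3_RING_SIZE_MASK */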
775                 ring->size &= ~VMXNET3_RING_SIZE_MASK;
776         }
777         comp_ring->size = data_ring->size = ring->size;
778
779         /* Tx vmxnet rings structure initialization */
780         ring->next2fill = 0;
781         ring->next2comp = 0;
782         ring->gen = VMXNET3_INIT_GEN;
783         comp_ring->next2proc = 0;
784         comp_ring->gen = VMXNET3_INIT_GEN;
785
786         size = sizeof(struct Vmxnet3_TxDesc) * ring->size;
787         size += sizeof(struct Vmxnet3_TxCompDesc) * comp_ring->size;
788         size += sizeof(struct Vmxnet3_TxDataDesc) * data_ring->size;
789
790         mz = ring_dma_zone_reserve(dev, "txdesc", queue_idx, size, socket_id);
791         if (mz == NULL) {
792                 PMD_INIT_LOG(ERR, "ERROR: Creating queue descriptors zone");
793                 return -ENOMEM;
794         }
795         memset(mz->addr, 0, mz->len);
796
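        /*
         * The single memzone holds the Tx command ring, completion ring and
         * data ring laid out back to back; carve it up below.
         */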
797         /* cmd_ring initialization */
798         ring->base = mz->addr;
799         ring->basePA = mz->phys_addr;
800
801         /* comp_ring initialization */
802         comp_ring->base = ring->base + ring->size;
803         comp_ring->basePA = ring->basePA +
804                 (sizeof(struct Vmxnet3_TxDesc) * ring->size);
805
806         /* data_ring initialization */
807         data_ring->base = (Vmxnet3_TxDataDesc *)(comp_ring->base + comp_ring->size);
808         data_ring->basePA = comp_ring->basePA +
809                         (sizeof(struct Vmxnet3_TxCompDesc) * comp_ring->size);
810
811         /* cmd_ring0 buf_info allocation */
812         ring->buf_info = rte_zmalloc("tx_ring_buf_info",
813                                      ring->size * sizeof(vmxnet3_buf_info_t), RTE_CACHE_LINE_SIZE);
814         if (ring->buf_info == NULL) {
815                 PMD_INIT_LOG(ERR, "ERROR: Creating tx_buf_info structure");
816                 return -ENOMEM;
817         }
818
819         /* Update the data portion with txq */
820         dev->data->tx_queues[queue_idx] = txq;
821
822         return 0;
823 }
824
825 int
826 vmxnet3_dev_rx_queue_setup(struct rte_eth_dev *dev,
827                            uint16_t queue_idx,
828                            uint16_t nb_desc,
829                            unsigned int socket_id,
830                            __attribute__((unused)) const struct rte_eth_rxconf *rx_conf,
831                            struct rte_mempool *mp)
832 {
833         const struct rte_memzone *mz;
834         struct vmxnet3_rx_queue *rxq;
835         struct vmxnet3_hw     *hw = dev->data->dev_private;
836         struct vmxnet3_cmd_ring *ring0, *ring1, *ring;
837         struct vmxnet3_comp_ring *comp_ring;
838         int size;
839         uint8_t i;
840         char mem_name[32];
841         uint16_t buf_size;
842         struct rte_pktmbuf_pool_private *mbp_priv;
843
844         PMD_INIT_FUNC_TRACE();
845
846         mbp_priv = (struct rte_pktmbuf_pool_private *)
847                 rte_mempool_get_priv(mp);
848         buf_size = (uint16_t) (mbp_priv->mbuf_data_room_size -
849                                RTE_PKTMBUF_HEADROOM);
850
851         if (dev->data->dev_conf.rxmode.max_rx_pkt_len > buf_size) {
852                 PMD_INIT_LOG(ERR, "buf_size = %u, max_pkt_len = %u, "
853                              "VMXNET3 doesn't support scatter packets yet",
854                              buf_size, dev->data->dev_conf.rxmode.max_rx_pkt_len);
855                 return -EINVAL;
856         }
857
858         rxq = rte_zmalloc("ethdev_rx_queue", sizeof(struct vmxnet3_rx_queue), RTE_CACHE_LINE_SIZE);
859         if (rxq == NULL) {
860                 PMD_INIT_LOG(ERR, "Cannot allocate rx queue structure");
861                 return -ENOMEM;
862         }
863
864         rxq->mp = mp;
865         rxq->queue_id = queue_idx;
866         rxq->port_id = dev->data->port_id;
867         rxq->shared = &hw->rqd_start[queue_idx];
868         rxq->hw = hw;
869         rxq->qid1 = queue_idx;
870         rxq->qid2 = queue_idx + hw->num_rx_queues;
871         rxq->stopped = TRUE;
872
873         ring0 = &rxq->cmd_ring[0];
874         ring1 = &rxq->cmd_ring[1];
875         comp_ring = &rxq->comp_ring;
876
877         /* Rx vmxnet rings length should be between 256-4096 */
878         if (nb_desc < VMXNET3_DEF_RX_RING_SIZE) {
879                 PMD_INIT_LOG(ERR, "VMXNET3 Rx Ring Size Min: 256");
880                 return -EINVAL;
881         } else if (nb_desc > VMXNET3_RX_RING_MAX_SIZE) {
882                 PMD_INIT_LOG(ERR, "VMXNET3 Rx Ring Size Max: 4096");
883                 return -EINVAL;
884         } else {
885                 ring0->size = nb_desc;
886                 ring0->size &= ~VMXNET3_RING_SIZE_MASK;
887                 ring1->size = ring0->size;
888         }
889
890         comp_ring->size = ring0->size + ring1->size;
891
892         /* Rx vmxnet rings structure initialization */
893         ring0->next2fill = 0;
894         ring1->next2fill = 0;
895         ring0->next2comp = 0;
896         ring1->next2comp = 0;
897         ring0->gen = VMXNET3_INIT_GEN;
898         ring1->gen = VMXNET3_INIT_GEN;
899         comp_ring->next2proc = 0;
900         comp_ring->gen = VMXNET3_INIT_GEN;
901
902         size = sizeof(struct Vmxnet3_RxDesc) * (ring0->size + ring1->size);
903         size += sizeof(struct Vmxnet3_RxCompDesc) * comp_ring->size;
904
905         mz = ring_dma_zone_reserve(dev, "rxdesc", queue_idx, size, socket_id);
906         if (mz == NULL) {
907                 PMD_INIT_LOG(ERR, "ERROR: Creating queue descriptors zone");
908                 return -ENOMEM;
909         }
910         memset(mz->addr, 0, mz->len);
911
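        /*
         * The single memzone holds cmd_ring0, cmd_ring1 and the completion
         * ring laid out back to back; carve it up below.
         */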
912         /* cmd_ring0 initialization */
913         ring0->base = mz->addr;
914         ring0->basePA = mz->phys_addr;
915
916         /* cmd_ring1 initialization */
917         ring1->base = ring0->base + ring0->size;
918         ring1->basePA = ring0->basePA + sizeof(struct Vmxnet3_RxDesc) * ring0->size;
919
920         /* comp_ring initialization */
921         comp_ring->base = ring1->base + ring1->size;
922         comp_ring->basePA = ring1->basePA + sizeof(struct Vmxnet3_RxDesc) *
923                 ring1->size;
924
925         /* cmd_ring0-cmd_ring1 buf_info allocation */
926         for (i = 0; i < VMXNET3_RX_CMDRING_SIZE; i++) {
927
928                 ring = &rxq->cmd_ring[i];
929                 ring->rid = i;
930                 snprintf(mem_name, sizeof(mem_name), "rx_ring_%d_buf_info", i);
931
932                 ring->buf_info = rte_zmalloc(mem_name, ring->size * sizeof(vmxnet3_buf_info_t), RTE_CACHE_LINE_SIZE);
933                 if (ring->buf_info == NULL) {
934                         PMD_INIT_LOG(ERR, "ERROR: Creating rx_buf_info structure");
935                         return -ENOMEM;
936                 }
937         }
938
939         /* Update the data portion with rxq */
940         dev->data->rx_queues[queue_idx] = rxq;
941
942         return 0;
943 }
944
945 /*
946  * Initializes the Receive Unit.
947  * Loads mbufs into the rx queues in advance.
948  */
949 int
950 vmxnet3_dev_rxtx_init(struct rte_eth_dev *dev)
951 {
952         struct vmxnet3_hw *hw = dev->data->dev_private;
953
954         int i, ret;
955         uint8_t j;
956
957         PMD_INIT_FUNC_TRACE();
958
959         for (i = 0; i < hw->num_rx_queues; i++) {
960                 vmxnet3_rx_queue_t *rxq = dev->data->rx_queues[i];
961
962                 for (j = 0; j < VMXNET3_RX_CMDRING_SIZE; j++) {
963                         /* vmxnet3_post_rx_bufs() fills every free descriptor, i.e. the full ring at init time */
964                         ret = vmxnet3_post_rx_bufs(rxq, j);
965                         if (ret <= 0) {
966                                 PMD_INIT_LOG(ERR, "ERROR: Posting Rxq: %d buffers ring: %d", i, j);
967                                 return -ret;
968                         }
969                         /* Update the device with next2fill so the posted mbufs can receive incoming packets */
970                         if (unlikely(rxq->shared->ctrl.updateRxProd)) {
971                                 VMXNET3_WRITE_BAR0_REG(hw, rxprod_reg[j] + (rxq->queue_id * VMXNET3_REG_ALIGN),
972                                                        rxq->cmd_ring[j].next2fill);
973                         }
974                 }
975                 rxq->stopped = FALSE;
976         }
977
978         for (i = 0; i < dev->data->nb_tx_queues; i++) {
979                 struct vmxnet3_tx_queue *txq = dev->data->tx_queues[i];
980
981                 txq->stopped = FALSE;
982         }
983
984         return 0;
985 }
986
987 static uint8_t rss_intel_key[40] = {
988         0x6D, 0x5A, 0x56, 0xDA, 0x25, 0x5B, 0x0E, 0xC2,
989         0x41, 0x67, 0x25, 0x3D, 0x43, 0xA3, 0x8F, 0xB0,
990         0xD0, 0xCA, 0x2B, 0xCB, 0xAE, 0x7B, 0x30, 0xB4,
991         0x77, 0xCB, 0x2D, 0xA3, 0x80, 0x30, 0xF2, 0x0C,
992         0x6A, 0x42, 0xB7, 0x3B, 0xBE, 0xAC, 0x01, 0xFA,
993 };
994
995 /*
996  * Configure RSS feature
997  */
998 int
999 vmxnet3_rss_configure(struct rte_eth_dev *dev)
1000 {
1001 #define VMXNET3_RSS_OFFLOAD_ALL ( \
1002                 ETH_RSS_IPV4 | \
1003                 ETH_RSS_IPV4_TCP | \
1004                 ETH_RSS_IPV6 | \
1005                 ETH_RSS_IPV6_TCP)
1006
1007         struct vmxnet3_hw *hw = dev->data->dev_private;
1008         struct VMXNET3_RSSConf *dev_rss_conf;
1009         struct rte_eth_rss_conf *port_rss_conf;
1010         uint64_t rss_hf;
1011         uint8_t i, j;
1012
1013         PMD_INIT_FUNC_TRACE();
1014
1015         dev_rss_conf = hw->rss_conf;
1016         port_rss_conf = &dev->data->dev_conf.rx_adv_conf.rss_conf;
1017
1018         /* loading hashFunc */
1019         dev_rss_conf->hashFunc = VMXNET3_RSS_HASH_FUNC_TOEPLITZ;
1020         /* loading hashKeySize */
1021         dev_rss_conf->hashKeySize = VMXNET3_RSS_MAX_KEY_SIZE;
1022         /* loading indTableSize: must not exceed VMXNET3_RSS_MAX_IND_TABLE_SIZE (128) */
1023         dev_rss_conf->indTableSize = (uint16_t)(hw->num_rx_queues * 4);
1024
1025         if (port_rss_conf->rss_key == NULL) {
1026                 /* Default hash key */
1027                 port_rss_conf->rss_key = rss_intel_key;
1028         }
1029
1030         /* loading hashKey */
1031         memcpy(&dev_rss_conf->hashKey[0], port_rss_conf->rss_key, dev_rss_conf->hashKeySize);
1032
1033         /* loading indTable */
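        /* The indirection table entries are spread round-robin across the Rx queues */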
1034         for (i = 0, j = 0; i < dev_rss_conf->indTableSize; i++, j++) {
1035                 if (j == dev->data->nb_rx_queues)
1036                         j = 0;
1037                 dev_rss_conf->indTable[i] = j;
1038         }
1039
1040         /* loading hashType */
1041         dev_rss_conf->hashType = 0;
1042         rss_hf = port_rss_conf->rss_hf & VMXNET3_RSS_OFFLOAD_ALL;
1043         if (rss_hf & ETH_RSS_IPV4)
1044                 dev_rss_conf->hashType |= VMXNET3_RSS_HASH_TYPE_IPV4;
1045         if (rss_hf & ETH_RSS_IPV4_TCP)
1046                 dev_rss_conf->hashType |= VMXNET3_RSS_HASH_TYPE_TCP_IPV4;
1047         if (rss_hf & ETH_RSS_IPV6)
1048                 dev_rss_conf->hashType |= VMXNET3_RSS_HASH_TYPE_IPV6;
1049         if (rss_hf & ETH_RSS_IPV6_TCP)
1050                 dev_rss_conf->hashType |= VMXNET3_RSS_HASH_TYPE_TCP_IPV6;
1051
1052         return VMXNET3_SUCCESS;
1053 }
1054
1055 /*
1056  * Configure VLAN Filter feature
1057  */
1058 int
1059 vmxnet3_vlan_configure(struct rte_eth_dev *dev)
1060 {
1061         uint8_t i;
1062         struct vmxnet3_hw *hw = dev->data->dev_private;
1063         uint32_t *vf_table = hw->shared->devRead.rxFilterConf.vfTable;
1064
1065         PMD_INIT_FUNC_TRACE();
1066
1067         /* Initialize the VLAN filter table */
1068         for (i = 0; i < VMXNET3_VFT_SIZE; i++) {
1069                 /* Filter all vlan tags out by default */
1070                 vf_table[i] = 0;
1071                 /* To-Do: Provide another routine in dev_ops for user config */
1072
1073                 PMD_INIT_LOG(DEBUG, "Registering VLAN portid: %"PRIu8" tag %u",
1074                                         dev->data->port_id, vf_table[i]);
1075         }
1076
1077         return VMXNET3_SUCCESS;
1078 }