vmxnet3: fix vlan Rx stripping
lib/librte_pmd_vmxnet3/vmxnet3_rxtx.c (dpdk.git)
1 /*-
2  *   BSD LICENSE
3  *
4  *   Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
5  *   All rights reserved.
6  *
7  *   Redistribution and use in source and binary forms, with or without
8  *   modification, are permitted provided that the following conditions
9  *   are met:
10  *
11  *     * Redistributions of source code must retain the above copyright
12  *       notice, this list of conditions and the following disclaimer.
13  *     * Redistributions in binary form must reproduce the above copyright
14  *       notice, this list of conditions and the following disclaimer in
15  *       the documentation and/or other materials provided with the
16  *       distribution.
17  *     * Neither the name of Intel Corporation nor the names of its
18  *       contributors may be used to endorse or promote products derived
19  *       from this software without specific prior written permission.
20  *
21  *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
22  *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
23  *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
24  *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
25  *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
26  *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
27  *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
28  *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
29  *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
30  *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
31  *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
32  */
33
34 #include <sys/queue.h>
35
36 #include <stdio.h>
37 #include <stdlib.h>
38 #include <string.h>
39 #include <errno.h>
40 #include <stdint.h>
41 #include <stdarg.h>
42 #include <unistd.h>
43 #include <inttypes.h>
44
45 #include <rte_byteorder.h>
46 #include <rte_common.h>
47 #include <rte_cycles.h>
48 #include <rte_log.h>
49 #include <rte_debug.h>
50 #include <rte_interrupts.h>
51 #include <rte_pci.h>
52 #include <rte_memory.h>
53 #include <rte_memzone.h>
54 #include <rte_launch.h>
55 #include <rte_tailq.h>
56 #include <rte_eal.h>
57 #include <rte_per_lcore.h>
58 #include <rte_lcore.h>
59 #include <rte_atomic.h>
60 #include <rte_branch_prediction.h>
61 #include <rte_ring.h>
62 #include <rte_mempool.h>
63 #include <rte_malloc.h>
64 #include <rte_mbuf.h>
65 #include <rte_ether.h>
66 #include <rte_ethdev.h>
67 #include <rte_prefetch.h>
68 #include <rte_udp.h>
69 #include <rte_tcp.h>
70 #include <rte_sctp.h>
71 #include <rte_string_fns.h>
72 #include <rte_errno.h>
73
74 #include "vmxnet3/vmxnet3_defs.h"
75 #include "vmxnet3_ring.h"
76
77 #include "vmxnet3_logs.h"
78 #include "vmxnet3_ethdev.h"
79
80
81 #define RTE_MBUF_DATA_DMA_ADDR(mb) \
82         (uint64_t) ((mb)->buf_physaddr + (mb)->data_off)
83
84 #define RTE_MBUF_DATA_DMA_ADDR_DEFAULT(mb) \
85         (uint64_t) ((mb)->buf_physaddr + RTE_PKTMBUF_HEADROOM)
86
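/*
 * BAR0 doorbell registers used to tell the device that new Rx buffers have been
 * posted on command ring 0 and command ring 1 respectively.
 */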
87 static uint32_t rxprod_reg[2] = {VMXNET3_REG_RXPROD, VMXNET3_REG_RXPROD2};
88
89 static inline int vmxnet3_post_rx_bufs(vmxnet3_rx_queue_t* , uint8_t);
90 static inline void vmxnet3_tq_tx_complete(vmxnet3_tx_queue_t *);
91 #ifdef RTE_LIBRTE_VMXNET3_DEBUG_DRIVER_NOT_USED
92 static void vmxnet3_rxq_dump(struct vmxnet3_rx_queue *);
93 static void vmxnet3_txq_dump(struct vmxnet3_tx_queue *);
94 #endif
95
96 static inline struct rte_mbuf *
97 rte_rxmbuf_alloc(struct rte_mempool *mp)
98 {
99         struct rte_mbuf *m;
100
101         m = __rte_mbuf_raw_alloc(mp);
102         __rte_mbuf_sanity_check_raw(m, 0);
103         return m;
104 }
105
106 #ifdef RTE_LIBRTE_VMXNET3_DEBUG_DRIVER_NOT_USED
107 static void
108 vmxnet3_rxq_dump(struct vmxnet3_rx_queue *rxq)
109 {
110         uint32_t avail = 0;
111
112         if (rxq == NULL)
113                 return;
114
115         PMD_RX_LOG(DEBUG,
116                    "RXQ: cmd0 base : 0x%p cmd1 base : 0x%p comp ring base : 0x%p.",
117                    rxq->cmd_ring[0].base, rxq->cmd_ring[1].base, rxq->comp_ring.base);
118         PMD_RX_LOG(DEBUG,
119                    "RXQ: cmd0 basePA : 0x%lx cmd1 basePA : 0x%lx comp ring basePA : 0x%lx.",
120                    (unsigned long)rxq->cmd_ring[0].basePA,
121                    (unsigned long)rxq->cmd_ring[1].basePA,
122                    (unsigned long)rxq->comp_ring.basePA);
123
124         avail = vmxnet3_cmd_ring_desc_avail(&rxq->cmd_ring[0]);
125         PMD_RX_LOG(DEBUG,
126                    "RXQ:cmd0: size=%u; free=%u; next2proc=%u; queued=%u",
127                    (uint32_t)rxq->cmd_ring[0].size, avail,
128                    rxq->comp_ring.next2proc,
129                    rxq->cmd_ring[0].size - avail);
130
131         avail = vmxnet3_cmd_ring_desc_avail(&rxq->cmd_ring[1]);
132         PMD_RX_LOG(DEBUG, "RXQ:cmd1 size=%u; free=%u; next2proc=%u; queued=%u",
133                    (uint32_t)rxq->cmd_ring[1].size, avail, rxq->comp_ring.next2proc,
134                    rxq->cmd_ring[1].size - avail);
135
136 }
137
138 static void
139 vmxnet3_txq_dump(struct vmxnet3_tx_queue *txq)
140 {
141         uint32_t avail = 0;
142
143         if (txq == NULL)
144                 return;
145
146         PMD_TX_LOG(DEBUG, "TXQ: cmd base : 0x%p comp ring base : 0x%p.",
147                    txq->cmd_ring.base, txq->comp_ring.base);
148         PMD_TX_LOG(DEBUG, "TXQ: cmd basePA : 0x%lx comp ring basePA : 0x%lx.",
149                    (unsigned long)txq->cmd_ring.basePA,
150                    (unsigned long)txq->comp_ring.basePA);
151
152         avail = vmxnet3_cmd_ring_desc_avail(&txq->cmd_ring);
153         PMD_TX_LOG(DEBUG, "TXQ: size=%u; free=%u; next2proc=%u; queued=%u",
154                    (uint32_t)txq->cmd_ring.size, avail,
155                    txq->comp_ring.next2proc, txq->cmd_ring.size - avail);
156 }
157 #endif
158
159 static inline void
160 vmxnet3_cmd_ring_release(vmxnet3_cmd_ring_t *ring)
161 {
162         while (ring->next2comp != ring->next2fill) {
163                 /* No need to worry about tx desc ownership, device is quiesced by now. */
164                 vmxnet3_buf_info_t *buf_info = ring->buf_info + ring->next2comp;
165
166                 if (buf_info->m) {
167                         rte_pktmbuf_free(buf_info->m);
168                         buf_info->m = NULL;
169                         buf_info->bufPA = 0;
170                         buf_info->len = 0;
171                 }
172                 vmxnet3_cmd_ring_adv_next2comp(ring);
173         }
174         rte_free(ring->buf_info);
175         ring->buf_info = NULL;
176 }
177
178 void
179 vmxnet3_dev_tx_queue_release(void *txq)
180 {
181         vmxnet3_tx_queue_t *tq = txq;
182
183         if (txq != NULL) {
184                 /* Release the cmd_ring */
185                 vmxnet3_cmd_ring_release(&tq->cmd_ring);
186         }
187 }
188
189 void
190 vmxnet3_dev_rx_queue_release(void *rxq)
191 {
192         int i;
193         vmxnet3_rx_queue_t *rq = rxq;
194
195         if (rxq != NULL) {
196                 /* Release both the cmd_rings */
197                 for (i = 0; i < VMXNET3_RX_CMDRING_SIZE; i++)
198                         vmxnet3_cmd_ring_release(&rq->cmd_ring[i]);
199         }
200 }
201
202 void
203 vmxnet3_dev_clear_queues(struct rte_eth_dev *dev)
204 {
205         unsigned i;
206
207         PMD_INIT_FUNC_TRACE();
208
209         for (i = 0; i < dev->data->nb_tx_queues; i++) {
210                 struct vmxnet3_tx_queue *txq = dev->data->tx_queues[i];
211
212                 if (txq != NULL) {
213                         txq->stopped = TRUE;
214                         vmxnet3_dev_tx_queue_release(txq);
215                 }
216         }
217
218         for (i = 0; i < dev->data->nb_rx_queues; i++) {
219                 struct vmxnet3_rx_queue *rxq = dev->data->rx_queues[i];
220
221                 if (rxq != NULL) {
222                         rxq->stopped = TRUE;
223                         vmxnet3_dev_rx_queue_release(rxq);
224                 }
225         }
226 }
227
228 static inline void
229 vmxnet3_tq_tx_complete(vmxnet3_tx_queue_t *txq)
230 {
231         int completed = 0;
232         struct rte_mbuf *mbuf;
233         vmxnet3_comp_ring_t *comp_ring = &txq->comp_ring;
234         struct Vmxnet3_TxCompDesc *tcd = (struct Vmxnet3_TxCompDesc *)
235                 (comp_ring->base + comp_ring->next2proc);
236
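        /*
         * A Tx completion descriptor is valid for the driver only while its gen
         * bit matches the completion ring's current gen; the device flips the
         * bit each time it writes a new completion entry.
         */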
237         while (tcd->gen == comp_ring->gen) {
238
239                 /* Release cmd_ring descriptor and free mbuf */
240 #ifdef RTE_LIBRTE_VMXNET3_DEBUG_DRIVER
241                 VMXNET3_ASSERT(txq->cmd_ring.base[tcd->txdIdx].txd.eop == 1);
242 #endif
243                 mbuf = txq->cmd_ring.buf_info[tcd->txdIdx].m;
244                 if (unlikely(mbuf == NULL))
245                         rte_panic("EOP desc does not point to a valid mbuf");
246                 else
247                         rte_pktmbuf_free(mbuf);
248
249
250                 txq->cmd_ring.buf_info[tcd->txdIdx].m = NULL;
251                 /* Mark the txd for which tcd was generated as completed */
252                 vmxnet3_cmd_ring_adv_next2comp(&txq->cmd_ring);
253
254                 vmxnet3_comp_ring_adv_next2proc(comp_ring);
255                 tcd = (struct Vmxnet3_TxCompDesc *)(comp_ring->base +
256                                                     comp_ring->next2proc);
257                 completed++;
258         }
259
260         PMD_TX_LOG(DEBUG, "Processed %d tx comps & command descs.", completed);
261 }
262
263 uint16_t
264 vmxnet3_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts,
265                   uint16_t nb_pkts)
266 {
267         uint16_t nb_tx;
268         Vmxnet3_TxDesc *txd = NULL;
269         vmxnet3_buf_info_t *tbi = NULL;
270         struct vmxnet3_hw *hw;
271         struct rte_mbuf *txm;
272         vmxnet3_tx_queue_t *txq = tx_queue;
273
274         hw = txq->hw;
275
276         if (txq->stopped) {
277                 PMD_TX_LOG(DEBUG, "Tx queue is stopped.");
278                 return 0;
279         }
280
281         /* Free up the comp_descriptors aggressively */
282         vmxnet3_tq_tx_complete(txq);
283
284         nb_tx = 0;
285         while (nb_tx < nb_pkts) {
286
287                 if (vmxnet3_cmd_ring_desc_avail(&txq->cmd_ring)) {
288
289                         txm = tx_pkts[nb_tx];
290                         /* Multi-segment (scatter) packets are not supported yet; drop them */
291                         if (txm->nb_segs != 1) {
292                                 PMD_TX_LOG(DEBUG, "Multi-segment packets are not supported yet, dropping");
293                                 rte_pktmbuf_free(tx_pkts[nb_tx]);
294                                 txq->stats.drop_total++;
295
296                                 nb_tx++;
297                                 continue;
298                         }
299
300                         /* cur_mtu does not include the Ethernet header, so allow for it here */
301                         if (txm->data_len > (hw->cur_mtu + ETHER_HDR_LEN)) {
302                                 PMD_TX_LOG(DEBUG, "Packet data_len higher than MTU");
303                                 rte_pktmbuf_free(tx_pkts[nb_tx]);
304                                 txq->stats.drop_total++;
305
306                                 nb_tx++;
307                                 continue;
308                         }
309
310                         txd = (Vmxnet3_TxDesc *)(txq->cmd_ring.base + txq->cmd_ring.next2fill);
311
312                         /* Fill the tx descriptor */
313                         tbi = txq->cmd_ring.buf_info + txq->cmd_ring.next2fill;
314                         tbi->bufPA = RTE_MBUF_DATA_DMA_ADDR(txm);
315                         txd->addr = tbi->bufPA;
316                         txd->len = txm->data_len;
317
318                         /* Mark the last descriptor as End of Packet. */
319                         txd->cq = 1;
320                         txd->eop = 1;
321
322                         /* Record current mbuf for freeing it later in tx complete */
323 #ifdef RTE_LIBRTE_VMXNET3_DEBUG_DRIVER
324                         VMXNET3_ASSERT(txm);
325 #endif
326                         tbi->m = txm;
327
328                         /* Set the offloading mode to default */
329                         txd->hlen = 0;
330                         txd->om = VMXNET3_OM_NONE;
331                         txd->msscof = 0;
332
333                         /* Finally flip the GEN bit of the SOP desc */
334                         txd->gen = txq->cmd_ring.gen;
335                         txq->shared->ctrl.txNumDeferred++;
336
337                         /* move to the next2fill descriptor */
338                         vmxnet3_cmd_ring_adv_next2fill(&txq->cmd_ring);
339                         nb_tx++;
340
341                 } else {
342                         PMD_TX_LOG(DEBUG, "No free tx cmd desc(s)");
343                         txq->stats.drop_total += (nb_pkts - nb_tx);
344                         break;
345                 }
346         }
347
348         PMD_TX_LOG(DEBUG, "vmxnet3 txThreshold: %u", txq->shared->ctrl.txThreshold);
349
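        /*
         * Doorbell batching: TXPROD is only written once at least txThreshold
         * descriptors have been queued since the last notification, keeping the
         * number of (expensive) register writes down.
         */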
350         if (txq->shared->ctrl.txNumDeferred >= txq->shared->ctrl.txThreshold) {
351
352                 txq->shared->ctrl.txNumDeferred = 0;
353                 /* Notify vSwitch that packets are available. */
354                 VMXNET3_WRITE_BAR0_REG(hw, (VMXNET3_REG_TXPROD + txq->queue_id * VMXNET3_REG_ALIGN),
355                                        txq->cmd_ring.next2fill);
356         }
357
358         return nb_tx;
359 }
360
361 /*
362  *  Allocate mbufs and post Rx descriptors with the buffer details so that
363  *  the device can receive packets into those buffers.
364  *      Ring layout:
365  *      Of the two rings, the 1st ring holds buffers of type 0 and type 1.
366  *      bufs_per_pkt is set so that, in the non-LRO case, all the buffers required
367  *      by a frame fit in the 1st ring (the first buffer of type 0, the rest of type 1).
368  *      The 2nd ring holds type 1 buffers only and is used mostly
369  *      for LRO.
370  *
371  */
372 static inline int
373 vmxnet3_post_rx_bufs(vmxnet3_rx_queue_t *rxq, uint8_t ring_id)
374 {
375         int err = 0;
376         uint32_t i = 0, val = 0;
377         struct vmxnet3_cmd_ring *ring = &rxq->cmd_ring[ring_id];
378
379         while (vmxnet3_cmd_ring_desc_avail(ring) > 0) {
380                 struct Vmxnet3_RxDesc *rxd;
381                 struct rte_mbuf *mbuf;
382                 vmxnet3_buf_info_t *buf_info = &ring->buf_info[ring->next2fill];
383
384                 rxd = (struct Vmxnet3_RxDesc *)(ring->base + ring->next2fill);
385
386                 if (ring->rid == 0) {
387                         /* Usually: One HEAD type buf per packet
388                          * val = (ring->next2fill % rxq->hw->bufs_per_pkt) ?
389                          * VMXNET3_RXD_BTYPE_BODY : VMXNET3_RXD_BTYPE_HEAD;
390                          */
391
392                         /* We use a single buffer per packet, so every descriptor here is a HEAD */
393                         val = VMXNET3_RXD_BTYPE_HEAD;
394                 } else {
395                         /* The 2nd ring gets BODY type buffers only, which ESXi will not use at all */
396                         val = VMXNET3_RXD_BTYPE_BODY;
397                 }
398
399                 /* Allocate blank mbuf for the current Rx Descriptor */
400                 mbuf = rte_rxmbuf_alloc(rxq->mp);
401                 if (mbuf == NULL) {
402                         PMD_RX_LOG(ERR, "Error allocating mbuf in %s", __func__);
403                         rxq->stats.rx_buf_alloc_failure++;
404                         err = ENOMEM;
405                         break;
406                 }
407
408                 /*
409                  * Store the mbuf pointer in the buf_info entry for this descriptor;
410                  * buf_info plays the same role as the cookie in a virtio virtqueue.
411                  */
412                 buf_info->m = mbuf;
413                 buf_info->len = (uint16_t)(mbuf->buf_len -
414                                            RTE_PKTMBUF_HEADROOM);
415                 buf_info->bufPA = RTE_MBUF_DATA_DMA_ADDR_DEFAULT(mbuf);
416
417                 /* Load Rx Descriptor with the buffer's GPA */
418                 rxd->addr = buf_info->bufPA;
419
420                 /* After this point rxd->addr MUST not be NULL */
421                 rxd->btype = val;
422                 rxd->len = buf_info->len;
423                 /* Flip gen bit at the end to change ownership */
424                 rxd->gen = ring->gen;
425
426                 vmxnet3_cmd_ring_adv_next2fill(ring);
427                 i++;
428         }
429
430         /* Return error only if no buffers are posted at present */
431         if (vmxnet3_cmd_ring_desc_avail(ring) >= (ring->size - 1))
432                 return -err;
433         else
434                 return i;
435 }
436
437 /*
438  * Process the Rx Completion Ring of given vmxnet3_rx_queue
439  * for nb_pkts burst and return the number of packets received
440  */
441 uint16_t
442 vmxnet3_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t nb_pkts)
443 {
444         uint16_t nb_rx;
445         uint32_t nb_rxd, idx;
446         uint8_t ring_idx;
447         vmxnet3_rx_queue_t *rxq;
448         Vmxnet3_RxCompDesc *rcd;
449         vmxnet3_buf_info_t *rbi;
450         Vmxnet3_RxDesc *rxd;
451         struct rte_mbuf *rxm = NULL;
452         struct vmxnet3_hw *hw;
453
454         nb_rx = 0;
455         ring_idx = 0;
456         nb_rxd = 0;
457         idx = 0;
458
459         rxq = rx_queue;
460         hw = rxq->hw;
461
462         rcd = &rxq->comp_ring.base[rxq->comp_ring.next2proc].rcd;
463
464         if (rxq->stopped) {
465                 PMD_RX_LOG(DEBUG, "Rx queue is stopped.");
466                 return 0;
467         }
468
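        /*
         * Process completion descriptors while their gen bit matches the
         * completion ring's current gen; a mismatch means the device has not
         * written that entry yet.
         */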
469         while (rcd->gen == rxq->comp_ring.gen) {
470
471                 if (nb_rx >= nb_pkts)
472                         break;
473                 idx = rcd->rxdIdx;
474                 ring_idx = (uint8_t)((rcd->rqID == rxq->qid1) ? 0 : 1);
475                 rxd = (Vmxnet3_RxDesc *)rxq->cmd_ring[ring_idx].base + idx;
476                 rbi = rxq->cmd_ring[ring_idx].buf_info + idx;
477
478                 if (rcd->sop != 1 || rcd->eop != 1) {
479                         rte_pktmbuf_free_seg(rbi->m);
480
481                         PMD_RX_LOG(DEBUG, "Packet spread across multiple buffers\n");
482                         goto rcd_done;
483
484                 } else {
485
486                         PMD_RX_LOG(DEBUG, "rxd idx: %d ring idx: %d.", idx, ring_idx);
487
488 #ifdef RTE_LIBRTE_VMXNET3_DEBUG_DRIVER
489                         VMXNET3_ASSERT(rcd->len <= rxd->len);
490                         VMXNET3_ASSERT(rbi->m);
491 #endif
492                         if (rcd->len == 0) {
493                                 PMD_RX_LOG(DEBUG, "Rx buf was skipped. rxring[%d][%d]\n",
494                                            ring_idx, idx);
495 #ifdef RTE_LIBRTE_VMXNET3_DEBUG_DRIVER
496                                 VMXNET3_ASSERT(rcd->sop && rcd->eop);
497 #endif
498                                 rte_pktmbuf_free_seg(rbi->m);
499
500                                 goto rcd_done;
501                         }
502
503                         /* We expect each packet to arrive in a single buffer */
504                         if (rxd->btype != VMXNET3_RXD_BTYPE_HEAD) {
505                                 PMD_RX_LOG(DEBUG,
506                                            "Alert: misbehaving device, incorrect "
507                                            "buffer type used. Packet dropped.");
508                                 rte_pktmbuf_free_seg(rbi->m);
509                                 goto rcd_done;
510                         }
511 #ifdef RTE_LIBRTE_VMXNET3_DEBUG_DRIVER
512                         VMXNET3_ASSERT(rxd->btype == VMXNET3_RXD_BTYPE_HEAD);
513 #endif
514                         /* Get the packet buffer pointer from buf_info */
515                         rxm = rbi->m;
516
517                         /* Clear descriptor associated buf_info to be reused */
518                         rbi->m = NULL;
519                         rbi->bufPA = 0;
520
521                         /* Update the index that we received a packet */
522                         rxq->cmd_ring[ring_idx].next2comp = idx;
523
524                         /* For RCD with EOP set, check if there is frame error */
525                         if (rcd->err) {
526                                 rxq->stats.drop_total++;
527                                 rxq->stats.drop_err++;
528
529                                 if (!rcd->fcs) {
530                                         rxq->stats.drop_fcs++;
531                                         PMD_RX_LOG(ERR, "Recv packet dropped due to frame err.");
532                                 }
533                                 PMD_RX_LOG(ERR, "Error in received packet rcd#:%d rxd:%d",
534                                            (int)(rcd - (struct Vmxnet3_RxCompDesc *)
535                                                  rxq->comp_ring.base), rcd->rxdIdx);
536                                 rte_pktmbuf_free_seg(rxm);
537
538                                 goto rcd_done;
539                         }
540
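                        /*
                         * With VLAN stripping enabled the device removes the tag from the
                         * frame and reports it in the completion descriptor: rcd->ts marks
                         * a stripped tag and rcd->tci carries the 16-bit TCI.
                         */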
541                         /* Check for hardware stripped VLAN tag */
542                         if (rcd->ts) {
543                                 PMD_RX_LOG(DEBUG, "Received packet with vlan ID: %d.",
544                                            rcd->tci);
545                                 rxm->ol_flags = PKT_RX_VLAN_PKT;
546 #ifdef RTE_LIBRTE_VMXNET3_DEBUG_DRIVER
547                                 VMXNET3_ASSERT(rxm &&
548                                                rte_pktmbuf_mtod(rxm, void *));
549 #endif
550                                 /* Save the stripped VLAN TCI in the mbuf */
551                                 rxm->vlan_tci = rte_le_to_cpu_16((uint16_t)rcd->tci);
552                         } else {
553                                 rxm->ol_flags = 0;
554                                 rxm->vlan_tci = 0;
555                         }
556
557                         /* Initialize newly received packet buffer */
558                         rxm->port = rxq->port_id;
559                         rxm->nb_segs = 1;
560                         rxm->next = NULL;
561                         rxm->pkt_len = (uint16_t)rcd->len;
562                         rxm->data_len = (uint16_t)rcd->len;
563                         rxm->port = rxq->port_id;
564                         rxm->data_off = RTE_PKTMBUF_HEADROOM;
565
566                         rx_pkts[nb_rx++] = rxm;
567 rcd_done:
568                         rxq->cmd_ring[ring_idx].next2comp = idx;
569                         VMXNET3_INC_RING_IDX_ONLY(rxq->cmd_ring[ring_idx].next2comp, rxq->cmd_ring[ring_idx].size);
570
571                         /* Allocate new mbufs and repost the descriptors just consumed */
572                         vmxnet3_post_rx_bufs(rxq, ring_idx);
573                         if (unlikely(rxq->shared->ctrl.updateRxProd)) {
574                                 VMXNET3_WRITE_BAR0_REG(hw, rxprod_reg[ring_idx] + (rxq->queue_id * VMXNET3_REG_ALIGN),
575                                                        rxq->cmd_ring[ring_idx].next2fill);
576                         }
577
578                         /* Advance to the next descriptor in comp_ring */
579                         vmxnet3_comp_ring_adv_next2proc(&rxq->comp_ring);
580
581                         rcd = &rxq->comp_ring.base[rxq->comp_ring.next2proc].rcd;
582                         nb_rxd++;
583                         if (nb_rxd > rxq->cmd_ring[0].size) {
584                                 PMD_RX_LOG(ERR,
585                                            "Used up quota of receiving packets,"
586                                            " relinquish control.");
587                                 break;
588                         }
589                 }
590         }
591
592         return nb_rx;
593 }
594
595 /*
596  * Create memzone for device rings. malloc can't be used as the physical address is
597  * needed. If the memzone is already created, then this function returns a ptr
598  * to the old one.
599  */
600 static const struct rte_memzone *
601 ring_dma_zone_reserve(struct rte_eth_dev *dev, const char *ring_name,
602                       uint16_t queue_id, uint32_t ring_size, int socket_id)
603 {
604         char z_name[RTE_MEMZONE_NAMESIZE];
605         const struct rte_memzone *mz;
606
607         snprintf(z_name, sizeof(z_name), "%s_%s_%d_%d",
608                         dev->driver->pci_drv.name, ring_name,
609                         dev->data->port_id, queue_id);
610
611         mz = rte_memzone_lookup(z_name);
612         if (mz)
613                 return mz;
614
615         return rte_memzone_reserve_aligned(z_name, ring_size,
616                         socket_id, 0, VMXNET3_RING_BA_ALIGN);
617 }
618
619 int
620 vmxnet3_dev_tx_queue_setup(struct rte_eth_dev *dev,
621                            uint16_t queue_idx,
622                            uint16_t nb_desc,
623                            unsigned int socket_id,
624                            __attribute__((unused)) const struct rte_eth_txconf *tx_conf)
625 {
626         struct vmxnet3_hw     *hw = dev->data->dev_private;
627         const struct rte_memzone *mz;
628         struct vmxnet3_tx_queue *txq;
629         struct vmxnet3_cmd_ring *ring;
630         struct vmxnet3_comp_ring *comp_ring;
631         int size;
632
633         PMD_INIT_FUNC_TRACE();
634
635         if ((tx_conf->txq_flags & ETH_TXQ_FLAGS_NOMULTSEGS) !=
636             ETH_TXQ_FLAGS_NOMULTSEGS) {
637                 PMD_INIT_LOG(ERR, "TX multi-segment packets are not supported yet");
638                 return -EINVAL;
639         }
640
641         if ((tx_conf->txq_flags & ETH_TXQ_FLAGS_NOOFFLOADS) !=
642             ETH_TXQ_FLAGS_NOOFFLOADS) {
643                 PMD_INIT_LOG(ERR, "TX offloads are not supported yet");
644                 return -EINVAL;
645         }
646
647         txq = rte_zmalloc("ethdev_tx_queue", sizeof(struct vmxnet3_tx_queue), CACHE_LINE_SIZE);
648         if (txq == NULL) {
649                 PMD_INIT_LOG(ERR, "Can not allocate tx queue structure");
650                 return -ENOMEM;
651         }
652
653         txq->queue_id = queue_idx;
654         txq->port_id = dev->data->port_id;
655         txq->shared = &hw->tqd_start[queue_idx];
656         txq->hw = hw;
657         txq->qid = queue_idx;
658         txq->stopped = TRUE;
659
660         ring = &txq->cmd_ring;
661         comp_ring = &txq->comp_ring;
662
663         /* Tx vmxnet ring length should be between 512-4096 */
664         if (nb_desc < VMXNET3_DEF_TX_RING_SIZE) {
665                 PMD_INIT_LOG(ERR, "VMXNET3 Tx Ring Size Min: %u",
666                              VMXNET3_DEF_TX_RING_SIZE);
667                 return -EINVAL;
668         } else if (nb_desc > VMXNET3_TX_RING_MAX_SIZE) {
669                 PMD_INIT_LOG(ERR, "VMXNET3 Tx Ring Size Max: %u",
670                              VMXNET3_TX_RING_MAX_SIZE);
671                 return -EINVAL;
672         } else {
673                 ring->size = nb_desc;
674                 ring->size &= ~VMXNET3_RING_SIZE_MASK;
675         }
676         comp_ring->size = ring->size;
677
678         /* Tx vmxnet rings structure initialization */
679         ring->next2fill = 0;
680         ring->next2comp = 0;
681         ring->gen = VMXNET3_INIT_GEN;
682         comp_ring->next2proc = 0;
683         comp_ring->gen = VMXNET3_INIT_GEN;
684
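        /*
         * The command ring and completion ring share a single physically
         * contiguous memzone: Tx descriptors first, immediately followed by
         * the Tx completion descriptors.
         */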
685         size = sizeof(struct Vmxnet3_TxDesc) * ring->size;
686         size += sizeof(struct Vmxnet3_TxCompDesc) * comp_ring->size;
687
688         mz = ring_dma_zone_reserve(dev, "txdesc", queue_idx, size, socket_id);
689         if (mz == NULL) {
690                 PMD_INIT_LOG(ERR, "ERROR: Creating queue descriptors zone");
691                 return -ENOMEM;
692         }
693         memset(mz->addr, 0, mz->len);
694
695         /* cmd_ring initialization */
696         ring->base = mz->addr;
697         ring->basePA = mz->phys_addr;
698
699         /* comp_ring initialization */
700         comp_ring->base = ring->base + ring->size;
701         comp_ring->basePA = ring->basePA +
702                 (sizeof(struct Vmxnet3_TxDesc) * ring->size);
703
704         /* cmd_ring0 buf_info allocation */
705         ring->buf_info = rte_zmalloc("tx_ring_buf_info",
706                                      ring->size * sizeof(vmxnet3_buf_info_t), CACHE_LINE_SIZE);
707         if (ring->buf_info == NULL) {
708                 PMD_INIT_LOG(ERR, "ERROR: Creating tx_buf_info structure");
709                 return -ENOMEM;
710         }
711
712         /* Update the data portion with txq */
713         dev->data->tx_queues[queue_idx] = txq;
714
715         return 0;
716 }
717
718 int
719 vmxnet3_dev_rx_queue_setup(struct rte_eth_dev *dev,
720                            uint16_t queue_idx,
721                            uint16_t nb_desc,
722                            unsigned int socket_id,
723                            __attribute__((unused)) const struct rte_eth_rxconf *rx_conf,
724                            struct rte_mempool *mp)
725 {
726         const struct rte_memzone *mz;
727         struct vmxnet3_rx_queue *rxq;
728         struct vmxnet3_hw     *hw = dev->data->dev_private;
729         struct vmxnet3_cmd_ring *ring0, *ring1, *ring;
730         struct vmxnet3_comp_ring *comp_ring;
731         int size;
732         uint8_t i;
733         char mem_name[32];
734         uint16_t buf_size;
735         struct rte_pktmbuf_pool_private *mbp_priv;
736
737         PMD_INIT_FUNC_TRACE();
738
739         mbp_priv = (struct rte_pktmbuf_pool_private *)
740                 rte_mempool_get_priv(mp);
741         buf_size = (uint16_t) (mbp_priv->mbuf_data_room_size -
742                                RTE_PKTMBUF_HEADROOM);
743
744         if (dev->data->dev_conf.rxmode.max_rx_pkt_len > buf_size) {
745                 PMD_INIT_LOG(ERR, "buf_size = %u, max_pkt_len = %u, "
746                              "VMXNET3 doesn't support scatter packets yet",
747                              buf_size, dev->data->dev_conf.rxmode.max_rx_pkt_len);
748                 return -EINVAL;
749         }
750
751         rxq = rte_zmalloc("ethdev_rx_queue", sizeof(struct vmxnet3_rx_queue), CACHE_LINE_SIZE);
752         if (rxq == NULL) {
753                 PMD_INIT_LOG(ERR, "Can not allocate rx queue structure");
754                 return -ENOMEM;
755         }
756
757         rxq->mp = mp;
758         rxq->queue_id = queue_idx;
759         rxq->port_id = dev->data->port_id;
760         rxq->shared = &hw->rqd_start[queue_idx];
761         rxq->hw = hw;
762         rxq->qid1 = queue_idx;
763         rxq->qid2 = queue_idx + hw->num_rx_queues;
764         rxq->stopped = TRUE;
765
766         ring0 = &rxq->cmd_ring[0];
767         ring1 = &rxq->cmd_ring[1];
768         comp_ring = &rxq->comp_ring;
769
770         /* Rx vmxnet rings length should be between 256-4096 */
771         if (nb_desc < VMXNET3_DEF_RX_RING_SIZE) {
772                 PMD_INIT_LOG(ERR, "VMXNET3 Rx Ring Size Min: 256");
773                 return -EINVAL;
774         } else if (nb_desc > VMXNET3_RX_RING_MAX_SIZE) {
775                 PMD_INIT_LOG(ERR, "VMXNET3 Rx Ring Size Max: 4096");
776                 return -EINVAL;
777         } else {
778                 ring0->size = nb_desc;
779                 ring0->size &= ~VMXNET3_RING_SIZE_MASK;
780                 ring1->size = ring0->size;
781         }
782
783         comp_ring->size = ring0->size + ring1->size;
784
785         /* Rx vmxnet rings structure initialization */
786         ring0->next2fill = 0;
787         ring1->next2fill = 0;
788         ring0->next2comp = 0;
789         ring1->next2comp = 0;
790         ring0->gen = VMXNET3_INIT_GEN;
791         ring1->gen = VMXNET3_INIT_GEN;
792         comp_ring->next2proc = 0;
793         comp_ring->gen = VMXNET3_INIT_GEN;
794
795         size = sizeof(struct Vmxnet3_RxDesc) * (ring0->size + ring1->size);
796         size += sizeof(struct Vmxnet3_RxCompDesc) * comp_ring->size;
797
798         mz = ring_dma_zone_reserve(dev, "rxdesc", queue_idx, size, socket_id);
799         if (mz == NULL) {
800                 PMD_INIT_LOG(ERR, "ERROR: Creating queue descriptors zone");
801                 return -ENOMEM;
802         }
803         memset(mz->addr, 0, mz->len);
804
805         /* cmd_ring0 initialization */
806         ring0->base = mz->addr;
807         ring0->basePA = mz->phys_addr;
808
809         /* cmd_ring1 initialization */
810         ring1->base = ring0->base + ring0->size;
811         ring1->basePA = ring0->basePA + sizeof(struct Vmxnet3_RxDesc) * ring0->size;
812
813         /* comp_ring initialization */
814         comp_ring->base = ring1->base +  ring1->size;
815         comp_ring->basePA = ring1->basePA + sizeof(struct Vmxnet3_RxDesc) *
816                 ring1->size;
817
818         /* cmd_ring0-cmd_ring1 buf_info allocation */
819         for (i = 0; i < VMXNET3_RX_CMDRING_SIZE; i++) {
820
821                 ring = &rxq->cmd_ring[i];
822                 ring->rid = i;
823                 snprintf(mem_name, sizeof(mem_name), "rx_ring_%d_buf_info", i);
824
825                 ring->buf_info = rte_zmalloc(mem_name, ring->size * sizeof(vmxnet3_buf_info_t), CACHE_LINE_SIZE);
826                 if (ring->buf_info == NULL) {
827                         PMD_INIT_LOG(ERR, "ERROR: Creating rx_buf_info structure");
828                         return -ENOMEM;
829                 }
830         }
831
832         /* Update the data portion with rxq */
833         dev->data->rx_queues[queue_idx] = rxq;
834
835         return 0;
836 }
837
838 /*
839  * Initializes Receive Unit
840  * Load mbufs in rx queue in advance
841  */
842 int
843 vmxnet3_dev_rxtx_init(struct rte_eth_dev *dev)
844 {
845         struct vmxnet3_hw *hw = dev->data->dev_private;
846
847         int i, ret;
848         uint8_t j;
849
850         PMD_INIT_FUNC_TRACE();
851
852         for (i = 0; i < hw->num_rx_queues; i++) {
853                 vmxnet3_rx_queue_t *rxq = dev->data->rx_queues[i];
854
855                 for (j = 0; j < VMXNET3_RX_CMDRING_SIZE; j++) {
856                         /* Post as many buffers as the ring has free, i.e. fill the whole ring at init */
857                         ret = vmxnet3_post_rx_bufs(rxq, j);
858                         if (ret <= 0) {
859                                 PMD_INIT_LOG(ERR, "ERROR: Posting Rxq: %d buffers ring: %d", i, j);
860                                 return -ret;
861                         }
862                         /* Publish next2fill to the device so it can use the newly posted mbufs */
863                         if (unlikely(rxq->shared->ctrl.updateRxProd)) {
864                                 VMXNET3_WRITE_BAR0_REG(hw, rxprod_reg[j] + (rxq->queue_id * VMXNET3_REG_ALIGN),
865                                                        rxq->cmd_ring[j].next2fill);
866                         }
867                 }
868                 rxq->stopped = FALSE;
869         }
870
871         for (i = 0; i < dev->data->nb_tx_queues; i++) {
872                 struct vmxnet3_tx_queue *txq = dev->data->tx_queues[i];
873
874                 txq->stopped = FALSE;
875         }
876
877         return 0;
878 }
879
880 static uint8_t rss_intel_key[40] = {
881         0x6D, 0x5A, 0x56, 0xDA, 0x25, 0x5B, 0x0E, 0xC2,
882         0x41, 0x67, 0x25, 0x3D, 0x43, 0xA3, 0x8F, 0xB0,
883         0xD0, 0xCA, 0x2B, 0xCB, 0xAE, 0x7B, 0x30, 0xB4,
884         0x77, 0xCB, 0x2D, 0xA3, 0x80, 0x30, 0xF2, 0x0C,
885         0x6A, 0x42, 0xB7, 0x3B, 0xBE, 0xAC, 0x01, 0xFA,
886 };
887
888 /*
889  * Configure RSS feature
890  */
891 int
892 vmxnet3_rss_configure(struct rte_eth_dev *dev)
893 {
894 #define VMXNET3_RSS_OFFLOAD_ALL ( \
895                 ETH_RSS_IPV4 | \
896                 ETH_RSS_IPV4_TCP | \
897                 ETH_RSS_IPV6 | \
898                 ETH_RSS_IPV6_TCP)
899
900         struct vmxnet3_hw *hw = dev->data->dev_private;
901         struct VMXNET3_RSSConf *dev_rss_conf;
902         struct rte_eth_rss_conf *port_rss_conf;
903         uint64_t rss_hf;
904         uint8_t i, j;
905
906         PMD_INIT_FUNC_TRACE();
907
908         dev_rss_conf = hw->rss_conf;
909         port_rss_conf = &dev->data->dev_conf.rx_adv_conf.rss_conf;
910
911         /* loading hashFunc */
912         dev_rss_conf->hashFunc = VMXNET3_RSS_HASH_FUNC_TOEPLITZ;
913         /* loading hashKeySize */
914         dev_rss_conf->hashKeySize = VMXNET3_RSS_MAX_KEY_SIZE;
915         /* loading indTableSize : Must not exceed VMXNET3_RSS_MAX_IND_TABLE_SIZE (128)*/
916         dev_rss_conf->indTableSize = (uint16_t)(hw->num_rx_queues * 4);
917
918         if (port_rss_conf->rss_key == NULL) {
919                 /* Default hash key */
920                 port_rss_conf->rss_key = rss_intel_key;
921         }
922
923         /* loading hashKey */
924         memcpy(&dev_rss_conf->hashKey[0], port_rss_conf->rss_key, dev_rss_conf->hashKeySize);
925
926         /* loading indTable */
927         for (i = 0, j = 0; i < dev_rss_conf->indTableSize; i++, j++) {
928                 if (j == dev->data->nb_rx_queues)
929                         j = 0;
930                 dev_rss_conf->indTable[i] = j;
931         }
932
933         /* loading hashType */
934         dev_rss_conf->hashType = 0;
935         rss_hf = port_rss_conf->rss_hf & VMXNET3_RSS_OFFLOAD_ALL;
936         if (rss_hf & ETH_RSS_IPV4)
937                 dev_rss_conf->hashType |= VMXNET3_RSS_HASH_TYPE_IPV4;
938         if (rss_hf & ETH_RSS_IPV4_TCP)
939                 dev_rss_conf->hashType |= VMXNET3_RSS_HASH_TYPE_TCP_IPV4;
940         if (rss_hf & ETH_RSS_IPV6)
941                 dev_rss_conf->hashType |= VMXNET3_RSS_HASH_TYPE_IPV6;
942         if (rss_hf & ETH_RSS_IPV6_TCP)
943                 dev_rss_conf->hashType |= VMXNET3_RSS_HASH_TYPE_TCP_IPV6;
944
945         return VMXNET3_SUCCESS;
946 }
947
948 /*
949  * Configure VLAN Filter feature
950  */
951 int
952 vmxnet3_vlan_configure(struct rte_eth_dev *dev)
953 {
954         uint8_t i;
955         struct vmxnet3_hw *hw = dev->data->dev_private;
956         uint32_t *vf_table = hw->shared->devRead.rxFilterConf.vfTable;
957
958         PMD_INIT_FUNC_TRACE();
959
960         /* Initialize the VLAN filter table */
961         for (i = 0; i < VMXNET3_VFT_SIZE; i++) {
962                 /* Filter all vlan tags out by default */
963                 vf_table[i] = 0;
964                 /* To-Do: Provide another routine in dev_ops for user config */
965
966                 PMD_INIT_LOG(DEBUG, "Registering VLAN portid: %"PRIu8" tag %u",
967                                         dev->data->port_id, vf_table[i]);
968         }
969
970         return VMXNET3_SUCCESS;
971 }