mbuf: replace data pointer by an offset
[dpdk.git] lib/librte_pmd_vmxnet3/vmxnet3_rxtx.c
1 /*-
2  *   BSD LICENSE
3  *
4  *   Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
5  *   All rights reserved.
6  *
7  *   Redistribution and use in source and binary forms, with or without
8  *   modification, are permitted provided that the following conditions
9  *   are met:
10  *
11  *     * Redistributions of source code must retain the above copyright
12  *       notice, this list of conditions and the following disclaimer.
13  *     * Redistributions in binary form must reproduce the above copyright
14  *       notice, this list of conditions and the following disclaimer in
15  *       the documentation and/or other materials provided with the
16  *       distribution.
17  *     * Neither the name of Intel Corporation nor the names of its
18  *       contributors may be used to endorse or promote products derived
19  *       from this software without specific prior written permission.
20  *
21  *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
22  *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
23  *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
24  *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
25  *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
26  *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
27  *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
28  *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
29  *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
30  *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
31  *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
32  */
33
34 #include <sys/queue.h>
35
36 #include <stdio.h>
37 #include <stdlib.h>
38 #include <string.h>
39 #include <errno.h>
40 #include <stdint.h>
41 #include <stdarg.h>
42 #include <unistd.h>
43 #include <inttypes.h>
44
45 #include <rte_byteorder.h>
46 #include <rte_common.h>
47 #include <rte_cycles.h>
48 #include <rte_log.h>
49 #include <rte_debug.h>
50 #include <rte_interrupts.h>
51 #include <rte_pci.h>
52 #include <rte_memory.h>
53 #include <rte_memzone.h>
54 #include <rte_launch.h>
55 #include <rte_tailq.h>
56 #include <rte_eal.h>
57 #include <rte_per_lcore.h>
58 #include <rte_lcore.h>
59 #include <rte_atomic.h>
60 #include <rte_branch_prediction.h>
61 #include <rte_ring.h>
62 #include <rte_mempool.h>
63 #include <rte_malloc.h>
64 #include <rte_mbuf.h>
65 #include <rte_ether.h>
66 #include <rte_ethdev.h>
67 #include <rte_prefetch.h>
68 #include <rte_udp.h>
69 #include <rte_tcp.h>
70 #include <rte_sctp.h>
71 #include <rte_string_fns.h>
72 #include <rte_errno.h>
73
74 #include "vmxnet3/vmxnet3_defs.h"
75 #include "vmxnet3_ring.h"
76
77 #include "vmxnet3_logs.h"
78 #include "vmxnet3_ethdev.h"
79
80
81 #define RTE_MBUF_DATA_DMA_ADDR(mb) \
82         (uint64_t) ((mb)->buf_physaddr + (mb)->data_off)
83
84 #define RTE_MBUF_DATA_DMA_ADDR_DEFAULT(mb) \
85         (uint64_t) ((mb)->buf_physaddr + RTE_PKTMBUF_HEADROOM)
86
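/*
 * Illustrative note (not part of the original driver): with the mbuf "data"
 * pointer replaced by the data_off field, the payload address is recovered
 * from the buffer start plus that offset, e.g.
 *
 *     void     *va = (char *)mb->buf_addr + mb->data_off;
 *     uint64_t  pa = mb->buf_physaddr + mb->data_off;
 *
 * which is exactly what RTE_MBUF_DATA_DMA_ADDR() above computes for the
 * device-visible address; RTE_MBUF_DATA_DMA_ADDR_DEFAULT() assumes the
 * default headroom of a freshly allocated mbuf.
 */
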
87 static uint32_t rxprod_reg[2] = {VMXNET3_REG_RXPROD, VMXNET3_REG_RXPROD2};
88
89 static inline int vmxnet3_post_rx_bufs(vmxnet3_rx_queue_t* , uint8_t);
90 static inline void vmxnet3_tq_tx_complete(vmxnet3_tx_queue_t *);
91 #ifdef RTE_LIBRTE_VMXNET3_DEBUG_DRIVER_NOT_USED
92 static void vmxnet3_rxq_dump(struct vmxnet3_rx_queue *);
93 static void vmxnet3_txq_dump(struct vmxnet3_tx_queue *);
94 #endif
95
96 static inline struct rte_mbuf *
97 rte_rxmbuf_alloc(struct rte_mempool *mp)
98 {
99         struct rte_mbuf *m;
100
101         m = __rte_mbuf_raw_alloc(mp);
102         __rte_mbuf_sanity_check_raw(m, 0);
103         return m;
104 }
105
106 #ifdef RTE_LIBRTE_VMXNET3_DEBUG_DRIVER_NOT_USED
107 static void
108 vmxnet3_rxq_dump(struct vmxnet3_rx_queue *rxq)
109 {
110         uint32_t avail = 0;
111
112         if (rxq == NULL)
113                 return;
114
115         PMD_RX_LOG(DEBUG,
116                    "RXQ: cmd0 base : 0x%p cmd1 base : 0x%p comp ring base : 0x%p.",
117                    rxq->cmd_ring[0].base, rxq->cmd_ring[1].base, rxq->comp_ring.base);
118         PMD_RX_LOG(DEBUG,
119                    "RXQ: cmd0 basePA : 0x%lx cmd1 basePA : 0x%lx comp ring basePA : 0x%lx.",
120                    (unsigned long)rxq->cmd_ring[0].basePA,
121                    (unsigned long)rxq->cmd_ring[1].basePA,
122                    (unsigned long)rxq->comp_ring.basePA);
123
124         avail = vmxnet3_cmd_ring_desc_avail(&rxq->cmd_ring[0]);
125         PMD_RX_LOG(DEBUG,
126                    "RXQ:cmd0: size=%u; free=%u; next2proc=%u; queued=%u",
127                    (uint32_t)rxq->cmd_ring[0].size, avail,
128                    rxq->comp_ring.next2proc,
129                    rxq->cmd_ring[0].size - avail);
130
131         avail = vmxnet3_cmd_ring_desc_avail(&rxq->cmd_ring[1]);
132         PMD_RX_LOG(DEBUG, "RXQ:cmd1 size=%u; free=%u; next2proc=%u; queued=%u",
133                    (uint32_t)rxq->cmd_ring[1].size, avail, rxq->comp_ring.next2proc,
134                    rxq->cmd_ring[1].size - avail);
135
136 }
137
138 static void
139 vmxnet3_txq_dump(struct vmxnet3_tx_queue *txq)
140 {
141         uint32_t avail = 0;
142
143         if (txq == NULL)
144                 return;
145
146         PMD_TX_LOG(DEBUG, "TXQ: cmd base : 0x%p comp ring base : 0x%p.",
147                    txq->cmd_ring.base, txq->comp_ring.base);
148         PMD_TX_LOG(DEBUG, "TXQ: cmd basePA : 0x%lx comp ring basePA : 0x%lx.",
149                    (unsigned long)txq->cmd_ring.basePA,
150                    (unsigned long)txq->comp_ring.basePA);
151
152         avail = vmxnet3_cmd_ring_desc_avail(&txq->cmd_ring);
153         PMD_TX_LOG(DEBUG, "TXQ: size=%u; free=%u; next2proc=%u; queued=%u",
154                    (uint32_t)txq->cmd_ring.size, avail,
155                    txq->comp_ring.next2proc, txq->cmd_ring.size - avail);
156 }
157 #endif
158
159 static inline void
160 vmxnet3_cmd_ring_release(vmxnet3_cmd_ring_t *ring)
161 {
162         while (ring->next2comp != ring->next2fill) {
163                 /* No need to worry about tx desc ownership, device is quiesced by now. */
164                 vmxnet3_buf_info_t *buf_info = ring->buf_info + ring->next2comp;
165
166                 if (buf_info->m) {
167                         rte_pktmbuf_free(buf_info->m);
168                         buf_info->m = NULL;
169                         buf_info->bufPA = 0;
170                         buf_info->len = 0;
171                 }
172                 vmxnet3_cmd_ring_adv_next2comp(ring);
173         }
174         rte_free(ring->buf_info);
175         ring->buf_info = NULL;
176 }
177
178 void
179 vmxnet3_dev_tx_queue_release(void *txq)
180 {
181         vmxnet3_tx_queue_t *tq = txq;
182
183         if (txq != NULL) {
184                 /* Release the cmd_ring */
185                 vmxnet3_cmd_ring_release(&tq->cmd_ring);
186         }
187 }
188
189 void
190 vmxnet3_dev_rx_queue_release(void *rxq)
191 {
192         int i;
193         vmxnet3_rx_queue_t *rq = rxq;
194
195         if (rxq != NULL) {
196                 /* Release both the cmd_rings */
197                 for (i = 0; i < VMXNET3_RX_CMDRING_SIZE; i++)
198                         vmxnet3_cmd_ring_release(&rq->cmd_ring[i]);
199         }
200 }
201
202 void
203 vmxnet3_dev_clear_queues(struct rte_eth_dev *dev)
204 {
205         unsigned i;
206
207         PMD_INIT_FUNC_TRACE();
208
209         for (i = 0; i < dev->data->nb_tx_queues; i++) {
210                 struct vmxnet3_tx_queue *txq = dev->data->tx_queues[i];
211
212                 if (txq != NULL) {
213                         txq->stopped = TRUE;
214                         vmxnet3_dev_tx_queue_release(txq);
215                 }
216         }
217
218         for (i = 0; i < dev->data->nb_rx_queues; i++) {
219                 struct vmxnet3_rx_queue *rxq = dev->data->rx_queues[i];
220
221                 if (rxq != NULL) {
222                         rxq->stopped = TRUE;
223                         vmxnet3_dev_rx_queue_release(rxq);
224                 }
225         }
226 }
227
228 static inline void
229 vmxnet3_tq_tx_complete(vmxnet3_tx_queue_t *txq)
230 {
231         int completed = 0;
232         struct rte_mbuf *mbuf;
233         vmxnet3_comp_ring_t *comp_ring = &txq->comp_ring;
234         struct Vmxnet3_TxCompDesc *tcd = (struct Vmxnet3_TxCompDesc *)
235                 (comp_ring->base + comp_ring->next2proc);
236
237         while (tcd->gen == comp_ring->gen) {
238
239                 /* Release cmd_ring descriptor and free mbuf */
240 #ifdef RTE_LIBRTE_VMXNET3_DEBUG_DRIVER
241                 VMXNET3_ASSERT(txq->cmd_ring.base[tcd->txdIdx].txd.eop == 1);
242 #endif
243                 mbuf = txq->cmd_ring.buf_info[tcd->txdIdx].m;
244                 if (unlikely(mbuf == NULL))
245                         rte_panic("EOP desc does not point to a valid mbuf");
246                 else
247                         rte_pktmbuf_free(mbuf);
248
249
250                 txq->cmd_ring.buf_info[tcd->txdIdx].m = NULL;
251                 /* Mark the txd for which tcd was generated as completed */
252                 vmxnet3_cmd_ring_adv_next2comp(&txq->cmd_ring);
253
254                 vmxnet3_comp_ring_adv_next2proc(comp_ring);
255                 tcd = (struct Vmxnet3_TxCompDesc *)(comp_ring->base +
256                                                     comp_ring->next2proc);
257                 completed++;
258         }
259
260         PMD_TX_LOG(DEBUG, "Processed %d tx comps & command descs.", completed);
261 }
262
263 uint16_t
264 vmxnet3_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts,
265                   uint16_t nb_pkts)
266 {
267         uint16_t nb_tx;
268         Vmxnet3_TxDesc *txd = NULL;
269         vmxnet3_buf_info_t *tbi = NULL;
270         struct vmxnet3_hw *hw;
271         struct rte_mbuf *txm;
272         vmxnet3_tx_queue_t *txq = tx_queue;
273
274         hw = txq->hw;
275
276         if (txq->stopped) {
277                 PMD_TX_LOG(DEBUG, "Tx queue is stopped.");
278                 return 0;
279         }
280
281         /* Free up the comp_descriptors aggressively */
282         vmxnet3_tq_tx_complete(txq);
283
284         nb_tx = 0;
285         while (nb_tx < nb_pkts) {
286
287                 if (vmxnet3_cmd_ring_desc_avail(&txq->cmd_ring)) {
288
289                         txm = tx_pkts[nb_tx];
290                         /* Multi-segment packets are not supported yet; drop them */
291                         if (txm->nb_segs != 1) {
292                                 PMD_TX_LOG(DEBUG, "Multi-segment packets not supported yet, drop!");
293                                 rte_pktmbuf_free(tx_pkts[nb_tx]);
294                                 txq->stats.drop_total++;
295
296                                 nb_tx++;
297                                 continue;
298                         }
299
300                         /* The MTU excludes the Ethernet header, so allow data_len up to MTU + header */
301                         if (txm->data_len > (hw->cur_mtu + ETHER_HDR_LEN)) {
302                                 PMD_TX_LOG(DEBUG, "Packet data_len exceeds MTU + Ethernet header");
303                                 rte_pktmbuf_free(tx_pkts[nb_tx]);
304                                 txq->stats.drop_total++;
305
306                                 nb_tx++;
307                                 continue;
308                         }
309
310                         txd = (Vmxnet3_TxDesc *)(txq->cmd_ring.base + txq->cmd_ring.next2fill);
311
312                         /* Fill the tx descriptor */
313                         tbi = txq->cmd_ring.buf_info + txq->cmd_ring.next2fill;
314                         tbi->bufPA = RTE_MBUF_DATA_DMA_ADDR(txm);
315                         txd->addr = tbi->bufPA;
316                         txd->len = txm->data_len;
317
318                         /* Mark the last descriptor as End of Packet. */
319                         txd->cq = 1;
320                         txd->eop = 1;
321
322                         /* Record current mbuf for freeing it later in tx complete */
323 #ifdef RTE_LIBRTE_VMXNET3_DEBUG_DRIVER
324                         VMXNET3_ASSERT(txm);
325 #endif
326                         tbi->m = txm;
327
328                         /* Set the offloading mode to default */
329                         txd->hlen = 0;
330                         txd->om = VMXNET3_OM_NONE;
331                         txd->msscof = 0;
332
333                         /* finally flip the GEN bit of the SOP desc  */
334                         txd->gen = txq->cmd_ring.gen;
335                         txq->shared->ctrl.txNumDeferred++;
336
337                         /* move to the next2fill descriptor */
338                         vmxnet3_cmd_ring_adv_next2fill(&txq->cmd_ring);
339                         nb_tx++;
340
341                 } else {
342                         PMD_TX_LOG(DEBUG, "No free tx cmd desc(s)");
343                         txq->stats.drop_total += (nb_pkts - nb_tx);
344                         break;
345                 }
346         }
347
348         PMD_TX_LOG(DEBUG, "vmxnet3 txThreshold: %u", txq->shared->ctrl.txThreshold);
349
350         if (txq->shared->ctrl.txNumDeferred >= txq->shared->ctrl.txThreshold) {
351
352                 txq->shared->ctrl.txNumDeferred = 0;
353                 /* Notify vSwitch that packets are available. */
354                 VMXNET3_WRITE_BAR0_REG(hw, (VMXNET3_REG_TXPROD + txq->queue_id * VMXNET3_REG_ALIGN),
355                                        txq->cmd_ring.next2fill);
356         }
357
358         return nb_tx;
359 }
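
/*
 * Usage sketch (illustrative, generic ethdev API): applications do not call
 * vmxnet3_xmit_pkts() directly; it is installed as the port's tx_pkt_burst
 * callback, so transmission looks like
 *
 *     struct rte_mbuf *pkts[32];
 *     uint16_t sent = rte_eth_tx_burst(port_id, queue_id, pkts, 32);
 *
 * Packets not accepted (sent < 32) remain owned by the caller and may be
 * retried or freed; accepted packets are freed later by
 * vmxnet3_tq_tx_complete() once the device reports completion.
 */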
360
361 /*
362  *  Allocates mbufs and clusters, and posts rx descriptors with the buffer
363  *  details so that the device can receive packets into those buffers.
364  *      Ring layout:
365  *      Among the two rings, the 1st ring contains buffers of type 0 and type 1.
366  *      bufs_per_pkt is set such that for non-LRO cases all the buffers
367  *      required by a frame fit in the 1st ring (1st buf of type 0 and the
368  *      rest of type 1).
369  *      The 2nd ring contains only type 1 buffers and is mostly used for LRO.
370  *
371  */
372 static inline int
373 vmxnet3_post_rx_bufs(vmxnet3_rx_queue_t *rxq, uint8_t ring_id)
374 {
375         int err = 0;
376         uint32_t i = 0, val = 0;
377         struct vmxnet3_cmd_ring *ring = &rxq->cmd_ring[ring_id];
378
379         while (vmxnet3_cmd_ring_desc_avail(ring) > 0) {
380                 struct Vmxnet3_RxDesc *rxd;
381                 struct rte_mbuf *mbuf;
382                 vmxnet3_buf_info_t *buf_info = &ring->buf_info[ring->next2fill];
383
384                 rxd = (struct Vmxnet3_RxDesc *)(ring->base + ring->next2fill);
385
386                 if (ring->rid == 0) {
387                         /* Usually: One HEAD type buf per packet
388                          * val = (ring->next2fill % rxq->hw->bufs_per_pkt) ?
389                          * VMXNET3_RXD_BTYPE_BODY : VMXNET3_RXD_BTYPE_HEAD;
390                          */
391
392                         /* We use a single buffer per packet, so all descriptors here are HEAD type */
393                         val = VMXNET3_RXD_BTYPE_HEAD;
394                 } else {
395                         /* Only BODY type buffers for the 2nd ring, which is not used at all by ESXi */
396                         val = VMXNET3_RXD_BTYPE_BODY;
397                 }
398
399                 /* Allocate blank mbuf for the current Rx Descriptor */
400                 mbuf = rte_rxmbuf_alloc(rxq->mp);
401                 if (mbuf == NULL) {
402                         PMD_RX_LOG(ERR, "Error allocating mbuf in %s", __func__);
403                         rxq->stats.rx_buf_alloc_failure++;
404                         err = ENOMEM;
405                         break;
406                 }
407
408                 /*
409                  * Load the mbuf pointer into buf_info[next2fill];
410                  * the buf_info structure is the equivalent of the virtio-virtqueue cookie
411                  */
412                 buf_info->m = mbuf;
413                 buf_info->len = (uint16_t)(mbuf->buf_len -
414                                            RTE_PKTMBUF_HEADROOM);
415                 buf_info->bufPA = RTE_MBUF_DATA_DMA_ADDR_DEFAULT(mbuf);
416
417                 /* Load Rx Descriptor with the buffer's GPA */
418                 rxd->addr = buf_info->bufPA;
419
420                 /* After this point rxd->addr MUST not be NULL */
421                 rxd->btype = val;
422                 rxd->len = buf_info->len;
423                 /* Flip gen bit at the end to change ownership */
424                 rxd->gen = ring->gen;
425
426                 vmxnet3_cmd_ring_adv_next2fill(ring);
427                 i++;
428         }
429
430         /* Return error only if no buffers are posted at present */
431         if (vmxnet3_cmd_ring_desc_avail(ring) >= (ring->size - 1))
432                 return -err;
433         else
434                 return i;
435 }
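
/*
 * Note on the return convention above (descriptive only): a negative errno
 * (-ENOMEM) is returned only when nothing could be posted and the ring is
 * still essentially empty; otherwise the number of descriptors posted in
 * this call is returned (possibly 0 if the ring was already full).
 * vmxnet3_dev_rxtx_init() below relies on this when it treats ret <= 0 as a
 * fatal initialization error.
 */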
436
437 /*
438  * Process the Rx Completion Ring of given vmxnet3_rx_queue
439  * for nb_pkts burst and return the number of packets received
440  */
441 uint16_t
442 vmxnet3_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t nb_pkts)
443 {
444         uint16_t nb_rx;
445         uint32_t nb_rxd, idx;
446         uint8_t ring_idx;
447         vmxnet3_rx_queue_t *rxq;
448         Vmxnet3_RxCompDesc *rcd;
449         vmxnet3_buf_info_t *rbi;
450         Vmxnet3_RxDesc *rxd;
451         struct rte_mbuf *rxm = NULL;
452         struct vmxnet3_hw *hw;
453
454         nb_rx = 0;
455         ring_idx = 0;
456         nb_rxd = 0;
457         idx = 0;
458
459         rxq = rx_queue;
460         hw = rxq->hw;
461
462         rcd = &rxq->comp_ring.base[rxq->comp_ring.next2proc].rcd;
463
464         if (rxq->stopped) {
465                 PMD_RX_LOG(DEBUG, "Rx queue is stopped.");
466                 return 0;
467         }
468
469         while (rcd->gen == rxq->comp_ring.gen) {
470
471                 if (nb_rx >= nb_pkts)
472                         break;
473                 idx = rcd->rxdIdx;
474                 ring_idx = (uint8_t)((rcd->rqID == rxq->qid1) ? 0 : 1);
475                 rxd = (Vmxnet3_RxDesc *)rxq->cmd_ring[ring_idx].base + idx;
476                 rbi = rxq->cmd_ring[ring_idx].buf_info + idx;
477
478                 if (rcd->sop != 1 || rcd->eop != 1) {
479                         rte_pktmbuf_free_seg(rbi->m);
480
481                         PMD_RX_LOG(DEBUG, "Packet spread across multiple buffers.");
482                         goto rcd_done;
483
484                 } else {
485
486                         PMD_RX_LOG(DEBUG, "rxd idx: %d ring idx: %d.", idx, ring_idx);
487
488 #ifdef RTE_LIBRTE_VMXNET3_DEBUG_DRIVER
489                         VMXNET3_ASSERT(rcd->len <= rxd->len);
490                         VMXNET3_ASSERT(rbi->m);
491 #endif
492                         if (rcd->len == 0) {
493                                 PMD_RX_LOG(DEBUG, "Rx buf was skipped. rxring[%d][%d]",
494                                            ring_idx, idx);
495 #ifdef RTE_LIBRTE_VMXNET3_DEBUG_DRIVER
496                                 VMXNET3_ASSERT(rcd->sop && rcd->eop);
497 #endif
498                                 rte_pktmbuf_free_seg(rbi->m);
499
500                                 goto rcd_done;
501                         }
502
503                         /* Assuming a packet is coming in a single packet buffer */
504                         if (rxd->btype != VMXNET3_RXD_BTYPE_HEAD) {
505                                 PMD_RX_LOG(DEBUG,
506                                            "Alert: misbehaving device, incorrect "
507                                            "buffer type used. Packet dropped.");
508                                 rte_pktmbuf_free_seg(rbi->m);
509                                 goto rcd_done;
510                         }
511 #ifdef RTE_LIBRTE_VMXNET3_DEBUG_DRIVER
512                         VMXNET3_ASSERT(rxd->btype == VMXNET3_RXD_BTYPE_HEAD);
513 #endif
514                         /* Get the packet buffer pointer from buf_info */
515                         rxm = rbi->m;
516
517                         /* Clear descriptor associated buf_info to be reused */
518                         rbi->m = NULL;
519                         rbi->bufPA = 0;
520
521                         /* Update the index that we received a packet */
522                         rxq->cmd_ring[ring_idx].next2comp = idx;
523
524                         /* For RCD with EOP set, check if there is frame error */
525                         if (rcd->err) {
526                                 rxq->stats.drop_total++;
527                                 rxq->stats.drop_err++;
528
529                                 if (!rcd->fcs) {
530                                         rxq->stats.drop_fcs++;
531                                         PMD_RX_LOG(ERR, "Recv packet dropped due to frame err.");
532                                 }
533                                 PMD_RX_LOG(ERR, "Error in received packet rcd#:%d rxd:%d",
534                                            (int)(rcd - (struct Vmxnet3_RxCompDesc *)
535                                                  rxq->comp_ring.base), rcd->rxdIdx);
536                                 rte_pktmbuf_free_seg(rxm);
537
538                                 goto rcd_done;
539                         }
540
541                         /* Check for hardware stripped VLAN tag */
542                         if (rcd->ts) {
543
544                                 PMD_RX_LOG(DEBUG, "Received packet with vlan ID: %d.",
545                                            rcd->tci);
546                                 rxm->ol_flags = PKT_RX_VLAN_PKT;
547
548 #ifdef RTE_LIBRTE_VMXNET3_DEBUG_DRIVER
549                                 VMXNET3_ASSERT(rxm &&
550                                                rte_pktmbuf_mtod(rxm, void *));
551 #endif
552                                 /* Copy vlan tag in packet buffer */
553                                 rxm->vlan_tci = rte_le_to_cpu_16(
554                                                 (uint16_t)rcd->tci);
555
556                         } else {
557                                 rxm->ol_flags = 0;
558                                 rxm->vlan_tci = 0;
559                         }
560
561                         /* Initialize newly received packet buffer */
562                         rxm->port = rxq->port_id;
563                         rxm->nb_segs = 1;
564                         rxm->next = NULL;
565                         rxm->pkt_len = (uint16_t)rcd->len;
566                         rxm->data_len = (uint16_t)rcd->len;
567                         rxm->data_off = RTE_PKTMBUF_HEADROOM;
568
569                         rx_pkts[nb_rx++] = rxm;
570
571 rcd_done:
572                         rxq->cmd_ring[ring_idx].next2comp = idx;
573                         VMXNET3_INC_RING_IDX_ONLY(rxq->cmd_ring[ring_idx].next2comp, rxq->cmd_ring[ring_idx].size);
574
575                         /* Time to allocate new buffers and repost descriptors */
576                         vmxnet3_post_rx_bufs(rxq, ring_idx);
577                         if (unlikely(rxq->shared->ctrl.updateRxProd)) {
578                                 VMXNET3_WRITE_BAR0_REG(hw, rxprod_reg[ring_idx] + (rxq->queue_id * VMXNET3_REG_ALIGN),
579                                                        rxq->cmd_ring[ring_idx].next2fill);
580                         }
581
582                         /* Advance to the next descriptor in comp_ring */
583                         vmxnet3_comp_ring_adv_next2proc(&rxq->comp_ring);
584
585                         rcd = &rxq->comp_ring.base[rxq->comp_ring.next2proc].rcd;
586                         nb_rxd++;
587                         if (nb_rxd > rxq->cmd_ring[0].size) {
588                                 PMD_RX_LOG(ERR,
589                                            "Used up quota of receiving packets,"
590                                            " relinquish control.");
591                                 break;
592                         }
593                 }
594         }
595
596         return nb_rx;
597 }
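
/*
 * Usage sketch (illustrative, generic ethdev API): this function is installed
 * as the port's rx_pkt_burst callback, so applications receive through
 *
 *     struct rte_mbuf *pkts[32];
 *     uint16_t n = rte_eth_rx_burst(port_id, queue_id, pkts, 32);
 *
 * Each returned mbuf carries a single segment (nb_segs == 1), since packets
 * spread across multiple buffers are dropped above.
 */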
598
599 /*
600  * Create memzone for device rings. malloc can't be used as the physical address is
601  * needed. If the memzone is already created, then this function returns a ptr
602  * to the old one.
603  */
604 static const struct rte_memzone *
605 ring_dma_zone_reserve(struct rte_eth_dev *dev, const char *ring_name,
606                       uint16_t queue_id, uint32_t ring_size, int socket_id)
607 {
608         char z_name[RTE_MEMZONE_NAMESIZE];
609         const struct rte_memzone *mz;
610
611         snprintf(z_name, sizeof(z_name), "%s_%s_%d_%d",
612                         dev->driver->pci_drv.name, ring_name,
613                         dev->data->port_id, queue_id);
614
615         mz = rte_memzone_lookup(z_name);
616         if (mz)
617                 return mz;
618
619         return rte_memzone_reserve_aligned(z_name, ring_size,
620                         socket_id, 0, VMXNET3_RING_BA_ALIGN);
621 }
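
/*
 * For example (assuming the PMD's PCI driver name; the exact value may
 * differ), "txdesc" for queue 0 of port 0 would yield a zone named roughly
 * "rte_vmxnet3_pmd_txdesc_0_0", so re-running queue setup after a restart
 * finds and reuses the existing zone instead of reserving a new one.
 */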
622
623 int
624 vmxnet3_dev_tx_queue_setup(struct rte_eth_dev *dev,
625                            uint16_t queue_idx,
626                            uint16_t nb_desc,
627                            unsigned int socket_id,
628                            __attribute__((unused)) const struct rte_eth_txconf *tx_conf)
629 {
630         struct vmxnet3_hw     *hw = dev->data->dev_private;
631         const struct rte_memzone *mz;
632         struct vmxnet3_tx_queue *txq;
633         struct vmxnet3_cmd_ring *ring;
634         struct vmxnet3_comp_ring *comp_ring;
635         int size;
636
637         PMD_INIT_FUNC_TRACE();
638
639         if ((tx_conf->txq_flags & ETH_TXQ_FLAGS_NOMULTSEGS) !=
640             ETH_TXQ_FLAGS_NOMULTSEGS) {
641                 PMD_INIT_LOG(ERR, "Multi-segment TX packets are not supported yet");
642                 return -EINVAL;
643         }
644
645         if ((tx_conf->txq_flags & ETH_TXQ_FLAGS_NOOFFLOADS) !=
646             ETH_TXQ_FLAGS_NOOFFLOADS) {
647                 PMD_INIT_LOG(ERR, "TX offloads are not supported yet");
648                 return -EINVAL;
649         }
650
651         txq = rte_zmalloc("ethdev_tx_queue", sizeof(struct vmxnet3_tx_queue), CACHE_LINE_SIZE);
652         if (txq == NULL) {
653                 PMD_INIT_LOG(ERR, "Can not allocate tx queue structure");
654                 return -ENOMEM;
655         }
656
657         txq->queue_id = queue_idx;
658         txq->port_id = dev->data->port_id;
659         txq->shared = &hw->tqd_start[queue_idx];
660         txq->hw = hw;
661         txq->qid = queue_idx;
662         txq->stopped = TRUE;
663
664         ring = &txq->cmd_ring;
665         comp_ring = &txq->comp_ring;
666
667         /* Tx vmxnet ring length should be between 512-4096 */
668         if (nb_desc < VMXNET3_DEF_TX_RING_SIZE) {
669                 PMD_INIT_LOG(ERR, "VMXNET3 Tx Ring Size Min: %u",
670                              VMXNET3_DEF_TX_RING_SIZE);
671                 return -EINVAL;
672         } else if (nb_desc > VMXNET3_TX_RING_MAX_SIZE) {
673                 PMD_INIT_LOG(ERR, "VMXNET3 Tx Ring Size Max: %u",
674                              VMXNET3_TX_RING_MAX_SIZE);
675                 return -EINVAL;
676         } else {
677                 ring->size = nb_desc;
678                 ring->size &= ~VMXNET3_RING_SIZE_MASK;
679         }
680         comp_ring->size = ring->size;
681
682         /* Tx vmxnet rings structure initialization*/
683         ring->next2fill = 0;
684         ring->next2comp = 0;
685         ring->gen = VMXNET3_INIT_GEN;
686         comp_ring->next2proc = 0;
687         comp_ring->gen = VMXNET3_INIT_GEN;
688
689         size = sizeof(struct Vmxnet3_TxDesc) * ring->size;
690         size += sizeof(struct Vmxnet3_TxCompDesc) * comp_ring->size;
691
692         mz = ring_dma_zone_reserve(dev, "txdesc", queue_idx, size, socket_id);
693         if (mz == NULL) {
694                 PMD_INIT_LOG(ERR, "ERROR: Creating queue descriptors zone");
695                 return -ENOMEM;
696         }
697         memset(mz->addr, 0, mz->len);
698
699         /* cmd_ring initialization */
700         ring->base = mz->addr;
701         ring->basePA = mz->phys_addr;
702
703         /* comp_ring initialization */
704         comp_ring->base = ring->base + ring->size;
705         comp_ring->basePA = ring->basePA +
706                 (sizeof(struct Vmxnet3_TxDesc) * ring->size);
707
708         /* cmd_ring0 buf_info allocation */
709         ring->buf_info = rte_zmalloc("tx_ring_buf_info",
710                                      ring->size * sizeof(vmxnet3_buf_info_t), CACHE_LINE_SIZE);
711         if (ring->buf_info == NULL) {
712                 PMD_INIT_LOG(ERR, "ERROR: Creating tx_buf_info structure");
713                 return -ENOMEM;
714         }
715
716         /* Update the data portion with txq */
717         dev->data->tx_queues[queue_idx] = txq;
718
719         return 0;
720 }
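
/*
 * Setup sketch (illustrative, generic ethdev API): the checks above require
 * the application's txconf to disable multi-segment packets and offloads,
 * e.g.
 *
 *     struct rte_eth_txconf txconf = {
 *             .txq_flags = ETH_TXQ_FLAGS_NOMULTSEGS | ETH_TXQ_FLAGS_NOOFFLOADS,
 *     };
 *     rte_eth_tx_queue_setup(port_id, 0, 512, rte_socket_id(), &txconf);
 *
 * 512 descriptors is the documented minimum (VMXNET3_DEF_TX_RING_SIZE) for
 * the Tx ring.
 */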
721
722 int
723 vmxnet3_dev_rx_queue_setup(struct rte_eth_dev *dev,
724                            uint16_t queue_idx,
725                            uint16_t nb_desc,
726                            unsigned int socket_id,
727                            __attribute__((unused)) const struct rte_eth_rxconf *rx_conf,
728                            struct rte_mempool *mp)
729 {
730         const struct rte_memzone *mz;
731         struct vmxnet3_rx_queue *rxq;
732         struct vmxnet3_hw     *hw = dev->data->dev_private;
733         struct vmxnet3_cmd_ring *ring0, *ring1, *ring;
734         struct vmxnet3_comp_ring *comp_ring;
735         int size;
736         uint8_t i;
737         char mem_name[32];
738         uint16_t buf_size;
739         struct rte_pktmbuf_pool_private *mbp_priv;
740
741         PMD_INIT_FUNC_TRACE();
742
743         mbp_priv = (struct rte_pktmbuf_pool_private *)
744                 rte_mempool_get_priv(mp);
745         buf_size = (uint16_t) (mbp_priv->mbuf_data_room_size -
746                                RTE_PKTMBUF_HEADROOM);
747
748         if (dev->data->dev_conf.rxmode.max_rx_pkt_len > buf_size) {
749                 PMD_INIT_LOG(ERR, "buf_size = %u, max_pkt_len = %u, "
750                              "VMXNET3 doesn't support scatter packets yet",
751                              buf_size, dev->data->dev_conf.rxmode.max_rx_pkt_len);
752                 return -EINVAL;
753         }
754
755         rxq = rte_zmalloc("ethdev_rx_queue", sizeof(struct vmxnet3_rx_queue), CACHE_LINE_SIZE);
756         if (rxq == NULL) {
757                 PMD_INIT_LOG(ERR, "Can not allocate rx queue structure");
758                 return -ENOMEM;
759         }
760
761         rxq->mp = mp;
762         rxq->queue_id = queue_idx;
763         rxq->port_id = dev->data->port_id;
764         rxq->shared = &hw->rqd_start[queue_idx];
765         rxq->hw = hw;
766         rxq->qid1 = queue_idx;
767         rxq->qid2 = queue_idx + hw->num_rx_queues;
768         rxq->stopped = TRUE;
769
770         ring0 = &rxq->cmd_ring[0];
771         ring1 = &rxq->cmd_ring[1];
772         comp_ring = &rxq->comp_ring;
773
774         /* Rx vmxnet rings length should be between 256-4096 */
775         if (nb_desc < VMXNET3_DEF_RX_RING_SIZE) {
776                 PMD_INIT_LOG(ERR, "VMXNET3 Rx Ring Size Min: %u", VMXNET3_DEF_RX_RING_SIZE);
777                 return -EINVAL;
778         } else if (nb_desc > VMXNET3_RX_RING_MAX_SIZE) {
779                 PMD_INIT_LOG(ERR, "VMXNET3 Rx Ring Size Max: %u", VMXNET3_RX_RING_MAX_SIZE);
780                 return -EINVAL;
781         } else {
782                 ring0->size = nb_desc;
783                 ring0->size &= ~VMXNET3_RING_SIZE_MASK;
784                 ring1->size = ring0->size;
785         }
786
787         comp_ring->size = ring0->size + ring1->size;
788
789         /* Rx vmxnet rings structure initialization */
790         ring0->next2fill = 0;
791         ring1->next2fill = 0;
792         ring0->next2comp = 0;
793         ring1->next2comp = 0;
794         ring0->gen = VMXNET3_INIT_GEN;
795         ring1->gen = VMXNET3_INIT_GEN;
796         comp_ring->next2proc = 0;
797         comp_ring->gen = VMXNET3_INIT_GEN;
798
799         size = sizeof(struct Vmxnet3_RxDesc) * (ring0->size + ring1->size);
800         size += sizeof(struct Vmxnet3_RxCompDesc) * comp_ring->size;
801
802         mz = ring_dma_zone_reserve(dev, "rxdesc", queue_idx, size, socket_id);
803         if (mz == NULL) {
804                 PMD_INIT_LOG(ERR, "ERROR: Creating queue descriptors zone");
805                 return -ENOMEM;
806         }
807         memset(mz->addr, 0, mz->len);
808
809         /* cmd_ring0 initialization */
810         ring0->base = mz->addr;
811         ring0->basePA = mz->phys_addr;
812
813         /* cmd_ring1 initialization */
814         ring1->base = ring0->base + ring0->size;
815         ring1->basePA = ring0->basePA + sizeof(struct Vmxnet3_RxDesc) * ring0->size;
816
817         /* comp_ring initialization */
818         comp_ring->base = ring1->base +  ring1->size;
819         comp_ring->basePA = ring1->basePA + sizeof(struct Vmxnet3_RxDesc) *
820                 ring1->size;
821
822         /* cmd_ring0-cmd_ring1 buf_info allocation */
823         for (i = 0; i < VMXNET3_RX_CMDRING_SIZE; i++) {
824
825                 ring = &rxq->cmd_ring[i];
826                 ring->rid = i;
827                 snprintf(mem_name, sizeof(mem_name), "rx_ring_%d_buf_info", i);
828
829                 ring->buf_info = rte_zmalloc(mem_name, ring->size * sizeof(vmxnet3_buf_info_t), CACHE_LINE_SIZE);
830                 if (ring->buf_info == NULL) {
831                         PMD_INIT_LOG(ERR, "ERROR: Creating rx_buf_info structure");
832                         return -ENOMEM;
833                 }
834         }
835
836         /* Update the data portion with rxq */
837         dev->data->rx_queues[queue_idx] = rxq;
838
839         return 0;
840 }
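
/*
 * Setup sketch (illustrative, generic ethdev API): the mempool determines the
 * receive buffer size, and rxmode.max_rx_pkt_len must fit in one buffer since
 * scatter RX is rejected above, e.g.
 *
 *     struct rte_eth_rxconf rx_conf = { .rx_free_thresh = 0 };
 *     rte_eth_rx_queue_setup(port_id, 0, 1024, rte_socket_id(),
 *                            &rx_conf, mbuf_pool);
 *
 * where mbuf_pool is an rte_mempool whose data room, minus
 * RTE_PKTMBUF_HEADROOM, is at least max_rx_pkt_len, and 1024 lies within the
 * documented 256-4096 ring size range.
 */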
841
842 /*
843  * Initializes Receive Unit
844  * Load mbufs in rx queue in advance
845  */
846 int
847 vmxnet3_dev_rxtx_init(struct rte_eth_dev *dev)
848 {
849         struct vmxnet3_hw *hw = dev->data->dev_private;
850
851         int i, ret;
852         uint8_t j;
853
854         PMD_INIT_FUNC_TRACE();
855
856         for (i = 0; i < hw->num_rx_queues; i++) {
857                 vmxnet3_rx_queue_t *rxq = dev->data->rx_queues[i];
858
859                 for (j = 0; j < VMXNET3_RX_CMDRING_SIZE; j++) {
860                         /* Posting to an empty ring fills the whole ring with buffers */
861                         ret = vmxnet3_post_rx_bufs(rxq, j);
862                         if (ret <= 0) {
863                                 PMD_INIT_LOG(ERR, "ERROR: Posting Rxq: %d buffers ring: %d", i, j);
864                                 return -ret;
865                         }
866                         /* Update the device with next2fill so it can use the newly posted mbufs */
867                         if (unlikely(rxq->shared->ctrl.updateRxProd)) {
868                                 VMXNET3_WRITE_BAR0_REG(hw, rxprod_reg[j] + (rxq->queue_id * VMXNET3_REG_ALIGN),
869                                                        rxq->cmd_ring[j].next2fill);
870                         }
871                 }
872                 rxq->stopped = FALSE;
873         }
874
875         for (i = 0; i < dev->data->nb_tx_queues; i++) {
876                 struct vmxnet3_tx_queue *txq = dev->data->tx_queues[i];
877
878                 txq->stopped = FALSE;
879         }
880
881         return 0;
882 }
883
884 static uint8_t rss_intel_key[40] = {
885         0x6D, 0x5A, 0x56, 0xDA, 0x25, 0x5B, 0x0E, 0xC2,
886         0x41, 0x67, 0x25, 0x3D, 0x43, 0xA3, 0x8F, 0xB0,
887         0xD0, 0xCA, 0x2B, 0xCB, 0xAE, 0x7B, 0x30, 0xB4,
888         0x77, 0xCB, 0x2D, 0xA3, 0x80, 0x30, 0xF2, 0x0C,
889         0x6A, 0x42, 0xB7, 0x3B, 0xBE, 0xAC, 0x01, 0xFA,
890 };
891
892 /*
893  * Configure RSS feature
894  */
895 int
896 vmxnet3_rss_configure(struct rte_eth_dev *dev)
897 {
898 #define VMXNET3_RSS_OFFLOAD_ALL ( \
899                 ETH_RSS_IPV4 | \
900                 ETH_RSS_IPV4_TCP | \
901                 ETH_RSS_IPV6 | \
902                 ETH_RSS_IPV6_TCP)
903
904         struct vmxnet3_hw *hw = dev->data->dev_private;
905         struct VMXNET3_RSSConf *dev_rss_conf;
906         struct rte_eth_rss_conf *port_rss_conf;
907         uint64_t rss_hf;
908         uint8_t i, j;
909
910         PMD_INIT_FUNC_TRACE();
911
912         dev_rss_conf = hw->rss_conf;
913         port_rss_conf = &dev->data->dev_conf.rx_adv_conf.rss_conf;
914
915         /* loading hashFunc */
916         dev_rss_conf->hashFunc = VMXNET3_RSS_HASH_FUNC_TOEPLITZ;
917         /* loading hashKeySize */
918         dev_rss_conf->hashKeySize = VMXNET3_RSS_MAX_KEY_SIZE;
919         /* loading indTableSize : Must not exceed VMXNET3_RSS_MAX_IND_TABLE_SIZE (128)*/
920         dev_rss_conf->indTableSize = (uint16_t)(hw->num_rx_queues * 4);
921
922         if (port_rss_conf->rss_key == NULL) {
923                 /* Default hash key */
924                 port_rss_conf->rss_key = rss_intel_key;
925         }
926
927         /* loading hashKey */
928         memcpy(&dev_rss_conf->hashKey[0], port_rss_conf->rss_key, dev_rss_conf->hashKeySize);
929
930         /* loading indTable */
931         for (i = 0, j = 0; i < dev_rss_conf->indTableSize; i++, j++) {
932                 if (j == dev->data->nb_rx_queues)
933                         j = 0;
934                 dev_rss_conf->indTable[i] = j;
935         }
936
937         /* loading hashType */
938         dev_rss_conf->hashType = 0;
939         rss_hf = port_rss_conf->rss_hf & VMXNET3_RSS_OFFLOAD_ALL;
940         if (rss_hf & ETH_RSS_IPV4)
941                 dev_rss_conf->hashType |= VMXNET3_RSS_HASH_TYPE_IPV4;
942         if (rss_hf & ETH_RSS_IPV4_TCP)
943                 dev_rss_conf->hashType |= VMXNET3_RSS_HASH_TYPE_TCP_IPV4;
944         if (rss_hf & ETH_RSS_IPV6)
945                 dev_rss_conf->hashType |= VMXNET3_RSS_HASH_TYPE_IPV6;
946         if (rss_hf & ETH_RSS_IPV6_TCP)
947                 dev_rss_conf->hashType |= VMXNET3_RSS_HASH_TYPE_TCP_IPV6;
948
949         return VMXNET3_SUCCESS;
950 }
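
/*
 * Configuration sketch (illustrative): the hash types programmed above come
 * from the RSS config the application supplies at configure time, e.g.
 *
 *     struct rte_eth_conf port_conf = {
 *             .rxmode = { .mq_mode = ETH_MQ_RX_RSS },
 *             .rx_adv_conf.rss_conf = {
 *                     .rss_key = NULL,   // NULL falls back to rss_intel_key above
 *                     .rss_hf  = ETH_RSS_IPV4 | ETH_RSS_IPV4_TCP,
 *             },
 *     };
 *     rte_eth_dev_configure(port_id, nb_rxq, nb_txq, &port_conf);
 *
 * Hash types outside VMXNET3_RSS_OFFLOAD_ALL are silently masked off.
 */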
951
952 /*
953  * Configure VLAN Filter feature
954  */
955 int
956 vmxnet3_vlan_configure(struct rte_eth_dev *dev)
957 {
958         uint8_t i;
959         struct vmxnet3_hw *hw = dev->data->dev_private;
960         uint32_t *vf_table = hw->shared->devRead.rxFilterConf.vfTable;
961
962         PMD_INIT_FUNC_TRACE();
963
964         /* Set up the VLAN filter table */
965         for (i = 0; i < VMXNET3_VFT_SIZE; i++) {
966                 /* Filter all vlan tags out by default */
967                 vf_table[i] = 0;
968                 /* To-Do: Provide another routine in dev_ops for user config */
969
970                 PMD_INIT_LOG(DEBUG, "Registering VLAN portid: %"PRIu8" tag %u",
971                                         dev->data->port_id, vf_table[i]);
972         }
973
974         return VMXNET3_SUCCESS;
975 }