1 /*-
2  *   BSD LICENSE
3  *
4  *   Copyright(c) 2010-2015 Intel Corporation. All rights reserved.
5  *   All rights reserved.
6  *
7  *   Redistribution and use in source and binary forms, with or without
8  *   modification, are permitted provided that the following conditions
9  *   are met:
10  *
11  *     * Redistributions of source code must retain the above copyright
12  *       notice, this list of conditions and the following disclaimer.
13  *     * Redistributions in binary form must reproduce the above copyright
14  *       notice, this list of conditions and the following disclaimer in
15  *       the documentation and/or other materials provided with the
16  *       distribution.
17  *     * Neither the name of Intel Corporation nor the names of its
18  *       contributors may be used to endorse or promote products derived
19  *       from this software without specific prior written permission.
20  *
21  *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
22  *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
23  *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
24  *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
25  *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
26  *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
27  *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
28  *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
29  *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
30  *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
31  *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
32  */
33
34 #include <sys/queue.h>
35
36 #include <stdio.h>
37 #include <stdlib.h>
38 #include <string.h>
39 #include <errno.h>
40 #include <stdint.h>
41 #include <stdarg.h>
42 #include <unistd.h>
43 #include <inttypes.h>
44
45 #include <rte_byteorder.h>
46 #include <rte_common.h>
47 #include <rte_cycles.h>
48 #include <rte_log.h>
49 #include <rte_debug.h>
50 #include <rte_interrupts.h>
51 #include <rte_pci.h>
52 #include <rte_memory.h>
53 #include <rte_memzone.h>
54 #include <rte_launch.h>
55 #include <rte_eal.h>
56 #include <rte_per_lcore.h>
57 #include <rte_lcore.h>
58 #include <rte_atomic.h>
59 #include <rte_branch_prediction.h>
60 #include <rte_ring.h>
61 #include <rte_mempool.h>
62 #include <rte_malloc.h>
63 #include <rte_mbuf.h>
64 #include <rte_ether.h>
65 #include <rte_ethdev.h>
66 #include <rte_prefetch.h>
67 #include <rte_ip.h>
68 #include <rte_udp.h>
69 #include <rte_tcp.h>
70 #include <rte_sctp.h>
71 #include <rte_string_fns.h>
72 #include <rte_errno.h>
73
74 #include "base/vmxnet3_defs.h"
75 #include "vmxnet3_ring.h"
76
77 #include "vmxnet3_logs.h"
78 #include "vmxnet3_ethdev.h"
79
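/* Rx producer ("doorbell") registers, one per Rx command ring of a queue. */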
80 static const uint32_t rxprod_reg[2] = {VMXNET3_REG_RXPROD, VMXNET3_REG_RXPROD2};
81
82 static int vmxnet3_post_rx_bufs(vmxnet3_rx_queue_t*, uint8_t);
83 static void vmxnet3_tq_tx_complete(vmxnet3_tx_queue_t *);
84 #ifdef RTE_LIBRTE_VMXNET3_DEBUG_DRIVER_NOT_USED
85 static void vmxnet3_rxq_dump(struct vmxnet3_rx_queue *);
86 static void vmxnet3_txq_dump(struct vmxnet3_tx_queue *);
87 #endif
88
89 static struct rte_mbuf *
90 rte_rxmbuf_alloc(struct rte_mempool *mp)
91 {
92         struct rte_mbuf *m;
93
94         m = __rte_mbuf_raw_alloc(mp);
95         __rte_mbuf_sanity_check_raw(m, 0);
96         return m;
97 }
98
99 #ifdef RTE_LIBRTE_VMXNET3_DEBUG_DRIVER_NOT_USED
100 static void
101 vmxnet3_rxq_dump(struct vmxnet3_rx_queue *rxq)
102 {
103         uint32_t avail = 0;
104
105         if (rxq == NULL)
106                 return;
107
108         PMD_RX_LOG(DEBUG,
109                    "RXQ: cmd0 base : 0x%p cmd1 base : 0x%p comp ring base : 0x%p.",
110                    rxq->cmd_ring[0].base, rxq->cmd_ring[1].base, rxq->comp_ring.base);
111         PMD_RX_LOG(DEBUG,
112                    "RXQ: cmd0 basePA : 0x%lx cmd1 basePA : 0x%lx comp ring basePA : 0x%lx.",
113                    (unsigned long)rxq->cmd_ring[0].basePA,
114                    (unsigned long)rxq->cmd_ring[1].basePA,
115                    (unsigned long)rxq->comp_ring.basePA);
116
117         avail = vmxnet3_cmd_ring_desc_avail(&rxq->cmd_ring[0]);
118         PMD_RX_LOG(DEBUG,
119                    "RXQ:cmd0: size=%u; free=%u; next2proc=%u; queued=%u",
120                    (uint32_t)rxq->cmd_ring[0].size, avail,
121                    rxq->comp_ring.next2proc,
122                    rxq->cmd_ring[0].size - avail);
123
124         avail = vmxnet3_cmd_ring_desc_avail(&rxq->cmd_ring[1]);
125         PMD_RX_LOG(DEBUG, "RXQ:cmd1 size=%u; free=%u; next2proc=%u; queued=%u",
126                    (uint32_t)rxq->cmd_ring[1].size, avail, rxq->comp_ring.next2proc,
127                    rxq->cmd_ring[1].size - avail);
128
129 }
130
131 static void
132 vmxnet3_txq_dump(struct vmxnet3_tx_queue *txq)
133 {
134         uint32_t avail = 0;
135
136         if (txq == NULL)
137                 return;
138
139         PMD_TX_LOG(DEBUG, "TXQ: cmd base : 0x%p comp ring base : 0x%p data ring base : 0x%p.",
140                    txq->cmd_ring.base, txq->comp_ring.base, txq->data_ring.base);
141         PMD_TX_LOG(DEBUG, "TXQ: cmd basePA : 0x%lx comp ring basePA : 0x%lx data ring basePA : 0x%lx.",
142                    (unsigned long)txq->cmd_ring.basePA,
143                    (unsigned long)txq->comp_ring.basePA,
144                    (unsigned long)txq->data_ring.basePA);
145
146         avail = vmxnet3_cmd_ring_desc_avail(&txq->cmd_ring);
147         PMD_TX_LOG(DEBUG, "TXQ: size=%u; free=%u; next2proc=%u; queued=%u",
148                    (uint32_t)txq->cmd_ring.size, avail,
149                    txq->comp_ring.next2proc, txq->cmd_ring.size - avail);
150 }
151 #endif
152
153 static void
154 vmxnet3_cmd_ring_release_mbufs(vmxnet3_cmd_ring_t *ring)
155 {
156         while (ring->next2comp != ring->next2fill) {
157                 /* No need to worry about tx desc ownership, device is quiesced by now. */
158                 vmxnet3_buf_info_t *buf_info = ring->buf_info + ring->next2comp;
159
160                 if (buf_info->m) {
161                         rte_pktmbuf_free(buf_info->m);
162                         buf_info->m = NULL;
163                         buf_info->bufPA = 0;
164                         buf_info->len = 0;
165                 }
166                 vmxnet3_cmd_ring_adv_next2comp(ring);
167         }
168 }
169
170 static void
171 vmxnet3_cmd_ring_release(vmxnet3_cmd_ring_t *ring)
172 {
173         vmxnet3_cmd_ring_release_mbufs(ring);
174         rte_free(ring->buf_info);
175         ring->buf_info = NULL;
176 }
177
178
179 void
180 vmxnet3_dev_tx_queue_release(void *txq)
181 {
182         vmxnet3_tx_queue_t *tq = txq;
183
184         if (tq != NULL) {
185                 /* Release the cmd_ring */
186                 vmxnet3_cmd_ring_release(&tq->cmd_ring);
187         }
188 }
189
190 void
191 vmxnet3_dev_rx_queue_release(void *rxq)
192 {
193         int i;
194         vmxnet3_rx_queue_t *rq = rxq;
195
196         if (rq != NULL) {
197                 /* Release both the cmd_rings */
198                 for (i = 0; i < VMXNET3_RX_CMDRING_SIZE; i++)
199                         vmxnet3_cmd_ring_release(&rq->cmd_ring[i]);
200         }
201 }
202
203 static void
204 vmxnet3_dev_tx_queue_reset(void *txq)
205 {
206         vmxnet3_tx_queue_t *tq = txq;
207         struct vmxnet3_cmd_ring *ring = &tq->cmd_ring;
208         struct vmxnet3_comp_ring *comp_ring = &tq->comp_ring;
209         struct vmxnet3_data_ring *data_ring = &tq->data_ring;
210         int size;
211
212         if (tq != NULL) {
213                 /* Release the cmd_ring mbufs */
214                 vmxnet3_cmd_ring_release_mbufs(&tq->cmd_ring);
215         }
216
217         /* Tx vmxnet rings structure initialization */
218         ring->next2fill = 0;
219         ring->next2comp = 0;
220         ring->gen = VMXNET3_INIT_GEN;
221         comp_ring->next2proc = 0;
222         comp_ring->gen = VMXNET3_INIT_GEN;
223
224         size = sizeof(struct Vmxnet3_TxDesc) * ring->size;
225         size += sizeof(struct Vmxnet3_TxCompDesc) * comp_ring->size;
226         size += sizeof(struct Vmxnet3_TxDataDesc) * data_ring->size;
227
228         memset(ring->base, 0, size);
229 }
230
231 static void
232 vmxnet3_dev_rx_queue_reset(void *rxq)
233 {
234         int i;
235         vmxnet3_rx_queue_t *rq = rxq;
236         struct vmxnet3_cmd_ring *ring0, *ring1;
237         struct vmxnet3_comp_ring *comp_ring;
238         int size;
239
240         if (rq != NULL) {
241                 /* Release both the cmd_rings mbufs */
242                 for (i = 0; i < VMXNET3_RX_CMDRING_SIZE; i++)
243                         vmxnet3_cmd_ring_release_mbufs(&rq->cmd_ring[i]);
244         }
245
246         ring0 = &rq->cmd_ring[0];
247         ring1 = &rq->cmd_ring[1];
248         comp_ring = &rq->comp_ring;
249
250         /* Rx vmxnet rings structure initialization */
251         ring0->next2fill = 0;
252         ring1->next2fill = 0;
253         ring0->next2comp = 0;
254         ring1->next2comp = 0;
255         ring0->gen = VMXNET3_INIT_GEN;
256         ring1->gen = VMXNET3_INIT_GEN;
257         comp_ring->next2proc = 0;
258         comp_ring->gen = VMXNET3_INIT_GEN;
259
260         size = sizeof(struct Vmxnet3_RxDesc) * (ring0->size + ring1->size);
261         size += sizeof(struct Vmxnet3_RxCompDesc) * comp_ring->size;
262
263         memset(ring0->base, 0, size);
264 }
265
266 void
267 vmxnet3_dev_clear_queues(struct rte_eth_dev *dev)
268 {
269         unsigned i;
270
271         PMD_INIT_FUNC_TRACE();
272
273         for (i = 0; i < dev->data->nb_tx_queues; i++) {
274                 struct vmxnet3_tx_queue *txq = dev->data->tx_queues[i];
275
276                 if (txq != NULL) {
277                         txq->stopped = TRUE;
278                         vmxnet3_dev_tx_queue_reset(txq);
279                 }
280         }
281
282         for (i = 0; i < dev->data->nb_rx_queues; i++) {
283                 struct vmxnet3_rx_queue *rxq = dev->data->rx_queues[i];
284
285                 if (rxq != NULL) {
286                         rxq->stopped = TRUE;
287                         vmxnet3_dev_rx_queue_reset(rxq);
288                 }
289         }
290 }
291
292 static void
293 vmxnet3_tq_tx_complete(vmxnet3_tx_queue_t *txq)
294 {
295         int completed = 0;
296         struct rte_mbuf *mbuf;
297         vmxnet3_comp_ring_t *comp_ring = &txq->comp_ring;
298         struct Vmxnet3_TxCompDesc *tcd = (struct Vmxnet3_TxCompDesc *)
299                 (comp_ring->base + comp_ring->next2proc);
300
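        /* The device writes completion descriptors with its current generation bit;
         * a descriptor is valid for the driver while tcd->gen matches comp_ring->gen,
         * which is flipped each time next2proc wraps around the ring. */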
301         while (tcd->gen == comp_ring->gen) {
302                 /* Release cmd_ring descriptor and free mbuf */
303                 VMXNET3_ASSERT(txq->cmd_ring.base[tcd->txdIdx].txd.eop == 1);
304                 while (txq->cmd_ring.next2comp != tcd->txdIdx) {
305                         mbuf = txq->cmd_ring.buf_info[txq->cmd_ring.next2comp].m;
306                         txq->cmd_ring.buf_info[txq->cmd_ring.next2comp].m = NULL;
307                         rte_pktmbuf_free_seg(mbuf);
308
309                         /* Mark the txd for which tcd was generated as completed */
310                         vmxnet3_cmd_ring_adv_next2comp(&txq->cmd_ring);
311                         completed++;
312                 }
313
314                 vmxnet3_comp_ring_adv_next2proc(comp_ring);
315                 tcd = (struct Vmxnet3_TxCompDesc *)(comp_ring->base +
316                                                     comp_ring->next2proc);
317         }
318
319         PMD_TX_LOG(DEBUG, "Processed %d tx comps & command descs.", completed);
320 }
321
322 uint16_t
323 vmxnet3_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts,
324                   uint16_t nb_pkts)
325 {
326         uint16_t nb_tx;
327         vmxnet3_tx_queue_t *txq = tx_queue;
328         struct vmxnet3_hw *hw = txq->hw;
329
330         if (unlikely(txq->stopped)) {
331                 PMD_TX_LOG(DEBUG, "Tx queue is stopped.");
332                 return 0;
333         }
334
335         /* Free up the comp_descriptors aggressively */
336         vmxnet3_tq_tx_complete(txq);
337
338         nb_tx = 0;
339         while (nb_tx < nb_pkts) {
340                 Vmxnet3_GenericDesc *gdesc;
341                 vmxnet3_buf_info_t *tbi;
342                 uint32_t first2fill, avail, dw2;
343                 struct rte_mbuf *txm = tx_pkts[nb_tx];
344                 struct rte_mbuf *m_seg = txm;
345                 int copy_size = 0;
346
347                 /* If the packet is excessively fragmented, drop it */
348                 if (unlikely(txm->nb_segs > VMXNET3_MAX_TXD_PER_PKT)) {
349                         ++txq->stats.drop_too_many_segs;
350                         ++txq->stats.drop_total;
351                         rte_pktmbuf_free(txm);
352                         ++nb_tx;
353                         continue;
354                 }
355
356                 /* Is command ring full? */
357                 avail = vmxnet3_cmd_ring_desc_avail(&txq->cmd_ring);
358                 if (txm->nb_segs > avail) {
359                         ++txq->stats.tx_ring_full;
360                         break;
361                 }
362
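                /* Small single-segment packets are copied into the Tx data ring; the
                 * descriptor then points at the data-ring slot instead of the mbuf,
                 * which is typically cheaper for the backend than mapping a tiny
                 * guest buffer. */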
363                 if (txm->nb_segs == 1 && rte_pktmbuf_pkt_len(txm) <= VMXNET3_HDR_COPY_SIZE) {
364                         struct Vmxnet3_TxDataDesc *tdd;
365
366                         tdd = txq->data_ring.base + txq->cmd_ring.next2fill;
367                         copy_size = rte_pktmbuf_pkt_len(txm);
368                         rte_memcpy(tdd->data, rte_pktmbuf_mtod(txm, char *), copy_size);
369                 }
370
371                 /* Use the inverted (previous) gen bit for the SOP desc so the device ignores the chain until it is complete */
372                 dw2 = (txq->cmd_ring.gen ^ 0x1) << VMXNET3_TXD_GEN_SHIFT;
373                 first2fill = txq->cmd_ring.next2fill;
374                 do {
375                         /* Remember the transmit buffer for cleanup */
376                         tbi = txq->cmd_ring.buf_info + txq->cmd_ring.next2fill;
377                         tbi->m = m_seg;
378
379                         /* NB: the following assumes that the VMXNET3 maximum
380                            transmit buffer size (16K) is greater than the
381                            maximum mbuf segment size. */
382                         gdesc = txq->cmd_ring.base + txq->cmd_ring.next2fill;
383                         if (copy_size)
384                                 gdesc->txd.addr = rte_cpu_to_le_64(txq->data_ring.basePA +
385                                                                 txq->cmd_ring.next2fill *
386                                                                 sizeof(struct Vmxnet3_TxDataDesc));
387                         else
388                                 gdesc->txd.addr = rte_mbuf_data_dma_addr(m_seg);
389
390                         gdesc->dword[2] = dw2 | m_seg->data_len;
391                         gdesc->dword[3] = 0;
392
393                         /* move to the next2fill descriptor */
394                         vmxnet3_cmd_ring_adv_next2fill(&txq->cmd_ring);
395
396                         /* use the right gen for non-SOP desc */
397                         dw2 = txq->cmd_ring.gen << VMXNET3_TXD_GEN_SHIFT;
398                 } while ((m_seg = m_seg->next) != NULL);
399
400                 /* Update the EOP descriptor */
401                 gdesc->dword[3] |= VMXNET3_TXD_EOP | VMXNET3_TXD_CQ;
402
403                 /* Add VLAN tag if present */
404                 gdesc = txq->cmd_ring.base + first2fill;
405                 if (txm->ol_flags & PKT_TX_VLAN_PKT) {
406                         gdesc->txd.ti = 1;
407                         gdesc->txd.tci = txm->vlan_tci;
408                 }
409
410                 /* TODO: Add transmit checksum offload here */
411
412                 /* Flip the GEN bit on the SOP only now that the whole chain is written; the compiler barrier keeps the earlier descriptor stores from being reordered past this point */
413                 rte_compiler_barrier();
414                 gdesc->dword[2] ^= VMXNET3_TXD_GEN;
415
416                 txq->shared->ctrl.txNumDeferred++;
417                 nb_tx++;
418         }
419
420         PMD_TX_LOG(DEBUG, "vmxnet3 txThreshold: %u", txq->shared->ctrl.txThreshold);
421
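        /* Batch doorbell writes: TXPROD is only updated once the number of deferred
         * packets reaches the threshold suggested by the device, since each register
         * write may cause a costly VM exit. */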
422         if (txq->shared->ctrl.txNumDeferred >= txq->shared->ctrl.txThreshold) {
423
424                 txq->shared->ctrl.txNumDeferred = 0;
425                 /* Notify vSwitch that packets are available. */
426                 VMXNET3_WRITE_BAR0_REG(hw, (VMXNET3_REG_TXPROD + txq->queue_id * VMXNET3_REG_ALIGN),
427                                        txq->cmd_ring.next2fill);
428         }
429
430         return nb_tx;
431 }
432
433 /*
434  *  Allocate mbufs and post Rx descriptors with the buffer details so that
435  *  the device can receive packets into those buffers.
436  *      Ring layout:
437  *      Among the two rings, the 1st ring contains buffers of type 0 and type 1.
438  *      bufs_per_pkt is set such that for non-LRO cases all the buffers required
439  *      by a frame fit in the 1st ring (1st buf of type 0, the rest of type 1).
440  *      The 2nd ring contains buffers of type 1 alone and is mostly used
441  *      only for LRO.
442  *
443  */
444 static int
445 vmxnet3_post_rx_bufs(vmxnet3_rx_queue_t *rxq, uint8_t ring_id)
446 {
447         int err = 0;
448         uint32_t i = 0, val = 0;
449         struct vmxnet3_cmd_ring *ring = &rxq->cmd_ring[ring_id];
450
451         if (ring_id == 0) {
452                 /* Usually: One HEAD type buf per packet
453                  * val = (ring->next2fill % rxq->hw->bufs_per_pkt) ?
454                  * VMXNET3_RXD_BTYPE_BODY : VMXNET3_RXD_BTYPE_HEAD;
455                  */
456
457                 /* We use a single buffer per packet, so every descriptor here is of HEAD type */
458                 val = VMXNET3_RXD_BTYPE_HEAD;
459         } else {
460                 /* All BODY type buffers for 2nd ring */
461                 val = VMXNET3_RXD_BTYPE_BODY;
462         }
463
464         while (vmxnet3_cmd_ring_desc_avail(ring) > 0) {
465                 struct Vmxnet3_RxDesc *rxd;
466                 struct rte_mbuf *mbuf;
467                 vmxnet3_buf_info_t *buf_info = &ring->buf_info[ring->next2fill];
468
469                 rxd = (struct Vmxnet3_RxDesc *)(ring->base + ring->next2fill);
470
471                 /* Allocate blank mbuf for the current Rx Descriptor */
472                 mbuf = rte_rxmbuf_alloc(rxq->mp);
473                 if (unlikely(mbuf == NULL)) {
474                         PMD_RX_LOG(ERR, "Error allocating mbuf");
475                         rxq->stats.rx_buf_alloc_failure++;
476                         err = ENOMEM;
477                         break;
478                 }
479
480                 /*
481                  * Load the mbuf pointer into buf_info[next2fill];
482                  * the buf_info entry plays the same role as the cookie in a virtio virtqueue
483                  */
484                 buf_info->m = mbuf;
485                 buf_info->len = (uint16_t)(mbuf->buf_len -
486                                            RTE_PKTMBUF_HEADROOM);
487                 buf_info->bufPA =
488                         rte_mbuf_data_dma_addr_default(mbuf);
489
490                 /* Load Rx Descriptor with the buffer's GPA */
491                 rxd->addr = buf_info->bufPA;
492
493                 /* After this point rxd->addr MUST not be NULL */
494                 rxd->btype = val;
495                 rxd->len = buf_info->len;
496                 /* Flip gen bit at the end to change ownership */
497                 rxd->gen = ring->gen;
498
499                 vmxnet3_cmd_ring_adv_next2fill(ring);
500                 i++;
501         }
502
503         /* Return error only if no buffers are posted at present */
504         if (vmxnet3_cmd_ring_desc_avail(ring) >= (ring->size - 1))
505                 return -err;
506         else
507                 return i;
508 }
509
510
511 /* Receive side checksum and other offloads */
512 static void
513 vmxnet3_rx_offload(const Vmxnet3_RxCompDesc *rcd, struct rte_mbuf *rxm)
514 {
515         /* Check for hardware stripped VLAN tag */
516         if (rcd->ts) {
517                 rxm->ol_flags |= PKT_RX_VLAN_PKT;
518                 rxm->vlan_tci = rte_le_to_cpu_16((uint16_t)rcd->tci);
519         }
520
521         /* Check for RSS */
522         if (rcd->rssType != VMXNET3_RCD_RSS_TYPE_NONE) {
523                 rxm->ol_flags |= PKT_RX_RSS_HASH;
524                 rxm->hash.rss = rcd->rssHash;
525         }
526
527         /* Check packet type, checksum errors, etc. Only IPv4 is supported for now. */
528         if (rcd->v4) {
529                 struct ether_hdr *eth = rte_pktmbuf_mtod(rxm, struct ether_hdr *);
530                 struct ipv4_hdr *ip = (struct ipv4_hdr *)(eth + 1);
531
532                 if (((ip->version_ihl & 0xf) << 2) > (int)sizeof(struct ipv4_hdr))
533                         rxm->packet_type = RTE_PTYPE_L3_IPV4_EXT;
534                 else
535                         rxm->packet_type = RTE_PTYPE_L3_IPV4;
536
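                /* cnc set means the device did not validate checksums for this frame,
                 * so neither good nor bad checksum flags are reported. */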
537                 if (!rcd->cnc) {
538                         if (!rcd->ipc)
539                                 rxm->ol_flags |= PKT_RX_IP_CKSUM_BAD;
540
541                         if ((rcd->tcp || rcd->udp) && !rcd->tuc)
542                                 rxm->ol_flags |= PKT_RX_L4_CKSUM_BAD;
543                 }
544         }
545 }
546
547 /*
548  * Process the Rx Completion Ring of given vmxnet3_rx_queue
549  * for nb_pkts burst and return the number of packets received
550  */
551 uint16_t
552 vmxnet3_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t nb_pkts)
553 {
554         uint16_t nb_rx;
555         uint32_t nb_rxd, idx;
556         uint8_t ring_idx;
557         vmxnet3_rx_queue_t *rxq;
558         Vmxnet3_RxCompDesc *rcd;
559         vmxnet3_buf_info_t *rbi;
560         Vmxnet3_RxDesc *rxd;
561         struct rte_mbuf *rxm = NULL;
562         struct vmxnet3_hw *hw;
563
564         nb_rx = 0;
565         ring_idx = 0;
566         nb_rxd = 0;
567         idx = 0;
568
569         rxq = rx_queue;
570         hw = rxq->hw;
571
572         rcd = &rxq->comp_ring.base[rxq->comp_ring.next2proc].rcd;
573
574         if (unlikely(rxq->stopped)) {
575                 PMD_RX_LOG(DEBUG, "Rx queue is stopped.");
576                 return 0;
577         }
578
579         while (rcd->gen == rxq->comp_ring.gen) {
580                 if (nb_rx >= nb_pkts)
581                         break;
582
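                /* rcd->rxdIdx and rcd->rqID locate the Rx descriptor that was filled:
                 * an rqID equal to qid1 refers to command ring 0, otherwise ring 1. */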
583                 idx = rcd->rxdIdx;
584                 ring_idx = (uint8_t)((rcd->rqID == rxq->qid1) ? 0 : 1);
585                 rxd = (Vmxnet3_RxDesc *)rxq->cmd_ring[ring_idx].base + idx;
586                 rbi = rxq->cmd_ring[ring_idx].buf_info + idx;
587
588                 if (unlikely(rcd->sop != 1 || rcd->eop != 1)) {
589                         rte_pktmbuf_free_seg(rbi->m);
590                         PMD_RX_LOG(DEBUG, "Packet spread across multiple buffers.");
591                         goto rcd_done;
592                 }
593
594                 PMD_RX_LOG(DEBUG, "rxd idx: %d ring idx: %d.", idx, ring_idx);
595
596                 VMXNET3_ASSERT(rcd->len <= rxd->len);
597                 VMXNET3_ASSERT(rbi->m);
598
599                 if (unlikely(rcd->len == 0)) {
600                         PMD_RX_LOG(DEBUG, "Rx buf was skipped. rxring[%d][%d].",
601                                    ring_idx, idx);
602                         VMXNET3_ASSERT(rcd->sop && rcd->eop);
603                         rte_pktmbuf_free_seg(rbi->m);
604                         goto rcd_done;
605                 }
606
607                 /* We expect the whole packet to arrive in a single buffer (scattered Rx is not supported) */
608                 if (unlikely(rxd->btype != VMXNET3_RXD_BTYPE_HEAD)) {
609                         PMD_RX_LOG(DEBUG,
610                                    "Alert: misbehaving device, incorrect "
611                                    "buffer type used. Packet dropped.");
612                         rte_pktmbuf_free_seg(rbi->m);
613                         goto rcd_done;
614                 }
615                 VMXNET3_ASSERT(rxd->btype == VMXNET3_RXD_BTYPE_HEAD);
616
617                 /* Get the packet buffer pointer from buf_info */
618                 rxm = rbi->m;
619
620                 /* Clear descriptor associated buf_info to be reused */
621                 rbi->m = NULL;
622                 rbi->bufPA = 0;
623
624                 /* Record the index of the descriptor we have just consumed */
625                 rxq->cmd_ring[ring_idx].next2comp = idx;
626
627                 /* For RCD with EOP set, check if there is frame error */
628                 if (unlikely(rcd->err)) {
629                         rxq->stats.drop_total++;
630                         rxq->stats.drop_err++;
631
632                         if (!rcd->fcs) {
633                                 rxq->stats.drop_fcs++;
634                                 PMD_RX_LOG(ERR, "Recv packet dropped due to frame err.");
635                         }
636                         PMD_RX_LOG(ERR, "Error in received packet rcd#:%d rxd:%d",
637                                    (int)(rcd - (struct Vmxnet3_RxCompDesc *)
638                                          rxq->comp_ring.base), rcd->rxdIdx);
639                         rte_pktmbuf_free_seg(rxm);
640                         goto rcd_done;
641                 }
642
643
644                 /* Initialize newly received packet buffer */
645                 rxm->port = rxq->port_id;
646                 rxm->nb_segs = 1;
647                 rxm->next = NULL;
648                 rxm->pkt_len = (uint16_t)rcd->len;
649                 rxm->data_len = (uint16_t)rcd->len;
650                 rxm->data_off = RTE_PKTMBUF_HEADROOM;
651                 rxm->ol_flags = 0;
652                 rxm->vlan_tci = 0;
653
654                 vmxnet3_rx_offload(rcd, rxm);
655
656                 rx_pkts[nb_rx++] = rxm;
657 rcd_done:
658                 rxq->cmd_ring[ring_idx].next2comp = idx;
659                 VMXNET3_INC_RING_IDX_ONLY(rxq->cmd_ring[ring_idx].next2comp, rxq->cmd_ring[ring_idx].size);
660
661                 /* Allocate new buffers and repost the consumed descriptors */
662                 vmxnet3_post_rx_bufs(rxq, ring_idx);
663                 if (unlikely(rxq->shared->ctrl.updateRxProd)) {
664                         VMXNET3_WRITE_BAR0_REG(hw, rxprod_reg[ring_idx] + (rxq->queue_id * VMXNET3_REG_ALIGN),
665                                                rxq->cmd_ring[ring_idx].next2fill);
666                 }
667
668                 /* Advance to the next descriptor in comp_ring */
669                 vmxnet3_comp_ring_adv_next2proc(&rxq->comp_ring);
670
671                 rcd = &rxq->comp_ring.base[rxq->comp_ring.next2proc].rcd;
672                 nb_rxd++;
673                 if (nb_rxd > rxq->cmd_ring[0].size) {
674                         PMD_RX_LOG(ERR,
675                                    "Used up quota of receiving packets,"
676                                    " relinquish control.");
677                         break;
678                 }
679         }
680
681         return nb_rx;
682 }
683
684 /*
685  * Create memzone for device rings. malloc can't be used as the physical address is
686  * needed. If the memzone has already been created, this function returns a
687  * pointer to the existing one.
688  */
689 static const struct rte_memzone *
690 ring_dma_zone_reserve(struct rte_eth_dev *dev, const char *ring_name,
691                       uint16_t queue_id, uint32_t ring_size, int socket_id)
692 {
693         char z_name[RTE_MEMZONE_NAMESIZE];
694         const struct rte_memzone *mz;
695
696         snprintf(z_name, sizeof(z_name), "%s_%s_%d_%d",
697                         dev->driver->pci_drv.name, ring_name,
698                         dev->data->port_id, queue_id);
699
700         mz = rte_memzone_lookup(z_name);
701         if (mz)
702                 return mz;
703
704         return rte_memzone_reserve_aligned(z_name, ring_size,
705                         socket_id, 0, VMXNET3_RING_BA_ALIGN);
706 }
707
708 int
709 vmxnet3_dev_tx_queue_setup(struct rte_eth_dev *dev,
710                            uint16_t queue_idx,
711                            uint16_t nb_desc,
712                            unsigned int socket_id,
713                            __attribute__((unused)) const struct rte_eth_txconf *tx_conf)
714 {
715         struct vmxnet3_hw *hw = dev->data->dev_private;
716         const struct rte_memzone *mz;
717         struct vmxnet3_tx_queue *txq;
718         struct vmxnet3_cmd_ring *ring;
719         struct vmxnet3_comp_ring *comp_ring;
720         struct vmxnet3_data_ring *data_ring;
721         int size;
722
723         PMD_INIT_FUNC_TRACE();
724
725         if ((tx_conf->txq_flags & ETH_TXQ_FLAGS_NOXSUMS) !=
726             ETH_TXQ_FLAGS_NOXSUMS) {
727                 PMD_INIT_LOG(ERR, "Tx checksum offload is not supported yet");
728                 return -EINVAL;
729         }
730
731         txq = rte_zmalloc("ethdev_tx_queue", sizeof(struct vmxnet3_tx_queue), RTE_CACHE_LINE_SIZE);
732         if (txq == NULL) {
733                 PMD_INIT_LOG(ERR, "Cannot allocate Tx queue structure");
734                 return -ENOMEM;
735         }
736
737         txq->queue_id = queue_idx;
738         txq->port_id = dev->data->port_id;
739         txq->shared = &hw->tqd_start[queue_idx];
740         txq->hw = hw;
741         txq->qid = queue_idx;
742         txq->stopped = TRUE;
743
744         ring = &txq->cmd_ring;
745         comp_ring = &txq->comp_ring;
746         data_ring = &txq->data_ring;
747
748         /* Tx vmxnet ring length should be between 512-4096 */
749         if (nb_desc < VMXNET3_DEF_TX_RING_SIZE) {
750                 PMD_INIT_LOG(ERR, "VMXNET3 Tx Ring Size Min: %u",
751                              VMXNET3_DEF_TX_RING_SIZE);
752                 return -EINVAL;
753         } else if (nb_desc > VMXNET3_TX_RING_MAX_SIZE) {
754                 PMD_INIT_LOG(ERR, "VMXNET3 Tx Ring Size Max: %u",
755                              VMXNET3_TX_RING_MAX_SIZE);
756                 return -EINVAL;
757         } else {
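                /* Round the requested descriptor count down to the device's ring-size alignment */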
758                 ring->size = nb_desc;
759                 ring->size &= ~VMXNET3_RING_SIZE_MASK;
760         }
761         comp_ring->size = data_ring->size = ring->size;
762
763         /* Tx vmxnet rings structure initialization */
764         ring->next2fill = 0;
765         ring->next2comp = 0;
766         ring->gen = VMXNET3_INIT_GEN;
767         comp_ring->next2proc = 0;
768         comp_ring->gen = VMXNET3_INIT_GEN;
769
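        /* The command, completion and data rings are carved back to back out of a
         * single physically contiguous memzone; compute the total size here. */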
770         size = sizeof(struct Vmxnet3_TxDesc) * ring->size;
771         size += sizeof(struct Vmxnet3_TxCompDesc) * comp_ring->size;
772         size += sizeof(struct Vmxnet3_TxDataDesc) * data_ring->size;
773
774         mz = ring_dma_zone_reserve(dev, "txdesc", queue_idx, size, socket_id);
775         if (mz == NULL) {
776                 PMD_INIT_LOG(ERR, "ERROR: Creating queue descriptors zone");
777                 return -ENOMEM;
778         }
779         memset(mz->addr, 0, mz->len);
780
781         /* cmd_ring initialization */
782         ring->base = mz->addr;
783         ring->basePA = mz->phys_addr;
784
785         /* comp_ring initialization */
786         comp_ring->base = ring->base + ring->size;
787         comp_ring->basePA = ring->basePA +
788                 (sizeof(struct Vmxnet3_TxDesc) * ring->size);
789
790         /* data_ring initialization */
791         data_ring->base = (Vmxnet3_TxDataDesc *)(comp_ring->base + comp_ring->size);
792         data_ring->basePA = comp_ring->basePA +
793                         (sizeof(struct Vmxnet3_TxCompDesc) * comp_ring->size);
794
795         /* cmd_ring0 buf_info allocation */
796         ring->buf_info = rte_zmalloc("tx_ring_buf_info",
797                                      ring->size * sizeof(vmxnet3_buf_info_t), RTE_CACHE_LINE_SIZE);
798         if (ring->buf_info == NULL) {
799                 PMD_INIT_LOG(ERR, "ERROR: Creating tx_buf_info structure");
800                 return -ENOMEM;
801         }
802
803         /* Update the data portion with txq */
804         dev->data->tx_queues[queue_idx] = txq;
805
806         return 0;
807 }
808
809 int
810 vmxnet3_dev_rx_queue_setup(struct rte_eth_dev *dev,
811                            uint16_t queue_idx,
812                            uint16_t nb_desc,
813                            unsigned int socket_id,
814                            __attribute__((unused)) const struct rte_eth_rxconf *rx_conf,
815                            struct rte_mempool *mp)
816 {
817         const struct rte_memzone *mz;
818         struct vmxnet3_rx_queue *rxq;
819         struct vmxnet3_hw     *hw = dev->data->dev_private;
820         struct vmxnet3_cmd_ring *ring0, *ring1, *ring;
821         struct vmxnet3_comp_ring *comp_ring;
822         int size;
823         uint8_t i;
824         char mem_name[32];
825         uint16_t buf_size;
826
827         PMD_INIT_FUNC_TRACE();
828
829         buf_size = rte_pktmbuf_data_room_size(mp) -
830                 RTE_PKTMBUF_HEADROOM;
831
832         if (dev->data->dev_conf.rxmode.max_rx_pkt_len > buf_size) {
833                 PMD_INIT_LOG(ERR, "buf_size = %u, max_pkt_len = %u, "
834                              "VMXNET3 doesn't support scattered packets yet",
835                              buf_size, dev->data->dev_conf.rxmode.max_rx_pkt_len);
836                 return -EINVAL;
837         }
838
839         rxq = rte_zmalloc("ethdev_rx_queue", sizeof(struct vmxnet3_rx_queue), RTE_CACHE_LINE_SIZE);
840         if (rxq == NULL) {
841                 PMD_INIT_LOG(ERR, "Cannot allocate Rx queue structure");
842                 return -ENOMEM;
843         }
844
845         rxq->mp = mp;
846         rxq->queue_id = queue_idx;
847         rxq->port_id = dev->data->port_id;
848         rxq->shared = &hw->rqd_start[queue_idx];
849         rxq->hw = hw;
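        /* The device tags Rx completions with an rqID: qid1 identifies this queue's
         * command ring 0 and qid2 (offset by num_rx_queues) its command ring 1. */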
850         rxq->qid1 = queue_idx;
851         rxq->qid2 = queue_idx + hw->num_rx_queues;
852         rxq->stopped = TRUE;
853
854         ring0 = &rxq->cmd_ring[0];
855         ring1 = &rxq->cmd_ring[1];
856         comp_ring = &rxq->comp_ring;
857
858         /* Rx vmxnet rings length should be between 256-4096 */
859         if (nb_desc < VMXNET3_DEF_RX_RING_SIZE) {
860                 PMD_INIT_LOG(ERR, "VMXNET3 Rx Ring Size Min: 256");
861                 return -EINVAL;
862         } else if (nb_desc > VMXNET3_RX_RING_MAX_SIZE) {
863                 PMD_INIT_LOG(ERR, "VMXNET3 Rx Ring Size Max: 4096");
864                 return -EINVAL;
865         } else {
866                 ring0->size = nb_desc;
867                 ring0->size &= ~VMXNET3_RING_SIZE_MASK;
868                 ring1->size = ring0->size;
869         }
870
871         comp_ring->size = ring0->size + ring1->size;
872
873         /* Rx vmxnet rings structure initialization */
874         ring0->next2fill = 0;
875         ring1->next2fill = 0;
876         ring0->next2comp = 0;
877         ring1->next2comp = 0;
878         ring0->gen = VMXNET3_INIT_GEN;
879         ring1->gen = VMXNET3_INIT_GEN;
880         comp_ring->next2proc = 0;
881         comp_ring->gen = VMXNET3_INIT_GEN;
882
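        /* Both Rx command rings and the completion ring share one physically
         * contiguous memzone, laid out back to back. */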
883         size = sizeof(struct Vmxnet3_RxDesc) * (ring0->size + ring1->size);
884         size += sizeof(struct Vmxnet3_RxCompDesc) * comp_ring->size;
885
886         mz = ring_dma_zone_reserve(dev, "rxdesc", queue_idx, size, socket_id);
887         if (mz == NULL) {
888                 PMD_INIT_LOG(ERR, "ERROR: Creating queue descriptors zone");
889                 return -ENOMEM;
890         }
891         memset(mz->addr, 0, mz->len);
892
893         /* cmd_ring0 initialization */
894         ring0->base = mz->addr;
895         ring0->basePA = mz->phys_addr;
896
897         /* cmd_ring1 initialization */
898         ring1->base = ring0->base + ring0->size;
899         ring1->basePA = ring0->basePA + sizeof(struct Vmxnet3_RxDesc) * ring0->size;
900
901         /* comp_ring initialization */
902         comp_ring->base = ring1->base + ring1->size;
903         comp_ring->basePA = ring1->basePA + sizeof(struct Vmxnet3_RxDesc) *
904                 ring1->size;
905
906         /* cmd_ring0-cmd_ring1 buf_info allocation */
907         for (i = 0; i < VMXNET3_RX_CMDRING_SIZE; i++) {
908
909                 ring = &rxq->cmd_ring[i];
910                 ring->rid = i;
911                 snprintf(mem_name, sizeof(mem_name), "rx_ring_%d_buf_info", i);
912
913                 ring->buf_info = rte_zmalloc(mem_name, ring->size * sizeof(vmxnet3_buf_info_t), RTE_CACHE_LINE_SIZE);
914                 if (ring->buf_info == NULL) {
915                         PMD_INIT_LOG(ERR, "ERROR: Creating rx_buf_info structure");
916                         return -ENOMEM;
917                 }
918         }
919
920         /* Update the data portion with rxq */
921         dev->data->rx_queues[queue_idx] = rxq;
922
923         return 0;
924 }
925
926 /*
927  * Initializes Receive Unit
928  * Load mbufs in rx queue in advance
929  */
930 int
931 vmxnet3_dev_rxtx_init(struct rte_eth_dev *dev)
932 {
933         struct vmxnet3_hw *hw = dev->data->dev_private;
934
935         int i, ret;
936         uint8_t j;
937
938         PMD_INIT_FUNC_TRACE();
939
940         for (i = 0; i < hw->num_rx_queues; i++) {
941                 vmxnet3_rx_queue_t *rxq = dev->data->rx_queues[i];
942
943                 for (j = 0; j < VMXNET3_RX_CMDRING_SIZE; j++) {
944                         /* Fill the whole command ring with freshly allocated buffers */
945                         ret = vmxnet3_post_rx_bufs(rxq, j);
946                         if (ret <= 0) {
947                                 PMD_INIT_LOG(ERR, "ERROR: Posting Rxq: %d buffers ring: %d", i, j);
948                                 return -ret;
949                         }
950                         /* Tell the device the next2fill index so the freshly posted mbufs can be used for incoming packets */
951                         if (unlikely(rxq->shared->ctrl.updateRxProd)) {
952                                 VMXNET3_WRITE_BAR0_REG(hw, rxprod_reg[j] + (rxq->queue_id * VMXNET3_REG_ALIGN),
953                                                        rxq->cmd_ring[j].next2fill);
954                         }
955                 }
956                 rxq->stopped = FALSE;
957         }
958
959         for (i = 0; i < dev->data->nb_tx_queues; i++) {
960                 struct vmxnet3_tx_queue *txq = dev->data->tx_queues[i];
961
962                 txq->stopped = FALSE;
963         }
964
965         return 0;
966 }
967
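/* Default 40-byte Toeplitz RSS hash key (the well-known default key also used by the Intel PMDs) */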
968 static uint8_t rss_intel_key[40] = {
969         0x6D, 0x5A, 0x56, 0xDA, 0x25, 0x5B, 0x0E, 0xC2,
970         0x41, 0x67, 0x25, 0x3D, 0x43, 0xA3, 0x8F, 0xB0,
971         0xD0, 0xCA, 0x2B, 0xCB, 0xAE, 0x7B, 0x30, 0xB4,
972         0x77, 0xCB, 0x2D, 0xA3, 0x80, 0x30, 0xF2, 0x0C,
973         0x6A, 0x42, 0xB7, 0x3B, 0xBE, 0xAC, 0x01, 0xFA,
974 };
975
976 /*
977  * Configure RSS feature
978  */
979 int
980 vmxnet3_rss_configure(struct rte_eth_dev *dev)
981 {
982         struct vmxnet3_hw *hw = dev->data->dev_private;
983         struct VMXNET3_RSSConf *dev_rss_conf;
984         struct rte_eth_rss_conf *port_rss_conf;
985         uint64_t rss_hf;
986         uint8_t i, j;
987
988         PMD_INIT_FUNC_TRACE();
989
990         dev_rss_conf = hw->rss_conf;
991         port_rss_conf = &dev->data->dev_conf.rx_adv_conf.rss_conf;
992
993         /* loading hashFunc */
994         dev_rss_conf->hashFunc = VMXNET3_RSS_HASH_FUNC_TOEPLITZ;
995         /* loading hashKeySize */
996         dev_rss_conf->hashKeySize = VMXNET3_RSS_MAX_KEY_SIZE;
997         /* loading indTableSize: must not exceed VMXNET3_RSS_MAX_IND_TABLE_SIZE (128) */
998         dev_rss_conf->indTableSize = (uint16_t)(hw->num_rx_queues * 4);
999
1000         if (port_rss_conf->rss_key == NULL) {
1001                 /* Default hash key */
1002                 port_rss_conf->rss_key = rss_intel_key;
1003         }
1004
1005         /* loading hashKey */
1006         memcpy(&dev_rss_conf->hashKey[0], port_rss_conf->rss_key, dev_rss_conf->hashKeySize);
1007
1008         /* loading indTable: spread the configured Rx queues round-robin across the table */
1009         for (i = 0, j = 0; i < dev_rss_conf->indTableSize; i++, j++) {
1010                 if (j == dev->data->nb_rx_queues)
1011                         j = 0;
1012                 dev_rss_conf->indTable[i] = j;
1013         }
1014
1015         /* loading hashType */
1016         dev_rss_conf->hashType = 0;
1017         rss_hf = port_rss_conf->rss_hf & VMXNET3_RSS_OFFLOAD_ALL;
1018         if (rss_hf & ETH_RSS_IPV4)
1019                 dev_rss_conf->hashType |= VMXNET3_RSS_HASH_TYPE_IPV4;
1020         if (rss_hf & ETH_RSS_NONFRAG_IPV4_TCP)
1021                 dev_rss_conf->hashType |= VMXNET3_RSS_HASH_TYPE_TCP_IPV4;
1022         if (rss_hf & ETH_RSS_IPV6)
1023                 dev_rss_conf->hashType |= VMXNET3_RSS_HASH_TYPE_IPV6;
1024         if (rss_hf & ETH_RSS_NONFRAG_IPV6_TCP)
1025                 dev_rss_conf->hashType |= VMXNET3_RSS_HASH_TYPE_TCP_IPV6;
1026
1027         return VMXNET3_SUCCESS;
1028 }