dpdk.git (commit 39ad6ef841121699a1c277b0ddd6652392a95574): drivers/net/vmxnet3/vmxnet3_rxtx.c
1 /*-
2  *   BSD LICENSE
3  *
4  *   Copyright(c) 2010-2015 Intel Corporation. All rights reserved.
5  *   All rights reserved.
6  *
7  *   Redistribution and use in source and binary forms, with or without
8  *   modification, are permitted provided that the following conditions
9  *   are met:
10  *
11  *     * Redistributions of source code must retain the above copyright
12  *       notice, this list of conditions and the following disclaimer.
13  *     * Redistributions in binary form must reproduce the above copyright
14  *       notice, this list of conditions and the following disclaimer in
15  *       the documentation and/or other materials provided with the
16  *       distribution.
17  *     * Neither the name of Intel Corporation nor the names of its
18  *       contributors may be used to endorse or promote products derived
19  *       from this software without specific prior written permission.
20  *
21  *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
22  *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
23  *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
24  *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
25  *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
26  *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
27  *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
28  *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
29  *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
30  *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
31  *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
32  */
33
34 #include <sys/queue.h>
35
36 #include <stdio.h>
37 #include <stdlib.h>
38 #include <string.h>
39 #include <errno.h>
40 #include <stdint.h>
41 #include <stdarg.h>
42 #include <unistd.h>
43 #include <inttypes.h>
44
45 #include <rte_byteorder.h>
46 #include <rte_common.h>
47 #include <rte_cycles.h>
48 #include <rte_log.h>
49 #include <rte_debug.h>
50 #include <rte_interrupts.h>
51 #include <rte_pci.h>
52 #include <rte_memory.h>
53 #include <rte_memzone.h>
54 #include <rte_launch.h>
55 #include <rte_eal.h>
56 #include <rte_per_lcore.h>
57 #include <rte_lcore.h>
58 #include <rte_atomic.h>
59 #include <rte_branch_prediction.h>
60 #include <rte_ring.h>
61 #include <rte_mempool.h>
62 #include <rte_malloc.h>
63 #include <rte_mbuf.h>
64 #include <rte_ether.h>
65 #include <rte_ethdev.h>
66 #include <rte_prefetch.h>
67 #include <rte_ip.h>
68 #include <rte_udp.h>
69 #include <rte_tcp.h>
70 #include <rte_sctp.h>
71 #include <rte_string_fns.h>
72 #include <rte_errno.h>
73
74 #include "base/vmxnet3_defs.h"
75 #include "vmxnet3_ring.h"
76
77 #include "vmxnet3_logs.h"
78 #include "vmxnet3_ethdev.h"
79
80 #define RTE_MBUF_DATA_DMA_ADDR(mb) \
81         (uint64_t) ((mb)->buf_physaddr + (mb)->data_off)
82
83 #define RTE_MBUF_DATA_DMA_ADDR_DEFAULT(mb) \
84         (uint64_t) ((mb)->buf_physaddr + RTE_PKTMBUF_HEADROOM)
85
86 static const uint32_t rxprod_reg[2] = {VMXNET3_REG_RXPROD, VMXNET3_REG_RXPROD2};
87
88 static int vmxnet3_post_rx_bufs(vmxnet3_rx_queue_t*, uint8_t);
89 static void vmxnet3_tq_tx_complete(vmxnet3_tx_queue_t *);
90 #ifdef RTE_LIBRTE_VMXNET3_DEBUG_DRIVER_NOT_USED
91 static void vmxnet3_rxq_dump(struct vmxnet3_rx_queue *);
92 static void vmxnet3_txq_dump(struct vmxnet3_tx_queue *);
93 #endif
94
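/* Helper: allocate a raw mbuf straight from the Rx mempool (packet fields are
 * not reset here; the Rx path fills them in when the buffer is used). */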
95 static struct rte_mbuf *
96 rte_rxmbuf_alloc(struct rte_mempool *mp)
97 {
98         struct rte_mbuf *m;
99
100         m = __rte_mbuf_raw_alloc(mp);
101         __rte_mbuf_sanity_check_raw(m, 0);
102         return m;
103 }
104
105 #ifdef RTE_LIBRTE_VMXNET3_DEBUG_DRIVER_NOT_USED
106 static void
107 vmxnet3_rxq_dump(struct vmxnet3_rx_queue *rxq)
108 {
109         uint32_t avail = 0;
110
111         if (rxq == NULL)
112                 return;
113
114         PMD_RX_LOG(DEBUG,
115                    "RXQ: cmd0 base : 0x%p cmd1 base : 0x%p comp ring base : 0x%p.",
116                    rxq->cmd_ring[0].base, rxq->cmd_ring[1].base, rxq->comp_ring.base);
117         PMD_RX_LOG(DEBUG,
118                    "RXQ: cmd0 basePA : 0x%lx cmd1 basePA : 0x%lx comp ring basePA : 0x%lx.",
119                    (unsigned long)rxq->cmd_ring[0].basePA,
120                    (unsigned long)rxq->cmd_ring[1].basePA,
121                    (unsigned long)rxq->comp_ring.basePA);
122
123         avail = vmxnet3_cmd_ring_desc_avail(&rxq->cmd_ring[0]);
124         PMD_RX_LOG(DEBUG,
125                    "RXQ:cmd0: size=%u; free=%u; next2proc=%u; queued=%u",
126                    (uint32_t)rxq->cmd_ring[0].size, avail,
127                    rxq->comp_ring.next2proc,
128                    rxq->cmd_ring[0].size - avail);
129
130         avail = vmxnet3_cmd_ring_desc_avail(&rxq->cmd_ring[1]);
131         PMD_RX_LOG(DEBUG, "RXQ:cmd1 size=%u; free=%u; next2proc=%u; queued=%u",
132                    (uint32_t)rxq->cmd_ring[1].size, avail, rxq->comp_ring.next2proc,
133                    rxq->cmd_ring[1].size - avail);
134
135 }
136
137 static void
138 vmxnet3_txq_dump(struct vmxnet3_tx_queue *txq)
139 {
140         uint32_t avail = 0;
141
142         if (txq == NULL)
143                 return;
144
145         PMD_TX_LOG(DEBUG, "TXQ: cmd base : 0x%p comp ring base : 0x%p data ring base : 0x%p.",
146                    txq->cmd_ring.base, txq->comp_ring.base, txq->data_ring.base);
147         PMD_TX_LOG(DEBUG, "TXQ: cmd basePA : 0x%lx comp ring basePA : 0x%lx data ring basePA : 0x%lx.",
148                    (unsigned long)txq->cmd_ring.basePA,
149                    (unsigned long)txq->comp_ring.basePA,
150                    (unsigned long)txq->data_ring.basePA);
151
152         avail = vmxnet3_cmd_ring_desc_avail(&txq->cmd_ring);
153         PMD_TX_LOG(DEBUG, "TXQ: size=%u; free=%u; next2proc=%u; queued=%u",
154                    (uint32_t)txq->cmd_ring.size, avail,
155                    txq->comp_ring.next2proc, txq->cmd_ring.size - avail);
156 }
157 #endif
158
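/* Free every mbuf still attached to a command ring between next2comp and next2fill. */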
159 static void
160 vmxnet3_cmd_ring_release_mbufs(vmxnet3_cmd_ring_t *ring)
161 {
162         while (ring->next2comp != ring->next2fill) {
163                 /* No need to worry about tx desc ownership, device is quiesced by now. */
164                 vmxnet3_buf_info_t *buf_info = ring->buf_info + ring->next2comp;
165
166                 if (buf_info->m) {
167                         rte_pktmbuf_free(buf_info->m);
168                         buf_info->m = NULL;
169                         buf_info->bufPA = 0;
170                         buf_info->len = 0;
171                 }
172                 vmxnet3_cmd_ring_adv_next2comp(ring);
173         }
174 }
175
176 static void
177 vmxnet3_cmd_ring_release(vmxnet3_cmd_ring_t *ring)
178 {
179         vmxnet3_cmd_ring_release_mbufs(ring);
180         rte_free(ring->buf_info);
181         ring->buf_info = NULL;
182 }
183
184
185 void
186 vmxnet3_dev_tx_queue_release(void *txq)
187 {
188         vmxnet3_tx_queue_t *tq = txq;
189
190         if (tq != NULL) {
191                 /* Release the cmd_ring */
192                 vmxnet3_cmd_ring_release(&tq->cmd_ring);
193         }
194 }
195
196 void
197 vmxnet3_dev_rx_queue_release(void *rxq)
198 {
199         int i;
200         vmxnet3_rx_queue_t *rq = rxq;
201
202         if (rq != NULL) {
203                 /* Release both the cmd_rings */
204                 for (i = 0; i < VMXNET3_RX_CMDRING_SIZE; i++)
205                         vmxnet3_cmd_ring_release(&rq->cmd_ring[i]);
206         }
207 }
208
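/* Reset a Tx queue: release any mbufs left on the command ring and
 * re-initialize the command, completion and data rings to their empty,
 * initial-generation state. */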
209 static void
210 vmxnet3_dev_tx_queue_reset(void *txq)
211 {
212         vmxnet3_tx_queue_t *tq = txq;
213         struct vmxnet3_cmd_ring *ring = &tq->cmd_ring;
214         struct vmxnet3_comp_ring *comp_ring = &tq->comp_ring;
215         struct vmxnet3_data_ring *data_ring = &tq->data_ring;
216         int size;
217
218         if (tq != NULL) {
219                 /* Release the cmd_ring mbufs */
220                 vmxnet3_cmd_ring_release_mbufs(&tq->cmd_ring);
221         }
222
223         /* Tx vmxnet rings structure initialization */
224         ring->next2fill = 0;
225         ring->next2comp = 0;
226         ring->gen = VMXNET3_INIT_GEN;
227         comp_ring->next2proc = 0;
228         comp_ring->gen = VMXNET3_INIT_GEN;
229
230         size = sizeof(struct Vmxnet3_TxDesc) * ring->size;
231         size += sizeof(struct Vmxnet3_TxCompDesc) * comp_ring->size;
232         size += sizeof(struct Vmxnet3_TxDataDesc) * data_ring->size;
233
234         memset(ring->base, 0, size);
235 }
236
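/* Reset an Rx queue: release the mbufs on both command rings and
 * re-initialize the command and completion rings to their empty,
 * initial-generation state. */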
237 static void
238 vmxnet3_dev_rx_queue_reset(void *rxq)
239 {
240         int i;
241         vmxnet3_rx_queue_t *rq = rxq;
242         struct vmxnet3_cmd_ring *ring0, *ring1;
243         struct vmxnet3_comp_ring *comp_ring;
244         int size;
245
246         if (rq != NULL) {
247                 /* Release both the cmd_rings mbufs */
248                 for (i = 0; i < VMXNET3_RX_CMDRING_SIZE; i++)
249                         vmxnet3_cmd_ring_release_mbufs(&rq->cmd_ring[i]);
250         }
251
252         ring0 = &rq->cmd_ring[0];
253         ring1 = &rq->cmd_ring[1];
254         comp_ring = &rq->comp_ring;
255
256         /* Rx vmxnet rings structure initialization */
257         ring0->next2fill = 0;
258         ring1->next2fill = 0;
259         ring0->next2comp = 0;
260         ring1->next2comp = 0;
261         ring0->gen = VMXNET3_INIT_GEN;
262         ring1->gen = VMXNET3_INIT_GEN;
263         comp_ring->next2proc = 0;
264         comp_ring->gen = VMXNET3_INIT_GEN;
265
266         size = sizeof(struct Vmxnet3_RxDesc) * (ring0->size + ring1->size);
267         size += sizeof(struct Vmxnet3_RxCompDesc) * comp_ring->size;
268
269         memset(ring0->base, 0, size);
270 }
271
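/* Stop all Rx/Tx queues of the port and reset their rings. */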
272 void
273 vmxnet3_dev_clear_queues(struct rte_eth_dev *dev)
274 {
275         unsigned i;
276
277         PMD_INIT_FUNC_TRACE();
278
279         for (i = 0; i < dev->data->nb_tx_queues; i++) {
280                 struct vmxnet3_tx_queue *txq = dev->data->tx_queues[i];
281
282                 if (txq != NULL) {
283                         txq->stopped = TRUE;
284                         vmxnet3_dev_tx_queue_reset(txq);
285                 }
286         }
287
288         for (i = 0; i < dev->data->nb_rx_queues; i++) {
289                 struct vmxnet3_rx_queue *rxq = dev->data->rx_queues[i];
290
291                 if (rxq != NULL) {
292                         rxq->stopped = TRUE;
293                         vmxnet3_dev_rx_queue_reset(rxq);
294                 }
295         }
296 }
297
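/* Walk the Tx completion ring and, for every completed packet, free the
 * associated mbuf segments and reclaim the command-ring descriptors. */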
298 static void
299 vmxnet3_tq_tx_complete(vmxnet3_tx_queue_t *txq)
300 {
301         int completed = 0;
302         struct rte_mbuf *mbuf;
303         vmxnet3_comp_ring_t *comp_ring = &txq->comp_ring;
304         struct Vmxnet3_TxCompDesc *tcd = (struct Vmxnet3_TxCompDesc *)
305                 (comp_ring->base + comp_ring->next2proc);
306
307         while (tcd->gen == comp_ring->gen) {
308                 /* Release cmd_ring descriptor and free mbuf */
309                 VMXNET3_ASSERT(txq->cmd_ring.base[tcd->txdIdx].txd.eop == 1);
310                 while (txq->cmd_ring.next2comp != tcd->txdIdx) {
311                         mbuf = txq->cmd_ring.buf_info[txq->cmd_ring.next2comp].m;
312                         txq->cmd_ring.buf_info[txq->cmd_ring.next2comp].m = NULL;
313                         rte_pktmbuf_free_seg(mbuf);
314
315                         /* Mark the txd for which tcd was generated as completed */
316                         vmxnet3_cmd_ring_adv_next2comp(&txq->cmd_ring);
317                         completed++;
318                 }
319
320                 vmxnet3_comp_ring_adv_next2proc(comp_ring);
321                 tcd = (struct Vmxnet3_TxCompDesc *)(comp_ring->base +
322                                                     comp_ring->next2proc);
323         }
324
325         PMD_TX_LOG(DEBUG, "Processed %d tx comps & command descs.", completed);
326 }
327
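/*
 * Transmit burst function registered as this PMD's tx_pkt_burst callback.
 * Applications normally reach it through the generic ethdev API; an
 * illustrative call (port/queue ids and mbufs are the caller's):
 *
 *     uint16_t sent = rte_eth_tx_burst(port_id, queue_id, pkts, nb_pkts);
 *
 * Each mbuf chain is mapped onto one SOP..EOP descriptor sequence; the SOP
 * generation bit is flipped last so the device never sees a partial packet.
 */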
328 uint16_t
329 vmxnet3_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts,
330                   uint16_t nb_pkts)
331 {
332         uint16_t nb_tx;
333         vmxnet3_tx_queue_t *txq = tx_queue;
334         struct vmxnet3_hw *hw = txq->hw;
335
336         if (unlikely(txq->stopped)) {
337                 PMD_TX_LOG(DEBUG, "Tx queue is stopped.");
338                 return 0;
339         }
340
341         /* Free up the comp_descriptors aggressively */
342         vmxnet3_tq_tx_complete(txq);
343
344         nb_tx = 0;
345         while (nb_tx < nb_pkts) {
346                 Vmxnet3_GenericDesc *gdesc;
347                 vmxnet3_buf_info_t *tbi;
348                 uint32_t first2fill, avail, dw2;
349                 struct rte_mbuf *txm = tx_pkts[nb_tx];
350                 struct rte_mbuf *m_seg = txm;
351
352                 /* Is this packet excessively fragmented? If so, drop it. */
353                 if (unlikely(txm->nb_segs > VMXNET3_MAX_TXD_PER_PKT)) {
354                         ++txq->stats.drop_too_many_segs;
355                         ++txq->stats.drop_total;
356                         rte_pktmbuf_free(txm);
357                         ++nb_tx;
358                         continue;
359                 }
360
361                 /* Is command ring full? */
362                 avail = vmxnet3_cmd_ring_desc_avail(&txq->cmd_ring);
363                 if (txm->nb_segs > avail) {
364                         ++txq->stats.tx_ring_full;
365                         break;
366                 }
367
368                 /* use the previous gen bit for the SOP desc */
369                 dw2 = (txq->cmd_ring.gen ^ 0x1) << VMXNET3_TXD_GEN_SHIFT;
370                 first2fill = txq->cmd_ring.next2fill;
371                 do {
372                         /* Remember the transmit buffer for cleanup */
373                         tbi = txq->cmd_ring.buf_info + txq->cmd_ring.next2fill;
374                         tbi->m = m_seg;
375
376                         /* NB: the following assumes that the VMXNET3
377                            maximum transmit buffer size (16K) is greater
378                            than the maximum size of an mbuf segment. */
379                         gdesc = txq->cmd_ring.base + txq->cmd_ring.next2fill;
380                         gdesc->txd.addr = RTE_MBUF_DATA_DMA_ADDR(m_seg);
381                         gdesc->dword[2] = dw2 | m_seg->data_len;
382                         gdesc->dword[3] = 0;
383
384                         /* move to the next2fill descriptor */
385                         vmxnet3_cmd_ring_adv_next2fill(&txq->cmd_ring);
386
387                         /* use the right gen for non-SOP desc */
388                         dw2 = txq->cmd_ring.gen << VMXNET3_TXD_GEN_SHIFT;
389                 } while ((m_seg = m_seg->next) != NULL);
390
391                 /* Update the EOP descriptor */
392                 gdesc->dword[3] |= VMXNET3_TXD_EOP | VMXNET3_TXD_CQ;
393
394                 /* Add VLAN tag if present */
395                 gdesc = txq->cmd_ring.base + first2fill;
396                 if (txm->ol_flags & PKT_TX_VLAN_PKT) {
397                         gdesc->txd.ti = 1;
398                         gdesc->txd.tci = txm->vlan_tci;
399                 }
400
401                 /* TODO: Add transmit checksum offload here */
402
403                 /* flip the GEN bit on the SOP */
404                 rte_compiler_barrier();
405                 gdesc->dword[2] ^= VMXNET3_TXD_GEN;
406
407                 txq->shared->ctrl.txNumDeferred++;
408                 nb_tx++;
409         }
410
411         PMD_TX_LOG(DEBUG, "vmxnet3 txThreshold: %u", txq->shared->ctrl.txThreshold);
412
413         if (txq->shared->ctrl.txNumDeferred >= txq->shared->ctrl.txThreshold) {
414
415                 txq->shared->ctrl.txNumDeferred = 0;
416                 /* Notify vSwitch that packets are available. */
417                 VMXNET3_WRITE_BAR0_REG(hw, (VMXNET3_REG_TXPROD + txq->queue_id * VMXNET3_REG_ALIGN),
418                                        txq->cmd_ring.next2fill);
419         }
420
421         return nb_tx;
422 }
423
424 /*
425  *  Allocates mbufs and clusters. Posts Rx descriptors with the buffer
426  *  details so that the device can receive packets into those buffers.
427  *      Ring layout:
428  *      Of the two rings, the 1st ring holds buffers of type 0 and type 1.
429  *      bufs_per_pkt is set such that, in the non-LRO case, all the buffers
430  *      required by a frame fit in the 1st ring (the 1st buf of type 0 and
431  *      the rest of type 1).
432  *      The 2nd ring holds type-1 buffers only and is mostly used for LRO.
433  *
434  */
435 static int
436 vmxnet3_post_rx_bufs(vmxnet3_rx_queue_t *rxq, uint8_t ring_id)
437 {
438         int err = 0;
439         uint32_t i = 0, val = 0;
440         struct vmxnet3_cmd_ring *ring = &rxq->cmd_ring[ring_id];
441
442         if (ring_id == 0) {
443                 /* Usually: One HEAD type buf per packet
444                  * val = (ring->next2fill % rxq->hw->bufs_per_pkt) ?
445                  * VMXNET3_RXD_BTYPE_BODY : VMXNET3_RXD_BTYPE_HEAD;
446                  */
447
448                 /* We use single packet buffer so all heads here */
449                 val = VMXNET3_RXD_BTYPE_HEAD;
450         } else {
451                 /* All BODY type buffers for 2nd ring */
452                 val = VMXNET3_RXD_BTYPE_BODY;
453         }
454
455         while (vmxnet3_cmd_ring_desc_avail(ring) > 0) {
456                 struct Vmxnet3_RxDesc *rxd;
457                 struct rte_mbuf *mbuf;
458                 vmxnet3_buf_info_t *buf_info = &ring->buf_info[ring->next2fill];
459
460                 rxd = (struct Vmxnet3_RxDesc *)(ring->base + ring->next2fill);
461
462                 /* Allocate blank mbuf for the current Rx Descriptor */
463                 mbuf = rte_rxmbuf_alloc(rxq->mp);
464                 if (unlikely(mbuf == NULL)) {
465                         PMD_RX_LOG(ERR, "Error allocating mbuf in %s", __func__);
466                         rxq->stats.rx_buf_alloc_failure++;
467                         err = ENOMEM;
468                         break;
469                 }
470
471                 /*
472                  * Load the mbuf pointer into this descriptor's buf_info entry;
473                  * buf_info plays the same role as the cookie in a virtio virtqueue.
474                  */
475                 buf_info->m = mbuf;
476                 buf_info->len = (uint16_t)(mbuf->buf_len -
477                                            RTE_PKTMBUF_HEADROOM);
478                 buf_info->bufPA = RTE_MBUF_DATA_DMA_ADDR_DEFAULT(mbuf);
479
480                 /* Load Rx Descriptor with the buffer's GPA */
481                 rxd->addr = buf_info->bufPA;
482
483                 /* After this point rxd->addr MUST not be NULL */
484                 rxd->btype = val;
485                 rxd->len = buf_info->len;
486                 /* Flip gen bit at the end to change ownership */
487                 rxd->gen = ring->gen;
488
489                 vmxnet3_cmd_ring_adv_next2fill(ring);
490                 i++;
491         }
492
493         /* Return error only if no buffers are posted at present */
494         if (vmxnet3_cmd_ring_desc_avail(ring) >= (ring->size - 1))
495                 return -err;
496         else
497                 return i;
498 }
499
500
501 /* Receive side checksum and other offloads */
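/* Translates completion-descriptor bits into mbuf offload flags: VLAN strip
 * (PKT_RX_VLAN_PKT + vlan_tci), RSS hash (PKT_RX_RSS_HASH + hash.rss) and
 * IPv4/L4 checksum errors (PKT_RX_IP_CKSUM_BAD / PKT_RX_L4_CKSUM_BAD). */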
502 static void
503 vmxnet3_rx_offload(const Vmxnet3_RxCompDesc *rcd, struct rte_mbuf *rxm)
504 {
505         /* Check for hardware stripped VLAN tag */
506         if (rcd->ts) {
507                 rxm->ol_flags |= PKT_RX_VLAN_PKT;
508                 rxm->vlan_tci = rte_le_to_cpu_16((uint16_t)rcd->tci);
509         }
510
511         /* Check for RSS */
512         if (rcd->rssType != VMXNET3_RCD_RSS_TYPE_NONE) {
513                 rxm->ol_flags |= PKT_RX_RSS_HASH;
514                 rxm->hash.rss = rcd->rssHash;
515         }
516
517         /* Check packet type and checksum errors; only IPv4 is handled for now. */
518         if (rcd->v4) {
519                 struct ether_hdr *eth = rte_pktmbuf_mtod(rxm, struct ether_hdr *);
520                 struct ipv4_hdr *ip = (struct ipv4_hdr *)(eth + 1);
521
522                 if (((ip->version_ihl & 0xf) << 2) > (int)sizeof(struct ipv4_hdr))
523 #ifdef RTE_NEXT_ABI
524                         rxm->packet_type = RTE_PTYPE_L3_IPV4_EXT;
525 #else
526                         rxm->ol_flags |= PKT_RX_IPV4_HDR_EXT;
527 #endif
528                 else
529 #ifdef RTE_NEXT_ABI
530                         rxm->packet_type = RTE_PTYPE_L3_IPV4;
531 #else
532                         rxm->ol_flags |= PKT_RX_IPV4_HDR;
533 #endif
534
535                 if (!rcd->cnc) {
536                         if (!rcd->ipc)
537                                 rxm->ol_flags |= PKT_RX_IP_CKSUM_BAD;
538
539                         if ((rcd->tcp || rcd->udp) && !rcd->tuc)
540                                 rxm->ol_flags |= PKT_RX_L4_CKSUM_BAD;
541                 }
542         }
543 }
544
545 /*
546  * Process the Rx Completion Ring of given vmxnet3_rx_queue
547  * for nb_pkts burst and return the number of packets received
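 * (Registered as this PMD's rx_pkt_burst callback, so applications normally
 * reach it through rte_eth_rx_burst().)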
548  */
549 uint16_t
550 vmxnet3_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t nb_pkts)
551 {
552         uint16_t nb_rx;
553         uint32_t nb_rxd, idx;
554         uint8_t ring_idx;
555         vmxnet3_rx_queue_t *rxq;
556         Vmxnet3_RxCompDesc *rcd;
557         vmxnet3_buf_info_t *rbi;
558         Vmxnet3_RxDesc *rxd;
559         struct rte_mbuf *rxm = NULL;
560         struct vmxnet3_hw *hw;
561
562         nb_rx = 0;
563         ring_idx = 0;
564         nb_rxd = 0;
565         idx = 0;
566
567         rxq = rx_queue;
568         hw = rxq->hw;
569
570         rcd = &rxq->comp_ring.base[rxq->comp_ring.next2proc].rcd;
571
572         if (unlikely(rxq->stopped)) {
573                 PMD_RX_LOG(DEBUG, "Rx queue is stopped.");
574                 return 0;
575         }
576
577         while (rcd->gen == rxq->comp_ring.gen) {
578                 if (nb_rx >= nb_pkts)
579                         break;
580
581                 idx = rcd->rxdIdx;
582                 ring_idx = (uint8_t)((rcd->rqID == rxq->qid1) ? 0 : 1);
583                 rxd = (Vmxnet3_RxDesc *)rxq->cmd_ring[ring_idx].base + idx;
584                 rbi = rxq->cmd_ring[ring_idx].buf_info + idx;
585
586                 if (unlikely(rcd->sop != 1 || rcd->eop != 1)) {
587                         rte_pktmbuf_free_seg(rbi->m);
588                         PMD_RX_LOG(DEBUG, "Packet spread across multiple buffers\n");
589                         goto rcd_done;
590                 }
591
592                 PMD_RX_LOG(DEBUG, "rxd idx: %d ring idx: %d.", idx, ring_idx);
593
594                 VMXNET3_ASSERT(rcd->len <= rxd->len);
595                 VMXNET3_ASSERT(rbi->m);
596
597                 if (unlikely(rcd->len == 0)) {
598                         PMD_RX_LOG(DEBUG, "Rx buf was skipped. rxring[%d][%d]\n",
599                                    ring_idx, idx);
600                         VMXNET3_ASSERT(rcd->sop && rcd->eop);
601                         rte_pktmbuf_free_seg(rbi->m);
602                         goto rcd_done;
603                 }
604
605                 /* We assume each packet arrives in a single packet buffer */
606                 if (unlikely(rxd->btype != VMXNET3_RXD_BTYPE_HEAD)) {
607                         PMD_RX_LOG(DEBUG,
608                                    "Alert: Misbehaving device, incorrect "
609                                    "buffer type used. Packet dropped.");
610                         rte_pktmbuf_free_seg(rbi->m);
611                         goto rcd_done;
612                 }
613                 VMXNET3_ASSERT(rxd->btype == VMXNET3_RXD_BTYPE_HEAD);
614
615                 /* Get the packet buffer pointer from buf_info */
616                 rxm = rbi->m;
617
618                 /* Clear descriptor associated buf_info to be reused */
619                 rbi->m = NULL;
620                 rbi->bufPA = 0;
621
622                 /* Update the index that we received a packet */
623                 rxq->cmd_ring[ring_idx].next2comp = idx;
624
625                 /* For RCD with EOP set, check if there is frame error */
626                 if (unlikely(rcd->err)) {
627                         rxq->stats.drop_total++;
628                         rxq->stats.drop_err++;
629
630                         if (!rcd->fcs) {
631                                 rxq->stats.drop_fcs++;
632                                 PMD_RX_LOG(ERR, "Recv packet dropped due to frame err.");
633                         }
634                         PMD_RX_LOG(ERR, "Error in received packet rcd#:%d rxd:%d",
635                                    (int)(rcd - (struct Vmxnet3_RxCompDesc *)
636                                          rxq->comp_ring.base), rcd->rxdIdx);
637                         rte_pktmbuf_free_seg(rxm);
638                         goto rcd_done;
639                 }
640
641
642                 /* Initialize newly received packet buffer */
643                 rxm->port = rxq->port_id;
644                 rxm->nb_segs = 1;
645                 rxm->next = NULL;
646                 rxm->pkt_len = (uint16_t)rcd->len;
647                 rxm->data_len = (uint16_t)rcd->len;
648                 rxm->data_off = RTE_PKTMBUF_HEADROOM;
649                 rxm->ol_flags = 0;
650                 rxm->vlan_tci = 0;
651
652                 vmxnet3_rx_offload(rcd, rxm);
653
654                 rx_pkts[nb_rx++] = rxm;
655 rcd_done:
656                 rxq->cmd_ring[ring_idx].next2comp = idx;
657                 VMXNET3_INC_RING_IDX_ONLY(rxq->cmd_ring[ring_idx].next2comp, rxq->cmd_ring[ring_idx].size);
658
659                 /* Refill the ring: allocate new mbufs and repost Rx descriptors */
660                 vmxnet3_post_rx_bufs(rxq, ring_idx);
661                 if (unlikely(rxq->shared->ctrl.updateRxProd)) {
662                         VMXNET3_WRITE_BAR0_REG(hw, rxprod_reg[ring_idx] + (rxq->queue_id * VMXNET3_REG_ALIGN),
663                                                rxq->cmd_ring[ring_idx].next2fill);
664                 }
665
666                 /* Advance to the next descriptor in comp_ring */
667                 vmxnet3_comp_ring_adv_next2proc(&rxq->comp_ring);
668
669                 rcd = &rxq->comp_ring.base[rxq->comp_ring.next2proc].rcd;
670                 nb_rxd++;
671                 if (nb_rxd > rxq->cmd_ring[0].size) {
672                         PMD_RX_LOG(ERR,
673                                    "Used up quota of receiving packets,"
674                                    " relinquish control.");
675                         break;
676                 }
677         }
678
679         return nb_rx;
680 }
681
682 /*
683  * Create memzone for device rings. malloc can't be used as the physical address is
684  * needed. If the memzone is already created, then this function returns a ptr
685  * to the old one.
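 *  The zone name is built as "<pci_drv_name>_<ring_name>_<port_id>_<queue_id>",
 *  e.g. the Tx descriptor zone for queue 0 of port 0 ends in "_txdesc_0_0".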
686  */
687 static const struct rte_memzone *
688 ring_dma_zone_reserve(struct rte_eth_dev *dev, const char *ring_name,
689                       uint16_t queue_id, uint32_t ring_size, int socket_id)
690 {
691         char z_name[RTE_MEMZONE_NAMESIZE];
692         const struct rte_memzone *mz;
693
694         snprintf(z_name, sizeof(z_name), "%s_%s_%d_%d",
695                         dev->driver->pci_drv.name, ring_name,
696                         dev->data->port_id, queue_id);
697
698         mz = rte_memzone_lookup(z_name);
699         if (mz)
700                 return mz;
701
702         return rte_memzone_reserve_aligned(z_name, ring_size,
703                         socket_id, 0, VMXNET3_RING_BA_ALIGN);
704 }
705
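/*
 * Tx queue setup callback (dev_ops->tx_queue_setup), normally reached through
 * rte_eth_tx_queue_setup(). Illustrative call only, with port_id chosen by
 * the caller; checksum offloads must be disabled in txq_flags for this PMD:
 *
 *     struct rte_eth_txconf txc = { .txq_flags = ETH_TXQ_FLAGS_NOXSUMS };
 *     int ret = rte_eth_tx_queue_setup(port_id, 0, 512, rte_socket_id(), &txc);
 */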
706 int
707 vmxnet3_dev_tx_queue_setup(struct rte_eth_dev *dev,
708                            uint16_t queue_idx,
709                            uint16_t nb_desc,
710                            unsigned int socket_id,
711                            const struct rte_eth_txconf *tx_conf)
712 {
713         struct vmxnet3_hw *hw = dev->data->dev_private;
714         const struct rte_memzone *mz;
715         struct vmxnet3_tx_queue *txq;
716         struct vmxnet3_cmd_ring *ring;
717         struct vmxnet3_comp_ring *comp_ring;
718         struct vmxnet3_data_ring *data_ring;
719         int size;
720
721         PMD_INIT_FUNC_TRACE();
722
723         if ((tx_conf->txq_flags & ETH_TXQ_FLAGS_NOXSUMS) !=
724             ETH_TXQ_FLAGS_NOXSUMS) {
725                 PMD_INIT_LOG(ERR, "Tx checksum offload is not supported yet");
726                 return -EINVAL;
727         }
728
729         txq = rte_zmalloc("ethdev_tx_queue", sizeof(struct vmxnet3_tx_queue), RTE_CACHE_LINE_SIZE);
730         if (txq == NULL) {
731                 PMD_INIT_LOG(ERR, "Can not allocate tx queue structure");
732                 return -ENOMEM;
733         }
734
735         txq->queue_id = queue_idx;
736         txq->port_id = dev->data->port_id;
737         txq->shared = &hw->tqd_start[queue_idx];
738         txq->hw = hw;
739         txq->qid = queue_idx;
740         txq->stopped = TRUE;
741
742         ring = &txq->cmd_ring;
743         comp_ring = &txq->comp_ring;
744         data_ring = &txq->data_ring;
745
746         /* Tx vmxnet ring length should be between 512-4096 */
747         if (nb_desc < VMXNET3_DEF_TX_RING_SIZE) {
748                 PMD_INIT_LOG(ERR, "VMXNET3 Tx Ring Size Min: %u",
749                              VMXNET3_DEF_TX_RING_SIZE);
750                 return -EINVAL;
751         } else if (nb_desc > VMXNET3_TX_RING_MAX_SIZE) {
752                 PMD_INIT_LOG(ERR, "VMXNET3 Tx Ring Size Max: %u",
753                              VMXNET3_TX_RING_MAX_SIZE);
754                 return -EINVAL;
755         } else {
756                 ring->size = nb_desc;
757                 ring->size &= ~VMXNET3_RING_SIZE_MASK;
758         }
759         comp_ring->size = data_ring->size = ring->size;
760
761         /* Tx vmxnet rings structure initialization */
762         ring->next2fill = 0;
763         ring->next2comp = 0;
764         ring->gen = VMXNET3_INIT_GEN;
765         comp_ring->next2proc = 0;
766         comp_ring->gen = VMXNET3_INIT_GEN;
767
768         size = sizeof(struct Vmxnet3_TxDesc) * ring->size;
769         size += sizeof(struct Vmxnet3_TxCompDesc) * comp_ring->size;
770         size += sizeof(struct Vmxnet3_TxDataDesc) * data_ring->size;
771
772         mz = ring_dma_zone_reserve(dev, "txdesc", queue_idx, size, socket_id);
773         if (mz == NULL) {
774                 PMD_INIT_LOG(ERR, "ERROR: Creating queue descriptors zone");
775                 return -ENOMEM;
776         }
777         memset(mz->addr, 0, mz->len);
778
779         /* cmd_ring initialization */
780         ring->base = mz->addr;
781         ring->basePA = mz->phys_addr;
782
783         /* comp_ring initialization */
784         comp_ring->base = ring->base + ring->size;
785         comp_ring->basePA = ring->basePA +
786                 (sizeof(struct Vmxnet3_TxDesc) * ring->size);
787
788         /* data_ring initialization */
789         data_ring->base = (Vmxnet3_TxDataDesc *)(comp_ring->base + comp_ring->size);
790         data_ring->basePA = comp_ring->basePA +
791                         (sizeof(struct Vmxnet3_TxCompDesc) * comp_ring->size);
792
793         /* cmd_ring0 buf_info allocation */
794         ring->buf_info = rte_zmalloc("tx_ring_buf_info",
795                                      ring->size * sizeof(vmxnet3_buf_info_t), RTE_CACHE_LINE_SIZE);
796         if (ring->buf_info == NULL) {
797                 PMD_INIT_LOG(ERR, "ERROR: Creating tx_buf_info structure");
798                 return -ENOMEM;
799         }
800
801         /* Update the data portion with txq */
802         dev->data->tx_queues[queue_idx] = txq;
803
804         return 0;
805 }
806
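/*
 * Rx queue setup callback (dev_ops->rx_queue_setup), normally reached through
 * rte_eth_rx_queue_setup(). Illustrative call only, mempool and port_id being
 * the caller's; the mempool data room (minus headroom) must be large enough
 * for max_rx_pkt_len, since scattered Rx is not supported here:
 *
 *     int ret = rte_eth_rx_queue_setup(port_id, 0, 1024, rte_socket_id(),
 *                                      NULL, mbuf_pool);
 */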
807 int
808 vmxnet3_dev_rx_queue_setup(struct rte_eth_dev *dev,
809                            uint16_t queue_idx,
810                            uint16_t nb_desc,
811                            unsigned int socket_id,
812                            __attribute__((unused)) const struct rte_eth_rxconf *rx_conf,
813                            struct rte_mempool *mp)
814 {
815         const struct rte_memzone *mz;
816         struct vmxnet3_rx_queue *rxq;
817         struct vmxnet3_hw     *hw = dev->data->dev_private;
818         struct vmxnet3_cmd_ring *ring0, *ring1, *ring;
819         struct vmxnet3_comp_ring *comp_ring;
820         int size;
821         uint8_t i;
822         char mem_name[32];
823         uint16_t buf_size;
824
825         PMD_INIT_FUNC_TRACE();
826
827         buf_size = rte_pktmbuf_data_room_size(mp) -
828                 RTE_PKTMBUF_HEADROOM;
829
830         if (dev->data->dev_conf.rxmode.max_rx_pkt_len > buf_size) {
831                 PMD_INIT_LOG(ERR, "buf_size = %u, max_pkt_len = %u, "
832                              "VMXNET3 doesn't support scatter packets yet",
833                              buf_size, dev->data->dev_conf.rxmode.max_rx_pkt_len);
834                 return -EINVAL;
835         }
836
837         rxq = rte_zmalloc("ethdev_rx_queue", sizeof(struct vmxnet3_rx_queue), RTE_CACHE_LINE_SIZE);
838         if (rxq == NULL) {
839                 PMD_INIT_LOG(ERR, "Can not allocate rx queue structure");
840                 return -ENOMEM;
841         }
842
843         rxq->mp = mp;
844         rxq->queue_id = queue_idx;
845         rxq->port_id = dev->data->port_id;
846         rxq->shared = &hw->rqd_start[queue_idx];
847         rxq->hw = hw;
848         rxq->qid1 = queue_idx;
849         rxq->qid2 = queue_idx + hw->num_rx_queues;
850         rxq->stopped = TRUE;
851
852         ring0 = &rxq->cmd_ring[0];
853         ring1 = &rxq->cmd_ring[1];
854         comp_ring = &rxq->comp_ring;
855
856         /* Rx vmxnet rings length should be between 256-4096 */
857         if (nb_desc < VMXNET3_DEF_RX_RING_SIZE) {
858                 PMD_INIT_LOG(ERR, "VMXNET3 Rx Ring Size Min: 256");
859                 return -EINVAL;
860         } else if (nb_desc > VMXNET3_RX_RING_MAX_SIZE) {
861                 PMD_INIT_LOG(ERR, "VMXNET3 Rx Ring Size Max: 4096");
862                 return -EINVAL;
863         } else {
864                 ring0->size = nb_desc;
865                 ring0->size &= ~VMXNET3_RING_SIZE_MASK;
866                 ring1->size = ring0->size;
867         }
868
869         comp_ring->size = ring0->size + ring1->size;
870
871         /* Rx vmxnet rings structure initialization */
872         ring0->next2fill = 0;
873         ring1->next2fill = 0;
874         ring0->next2comp = 0;
875         ring1->next2comp = 0;
876         ring0->gen = VMXNET3_INIT_GEN;
877         ring1->gen = VMXNET3_INIT_GEN;
878         comp_ring->next2proc = 0;
879         comp_ring->gen = VMXNET3_INIT_GEN;
880
881         size = sizeof(struct Vmxnet3_RxDesc) * (ring0->size + ring1->size);
882         size += sizeof(struct Vmxnet3_RxCompDesc) * comp_ring->size;
883
884         mz = ring_dma_zone_reserve(dev, "rxdesc", queue_idx, size, socket_id);
885         if (mz == NULL) {
886                 PMD_INIT_LOG(ERR, "ERROR: Creating queue descriptors zone");
887                 return -ENOMEM;
888         }
889         memset(mz->addr, 0, mz->len);
890
891         /* cmd_ring0 initialization */
892         ring0->base = mz->addr;
893         ring0->basePA = mz->phys_addr;
894
895         /* cmd_ring1 initialization */
896         ring1->base = ring0->base + ring0->size;
897         ring1->basePA = ring0->basePA + sizeof(struct Vmxnet3_RxDesc) * ring0->size;
898
899         /* comp_ring initialization */
900         comp_ring->base = ring1->base + ring1->size;
901         comp_ring->basePA = ring1->basePA + sizeof(struct Vmxnet3_RxDesc) *
902                 ring1->size;
903
904         /* cmd_ring0-cmd_ring1 buf_info allocation */
905         for (i = 0; i < VMXNET3_RX_CMDRING_SIZE; i++) {
906
907                 ring = &rxq->cmd_ring[i];
908                 ring->rid = i;
909                 snprintf(mem_name, sizeof(mem_name), "rx_ring_%d_buf_info", i);
910
911                 ring->buf_info = rte_zmalloc(mem_name, ring->size * sizeof(vmxnet3_buf_info_t), RTE_CACHE_LINE_SIZE);
912                 if (ring->buf_info == NULL) {
913                         PMD_INIT_LOG(ERR, "ERROR: Creating rx_buf_info structure");
914                         return -ENOMEM;
915                 }
916         }
917
918         /* Update the data portion with rxq */
919         dev->data->rx_queues[queue_idx] = rxq;
920
921         return 0;
922 }
923
924 /*
925  * Initializes Receive Unit
926  * Load mbufs in rx queue in advance
927  */
928 int
929 vmxnet3_dev_rxtx_init(struct rte_eth_dev *dev)
930 {
931         struct vmxnet3_hw *hw = dev->data->dev_private;
932
933         int i, ret;
934         uint8_t j;
935
936         PMD_INIT_FUNC_TRACE();
937
938         for (i = 0; i < hw->num_rx_queues; i++) {
939                 vmxnet3_rx_queue_t *rxq = dev->data->rx_queues[i];
940
941                 for (j = 0; j < VMXNET3_RX_CMDRING_SIZE; j++) {
942                         /* vmxnet3_post_rx_bufs() fills the whole ring with new mbufs */
943                         ret = vmxnet3_post_rx_bufs(rxq, j);
944                         if (ret <= 0) {
945                                 PMD_INIT_LOG(ERR, "ERROR: Posting Rxq: %d buffers ring: %d", i, j);
946                                 return -ret;
947                         }
948                         /* Tell the device the new next2fill index so the posted mbufs can be used for incoming packets */
949                         if (unlikely(rxq->shared->ctrl.updateRxProd)) {
950                                 VMXNET3_WRITE_BAR0_REG(hw, rxprod_reg[j] + (rxq->queue_id * VMXNET3_REG_ALIGN),
951                                                        rxq->cmd_ring[j].next2fill);
952                         }
953                 }
954                 rxq->stopped = FALSE;
955         }
956
957         for (i = 0; i < dev->data->nb_tx_queues; i++) {
958                 struct vmxnet3_tx_queue *txq = dev->data->tx_queues[i];
959
960                 txq->stopped = FALSE;
961         }
962
963         return 0;
964 }
965
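/* Default 40-byte Toeplitz hash key, used when the application does not supply one. */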
966 static uint8_t rss_intel_key[40] = {
967         0x6D, 0x5A, 0x56, 0xDA, 0x25, 0x5B, 0x0E, 0xC2,
968         0x41, 0x67, 0x25, 0x3D, 0x43, 0xA3, 0x8F, 0xB0,
969         0xD0, 0xCA, 0x2B, 0xCB, 0xAE, 0x7B, 0x30, 0xB4,
970         0x77, 0xCB, 0x2D, 0xA3, 0x80, 0x30, 0xF2, 0x0C,
971         0x6A, 0x42, 0xB7, 0x3B, 0xBE, 0xAC, 0x01, 0xFA,
972 };
973
974 /*
975  * Configure RSS feature
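 * The indirection table (num_rx_queues * 4 entries) is filled round-robin
 * over the configured Rx queues; e.g. with 2 Rx queues and 8 entries the
 * table becomes 0,1,0,1,0,1,0,1.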
976  */
977 int
978 vmxnet3_rss_configure(struct rte_eth_dev *dev)
979 {
980         struct vmxnet3_hw *hw = dev->data->dev_private;
981         struct VMXNET3_RSSConf *dev_rss_conf;
982         struct rte_eth_rss_conf *port_rss_conf;
983         uint64_t rss_hf;
984         uint8_t i, j;
985
986         PMD_INIT_FUNC_TRACE();
987
988         dev_rss_conf = hw->rss_conf;
989         port_rss_conf = &dev->data->dev_conf.rx_adv_conf.rss_conf;
990
991         /* loading hashFunc */
992         dev_rss_conf->hashFunc = VMXNET3_RSS_HASH_FUNC_TOEPLITZ;
993         /* loading hashKeySize */
994         dev_rss_conf->hashKeySize = VMXNET3_RSS_MAX_KEY_SIZE;
995         /* loading indTableSize : Must not exceed VMXNET3_RSS_MAX_IND_TABLE_SIZE (128)*/
996         dev_rss_conf->indTableSize = (uint16_t)(hw->num_rx_queues * 4);
997
998         if (port_rss_conf->rss_key == NULL) {
999                 /* Default hash key */
1000                 port_rss_conf->rss_key = rss_intel_key;
1001         }
1002
1003         /* loading hashKey */
1004         memcpy(&dev_rss_conf->hashKey[0], port_rss_conf->rss_key, dev_rss_conf->hashKeySize);
1005
1006         /* loading indTable */
1007         for (i = 0, j = 0; i < dev_rss_conf->indTableSize; i++, j++) {
1008                 if (j == dev->data->nb_rx_queues)
1009                         j = 0;
1010                 dev_rss_conf->indTable[i] = j;
1011         }
1012
1013         /* loading hashType */
1014         dev_rss_conf->hashType = 0;
1015         rss_hf = port_rss_conf->rss_hf & VMXNET3_RSS_OFFLOAD_ALL;
1016         if (rss_hf & ETH_RSS_IPV4)
1017                 dev_rss_conf->hashType |= VMXNET3_RSS_HASH_TYPE_IPV4;
1018         if (rss_hf & ETH_RSS_NONFRAG_IPV4_TCP)
1019                 dev_rss_conf->hashType |= VMXNET3_RSS_HASH_TYPE_TCP_IPV4;
1020         if (rss_hf & ETH_RSS_IPV6)
1021                 dev_rss_conf->hashType |= VMXNET3_RSS_HASH_TYPE_IPV6;
1022         if (rss_hf & ETH_RSS_NONFRAG_IPV6_TCP)
1023                 dev_rss_conf->hashType |= VMXNET3_RSS_HASH_TYPE_TCP_IPV6;
1024
1025         return VMXNET3_SUCCESS;
1026 }