f88ced77a89be3bfd194ff3f1bc2cf9e2483af37
dpdk.git: drivers/net/vmxnet3/vmxnet3_rxtx.c
1 /*-
2  *   BSD LICENSE
3  *
4  *   Copyright(c) 2010-2015 Intel Corporation. All rights reserved.
5  *   All rights reserved.
6  *
7  *   Redistribution and use in source and binary forms, with or without
8  *   modification, are permitted provided that the following conditions
9  *   are met:
10  *
11  *     * Redistributions of source code must retain the above copyright
12  *       notice, this list of conditions and the following disclaimer.
13  *     * Redistributions in binary form must reproduce the above copyright
14  *       notice, this list of conditions and the following disclaimer in
15  *       the documentation and/or other materials provided with the
16  *       distribution.
17  *     * Neither the name of Intel Corporation nor the names of its
18  *       contributors may be used to endorse or promote products derived
19  *       from this software without specific prior written permission.
20  *
21  *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
22  *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
23  *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
24  *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
25  *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
26  *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
27  *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
28  *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
29  *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
30  *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
31  *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
32  */
33
34 #include <sys/queue.h>
35
36 #include <stdio.h>
37 #include <stdlib.h>
38 #include <string.h>
39 #include <errno.h>
40 #include <stdint.h>
41 #include <stdarg.h>
42 #include <unistd.h>
43 #include <inttypes.h>
44
45 #include <rte_byteorder.h>
46 #include <rte_common.h>
47 #include <rte_cycles.h>
48 #include <rte_log.h>
49 #include <rte_debug.h>
50 #include <rte_interrupts.h>
51 #include <rte_pci.h>
52 #include <rte_memory.h>
53 #include <rte_memzone.h>
54 #include <rte_launch.h>
55 #include <rte_eal.h>
56 #include <rte_per_lcore.h>
57 #include <rte_lcore.h>
58 #include <rte_atomic.h>
59 #include <rte_branch_prediction.h>
60 #include <rte_ring.h>
61 #include <rte_mempool.h>
62 #include <rte_malloc.h>
63 #include <rte_mbuf.h>
64 #include <rte_ether.h>
65 #include <rte_ethdev.h>
66 #include <rte_prefetch.h>
67 #include <rte_ip.h>
68 #include <rte_udp.h>
69 #include <rte_tcp.h>
70 #include <rte_sctp.h>
71 #include <rte_string_fns.h>
72 #include <rte_errno.h>
73
74 #include "base/vmxnet3_defs.h"
75 #include "vmxnet3_ring.h"
76
77 #include "vmxnet3_logs.h"
78 #include "vmxnet3_ethdev.h"
79
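   /* Rx producer index registers, one per Rx command ring (ring 0 and ring 1) */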
80 static const uint32_t rxprod_reg[2] = {VMXNET3_REG_RXPROD, VMXNET3_REG_RXPROD2};
81
82 static int vmxnet3_post_rx_bufs(vmxnet3_rx_queue_t*, uint8_t);
83 static void vmxnet3_tq_tx_complete(vmxnet3_tx_queue_t *);
84 #ifdef RTE_LIBRTE_VMXNET3_DEBUG_DRIVER_NOT_USED
85 static void vmxnet3_rxq_dump(struct vmxnet3_rx_queue *);
86 static void vmxnet3_txq_dump(struct vmxnet3_tx_queue *);
87 #endif
88
89 static struct rte_mbuf *
90 rte_rxmbuf_alloc(struct rte_mempool *mp)
91 {
92         struct rte_mbuf *m;
93
94         m = __rte_mbuf_raw_alloc(mp);
95         __rte_mbuf_sanity_check_raw(m, 0);
96         return m;
97 }
98
99 #ifdef RTE_LIBRTE_VMXNET3_DEBUG_DRIVER_NOT_USED
100 static void
101 vmxnet3_rxq_dump(struct vmxnet3_rx_queue *rxq)
102 {
103         uint32_t avail = 0;
104
105         if (rxq == NULL)
106                 return;
107
108         PMD_RX_LOG(DEBUG,
109                    "RXQ: cmd0 base : 0x%p cmd1 base : 0x%p comp ring base : 0x%p.",
110                    rxq->cmd_ring[0].base, rxq->cmd_ring[1].base, rxq->comp_ring.base);
111         PMD_RX_LOG(DEBUG,
112                    "RXQ: cmd0 basePA : 0x%lx cmd1 basePA : 0x%lx comp ring basePA : 0x%lx.",
113                    (unsigned long)rxq->cmd_ring[0].basePA,
114                    (unsigned long)rxq->cmd_ring[1].basePA,
115                    (unsigned long)rxq->comp_ring.basePA);
116
117         avail = vmxnet3_cmd_ring_desc_avail(&rxq->cmd_ring[0]);
118         PMD_RX_LOG(DEBUG,
119                    "RXQ:cmd0: size=%u; free=%u; next2proc=%u; queued=%u",
120                    (uint32_t)rxq->cmd_ring[0].size, avail,
121                    rxq->comp_ring.next2proc,
122                    rxq->cmd_ring[0].size - avail);
123
124         avail = vmxnet3_cmd_ring_desc_avail(&rxq->cmd_ring[1]);
125         PMD_RX_LOG(DEBUG, "RXQ:cmd1 size=%u; free=%u; next2proc=%u; queued=%u",
126                    (uint32_t)rxq->cmd_ring[1].size, avail, rxq->comp_ring.next2proc,
127                    rxq->cmd_ring[1].size - avail);
128
129 }
130
131 static void
132 vmxnet3_txq_dump(struct vmxnet3_tx_queue *txq)
133 {
134         uint32_t avail = 0;
135
136         if (txq == NULL)
137                 return;
138
139         PMD_TX_LOG(DEBUG, "TXQ: cmd base : 0x%p comp ring base : 0x%p data ring base : 0x%p.",
140                    txq->cmd_ring.base, txq->comp_ring.base, txq->data_ring.base);
141         PMD_TX_LOG(DEBUG, "TXQ: cmd basePA : 0x%lx comp ring basePA : 0x%lx data ring basePA : 0x%lx.",
142                    (unsigned long)txq->cmd_ring.basePA,
143                    (unsigned long)txq->comp_ring.basePA,
144                    (unsigned long)txq->data_ring.basePA);
145
146         avail = vmxnet3_cmd_ring_desc_avail(&txq->cmd_ring);
147         PMD_TX_LOG(DEBUG, "TXQ: size=%u; free=%u; next2proc=%u; queued=%u",
148                    (uint32_t)txq->cmd_ring.size, avail,
149                    txq->comp_ring.next2proc, txq->cmd_ring.size - avail);
150 }
151 #endif
152
153 static void
154 vmxnet3_cmd_ring_release_mbufs(vmxnet3_cmd_ring_t *ring)
155 {
156         while (ring->next2comp != ring->next2fill) {
157                 /* No need to worry about tx desc ownership, device is quiesced by now. */
158                 vmxnet3_buf_info_t *buf_info = ring->buf_info + ring->next2comp;
159
160                 if (buf_info->m) {
161                         rte_pktmbuf_free(buf_info->m);
162                         buf_info->m = NULL;
163                         buf_info->bufPA = 0;
164                         buf_info->len = 0;
165                 }
166                 vmxnet3_cmd_ring_adv_next2comp(ring);
167         }
168 }
169
170 static void
171 vmxnet3_cmd_ring_release(vmxnet3_cmd_ring_t *ring)
172 {
173         vmxnet3_cmd_ring_release_mbufs(ring);
174         rte_free(ring->buf_info);
175         ring->buf_info = NULL;
176 }
177
178
179 void
180 vmxnet3_dev_tx_queue_release(void *txq)
181 {
182         vmxnet3_tx_queue_t *tq = txq;
183
184         if (tq != NULL) {
185                 /* Release the cmd_ring */
186                 vmxnet3_cmd_ring_release(&tq->cmd_ring);
187         }
188 }
189
190 void
191 vmxnet3_dev_rx_queue_release(void *rxq)
192 {
193         int i;
194         vmxnet3_rx_queue_t *rq = rxq;
195
196         if (rq != NULL) {
197                 /* Release both the cmd_rings */
198                 for (i = 0; i < VMXNET3_RX_CMDRING_SIZE; i++)
199                         vmxnet3_cmd_ring_release(&rq->cmd_ring[i]);
200         }
201 }
202
203 static void
204 vmxnet3_dev_tx_queue_reset(void *txq)
205 {
206         vmxnet3_tx_queue_t *tq = txq;
207         struct vmxnet3_cmd_ring *ring = &tq->cmd_ring;
208         struct vmxnet3_comp_ring *comp_ring = &tq->comp_ring;
209         struct vmxnet3_data_ring *data_ring = &tq->data_ring;
210         int size;
211
212         if (tq != NULL) {
213                 /* Release the cmd_ring mbufs */
214                 vmxnet3_cmd_ring_release_mbufs(&tq->cmd_ring);
215         }
216
217         /* Tx vmxnet rings structure initialization */
218         ring->next2fill = 0;
219         ring->next2comp = 0;
220         ring->gen = VMXNET3_INIT_GEN;
221         comp_ring->next2proc = 0;
222         comp_ring->gen = VMXNET3_INIT_GEN;
223
224         size = sizeof(struct Vmxnet3_TxDesc) * ring->size;
225         size += sizeof(struct Vmxnet3_TxCompDesc) * comp_ring->size;
226         size += sizeof(struct Vmxnet3_TxDataDesc) * data_ring->size;
227
228         memset(ring->base, 0, size);
229 }
230
231 static void
232 vmxnet3_dev_rx_queue_reset(void *rxq)
233 {
234         int i;
235         vmxnet3_rx_queue_t *rq = rxq;
236         struct vmxnet3_cmd_ring *ring0, *ring1;
237         struct vmxnet3_comp_ring *comp_ring;
238         int size;
239
240         if (rq != NULL) {
241                 /* Release both the cmd_rings mbufs */
242                 for (i = 0; i < VMXNET3_RX_CMDRING_SIZE; i++)
243                         vmxnet3_cmd_ring_release_mbufs(&rq->cmd_ring[i]);
244         }
245
246         ring0 = &rq->cmd_ring[0];
247         ring1 = &rq->cmd_ring[1];
248         comp_ring = &rq->comp_ring;
249
250         /* Rx vmxnet rings structure initialization */
251         ring0->next2fill = 0;
252         ring1->next2fill = 0;
253         ring0->next2comp = 0;
254         ring1->next2comp = 0;
255         ring0->gen = VMXNET3_INIT_GEN;
256         ring1->gen = VMXNET3_INIT_GEN;
257         comp_ring->next2proc = 0;
258         comp_ring->gen = VMXNET3_INIT_GEN;
259
260         size = sizeof(struct Vmxnet3_RxDesc) * (ring0->size + ring1->size);
261         size += sizeof(struct Vmxnet3_RxCompDesc) * comp_ring->size;
262
263         memset(ring0->base, 0, size);
264 }
265
266 void
267 vmxnet3_dev_clear_queues(struct rte_eth_dev *dev)
268 {
269         unsigned i;
270
271         PMD_INIT_FUNC_TRACE();
272
273         for (i = 0; i < dev->data->nb_tx_queues; i++) {
274                 struct vmxnet3_tx_queue *txq = dev->data->tx_queues[i];
275
276                 if (txq != NULL) {
277                         txq->stopped = TRUE;
278                         vmxnet3_dev_tx_queue_reset(txq);
279                 }
280         }
281
282         for (i = 0; i < dev->data->nb_rx_queues; i++) {
283                 struct vmxnet3_rx_queue *rxq = dev->data->rx_queues[i];
284
285                 if (rxq != NULL) {
286                         rxq->stopped = TRUE;
287                         vmxnet3_dev_rx_queue_reset(rxq);
288                 }
289         }
290 }
291
292 static void
293 vmxnet3_tq_tx_complete(vmxnet3_tx_queue_t *txq)
294 {
295         int completed = 0;
296         struct rte_mbuf *mbuf;
297         vmxnet3_comp_ring_t *comp_ring = &txq->comp_ring;
298         struct Vmxnet3_TxCompDesc *tcd = (struct Vmxnet3_TxCompDesc *)
299                 (comp_ring->base + comp_ring->next2proc);
300
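            /* A completion entry is ready when its gen bit matches the ring's
             * current gen; the expected value flips each time the completion
             * ring wraps around. */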
301         while (tcd->gen == comp_ring->gen) {
302                 /* Release cmd_ring descriptor and free mbuf */
303                 VMXNET3_ASSERT(txq->cmd_ring.base[tcd->txdIdx].txd.eop == 1);
304                 while (txq->cmd_ring.next2comp != tcd->txdIdx) {
305                         mbuf = txq->cmd_ring.buf_info[txq->cmd_ring.next2comp].m;
306                         txq->cmd_ring.buf_info[txq->cmd_ring.next2comp].m = NULL;
307                         rte_pktmbuf_free_seg(mbuf);
308
309                         /* Mark the txd for which tcd was generated as completed */
310                         vmxnet3_cmd_ring_adv_next2comp(&txq->cmd_ring);
311                         completed++;
312                 }
313
314                 vmxnet3_comp_ring_adv_next2proc(comp_ring);
315                 tcd = (struct Vmxnet3_TxCompDesc *)(comp_ring->base +
316                                                     comp_ring->next2proc);
317         }
318
319         PMD_TX_LOG(DEBUG, "Processed %d tx comps & command descs.", completed);
320 }
321
322 uint16_t
323 vmxnet3_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts,
324                   uint16_t nb_pkts)
325 {
326         uint16_t nb_tx;
327         vmxnet3_tx_queue_t *txq = tx_queue;
328         struct vmxnet3_hw *hw = txq->hw;
329         Vmxnet3_TxQueueCtrl *txq_ctrl = &txq->shared->ctrl;
330         uint32_t deferred = rte_le_to_cpu_32(txq_ctrl->txNumDeferred);
331
332         if (unlikely(txq->stopped)) {
333                 PMD_TX_LOG(DEBUG, "Tx queue is stopped.");
334                 return 0;
335         }
336
337         /* Free up the comp_descriptors aggressively */
338         vmxnet3_tq_tx_complete(txq);
339
340         nb_tx = 0;
341         while (nb_tx < nb_pkts) {
342                 Vmxnet3_GenericDesc *gdesc;
343                 vmxnet3_buf_info_t *tbi;
344                 uint32_t first2fill, avail, dw2;
345                 struct rte_mbuf *txm = tx_pkts[nb_tx];
346                 struct rte_mbuf *m_seg = txm;
347                 int copy_size = 0;
348
349                 /* If the packet is excessively fragmented, drop it */
350                 if (unlikely(txm->nb_segs > VMXNET3_MAX_TXD_PER_PKT)) {
351                         ++txq->stats.drop_too_many_segs;
352                         ++txq->stats.drop_total;
353                         rte_pktmbuf_free(txm);
354                         ++nb_tx;
355                         continue;
356                 }
357
358                 /* Is command ring full? */
359                 avail = vmxnet3_cmd_ring_desc_avail(&txq->cmd_ring);
360                 if (txm->nb_segs > avail) {
361                         ++txq->stats.tx_ring_full;
362                         break;
363                 }
364
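                    /* A small single-segment packet is copied into the per-queue
                     * data ring; its Tx descriptor will then point at the
                     * data-ring slot instead of the mbuf. */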
365                 if (txm->nb_segs == 1 && rte_pktmbuf_pkt_len(txm) <= VMXNET3_HDR_COPY_SIZE) {
366                         struct Vmxnet3_TxDataDesc *tdd;
367
368                         tdd = txq->data_ring.base + txq->cmd_ring.next2fill;
369                         copy_size = rte_pktmbuf_pkt_len(txm);
370                         rte_memcpy(tdd->data, rte_pktmbuf_mtod(txm, char *), copy_size);
371                 }
372
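                    /* The SOP descriptor gets the inverted gen bit and is only
                     * flipped back (after the barrier further below) once the whole
                     * chain is built, so the device never sees a half-written packet. */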
373                 /* use the previous gen bit for the SOP desc */
374                 dw2 = (txq->cmd_ring.gen ^ 0x1) << VMXNET3_TXD_GEN_SHIFT;
375                 first2fill = txq->cmd_ring.next2fill;
376                 do {
377                         /* Remember the transmit buffer for cleanup */
378                         tbi = txq->cmd_ring.buf_info + txq->cmd_ring.next2fill;
379                         tbi->m = m_seg;
380
381                         /* NB: the following assumes that the VMXNET3 maximum
382                            transmit buffer size (16K) is greater than the
383                            maximum mbuf segment size. */
384                         gdesc = txq->cmd_ring.base + txq->cmd_ring.next2fill;
385                         if (copy_size)
386                                 gdesc->txd.addr = rte_cpu_to_le_64(txq->data_ring.basePA +
387                                                                 txq->cmd_ring.next2fill *
388                                                                 sizeof(struct Vmxnet3_TxDataDesc));
389                         else
390                                 gdesc->txd.addr = rte_mbuf_data_dma_addr(m_seg);
391
392                         gdesc->dword[2] = dw2 | m_seg->data_len;
393                         gdesc->dword[3] = 0;
394
395                         /* move to the next2fill descriptor */
396                         vmxnet3_cmd_ring_adv_next2fill(&txq->cmd_ring);
397
398                         /* use the right gen for non-SOP desc */
399                         dw2 = txq->cmd_ring.gen << VMXNET3_TXD_GEN_SHIFT;
400                 } while ((m_seg = m_seg->next) != NULL);
401
402                 /* Update the EOP descriptor */
403                 gdesc->dword[3] |= VMXNET3_TXD_EOP | VMXNET3_TXD_CQ;
404
405                 /* Add VLAN tag if present */
406                 gdesc = txq->cmd_ring.base + first2fill;
407                 if (txm->ol_flags & PKT_TX_VLAN_PKT) {
408                         gdesc->txd.ti = 1;
409                         gdesc->txd.tci = txm->vlan_tci;
410                 }
411
412                 /* TODO: Add transmit checksum offload here */
413
414                 /* flip the GEN bit on the SOP */
415                 rte_compiler_barrier();
416                 gdesc->dword[2] ^= VMXNET3_TXD_GEN;
417
418                 txq_ctrl->txNumDeferred = rte_cpu_to_le_32(++deferred);
419                 nb_tx++;
420         }
421
422         PMD_TX_LOG(DEBUG, "vmxnet3 txThreshold: %u", rte_le_to_cpu_32(txq_ctrl->txThreshold));
423
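            /* Ring the Tx doorbell only after txThreshold packets have been
             * deferred; batching the producer-register writes keeps these
             * relatively expensive accesses infrequent. */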
424         if (deferred >= rte_le_to_cpu_32(txq_ctrl->txThreshold)) {
425                 txq_ctrl->txNumDeferred = 0;
426                 /* Notify vSwitch that packets are available. */
427                 VMXNET3_WRITE_BAR0_REG(hw, (VMXNET3_REG_TXPROD + txq->queue_id * VMXNET3_REG_ALIGN),
428                                        txq->cmd_ring.next2fill);
429         }
430
431         return nb_tx;
432 }
433
434 /*
435  *  Allocates mbufs and clusters. Posts Rx descriptors with buffer details
436  *  so that the device can receive packets into those buffers.
437  *      Ring layout:
438  *      Of the two rings, the 1st ring holds buffers of type 0 and type 1.
439  *      bufs_per_pkt is set such that for non-LRO cases all the buffers required
440  *      by a frame fit in the 1st ring (the 1st buf of type 0, the rest of type 1).
441  *      The 2nd ring holds type 1 buffers only and is used mostly
442  *      for LRO.
443  *
444  */
445 static int
446 vmxnet3_post_rx_bufs(vmxnet3_rx_queue_t *rxq, uint8_t ring_id)
447 {
448         int err = 0;
449         uint32_t i = 0, val = 0;
450         struct vmxnet3_cmd_ring *ring = &rxq->cmd_ring[ring_id];
451
452         if (ring_id == 0) {
453                 /* Usually: One HEAD type buf per packet
454                  * val = (ring->next2fill % rxq->hw->bufs_per_pkt) ?
455                  * VMXNET3_RXD_BTYPE_BODY : VMXNET3_RXD_BTYPE_HEAD;
456                  */
457
458                 /* We use a single buffer per packet, so all descriptors here are HEAD type */
459                 val = VMXNET3_RXD_BTYPE_HEAD;
460         } else {
461                 /* All BODY type buffers for 2nd ring */
462                 val = VMXNET3_RXD_BTYPE_BODY;
463         }
464
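            /* Refill every free descriptor with a fresh mbuf; ownership is
             * passed to the device by writing the descriptor's gen bit last. */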
465         while (vmxnet3_cmd_ring_desc_avail(ring) > 0) {
466                 struct Vmxnet3_RxDesc *rxd;
467                 struct rte_mbuf *mbuf;
468                 vmxnet3_buf_info_t *buf_info = &ring->buf_info[ring->next2fill];
469
470                 rxd = (struct Vmxnet3_RxDesc *)(ring->base + ring->next2fill);
471
472                 /* Allocate blank mbuf for the current Rx Descriptor */
473                 mbuf = rte_rxmbuf_alloc(rxq->mp);
474                 if (unlikely(mbuf == NULL)) {
475                         PMD_RX_LOG(ERR, "Error allocating mbuf");
476                         rxq->stats.rx_buf_alloc_failure++;
477                         err = ENOMEM;
478                         break;
479                 }
480
481                 /*
482                  * Load the mbuf pointer into buf_info[next2fill]; the buf_info
483                  * entry plays the same role as the cookie in a virtio virtqueue.
484                  */
485                 buf_info->m = mbuf;
486                 buf_info->len = (uint16_t)(mbuf->buf_len -
487                                            RTE_PKTMBUF_HEADROOM);
488                 buf_info->bufPA =
489                         rte_mbuf_data_dma_addr_default(mbuf);
490
491                 /* Load Rx Descriptor with the buffer's GPA */
492                 rxd->addr = buf_info->bufPA;
493
494                 /* After this point rxd->addr MUST not be NULL */
495                 rxd->btype = val;
496                 rxd->len = buf_info->len;
497                 /* Flip gen bit at the end to change ownership */
498                 rxd->gen = ring->gen;
499
500                 vmxnet3_cmd_ring_adv_next2fill(ring);
501                 i++;
502         }
503
504         /* Return error only if no buffers are posted at present */
505         if (vmxnet3_cmd_ring_desc_avail(ring) >= (ring->size - 1))
506                 return -err;
507         else
508                 return i;
509 }
510
511
512 /* Receive side checksum and other offloads */
513 static void
514 vmxnet3_rx_offload(const Vmxnet3_RxCompDesc *rcd, struct rte_mbuf *rxm)
515 {
516         /* Check for hardware stripped VLAN tag */
517         if (rcd->ts) {
518                 rxm->ol_flags |= PKT_RX_VLAN_PKT;
519                 rxm->vlan_tci = rte_le_to_cpu_16((uint16_t)rcd->tci);
520         }
521
522         /* Check for RSS */
523         if (rcd->rssType != VMXNET3_RCD_RSS_TYPE_NONE) {
524                 rxm->ol_flags |= PKT_RX_RSS_HASH;
525                 rxm->hash.rss = rcd->rssHash;
526         }
527
528         /* Check packet type, checksum errors, etc.  Only IPv4 is supported for now. */
529         if (rcd->v4) {
530                 struct ether_hdr *eth = rte_pktmbuf_mtod(rxm, struct ether_hdr *);
531                 struct ipv4_hdr *ip = (struct ipv4_hdr *)(eth + 1);
532
533                 if (((ip->version_ihl & 0xf) << 2) > (int)sizeof(struct ipv4_hdr))
534                         rxm->packet_type = RTE_PTYPE_L3_IPV4_EXT;
535                 else
536                         rxm->packet_type = RTE_PTYPE_L3_IPV4;
537
538                 if (!rcd->cnc) {
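                    /* cnc = checksum not calculated by the device; ipc and tuc
                     * report whether the IP and TCP/UDP checksums were correct. */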
539                         if (!rcd->ipc)
540                                 rxm->ol_flags |= PKT_RX_IP_CKSUM_BAD;
541
542                         if ((rcd->tcp || rcd->udp) && !rcd->tuc)
543                                 rxm->ol_flags |= PKT_RX_L4_CKSUM_BAD;
544                 }
545         }
546 }
547
548 /*
549  * Process the Rx Completion Ring of given vmxnet3_rx_queue
550  * for nb_pkts burst and return the number of packets received
551  */
552 uint16_t
553 vmxnet3_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t nb_pkts)
554 {
555         uint16_t nb_rx;
556         uint32_t nb_rxd, idx;
557         uint8_t ring_idx;
558         vmxnet3_rx_queue_t *rxq;
559         Vmxnet3_RxCompDesc *rcd;
560         vmxnet3_buf_info_t *rbi;
561         Vmxnet3_RxDesc *rxd;
562         struct rte_mbuf *rxm = NULL;
563         struct vmxnet3_hw *hw;
564
565         nb_rx = 0;
566         ring_idx = 0;
567         nb_rxd = 0;
568         idx = 0;
569
570         rxq = rx_queue;
571         hw = rxq->hw;
572
573         rcd = &rxq->comp_ring.base[rxq->comp_ring.next2proc].rcd;
574
575         if (unlikely(rxq->stopped)) {
576                 PMD_RX_LOG(DEBUG, "Rx queue is stopped.");
577                 return 0;
578         }
579
580         while (rcd->gen == rxq->comp_ring.gen) {
581                 if (nb_rx >= nb_pkts)
582                         break;
583
584                 idx = rcd->rxdIdx;
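                    /* rqID identifies the command ring: qid1 maps to ring 0, qid2 to ring 1 */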
585                 ring_idx = (uint8_t)((rcd->rqID == rxq->qid1) ? 0 : 1);
586                 rxd = (Vmxnet3_RxDesc *)rxq->cmd_ring[ring_idx].base + idx;
587                 rbi = rxq->cmd_ring[ring_idx].buf_info + idx;
588
589                 if (unlikely(rcd->sop != 1 || rcd->eop != 1)) {
590                         rte_pktmbuf_free_seg(rbi->m);
591                         PMD_RX_LOG(DEBUG, "Packet spread across multiple buffers\n");
592                         goto rcd_done;
593                 }
594
595                 PMD_RX_LOG(DEBUG, "rxd idx: %d ring idx: %d.", idx, ring_idx);
596
597                 VMXNET3_ASSERT(rcd->len <= rxd->len);
598                 VMXNET3_ASSERT(rbi->m);
599
600                 if (unlikely(rcd->len == 0)) {
601                         PMD_RX_LOG(DEBUG, "Rx buf was skipped. rxring[%d][%d]\n",
602                                    ring_idx, idx);
603                         VMXNET3_ASSERT(rcd->sop && rcd->eop);
604                         rte_pktmbuf_free_seg(rbi->m);
605                         goto rcd_done;
606                 }
607
608                 /* We assume each packet arrives in a single buffer */
609                 if (unlikely(rxd->btype != VMXNET3_RXD_BTYPE_HEAD)) {
610                         PMD_RX_LOG(DEBUG,
611                                    "Alert: misbehaving device, incorrect "
612                                    "buffer type used. Packet dropped.");
613                         rte_pktmbuf_free_seg(rbi->m);
614                         goto rcd_done;
615                 }
616                 VMXNET3_ASSERT(rxd->btype == VMXNET3_RXD_BTYPE_HEAD);
617
618                 /* Get the packet buffer pointer from buf_info */
619                 rxm = rbi->m;
620
621                 /* Clear descriptor associated buf_info to be reused */
622                 rbi->m = NULL;
623                 rbi->bufPA = 0;
624
625                 /* Record the ring index at which this packet was received */
626                 rxq->cmd_ring[ring_idx].next2comp = idx;
627
628                 /* For RCD with EOP set, check if there is frame error */
629                 if (unlikely(rcd->err)) {
630                         rxq->stats.drop_total++;
631                         rxq->stats.drop_err++;
632
633                         if (!rcd->fcs) {
634                                 rxq->stats.drop_fcs++;
635                                 PMD_RX_LOG(ERR, "Received packet dropped due to frame error.");
636                         }
637                         PMD_RX_LOG(ERR, "Error in received packet rcd#:%d rxd:%d",
638                                    (int)(rcd - (struct Vmxnet3_RxCompDesc *)
639                                          rxq->comp_ring.base), rcd->rxdIdx);
640                         rte_pktmbuf_free_seg(rxm);
641                         goto rcd_done;
642                 }
643
644
645                 /* Initialize newly received packet buffer */
646                 rxm->port = rxq->port_id;
647                 rxm->nb_segs = 1;
648                 rxm->next = NULL;
649                 rxm->pkt_len = (uint16_t)rcd->len;
650                 rxm->data_len = (uint16_t)rcd->len;
651                 rxm->data_off = RTE_PKTMBUF_HEADROOM;
652                 rxm->ol_flags = 0;
653                 rxm->vlan_tci = 0;
654
655                 vmxnet3_rx_offload(rcd, rxm);
656
657                 rx_pkts[nb_rx++] = rxm;
658 rcd_done:
659                 rxq->cmd_ring[ring_idx].next2comp = idx;
660                 VMXNET3_INC_RING_IDX_ONLY(rxq->cmd_ring[ring_idx].next2comp, rxq->cmd_ring[ring_idx].size);
661
662                 /* It's time to allocate some new buf and renew descriptors */
663                 vmxnet3_post_rx_bufs(rxq, ring_idx);
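                    /* Publish the new fill level, but only when the device has
                     * asked for producer updates via the shared control area. */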
664                 if (unlikely(rxq->shared->ctrl.updateRxProd)) {
665                         VMXNET3_WRITE_BAR0_REG(hw, rxprod_reg[ring_idx] + (rxq->queue_id * VMXNET3_REG_ALIGN),
666                                                rxq->cmd_ring[ring_idx].next2fill);
667                 }
668
669                 /* Advance to the next descriptor in comp_ring */
670                 vmxnet3_comp_ring_adv_next2proc(&rxq->comp_ring);
671
672                 rcd = &rxq->comp_ring.base[rxq->comp_ring.next2proc].rcd;
673                 nb_rxd++;
674                 if (nb_rxd > rxq->cmd_ring[0].size) {
675                         PMD_RX_LOG(ERR,
676                                    "Used up quota of received packets,"
677                                    " relinquishing control.");
678                         break;
679                 }
680         }
681
682         return nb_rx;
683 }
684
685 /*
686  * Create a memzone for the device rings. malloc can't be used because the
687  * physical address is needed. If the memzone already exists, this function
688  * returns a pointer to the existing one.
689  */
690 static const struct rte_memzone *
691 ring_dma_zone_reserve(struct rte_eth_dev *dev, const char *ring_name,
692                       uint16_t queue_id, uint32_t ring_size, int socket_id)
693 {
694         char z_name[RTE_MEMZONE_NAMESIZE];
695         const struct rte_memzone *mz;
696
697         snprintf(z_name, sizeof(z_name), "%s_%s_%d_%d",
698                         dev->driver->pci_drv.name, ring_name,
699                         dev->data->port_id, queue_id);
700
701         mz = rte_memzone_lookup(z_name);
702         if (mz)
703                 return mz;
704
705         return rte_memzone_reserve_aligned(z_name, ring_size,
706                         socket_id, 0, VMXNET3_RING_BA_ALIGN);
707 }
708
709 int
710 vmxnet3_dev_tx_queue_setup(struct rte_eth_dev *dev,
711                            uint16_t queue_idx,
712                            uint16_t nb_desc,
713                            unsigned int socket_id,
714                            __attribute__((unused)) const struct rte_eth_txconf *tx_conf)
715 {
716         struct vmxnet3_hw *hw = dev->data->dev_private;
717         const struct rte_memzone *mz;
718         struct vmxnet3_tx_queue *txq;
719         struct vmxnet3_cmd_ring *ring;
720         struct vmxnet3_comp_ring *comp_ring;
721         struct vmxnet3_data_ring *data_ring;
722         int size;
723
724         PMD_INIT_FUNC_TRACE();
725
726         if ((tx_conf->txq_flags & ETH_TXQ_FLAGS_NOXSUMS) !=
727             ETH_TXQ_FLAGS_NOXSUMS) {
728                 PMD_INIT_LOG(ERR, "Tx checksum offload is not supported yet");
729                 return -EINVAL;
730         }
731
732         txq = rte_zmalloc("ethdev_tx_queue", sizeof(struct vmxnet3_tx_queue), RTE_CACHE_LINE_SIZE);
733         if (txq == NULL) {
734                 PMD_INIT_LOG(ERR, "Cannot allocate Tx queue structure");
735                 return -ENOMEM;
736         }
737
738         txq->queue_id = queue_idx;
739         txq->port_id = dev->data->port_id;
740         txq->shared = &hw->tqd_start[queue_idx];
741         txq->hw = hw;
742         txq->qid = queue_idx;
743         txq->stopped = TRUE;
744
745         ring = &txq->cmd_ring;
746         comp_ring = &txq->comp_ring;
747         data_ring = &txq->data_ring;
748
749         /* Tx vmxnet ring length should be between 512-4096 */
750         if (nb_desc < VMXNET3_DEF_TX_RING_SIZE) {
751                 PMD_INIT_LOG(ERR, "VMXNET3 Tx Ring Size Min: %u",
752                              VMXNET3_DEF_TX_RING_SIZE);
753                 return -EINVAL;
754         } else if (nb_desc > VMXNET3_TX_RING_MAX_SIZE) {
755                 PMD_INIT_LOG(ERR, "VMXNET3 Tx Ring Size Max: %u",
756                              VMXNET3_TX_RING_MAX_SIZE);
757                 return -EINVAL;
758         } else {
759                 ring->size = nb_desc;
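                    /* Round down to the ring-size alignment required by the device */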
760                 ring->size &= ~VMXNET3_RING_SIZE_MASK;
761         }
762         comp_ring->size = data_ring->size = ring->size;
763
764         /* Tx vmxnet rings structure initialization */
765         ring->next2fill = 0;
766         ring->next2comp = 0;
767         ring->gen = VMXNET3_INIT_GEN;
768         comp_ring->next2proc = 0;
769         comp_ring->gen = VMXNET3_INIT_GEN;
770
771         size = sizeof(struct Vmxnet3_TxDesc) * ring->size;
772         size += sizeof(struct Vmxnet3_TxCompDesc) * comp_ring->size;
773         size += sizeof(struct Vmxnet3_TxDataDesc) * data_ring->size;
774
775         mz = ring_dma_zone_reserve(dev, "txdesc", queue_idx, size, socket_id);
776         if (mz == NULL) {
777                 PMD_INIT_LOG(ERR, "ERROR: Creating queue descriptors zone");
778                 return -ENOMEM;
779         }
780         memset(mz->addr, 0, mz->len);
781
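            /* The memzone is carved up back to back: Tx descriptors, then Tx
             * completion descriptors, then the data ring. */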
782         /* cmd_ring initialization */
783         ring->base = mz->addr;
784         ring->basePA = mz->phys_addr;
785
786         /* comp_ring initialization */
787         comp_ring->base = ring->base + ring->size;
788         comp_ring->basePA = ring->basePA +
789                 (sizeof(struct Vmxnet3_TxDesc) * ring->size);
790
791         /* data_ring initialization */
792         data_ring->base = (Vmxnet3_TxDataDesc *)(comp_ring->base + comp_ring->size);
793         data_ring->basePA = comp_ring->basePA +
794                         (sizeof(struct Vmxnet3_TxCompDesc) * comp_ring->size);
795
796         /* cmd_ring0 buf_info allocation */
797         ring->buf_info = rte_zmalloc("tx_ring_buf_info",
798                                      ring->size * sizeof(vmxnet3_buf_info_t), RTE_CACHE_LINE_SIZE);
799         if (ring->buf_info == NULL) {
800                 PMD_INIT_LOG(ERR, "ERROR: Creating tx_buf_info structure");
801                 return -ENOMEM;
802         }
803
804         /* Update the data portion with txq */
805         dev->data->tx_queues[queue_idx] = txq;
806
807         return 0;
808 }
809
810 int
811 vmxnet3_dev_rx_queue_setup(struct rte_eth_dev *dev,
812                            uint16_t queue_idx,
813                            uint16_t nb_desc,
814                            unsigned int socket_id,
815                            __attribute__((unused)) const struct rte_eth_rxconf *rx_conf,
816                            struct rte_mempool *mp)
817 {
818         const struct rte_memzone *mz;
819         struct vmxnet3_rx_queue *rxq;
820         struct vmxnet3_hw     *hw = dev->data->dev_private;
821         struct vmxnet3_cmd_ring *ring0, *ring1, *ring;
822         struct vmxnet3_comp_ring *comp_ring;
823         int size;
824         uint8_t i;
825         char mem_name[32];
826         uint16_t buf_size;
827
828         PMD_INIT_FUNC_TRACE();
829
830         buf_size = rte_pktmbuf_data_room_size(mp) -
831                 RTE_PKTMBUF_HEADROOM;
832
833         if (dev->data->dev_conf.rxmode.max_rx_pkt_len > buf_size) {
834                 PMD_INIT_LOG(ERR, "buf_size = %u, max_pkt_len = %u, "
835                              "VMXNET3 doesn't support scattered packets yet",
836                              buf_size, dev->data->dev_conf.rxmode.max_rx_pkt_len);
837                 return -EINVAL;
838         }
839
840         rxq = rte_zmalloc("ethdev_rx_queue", sizeof(struct vmxnet3_rx_queue), RTE_CACHE_LINE_SIZE);
841         if (rxq == NULL) {
842                 PMD_INIT_LOG(ERR, "Cannot allocate Rx queue structure");
843                 return -ENOMEM;
844         }
845
846         rxq->mp = mp;
847         rxq->queue_id = queue_idx;
848         rxq->port_id = dev->data->port_id;
849         rxq->shared = &hw->rqd_start[queue_idx];
850         rxq->hw = hw;
851         rxq->qid1 = queue_idx;
852         rxq->qid2 = queue_idx + hw->num_rx_queues;
853         rxq->stopped = TRUE;
854
855         ring0 = &rxq->cmd_ring[0];
856         ring1 = &rxq->cmd_ring[1];
857         comp_ring = &rxq->comp_ring;
858
859         /* Rx vmxnet rings length should be between 256-4096 */
860         if (nb_desc < VMXNET3_DEF_RX_RING_SIZE) {
861                 PMD_INIT_LOG(ERR, "VMXNET3 Rx Ring Size Min: 256");
862                 return -EINVAL;
863         } else if (nb_desc > VMXNET3_RX_RING_MAX_SIZE) {
864                 PMD_INIT_LOG(ERR, "VMXNET3 Rx Ring Size Max: 4096");
865                 return -EINVAL;
866         } else {
867                 ring0->size = nb_desc;
868                 ring0->size &= ~VMXNET3_RING_SIZE_MASK;
869                 ring1->size = ring0->size;
870         }
871
872         comp_ring->size = ring0->size + ring1->size;
873
874         /* Rx vmxnet rings structure initialization */
875         ring0->next2fill = 0;
876         ring1->next2fill = 0;
877         ring0->next2comp = 0;
878         ring1->next2comp = 0;
879         ring0->gen = VMXNET3_INIT_GEN;
880         ring1->gen = VMXNET3_INIT_GEN;
881         comp_ring->next2proc = 0;
882         comp_ring->gen = VMXNET3_INIT_GEN;
883
884         size = sizeof(struct Vmxnet3_RxDesc) * (ring0->size + ring1->size);
885         size += sizeof(struct Vmxnet3_RxCompDesc) * comp_ring->size;
886
887         mz = ring_dma_zone_reserve(dev, "rxdesc", queue_idx, size, socket_id);
888         if (mz == NULL) {
889                 PMD_INIT_LOG(ERR, "ERROR: Creating queue descriptors zone");
890                 return -ENOMEM;
891         }
892         memset(mz->addr, 0, mz->len);
893
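            /* Layout inside the memzone: ring 0 Rx descriptors, ring 1 Rx
             * descriptors, then the completion ring. */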
894         /* cmd_ring0 initialization */
895         ring0->base = mz->addr;
896         ring0->basePA = mz->phys_addr;
897
898         /* cmd_ring1 initialization */
899         ring1->base = ring0->base + ring0->size;
900         ring1->basePA = ring0->basePA + sizeof(struct Vmxnet3_RxDesc) * ring0->size;
901
902         /* comp_ring initialization */
903         comp_ring->base = ring1->base + ring1->size;
904         comp_ring->basePA = ring1->basePA + sizeof(struct Vmxnet3_RxDesc) *
905                 ring1->size;
906
907         /* cmd_ring0-cmd_ring1 buf_info allocation */
908         for (i = 0; i < VMXNET3_RX_CMDRING_SIZE; i++) {
909
910                 ring = &rxq->cmd_ring[i];
911                 ring->rid = i;
912                 snprintf(mem_name, sizeof(mem_name), "rx_ring_%d_buf_info", i);
913
914                 ring->buf_info = rte_zmalloc(mem_name, ring->size * sizeof(vmxnet3_buf_info_t), RTE_CACHE_LINE_SIZE);
915                 if (ring->buf_info == NULL) {
916                         PMD_INIT_LOG(ERR, "ERROR: Creating rx_buf_info structure");
917                         return -ENOMEM;
918                 }
919         }
920
921         /* Update the data portion with rxq */
922         dev->data->rx_queues[queue_idx] = rxq;
923
924         return 0;
925 }
926
927 /*
928  * Initializes Receive Unit
929  * Load mbufs in rx queue in advance
930  */
931 int
932 vmxnet3_dev_rxtx_init(struct rte_eth_dev *dev)
933 {
934         struct vmxnet3_hw *hw = dev->data->dev_private;
935
936         int i, ret;
937         uint8_t j;
938
939         PMD_INIT_FUNC_TRACE();
940
941         for (i = 0; i < hw->num_rx_queues; i++) {
942                 vmxnet3_rx_queue_t *rxq = dev->data->rx_queues[i];
943
944                 for (j = 0; j < VMXNET3_RX_CMDRING_SIZE; j++) {
945                         /* Post enough buffers to fill the whole ring */
946                         ret = vmxnet3_post_rx_bufs(rxq, j);
947                         if (ret <= 0) {
948                                 PMD_INIT_LOG(ERR, "ERROR: Posting Rxq: %d buffers ring: %d", i, j);
949                                 return -ret;
950                         }
951                         /* Update the device with next2fill so the posted mbufs can be used for incoming packets */
952                         if (unlikely(rxq->shared->ctrl.updateRxProd)) {
953                                 VMXNET3_WRITE_BAR0_REG(hw, rxprod_reg[j] + (rxq->queue_id * VMXNET3_REG_ALIGN),
954                                                        rxq->cmd_ring[j].next2fill);
955                         }
956                 }
957                 rxq->stopped = FALSE;
958         }
959
960         for (i = 0; i < dev->data->nb_tx_queues; i++) {
961                 struct vmxnet3_tx_queue *txq = dev->data->tx_queues[i];
962
963                 txq->stopped = FALSE;
964         }
965
966         return 0;
967 }
968
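   /* Default 40-byte Toeplitz hash key, used when the application does not
    * supply its own RSS key. */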
969 static uint8_t rss_intel_key[40] = {
970         0x6D, 0x5A, 0x56, 0xDA, 0x25, 0x5B, 0x0E, 0xC2,
971         0x41, 0x67, 0x25, 0x3D, 0x43, 0xA3, 0x8F, 0xB0,
972         0xD0, 0xCA, 0x2B, 0xCB, 0xAE, 0x7B, 0x30, 0xB4,
973         0x77, 0xCB, 0x2D, 0xA3, 0x80, 0x30, 0xF2, 0x0C,
974         0x6A, 0x42, 0xB7, 0x3B, 0xBE, 0xAC, 0x01, 0xFA,
975 };
976
977 /*
978  * Configure RSS feature
979  */
980 int
981 vmxnet3_rss_configure(struct rte_eth_dev *dev)
982 {
983         struct vmxnet3_hw *hw = dev->data->dev_private;
984         struct VMXNET3_RSSConf *dev_rss_conf;
985         struct rte_eth_rss_conf *port_rss_conf;
986         uint64_t rss_hf;
987         uint8_t i, j;
988
989         PMD_INIT_FUNC_TRACE();
990
991         dev_rss_conf = hw->rss_conf;
992         port_rss_conf = &dev->data->dev_conf.rx_adv_conf.rss_conf;
993
994         /* loading hashFunc */
995         dev_rss_conf->hashFunc = VMXNET3_RSS_HASH_FUNC_TOEPLITZ;
996         /* loading hashKeySize */
997         dev_rss_conf->hashKeySize = VMXNET3_RSS_MAX_KEY_SIZE;
998         /* loading indTableSize: must not exceed VMXNET3_RSS_MAX_IND_TABLE_SIZE (128) */
999         dev_rss_conf->indTableSize = (uint16_t)(hw->num_rx_queues * 4);
1000
1001         if (port_rss_conf->rss_key == NULL) {
1002                 /* Default hash key */
1003                 port_rss_conf->rss_key = rss_intel_key;
1004         }
1005
1006         /* loading hashKey */
1007         memcpy(&dev_rss_conf->hashKey[0], port_rss_conf->rss_key, dev_rss_conf->hashKeySize);
1008
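            /* Spread the indirection table entries round-robin across the
             * configured Rx queues. */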
1009         /* loading indTable */
1010         for (i = 0, j = 0; i < dev_rss_conf->indTableSize; i++, j++) {
1011                 if (j == dev->data->nb_rx_queues)
1012                         j = 0;
1013                 dev_rss_conf->indTable[i] = j;
1014         }
1015
1016         /* loading hashType */
1017         dev_rss_conf->hashType = 0;
1018         rss_hf = port_rss_conf->rss_hf & VMXNET3_RSS_OFFLOAD_ALL;
1019         if (rss_hf & ETH_RSS_IPV4)
1020                 dev_rss_conf->hashType |= VMXNET3_RSS_HASH_TYPE_IPV4;
1021         if (rss_hf & ETH_RSS_NONFRAG_IPV4_TCP)
1022                 dev_rss_conf->hashType |= VMXNET3_RSS_HASH_TYPE_TCP_IPV4;
1023         if (rss_hf & ETH_RSS_IPV6)
1024                 dev_rss_conf->hashType |= VMXNET3_RSS_HASH_TYPE_IPV6;
1025         if (rss_hf & ETH_RSS_NONFRAG_IPV6_TCP)
1026                 dev_rss_conf->hashType |= VMXNET3_RSS_HASH_TYPE_TCP_IPV6;
1027
1028         return VMXNET3_SUCCESS;
1029 }