/*-
 *   BSD LICENSE
 *
 *   Copyright(c) 2010-2015 Intel Corporation. All rights reserved.
 *   All rights reserved.
 *
 *   Redistribution and use in source and binary forms, with or without
 *   modification, are permitted provided that the following conditions
 *   are met:
 *
 *     * Redistributions of source code must retain the above copyright
 *       notice, this list of conditions and the following disclaimer.
 *     * Redistributions in binary form must reproduce the above copyright
 *       notice, this list of conditions and the following disclaimer in
 *       the documentation and/or other materials provided with the
 *       distribution.
 *     * Neither the name of Intel Corporation nor the names of its
 *       contributors may be used to endorse or promote products derived
 *       from this software without specific prior written permission.
 *
 *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

#include <sys/queue.h>

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <errno.h>
#include <stdint.h>
#include <stdarg.h>
#include <unistd.h>
#include <inttypes.h>

#include <rte_byteorder.h>
#include <rte_common.h>
#include <rte_cycles.h>
#include <rte_log.h>
#include <rte_debug.h>
#include <rte_interrupts.h>
#include <rte_pci.h>
#include <rte_memory.h>
#include <rte_memzone.h>
#include <rte_launch.h>
#include <rte_eal.h>
#include <rte_per_lcore.h>
#include <rte_lcore.h>
#include <rte_atomic.h>
#include <rte_branch_prediction.h>
#include <rte_ring.h>
#include <rte_mempool.h>
#include <rte_malloc.h>
#include <rte_mbuf.h>
#include <rte_ether.h>
#include <rte_ethdev.h>
#include <rte_prefetch.h>
#include <rte_ip.h>
#include <rte_udp.h>
#include <rte_tcp.h>
#include <rte_sctp.h>
#include <rte_string_fns.h>
#include <rte_errno.h>

#include "base/vmxnet3_defs.h"
#include "vmxnet3_ring.h"

#include "vmxnet3_logs.h"
#include "vmxnet3_ethdev.h"

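/*
 * Rx producer ("fill") registers for the two hardware command rings:
 * VMXNET3_REG_RXPROD drives cmd_ring[0] and VMXNET3_REG_RXPROD2 drives
 * cmd_ring[1]. The array is indexed by ring id when telling the device
 * how far the driver has refilled each ring.
 */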
static const uint32_t rxprod_reg[2] = {VMXNET3_REG_RXPROD, VMXNET3_REG_RXPROD2};

static int vmxnet3_post_rx_bufs(vmxnet3_rx_queue_t*, uint8_t);
static void vmxnet3_tq_tx_complete(vmxnet3_tx_queue_t *);
#ifdef RTE_LIBRTE_VMXNET3_DEBUG_DRIVER_NOT_USED
static void vmxnet3_rxq_dump(struct vmxnet3_rx_queue *);
static void vmxnet3_txq_dump(struct vmxnet3_tx_queue *);
#endif

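/*
 * Allocate a raw (uninitialized) mbuf from the Rx mempool. The usual
 * packet-mbuf reset is skipped on purpose: the Rx path fills in every
 * field it needs (data_off, lengths, flags, etc.) when a packet lands
 * in the buffer.
 */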
static struct rte_mbuf *
rte_rxmbuf_alloc(struct rte_mempool *mp)
{
        struct rte_mbuf *m;

        m = __rte_mbuf_raw_alloc(mp);
        __rte_mbuf_sanity_check_raw(m, 0);
        return m;
}

#ifdef RTE_LIBRTE_VMXNET3_DEBUG_DRIVER_NOT_USED
static void
vmxnet3_rxq_dump(struct vmxnet3_rx_queue *rxq)
{
        uint32_t avail = 0;

        if (rxq == NULL)
                return;

        PMD_RX_LOG(DEBUG,
                   "RXQ: cmd0 base : 0x%p cmd1 base : 0x%p comp ring base : 0x%p.",
                   rxq->cmd_ring[0].base, rxq->cmd_ring[1].base, rxq->comp_ring.base);
        PMD_RX_LOG(DEBUG,
                   "RXQ: cmd0 basePA : 0x%lx cmd1 basePA : 0x%lx comp ring basePA : 0x%lx.",
                   (unsigned long)rxq->cmd_ring[0].basePA,
                   (unsigned long)rxq->cmd_ring[1].basePA,
                   (unsigned long)rxq->comp_ring.basePA);

        avail = vmxnet3_cmd_ring_desc_avail(&rxq->cmd_ring[0]);
        PMD_RX_LOG(DEBUG,
                   "RXQ:cmd0: size=%u; free=%u; next2proc=%u; queued=%u",
                   (uint32_t)rxq->cmd_ring[0].size, avail,
                   rxq->comp_ring.next2proc,
                   rxq->cmd_ring[0].size - avail);

        avail = vmxnet3_cmd_ring_desc_avail(&rxq->cmd_ring[1]);
        PMD_RX_LOG(DEBUG, "RXQ:cmd1 size=%u; free=%u; next2proc=%u; queued=%u",
                   (uint32_t)rxq->cmd_ring[1].size, avail, rxq->comp_ring.next2proc,
                   rxq->cmd_ring[1].size - avail);
}

static void
vmxnet3_txq_dump(struct vmxnet3_tx_queue *txq)
{
        uint32_t avail = 0;

        if (txq == NULL)
                return;

        PMD_TX_LOG(DEBUG, "TXQ: cmd base : 0x%p comp ring base : 0x%p data ring base : 0x%p.",
                   txq->cmd_ring.base, txq->comp_ring.base, txq->data_ring.base);
        PMD_TX_LOG(DEBUG, "TXQ: cmd basePA : 0x%lx comp ring basePA : 0x%lx data ring basePA : 0x%lx.",
                   (unsigned long)txq->cmd_ring.basePA,
                   (unsigned long)txq->comp_ring.basePA,
                   (unsigned long)txq->data_ring.basePA);

        avail = vmxnet3_cmd_ring_desc_avail(&txq->cmd_ring);
        PMD_TX_LOG(DEBUG, "TXQ: size=%u; free=%u; next2proc=%u; queued=%u",
                   (uint32_t)txq->cmd_ring.size, avail,
                   txq->comp_ring.next2proc, txq->cmd_ring.size - avail);
}
#endif

static void
vmxnet3_cmd_ring_release_mbufs(vmxnet3_cmd_ring_t *ring)
{
        while (ring->next2comp != ring->next2fill) {
                /* No need to worry about tx desc ownership, device is quiesced by now. */
                vmxnet3_buf_info_t *buf_info = ring->buf_info + ring->next2comp;

                if (buf_info->m) {
                        rte_pktmbuf_free(buf_info->m);
                        buf_info->m = NULL;
                        buf_info->bufPA = 0;
                        buf_info->len = 0;
                }
                vmxnet3_cmd_ring_adv_next2comp(ring);
        }
}

static void
vmxnet3_cmd_ring_release(vmxnet3_cmd_ring_t *ring)
{
        vmxnet3_cmd_ring_release_mbufs(ring);
        rte_free(ring->buf_info);
        ring->buf_info = NULL;
}


void
vmxnet3_dev_tx_queue_release(void *txq)
{
        vmxnet3_tx_queue_t *tq = txq;

        if (tq != NULL) {
                /* Release the cmd_ring */
                vmxnet3_cmd_ring_release(&tq->cmd_ring);
        }
}

void
vmxnet3_dev_rx_queue_release(void *rxq)
{
        int i;
        vmxnet3_rx_queue_t *rq = rxq;

        if (rq != NULL) {
                /* Release both the cmd_rings */
                for (i = 0; i < VMXNET3_RX_CMDRING_SIZE; i++)
                        vmxnet3_cmd_ring_release(&rq->cmd_ring[i]);
        }
}

static void
vmxnet3_dev_tx_queue_reset(void *txq)
{
        vmxnet3_tx_queue_t *tq = txq;
        struct vmxnet3_cmd_ring *ring = &tq->cmd_ring;
        struct vmxnet3_comp_ring *comp_ring = &tq->comp_ring;
        struct vmxnet3_data_ring *data_ring = &tq->data_ring;
        int size;

        if (tq != NULL) {
                /* Release the cmd_ring mbufs */
                vmxnet3_cmd_ring_release_mbufs(&tq->cmd_ring);
        }

        /* Tx vmxnet rings structure initialization */
        ring->next2fill = 0;
        ring->next2comp = 0;
        ring->gen = VMXNET3_INIT_GEN;
        comp_ring->next2proc = 0;
        comp_ring->gen = VMXNET3_INIT_GEN;

        size = sizeof(struct Vmxnet3_TxDesc) * ring->size;
        size += sizeof(struct Vmxnet3_TxCompDesc) * comp_ring->size;
        size += sizeof(struct Vmxnet3_TxDataDesc) * data_ring->size;

        memset(ring->base, 0, size);
}

static void
vmxnet3_dev_rx_queue_reset(void *rxq)
{
        int i;
        vmxnet3_rx_queue_t *rq = rxq;
        struct vmxnet3_cmd_ring *ring0, *ring1;
        struct vmxnet3_comp_ring *comp_ring;
        int size;

        if (rq != NULL) {
                /* Release both the cmd_rings mbufs */
                for (i = 0; i < VMXNET3_RX_CMDRING_SIZE; i++)
                        vmxnet3_cmd_ring_release_mbufs(&rq->cmd_ring[i]);
        }

        ring0 = &rq->cmd_ring[0];
        ring1 = &rq->cmd_ring[1];
        comp_ring = &rq->comp_ring;

        /* Rx vmxnet rings structure initialization */
        ring0->next2fill = 0;
        ring1->next2fill = 0;
        ring0->next2comp = 0;
        ring1->next2comp = 0;
        ring0->gen = VMXNET3_INIT_GEN;
        ring1->gen = VMXNET3_INIT_GEN;
        comp_ring->next2proc = 0;
        comp_ring->gen = VMXNET3_INIT_GEN;

        size = sizeof(struct Vmxnet3_RxDesc) * (ring0->size + ring1->size);
        size += sizeof(struct Vmxnet3_RxCompDesc) * comp_ring->size;

        memset(ring0->base, 0, size);
}

void
vmxnet3_dev_clear_queues(struct rte_eth_dev *dev)
{
        unsigned i;

        PMD_INIT_FUNC_TRACE();

        for (i = 0; i < dev->data->nb_tx_queues; i++) {
                struct vmxnet3_tx_queue *txq = dev->data->tx_queues[i];

                if (txq != NULL) {
                        txq->stopped = TRUE;
                        vmxnet3_dev_tx_queue_reset(txq);
                }
        }

        for (i = 0; i < dev->data->nb_rx_queues; i++) {
                struct vmxnet3_rx_queue *rxq = dev->data->rx_queues[i];

                if (rxq != NULL) {
                        rxq->stopped = TRUE;
                        vmxnet3_dev_rx_queue_reset(rxq);
                }
        }
}

static void
vmxnet3_tq_tx_complete(vmxnet3_tx_queue_t *txq)
{
        int completed = 0;
        struct rte_mbuf *mbuf;
        vmxnet3_comp_ring_t *comp_ring = &txq->comp_ring;
        struct Vmxnet3_TxCompDesc *tcd = (struct Vmxnet3_TxCompDesc *)
                (comp_ring->base + comp_ring->next2proc);

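        /*
         * Generation-bit handshake: the device writes each Tx completion
         * descriptor with the ring's current generation value, so an entry
         * whose gen field matches comp_ring->gen is a fresh completion.
         * comp_ring->gen is flipped whenever next2proc wraps, which makes
         * descriptors left over from the previous lap appear stale.
         */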
        while (tcd->gen == comp_ring->gen) {
                /* Release cmd_ring descriptor and free mbuf */
                VMXNET3_ASSERT(txq->cmd_ring.base[tcd->txdIdx].txd.eop == 1);
                while (txq->cmd_ring.next2comp != tcd->txdIdx) {
                        mbuf = txq->cmd_ring.buf_info[txq->cmd_ring.next2comp].m;
                        txq->cmd_ring.buf_info[txq->cmd_ring.next2comp].m = NULL;
                        rte_pktmbuf_free_seg(mbuf);

                        /* Mark the txd for which tcd was generated as completed */
                        vmxnet3_cmd_ring_adv_next2comp(&txq->cmd_ring);
                        completed++;
                }

                vmxnet3_comp_ring_adv_next2proc(comp_ring);
                tcd = (struct Vmxnet3_TxCompDesc *)(comp_ring->base +
                                                    comp_ring->next2proc);
        }

        PMD_TX_LOG(DEBUG, "Processed %d tx comps & command descs.", completed);
}

uint16_t
vmxnet3_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts,
                  uint16_t nb_pkts)
{
        uint16_t nb_tx;
        vmxnet3_tx_queue_t *txq = tx_queue;
        struct vmxnet3_hw *hw = txq->hw;

        if (unlikely(txq->stopped)) {
                PMD_TX_LOG(DEBUG, "Tx queue is stopped.");
                return 0;
        }

        /* Free up the comp_descriptors aggressively */
        vmxnet3_tq_tx_complete(txq);

        nb_tx = 0;
        while (nb_tx < nb_pkts) {
                Vmxnet3_GenericDesc *gdesc;
                vmxnet3_buf_info_t *tbi;
                uint32_t first2fill, avail, dw2;
                struct rte_mbuf *txm = tx_pkts[nb_tx];
                struct rte_mbuf *m_seg = txm;

                /* Drop the packet if it is excessively fragmented */
                if (unlikely(txm->nb_segs > VMXNET3_MAX_TXD_PER_PKT)) {
                        ++txq->stats.drop_too_many_segs;
                        ++txq->stats.drop_total;
                        rte_pktmbuf_free(txm);
                        ++nb_tx;
                        continue;
                }

                /* Is the command ring full? */
                avail = vmxnet3_cmd_ring_desc_avail(&txq->cmd_ring);
                if (txm->nb_segs > avail) {
                        ++txq->stats.tx_ring_full;
                        break;
                }

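                /*
                 * The SOP descriptor is written with the *previous* generation
                 * value so the device ignores the packet while its descriptor
                 * chain is still being filled in; only after every segment
                 * descriptor is in place (and a compiler barrier below) is the
                 * SOP gen bit flipped to hand the whole packet to the device.
                 */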
                /* use the previous gen bit for the SOP desc */
                dw2 = (txq->cmd_ring.gen ^ 0x1) << VMXNET3_TXD_GEN_SHIFT;
                first2fill = txq->cmd_ring.next2fill;
                do {
                        /* Remember the transmit buffer for cleanup */
                        tbi = txq->cmd_ring.buf_info + txq->cmd_ring.next2fill;
                        tbi->m = m_seg;

                        /* NB: the following assumes that the VMXNET3 maximum
                           transmit buffer size (16K) is greater than the
                           maximum mbuf segment size. */
                        gdesc = txq->cmd_ring.base + txq->cmd_ring.next2fill;
                        gdesc->txd.addr = rte_mbuf_data_dma_addr(m_seg);
                        gdesc->dword[2] = dw2 | m_seg->data_len;
                        gdesc->dword[3] = 0;

                        /* move to the next2fill descriptor */
                        vmxnet3_cmd_ring_adv_next2fill(&txq->cmd_ring);

                        /* use the right gen for non-SOP desc */
                        dw2 = txq->cmd_ring.gen << VMXNET3_TXD_GEN_SHIFT;
                } while ((m_seg = m_seg->next) != NULL);

                /* Update the EOP descriptor */
                gdesc->dword[3] |= VMXNET3_TXD_EOP | VMXNET3_TXD_CQ;

                /* Add VLAN tag if present */
                gdesc = txq->cmd_ring.base + first2fill;
                if (txm->ol_flags & PKT_TX_VLAN_PKT) {
                        gdesc->txd.ti = 1;
                        gdesc->txd.tci = txm->vlan_tci;
                }

                /* TODO: Add transmit checksum offload here */

                /* flip the GEN bit on the SOP */
                rte_compiler_barrier();
                gdesc->dword[2] ^= VMXNET3_TXD_GEN;

                txq->shared->ctrl.txNumDeferred++;
                nb_tx++;
        }

        PMD_TX_LOG(DEBUG, "vmxnet3 txThreshold: %u", txq->shared->ctrl.txThreshold);

        if (txq->shared->ctrl.txNumDeferred >= txq->shared->ctrl.txThreshold) {
                txq->shared->ctrl.txNumDeferred = 0;
                /* Notify vSwitch that packets are available. */
                VMXNET3_WRITE_BAR0_REG(hw, (VMXNET3_REG_TXPROD + txq->queue_id * VMXNET3_REG_ALIGN),
                                       txq->cmd_ring.next2fill);
        }

        return nb_tx;
}

/*
 *  Allocates mbufs and clusters, and posts Rx descriptors with the buffer
 *  details so that the device can receive packets into those buffers.
 *      Ring layout:
 *      Of the two rings, the 1st ring holds buffers of type 0 and type 1.
 *      bufs_per_pkt is set so that in the non-LRO case all the buffers required
 *      by a frame fit in the 1st ring (the 1st buf of type 0, the rest of type 1).
 *      The 2nd ring holds buffers of type 1 only and is used mostly for LRO.
 */
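/*
 * Returns the number of buffers posted in this call; if the ring still has
 * no buffers posted at all (e.g. every mbuf allocation failed), -err is
 * returned instead.
 */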
static int
vmxnet3_post_rx_bufs(vmxnet3_rx_queue_t *rxq, uint8_t ring_id)
{
        int err = 0;
        uint32_t i = 0, val = 0;
        struct vmxnet3_cmd_ring *ring = &rxq->cmd_ring[ring_id];

        if (ring_id == 0) {
                /* Usually: One HEAD type buf per packet
                 * val = (ring->next2fill % rxq->hw->bufs_per_pkt) ?
                 * VMXNET3_RXD_BTYPE_BODY : VMXNET3_RXD_BTYPE_HEAD;
                 */

                /* We use single packet buffer so all heads here */
                val = VMXNET3_RXD_BTYPE_HEAD;
        } else {
                /* All BODY type buffers for 2nd ring */
                val = VMXNET3_RXD_BTYPE_BODY;
        }

        while (vmxnet3_cmd_ring_desc_avail(ring) > 0) {
                struct Vmxnet3_RxDesc *rxd;
                struct rte_mbuf *mbuf;
                vmxnet3_buf_info_t *buf_info = &ring->buf_info[ring->next2fill];

                rxd = (struct Vmxnet3_RxDesc *)(ring->base + ring->next2fill);

                /* Allocate blank mbuf for the current Rx Descriptor */
                mbuf = rte_rxmbuf_alloc(rxq->mp);
                if (unlikely(mbuf == NULL)) {
                        PMD_RX_LOG(ERR, "Error allocating mbuf in %s", __func__);
                        rxq->stats.rx_buf_alloc_failure++;
                        err = ENOMEM;
                        break;
                }

                /*
                 * Load the mbuf pointer into buf_info[next2fill];
                 * the buf_info entry plays the same role as the cookie in a
                 * virtio virtqueue.
                 */
                buf_info->m = mbuf;
                buf_info->len = (uint16_t)(mbuf->buf_len -
                                           RTE_PKTMBUF_HEADROOM);
                buf_info->bufPA =
                        rte_mbuf_data_dma_addr_default(mbuf);

                /* Load Rx Descriptor with the buffer's GPA */
                rxd->addr = buf_info->bufPA;

                /* After this point rxd->addr MUST not be NULL */
                rxd->btype = val;
                rxd->len = buf_info->len;
                /* Flip gen bit at the end to change ownership */
                rxd->gen = ring->gen;

                vmxnet3_cmd_ring_adv_next2fill(ring);
                i++;
        }

        /* Return error only if no buffers are posted at present */
        if (vmxnet3_cmd_ring_desc_avail(ring) >= (ring->size - 1))
                return -err;
        else
                return i;
}


/* Receive side checksum and other offloads */
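/*
 * The fields consulted below come from the Rx completion descriptor written
 * by the device: ts flags a stripped VLAN tag, rssType/rssHash carry the RSS
 * result, v4 marks an IPv4 packet, and cnc/ipc/tuc report whether checksums
 * were calculated and whether the IP and TCP/UDP checksums were correct.
 */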
static void
vmxnet3_rx_offload(const Vmxnet3_RxCompDesc *rcd, struct rte_mbuf *rxm)
{
        /* Check for hardware stripped VLAN tag */
        if (rcd->ts) {
                rxm->ol_flags |= PKT_RX_VLAN_PKT;
                rxm->vlan_tci = rte_le_to_cpu_16((uint16_t)rcd->tci);
        }

        /* Check for RSS */
        if (rcd->rssType != VMXNET3_RCD_RSS_TYPE_NONE) {
                rxm->ol_flags |= PKT_RX_RSS_HASH;
                rxm->hash.rss = rcd->rssHash;
        }

        /* Check packet type, checksum errors, etc. Only IPv4 is supported for now. */
        if (rcd->v4) {
                struct ether_hdr *eth = rte_pktmbuf_mtod(rxm, struct ether_hdr *);
                struct ipv4_hdr *ip = (struct ipv4_hdr *)(eth + 1);

                if (((ip->version_ihl & 0xf) << 2) > (int)sizeof(struct ipv4_hdr))
                        rxm->packet_type = RTE_PTYPE_L3_IPV4_EXT;
                else
                        rxm->packet_type = RTE_PTYPE_L3_IPV4;

                if (!rcd->cnc) {
                        if (!rcd->ipc)
                                rxm->ol_flags |= PKT_RX_IP_CKSUM_BAD;

                        if ((rcd->tcp || rcd->udp) && !rcd->tuc)
                                rxm->ol_flags |= PKT_RX_L4_CKSUM_BAD;
                }
        }
}

/*
 * Process the Rx completion ring of the given vmxnet3_rx_queue for a burst
 * of up to nb_pkts packets and return the number of packets received.
 */
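/*
 * Note: each completion names the command ring it belongs to through rqID
 * (rqID == rxq->qid1 selects cmd_ring[0], otherwise cmd_ring[1]), and this
 * driver currently expects every packet to occupy a single HEAD buffer,
 * i.e. sop and eop set on the same completion descriptor.
 */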
uint16_t
vmxnet3_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t nb_pkts)
{
        uint16_t nb_rx;
        uint32_t nb_rxd, idx;
        uint8_t ring_idx;
        vmxnet3_rx_queue_t *rxq;
        Vmxnet3_RxCompDesc *rcd;
        vmxnet3_buf_info_t *rbi;
        Vmxnet3_RxDesc *rxd;
        struct rte_mbuf *rxm = NULL;
        struct vmxnet3_hw *hw;

        nb_rx = 0;
        ring_idx = 0;
        nb_rxd = 0;
        idx = 0;

        rxq = rx_queue;
        hw = rxq->hw;

        rcd = &rxq->comp_ring.base[rxq->comp_ring.next2proc].rcd;

        if (unlikely(rxq->stopped)) {
                PMD_RX_LOG(DEBUG, "Rx queue is stopped.");
                return 0;
        }

        while (rcd->gen == rxq->comp_ring.gen) {
                if (nb_rx >= nb_pkts)
                        break;

                idx = rcd->rxdIdx;
                ring_idx = (uint8_t)((rcd->rqID == rxq->qid1) ? 0 : 1);
                rxd = (Vmxnet3_RxDesc *)rxq->cmd_ring[ring_idx].base + idx;
                rbi = rxq->cmd_ring[ring_idx].buf_info + idx;

                if (unlikely(rcd->sop != 1 || rcd->eop != 1)) {
                        rte_pktmbuf_free_seg(rbi->m);
                        PMD_RX_LOG(DEBUG, "Packet spread across multiple buffers\n");
                        goto rcd_done;
                }

                PMD_RX_LOG(DEBUG, "rxd idx: %d ring idx: %d.", idx, ring_idx);

                VMXNET3_ASSERT(rcd->len <= rxd->len);
                VMXNET3_ASSERT(rbi->m);

                if (unlikely(rcd->len == 0)) {
                        PMD_RX_LOG(DEBUG, "Rx buf was skipped. rxring[%d][%d]\n",
                                   ring_idx, idx);
                        VMXNET3_ASSERT(rcd->sop && rcd->eop);
                        rte_pktmbuf_free_seg(rbi->m);
                        goto rcd_done;
                }

                /* Assuming a packet is coming in a single packet buffer */
                if (unlikely(rxd->btype != VMXNET3_RXD_BTYPE_HEAD)) {
                        PMD_RX_LOG(DEBUG,
                                   "Alert : Misbehaving device, incorrect "
                                   "buffer type used. Packet dropped.");
                        rte_pktmbuf_free_seg(rbi->m);
                        goto rcd_done;
                }
                VMXNET3_ASSERT(rxd->btype == VMXNET3_RXD_BTYPE_HEAD);

                /* Get the packet buffer pointer from buf_info */
                rxm = rbi->m;

                /* Clear descriptor associated buf_info to be reused */
                rbi->m = NULL;
                rbi->bufPA = 0;

                /* Update the index that we received a packet */
                rxq->cmd_ring[ring_idx].next2comp = idx;

                /* For RCD with EOP set, check if there is frame error */
                if (unlikely(rcd->err)) {
                        rxq->stats.drop_total++;
                        rxq->stats.drop_err++;

                        if (!rcd->fcs) {
                                rxq->stats.drop_fcs++;
                                PMD_RX_LOG(ERR, "Recv packet dropped due to frame err.");
                        }
                        PMD_RX_LOG(ERR, "Error in received packet rcd#:%d rxd:%d",
                                   (int)(rcd - (struct Vmxnet3_RxCompDesc *)
                                         rxq->comp_ring.base), rcd->rxdIdx);
                        rte_pktmbuf_free_seg(rxm);
                        goto rcd_done;
                }

                /* Initialize newly received packet buffer */
                rxm->port = rxq->port_id;
                rxm->nb_segs = 1;
                rxm->next = NULL;
                rxm->pkt_len = (uint16_t)rcd->len;
                rxm->data_len = (uint16_t)rcd->len;
                rxm->data_off = RTE_PKTMBUF_HEADROOM;
                rxm->ol_flags = 0;
                rxm->vlan_tci = 0;

                vmxnet3_rx_offload(rcd, rxm);

                rx_pkts[nb_rx++] = rxm;
rcd_done:
                rxq->cmd_ring[ring_idx].next2comp = idx;
                VMXNET3_INC_RING_IDX_ONLY(rxq->cmd_ring[ring_idx].next2comp, rxq->cmd_ring[ring_idx].size);

                /* It's time to allocate some new buf and renew descriptors */
                vmxnet3_post_rx_bufs(rxq, ring_idx);
                if (unlikely(rxq->shared->ctrl.updateRxProd)) {
                        VMXNET3_WRITE_BAR0_REG(hw, rxprod_reg[ring_idx] + (rxq->queue_id * VMXNET3_REG_ALIGN),
                                               rxq->cmd_ring[ring_idx].next2fill);
                }

                /* Advance to the next descriptor in comp_ring */
                vmxnet3_comp_ring_adv_next2proc(&rxq->comp_ring);

                rcd = &rxq->comp_ring.base[rxq->comp_ring.next2proc].rcd;
                nb_rxd++;
                if (nb_rxd > rxq->cmd_ring[0].size) {
                        PMD_RX_LOG(ERR,
                                   "Used up quota of receiving packets,"
                                   " relinquish control.");
                        break;
                }
        }

        return nb_rx;
}

/*
 * Create a memzone for the device rings. malloc cannot be used since the
 * physical address of the memory is needed. If the memzone already exists,
 * return a pointer to the existing one.
 */
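/*
 * The zone name encodes the driver name, ring name, port id and queue id,
 * so a later setup call for the same queue finds and reuses the zone
 * instead of reserving a new one.
 */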
static const struct rte_memzone *
ring_dma_zone_reserve(struct rte_eth_dev *dev, const char *ring_name,
                      uint16_t queue_id, uint32_t ring_size, int socket_id)
{
        char z_name[RTE_MEMZONE_NAMESIZE];
        const struct rte_memzone *mz;

        snprintf(z_name, sizeof(z_name), "%s_%s_%d_%d",
                        dev->driver->pci_drv.name, ring_name,
                        dev->data->port_id, queue_id);

        mz = rte_memzone_lookup(z_name);
        if (mz)
                return mz;

        return rte_memzone_reserve_aligned(z_name, ring_size,
                        socket_id, 0, VMXNET3_RING_BA_ALIGN);
}

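/*
 * Tx queue setup places all three rings in one contiguous memzone, laid out
 * as: cmd ring (Vmxnet3_TxDesc[size]) | completion ring
 * (Vmxnet3_TxCompDesc[size]) | data ring (Vmxnet3_TxDataDesc[size]);
 * the base/basePA pointers below are simply offsets into that zone.
 */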
int
vmxnet3_dev_tx_queue_setup(struct rte_eth_dev *dev,
                           uint16_t queue_idx,
                           uint16_t nb_desc,
                           unsigned int socket_id,
                           __attribute__((unused)) const struct rte_eth_txconf *tx_conf)
{
        struct vmxnet3_hw *hw = dev->data->dev_private;
        const struct rte_memzone *mz;
        struct vmxnet3_tx_queue *txq;
        struct vmxnet3_cmd_ring *ring;
        struct vmxnet3_comp_ring *comp_ring;
        struct vmxnet3_data_ring *data_ring;
        int size;

        PMD_INIT_FUNC_TRACE();

        if ((tx_conf->txq_flags & ETH_TXQ_FLAGS_NOXSUMS) !=
            ETH_TXQ_FLAGS_NOXSUMS) {
                PMD_INIT_LOG(ERR, "Tx checksum offload is not supported yet");
                return -EINVAL;
        }

        txq = rte_zmalloc("ethdev_tx_queue", sizeof(struct vmxnet3_tx_queue), RTE_CACHE_LINE_SIZE);
        if (txq == NULL) {
                PMD_INIT_LOG(ERR, "Cannot allocate tx queue structure");
                return -ENOMEM;
        }

        txq->queue_id = queue_idx;
        txq->port_id = dev->data->port_id;
        txq->shared = &hw->tqd_start[queue_idx];
        txq->hw = hw;
        txq->qid = queue_idx;
        txq->stopped = TRUE;

        ring = &txq->cmd_ring;
        comp_ring = &txq->comp_ring;
        data_ring = &txq->data_ring;

        /* Tx vmxnet ring size must be between 512 and 4096 */
        if (nb_desc < VMXNET3_DEF_TX_RING_SIZE) {
                PMD_INIT_LOG(ERR, "VMXNET3 Tx Ring Size Min: %u",
                             VMXNET3_DEF_TX_RING_SIZE);
                return -EINVAL;
        } else if (nb_desc > VMXNET3_TX_RING_MAX_SIZE) {
                PMD_INIT_LOG(ERR, "VMXNET3 Tx Ring Size Max: %u",
                             VMXNET3_TX_RING_MAX_SIZE);
                return -EINVAL;
        } else {
                ring->size = nb_desc;
                ring->size &= ~VMXNET3_RING_SIZE_MASK;
        }
        comp_ring->size = data_ring->size = ring->size;

        /* Tx vmxnet rings structure initialization */
        ring->next2fill = 0;
        ring->next2comp = 0;
        ring->gen = VMXNET3_INIT_GEN;
        comp_ring->next2proc = 0;
        comp_ring->gen = VMXNET3_INIT_GEN;

        size = sizeof(struct Vmxnet3_TxDesc) * ring->size;
        size += sizeof(struct Vmxnet3_TxCompDesc) * comp_ring->size;
        size += sizeof(struct Vmxnet3_TxDataDesc) * data_ring->size;

        mz = ring_dma_zone_reserve(dev, "txdesc", queue_idx, size, socket_id);
        if (mz == NULL) {
                PMD_INIT_LOG(ERR, "ERROR: Creating queue descriptors zone");
                return -ENOMEM;
        }
        memset(mz->addr, 0, mz->len);

        /* cmd_ring initialization */
        ring->base = mz->addr;
        ring->basePA = mz->phys_addr;

        /* comp_ring initialization */
        comp_ring->base = ring->base + ring->size;
        comp_ring->basePA = ring->basePA +
                (sizeof(struct Vmxnet3_TxDesc) * ring->size);

        /* data_ring initialization */
        data_ring->base = (Vmxnet3_TxDataDesc *)(comp_ring->base + comp_ring->size);
        data_ring->basePA = comp_ring->basePA +
                        (sizeof(struct Vmxnet3_TxCompDesc) * comp_ring->size);

        /* cmd_ring0 buf_info allocation */
        ring->buf_info = rte_zmalloc("tx_ring_buf_info",
                                     ring->size * sizeof(vmxnet3_buf_info_t), RTE_CACHE_LINE_SIZE);
        if (ring->buf_info == NULL) {
                PMD_INIT_LOG(ERR, "ERROR: Creating tx_buf_info structure");
                return -ENOMEM;
        }

        /* Update the data portion with txq */
        dev->data->tx_queues[queue_idx] = txq;

        return 0;
}

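/*
 * Rx queue setup likewise carves one contiguous memzone into: cmd ring 0
 * (Vmxnet3_RxDesc[size]) | cmd ring 1 (Vmxnet3_RxDesc[size]) | completion
 * ring (Vmxnet3_RxCompDesc[ring0 + ring1]); the packet buffers posted to
 * the rings come from the mempool supplied by the application.
 */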
int
vmxnet3_dev_rx_queue_setup(struct rte_eth_dev *dev,
                           uint16_t queue_idx,
                           uint16_t nb_desc,
                           unsigned int socket_id,
                           __attribute__((unused)) const struct rte_eth_rxconf *rx_conf,
                           struct rte_mempool *mp)
{
        const struct rte_memzone *mz;
        struct vmxnet3_rx_queue *rxq;
        struct vmxnet3_hw     *hw = dev->data->dev_private;
        struct vmxnet3_cmd_ring *ring0, *ring1, *ring;
        struct vmxnet3_comp_ring *comp_ring;
        int size;
        uint8_t i;
        char mem_name[32];
        uint16_t buf_size;

        PMD_INIT_FUNC_TRACE();

        buf_size = rte_pktmbuf_data_room_size(mp) -
                RTE_PKTMBUF_HEADROOM;

        if (dev->data->dev_conf.rxmode.max_rx_pkt_len > buf_size) {
                PMD_INIT_LOG(ERR, "buf_size = %u, max_pkt_len = %u, "
                             "VMXNET3 doesn't support scattered packets yet",
                             buf_size, dev->data->dev_conf.rxmode.max_rx_pkt_len);
                return -EINVAL;
        }

        rxq = rte_zmalloc("ethdev_rx_queue", sizeof(struct vmxnet3_rx_queue), RTE_CACHE_LINE_SIZE);
        if (rxq == NULL) {
                PMD_INIT_LOG(ERR, "Cannot allocate rx queue structure");
                return -ENOMEM;
        }

        rxq->mp = mp;
        rxq->queue_id = queue_idx;
        rxq->port_id = dev->data->port_id;
        rxq->shared = &hw->rqd_start[queue_idx];
        rxq->hw = hw;
        rxq->qid1 = queue_idx;
        rxq->qid2 = queue_idx + hw->num_rx_queues;
        rxq->stopped = TRUE;

        ring0 = &rxq->cmd_ring[0];
        ring1 = &rxq->cmd_ring[1];
        comp_ring = &rxq->comp_ring;

        /* Rx vmxnet ring size must be between 256 and 4096 */
        if (nb_desc < VMXNET3_DEF_RX_RING_SIZE) {
                PMD_INIT_LOG(ERR, "VMXNET3 Rx Ring Size Min: 256");
                return -EINVAL;
        } else if (nb_desc > VMXNET3_RX_RING_MAX_SIZE) {
                PMD_INIT_LOG(ERR, "VMXNET3 Rx Ring Size Max: 4096");
                return -EINVAL;
        } else {
                ring0->size = nb_desc;
                ring0->size &= ~VMXNET3_RING_SIZE_MASK;
                ring1->size = ring0->size;
        }

        comp_ring->size = ring0->size + ring1->size;

        /* Rx vmxnet rings structure initialization */
        ring0->next2fill = 0;
        ring1->next2fill = 0;
        ring0->next2comp = 0;
        ring1->next2comp = 0;
        ring0->gen = VMXNET3_INIT_GEN;
        ring1->gen = VMXNET3_INIT_GEN;
        comp_ring->next2proc = 0;
        comp_ring->gen = VMXNET3_INIT_GEN;

        size = sizeof(struct Vmxnet3_RxDesc) * (ring0->size + ring1->size);
        size += sizeof(struct Vmxnet3_RxCompDesc) * comp_ring->size;

        mz = ring_dma_zone_reserve(dev, "rxdesc", queue_idx, size, socket_id);
        if (mz == NULL) {
                PMD_INIT_LOG(ERR, "ERROR: Creating queue descriptors zone");
                return -ENOMEM;
        }
        memset(mz->addr, 0, mz->len);

        /* cmd_ring0 initialization */
        ring0->base = mz->addr;
        ring0->basePA = mz->phys_addr;

        /* cmd_ring1 initialization */
        ring1->base = ring0->base + ring0->size;
        ring1->basePA = ring0->basePA + sizeof(struct Vmxnet3_RxDesc) * ring0->size;

        /* comp_ring initialization */
        comp_ring->base = ring1->base + ring1->size;
        comp_ring->basePA = ring1->basePA + sizeof(struct Vmxnet3_RxDesc) *
                ring1->size;

        /* cmd_ring0-cmd_ring1 buf_info allocation */
        for (i = 0; i < VMXNET3_RX_CMDRING_SIZE; i++) {
                ring = &rxq->cmd_ring[i];
                ring->rid = i;
                snprintf(mem_name, sizeof(mem_name), "rx_ring_%d_buf_info", i);

                ring->buf_info = rte_zmalloc(mem_name, ring->size * sizeof(vmxnet3_buf_info_t), RTE_CACHE_LINE_SIZE);
                if (ring->buf_info == NULL) {
                        PMD_INIT_LOG(ERR, "ERROR: Creating rx_buf_info structure");
                        return -ENOMEM;
                }
        }

        /* Update the data portion with rxq */
        dev->data->rx_queues[queue_idx] = rxq;

        return 0;
}

/*
 * Initializes the Receive Unit:
 * loads mbufs into the Rx queues in advance.
 */
int
vmxnet3_dev_rxtx_init(struct rte_eth_dev *dev)
{
        struct vmxnet3_hw *hw = dev->data->dev_private;

        int i, ret;
        uint8_t j;

        PMD_INIT_FUNC_TRACE();

        for (i = 0; i < hw->num_rx_queues; i++) {
                vmxnet3_rx_queue_t *rxq = dev->data->rx_queues[i];

                for (j = 0; j < VMXNET3_RX_CMDRING_SIZE; j++) {
                        /* vmxnet3_post_rx_bufs() fills the whole ring with fresh mbufs */
                        ret = vmxnet3_post_rx_bufs(rxq, j);
                        if (ret <= 0) {
                                PMD_INIT_LOG(ERR, "ERROR: Posting Rxq: %d buffers ring: %d", i, j);
                                return -ret;
                        }
                        /* Tell the device how far next2fill has advanced so it can use the posted mbufs */
                        if (unlikely(rxq->shared->ctrl.updateRxProd)) {
                                VMXNET3_WRITE_BAR0_REG(hw, rxprod_reg[j] + (rxq->queue_id * VMXNET3_REG_ALIGN),
                                                       rxq->cmd_ring[j].next2fill);
                        }
                }
                rxq->stopped = FALSE;
        }

        for (i = 0; i < dev->data->nb_tx_queues; i++) {
                struct vmxnet3_tx_queue *txq = dev->data->tx_queues[i];

                txq->stopped = FALSE;
        }

        return 0;
}

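/*
 * Default 40-byte Toeplitz RSS hash key, used when the application does not
 * provide a key of its own in rte_eth_rss_conf.
 */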
static uint8_t rss_intel_key[40] = {
        0x6D, 0x5A, 0x56, 0xDA, 0x25, 0x5B, 0x0E, 0xC2,
        0x41, 0x67, 0x25, 0x3D, 0x43, 0xA3, 0x8F, 0xB0,
        0xD0, 0xCA, 0x2B, 0xCB, 0xAE, 0x7B, 0x30, 0xB4,
        0x77, 0xCB, 0x2D, 0xA3, 0x80, 0x30, 0xF2, 0x0C,
        0x6A, 0x42, 0xB7, 0x3B, 0xBE, 0xAC, 0x01, 0xFA,
};

/*
 * Configure RSS feature
 */
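/*
 * The indirection table is filled round-robin over the configured Rx queues,
 * and the DPDK rss_hf flags are translated into the VMXNET3 hash-type bits;
 * flags outside VMXNET3_RSS_OFFLOAD_ALL are silently ignored.
 */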
int
vmxnet3_rss_configure(struct rte_eth_dev *dev)
{
        struct vmxnet3_hw *hw = dev->data->dev_private;
        struct VMXNET3_RSSConf *dev_rss_conf;
        struct rte_eth_rss_conf *port_rss_conf;
        uint64_t rss_hf;
        uint8_t i, j;

        PMD_INIT_FUNC_TRACE();

        dev_rss_conf = hw->rss_conf;
        port_rss_conf = &dev->data->dev_conf.rx_adv_conf.rss_conf;

        /* loading hashFunc */
        dev_rss_conf->hashFunc = VMXNET3_RSS_HASH_FUNC_TOEPLITZ;
        /* loading hashKeySize */
        dev_rss_conf->hashKeySize = VMXNET3_RSS_MAX_KEY_SIZE;
        /* loading indTableSize: must not exceed VMXNET3_RSS_MAX_IND_TABLE_SIZE (128) */
        dev_rss_conf->indTableSize = (uint16_t)(hw->num_rx_queues * 4);

        if (port_rss_conf->rss_key == NULL) {
                /* Default hash key */
                port_rss_conf->rss_key = rss_intel_key;
        }

        /* loading hashKey */
        memcpy(&dev_rss_conf->hashKey[0], port_rss_conf->rss_key, dev_rss_conf->hashKeySize);

        /* loading indTable */
        for (i = 0, j = 0; i < dev_rss_conf->indTableSize; i++, j++) {
                if (j == dev->data->nb_rx_queues)
                        j = 0;
                dev_rss_conf->indTable[i] = j;
        }

        /* loading hashType */
        dev_rss_conf->hashType = 0;
        rss_hf = port_rss_conf->rss_hf & VMXNET3_RSS_OFFLOAD_ALL;
        if (rss_hf & ETH_RSS_IPV4)
                dev_rss_conf->hashType |= VMXNET3_RSS_HASH_TYPE_IPV4;
        if (rss_hf & ETH_RSS_NONFRAG_IPV4_TCP)
                dev_rss_conf->hashType |= VMXNET3_RSS_HASH_TYPE_TCP_IPV4;
        if (rss_hf & ETH_RSS_IPV6)
                dev_rss_conf->hashType |= VMXNET3_RSS_HASH_TYPE_IPV6;
        if (rss_hf & ETH_RSS_NONFRAG_IPV6_TCP)
                dev_rss_conf->hashType |= VMXNET3_RSS_HASH_TYPE_TCP_IPV6;

        return VMXNET3_SUCCESS;
}