[dpdk.git] drivers/net/vmxnet3/vmxnet3_rxtx.c (commit ebf883fb401fb6136aae7d2e0bf453eaa941e208)
1 /*-
2  *   BSD LICENSE
3  *
4  *   Copyright(c) 2010-2015 Intel Corporation. All rights reserved.
5  *   All rights reserved.
6  *
7  *   Redistribution and use in source and binary forms, with or without
8  *   modification, are permitted provided that the following conditions
9  *   are met:
10  *
11  *     * Redistributions of source code must retain the above copyright
12  *       notice, this list of conditions and the following disclaimer.
13  *     * Redistributions in binary form must reproduce the above copyright
14  *       notice, this list of conditions and the following disclaimer in
15  *       the documentation and/or other materials provided with the
16  *       distribution.
17  *     * Neither the name of Intel Corporation nor the names of its
18  *       contributors may be used to endorse or promote products derived
19  *       from this software without specific prior written permission.
20  *
21  *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
22  *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
23  *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
24  *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
25  *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
26  *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
27  *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
28  *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
29  *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
30  *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
31  *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
32  */
33
34 #include <sys/queue.h>
35
36 #include <stdio.h>
37 #include <stdlib.h>
38 #include <string.h>
39 #include <errno.h>
40 #include <stdint.h>
41 #include <stdarg.h>
42 #include <unistd.h>
43 #include <inttypes.h>
44
45 #include <rte_byteorder.h>
46 #include <rte_common.h>
47 #include <rte_cycles.h>
48 #include <rte_log.h>
49 #include <rte_debug.h>
50 #include <rte_interrupts.h>
51 #include <rte_pci.h>
52 #include <rte_memory.h>
53 #include <rte_memzone.h>
54 #include <rte_launch.h>
55 #include <rte_eal.h>
56 #include <rte_per_lcore.h>
57 #include <rte_lcore.h>
58 #include <rte_atomic.h>
59 #include <rte_branch_prediction.h>
60 #include <rte_ring.h>
61 #include <rte_mempool.h>
62 #include <rte_malloc.h>
63 #include <rte_mbuf.h>
64 #include <rte_ether.h>
65 #include <rte_ethdev.h>
66 #include <rte_prefetch.h>
67 #include <rte_ip.h>
68 #include <rte_udp.h>
69 #include <rte_tcp.h>
70 #include <rte_sctp.h>
71 #include <rte_string_fns.h>
72 #include <rte_errno.h>
73
74 #include "base/vmxnet3_defs.h"
75 #include "vmxnet3_ring.h"
76
77 #include "vmxnet3_logs.h"
78 #include "vmxnet3_ethdev.h"
79
80 static const uint32_t rxprod_reg[2] = {VMXNET3_REG_RXPROD, VMXNET3_REG_RXPROD2};
81
82 static int vmxnet3_post_rx_bufs(vmxnet3_rx_queue_t*, uint8_t);
83 static void vmxnet3_tq_tx_complete(vmxnet3_tx_queue_t *);
84 #ifdef RTE_LIBRTE_VMXNET3_DEBUG_DRIVER_NOT_USED
85 static void vmxnet3_rxq_dump(struct vmxnet3_rx_queue *);
86 static void vmxnet3_txq_dump(struct vmxnet3_tx_queue *);
87 #endif
88
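/*
 * Allocate a raw mbuf from the Rx mempool; the packet fields are initialized
 * later, when a received frame is attached to it in vmxnet3_recv_pkts().
 */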
89 static struct rte_mbuf *
90 rte_rxmbuf_alloc(struct rte_mempool *mp)
91 {
92         struct rte_mbuf *m;
93
94         m = __rte_mbuf_raw_alloc(mp);
95         __rte_mbuf_sanity_check_raw(m, 0);
96         return m;
97 }
98
99 #ifdef RTE_LIBRTE_VMXNET3_DEBUG_DRIVER_NOT_USED
100 static void
101 vmxnet3_rxq_dump(struct vmxnet3_rx_queue *rxq)
102 {
103         uint32_t avail = 0;
104
105         if (rxq == NULL)
106                 return;
107
108         PMD_RX_LOG(DEBUG,
109                    "RXQ: cmd0 base : 0x%p cmd1 base : 0x%p comp ring base : 0x%p.",
110                    rxq->cmd_ring[0].base, rxq->cmd_ring[1].base, rxq->comp_ring.base);
111         PMD_RX_LOG(DEBUG,
112                    "RXQ: cmd0 basePA : 0x%lx cmd1 basePA : 0x%lx comp ring basePA : 0x%lx.",
113                    (unsigned long)rxq->cmd_ring[0].basePA,
114                    (unsigned long)rxq->cmd_ring[1].basePA,
115                    (unsigned long)rxq->comp_ring.basePA);
116
117         avail = vmxnet3_cmd_ring_desc_avail(&rxq->cmd_ring[0]);
118         PMD_RX_LOG(DEBUG,
119                    "RXQ:cmd0: size=%u; free=%u; next2proc=%u; queued=%u",
120                    (uint32_t)rxq->cmd_ring[0].size, avail,
121                    rxq->comp_ring.next2proc,
122                    rxq->cmd_ring[0].size - avail);
123
124         avail = vmxnet3_cmd_ring_desc_avail(&rxq->cmd_ring[1]);
125         PMD_RX_LOG(DEBUG, "RXQ:cmd1 size=%u; free=%u; next2proc=%u; queued=%u",
126                    (uint32_t)rxq->cmd_ring[1].size, avail, rxq->comp_ring.next2proc,
127                    rxq->cmd_ring[1].size - avail);
128
129 }
130
131 static void
132 vmxnet3_txq_dump(struct vmxnet3_tx_queue *txq)
133 {
134         uint32_t avail = 0;
135
136         if (txq == NULL)
137                 return;
138
139         PMD_TX_LOG(DEBUG, "TXQ: cmd base : 0x%p comp ring base : 0x%p data ring base : 0x%p.",
140                    txq->cmd_ring.base, txq->comp_ring.base, txq->data_ring.base);
141         PMD_TX_LOG(DEBUG, "TXQ: cmd basePA : 0x%lx comp ring basePA : 0x%lx data ring basePA : 0x%lx.",
142                    (unsigned long)txq->cmd_ring.basePA,
143                    (unsigned long)txq->comp_ring.basePA,
144                    (unsigned long)txq->data_ring.basePA);
145
146         avail = vmxnet3_cmd_ring_desc_avail(&txq->cmd_ring);
147         PMD_TX_LOG(DEBUG, "TXQ: size=%u; free=%u; next2proc=%u; queued=%u",
148                    (uint32_t)txq->cmd_ring.size, avail,
149                    txq->comp_ring.next2proc, txq->cmd_ring.size - avail);
150 }
151 #endif
152
153 static void
154 vmxnet3_cmd_ring_release_mbufs(vmxnet3_cmd_ring_t *ring)
155 {
156         while (ring->next2comp != ring->next2fill) {
157                 /* No need to worry about tx desc ownership, device is quiesced by now. */
158                 vmxnet3_buf_info_t *buf_info = ring->buf_info + ring->next2comp;
159
160                 if (buf_info->m) {
161                         rte_pktmbuf_free(buf_info->m);
162                         buf_info->m = NULL;
163                         buf_info->bufPA = 0;
164                         buf_info->len = 0;
165                 }
166                 vmxnet3_cmd_ring_adv_next2comp(ring);
167         }
168 }
169
170 static void
171 vmxnet3_cmd_ring_release(vmxnet3_cmd_ring_t *ring)
172 {
173         vmxnet3_cmd_ring_release_mbufs(ring);
174         rte_free(ring->buf_info);
175         ring->buf_info = NULL;
176 }
177
178
179 void
180 vmxnet3_dev_tx_queue_release(void *txq)
181 {
182         vmxnet3_tx_queue_t *tq = txq;
183
184         if (tq != NULL) {
185                 /* Release the cmd_ring */
186                 vmxnet3_cmd_ring_release(&tq->cmd_ring);
187         }
188 }
189
190 void
191 vmxnet3_dev_rx_queue_release(void *rxq)
192 {
193         int i;
194         vmxnet3_rx_queue_t *rq = rxq;
195
196         if (rq != NULL) {
197                 /* Release both the cmd_rings */
198                 for (i = 0; i < VMXNET3_RX_CMDRING_SIZE; i++)
199                         vmxnet3_cmd_ring_release(&rq->cmd_ring[i]);
200         }
201 }
202
203 static void
204 vmxnet3_dev_tx_queue_reset(void *txq)
205 {
206         vmxnet3_tx_queue_t *tq = txq;
207         struct vmxnet3_cmd_ring *ring = &tq->cmd_ring;
208         struct vmxnet3_comp_ring *comp_ring = &tq->comp_ring;
209         struct vmxnet3_data_ring *data_ring = &tq->data_ring;
210         int size;
211
212         if (tq != NULL) {
213                 /* Release the cmd_ring mbufs */
214                 vmxnet3_cmd_ring_release_mbufs(&tq->cmd_ring);
215         }
216
217         /* Tx vmxnet rings structure initialization */
218         ring->next2fill = 0;
219         ring->next2comp = 0;
220         ring->gen = VMXNET3_INIT_GEN;
221         comp_ring->next2proc = 0;
222         comp_ring->gen = VMXNET3_INIT_GEN;
223
224         size = sizeof(struct Vmxnet3_TxDesc) * ring->size;
225         size += sizeof(struct Vmxnet3_TxCompDesc) * comp_ring->size;
226         size += sizeof(struct Vmxnet3_TxDataDesc) * data_ring->size;
227
228         memset(ring->base, 0, size);
229 }
230
231 static void
232 vmxnet3_dev_rx_queue_reset(void *rxq)
233 {
234         int i;
235         vmxnet3_rx_queue_t *rq = rxq;
236         struct vmxnet3_cmd_ring *ring0, *ring1;
237         struct vmxnet3_comp_ring *comp_ring;
238         int size;
239
240         if (rq != NULL) {
241                 /* Release both the cmd_rings mbufs */
242                 for (i = 0; i < VMXNET3_RX_CMDRING_SIZE; i++)
243                         vmxnet3_cmd_ring_release_mbufs(&rq->cmd_ring[i]);
244         }
245
246         ring0 = &rq->cmd_ring[0];
247         ring1 = &rq->cmd_ring[1];
248         comp_ring = &rq->comp_ring;
249
250         /* Rx vmxnet rings structure initialization */
251         ring0->next2fill = 0;
252         ring1->next2fill = 0;
253         ring0->next2comp = 0;
254         ring1->next2comp = 0;
255         ring0->gen = VMXNET3_INIT_GEN;
256         ring1->gen = VMXNET3_INIT_GEN;
257         comp_ring->next2proc = 0;
258         comp_ring->gen = VMXNET3_INIT_GEN;
259
260         size = sizeof(struct Vmxnet3_RxDesc) * (ring0->size + ring1->size);
261         size += sizeof(struct Vmxnet3_RxCompDesc) * comp_ring->size;
262
263         memset(ring0->base, 0, size);
264 }
265
266 void
267 vmxnet3_dev_clear_queues(struct rte_eth_dev *dev)
268 {
269         unsigned i;
270
271         PMD_INIT_FUNC_TRACE();
272
273         for (i = 0; i < dev->data->nb_tx_queues; i++) {
274                 struct vmxnet3_tx_queue *txq = dev->data->tx_queues[i];
275
276                 if (txq != NULL) {
277                         txq->stopped = TRUE;
278                         vmxnet3_dev_tx_queue_reset(txq);
279                 }
280         }
281
282         for (i = 0; i < dev->data->nb_rx_queues; i++) {
283                 struct vmxnet3_rx_queue *rxq = dev->data->rx_queues[i];
284
285                 if (rxq != NULL) {
286                         rxq->stopped = TRUE;
287                         vmxnet3_dev_rx_queue_reset(rxq);
288                 }
289         }
290 }
291
292 static int
293 vmxnet3_unmap_pkt(uint16_t eop_idx, vmxnet3_tx_queue_t *txq)
294 {
295         int completed = 0;
296         struct rte_mbuf *mbuf;
297
298         /* Release cmd_ring descriptor and free mbuf */
299         VMXNET3_ASSERT(txq->cmd_ring.base[eop_idx].txd.eop == 1);
300
301         mbuf = txq->cmd_ring.buf_info[eop_idx].m;
302         if (mbuf == NULL)
303                 rte_panic("EOP desc does not point to a valid mbuf");
304         rte_pktmbuf_free(mbuf);
305
306         txq->cmd_ring.buf_info[eop_idx].m = NULL;
307
308         while (txq->cmd_ring.next2comp != eop_idx) {
309                 /* no out-of-order completion */
310                 VMXNET3_ASSERT(txq->cmd_ring.base[txq->cmd_ring.next2comp].txd.cq == 0);
311                 vmxnet3_cmd_ring_adv_next2comp(&txq->cmd_ring);
312                 completed++;
313         }
314
315         /* Mark the txd for which tcd was generated as completed */
316         vmxnet3_cmd_ring_adv_next2comp(&txq->cmd_ring);
317
318         return completed + 1;
319 }
320
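/*
 * Walk the Tx completion ring while the completion descriptor's generation
 * bit matches the ring's current generation; each completion names the EOP
 * command descriptor (txdIdx), and vmxnet3_unmap_pkt() frees the mbuf and
 * releases every command-ring slot up to and including it.
 */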
321 static void
322 vmxnet3_tq_tx_complete(vmxnet3_tx_queue_t *txq)
323 {
324         int completed = 0;
325         vmxnet3_comp_ring_t *comp_ring = &txq->comp_ring;
326         struct Vmxnet3_TxCompDesc *tcd = (struct Vmxnet3_TxCompDesc *)
327                 (comp_ring->base + comp_ring->next2proc);
328
329         while (tcd->gen == comp_ring->gen) {
330                 completed += vmxnet3_unmap_pkt(tcd->txdIdx, txq);
331
332                 vmxnet3_comp_ring_adv_next2proc(comp_ring);
333                 tcd = (struct Vmxnet3_TxCompDesc *)(comp_ring->base +
334                                                     comp_ring->next2proc);
335         }
336
337         PMD_TX_LOG(DEBUG, "Processed %d tx comps & command descs.", completed);
338 }
339
340 uint16_t
341 vmxnet3_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts,
342                   uint16_t nb_pkts)
343 {
344         uint16_t nb_tx;
345         vmxnet3_tx_queue_t *txq = tx_queue;
346         struct vmxnet3_hw *hw = txq->hw;
347         Vmxnet3_TxQueueCtrl *txq_ctrl = &txq->shared->ctrl;
348         uint32_t deferred = rte_le_to_cpu_32(txq_ctrl->txNumDeferred);
349
350         if (unlikely(txq->stopped)) {
351                 PMD_TX_LOG(DEBUG, "Tx queue is stopped.");
352                 return 0;
353         }
354
355         /* Free up the comp_descriptors aggressively */
356         vmxnet3_tq_tx_complete(txq);
357
358         nb_tx = 0;
359         while (nb_tx < nb_pkts) {
360                 Vmxnet3_GenericDesc *gdesc;
361                 vmxnet3_buf_info_t *tbi;
362                 uint32_t first2fill, avail, dw2;
363                 struct rte_mbuf *txm = tx_pkts[nb_tx];
364                 struct rte_mbuf *m_seg = txm;
365                 int copy_size = 0;
366                 bool tso = (txm->ol_flags & PKT_TX_TCP_SEG) != 0;
367                 /* # of descriptors needed for a packet. */
368                 unsigned count = txm->nb_segs;
369
370                 avail = vmxnet3_cmd_ring_desc_avail(&txq->cmd_ring);
371                 if (count > avail) {
372                         /* Is command ring full? */
373                         if (unlikely(avail == 0)) {
374                                 PMD_TX_LOG(DEBUG, "No free ring descriptors");
375                                 txq->stats.tx_ring_full++;
376                                 txq->stats.drop_total += (nb_pkts - nb_tx);
377                                 break;
378                         }
379
380                         /* Command ring is not full but cannot handle the
381                          * multi-segmented packet. Let's try the next packet
382                          * in this case.
383                          */
384                         PMD_TX_LOG(DEBUG, "Running out of ring descriptors "
385                                    "(avail %d needed %d)", avail, count);
386                         txq->stats.drop_total++;
387                         if (tso)
388                                 txq->stats.drop_tso++;
389                         rte_pktmbuf_free(txm);
390                         nb_tx++;
391                         continue;
392                 }
393
394                 /* Drop non-TSO packet that is excessively fragmented */
395                 if (unlikely(!tso && count > VMXNET3_MAX_TXD_PER_PKT)) {
396                         PMD_TX_LOG(ERR, "Non-TSO packet cannot occupy more than %d tx "
397                                    "descriptors. Packet dropped.", VMXNET3_MAX_TXD_PER_PKT);
398                         txq->stats.drop_too_many_segs++;
399                         txq->stats.drop_total++;
400                         rte_pktmbuf_free(txm);
401                         nb_tx++;
402                         continue;
403                 }
404
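                /* A small single-segment packet (up to VMXNET3_HDR_COPY_SIZE bytes) is
                 * copied into the Tx data ring; the SOP descriptor below will then point
                 * at the data-ring slot instead of the mbuf's own buffer.
                 */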
405                 if (txm->nb_segs == 1 && rte_pktmbuf_pkt_len(txm) <= VMXNET3_HDR_COPY_SIZE) {
406                         struct Vmxnet3_TxDataDesc *tdd;
407
408                         tdd = txq->data_ring.base + txq->cmd_ring.next2fill;
409                         copy_size = rte_pktmbuf_pkt_len(txm);
410                         rte_memcpy(tdd->data, rte_pktmbuf_mtod(txm, char *), copy_size);
411                 }
412
413                 /* use the previous gen bit for the SOP desc */
414                 dw2 = (txq->cmd_ring.gen ^ 0x1) << VMXNET3_TXD_GEN_SHIFT;
415                 first2fill = txq->cmd_ring.next2fill;
416                 do {
417                         /* Remember the transmit buffer for cleanup */
418                         tbi = txq->cmd_ring.buf_info + txq->cmd_ring.next2fill;
419
420                         /* NB: the following assumes that the VMXNET3 maximum
421                          * transmit buffer size (16K) is greater than the
422                          * maximum mbuf segment size.
423                          */
424                         gdesc = txq->cmd_ring.base + txq->cmd_ring.next2fill;
425                         if (copy_size)
426                                 gdesc->txd.addr = rte_cpu_to_le_64(txq->data_ring.basePA +
427                                                                 txq->cmd_ring.next2fill *
428                                                                 sizeof(struct Vmxnet3_TxDataDesc));
429                         else
430                                 gdesc->txd.addr = rte_mbuf_data_dma_addr(m_seg);
431
432                         gdesc->dword[2] = dw2 | m_seg->data_len;
433                         gdesc->dword[3] = 0;
434
435                         /* move to the next2fill descriptor */
436                         vmxnet3_cmd_ring_adv_next2fill(&txq->cmd_ring);
437
438                         /* use the right gen for non-SOP desc */
439                         dw2 = txq->cmd_ring.gen << VMXNET3_TXD_GEN_SHIFT;
440                 } while ((m_seg = m_seg->next) != NULL);
441
442                 /* set the last buf_info for the pkt */
443                 tbi->m = txm;
444                 /* Update the EOP descriptor */
445                 gdesc->dword[3] |= VMXNET3_TXD_EOP | VMXNET3_TXD_CQ;
446
447                 /* Add VLAN tag if present */
448                 gdesc = txq->cmd_ring.base + first2fill;
449                 if (txm->ol_flags & PKT_TX_VLAN_PKT) {
450                         gdesc->txd.ti = 1;
451                         gdesc->txd.tci = txm->vlan_tci;
452                 }
453
454                 if (tso) {
455                         uint16_t mss = txm->tso_segsz;
456
457                         VMXNET3_ASSERT(mss > 0);
458
459                         gdesc->txd.hlen = txm->l2_len + txm->l3_len + txm->l4_len;
460                         gdesc->txd.om = VMXNET3_OM_TSO;
461                         gdesc->txd.msscof = mss;
462
463                         deferred += (rte_pktmbuf_pkt_len(txm) - gdesc->txd.hlen + mss - 1) / mss;
464                 } else if (txm->ol_flags & PKT_TX_L4_MASK) {
465                         gdesc->txd.om = VMXNET3_OM_CSUM;
466                         gdesc->txd.hlen = txm->l2_len + txm->l3_len;
467
468                         switch (txm->ol_flags & PKT_TX_L4_MASK) {
469                         case PKT_TX_TCP_CKSUM:
470                                 gdesc->txd.msscof = gdesc->txd.hlen + offsetof(struct tcp_hdr, cksum);
471                                 break;
472                         case PKT_TX_UDP_CKSUM:
473                                 gdesc->txd.msscof = gdesc->txd.hlen + offsetof(struct udp_hdr, dgram_cksum);
474                                 break;
475                         default:
476                                 PMD_TX_LOG(WARNING, "requested cksum offload not supported %#llx",
477                                            txm->ol_flags & PKT_TX_L4_MASK);
478                                 abort();
479                         }
480                         deferred++;
481                 } else {
482                         gdesc->txd.hlen = 0;
483                         gdesc->txd.om = VMXNET3_OM_NONE;
484                         gdesc->txd.msscof = 0;
485                         deferred++;
486                 }
487
488                 /* flip the GEN bit on the SOP */
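                /* The compiler barrier keeps the descriptor writes above from being
                 * reordered past the generation-bit flip that hands ownership of the
                 * SOP descriptor to the device.
                 */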
489                 rte_compiler_barrier();
490                 gdesc->dword[2] ^= VMXNET3_TXD_GEN;
491
492                 txq_ctrl->txNumDeferred = rte_cpu_to_le_32(deferred);
493                 nb_tx++;
494         }
495
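        /* txNumDeferred counts the packets (or TSO segments) queued since the last
         * doorbell; once it reaches the txThreshold provided in the shared queue
         * control area, next2fill is written to the TXPROD register below.
         */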
496         PMD_TX_LOG(DEBUG, "vmxnet3 txThreshold: %u", rte_le_to_cpu_32(txq_ctrl->txThreshold));
497
498         if (deferred >= rte_le_to_cpu_32(txq_ctrl->txThreshold)) {
499                 txq_ctrl->txNumDeferred = 0;
500                 /* Notify vSwitch that packets are available. */
501                 VMXNET3_WRITE_BAR0_REG(hw, (VMXNET3_REG_TXPROD + txq->queue_id * VMXNET3_REG_ALIGN),
502                                        txq->cmd_ring.next2fill);
503         }
504
505         return nb_tx;
506 }
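
/*
 * Illustrative usage sketch (not part of the driver): vmxnet3_xmit_pkts() is
 * installed as the ethdev tx burst callback, so applications normally reach it
 * through rte_eth_tx_burst(). port_id/queue_id below are assumed to refer to an
 * already configured and started vmxnet3 port and Tx queue.
 *
 *     struct rte_mbuf *pkts[32];
 *     uint16_t sent;
 *
 *     // ... fill pkts[] with packets to transmit ...
 *     sent = rte_eth_tx_burst(port_id, queue_id, pkts, 32);
 *     // mbufs beyond 'sent' are still owned by the caller and may be retried
 */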
507
508 /*
509  *  Allocates mbufs and posts rx descriptors with the buffer details
510  *  so that the device can receive packets into those buffers.
511  *      Ring layout:
512  *      Of the two rings, the 1st ring contains buffers of type 0 and type 1.
513  *      bufs_per_pkt is set such that for non-LRO cases all the buffers required
514  *      by a frame fit in the 1st ring (1st buf of type 0 and the rest of type 1).
515  *      The 2nd ring contains buffers of type 1 alone and is mostly used
516  *      only for LRO.
517  *
518  */
519 static int
520 vmxnet3_post_rx_bufs(vmxnet3_rx_queue_t *rxq, uint8_t ring_id)
521 {
522         int err = 0;
523         uint32_t i = 0, val = 0;
524         struct vmxnet3_cmd_ring *ring = &rxq->cmd_ring[ring_id];
525
526         if (ring_id == 0) {
527                 /* Usually: One HEAD type buf per packet
528                  * val = (ring->next2fill % rxq->hw->bufs_per_pkt) ?
529                  * VMXNET3_RXD_BTYPE_BODY : VMXNET3_RXD_BTYPE_HEAD;
530                  */
531
532                 /* We use a single buffer per packet, so all buffers here are of HEAD type */
533                 val = VMXNET3_RXD_BTYPE_HEAD;
534         } else {
535                 /* All BODY type buffers for 2nd ring */
536                 val = VMXNET3_RXD_BTYPE_BODY;
537         }
538
539         while (vmxnet3_cmd_ring_desc_avail(ring) > 0) {
540                 struct Vmxnet3_RxDesc *rxd;
541                 struct rte_mbuf *mbuf;
542                 vmxnet3_buf_info_t *buf_info = &ring->buf_info[ring->next2fill];
543
544                 rxd = (struct Vmxnet3_RxDesc *)(ring->base + ring->next2fill);
545
546                 /* Allocate blank mbuf for the current Rx Descriptor */
547                 mbuf = rte_rxmbuf_alloc(rxq->mp);
548                 if (unlikely(mbuf == NULL)) {
549                         PMD_RX_LOG(ERR, "Error allocating mbuf");
550                         rxq->stats.rx_buf_alloc_failure++;
551                         err = ENOMEM;
552                         break;
553                 }
554
555                 /*
556                  * Store the mbuf pointer in this descriptor's buf_info entry;
557                  * buf_info plays the same role as the cookie in a virtio virtqueue.
558                  */
559                 buf_info->m = mbuf;
560                 buf_info->len = (uint16_t)(mbuf->buf_len -
561                                            RTE_PKTMBUF_HEADROOM);
562                 buf_info->bufPA =
563                         rte_mbuf_data_dma_addr_default(mbuf);
564
565                 /* Load Rx Descriptor with the buffer's GPA */
566                 rxd->addr = buf_info->bufPA;
567
568                 /* After this point rxd->addr MUST not be NULL */
569                 rxd->btype = val;
570                 rxd->len = buf_info->len;
571                 /* Flip gen bit at the end to change ownership */
572                 rxd->gen = ring->gen;
573
574                 vmxnet3_cmd_ring_adv_next2fill(ring);
575                 i++;
576         }
577
578         /* Return error only if no buffers are posted at present */
579         if (vmxnet3_cmd_ring_desc_avail(ring) >= (ring->size - 1))
580                 return -err;
581         else
582                 return i;
583 }
584
585
586 /* Receive side checksum and other offloads */
587 static void
588 vmxnet3_rx_offload(const Vmxnet3_RxCompDesc *rcd, struct rte_mbuf *rxm)
589 {
590         /* Check for hardware stripped VLAN tag */
591         if (rcd->ts) {
592                 rxm->ol_flags |= PKT_RX_VLAN_PKT;
593                 rxm->vlan_tci = rte_le_to_cpu_16((uint16_t)rcd->tci);
594         }
595
596         /* Check for RSS */
597         if (rcd->rssType != VMXNET3_RCD_RSS_TYPE_NONE) {
598                 rxm->ol_flags |= PKT_RX_RSS_HASH;
599                 rxm->hash.rss = rcd->rssHash;
600         }
601
602         /* Check the packet type, checksum errors, etc. Only IPv4 is supported for now. */
603         if (rcd->v4) {
604                 struct ether_hdr *eth = rte_pktmbuf_mtod(rxm, struct ether_hdr *);
605                 struct ipv4_hdr *ip = (struct ipv4_hdr *)(eth + 1);
606
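                /* An IHL larger than the minimal 20-byte IPv4 header indicates that
                 * IP options are present.
                 */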
607                 if (((ip->version_ihl & 0xf) << 2) > (int)sizeof(struct ipv4_hdr))
608                         rxm->packet_type = RTE_PTYPE_L3_IPV4_EXT;
609                 else
610                         rxm->packet_type = RTE_PTYPE_L3_IPV4;
611
612                 if (!rcd->cnc) {
613                         if (!rcd->ipc)
614                                 rxm->ol_flags |= PKT_RX_IP_CKSUM_BAD;
615
616                         if ((rcd->tcp || rcd->udp) && !rcd->tuc)
617                                 rxm->ol_flags |= PKT_RX_L4_CKSUM_BAD;
618                 }
619         }
620 }
621
622 /*
623  * Process the Rx Completion Ring of given vmxnet3_rx_queue
624  * for nb_pkts burst and return the number of packets received
625  */
626 uint16_t
627 vmxnet3_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t nb_pkts)
628 {
629         uint16_t nb_rx;
630         uint32_t nb_rxd, idx;
631         uint8_t ring_idx;
632         vmxnet3_rx_queue_t *rxq;
633         Vmxnet3_RxCompDesc *rcd;
634         vmxnet3_buf_info_t *rbi;
635         Vmxnet3_RxDesc *rxd;
636         struct rte_mbuf *rxm = NULL;
637         struct vmxnet3_hw *hw;
638
639         nb_rx = 0;
640         ring_idx = 0;
641         nb_rxd = 0;
642         idx = 0;
643
644         rxq = rx_queue;
645         hw = rxq->hw;
646
647         rcd = &rxq->comp_ring.base[rxq->comp_ring.next2proc].rcd;
648
649         if (unlikely(rxq->stopped)) {
650                 PMD_RX_LOG(DEBUG, "Rx queue is stopped.");
651                 return 0;
652         }
653
654         while (rcd->gen == rxq->comp_ring.gen) {
655                 if (nb_rx >= nb_pkts)
656                         break;
657
658                 idx = rcd->rxdIdx;
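                /* rqID equal to qid1 means the completion came from command ring 0
                 * (HEAD buffers); otherwise it came from the second command ring
                 * (qid2, BODY buffers).
                 */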
659                 ring_idx = (uint8_t)((rcd->rqID == rxq->qid1) ? 0 : 1);
660                 rxd = (Vmxnet3_RxDesc *)rxq->cmd_ring[ring_idx].base + idx;
661                 rbi = rxq->cmd_ring[ring_idx].buf_info + idx;
662
663                 PMD_RX_LOG(DEBUG, "rxd idx: %d ring idx: %d.", idx, ring_idx);
664
665                 VMXNET3_ASSERT(rcd->len <= rxd->len);
666                 VMXNET3_ASSERT(rbi->m);
667
668                 /* Get the packet buffer pointer from buf_info */
669                 rxm = rbi->m;
670
671                 /* Clear descriptor associated buf_info to be reused */
672                 rbi->m = NULL;
673                 rbi->bufPA = 0;
674
675                 /* Update the index that we received a packet */
676                 rxq->cmd_ring[ring_idx].next2comp = idx;
677
678                 /* For RCD with EOP set, check if there is frame error */
679                 if (unlikely(rcd->eop && rcd->err)) {
680                         rxq->stats.drop_total++;
681                         rxq->stats.drop_err++;
682
683                         if (!rcd->fcs) {
684                                 rxq->stats.drop_fcs++;
685                                 PMD_RX_LOG(ERR, "Recv packet dropped due to frame err.");
686                         }
687                         PMD_RX_LOG(ERR, "Error in received packet rcd#:%d rxd:%d",
688                                    (int)(rcd - (struct Vmxnet3_RxCompDesc *)
689                                          rxq->comp_ring.base), rcd->rxdIdx);
690                         rte_pktmbuf_free_seg(rxm);
691                         goto rcd_done;
692                 }
693
694
695                 /* Initialize newly received packet buffer */
696                 rxm->port = rxq->port_id;
697                 rxm->nb_segs = 1;
698                 rxm->next = NULL;
699                 rxm->pkt_len = (uint16_t)rcd->len;
700                 rxm->data_len = (uint16_t)rcd->len;
701                 rxm->data_off = RTE_PKTMBUF_HEADROOM;
702                 rxm->ol_flags = 0;
703                 rxm->vlan_tci = 0;
704
705                 /*
706                  * If this is the first buffer of the received packet,
707                  * set the pointer to the first mbuf of the packet
708                  * Otherwise, update the total length and the number of segments
709                  * of the current scattered packet, and update the pointer to
710                  * the last mbuf of the current packet.
711                  */
712                 if (rcd->sop) {
713                         VMXNET3_ASSERT(rxq->start_seg != NULL);
714                         VMXNET3_ASSERT(rxd->btype == VMXNET3_RXD_BTYPE_HEAD);
715
716                         if (unlikely(rcd->len == 0)) {
717                                 VMXNET3_ASSERT(rcd->eop);
718
719                                 PMD_RX_LOG(DEBUG,
720                                            "Rx buf was skipped. rxring[%d][%d])",
721                                            ring_idx, idx);
722                                 rte_pktmbuf_free_seg(rxm);
723                                 goto rcd_done;
724                         }
725
726                         rxq->start_seg = rxm;
727                         vmxnet3_rx_offload(rcd, rxm);
728                 } else {
729                         struct rte_mbuf *start = rxq->start_seg;
730
731                         VMXNET3_ASSERT(rxd->btype == VMXNET3_RXD_BTYPE_BODY);
732                         VMXNET3_ASSERT(start != NULL);
733
734                         start->pkt_len += rxm->data_len;
735                         start->nb_segs++;
736
737                         rxq->last_seg->next = rxm;
738                 }
739                 rxq->last_seg = rxm;
740
741                 if (rcd->eop) {
742                         rx_pkts[nb_rx++] = rxq->start_seg;
743                         rxq->start_seg = NULL;
744                 }
745
746 rcd_done:
747                 rxq->cmd_ring[ring_idx].next2comp = idx;
748                 VMXNET3_INC_RING_IDX_ONLY(rxq->cmd_ring[ring_idx].next2comp, rxq->cmd_ring[ring_idx].size);
749
750                 /* Allocate new buffers and post fresh descriptors */
751                 vmxnet3_post_rx_bufs(rxq, ring_idx);
752                 if (unlikely(rxq->shared->ctrl.updateRxProd)) {
753                         VMXNET3_WRITE_BAR0_REG(hw, rxprod_reg[ring_idx] + (rxq->queue_id * VMXNET3_REG_ALIGN),
754                                                rxq->cmd_ring[ring_idx].next2fill);
755                 }
756
757                 /* Advance to the next descriptor in comp_ring */
758                 vmxnet3_comp_ring_adv_next2proc(&rxq->comp_ring);
759
760                 rcd = &rxq->comp_ring.base[rxq->comp_ring.next2proc].rcd;
761                 nb_rxd++;
762                 if (nb_rxd > rxq->cmd_ring[0].size) {
763                         PMD_RX_LOG(ERR,
764                                    "Used up quota of receiving packets,"
765                                    " relinquish control.");
766                         break;
767                 }
768         }
769
770         return nb_rx;
771 }
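
/*
 * Illustrative usage sketch (not part of the driver): vmxnet3_recv_pkts() is
 * installed as the ethdev rx burst callback and is normally reached through
 * rte_eth_rx_burst(). port_id/queue_id below are assumed to refer to an
 * already configured and started vmxnet3 port and Rx queue.
 *
 *     struct rte_mbuf *rx_pkts[32];
 *     uint16_t nb = rte_eth_rx_burst(port_id, queue_id, rx_pkts, 32);
 *
 *     for (uint16_t k = 0; k < nb; k++) {
 *             // process rx_pkts[k] ...
 *             rte_pktmbuf_free(rx_pkts[k]);
 *     }
 */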
772
773 /*
774  * Create a memzone for the device rings. malloc can't be used because the
775  * physical address is needed. If the memzone is already created, this
776  * function returns a pointer to the existing one.
777  */
778 static const struct rte_memzone *
779 ring_dma_zone_reserve(struct rte_eth_dev *dev, const char *ring_name,
780                       uint16_t queue_id, uint32_t ring_size, int socket_id)
781 {
782         char z_name[RTE_MEMZONE_NAMESIZE];
783         const struct rte_memzone *mz;
784
785         snprintf(z_name, sizeof(z_name), "%s_%s_%d_%d",
786                         dev->driver->pci_drv.name, ring_name,
787                         dev->data->port_id, queue_id);
788
789         mz = rte_memzone_lookup(z_name);
790         if (mz)
791                 return mz;
792
793         return rte_memzone_reserve_aligned(z_name, ring_size,
794                         socket_id, 0, VMXNET3_RING_BA_ALIGN);
795 }
796
797 int
798 vmxnet3_dev_tx_queue_setup(struct rte_eth_dev *dev,
799                            uint16_t queue_idx,
800                            uint16_t nb_desc,
801                            unsigned int socket_id,
802                            __attribute__((unused)) const struct rte_eth_txconf *tx_conf)
803 {
804         struct vmxnet3_hw *hw = dev->data->dev_private;
805         const struct rte_memzone *mz;
806         struct vmxnet3_tx_queue *txq;
807         struct vmxnet3_cmd_ring *ring;
808         struct vmxnet3_comp_ring *comp_ring;
809         struct vmxnet3_data_ring *data_ring;
810         int size;
811
812         PMD_INIT_FUNC_TRACE();
813
814         if ((tx_conf->txq_flags & ETH_TXQ_FLAGS_NOXSUMSCTP) !=
815             ETH_TXQ_FLAGS_NOXSUMSCTP) {
816                 PMD_INIT_LOG(ERR, "SCTP checksum offload not supported");
817                 return -EINVAL;
818         }
819
820         txq = rte_zmalloc("ethdev_tx_queue", sizeof(struct vmxnet3_tx_queue), RTE_CACHE_LINE_SIZE);
821         if (txq == NULL) {
822                 PMD_INIT_LOG(ERR, "Can not allocate tx queue structure");
823                 return -ENOMEM;
824         }
825
826         txq->queue_id = queue_idx;
827         txq->port_id = dev->data->port_id;
828         txq->shared = &hw->tqd_start[queue_idx];
829         txq->hw = hw;
830         txq->qid = queue_idx;
831         txq->stopped = TRUE;
832
833         ring = &txq->cmd_ring;
834         comp_ring = &txq->comp_ring;
835         data_ring = &txq->data_ring;
836
837         /* Tx vmxnet ring size must be between 512 and 4096 */
838         if (nb_desc < VMXNET3_DEF_TX_RING_SIZE) {
839                 PMD_INIT_LOG(ERR, "VMXNET3 Tx Ring Size Min: %u",
840                              VMXNET3_DEF_TX_RING_SIZE);
841                 return -EINVAL;
842         } else if (nb_desc > VMXNET3_TX_RING_MAX_SIZE) {
843                 PMD_INIT_LOG(ERR, "VMXNET3 Tx Ring Size Max: %u",
844                              VMXNET3_TX_RING_MAX_SIZE);
845                 return -EINVAL;
846         } else {
847                 ring->size = nb_desc;
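                /* Keep the ring size a multiple of the alignment implied by
                 * VMXNET3_RING_SIZE_MASK by rounding the requested size down.
                 */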
848                 ring->size &= ~VMXNET3_RING_SIZE_MASK;
849         }
850         comp_ring->size = data_ring->size = ring->size;
851
852         /* Tx vmxnet rings structure initialization */
853         ring->next2fill = 0;
854         ring->next2comp = 0;
855         ring->gen = VMXNET3_INIT_GEN;
856         comp_ring->next2proc = 0;
857         comp_ring->gen = VMXNET3_INIT_GEN;
858
859         size = sizeof(struct Vmxnet3_TxDesc) * ring->size;
860         size += sizeof(struct Vmxnet3_TxCompDesc) * comp_ring->size;
861         size += sizeof(struct Vmxnet3_TxDataDesc) * data_ring->size;
862
863         mz = ring_dma_zone_reserve(dev, "txdesc", queue_idx, size, socket_id);
864         if (mz == NULL) {
865                 PMD_INIT_LOG(ERR, "ERROR: Creating queue descriptors zone");
866                 return -ENOMEM;
867         }
868         memset(mz->addr, 0, mz->len);
869
870         /* cmd_ring initialization */
871         ring->base = mz->addr;
872         ring->basePA = mz->phys_addr;
873
874         /* comp_ring initialization */
875         comp_ring->base = ring->base + ring->size;
876         comp_ring->basePA = ring->basePA +
877                 (sizeof(struct Vmxnet3_TxDesc) * ring->size);
878
879         /* data_ring initialization */
880         data_ring->base = (Vmxnet3_TxDataDesc *)(comp_ring->base + comp_ring->size);
881         data_ring->basePA = comp_ring->basePA +
882                         (sizeof(struct Vmxnet3_TxCompDesc) * comp_ring->size);
883
884         /* cmd_ring buf_info allocation */
885         ring->buf_info = rte_zmalloc("tx_ring_buf_info",
886                                      ring->size * sizeof(vmxnet3_buf_info_t), RTE_CACHE_LINE_SIZE);
887         if (ring->buf_info == NULL) {
888                 PMD_INIT_LOG(ERR, "ERROR: Creating tx_buf_info structure");
889                 return -ENOMEM;
890         }
891
892         /* Update the data portion with txq */
893         dev->data->tx_queues[queue_idx] = txq;
894
895         return 0;
896 }
897
898 int
899 vmxnet3_dev_rx_queue_setup(struct rte_eth_dev *dev,
900                            uint16_t queue_idx,
901                            uint16_t nb_desc,
902                            unsigned int socket_id,
903                            __attribute__((unused)) const struct rte_eth_rxconf *rx_conf,
904                            struct rte_mempool *mp)
905 {
906         const struct rte_memzone *mz;
907         struct vmxnet3_rx_queue *rxq;
908         struct vmxnet3_hw     *hw = dev->data->dev_private;
909         struct vmxnet3_cmd_ring *ring0, *ring1, *ring;
910         struct vmxnet3_comp_ring *comp_ring;
911         int size;
912         uint8_t i;
913         char mem_name[32];
914
915         PMD_INIT_FUNC_TRACE();
916
917         rxq = rte_zmalloc("ethdev_rx_queue", sizeof(struct vmxnet3_rx_queue), RTE_CACHE_LINE_SIZE);
918         if (rxq == NULL) {
919                 PMD_INIT_LOG(ERR, "Can not allocate rx queue structure");
920                 return -ENOMEM;
921         }
922
923         rxq->mp = mp;
924         rxq->queue_id = queue_idx;
925         rxq->port_id = dev->data->port_id;
926         rxq->shared = &hw->rqd_start[queue_idx];
927         rxq->hw = hw;
928         rxq->qid1 = queue_idx;
929         rxq->qid2 = queue_idx + hw->num_rx_queues;
930         rxq->stopped = TRUE;
931
932         ring0 = &rxq->cmd_ring[0];
933         ring1 = &rxq->cmd_ring[1];
934         comp_ring = &rxq->comp_ring;
935
936         /* Rx vmxnet ring sizes must be between 256 and 4096 */
937         if (nb_desc < VMXNET3_DEF_RX_RING_SIZE) {
938                 PMD_INIT_LOG(ERR, "VMXNET3 Rx Ring Size Min: 256");
939                 return -EINVAL;
940         } else if (nb_desc > VMXNET3_RX_RING_MAX_SIZE) {
941                 PMD_INIT_LOG(ERR, "VMXNET3 Rx Ring Size Max: 4096");
942                 return -EINVAL;
943         } else {
944                 ring0->size = nb_desc;
945                 ring0->size &= ~VMXNET3_RING_SIZE_MASK;
946                 ring1->size = ring0->size;
947         }
948
949         comp_ring->size = ring0->size + ring1->size;
950
951         /* Rx vmxnet rings structure initialization */
952         ring0->next2fill = 0;
953         ring1->next2fill = 0;
954         ring0->next2comp = 0;
955         ring1->next2comp = 0;
956         ring0->gen = VMXNET3_INIT_GEN;
957         ring1->gen = VMXNET3_INIT_GEN;
958         comp_ring->next2proc = 0;
959         comp_ring->gen = VMXNET3_INIT_GEN;
960
961         size = sizeof(struct Vmxnet3_RxDesc) * (ring0->size + ring1->size);
962         size += sizeof(struct Vmxnet3_RxCompDesc) * comp_ring->size;
963
964         mz = ring_dma_zone_reserve(dev, "rxdesc", queue_idx, size, socket_id);
965         if (mz == NULL) {
966                 PMD_INIT_LOG(ERR, "ERROR: Creating queue descriptors zone");
967                 return -ENOMEM;
968         }
969         memset(mz->addr, 0, mz->len);
970
971         /* cmd_ring0 initialization */
972         ring0->base = mz->addr;
973         ring0->basePA = mz->phys_addr;
974
975         /* cmd_ring1 initialization */
976         ring1->base = ring0->base + ring0->size;
977         ring1->basePA = ring0->basePA + sizeof(struct Vmxnet3_RxDesc) * ring0->size;
978
979         /* comp_ring initialization */
980         comp_ring->base = ring1->base + ring1->size;
981         comp_ring->basePA = ring1->basePA + sizeof(struct Vmxnet3_RxDesc) *
982                 ring1->size;
983
984         /* cmd_ring0-cmd_ring1 buf_info allocation */
985         for (i = 0; i < VMXNET3_RX_CMDRING_SIZE; i++) {
986
987                 ring = &rxq->cmd_ring[i];
988                 ring->rid = i;
989                 snprintf(mem_name, sizeof(mem_name), "rx_ring_%d_buf_info", i);
990
991                 ring->buf_info = rte_zmalloc(mem_name, ring->size * sizeof(vmxnet3_buf_info_t), RTE_CACHE_LINE_SIZE);
992                 if (ring->buf_info == NULL) {
993                         PMD_INIT_LOG(ERR, "ERROR: Creating rx_buf_info structure");
994                         return -ENOMEM;
995                 }
996         }
997
998         /* Update the data portion with rxq */
999         dev->data->rx_queues[queue_idx] = rxq;
1000
1001         return 0;
1002 }
1003
1004 /*
1005  * Initializes the Receive Unit:
1006  * loads mbufs into the rx queues in advance and marks the rx/tx queues as started
1007  */
1008 int
1009 vmxnet3_dev_rxtx_init(struct rte_eth_dev *dev)
1010 {
1011         struct vmxnet3_hw *hw = dev->data->dev_private;
1012
1013         int i, ret;
1014         uint8_t j;
1015
1016         PMD_INIT_FUNC_TRACE();
1017
1018         for (i = 0; i < hw->num_rx_queues; i++) {
1019                 vmxnet3_rx_queue_t *rxq = dev->data->rx_queues[i];
1020
1021                 for (j = 0; j < VMXNET3_RX_CMDRING_SIZE; j++) {
1022                         /* Post enough buffers to fill the whole ring */
1023                         ret = vmxnet3_post_rx_bufs(rxq, j);
1024                         if (ret <= 0) {
1025                                 PMD_INIT_LOG(ERR, "ERROR: Posting Rxq: %d buffers ring: %d", i, j);
1026                                 return -ret;
1027                         }
1028                         /* Update the device with next2fill so the posted mbufs can receive incoming packets */
1029                         if (unlikely(rxq->shared->ctrl.updateRxProd)) {
1030                                 VMXNET3_WRITE_BAR0_REG(hw, rxprod_reg[j] + (rxq->queue_id * VMXNET3_REG_ALIGN),
1031                                                        rxq->cmd_ring[j].next2fill);
1032                         }
1033                 }
1034                 rxq->stopped = FALSE;
1035                 rxq->start_seg = NULL;
1036         }
1037
1038         for (i = 0; i < dev->data->nb_tx_queues; i++) {
1039                 struct vmxnet3_tx_queue *txq = dev->data->tx_queues[i];
1040
1041                 txq->stopped = FALSE;
1042         }
1043
1044         return 0;
1045 }
1046
1047 static uint8_t rss_intel_key[40] = {
1048         0x6D, 0x5A, 0x56, 0xDA, 0x25, 0x5B, 0x0E, 0xC2,
1049         0x41, 0x67, 0x25, 0x3D, 0x43, 0xA3, 0x8F, 0xB0,
1050         0xD0, 0xCA, 0x2B, 0xCB, 0xAE, 0x7B, 0x30, 0xB4,
1051         0x77, 0xCB, 0x2D, 0xA3, 0x80, 0x30, 0xF2, 0x0C,
1052         0x6A, 0x42, 0xB7, 0x3B, 0xBE, 0xAC, 0x01, 0xFA,
1053 };
1054
1055 /*
1056  * Configure RSS feature
1057  */
1058 int
1059 vmxnet3_rss_configure(struct rte_eth_dev *dev)
1060 {
1061         struct vmxnet3_hw *hw = dev->data->dev_private;
1062         struct VMXNET3_RSSConf *dev_rss_conf;
1063         struct rte_eth_rss_conf *port_rss_conf;
1064         uint64_t rss_hf;
1065         uint8_t i, j;
1066
1067         PMD_INIT_FUNC_TRACE();
1068
1069         dev_rss_conf = hw->rss_conf;
1070         port_rss_conf = &dev->data->dev_conf.rx_adv_conf.rss_conf;
1071
1072         /* loading hashFunc */
1073         dev_rss_conf->hashFunc = VMXNET3_RSS_HASH_FUNC_TOEPLITZ;
1074         /* loading hashKeySize */
1075         dev_rss_conf->hashKeySize = VMXNET3_RSS_MAX_KEY_SIZE;
1076         /* loading indTableSize: must not exceed VMXNET3_RSS_MAX_IND_TABLE_SIZE (128) */
1077         dev_rss_conf->indTableSize = (uint16_t)(hw->num_rx_queues * 4);
1078
1079         if (port_rss_conf->rss_key == NULL) {
1080                 /* Default hash key */
1081                 port_rss_conf->rss_key = rss_intel_key;
1082         }
1083
1084         /* loading hashKey */
1085         memcpy(&dev_rss_conf->hashKey[0], port_rss_conf->rss_key, dev_rss_conf->hashKeySize);
1086
1087         /* loading indTable: spread the Rx queues round-robin across the table */
1088         for (i = 0, j = 0; i < dev_rss_conf->indTableSize; i++, j++) {
1089                 if (j == dev->data->nb_rx_queues)
1090                         j = 0;
1091                 dev_rss_conf->indTable[i] = j;
1092         }
1093
1094         /* loading hashType */
1095         dev_rss_conf->hashType = 0;
1096         rss_hf = port_rss_conf->rss_hf & VMXNET3_RSS_OFFLOAD_ALL;
1097         if (rss_hf & ETH_RSS_IPV4)
1098                 dev_rss_conf->hashType |= VMXNET3_RSS_HASH_TYPE_IPV4;
1099         if (rss_hf & ETH_RSS_NONFRAG_IPV4_TCP)
1100                 dev_rss_conf->hashType |= VMXNET3_RSS_HASH_TYPE_TCP_IPV4;
1101         if (rss_hf & ETH_RSS_IPV6)
1102                 dev_rss_conf->hashType |= VMXNET3_RSS_HASH_TYPE_IPV6;
1103         if (rss_hf & ETH_RSS_NONFRAG_IPV6_TCP)
1104                 dev_rss_conf->hashType |= VMXNET3_RSS_HASH_TYPE_TCP_IPV6;
1105
1106         return VMXNET3_SUCCESS;
1107 }