remove unused ring includes
[dpdk.git] drivers/net/vmxnet3/vmxnet3_rxtx.c
1 /*-
2  *   BSD LICENSE
3  *
4  *   Copyright(c) 2010-2015 Intel Corporation. All rights reserved.
5  *   All rights reserved.
6  *
7  *   Redistribution and use in source and binary forms, with or without
8  *   modification, are permitted provided that the following conditions
9  *   are met:
10  *
11  *     * Redistributions of source code must retain the above copyright
12  *       notice, this list of conditions and the following disclaimer.
13  *     * Redistributions in binary form must reproduce the above copyright
14  *       notice, this list of conditions and the following disclaimer in
15  *       the documentation and/or other materials provided with the
16  *       distribution.
17  *     * Neither the name of Intel Corporation nor the names of its
18  *       contributors may be used to endorse or promote products derived
19  *       from this software without specific prior written permission.
20  *
21  *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
22  *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
23  *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
24  *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
25  *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
26  *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
27  *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
28  *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
29  *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
30  *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
31  *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
32  */
33
34 #include <sys/queue.h>
35
36 #include <stdio.h>
37 #include <stdlib.h>
38 #include <string.h>
39 #include <errno.h>
40 #include <stdint.h>
41 #include <stdarg.h>
42 #include <unistd.h>
43 #include <inttypes.h>
44
45 #include <rte_byteorder.h>
46 #include <rte_common.h>
47 #include <rte_cycles.h>
48 #include <rte_log.h>
49 #include <rte_debug.h>
50 #include <rte_interrupts.h>
51 #include <rte_pci.h>
52 #include <rte_memory.h>
53 #include <rte_memzone.h>
54 #include <rte_launch.h>
55 #include <rte_eal.h>
56 #include <rte_per_lcore.h>
57 #include <rte_lcore.h>
58 #include <rte_atomic.h>
59 #include <rte_branch_prediction.h>
60 #include <rte_mempool.h>
61 #include <rte_malloc.h>
62 #include <rte_mbuf.h>
63 #include <rte_ether.h>
64 #include <rte_ethdev.h>
65 #include <rte_prefetch.h>
66 #include <rte_ip.h>
67 #include <rte_udp.h>
68 #include <rte_tcp.h>
69 #include <rte_sctp.h>
70 #include <rte_string_fns.h>
71 #include <rte_errno.h>
72
73 #include "base/vmxnet3_defs.h"
74 #include "vmxnet3_ring.h"
75
76 #include "vmxnet3_logs.h"
77 #include "vmxnet3_ethdev.h"
78
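/*
 * BAR0 producer-index (doorbell) registers for the two Rx command rings,
 * indexed by ring id when newly filled buffers are handed to the device
 * (see vmxnet3_recv_pkts() and vmxnet3_dev_rxtx_init()).
 */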
79 static const uint32_t rxprod_reg[2] = {VMXNET3_REG_RXPROD, VMXNET3_REG_RXPROD2};
80
81 static int vmxnet3_post_rx_bufs(vmxnet3_rx_queue_t*, uint8_t);
82 static void vmxnet3_tq_tx_complete(vmxnet3_tx_queue_t *);
83 #ifdef RTE_LIBRTE_VMXNET3_DEBUG_DRIVER_NOT_USED
84 static void vmxnet3_rxq_dump(struct vmxnet3_rx_queue *);
85 static void vmxnet3_txq_dump(struct vmxnet3_tx_queue *);
86 #endif
87
88 #ifdef RTE_LIBRTE_VMXNET3_DEBUG_DRIVER_NOT_USED
89 static void
90 vmxnet3_rxq_dump(struct vmxnet3_rx_queue *rxq)
91 {
92         uint32_t avail = 0;
93
94         if (rxq == NULL)
95                 return;
96
97         PMD_RX_LOG(DEBUG,
98                    "RXQ: cmd0 base : %p cmd1 base : %p comp ring base : %p.",
99                    rxq->cmd_ring[0].base, rxq->cmd_ring[1].base, rxq->comp_ring.base);
100         PMD_RX_LOG(DEBUG,
101                    "RXQ: cmd0 basePA : 0x%lx cmd1 basePA : 0x%lx comp ring basePA : 0x%lx.",
102                    (unsigned long)rxq->cmd_ring[0].basePA,
103                    (unsigned long)rxq->cmd_ring[1].basePA,
104                    (unsigned long)rxq->comp_ring.basePA);
105
106         avail = vmxnet3_cmd_ring_desc_avail(&rxq->cmd_ring[0]);
107         PMD_RX_LOG(DEBUG,
108                    "RXQ:cmd0: size=%u; free=%u; next2proc=%u; queued=%u",
109                    (uint32_t)rxq->cmd_ring[0].size, avail,
110                    rxq->comp_ring.next2proc,
111                    rxq->cmd_ring[0].size - avail);
112
113         avail = vmxnet3_cmd_ring_desc_avail(&rxq->cmd_ring[1]);
114         PMD_RX_LOG(DEBUG, "RXQ:cmd1 size=%u; free=%u; next2proc=%u; queued=%u",
115                    (uint32_t)rxq->cmd_ring[1].size, avail, rxq->comp_ring.next2proc,
116                    rxq->cmd_ring[1].size - avail);
117
118 }
119
120 static void
121 vmxnet3_txq_dump(struct vmxnet3_tx_queue *txq)
122 {
123         uint32_t avail = 0;
124
125         if (txq == NULL)
126                 return;
127
128         PMD_TX_LOG(DEBUG, "TXQ: cmd base : %p comp ring base : %p data ring base : %p.",
129                    txq->cmd_ring.base, txq->comp_ring.base, txq->data_ring.base);
130         PMD_TX_LOG(DEBUG, "TXQ: cmd basePA : 0x%lx comp ring basePA : 0x%lx data ring basePA : 0x%lx.",
131                    (unsigned long)txq->cmd_ring.basePA,
132                    (unsigned long)txq->comp_ring.basePA,
133                    (unsigned long)txq->data_ring.basePA);
134
135         avail = vmxnet3_cmd_ring_desc_avail(&txq->cmd_ring);
136         PMD_TX_LOG(DEBUG, "TXQ: size=%u; free=%u; next2proc=%u; queued=%u",
137                    (uint32_t)txq->cmd_ring.size, avail,
138                    txq->comp_ring.next2proc, txq->cmd_ring.size - avail);
139 }
140 #endif
141
142 static void
143 vmxnet3_cmd_ring_release_mbufs(vmxnet3_cmd_ring_t *ring)
144 {
145         while (ring->next2comp != ring->next2fill) {
146                 /* No need to worry about descriptor ownership; the device is quiesced by now. */
147                 vmxnet3_buf_info_t *buf_info = ring->buf_info + ring->next2comp;
148
149                 if (buf_info->m) {
150                         rte_pktmbuf_free(buf_info->m);
151                         buf_info->m = NULL;
152                         buf_info->bufPA = 0;
153                         buf_info->len = 0;
154                 }
155                 vmxnet3_cmd_ring_adv_next2comp(ring);
156         }
157 }
158
159 static void
160 vmxnet3_cmd_ring_release(vmxnet3_cmd_ring_t *ring)
161 {
162         vmxnet3_cmd_ring_release_mbufs(ring);
163         rte_free(ring->buf_info);
164         ring->buf_info = NULL;
165 }
166
167
168 void
169 vmxnet3_dev_tx_queue_release(void *txq)
170 {
171         vmxnet3_tx_queue_t *tq = txq;
172
173         if (tq != NULL) {
174                 /* Release the cmd_ring */
175                 vmxnet3_cmd_ring_release(&tq->cmd_ring);
176         }
177 }
178
179 void
180 vmxnet3_dev_rx_queue_release(void *rxq)
181 {
182         int i;
183         vmxnet3_rx_queue_t *rq = rxq;
184
185         if (rq != NULL) {
186                 /* Release both the cmd_rings */
187                 for (i = 0; i < VMXNET3_RX_CMDRING_SIZE; i++)
188                         vmxnet3_cmd_ring_release(&rq->cmd_ring[i]);
189         }
190 }
191
192 static void
193 vmxnet3_dev_tx_queue_reset(void *txq)
194 {
195         vmxnet3_tx_queue_t *tq = txq;
196         struct vmxnet3_cmd_ring *ring = &tq->cmd_ring;
197         struct vmxnet3_comp_ring *comp_ring = &tq->comp_ring;
198         struct vmxnet3_data_ring *data_ring = &tq->data_ring;
199         int size;
200
201         if (tq != NULL) {
202                 /* Release the cmd_ring mbufs */
203                 vmxnet3_cmd_ring_release_mbufs(&tq->cmd_ring);
204         }
205
206         /* Tx vmxnet rings structure initialization*/
207         ring->next2fill = 0;
208         ring->next2comp = 0;
209         ring->gen = VMXNET3_INIT_GEN;
210         comp_ring->next2proc = 0;
211         comp_ring->gen = VMXNET3_INIT_GEN;
212
213         size = sizeof(struct Vmxnet3_TxDesc) * ring->size;
214         size += sizeof(struct Vmxnet3_TxCompDesc) * comp_ring->size;
215         size += sizeof(struct Vmxnet3_TxDataDesc) * data_ring->size;
216
217         memset(ring->base, 0, size);
218 }
219
220 static void
221 vmxnet3_dev_rx_queue_reset(void *rxq)
222 {
223         int i;
224         vmxnet3_rx_queue_t *rq = rxq;
225         struct vmxnet3_cmd_ring *ring0, *ring1;
226         struct vmxnet3_comp_ring *comp_ring;
227         int size;
228
229         if (rq != NULL) {
230                 /* Release both the cmd_rings mbufs */
231                 for (i = 0; i < VMXNET3_RX_CMDRING_SIZE; i++)
232                         vmxnet3_cmd_ring_release_mbufs(&rq->cmd_ring[i]);
233         }
234
235         ring0 = &rq->cmd_ring[0];
236         ring1 = &rq->cmd_ring[1];
237         comp_ring = &rq->comp_ring;
238
239         /* Rx vmxnet rings structure initialization */
240         ring0->next2fill = 0;
241         ring1->next2fill = 0;
242         ring0->next2comp = 0;
243         ring1->next2comp = 0;
244         ring0->gen = VMXNET3_INIT_GEN;
245         ring1->gen = VMXNET3_INIT_GEN;
246         comp_ring->next2proc = 0;
247         comp_ring->gen = VMXNET3_INIT_GEN;
248
249         size = sizeof(struct Vmxnet3_RxDesc) * (ring0->size + ring1->size);
250         size += sizeof(struct Vmxnet3_RxCompDesc) * comp_ring->size;
251
252         memset(ring0->base, 0, size);
253 }
254
255 void
256 vmxnet3_dev_clear_queues(struct rte_eth_dev *dev)
257 {
258         unsigned i;
259
260         PMD_INIT_FUNC_TRACE();
261
262         for (i = 0; i < dev->data->nb_tx_queues; i++) {
263                 struct vmxnet3_tx_queue *txq = dev->data->tx_queues[i];
264
265                 if (txq != NULL) {
266                         txq->stopped = TRUE;
267                         vmxnet3_dev_tx_queue_reset(txq);
268                 }
269         }
270
271         for (i = 0; i < dev->data->nb_rx_queues; i++) {
272                 struct vmxnet3_rx_queue *rxq = dev->data->rx_queues[i];
273
274                 if (rxq != NULL) {
275                         rxq->stopped = TRUE;
276                         vmxnet3_dev_rx_queue_reset(rxq);
277                 }
278         }
279 }
280
281 static int
282 vmxnet3_unmap_pkt(uint16_t eop_idx, vmxnet3_tx_queue_t *txq)
283 {
284         int completed = 0;
285         struct rte_mbuf *mbuf;
286
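        /*
         * On transmit, the mbuf pointer is stored only in the buf_info of
         * the EOP descriptor (see vmxnet3_xmit_pkts()), so freeing that
         * single mbuf releases the whole chained packet.
         */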
287         /* Release cmd_ring descriptor and free mbuf */
288         RTE_ASSERT(txq->cmd_ring.base[eop_idx].txd.eop == 1);
289
290         mbuf = txq->cmd_ring.buf_info[eop_idx].m;
291         if (mbuf == NULL)
292                 rte_panic("EOP desc does not point to a valid mbuf");
293         rte_pktmbuf_free(mbuf);
294
295         txq->cmd_ring.buf_info[eop_idx].m = NULL;
296
297         while (txq->cmd_ring.next2comp != eop_idx) {
298                 /* no out-of-order completion */
299                 RTE_ASSERT(txq->cmd_ring.base[txq->cmd_ring.next2comp].txd.cq == 0);
300                 vmxnet3_cmd_ring_adv_next2comp(&txq->cmd_ring);
301                 completed++;
302         }
303
304         /* Mark the txd for which tcd was generated as completed */
305         vmxnet3_cmd_ring_adv_next2comp(&txq->cmd_ring);
306
307         return completed + 1;
308 }
309
310 static void
311 vmxnet3_tq_tx_complete(vmxnet3_tx_queue_t *txq)
312 {
313         int completed = 0;
314         vmxnet3_comp_ring_t *comp_ring = &txq->comp_ring;
315         struct Vmxnet3_TxCompDesc *tcd = (struct Vmxnet3_TxCompDesc *)
316                 (comp_ring->base + comp_ring->next2proc);
317
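        /*
         * A completion descriptor whose gen bit matches the ring's current
         * gen value has been written back by the device and can be
         * processed; the expected gen flips each time the ring wraps.
         */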
318         while (tcd->gen == comp_ring->gen) {
319                 completed += vmxnet3_unmap_pkt(tcd->txdIdx, txq);
320
321                 vmxnet3_comp_ring_adv_next2proc(comp_ring);
322                 tcd = (struct Vmxnet3_TxCompDesc *)(comp_ring->base +
323                                                     comp_ring->next2proc);
324         }
325
326         PMD_TX_LOG(DEBUG, "Processed %d tx comps & command descs.", completed);
327 }
328
329 uint16_t
330 vmxnet3_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts,
331                   uint16_t nb_pkts)
332 {
333         uint16_t nb_tx;
334         vmxnet3_tx_queue_t *txq = tx_queue;
335         struct vmxnet3_hw *hw = txq->hw;
336         Vmxnet3_TxQueueCtrl *txq_ctrl = &txq->shared->ctrl;
337         uint32_t deferred = rte_le_to_cpu_32(txq_ctrl->txNumDeferred);
338
339         if (unlikely(txq->stopped)) {
340                 PMD_TX_LOG(DEBUG, "Tx queue is stopped.");
341                 return 0;
342         }
343
344         /* Free up the comp_descriptors aggressively */
345         vmxnet3_tq_tx_complete(txq);
346
347         nb_tx = 0;
348         while (nb_tx < nb_pkts) {
349                 Vmxnet3_GenericDesc *gdesc;
350                 vmxnet3_buf_info_t *tbi;
351                 uint32_t first2fill, avail, dw2;
352                 struct rte_mbuf *txm = tx_pkts[nb_tx];
353                 struct rte_mbuf *m_seg = txm;
354                 int copy_size = 0;
355                 bool tso = (txm->ol_flags & PKT_TX_TCP_SEG) != 0;
356                 /* # of descriptors needed for a packet. */
357                 unsigned count = txm->nb_segs;
358
359                 avail = vmxnet3_cmd_ring_desc_avail(&txq->cmd_ring);
360                 if (count > avail) {
361                         /* Is command ring full? */
362                         if (unlikely(avail == 0)) {
363                                 PMD_TX_LOG(DEBUG, "No free ring descriptors");
364                                 txq->stats.tx_ring_full++;
365                                 txq->stats.drop_total += (nb_pkts - nb_tx);
366                                 break;
367                         }
368
369                         /* Command ring is not full but cannot handle the
370                          * multi-segmented packet. Let's try the next packet
371                          * in this case.
372                          */
373                         PMD_TX_LOG(DEBUG, "Running out of ring descriptors "
374                                    "(avail %d needed %d)", avail, count);
375                         txq->stats.drop_total++;
376                         if (tso)
377                                 txq->stats.drop_tso++;
378                         rte_pktmbuf_free(txm);
379                         nb_tx++;
380                         continue;
381                 }
382
383                 /* Drop non-TSO packet that is excessively fragmented */
384                 if (unlikely(!tso && count > VMXNET3_MAX_TXD_PER_PKT)) {
385                         PMD_TX_LOG(ERR, "Non-TSO packet cannot occupy more than %d tx "
386                                    "descriptors. Packet dropped.", VMXNET3_MAX_TXD_PER_PKT);
387                         txq->stats.drop_too_many_segs++;
388                         txq->stats.drop_total++;
389                         rte_pktmbuf_free(txm);
390                         nb_tx++;
391                         continue;
392                 }
393
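                /*
                 * Small single-segment packets are copied into the Tx data
                 * ring slot matching the first command descriptor; the
                 * descriptor address is later pointed at the data ring so
                 * the device need not fetch the mbuf data separately.
                 */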
394                 if (txm->nb_segs == 1 && rte_pktmbuf_pkt_len(txm) <= VMXNET3_HDR_COPY_SIZE) {
395                         struct Vmxnet3_TxDataDesc *tdd;
396
397                         tdd = txq->data_ring.base + txq->cmd_ring.next2fill;
398                         copy_size = rte_pktmbuf_pkt_len(txm);
399                         rte_memcpy(tdd->data, rte_pktmbuf_mtod(txm, char *), copy_size);
400                 }
401
402                 /* use the previous gen bit for the SOP desc */
403                 dw2 = (txq->cmd_ring.gen ^ 0x1) << VMXNET3_TXD_GEN_SHIFT;
404                 first2fill = txq->cmd_ring.next2fill;
405                 do {
406                         /* Remember the transmit buffer for cleanup */
407                         tbi = txq->cmd_ring.buf_info + txq->cmd_ring.next2fill;
408
409                         /* NB: the following assumes that the VMXNET3
410                          * maximum transmit buffer size (16K) is greater
411                          * than the maximum size of an mbuf segment.
412                          */
413                         gdesc = txq->cmd_ring.base + txq->cmd_ring.next2fill;
414                         if (copy_size)
415                                 gdesc->txd.addr = rte_cpu_to_le_64(txq->data_ring.basePA +
416                                                                 txq->cmd_ring.next2fill *
417                                                                 sizeof(struct Vmxnet3_TxDataDesc));
418                         else
419                                 gdesc->txd.addr = rte_mbuf_data_dma_addr(m_seg);
420
421                         gdesc->dword[2] = dw2 | m_seg->data_len;
422                         gdesc->dword[3] = 0;
423
424                         /* move to the next2fill descriptor */
425                         vmxnet3_cmd_ring_adv_next2fill(&txq->cmd_ring);
426
427                         /* use the right gen for non-SOP desc */
428                         dw2 = txq->cmd_ring.gen << VMXNET3_TXD_GEN_SHIFT;
429                 } while ((m_seg = m_seg->next) != NULL);
430
431                 /* set the last buf_info for the pkt */
432                 tbi->m = txm;
433                 /* Update the EOP descriptor */
434                 gdesc->dword[3] |= VMXNET3_TXD_EOP | VMXNET3_TXD_CQ;
435
436                 /* Add VLAN tag if present */
437                 gdesc = txq->cmd_ring.base + first2fill;
438                 if (txm->ol_flags & PKT_TX_VLAN_PKT) {
439                         gdesc->txd.ti = 1;
440                         gdesc->txd.tci = txm->vlan_tci;
441                 }
442
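                /*
                 * Offload setup on the SOP descriptor: om selects the
                 * offload mode, hlen is the header length the device must
                 * parse, and msscof carries either the TSO MSS or the
                 * offset of the L4 checksum field.
                 */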
443                 if (tso) {
444                         uint16_t mss = txm->tso_segsz;
445
446                         RTE_ASSERT(mss > 0);
447
448                         gdesc->txd.hlen = txm->l2_len + txm->l3_len + txm->l4_len;
449                         gdesc->txd.om = VMXNET3_OM_TSO;
450                         gdesc->txd.msscof = mss;
451
452                         deferred += (rte_pktmbuf_pkt_len(txm) - gdesc->txd.hlen + mss - 1) / mss;
453                 } else if (txm->ol_flags & PKT_TX_L4_MASK) {
454                         gdesc->txd.om = VMXNET3_OM_CSUM;
455                         gdesc->txd.hlen = txm->l2_len + txm->l3_len;
456
457                         switch (txm->ol_flags & PKT_TX_L4_MASK) {
458                         case PKT_TX_TCP_CKSUM:
459                                 gdesc->txd.msscof = gdesc->txd.hlen + offsetof(struct tcp_hdr, cksum);
460                                 break;
461                         case PKT_TX_UDP_CKSUM:
462                                 gdesc->txd.msscof = gdesc->txd.hlen + offsetof(struct udp_hdr, dgram_cksum);
463                                 break;
464                         default:
465                                 PMD_TX_LOG(WARNING, "requested cksum offload not supported %#" PRIx64,
466                                            txm->ol_flags & PKT_TX_L4_MASK);
467                                 abort();
468                         }
469                         deferred++;
470                 } else {
471                         gdesc->txd.hlen = 0;
472                         gdesc->txd.om = VMXNET3_OM_NONE;
473                         gdesc->txd.msscof = 0;
474                         deferred++;
475                 }
476
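                /*
                 * Ownership hand-off: every descriptor of this packet was
                 * written with the current gen bit except the SOP, which
                 * still carries the previous gen.  After the compiler
                 * barrier below, flipping the SOP gen is what makes the
                 * whole chain visible to the device.
                 */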
477                 /* flip the GEN bit on the SOP */
478                 rte_compiler_barrier();
479                 gdesc->dword[2] ^= VMXNET3_TXD_GEN;
480
481                 txq_ctrl->txNumDeferred = rte_cpu_to_le_32(deferred);
482                 nb_tx++;
483         }
484
485         PMD_TX_LOG(DEBUG, "vmxnet3 txThreshold: %u", rte_le_to_cpu_32(txq_ctrl->txThreshold));
486
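        /*
         * txNumDeferred counts the packets (or, for TSO, the resulting
         * segments) queued since the last doorbell.  Writing TXPROD only
         * once the txThreshold read from the shared queue control area is
         * reached batches the MMIO doorbell writes.
         */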
487         if (deferred >= rte_le_to_cpu_32(txq_ctrl->txThreshold)) {
488                 txq_ctrl->txNumDeferred = 0;
489                 /* Notify vSwitch that packets are available. */
490                 VMXNET3_WRITE_BAR0_REG(hw, (VMXNET3_REG_TXPROD + txq->queue_id * VMXNET3_REG_ALIGN),
491                                        txq->cmd_ring.next2fill);
492         }
493
494         return nb_tx;
495 }
496
497 /*
498  *  Allocate mbufs and post Rx descriptors with the buffer details so that
499  *  the device can receive packets into those buffers.
500  *      Ring layout:
501  *      Of the two rings, the 1st ring holds buffers of type 0 and type 1.
502  *      bufs_per_pkt is set such that for non-LRO cases all the buffers
503  *      required by a frame fit in the 1st ring (1st buf of type 0, the
504  *      rest of type 1).
505  *      The 2nd ring holds buffers of type 1 only and is used mostly
506  *      for LRO.
507  */
508 static int
509 vmxnet3_post_rx_bufs(vmxnet3_rx_queue_t *rxq, uint8_t ring_id)
510 {
511         int err = 0;
512         uint32_t i = 0, val = 0;
513         struct vmxnet3_cmd_ring *ring = &rxq->cmd_ring[ring_id];
514
515         if (ring_id == 0) {
516                 /* Usually: One HEAD type buf per packet
517                  * val = (ring->next2fill % rxq->hw->bufs_per_pkt) ?
518                  * VMXNET3_RXD_BTYPE_BODY : VMXNET3_RXD_BTYPE_HEAD;
519                  */
520
521                 /* We use single packet buffer so all heads here */
522                 val = VMXNET3_RXD_BTYPE_HEAD;
523         } else {
524                 /* All BODY type buffers for 2nd ring */
525                 val = VMXNET3_RXD_BTYPE_BODY;
526         }
527
528         while (vmxnet3_cmd_ring_desc_avail(ring) > 0) {
529                 struct Vmxnet3_RxDesc *rxd;
530                 struct rte_mbuf *mbuf;
531                 vmxnet3_buf_info_t *buf_info = &ring->buf_info[ring->next2fill];
532
533                 rxd = (struct Vmxnet3_RxDesc *)(ring->base + ring->next2fill);
534
535                 /* Allocate blank mbuf for the current Rx Descriptor */
536                 mbuf = rte_mbuf_raw_alloc(rxq->mp);
537                 if (unlikely(mbuf == NULL)) {
538                         PMD_RX_LOG(ERR, "Error allocating mbuf");
539                         rxq->stats.rx_buf_alloc_failure++;
540                         err = ENOMEM;
541                         break;
542                 }
543
544                 /*
545                  * Load the mbuf pointer into buf_info[next2fill]; the
546                  * buf_info entry plays the role of the cookie in a virtio virtqueue.
547                  */
548                 buf_info->m = mbuf;
549                 buf_info->len = (uint16_t)(mbuf->buf_len -
550                                            RTE_PKTMBUF_HEADROOM);
551                 buf_info->bufPA =
552                         rte_mbuf_data_dma_addr_default(mbuf);
553
554                 /* Load Rx Descriptor with the buffer's GPA */
555                 rxd->addr = buf_info->bufPA;
556
557                 /* After this point rxd->addr MUST not be NULL */
558                 rxd->btype = val;
559                 rxd->len = buf_info->len;
560                 /* Flip gen bit at the end to change ownership */
561                 rxd->gen = ring->gen;
562
563                 vmxnet3_cmd_ring_adv_next2fill(ring);
564                 i++;
565         }
566
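        /*
         * vmxnet3_cmd_ring_desc_avail() keeps one descriptor slot unused,
         * so an avail count of (size - 1) means no buffer is currently
         * posted in this ring.
         */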
567         /* Return error only if no buffers are posted at present */
568         if (vmxnet3_cmd_ring_desc_avail(ring) >= (ring->size - 1))
569                 return -err;
570         else
571                 return i;
572 }
573
574
575 /* Receive side checksum and other offloads */
576 static void
577 vmxnet3_rx_offload(const Vmxnet3_RxCompDesc *rcd, struct rte_mbuf *rxm)
578 {
579         /* Check for RSS */
580         if (rcd->rssType != VMXNET3_RCD_RSS_TYPE_NONE) {
581                 rxm->ol_flags |= PKT_RX_RSS_HASH;
582                 rxm->hash.rss = rcd->rssHash;
583         }
584
585         /* Check packet type, checksum errors, etc. Only support IPv4 for now. */
586         if (rcd->v4) {
587                 struct ether_hdr *eth = rte_pktmbuf_mtod(rxm, struct ether_hdr *);
588                 struct ipv4_hdr *ip = (struct ipv4_hdr *)(eth + 1);
589
590                 if (((ip->version_ihl & 0xf) << 2) > (int)sizeof(struct ipv4_hdr))
591                         rxm->packet_type = RTE_PTYPE_L3_IPV4_EXT;
592                 else
593                         rxm->packet_type = RTE_PTYPE_L3_IPV4;
594
595                 if (!rcd->cnc) {
596                         if (!rcd->ipc)
597                                 rxm->ol_flags |= PKT_RX_IP_CKSUM_BAD;
598
599                         if ((rcd->tcp || rcd->udp) && !rcd->tuc)
600                                 rxm->ol_flags |= PKT_RX_L4_CKSUM_BAD;
601                 }
602         }
603 }
604
605 /*
606  * Process the Rx completion ring of the given vmxnet3_rx_queue,
607  * receiving at most nb_pkts packets, and return the number received.
608  */
609 uint16_t
610 vmxnet3_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t nb_pkts)
611 {
612         uint16_t nb_rx;
613         uint32_t nb_rxd, idx;
614         uint8_t ring_idx;
615         vmxnet3_rx_queue_t *rxq;
616         Vmxnet3_RxCompDesc *rcd;
617         vmxnet3_buf_info_t *rbi;
618         Vmxnet3_RxDesc *rxd;
619         struct rte_mbuf *rxm = NULL;
620         struct vmxnet3_hw *hw;
621
622         nb_rx = 0;
623         ring_idx = 0;
624         nb_rxd = 0;
625         idx = 0;
626
627         rxq = rx_queue;
628         hw = rxq->hw;
629
630         rcd = &rxq->comp_ring.base[rxq->comp_ring.next2proc].rcd;
631
632         if (unlikely(rxq->stopped)) {
633                 PMD_RX_LOG(DEBUG, "Rx queue is stopped.");
634                 return 0;
635         }
636
637         while (rcd->gen == rxq->comp_ring.gen) {
638                 if (nb_rx >= nb_pkts)
639                         break;
640
641                 idx = rcd->rxdIdx;
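                /*
                 * rqID in the completion identifies the source command
                 * ring: rqID == qid1 means ring 0 (HEAD buffers), otherwise
                 * ring 1 (BODY buffers used for scattered/LRO frames).
                 */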
642                 ring_idx = (uint8_t)((rcd->rqID == rxq->qid1) ? 0 : 1);
643                 rxd = (Vmxnet3_RxDesc *)rxq->cmd_ring[ring_idx].base + idx;
644                 RTE_SET_USED(rxd); /* used only for assert when enabled */
645                 rbi = rxq->cmd_ring[ring_idx].buf_info + idx;
646
647                 PMD_RX_LOG(DEBUG, "rxd idx: %d ring idx: %d.", idx, ring_idx);
648
649                 RTE_ASSERT(rcd->len <= rxd->len);
650                 RTE_ASSERT(rbi->m);
651
652                 /* Get the packet buffer pointer from buf_info */
653                 rxm = rbi->m;
654
655                 /* Clear descriptor associated buf_info to be reused */
656                 rbi->m = NULL;
657                 rbi->bufPA = 0;
658
659                 /* Record the ring index of the descriptor just completed */
660                 rxq->cmd_ring[ring_idx].next2comp = idx;
661
662                 /* For RCD with EOP set, check if there is frame error */
663                 if (unlikely(rcd->eop && rcd->err)) {
664                         rxq->stats.drop_total++;
665                         rxq->stats.drop_err++;
666
667                         if (!rcd->fcs) {
668                                 rxq->stats.drop_fcs++;
669                                 PMD_RX_LOG(ERR, "Recv packet dropped due to frame err.");
670                         }
671                         PMD_RX_LOG(ERR, "Error in received packet rcd#:%d rxd:%d",
672                                    (int)(rcd - (struct Vmxnet3_RxCompDesc *)
673                                          rxq->comp_ring.base), rcd->rxdIdx);
674                         rte_pktmbuf_free_seg(rxm);
675                         goto rcd_done;
676                 }
677
678
679                 /* Initialize newly received packet buffer */
680                 rxm->port = rxq->port_id;
681                 rxm->nb_segs = 1;
682                 rxm->next = NULL;
683                 rxm->pkt_len = (uint16_t)rcd->len;
684                 rxm->data_len = (uint16_t)rcd->len;
685                 rxm->data_off = RTE_PKTMBUF_HEADROOM;
686                 rxm->ol_flags = 0;
687                 rxm->vlan_tci = 0;
688
689                 /*
690                  * If this is the first buffer of the received packet,
691                  * set the pointer to the first mbuf of the packet
692                  * Otherwise, update the total length and the number of segments
693                  * of the current scattered packet, and update the pointer to
694                  * the last mbuf of the current packet.
695                  */
696                 if (rcd->sop) {
697                         RTE_ASSERT(rxd->btype == VMXNET3_RXD_BTYPE_HEAD);
698
699                         if (unlikely(rcd->len == 0)) {
700                                 RTE_ASSERT(rcd->eop);
701
702                                 PMD_RX_LOG(DEBUG,
703                                            "Rx buf was skipped. rxring[%d][%d])",
704                                            ring_idx, idx);
705                                 rte_pktmbuf_free_seg(rxm);
706                                 goto rcd_done;
707                         }
708
709                         rxq->start_seg = rxm;
710                         vmxnet3_rx_offload(rcd, rxm);
711                 } else {
712                         struct rte_mbuf *start = rxq->start_seg;
713
714                         RTE_ASSERT(rxd->btype == VMXNET3_RXD_BTYPE_BODY);
715
716                         start->pkt_len += rxm->data_len;
717                         start->nb_segs++;
718
719                         rxq->last_seg->next = rxm;
720                 }
721                 rxq->last_seg = rxm;
722
723                 if (rcd->eop) {
724                         struct rte_mbuf *start = rxq->start_seg;
725
726                         /* Check for hardware stripped VLAN tag */
727                         if (rcd->ts) {
728                                 start->ol_flags |= (PKT_RX_VLAN_PKT | PKT_RX_VLAN_STRIPPED);
729                                 start->vlan_tci = rte_le_to_cpu_16((uint16_t)rcd->tci);
730                         }
731
732                         rx_pkts[nb_rx++] = start;
733                         rxq->start_seg = NULL;
734                 }
735
736 rcd_done:
737                 rxq->cmd_ring[ring_idx].next2comp = idx;
738                 VMXNET3_INC_RING_IDX_ONLY(rxq->cmd_ring[ring_idx].next2comp, rxq->cmd_ring[ring_idx].size);
739
740                 /* Allocate new buffers and re-post descriptors to refill the ring */
741                 vmxnet3_post_rx_bufs(rxq, ring_idx);
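                /*
                 * When updateRxProd is set in the shared control area, the
                 * device expects the driver to publish the new fill index
                 * through the RXPROD/RXPROD2 doorbell registers.
                 */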
742                 if (unlikely(rxq->shared->ctrl.updateRxProd)) {
743                         VMXNET3_WRITE_BAR0_REG(hw, rxprod_reg[ring_idx] + (rxq->queue_id * VMXNET3_REG_ALIGN),
744                                                rxq->cmd_ring[ring_idx].next2fill);
745                 }
746
747                 /* Advance to the next descriptor in comp_ring */
748                 vmxnet3_comp_ring_adv_next2proc(&rxq->comp_ring);
749
750                 rcd = &rxq->comp_ring.base[rxq->comp_ring.next2proc].rcd;
751                 nb_rxd++;
752                 if (nb_rxd > rxq->cmd_ring[0].size) {
753                         PMD_RX_LOG(ERR,
754                                    "Used up quota of receiving packets,"
755                                    " relinquish control.");
756                         break;
757                 }
758         }
759
760         return nb_rx;
761 }
762
763 /*
764  * Create memzone for device rings. malloc can't be used as the physical address is
765  * needed. If the memzone is already created, then this function returns a ptr
766  * to the old one.
767  */
768 static const struct rte_memzone *
769 ring_dma_zone_reserve(struct rte_eth_dev *dev, const char *ring_name,
770                       uint16_t queue_id, uint32_t ring_size, int socket_id)
771 {
772         char z_name[RTE_MEMZONE_NAMESIZE];
773         const struct rte_memzone *mz;
774
775         snprintf(z_name, sizeof(z_name), "%s_%s_%d_%d",
776                         dev->driver->pci_drv.name, ring_name,
777                         dev->data->port_id, queue_id);
778
779         mz = rte_memzone_lookup(z_name);
780         if (mz)
781                 return mz;
782
783         return rte_memzone_reserve_aligned(z_name, ring_size,
784                         socket_id, 0, VMXNET3_RING_BA_ALIGN);
785 }
786
787 int
788 vmxnet3_dev_tx_queue_setup(struct rte_eth_dev *dev,
789                            uint16_t queue_idx,
790                            uint16_t nb_desc,
791                            unsigned int socket_id,
792                            const struct rte_eth_txconf *tx_conf)
793 {
794         struct vmxnet3_hw *hw = dev->data->dev_private;
795         const struct rte_memzone *mz;
796         struct vmxnet3_tx_queue *txq;
797         struct vmxnet3_cmd_ring *ring;
798         struct vmxnet3_comp_ring *comp_ring;
799         struct vmxnet3_data_ring *data_ring;
800         int size;
801
802         PMD_INIT_FUNC_TRACE();
803
804         if ((tx_conf->txq_flags & ETH_TXQ_FLAGS_NOXSUMSCTP) !=
805             ETH_TXQ_FLAGS_NOXSUMSCTP) {
806                 PMD_INIT_LOG(ERR, "SCTP checksum offload not supported");
807                 return -EINVAL;
808         }
809
810         txq = rte_zmalloc("ethdev_tx_queue", sizeof(struct vmxnet3_tx_queue), RTE_CACHE_LINE_SIZE);
811         if (txq == NULL) {
812                 PMD_INIT_LOG(ERR, "Can not allocate tx queue structure");
813                 return -ENOMEM;
814         }
815
816         txq->queue_id = queue_idx;
817         txq->port_id = dev->data->port_id;
818         txq->shared = &hw->tqd_start[queue_idx];
819         txq->hw = hw;
820         txq->qid = queue_idx;
821         txq->stopped = TRUE;
822
823         ring = &txq->cmd_ring;
824         comp_ring = &txq->comp_ring;
825         data_ring = &txq->data_ring;
826
827         /* Tx vmxnet ring length should be between 512-4096 */
828         if (nb_desc < VMXNET3_DEF_TX_RING_SIZE) {
829                 PMD_INIT_LOG(ERR, "VMXNET3 Tx Ring Size Min: %u",
830                              VMXNET3_DEF_TX_RING_SIZE);
831                 return -EINVAL;
832         } else if (nb_desc > VMXNET3_TX_RING_MAX_SIZE) {
833                 PMD_INIT_LOG(ERR, "VMXNET3 Tx Ring Size Max: %u",
834                              VMXNET3_TX_RING_MAX_SIZE);
835                 return -EINVAL;
836         } else {
837                 ring->size = nb_desc;
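                /*
                 * Ring sizes must be a multiple of the alignment implied by
                 * VMXNET3_RING_SIZE_MASK; round the requested size down by
                 * masking off the low-order bits.
                 */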
838                 ring->size &= ~VMXNET3_RING_SIZE_MASK;
839         }
840         comp_ring->size = data_ring->size = ring->size;
841
842         /* Tx vmxnet rings structure initialization*/
843         ring->next2fill = 0;
844         ring->next2comp = 0;
845         ring->gen = VMXNET3_INIT_GEN;
846         comp_ring->next2proc = 0;
847         comp_ring->gen = VMXNET3_INIT_GEN;
848
849         size = sizeof(struct Vmxnet3_TxDesc) * ring->size;
850         size += sizeof(struct Vmxnet3_TxCompDesc) * comp_ring->size;
851         size += sizeof(struct Vmxnet3_TxDataDesc) * data_ring->size;
852
853         mz = ring_dma_zone_reserve(dev, "txdesc", queue_idx, size, socket_id);
854         if (mz == NULL) {
855                 PMD_INIT_LOG(ERR, "ERROR: Creating queue descriptors zone");
856                 return -ENOMEM;
857         }
858         memset(mz->addr, 0, mz->len);
859
860         /* cmd_ring initialization */
861         ring->base = mz->addr;
862         ring->basePA = mz->phys_addr;
863
864         /* comp_ring initialization */
865         comp_ring->base = ring->base + ring->size;
866         comp_ring->basePA = ring->basePA +
867                 (sizeof(struct Vmxnet3_TxDesc) * ring->size);
868
869         /* data_ring initialization */
870         data_ring->base = (Vmxnet3_TxDataDesc *)(comp_ring->base + comp_ring->size);
871         data_ring->basePA = comp_ring->basePA +
872                         (sizeof(struct Vmxnet3_TxCompDesc) * comp_ring->size);
873
874         /* cmd_ring0 buf_info allocation */
875         ring->buf_info = rte_zmalloc("tx_ring_buf_info",
876                                      ring->size * sizeof(vmxnet3_buf_info_t), RTE_CACHE_LINE_SIZE);
877         if (ring->buf_info == NULL) {
878                 PMD_INIT_LOG(ERR, "ERROR: Creating tx_buf_info structure");
879                 return -ENOMEM;
880         }
881
882         /* Update the data portion with txq */
883         dev->data->tx_queues[queue_idx] = txq;
884
885         return 0;
886 }
887
888 int
889 vmxnet3_dev_rx_queue_setup(struct rte_eth_dev *dev,
890                            uint16_t queue_idx,
891                            uint16_t nb_desc,
892                            unsigned int socket_id,
893                            __attribute__((unused)) const struct rte_eth_rxconf *rx_conf,
894                            struct rte_mempool *mp)
895 {
896         const struct rte_memzone *mz;
897         struct vmxnet3_rx_queue *rxq;
898         struct vmxnet3_hw     *hw = dev->data->dev_private;
899         struct vmxnet3_cmd_ring *ring0, *ring1, *ring;
900         struct vmxnet3_comp_ring *comp_ring;
901         int size;
902         uint8_t i;
903         char mem_name[32];
904
905         PMD_INIT_FUNC_TRACE();
906
907         rxq = rte_zmalloc("ethdev_rx_queue", sizeof(struct vmxnet3_rx_queue), RTE_CACHE_LINE_SIZE);
908         if (rxq == NULL) {
909                 PMD_INIT_LOG(ERR, "Can not allocate rx queue structure");
910                 return -ENOMEM;
911         }
912
913         rxq->mp = mp;
914         rxq->queue_id = queue_idx;
915         rxq->port_id = dev->data->port_id;
916         rxq->shared = &hw->rqd_start[queue_idx];
917         rxq->hw = hw;
918         rxq->qid1 = queue_idx;
919         rxq->qid2 = queue_idx + hw->num_rx_queues;
920         rxq->stopped = TRUE;
921
922         ring0 = &rxq->cmd_ring[0];
923         ring1 = &rxq->cmd_ring[1];
924         comp_ring = &rxq->comp_ring;
925
926         /* Rx vmxnet rings length should be between the supported min and max sizes */
927         if (nb_desc < VMXNET3_DEF_RX_RING_SIZE) {
928                 PMD_INIT_LOG(ERR, "VMXNET3 Rx Ring Size Min: %u", VMXNET3_DEF_RX_RING_SIZE);
929                 return -EINVAL;
930         } else if (nb_desc > VMXNET3_RX_RING_MAX_SIZE) {
931                 PMD_INIT_LOG(ERR, "VMXNET3 Rx Ring Size Max: %u", VMXNET3_RX_RING_MAX_SIZE);
932                 return -EINVAL;
933         } else {
934                 ring0->size = nb_desc;
935                 ring0->size &= ~VMXNET3_RING_SIZE_MASK;
936                 ring1->size = ring0->size;
937         }
938
939         comp_ring->size = ring0->size + ring1->size;
940
941         /* Rx vmxnet rings structure initialization */
942         ring0->next2fill = 0;
943         ring1->next2fill = 0;
944         ring0->next2comp = 0;
945         ring1->next2comp = 0;
946         ring0->gen = VMXNET3_INIT_GEN;
947         ring1->gen = VMXNET3_INIT_GEN;
948         comp_ring->next2proc = 0;
949         comp_ring->gen = VMXNET3_INIT_GEN;
950
951         size = sizeof(struct Vmxnet3_RxDesc) * (ring0->size + ring1->size);
952         size += sizeof(struct Vmxnet3_RxCompDesc) * comp_ring->size;
953
954         mz = ring_dma_zone_reserve(dev, "rxdesc", queue_idx, size, socket_id);
955         if (mz == NULL) {
956                 PMD_INIT_LOG(ERR, "ERROR: Creating queue descriptors zone");
957                 return -ENOMEM;
958         }
959         memset(mz->addr, 0, mz->len);
960
961         /* cmd_ring0 initialization */
962         ring0->base = mz->addr;
963         ring0->basePA = mz->phys_addr;
964
965         /* cmd_ring1 initialization */
966         ring1->base = ring0->base + ring0->size;
967         ring1->basePA = ring0->basePA + sizeof(struct Vmxnet3_RxDesc) * ring0->size;
968
969         /* comp_ring initialization */
970         comp_ring->base = ring1->base + ring1->size;
971         comp_ring->basePA = ring1->basePA + sizeof(struct Vmxnet3_RxDesc) *
972                 ring1->size;
973
974         /* cmd_ring0-cmd_ring1 buf_info allocation */
975         for (i = 0; i < VMXNET3_RX_CMDRING_SIZE; i++) {
976
977                 ring = &rxq->cmd_ring[i];
978                 ring->rid = i;
979                 snprintf(mem_name, sizeof(mem_name), "rx_ring_%d_buf_info", i);
980
981                 ring->buf_info = rte_zmalloc(mem_name, ring->size * sizeof(vmxnet3_buf_info_t), RTE_CACHE_LINE_SIZE);
982                 if (ring->buf_info == NULL) {
983                         PMD_INIT_LOG(ERR, "ERROR: Creating rx_buf_info structure");
984                         return -ENOMEM;
985                 }
986         }
987
988         /* Update the data portion with rxq */
989         dev->data->rx_queues[queue_idx] = rxq;
990
991         return 0;
992 }
993
994 /*
995  * Initializes Receive Unit
996  * Load mbufs in rx queue in advance
997  */
998 int
999 vmxnet3_dev_rxtx_init(struct rte_eth_dev *dev)
1000 {
1001         struct vmxnet3_hw *hw = dev->data->dev_private;
1002
1003         int i, ret;
1004         uint8_t j;
1005
1006         PMD_INIT_FUNC_TRACE();
1007
1008         for (i = 0; i < hw->num_rx_queues; i++) {
1009                 vmxnet3_rx_queue_t *rxq = dev->data->rx_queues[i];
1010
1011                 for (j = 0; j < VMXNET3_RX_CMDRING_SIZE; j++) {
1012                         /* Fill ring j completely with newly allocated mbufs */
1013                         ret = vmxnet3_post_rx_bufs(rxq, j);
1014                         if (ret <= 0) {
1015                                 PMD_INIT_LOG(ERR, "ERROR: Posting Rxq: %d buffers ring: %d", i, j);
1016                                 return -ret;
1017                         }
1018                         /* Publish the next2fill index so the device can use the posted mbufs for incoming packets */
1019                         if (unlikely(rxq->shared->ctrl.updateRxProd)) {
1020                                 VMXNET3_WRITE_BAR0_REG(hw, rxprod_reg[j] + (rxq->queue_id * VMXNET3_REG_ALIGN),
1021                                                        rxq->cmd_ring[j].next2fill);
1022                         }
1023                 }
1024                 rxq->stopped = FALSE;
1025                 rxq->start_seg = NULL;
1026         }
1027
1028         for (i = 0; i < dev->data->nb_tx_queues; i++) {
1029                 struct vmxnet3_tx_queue *txq = dev->data->tx_queues[i];
1030
1031                 txq->stopped = FALSE;
1032         }
1033
1034         return 0;
1035 }
1036
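/*
 * Default 40-byte Toeplitz RSS key, used when the application does not
 * supply one in rte_eth_rss_conf.
 */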
1037 static uint8_t rss_intel_key[40] = {
1038         0x6D, 0x5A, 0x56, 0xDA, 0x25, 0x5B, 0x0E, 0xC2,
1039         0x41, 0x67, 0x25, 0x3D, 0x43, 0xA3, 0x8F, 0xB0,
1040         0xD0, 0xCA, 0x2B, 0xCB, 0xAE, 0x7B, 0x30, 0xB4,
1041         0x77, 0xCB, 0x2D, 0xA3, 0x80, 0x30, 0xF2, 0x0C,
1042         0x6A, 0x42, 0xB7, 0x3B, 0xBE, 0xAC, 0x01, 0xFA,
1043 };
1044
1045 /*
1046  * Configure RSS feature
1047  */
1048 int
1049 vmxnet3_rss_configure(struct rte_eth_dev *dev)
1050 {
1051         struct vmxnet3_hw *hw = dev->data->dev_private;
1052         struct VMXNET3_RSSConf *dev_rss_conf;
1053         struct rte_eth_rss_conf *port_rss_conf;
1054         uint64_t rss_hf;
1055         uint8_t i, j;
1056
1057         PMD_INIT_FUNC_TRACE();
1058
1059         dev_rss_conf = hw->rss_conf;
1060         port_rss_conf = &dev->data->dev_conf.rx_adv_conf.rss_conf;
1061
1062         /* loading hashFunc */
1063         dev_rss_conf->hashFunc = VMXNET3_RSS_HASH_FUNC_TOEPLITZ;
1064         /* loading hashKeySize */
1065         dev_rss_conf->hashKeySize = VMXNET3_RSS_MAX_KEY_SIZE;
1066         /* loading indTableSize: must not exceed VMXNET3_RSS_MAX_IND_TABLE_SIZE (128) */
1067         dev_rss_conf->indTableSize = (uint16_t)(hw->num_rx_queues * 4);
1068
1069         if (port_rss_conf->rss_key == NULL) {
1070                 /* Default hash key */
1071                 port_rss_conf->rss_key = rss_intel_key;
1072         }
1073
1074         /* loading hashKey */
1075         memcpy(&dev_rss_conf->hashKey[0], port_rss_conf->rss_key, dev_rss_conf->hashKeySize);
1076
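        /*
         * Populate the RSS indirection table round-robin over the
         * configured Rx queues; each entry maps a hash bucket to a queue
         * index.
         */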
1077         /* loading indTable */
1078         for (i = 0, j = 0; i < dev_rss_conf->indTableSize; i++, j++) {
1079                 if (j == dev->data->nb_rx_queues)
1080                         j = 0;
1081                 dev_rss_conf->indTable[i] = j;
1082         }
1083
1084         /* loading hashType */
1085         dev_rss_conf->hashType = 0;
1086         rss_hf = port_rss_conf->rss_hf & VMXNET3_RSS_OFFLOAD_ALL;
1087         if (rss_hf & ETH_RSS_IPV4)
1088                 dev_rss_conf->hashType |= VMXNET3_RSS_HASH_TYPE_IPV4;
1089         if (rss_hf & ETH_RSS_NONFRAG_IPV4_TCP)
1090                 dev_rss_conf->hashType |= VMXNET3_RSS_HASH_TYPE_TCP_IPV4;
1091         if (rss_hf & ETH_RSS_IPV6)
1092                 dev_rss_conf->hashType |= VMXNET3_RSS_HASH_TYPE_IPV6;
1093         if (rss_hf & ETH_RSS_NONFRAG_IPV6_TCP)
1094                 dev_rss_conf->hashType |= VMXNET3_RSS_HASH_TYPE_TCP_IPV6;
1095
1096         return VMXNET3_SUCCESS;
1097 }