net/vmxnet3: coding style changes
[dpdk.git] / drivers / net / vmxnet3 / vmxnet3_rxtx.c
1 /*-
2  *   BSD LICENSE
3  *
4  *   Copyright(c) 2010-2015 Intel Corporation. All rights reserved.
5  *   All rights reserved.
6  *
7  *   Redistribution and use in source and binary forms, with or without
8  *   modification, are permitted provided that the following conditions
9  *   are met:
10  *
11  *     * Redistributions of source code must retain the above copyright
12  *       notice, this list of conditions and the following disclaimer.
13  *     * Redistributions in binary form must reproduce the above copyright
14  *       notice, this list of conditions and the following disclaimer in
15  *       the documentation and/or other materials provided with the
16  *       distribution.
17  *     * Neither the name of Intel Corporation nor the names of its
18  *       contributors may be used to endorse or promote products derived
19  *       from this software without specific prior written permission.
20  *
21  *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
22  *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
23  *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
24  *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
25  *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
26  *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
27  *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
28  *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
29  *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
30  *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
31  *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
32  */
33
34 #include <sys/queue.h>
35
36 #include <stdio.h>
37 #include <stdlib.h>
38 #include <string.h>
39 #include <errno.h>
40 #include <stdint.h>
41 #include <stdarg.h>
42 #include <unistd.h>
43 #include <inttypes.h>
44
45 #include <rte_byteorder.h>
46 #include <rte_common.h>
47 #include <rte_cycles.h>
48 #include <rte_log.h>
49 #include <rte_debug.h>
50 #include <rte_interrupts.h>
51 #include <rte_pci.h>
52 #include <rte_memory.h>
53 #include <rte_memzone.h>
54 #include <rte_launch.h>
55 #include <rte_eal.h>
56 #include <rte_per_lcore.h>
57 #include <rte_lcore.h>
58 #include <rte_atomic.h>
59 #include <rte_branch_prediction.h>
60 #include <rte_mempool.h>
61 #include <rte_malloc.h>
62 #include <rte_mbuf.h>
63 #include <rte_ether.h>
64 #include <rte_ethdev.h>
65 #include <rte_prefetch.h>
66 #include <rte_ip.h>
67 #include <rte_udp.h>
68 #include <rte_tcp.h>
69 #include <rte_sctp.h>
70 #include <rte_string_fns.h>
71 #include <rte_errno.h>
72
73 #include "base/vmxnet3_defs.h"
74 #include "vmxnet3_ring.h"
75
76 #include "vmxnet3_logs.h"
77 #include "vmxnet3_ethdev.h"
78
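/* BAR0 producer register offset for each of the two Rx command rings */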
79 static const uint32_t rxprod_reg[2] = {VMXNET3_REG_RXPROD, VMXNET3_REG_RXPROD2};
80
81 static int vmxnet3_post_rx_bufs(vmxnet3_rx_queue_t*, uint8_t);
82 static void vmxnet3_tq_tx_complete(vmxnet3_tx_queue_t *);
83 #ifdef RTE_LIBRTE_VMXNET3_DEBUG_DRIVER_NOT_USED
84 static void vmxnet3_rxq_dump(struct vmxnet3_rx_queue *);
85 static void vmxnet3_txq_dump(struct vmxnet3_tx_queue *);
86 #endif
87
88 #ifdef RTE_LIBRTE_VMXNET3_DEBUG_DRIVER_NOT_USED
89 static void
90 vmxnet3_rxq_dump(struct vmxnet3_rx_queue *rxq)
91 {
92         uint32_t avail = 0;
93
94         if (rxq == NULL)
95                 return;
96
97         PMD_RX_LOG(DEBUG,
98                    "RXQ: cmd0 base : %p cmd1 base : %p comp ring base : %p.",
99                    rxq->cmd_ring[0].base, rxq->cmd_ring[1].base, rxq->comp_ring.base);
100         PMD_RX_LOG(DEBUG,
101                    "RXQ: cmd0 basePA : 0x%lx cmd1 basePA : 0x%lx comp ring basePA : 0x%lx.",
102                    (unsigned long)rxq->cmd_ring[0].basePA,
103                    (unsigned long)rxq->cmd_ring[1].basePA,
104                    (unsigned long)rxq->comp_ring.basePA);
105
106         avail = vmxnet3_cmd_ring_desc_avail(&rxq->cmd_ring[0]);
107         PMD_RX_LOG(DEBUG,
108                    "RXQ:cmd0: size=%u; free=%u; next2proc=%u; queued=%u",
109                    (uint32_t)rxq->cmd_ring[0].size, avail,
110                    rxq->comp_ring.next2proc,
111                    rxq->cmd_ring[0].size - avail);
112
113         avail = vmxnet3_cmd_ring_desc_avail(&rxq->cmd_ring[1]);
114         PMD_RX_LOG(DEBUG, "RXQ:cmd1 size=%u; free=%u; next2proc=%u; queued=%u",
115                    (uint32_t)rxq->cmd_ring[1].size, avail, rxq->comp_ring.next2proc,
116                    rxq->cmd_ring[1].size - avail);
117
118 }
119
120 static void
121 vmxnet3_txq_dump(struct vmxnet3_tx_queue *txq)
122 {
123         uint32_t avail = 0;
124
125         if (txq == NULL)
126                 return;
127
128         PMD_TX_LOG(DEBUG, "TXQ: cmd base : %p comp ring base : %p data ring base : %p.",
129                    txq->cmd_ring.base, txq->comp_ring.base, txq->data_ring.base);
130         PMD_TX_LOG(DEBUG, "TXQ: cmd basePA : 0x%lx comp ring basePA : 0x%lx data ring basePA : 0x%lx.",
131                    (unsigned long)txq->cmd_ring.basePA,
132                    (unsigned long)txq->comp_ring.basePA,
133                    (unsigned long)txq->data_ring.basePA);
134
135         avail = vmxnet3_cmd_ring_desc_avail(&txq->cmd_ring);
136         PMD_TX_LOG(DEBUG, "TXQ: size=%u; free=%u; next2proc=%u; queued=%u",
137                    (uint32_t)txq->cmd_ring.size, avail,
138                    txq->comp_ring.next2proc, txq->cmd_ring.size - avail);
139 }
140 #endif
141
142 static void
143 vmxnet3_cmd_ring_release_mbufs(vmxnet3_cmd_ring_t *ring)
144 {
145         while (ring->next2comp != ring->next2fill) {
146                 /* No need to worry about tx desc ownership, device is quiesced by now. */
147                 vmxnet3_buf_info_t *buf_info = ring->buf_info + ring->next2comp;
148
149                 if (buf_info->m) {
150                         rte_pktmbuf_free(buf_info->m);
151                         buf_info->m = NULL;
152                         buf_info->bufPA = 0;
153                         buf_info->len = 0;
154                 }
155                 vmxnet3_cmd_ring_adv_next2comp(ring);
156         }
157 }
158
159 static void
160 vmxnet3_cmd_ring_release(vmxnet3_cmd_ring_t *ring)
161 {
162         vmxnet3_cmd_ring_release_mbufs(ring);
163         rte_free(ring->buf_info);
164         ring->buf_info = NULL;
165 }
166
167 void
168 vmxnet3_dev_tx_queue_release(void *txq)
169 {
170         vmxnet3_tx_queue_t *tq = txq;
171
172         if (tq != NULL) {
173                 /* Release the cmd_ring */
174                 vmxnet3_cmd_ring_release(&tq->cmd_ring);
175         }
176 }
177
178 void
179 vmxnet3_dev_rx_queue_release(void *rxq)
180 {
181         int i;
182         vmxnet3_rx_queue_t *rq = rxq;
183
184         if (rq != NULL) {
185                 /* Release both the cmd_rings */
186                 for (i = 0; i < VMXNET3_RX_CMDRING_SIZE; i++)
187                         vmxnet3_cmd_ring_release(&rq->cmd_ring[i]);
188         }
189 }
190
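/*
 * Free any mbufs still held by the Tx command ring, then re-initialise the
 * ring state and zero the descriptor memory.
 */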
191 static void
192 vmxnet3_dev_tx_queue_reset(void *txq)
193 {
194         vmxnet3_tx_queue_t *tq = txq;
195         struct vmxnet3_cmd_ring *ring = &tq->cmd_ring;
196         struct vmxnet3_comp_ring *comp_ring = &tq->comp_ring;
197         struct vmxnet3_data_ring *data_ring = &tq->data_ring;
198         int size;
199
200         if (tq != NULL) {
201                 /* Release the cmd_ring mbufs */
202                 vmxnet3_cmd_ring_release_mbufs(&tq->cmd_ring);
203         }
204
205         /* Tx vmxnet rings structure initialization */
206         ring->next2fill = 0;
207         ring->next2comp = 0;
208         ring->gen = VMXNET3_INIT_GEN;
209         comp_ring->next2proc = 0;
210         comp_ring->gen = VMXNET3_INIT_GEN;
211
212         size = sizeof(struct Vmxnet3_TxDesc) * ring->size;
213         size += sizeof(struct Vmxnet3_TxCompDesc) * comp_ring->size;
214         size += sizeof(struct Vmxnet3_TxDataDesc) * data_ring->size;
215
216         memset(ring->base, 0, size);
217 }
218
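/*
 * Free any mbufs still held by both Rx command rings, then re-initialise the
 * ring state and zero the descriptor memory.
 */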
219 static void
220 vmxnet3_dev_rx_queue_reset(void *rxq)
221 {
222         int i;
223         vmxnet3_rx_queue_t *rq = rxq;
224         struct vmxnet3_cmd_ring *ring0, *ring1;
225         struct vmxnet3_comp_ring *comp_ring;
226         int size;
227
228         if (rq != NULL) {
229                 /* Release both the cmd_rings mbufs */
230                 for (i = 0; i < VMXNET3_RX_CMDRING_SIZE; i++)
231                         vmxnet3_cmd_ring_release_mbufs(&rq->cmd_ring[i]);
232         }
233
234         ring0 = &rq->cmd_ring[0];
235         ring1 = &rq->cmd_ring[1];
236         comp_ring = &rq->comp_ring;
237
238         /* Rx vmxnet rings structure initialization */
239         ring0->next2fill = 0;
240         ring1->next2fill = 0;
241         ring0->next2comp = 0;
242         ring1->next2comp = 0;
243         ring0->gen = VMXNET3_INIT_GEN;
244         ring1->gen = VMXNET3_INIT_GEN;
245         comp_ring->next2proc = 0;
246         comp_ring->gen = VMXNET3_INIT_GEN;
247
248         size = sizeof(struct Vmxnet3_RxDesc) * (ring0->size + ring1->size);
249         size += sizeof(struct Vmxnet3_RxCompDesc) * comp_ring->size;
250
251         memset(ring0->base, 0, size);
252 }
253
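/*
 * Mark every Tx and Rx queue of the device as stopped and reset its rings.
 */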
254 void
255 vmxnet3_dev_clear_queues(struct rte_eth_dev *dev)
256 {
257         unsigned i;
258
259         PMD_INIT_FUNC_TRACE();
260
261         for (i = 0; i < dev->data->nb_tx_queues; i++) {
262                 struct vmxnet3_tx_queue *txq = dev->data->tx_queues[i];
263
264                 if (txq != NULL) {
265                         txq->stopped = TRUE;
266                         vmxnet3_dev_tx_queue_reset(txq);
267                 }
268         }
269
270         for (i = 0; i < dev->data->nb_rx_queues; i++) {
271                 struct vmxnet3_rx_queue *rxq = dev->data->rx_queues[i];
272
273                 if (rxq != NULL) {
274                         rxq->stopped = TRUE;
275                         vmxnet3_dev_rx_queue_reset(rxq);
276                 }
277         }
278 }
279
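/*
 * Reclaim the command ring descriptors of the packet whose EOP descriptor
 * sits at eop_idx: free its mbuf, advance next2comp past the packet's
 * descriptors and return how many descriptors were released.
 */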
280 static int
281 vmxnet3_unmap_pkt(uint16_t eop_idx, vmxnet3_tx_queue_t *txq)
282 {
283         int completed = 0;
284         struct rte_mbuf *mbuf;
285
286         /* Release cmd_ring descriptor and free mbuf */
287         RTE_ASSERT(txq->cmd_ring.base[eop_idx].txd.eop == 1);
288
289         mbuf = txq->cmd_ring.buf_info[eop_idx].m;
290         if (mbuf == NULL)
291                 rte_panic("EOP desc does not point to a valid mbuf");
292         rte_pktmbuf_free(mbuf);
293
294         txq->cmd_ring.buf_info[eop_idx].m = NULL;
295
296         while (txq->cmd_ring.next2comp != eop_idx) {
297                 /* no out-of-order completion */
298                 RTE_ASSERT(txq->cmd_ring.base[txq->cmd_ring.next2comp].txd.cq == 0);
299                 vmxnet3_cmd_ring_adv_next2comp(&txq->cmd_ring);
300                 completed++;
301         }
302
303         /* Mark the txd for which tcd was generated as completed */
304         vmxnet3_cmd_ring_adv_next2comp(&txq->cmd_ring);
305
306         return completed + 1;
307 }
308
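/*
 * Walk the Tx completion ring and release the command descriptors of every
 * packet whose completion entry carries the current generation bit.
 */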
309 static void
310 vmxnet3_tq_tx_complete(vmxnet3_tx_queue_t *txq)
311 {
312         int completed = 0;
313         vmxnet3_comp_ring_t *comp_ring = &txq->comp_ring;
314         struct Vmxnet3_TxCompDesc *tcd = (struct Vmxnet3_TxCompDesc *)
315                 (comp_ring->base + comp_ring->next2proc);
316
317         while (tcd->gen == comp_ring->gen) {
318                 completed += vmxnet3_unmap_pkt(tcd->txdIdx, txq);
319
320                 vmxnet3_comp_ring_adv_next2proc(comp_ring);
321                 tcd = (struct Vmxnet3_TxCompDesc *)(comp_ring->base +
322                                                     comp_ring->next2proc);
323         }
324
325         PMD_TX_LOG(DEBUG, "Processed %d tx comps & command descs.", completed);
326 }
327
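/*
 * Transmit a burst of packets. One command descriptor is consumed per mbuf
 * segment; the TXPROD register is written once the number of deferred
 * packets reaches txThreshold.
 */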
328 uint16_t
329 vmxnet3_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts,
330                   uint16_t nb_pkts)
331 {
332         uint16_t nb_tx;
333         vmxnet3_tx_queue_t *txq = tx_queue;
334         struct vmxnet3_hw *hw = txq->hw;
335         Vmxnet3_TxQueueCtrl *txq_ctrl = &txq->shared->ctrl;
336         uint32_t deferred = rte_le_to_cpu_32(txq_ctrl->txNumDeferred);
337
338         if (unlikely(txq->stopped)) {
339                 PMD_TX_LOG(DEBUG, "Tx queue is stopped.");
340                 return 0;
341         }
342
343         /* Free up the comp_descriptors aggressively */
344         vmxnet3_tq_tx_complete(txq);
345
346         nb_tx = 0;
347         while (nb_tx < nb_pkts) {
348                 Vmxnet3_GenericDesc *gdesc;
349                 vmxnet3_buf_info_t *tbi;
350                 uint32_t first2fill, avail, dw2;
351                 struct rte_mbuf *txm = tx_pkts[nb_tx];
352                 struct rte_mbuf *m_seg = txm;
353                 int copy_size = 0;
354                 bool tso = (txm->ol_flags & PKT_TX_TCP_SEG) != 0;
355                 /* # of descriptors needed for a packet. */
356                 unsigned count = txm->nb_segs;
357
358                 avail = vmxnet3_cmd_ring_desc_avail(&txq->cmd_ring);
359                 if (count > avail) {
360                         /* Is command ring full? */
361                         if (unlikely(avail == 0)) {
362                                 PMD_TX_LOG(DEBUG, "No free ring descriptors");
363                                 txq->stats.tx_ring_full++;
364                                 txq->stats.drop_total += (nb_pkts - nb_tx);
365                                 break;
366                         }
367
368                         /* Command ring is not full, but it cannot hold
369                          * this multi-segment packet. Drop the packet and
370                          * move on to the next one.
371                          */
372                         PMD_TX_LOG(DEBUG, "Running out of ring descriptors "
373                                    "(avail %d needed %d)", avail, count);
374                         txq->stats.drop_total++;
375                         if (tso)
376                                 txq->stats.drop_tso++;
377                         rte_pktmbuf_free(txm);
378                         nb_tx++;
379                         continue;
380                 }
381
382                 /* Drop non-TSO packet that is excessively fragmented */
383                 if (unlikely(!tso && count > VMXNET3_MAX_TXD_PER_PKT)) {
384                         PMD_TX_LOG(ERR, "Non-TSO packet cannot occupy more than %d tx "
385                                    "descriptors. Packet dropped.", VMXNET3_MAX_TXD_PER_PKT);
386                         txq->stats.drop_too_many_segs++;
387                         txq->stats.drop_total++;
388                         rte_pktmbuf_free(txm);
389                         nb_tx++;
390                         continue;
391                 }
392
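                /* Single-segment packets no larger than VMXNET3_HDR_COPY_SIZE
                 * are copied into the Tx data ring; the descriptor filled
                 * below then points at the data ring entry instead of the
                 * mbuf.
                 */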
393                 if (txm->nb_segs == 1 &&
394                     rte_pktmbuf_pkt_len(txm) <= VMXNET3_HDR_COPY_SIZE) {
395                         struct Vmxnet3_TxDataDesc *tdd;
396
397                         tdd = txq->data_ring.base + txq->cmd_ring.next2fill;
398                         copy_size = rte_pktmbuf_pkt_len(txm);
399                         rte_memcpy(tdd->data, rte_pktmbuf_mtod(txm, char *), copy_size);
400                 }
401
402                 /* use the previous gen bit for the SOP desc */
403                 dw2 = (txq->cmd_ring.gen ^ 0x1) << VMXNET3_TXD_GEN_SHIFT;
404                 first2fill = txq->cmd_ring.next2fill;
405                 do {
406                         /* Remember the transmit buffer for cleanup */
407                         tbi = txq->cmd_ring.buf_info + txq->cmd_ring.next2fill;
408
409                         /* NB: the following assumes that the VMXNET3
410                          * maximum transmit buffer size (16K) is greater
411                          * than the maximum mbuf segment size.
412                          */
413                         gdesc = txq->cmd_ring.base + txq->cmd_ring.next2fill;
414                         if (copy_size)
415                                 gdesc->txd.addr = rte_cpu_to_le_64(txq->data_ring.basePA +
416                                                                    txq->cmd_ring.next2fill *
417                                                                    sizeof(struct Vmxnet3_TxDataDesc));
418                         else
419                                 gdesc->txd.addr = rte_mbuf_data_dma_addr(m_seg);
420
421                         gdesc->dword[2] = dw2 | m_seg->data_len;
422                         gdesc->dword[3] = 0;
423
424                         /* move to the next2fill descriptor */
425                         vmxnet3_cmd_ring_adv_next2fill(&txq->cmd_ring);
426
427                         /* use the right gen for non-SOP desc */
428                         dw2 = txq->cmd_ring.gen << VMXNET3_TXD_GEN_SHIFT;
429                 } while ((m_seg = m_seg->next) != NULL);
430
431                 /* set the last buf_info for the pkt */
432                 tbi->m = txm;
433                 /* Update the EOP descriptor */
434                 gdesc->dword[3] |= VMXNET3_TXD_EOP | VMXNET3_TXD_CQ;
435
436                 /* Add VLAN tag if present */
437                 gdesc = txq->cmd_ring.base + first2fill;
438                 if (txm->ol_flags & PKT_TX_VLAN_PKT) {
439                         gdesc->txd.ti = 1;
440                         gdesc->txd.tci = txm->vlan_tci;
441                 }
442
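                /* Fill the offload fields of the SOP descriptor: TSO,
                 * L4 checksum offload, or no offload at all.
                 */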
443                 if (tso) {
444                         uint16_t mss = txm->tso_segsz;
445
446                         RTE_ASSERT(mss > 0);
447
448                         gdesc->txd.hlen = txm->l2_len + txm->l3_len + txm->l4_len;
449                         gdesc->txd.om = VMXNET3_OM_TSO;
450                         gdesc->txd.msscof = mss;
451
452                         deferred += (rte_pktmbuf_pkt_len(txm) - gdesc->txd.hlen + mss - 1) / mss;
453                 } else if (txm->ol_flags & PKT_TX_L4_MASK) {
454                         gdesc->txd.om = VMXNET3_OM_CSUM;
455                         gdesc->txd.hlen = txm->l2_len + txm->l3_len;
456
457                         switch (txm->ol_flags & PKT_TX_L4_MASK) {
458                         case PKT_TX_TCP_CKSUM:
459                                 gdesc->txd.msscof = gdesc->txd.hlen + offsetof(struct tcp_hdr, cksum);
460                                 break;
461                         case PKT_TX_UDP_CKSUM:
462                                 gdesc->txd.msscof = gdesc->txd.hlen + offsetof(struct udp_hdr, dgram_cksum);
463                                 break;
464                         default:
465                                 PMD_TX_LOG(WARNING, "requested cksum offload not supported %#llx",
466                                            txm->ol_flags & PKT_TX_L4_MASK);
467                                 abort();
468                         }
469                         deferred++;
470                 } else {
471                         gdesc->txd.hlen = 0;
472                         gdesc->txd.om = VMXNET3_OM_NONE;
473                         gdesc->txd.msscof = 0;
474                         deferred++;
475                 }
476
477                 /* flip the GEN bit on the SOP */
478                 rte_compiler_barrier();
479                 gdesc->dword[2] ^= VMXNET3_TXD_GEN;
480
481                 txq_ctrl->txNumDeferred = rte_cpu_to_le_32(deferred);
482                 nb_tx++;
483         }
484
485         PMD_TX_LOG(DEBUG, "vmxnet3 txThreshold: %u", rte_le_to_cpu_32(txq_ctrl->txThreshold));
486
487         if (deferred >= rte_le_to_cpu_32(txq_ctrl->txThreshold)) {
488                 txq_ctrl->txNumDeferred = 0;
489                 /* Notify vSwitch that packets are available. */
490                 VMXNET3_WRITE_BAR0_REG(hw, (VMXNET3_REG_TXPROD + txq->queue_id * VMXNET3_REG_ALIGN),
491                                        txq->cmd_ring.next2fill);
492         }
493
494         return nb_tx;
495 }
496
497 /*
498  *  Allocate mbufs and post Rx descriptors with the buffer details so that
499  *  the device can receive packets into those buffers.
500  *  Ring layout:
501  *      Of the two rings, the 1st ring holds buffers of type 0 (HEAD) and
502  *      type 1 (BODY). bufs_per_pkt is set so that, in the non-LRO case, all
503  *      buffers required by a frame fit in the 1st ring (the first buffer of
504  *      type 0 and the rest of type 1). The 2nd ring holds type 1 (BODY)
505  *      buffers only and is used mostly for LRO.
506  */
507 static int
508 vmxnet3_post_rx_bufs(vmxnet3_rx_queue_t *rxq, uint8_t ring_id)
509 {
510         int err = 0;
511         uint32_t i = 0, val = 0;
512         struct vmxnet3_cmd_ring *ring = &rxq->cmd_ring[ring_id];
513
514         if (ring_id == 0) {
515                 /* Usually: One HEAD type buf per packet
516                  * val = (ring->next2fill % rxq->hw->bufs_per_pkt) ?
517                  * VMXNET3_RXD_BTYPE_BODY : VMXNET3_RXD_BTYPE_HEAD;
518                  */
519
520                 /* We use single packet buffer so all heads here */
521                 val = VMXNET3_RXD_BTYPE_HEAD;
522         } else {
523                 /* All BODY type buffers for 2nd ring */
524                 val = VMXNET3_RXD_BTYPE_BODY;
525         }
526
527         while (vmxnet3_cmd_ring_desc_avail(ring) > 0) {
528                 struct Vmxnet3_RxDesc *rxd;
529                 struct rte_mbuf *mbuf;
530                 vmxnet3_buf_info_t *buf_info = &ring->buf_info[ring->next2fill];
531
532                 rxd = (struct Vmxnet3_RxDesc *)(ring->base + ring->next2fill);
533
534                 /* Allocate blank mbuf for the current Rx Descriptor */
535                 mbuf = rte_mbuf_raw_alloc(rxq->mp);
536                 if (unlikely(mbuf == NULL)) {
537                         PMD_RX_LOG(ERR, "Error allocating mbuf");
538                         rxq->stats.rx_buf_alloc_failure++;
539                         err = ENOMEM;
540                         break;
541                 }
542
543                 /*
544                  * Load the mbuf pointer into buf_info[ring->next2fill];
545                  * this entry plays the same role as a virtio virtqueue cookie.
546                  */
547                 buf_info->m = mbuf;
548                 buf_info->len = (uint16_t)(mbuf->buf_len -
549                                            RTE_PKTMBUF_HEADROOM);
550                 buf_info->bufPA = rte_mbuf_data_dma_addr_default(mbuf);
551
552                 /* Load Rx Descriptor with the buffer's GPA */
553                 rxd->addr = buf_info->bufPA;
554
555                 /* After this point rxd->addr MUST not be NULL */
556                 rxd->btype = val;
557                 rxd->len = buf_info->len;
558                 /* Flip gen bit at the end to change ownership */
559                 rxd->gen = ring->gen;
560
561                 vmxnet3_cmd_ring_adv_next2fill(ring);
562                 i++;
563         }
564
565         /* Return error only if no buffers are posted at present */
566         if (vmxnet3_cmd_ring_desc_avail(ring) >= (ring->size - 1))
567                 return -err;
568         else
569                 return i;
570 }
571
572
573 /* Receive side checksum and other offloads */
574 static void
575 vmxnet3_rx_offload(const Vmxnet3_RxCompDesc *rcd, struct rte_mbuf *rxm)
576 {
577         /* Check for RSS */
578         if (rcd->rssType != VMXNET3_RCD_RSS_TYPE_NONE) {
579                 rxm->ol_flags |= PKT_RX_RSS_HASH;
580                 rxm->hash.rss = rcd->rssHash;
581         }
582
583         /* Check packet type and checksum errors; only IPv4 is supported for now. */
584         if (rcd->v4) {
585                 struct ether_hdr *eth = rte_pktmbuf_mtod(rxm, struct ether_hdr *);
586                 struct ipv4_hdr *ip = (struct ipv4_hdr *)(eth + 1);
587
588                 if (((ip->version_ihl & 0xf) << 2) > (int)sizeof(struct ipv4_hdr))
589                         rxm->packet_type = RTE_PTYPE_L3_IPV4_EXT;
590                 else
591                         rxm->packet_type = RTE_PTYPE_L3_IPV4;
592
593                 if (!rcd->cnc) {
594                         if (!rcd->ipc)
595                                 rxm->ol_flags |= PKT_RX_IP_CKSUM_BAD;
596
597                         if ((rcd->tcp || rcd->udp) && !rcd->tuc)
598                                 rxm->ol_flags |= PKT_RX_L4_CKSUM_BAD;
599                 }
600         }
601 }
602
603 /*
604  * Process the Rx completion ring of the given vmxnet3_rx_queue for a burst
605  * of up to nb_pkts packets and return the number of packets received.
606  */
607 uint16_t
608 vmxnet3_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t nb_pkts)
609 {
610         uint16_t nb_rx;
611         uint32_t nb_rxd, idx;
612         uint8_t ring_idx;
613         vmxnet3_rx_queue_t *rxq;
614         Vmxnet3_RxCompDesc *rcd;
615         vmxnet3_buf_info_t *rbi;
616         Vmxnet3_RxDesc *rxd;
617         struct rte_mbuf *rxm = NULL;
618         struct vmxnet3_hw *hw;
619
620         nb_rx = 0;
621         ring_idx = 0;
622         nb_rxd = 0;
623         idx = 0;
624
625         rxq = rx_queue;
626         hw = rxq->hw;
627
628         rcd = &rxq->comp_ring.base[rxq->comp_ring.next2proc].rcd;
629
630         if (unlikely(rxq->stopped)) {
631                 PMD_RX_LOG(DEBUG, "Rx queue is stopped.");
632                 return 0;
633         }
634
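        /* Process completion descriptors only while their generation bit
         * matches the ring's; a mismatch means the device has not yet
         * written that entry.
         */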
635         while (rcd->gen == rxq->comp_ring.gen) {
636                 if (nb_rx >= nb_pkts)
637                         break;
638
639                 idx = rcd->rxdIdx;
640                 ring_idx = (uint8_t)((rcd->rqID == rxq->qid1) ? 0 : 1);
641                 rxd = (Vmxnet3_RxDesc *)rxq->cmd_ring[ring_idx].base + idx;
642                 RTE_SET_USED(rxd); /* used only for assert when enabled */
643                 rbi = rxq->cmd_ring[ring_idx].buf_info + idx;
644
645                 PMD_RX_LOG(DEBUG, "rxd idx: %d ring idx: %d.", idx, ring_idx);
646
647                 RTE_ASSERT(rcd->len <= rxd->len);
648                 RTE_ASSERT(rbi->m);
649
650                 /* Get the packet buffer pointer from buf_info */
651                 rxm = rbi->m;
652
653                 /* Clear descriptor associated buf_info to be reused */
654                 rbi->m = NULL;
655                 rbi->bufPA = 0;
656
657                 /* Update the index that we received a packet */
658                 rxq->cmd_ring[ring_idx].next2comp = idx;
659
660                 /* For an RCD with EOP set, check whether there is a frame error */
661                 if (unlikely(rcd->eop && rcd->err)) {
662                         rxq->stats.drop_total++;
663                         rxq->stats.drop_err++;
664
665                         if (!rcd->fcs) {
666                                 rxq->stats.drop_fcs++;
667                                 PMD_RX_LOG(ERR, "Recv packet dropped due to frame err.");
668                         }
669                         PMD_RX_LOG(ERR, "Error in received packet rcd#:%d rxd:%d",
670                                    (int)(rcd - (struct Vmxnet3_RxCompDesc *)
671                                          rxq->comp_ring.base), rcd->rxdIdx);
672                         rte_pktmbuf_free_seg(rxm);
673                         goto rcd_done;
674                 }
675
676                 /* Initialize newly received packet buffer */
677                 rxm->port = rxq->port_id;
678                 rxm->nb_segs = 1;
679                 rxm->next = NULL;
680                 rxm->pkt_len = (uint16_t)rcd->len;
681                 rxm->data_len = (uint16_t)rcd->len;
682                 rxm->data_off = RTE_PKTMBUF_HEADROOM;
683                 rxm->ol_flags = 0;
684                 rxm->vlan_tci = 0;
685
686                 /*
687                  * If this is the first buffer of the received packet,
688                  * set the pointer to the first mbuf of the packet.
689                  * Otherwise, update the total length and the number of
690                  * segments of the current scattered packet, and update
691                  * the pointer to the last mbuf of the current packet.
692                  */
693                 if (rcd->sop) {
694                         RTE_ASSERT(rxd->btype == VMXNET3_RXD_BTYPE_HEAD);
695
696                         if (unlikely(rcd->len == 0)) {
697                                 RTE_ASSERT(rcd->eop);
698
699                                 PMD_RX_LOG(DEBUG,
700                                            "Rx buf was skipped. rxring[%d][%d])",
701                                            ring_idx, idx);
702                                 rte_pktmbuf_free_seg(rxm);
703                                 goto rcd_done;
704                         }
705
706                         rxq->start_seg = rxm;
707                         vmxnet3_rx_offload(rcd, rxm);
708                 } else {
709                         struct rte_mbuf *start = rxq->start_seg;
710
711                         RTE_ASSERT(rxd->btype == VMXNET3_RXD_BTYPE_BODY);
712
713                         start->pkt_len += rxm->data_len;
714                         start->nb_segs++;
715
716                         rxq->last_seg->next = rxm;
717                 }
718                 rxq->last_seg = rxm;
719
720                 if (rcd->eop) {
721                         struct rte_mbuf *start = rxq->start_seg;
722
723                         /* Check for hardware stripped VLAN tag */
724                         if (rcd->ts) {
725                                 start->ol_flags |= (PKT_RX_VLAN_PKT | PKT_RX_VLAN_STRIPPED);
726                                 start->vlan_tci = rte_le_to_cpu_16((uint16_t)rcd->tci);
727                         }
728
729                         rx_pkts[nb_rx++] = start;
730                         rxq->start_seg = NULL;
731                 }
732
733 rcd_done:
734                 rxq->cmd_ring[ring_idx].next2comp = idx;
735                 VMXNET3_INC_RING_IDX_ONLY(rxq->cmd_ring[ring_idx].next2comp,
736                                           rxq->cmd_ring[ring_idx].size);
737
738                 /* Allocate new mbufs and refill the Rx descriptors */
739                 vmxnet3_post_rx_bufs(rxq, ring_idx);
740                 if (unlikely(rxq->shared->ctrl.updateRxProd)) {
741                         VMXNET3_WRITE_BAR0_REG(hw, rxprod_reg[ring_idx] + (rxq->queue_id * VMXNET3_REG_ALIGN),
742                                                rxq->cmd_ring[ring_idx].next2fill);
743                 }
744
745                 /* Advance to the next descriptor in comp_ring */
746                 vmxnet3_comp_ring_adv_next2proc(&rxq->comp_ring);
747
748                 rcd = &rxq->comp_ring.base[rxq->comp_ring.next2proc].rcd;
749                 nb_rxd++;
750                 if (nb_rxd > rxq->cmd_ring[0].size) {
751                         PMD_RX_LOG(ERR, "Used up quota of receiving packets,"
752                                    " relinquish control.");
753                         break;
754                 }
755         }
756
757         return nb_rx;
758 }
759
760 /*
761  * Create a memzone for the device rings. malloc() cannot be used as the
762  * physical address is needed. If the memzone already exists, this function
763  * returns a pointer to the existing one.
764  */
765 static const struct rte_memzone *
766 ring_dma_zone_reserve(struct rte_eth_dev *dev, const char *ring_name,
767                       uint16_t queue_id, uint32_t ring_size, int socket_id)
768 {
769         char z_name[RTE_MEMZONE_NAMESIZE];
770         const struct rte_memzone *mz;
771
772         snprintf(z_name, sizeof(z_name), "%s_%s_%d_%d",
773                  dev->driver->pci_drv.driver.name, ring_name,
774                  dev->data->port_id, queue_id);
775
776         mz = rte_memzone_lookup(z_name);
777         if (mz)
778                 return mz;
779
780         return rte_memzone_reserve_aligned(z_name, ring_size,
781                                            socket_id, 0, VMXNET3_RING_BA_ALIGN);
782 }
783
784 int
785 vmxnet3_dev_tx_queue_setup(struct rte_eth_dev *dev,
786                            uint16_t queue_idx,
787                            uint16_t nb_desc,
788                            unsigned int socket_id,
789                            __rte_unused const struct rte_eth_txconf *tx_conf)
790 {
791         struct vmxnet3_hw *hw = dev->data->dev_private;
792         const struct rte_memzone *mz;
793         struct vmxnet3_tx_queue *txq;
794         struct vmxnet3_cmd_ring *ring;
795         struct vmxnet3_comp_ring *comp_ring;
796         struct vmxnet3_data_ring *data_ring;
797         int size;
798
799         PMD_INIT_FUNC_TRACE();
800
801         if ((tx_conf->txq_flags & ETH_TXQ_FLAGS_NOXSUMSCTP) !=
802             ETH_TXQ_FLAGS_NOXSUMSCTP) {
803                 PMD_INIT_LOG(ERR, "SCTP checksum offload not supported");
804                 return -EINVAL;
805         }
806
807         txq = rte_zmalloc("ethdev_tx_queue", sizeof(struct vmxnet3_tx_queue),
808                           RTE_CACHE_LINE_SIZE);
809         if (txq == NULL) {
810                 PMD_INIT_LOG(ERR, "Can not allocate tx queue structure");
811                 return -ENOMEM;
812         }
813
814         txq->queue_id = queue_idx;
815         txq->port_id = dev->data->port_id;
816         txq->shared = &hw->tqd_start[queue_idx];
817         txq->hw = hw;
818         txq->qid = queue_idx;
819         txq->stopped = TRUE;
820
821         ring = &txq->cmd_ring;
822         comp_ring = &txq->comp_ring;
823         data_ring = &txq->data_ring;
824
825         /* Tx vmxnet ring length should be between 512-4096 */
826         if (nb_desc < VMXNET3_DEF_TX_RING_SIZE) {
827                 PMD_INIT_LOG(ERR, "VMXNET3 Tx Ring Size Min: %u",
828                              VMXNET3_DEF_TX_RING_SIZE);
829                 return -EINVAL;
830         } else if (nb_desc > VMXNET3_TX_RING_MAX_SIZE) {
831                 PMD_INIT_LOG(ERR, "VMXNET3 Tx Ring Size Max: %u",
832                              VMXNET3_TX_RING_MAX_SIZE);
833                 return -EINVAL;
834         } else {
835                 ring->size = nb_desc;
836                 ring->size &= ~VMXNET3_RING_SIZE_MASK;
837         }
838         comp_ring->size = data_ring->size = ring->size;
839
840         /* Tx vmxnet rings structure initialization */
841         ring->next2fill = 0;
842         ring->next2comp = 0;
843         ring->gen = VMXNET3_INIT_GEN;
844         comp_ring->next2proc = 0;
845         comp_ring->gen = VMXNET3_INIT_GEN;
846
847         size = sizeof(struct Vmxnet3_TxDesc) * ring->size;
848         size += sizeof(struct Vmxnet3_TxCompDesc) * comp_ring->size;
849         size += sizeof(struct Vmxnet3_TxDataDesc) * data_ring->size;
850
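        /* The command, completion and data rings are laid out back to back
         * in one physically contiguous memzone.
         */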
851         mz = ring_dma_zone_reserve(dev, "txdesc", queue_idx, size, socket_id);
852         if (mz == NULL) {
853                 PMD_INIT_LOG(ERR, "ERROR: Creating queue descriptors zone");
854                 return -ENOMEM;
855         }
856         memset(mz->addr, 0, mz->len);
857
858         /* cmd_ring initialization */
859         ring->base = mz->addr;
860         ring->basePA = mz->phys_addr;
861
862         /* comp_ring initialization */
863         comp_ring->base = ring->base + ring->size;
864         comp_ring->basePA = ring->basePA +
865                 (sizeof(struct Vmxnet3_TxDesc) * ring->size);
866
867         /* data_ring initialization */
868         data_ring->base = (Vmxnet3_TxDataDesc *)(comp_ring->base + comp_ring->size);
869         data_ring->basePA = comp_ring->basePA +
870                         (sizeof(struct Vmxnet3_TxCompDesc) * comp_ring->size);
871
872         /* cmd_ring0 buf_info allocation */
873         ring->buf_info = rte_zmalloc("tx_ring_buf_info",
874                                      ring->size * sizeof(vmxnet3_buf_info_t), RTE_CACHE_LINE_SIZE);
875         if (ring->buf_info == NULL) {
876                 PMD_INIT_LOG(ERR, "ERROR: Creating tx_buf_info structure");
877                 return -ENOMEM;
878         }
879
880         /* Update the data portion with txq */
881         dev->data->tx_queues[queue_idx] = txq;
882
883         return 0;
884 }
885
886 int
887 vmxnet3_dev_rx_queue_setup(struct rte_eth_dev *dev,
888                            uint16_t queue_idx,
889                            uint16_t nb_desc,
890                            unsigned int socket_id,
891                            __rte_unused const struct rte_eth_rxconf *rx_conf,
892                            struct rte_mempool *mp)
893 {
894         const struct rte_memzone *mz;
895         struct vmxnet3_rx_queue *rxq;
896         struct vmxnet3_hw *hw = dev->data->dev_private;
897         struct vmxnet3_cmd_ring *ring0, *ring1, *ring;
898         struct vmxnet3_comp_ring *comp_ring;
899         int size;
900         uint8_t i;
901         char mem_name[32];
902
903         PMD_INIT_FUNC_TRACE();
904
905         rxq = rte_zmalloc("ethdev_rx_queue", sizeof(struct vmxnet3_rx_queue),
906                           RTE_CACHE_LINE_SIZE);
907         if (rxq == NULL) {
908                 PMD_INIT_LOG(ERR, "Can not allocate rx queue structure");
909                 return -ENOMEM;
910         }
911
912         rxq->mp = mp;
913         rxq->queue_id = queue_idx;
914         rxq->port_id = dev->data->port_id;
915         rxq->shared = &hw->rqd_start[queue_idx];
916         rxq->hw = hw;
917         rxq->qid1 = queue_idx;
918         rxq->qid2 = queue_idx + hw->num_rx_queues;
919         rxq->stopped = TRUE;
920
921         ring0 = &rxq->cmd_ring[0];
922         ring1 = &rxq->cmd_ring[1];
923         comp_ring = &rxq->comp_ring;
924
925         /* Rx vmxnet rings length should be between 256-4096 */
926         if (nb_desc < VMXNET3_DEF_RX_RING_SIZE) {
927                 PMD_INIT_LOG(ERR, "VMXNET3 Rx Ring Size Min: 256");
928                 return -EINVAL;
929         } else if (nb_desc > VMXNET3_RX_RING_MAX_SIZE) {
930                 PMD_INIT_LOG(ERR, "VMXNET3 Rx Ring Size Max: 4096");
931                 return -EINVAL;
932         } else {
933                 ring0->size = nb_desc;
934                 ring0->size &= ~VMXNET3_RING_SIZE_MASK;
935                 ring1->size = ring0->size;
936         }
937
938         comp_ring->size = ring0->size + ring1->size;
939
940         /* Rx vmxnet rings structure initialization */
941         ring0->next2fill = 0;
942         ring1->next2fill = 0;
943         ring0->next2comp = 0;
944         ring1->next2comp = 0;
945         ring0->gen = VMXNET3_INIT_GEN;
946         ring1->gen = VMXNET3_INIT_GEN;
947         comp_ring->next2proc = 0;
948         comp_ring->gen = VMXNET3_INIT_GEN;
949
950         size = sizeof(struct Vmxnet3_RxDesc) * (ring0->size + ring1->size);
951         size += sizeof(struct Vmxnet3_RxCompDesc) * comp_ring->size;
952
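        /* cmd_ring0, cmd_ring1 and the completion ring share a single
         * physically contiguous memzone, in that order.
         */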
953         mz = ring_dma_zone_reserve(dev, "rxdesc", queue_idx, size, socket_id);
954         if (mz == NULL) {
955                 PMD_INIT_LOG(ERR, "ERROR: Creating queue descriptors zone");
956                 return -ENOMEM;
957         }
958         memset(mz->addr, 0, mz->len);
959
960         /* cmd_ring0 initialization */
961         ring0->base = mz->addr;
962         ring0->basePA = mz->phys_addr;
963
964         /* cmd_ring1 initialization */
965         ring1->base = ring0->base + ring0->size;
966         ring1->basePA = ring0->basePA + sizeof(struct Vmxnet3_RxDesc) * ring0->size;
967
968         /* comp_ring initialization */
969         comp_ring->base = ring1->base + ring1->size;
970         comp_ring->basePA = ring1->basePA + sizeof(struct Vmxnet3_RxDesc) *
971                 ring1->size;
972
973         /* cmd_ring0-cmd_ring1 buf_info allocation */
974         for (i = 0; i < VMXNET3_RX_CMDRING_SIZE; i++) {
975
976                 ring = &rxq->cmd_ring[i];
977                 ring->rid = i;
978                 snprintf(mem_name, sizeof(mem_name), "rx_ring_%d_buf_info", i);
979
980                 ring->buf_info = rte_zmalloc(mem_name,
981                                              ring->size * sizeof(vmxnet3_buf_info_t),
982                                              RTE_CACHE_LINE_SIZE);
983                 if (ring->buf_info == NULL) {
984                         PMD_INIT_LOG(ERR, "ERROR: Creating rx_buf_info structure");
985                         return -ENOMEM;
986                 }
987         }
988
989         /* Update the data portion with rxq */
990         dev->data->rx_queues[queue_idx] = rxq;
991
992         return 0;
993 }
994
995 /*
996  * Initializes Receive Unit
997  * Load mbufs in rx queue in advance
998  */
999 int
1000 vmxnet3_dev_rxtx_init(struct rte_eth_dev *dev)
1001 {
1002         struct vmxnet3_hw *hw = dev->data->dev_private;
1003
1004         int i, ret;
1005         uint8_t j;
1006
1007         PMD_INIT_FUNC_TRACE();
1008
1009         for (i = 0; i < hw->num_rx_queues; i++) {
1010                 vmxnet3_rx_queue_t *rxq = dev->data->rx_queues[i];
1011
1012                 for (j = 0; j < VMXNET3_RX_CMDRING_SIZE; j++) {
1013                         /* Fill the whole ring with freshly allocated mbufs */
1014                         ret = vmxnet3_post_rx_bufs(rxq, j);
1015                         if (ret <= 0) {
1016                                 PMD_INIT_LOG(ERR,
1017                                              "ERROR: Posting Rxq: %d buffers ring: %d",
1018                                              i, j);
1019                                 return -ret;
1020                         }
1021                         /*
1022                          * Update the device with the next2fill index so the
1023                          * newly posted mbufs can be used for incoming packets.
1024                          */
1025                         if (unlikely(rxq->shared->ctrl.updateRxProd)) {
1026                                 VMXNET3_WRITE_BAR0_REG(hw, rxprod_reg[j] + (rxq->queue_id * VMXNET3_REG_ALIGN),
1027                                                        rxq->cmd_ring[j].next2fill);
1028                         }
1029                 }
1030                 rxq->stopped = FALSE;
1031                 rxq->start_seg = NULL;
1032         }
1033
1034         for (i = 0; i < dev->data->nb_tx_queues; i++) {
1035                 struct vmxnet3_tx_queue *txq = dev->data->tx_queues[i];
1036
1037                 txq->stopped = FALSE;
1038         }
1039
1040         return 0;
1041 }
1042
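/* Default 40-byte Toeplitz hash key, used when the application does not
 * supply its own RSS key.
 */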
1043 static uint8_t rss_intel_key[40] = {
1044         0x6D, 0x5A, 0x56, 0xDA, 0x25, 0x5B, 0x0E, 0xC2,
1045         0x41, 0x67, 0x25, 0x3D, 0x43, 0xA3, 0x8F, 0xB0,
1046         0xD0, 0xCA, 0x2B, 0xCB, 0xAE, 0x7B, 0x30, 0xB4,
1047         0x77, 0xCB, 0x2D, 0xA3, 0x80, 0x30, 0xF2, 0x0C,
1048         0x6A, 0x42, 0xB7, 0x3B, 0xBE, 0xAC, 0x01, 0xFA,
1049 };
1050
1051 /*
1052  * Configure RSS feature
1053  */
1054 int
1055 vmxnet3_rss_configure(struct rte_eth_dev *dev)
1056 {
1057         struct vmxnet3_hw *hw = dev->data->dev_private;
1058         struct VMXNET3_RSSConf *dev_rss_conf;
1059         struct rte_eth_rss_conf *port_rss_conf;
1060         uint64_t rss_hf;
1061         uint8_t i, j;
1062
1063         PMD_INIT_FUNC_TRACE();
1064
1065         dev_rss_conf = hw->rss_conf;
1066         port_rss_conf = &dev->data->dev_conf.rx_adv_conf.rss_conf;
1067
1068         /* loading hashFunc */
1069         dev_rss_conf->hashFunc = VMXNET3_RSS_HASH_FUNC_TOEPLITZ;
1070         /* loading hashKeySize */
1071         dev_rss_conf->hashKeySize = VMXNET3_RSS_MAX_KEY_SIZE;
1072         /* loading indTableSize: must not exceed VMXNET3_RSS_MAX_IND_TABLE_SIZE (128) */
1073         dev_rss_conf->indTableSize = (uint16_t)(hw->num_rx_queues * 4);
1074
1075         if (port_rss_conf->rss_key == NULL) {
1076                 /* Default hash key */
1077                 port_rss_conf->rss_key = rss_intel_key;
1078         }
1079
1080         /* loading hashKey */
1081         memcpy(&dev_rss_conf->hashKey[0], port_rss_conf->rss_key,
1082                dev_rss_conf->hashKeySize);
1083
1084         /* loading indTable: assign Rx queues round-robin across the table */
1085         for (i = 0, j = 0; i < dev_rss_conf->indTableSize; i++, j++) {
1086                 if (j == dev->data->nb_rx_queues)
1087                         j = 0;
1088                 dev_rss_conf->indTable[i] = j;
1089         }
1090
1091         /* loading hashType */
1092         dev_rss_conf->hashType = 0;
1093         rss_hf = port_rss_conf->rss_hf & VMXNET3_RSS_OFFLOAD_ALL;
1094         if (rss_hf & ETH_RSS_IPV4)
1095                 dev_rss_conf->hashType |= VMXNET3_RSS_HASH_TYPE_IPV4;
1096         if (rss_hf & ETH_RSS_NONFRAG_IPV4_TCP)
1097                 dev_rss_conf->hashType |= VMXNET3_RSS_HASH_TYPE_TCP_IPV4;
1098         if (rss_hf & ETH_RSS_IPV6)
1099                 dev_rss_conf->hashType |= VMXNET3_RSS_HASH_TYPE_IPV6;
1100         if (rss_hf & ETH_RSS_NONFRAG_IPV6_TCP)
1101                 dev_rss_conf->hashType |= VMXNET3_RSS_HASH_TYPE_TCP_IPV6;
1102
1103         return VMXNET3_SUCCESS;
1104 }