ixgbe: fix Tx hang when RS distance exceeds HW limit
dpdk.git: drivers/net/ixgbe/ixgbe_rxtx.c
1 /*-
2  *   BSD LICENSE
3  *
4  *   Copyright(c) 2010-2015 Intel Corporation. All rights reserved.
5  *   Copyright 2014 6WIND S.A.
6  *   All rights reserved.
7  *
8  *   Redistribution and use in source and binary forms, with or without
9  *   modification, are permitted provided that the following conditions
10  *   are met:
11  *
12  *     * Redistributions of source code must retain the above copyright
13  *       notice, this list of conditions and the following disclaimer.
14  *     * Redistributions in binary form must reproduce the above copyright
15  *       notice, this list of conditions and the following disclaimer in
16  *       the documentation and/or other materials provided with the
17  *       distribution.
18  *     * Neither the name of Intel Corporation nor the names of its
19  *       contributors may be used to endorse or promote products derived
20  *       from this software without specific prior written permission.
21  *
22  *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
23  *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
24  *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
25  *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
26  *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
27  *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
28  *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
29  *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
30  *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
31  *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
32  *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
33  */
34
35 #include <sys/queue.h>
36
37 #include <stdio.h>
38 #include <stdlib.h>
39 #include <string.h>
40 #include <errno.h>
41 #include <stdint.h>
42 #include <stdarg.h>
43 #include <unistd.h>
44 #include <inttypes.h>
45
46 #include <rte_byteorder.h>
47 #include <rte_common.h>
48 #include <rte_cycles.h>
49 #include <rte_log.h>
50 #include <rte_debug.h>
51 #include <rte_interrupts.h>
52 #include <rte_pci.h>
53 #include <rte_memory.h>
54 #include <rte_memzone.h>
55 #include <rte_launch.h>
56 #include <rte_eal.h>
57 #include <rte_per_lcore.h>
58 #include <rte_lcore.h>
59 #include <rte_atomic.h>
60 #include <rte_branch_prediction.h>
61 #include <rte_ring.h>
62 #include <rte_mempool.h>
63 #include <rte_malloc.h>
64 #include <rte_mbuf.h>
65 #include <rte_ether.h>
66 #include <rte_ethdev.h>
67 #include <rte_prefetch.h>
68 #include <rte_udp.h>
69 #include <rte_tcp.h>
70 #include <rte_sctp.h>
71 #include <rte_string_fns.h>
72 #include <rte_errno.h>
73 #include <rte_ip.h>
74
75 #include "ixgbe_logs.h"
76 #include "base/ixgbe_api.h"
77 #include "base/ixgbe_vf.h"
78 #include "ixgbe_ethdev.h"
79 #include "base/ixgbe_dcb.h"
80 #include "base/ixgbe_common.h"
81 #include "ixgbe_rxtx.h"
82
83 /* Bit mask to indicate which bits are required for building the TX context */
84 #define IXGBE_TX_OFFLOAD_MASK (                  \
85                 PKT_TX_VLAN_PKT |                \
86                 PKT_TX_IP_CKSUM |                \
87                 PKT_TX_L4_MASK |                 \
88                 PKT_TX_TCP_SEG)
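/*
 * Any of the flags above set in mbuf->ol_flags steers the packet through the
 * offload path of ixgbe_xmit_pkts(), which may emit an advanced context
 * descriptor ahead of the packet's data descriptors.
 */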
89
90 static inline struct rte_mbuf *
91 rte_rxmbuf_alloc(struct rte_mempool *mp)
92 {
93         struct rte_mbuf *m;
94
95         m = __rte_mbuf_raw_alloc(mp);
96         __rte_mbuf_sanity_check_raw(m, 0);
97         return (m);
98 }
99
100
101 #if 1
102 #define RTE_PMD_USE_PREFETCH
103 #endif
104
105 #ifdef RTE_PMD_USE_PREFETCH
106 /*
107  * Prefetch a cache line into all cache levels.
108  */
109 #define rte_ixgbe_prefetch(p)   rte_prefetch0(p)
110 #else
111 #define rte_ixgbe_prefetch(p)   do {} while(0)
112 #endif
113
114 /*********************************************************************
115  *
116  *  TX functions
117  *
118  **********************************************************************/
119
120 /*
121  * Check for descriptors with their DD bit set and free mbufs.
122  * Return the total number of buffers freed.
123  */
124 static inline int __attribute__((always_inline))
125 ixgbe_tx_free_bufs(struct ixgbe_tx_queue *txq)
126 {
127         struct ixgbe_tx_entry *txep;
128         uint32_t status;
129         int i;
130
131         /* check DD bit on threshold descriptor */
132         status = txq->tx_ring[txq->tx_next_dd].wb.status;
133         if (!(status & rte_cpu_to_le_32(IXGBE_ADVTXD_STAT_DD)))
134                 return 0;
135
136         /*
137          * first buffer to free from S/W ring is at index
138          * tx_next_dd - (tx_rs_thresh-1)
139          */
140         txep = &(txq->sw_ring[txq->tx_next_dd - (txq->tx_rs_thresh - 1)]);
141
142         /* free buffers one at a time */
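            /*
             * With ETH_TXQ_FLAGS_NOREFCOUNT the application guarantees these
             * mbufs are not reference-counted elsewhere, so they can be put
             * straight back into their mempool instead of going through the
             * generic rte_pktmbuf_free_seg() path.
             */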
143         if ((txq->txq_flags & (uint32_t)ETH_TXQ_FLAGS_NOREFCOUNT) != 0) {
144                 for (i = 0; i < txq->tx_rs_thresh; ++i, ++txep) {
145                         txep->mbuf->next = NULL;
146                         rte_mempool_put(txep->mbuf->pool, txep->mbuf);
147                         txep->mbuf = NULL;
148                 }
149         } else {
150                 for (i = 0; i < txq->tx_rs_thresh; ++i, ++txep) {
151                         rte_pktmbuf_free_seg(txep->mbuf);
152                         txep->mbuf = NULL;
153                 }
154         }
155
156         /* buffers were freed, update counters */
157         txq->nb_tx_free = (uint16_t)(txq->nb_tx_free + txq->tx_rs_thresh);
158         txq->tx_next_dd = (uint16_t)(txq->tx_next_dd + txq->tx_rs_thresh);
159         if (txq->tx_next_dd >= txq->nb_tx_desc)
160                 txq->tx_next_dd = (uint16_t)(txq->tx_rs_thresh - 1);
161
162         return txq->tx_rs_thresh;
163 }
164
165 /* Populate 4 descriptors with data from 4 mbufs */
166 static inline void
167 tx4(volatile union ixgbe_adv_tx_desc *txdp, struct rte_mbuf **pkts)
168 {
169         uint64_t buf_dma_addr;
170         uint32_t pkt_len;
171         int i;
172
173         for (i = 0; i < 4; ++i, ++txdp, ++pkts) {
174                 buf_dma_addr = RTE_MBUF_DATA_DMA_ADDR(*pkts);
175                 pkt_len = (*pkts)->data_len;
176
177                 /* write data to descriptor */
178                 txdp->read.buffer_addr = rte_cpu_to_le_64(buf_dma_addr);
179
180                 txdp->read.cmd_type_len =
181                         rte_cpu_to_le_32((uint32_t)DCMD_DTYP_FLAGS | pkt_len);
182
183                 txdp->read.olinfo_status =
184                         rte_cpu_to_le_32(pkt_len << IXGBE_ADVTXD_PAYLEN_SHIFT);
185
186                 rte_prefetch0(&(*pkts)->pool);
187         }
188 }
189
190 /* Populate 1 descriptor with data from 1 mbuf */
191 static inline void
192 tx1(volatile union ixgbe_adv_tx_desc *txdp, struct rte_mbuf **pkts)
193 {
194         uint64_t buf_dma_addr;
195         uint32_t pkt_len;
196
197         buf_dma_addr = RTE_MBUF_DATA_DMA_ADDR(*pkts);
198         pkt_len = (*pkts)->data_len;
199
200         /* write data to descriptor */
201         txdp->read.buffer_addr = rte_cpu_to_le_64(buf_dma_addr);
202         txdp->read.cmd_type_len =
203                         rte_cpu_to_le_32((uint32_t)DCMD_DTYP_FLAGS | pkt_len);
204         txdp->read.olinfo_status =
205                         rte_cpu_to_le_32(pkt_len << IXGBE_ADVTXD_PAYLEN_SHIFT);
206         rte_prefetch0(&(*pkts)->pool);
207 }
208
209 /*
210  * Fill H/W descriptor ring with mbuf data.
211  * Copy mbuf pointers to the S/W ring.
212  */
213 static inline void
214 ixgbe_tx_fill_hw_ring(struct ixgbe_tx_queue *txq, struct rte_mbuf **pkts,
215                       uint16_t nb_pkts)
216 {
217         volatile union ixgbe_adv_tx_desc *txdp = &(txq->tx_ring[txq->tx_tail]);
218         struct ixgbe_tx_entry *txep = &(txq->sw_ring[txq->tx_tail]);
219         const int N_PER_LOOP = 4;
220         const int N_PER_LOOP_MASK = N_PER_LOOP-1;
221         int mainpart, leftover;
222         int i, j;
223
224         /*
225          * Process most of the packets in chunks of N pkts.  Any
226          * leftover packets will get processed one at a time.
227          */
228         mainpart = (nb_pkts & ((uint32_t) ~N_PER_LOOP_MASK));
229         leftover = (nb_pkts & ((uint32_t)  N_PER_LOOP_MASK));
230         for (i = 0; i < mainpart; i += N_PER_LOOP) {
231                 /* Copy N mbuf pointers to the S/W ring */
232                 for (j = 0; j < N_PER_LOOP; ++j) {
233                         (txep + i + j)->mbuf = *(pkts + i + j);
234                 }
235                 tx4(txdp + i, pkts + i);
236         }
237
238         if (unlikely(leftover > 0)) {
239                 for (i = 0; i < leftover; ++i) {
240                         (txep + mainpart + i)->mbuf = *(pkts + mainpart + i);
241                         tx1(txdp + mainpart + i, pkts + mainpart + i);
242                 }
243         }
244 }
245
246 static inline uint16_t
247 tx_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts,
248              uint16_t nb_pkts)
249 {
250         struct ixgbe_tx_queue *txq = (struct ixgbe_tx_queue *)tx_queue;
251         volatile union ixgbe_adv_tx_desc *tx_r = txq->tx_ring;
252         uint16_t n = 0;
253
254         /*
255          * Begin scanning the H/W ring for done descriptors when the
256          * number of available descriptors drops below tx_free_thresh.  For
257          * each done descriptor, free the associated buffer.
258          */
259         if (txq->nb_tx_free < txq->tx_free_thresh)
260                 ixgbe_tx_free_bufs(txq);
261
262         /* Only use descriptors that are available */
263         nb_pkts = (uint16_t)RTE_MIN(txq->nb_tx_free, nb_pkts);
264         if (unlikely(nb_pkts == 0))
265                 return 0;
266
267         /* Use exactly nb_pkts descriptors */
268         txq->nb_tx_free = (uint16_t)(txq->nb_tx_free - nb_pkts);
269
270         /*
271          * At this point, we know there are enough descriptors in the
272          * ring to transmit all the packets.  This assumes that each
273          * mbuf contains a single segment, and that no new offloads
274          * are expected, which would require a new context descriptor.
275          */
276
277         /*
278          * See if we're going to wrap-around. If so, handle the top
279          * of the descriptor ring first, then do the bottom.  If not,
280          * the processing looks just like the "bottom" part anyway...
281          */
282         if ((txq->tx_tail + nb_pkts) > txq->nb_tx_desc) {
283                 n = (uint16_t)(txq->nb_tx_desc - txq->tx_tail);
284                 ixgbe_tx_fill_hw_ring(txq, tx_pkts, n);
285
286                 /*
287                  * We know that the last descriptor in the ring will need to
288                  * have its RS bit set because tx_rs_thresh has to be
289                  * a divisor of the ring size
290                  */
291                 tx_r[txq->tx_next_rs].read.cmd_type_len |=
292                         rte_cpu_to_le_32(IXGBE_ADVTXD_DCMD_RS);
293                 txq->tx_next_rs = (uint16_t)(txq->tx_rs_thresh - 1);
294
295                 txq->tx_tail = 0;
296         }
297
298         /* Fill H/W descriptor ring with mbuf data */
299         ixgbe_tx_fill_hw_ring(txq, tx_pkts + n, (uint16_t)(nb_pkts - n));
300         txq->tx_tail = (uint16_t)(txq->tx_tail + (nb_pkts - n));
301
302         /*
303          * Determine if RS bit should be set
304          * This is what we actually want:
305          *   if ((txq->tx_tail - 1) >= txq->tx_next_rs)
306          * but instead of subtracting 1 and doing >=, we can just do
307          * greater than without subtracting.
308          */
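            /*
             * For example, with tx_rs_thresh = 32 and tx_next_rs = 31, sending
             * 32 packets from tail 0 leaves tx_tail = 32 > 31, so descriptor 31
             * gets the RS bit and tx_next_rs advances to 63.
             */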
309         if (txq->tx_tail > txq->tx_next_rs) {
310                 tx_r[txq->tx_next_rs].read.cmd_type_len |=
311                         rte_cpu_to_le_32(IXGBE_ADVTXD_DCMD_RS);
312                 txq->tx_next_rs = (uint16_t)(txq->tx_next_rs +
313                                                 txq->tx_rs_thresh);
314                 if (txq->tx_next_rs >= txq->nb_tx_desc)
315                         txq->tx_next_rs = (uint16_t)(txq->tx_rs_thresh - 1);
316         }
317
318         /*
319          * Check for wrap-around. This would only happen if we used
320          * up to the last descriptor in the ring, no more, no less.
321          */
322         if (txq->tx_tail >= txq->nb_tx_desc)
323                 txq->tx_tail = 0;
324
325         /* update tail pointer */
326         rte_wmb();
327         IXGBE_PCI_REG_WRITE(txq->tdt_reg_addr, txq->tx_tail);
328
329         return nb_pkts;
330 }
331
332 uint16_t
333 ixgbe_xmit_pkts_simple(void *tx_queue, struct rte_mbuf **tx_pkts,
334                        uint16_t nb_pkts)
335 {
336         uint16_t nb_tx;
337
338         /* Try to transmit at least chunks of TX_MAX_BURST pkts */
339         if (likely(nb_pkts <= RTE_PMD_IXGBE_TX_MAX_BURST))
340                 return tx_xmit_pkts(tx_queue, tx_pkts, nb_pkts);
341
342         /* transmit more than the max burst, in chunks of TX_MAX_BURST */
343         nb_tx = 0;
344         while (nb_pkts) {
345                 uint16_t ret, n;
346                 n = (uint16_t)RTE_MIN(nb_pkts, RTE_PMD_IXGBE_TX_MAX_BURST);
347                 ret = tx_xmit_pkts(tx_queue, &(tx_pkts[nb_tx]), n);
348                 nb_tx = (uint16_t)(nb_tx + ret);
349                 nb_pkts = (uint16_t)(nb_pkts - ret);
350                 if (ret < n)
351                         break;
352         }
353
354         return nb_tx;
355 }
356
357 static inline void
358 ixgbe_set_xmit_ctx(struct ixgbe_tx_queue *txq,
359                 volatile struct ixgbe_adv_tx_context_desc *ctx_txd,
360                 uint64_t ol_flags, union ixgbe_tx_offload tx_offload)
361 {
362         uint32_t type_tucmd_mlhl;
363         uint32_t mss_l4len_idx = 0;
364         uint32_t ctx_idx;
365         uint32_t vlan_macip_lens;
366         union ixgbe_tx_offload tx_offload_mask;
367
368         ctx_idx = txq->ctx_curr;
369         tx_offload_mask.data = 0;
370         type_tucmd_mlhl = 0;
371
372         /* Specify which HW CTX to upload. */
373         mss_l4len_idx |= (ctx_idx << IXGBE_ADVTXD_IDX_SHIFT);
374
375         if (ol_flags & PKT_TX_VLAN_PKT) {
376                 tx_offload_mask.vlan_tci |= ~0;
377         }
378
379         /* check if TCP segmentation is required for this packet */
380         if (ol_flags & PKT_TX_TCP_SEG) {
381                 /* implies IP cksum in IPv4 */
382                 if (ol_flags & PKT_TX_IP_CKSUM)
383                         type_tucmd_mlhl = IXGBE_ADVTXD_TUCMD_IPV4 |
384                                 IXGBE_ADVTXD_TUCMD_L4T_TCP |
385                                 IXGBE_ADVTXD_DTYP_CTXT | IXGBE_ADVTXD_DCMD_DEXT;
386                 else
387                         type_tucmd_mlhl = IXGBE_ADVTXD_TUCMD_IPV6 |
388                                 IXGBE_ADVTXD_TUCMD_L4T_TCP |
389                                 IXGBE_ADVTXD_DTYP_CTXT | IXGBE_ADVTXD_DCMD_DEXT;
390
391                 tx_offload_mask.l2_len |= ~0;
392                 tx_offload_mask.l3_len |= ~0;
393                 tx_offload_mask.l4_len |= ~0;
394                 tx_offload_mask.tso_segsz |= ~0;
395                 mss_l4len_idx |= tx_offload.tso_segsz << IXGBE_ADVTXD_MSS_SHIFT;
396                 mss_l4len_idx |= tx_offload.l4_len << IXGBE_ADVTXD_L4LEN_SHIFT;
397         } else { /* no TSO, check if hardware checksum is needed */
398                 if (ol_flags & PKT_TX_IP_CKSUM) {
399                         type_tucmd_mlhl = IXGBE_ADVTXD_TUCMD_IPV4;
400                         tx_offload_mask.l2_len |= ~0;
401                         tx_offload_mask.l3_len |= ~0;
402                 }
403
404                 switch (ol_flags & PKT_TX_L4_MASK) {
405                 case PKT_TX_UDP_CKSUM:
406                         type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_L4T_UDP |
407                                 IXGBE_ADVTXD_DTYP_CTXT | IXGBE_ADVTXD_DCMD_DEXT;
408                         mss_l4len_idx |= sizeof(struct udp_hdr) << IXGBE_ADVTXD_L4LEN_SHIFT;
409                         tx_offload_mask.l2_len |= ~0;
410                         tx_offload_mask.l3_len |= ~0;
411                         break;
412                 case PKT_TX_TCP_CKSUM:
413                         type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_L4T_TCP |
414                                 IXGBE_ADVTXD_DTYP_CTXT | IXGBE_ADVTXD_DCMD_DEXT;
415                         mss_l4len_idx |= sizeof(struct tcp_hdr) << IXGBE_ADVTXD_L4LEN_SHIFT;
416                         tx_offload_mask.l2_len |= ~0;
417                         tx_offload_mask.l3_len |= ~0;
418                         break;
419                 case PKT_TX_SCTP_CKSUM:
420                         type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_L4T_SCTP |
421                                 IXGBE_ADVTXD_DTYP_CTXT | IXGBE_ADVTXD_DCMD_DEXT;
422                         mss_l4len_idx |= sizeof(struct sctp_hdr) << IXGBE_ADVTXD_L4LEN_SHIFT;
423                         tx_offload_mask.l2_len |= ~0;
424                         tx_offload_mask.l3_len |= ~0;
425                         break;
426                 default:
427                         type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_L4T_RSV |
428                                 IXGBE_ADVTXD_DTYP_CTXT | IXGBE_ADVTXD_DCMD_DEXT;
429                         break;
430                 }
431         }
432
433         txq->ctx_cache[ctx_idx].flags = ol_flags;
434         txq->ctx_cache[ctx_idx].tx_offload.data  =
435                 tx_offload_mask.data & tx_offload.data;
436         txq->ctx_cache[ctx_idx].tx_offload_mask    = tx_offload_mask;
437
438         ctx_txd->type_tucmd_mlhl = rte_cpu_to_le_32(type_tucmd_mlhl);
439         vlan_macip_lens = tx_offload.l3_len;
440         vlan_macip_lens |= (tx_offload.l2_len << IXGBE_ADVTXD_MACLEN_SHIFT);
441         vlan_macip_lens |= ((uint32_t)tx_offload.vlan_tci << IXGBE_ADVTXD_VLAN_SHIFT);
442         ctx_txd->vlan_macip_lens = rte_cpu_to_le_32(vlan_macip_lens);
443         ctx_txd->mss_l4len_idx   = rte_cpu_to_le_32(mss_l4len_idx);
444         ctx_txd->seqnum_seed     = 0;
445 }
446
447 /*
448  * Check which hardware context can be used. Use the existing match
449  * or create a new context descriptor.
450  */
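/*
 * The queue caches the offload parameters of the two context slots the
 * hardware provides per Tx queue (IXGBE_CTX_NUM of them) in ctx_cache[];
 * as long as consecutive packets carry the same offload flags and header
 * layout, no new context descriptor needs to be sent to the NIC.
 */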
451 static inline uint32_t
452 what_advctx_update(struct ixgbe_tx_queue *txq, uint64_t flags,
453                 union ixgbe_tx_offload tx_offload)
454 {
455         /* Check whether it matches the currently used context */
456         if (likely((txq->ctx_cache[txq->ctx_curr].flags == flags) &&
457                 (txq->ctx_cache[txq->ctx_curr].tx_offload.data ==
458                 (txq->ctx_cache[txq->ctx_curr].tx_offload_mask.data & tx_offload.data)))) {
459                         return txq->ctx_curr;
460         }
461
462         /* Check whether it matches the other (next) context */
463         txq->ctx_curr ^= 1;
464         if (likely((txq->ctx_cache[txq->ctx_curr].flags == flags) &&
465                 (txq->ctx_cache[txq->ctx_curr].tx_offload.data ==
466                 (txq->ctx_cache[txq->ctx_curr].tx_offload_mask.data & tx_offload.data)))) {
467                         return txq->ctx_curr;
468         }
469
470         /* Neither context matches: the caller must build a new context descriptor */
471         return (IXGBE_CTX_NUM);
472 }
473
474 static inline uint32_t
475 tx_desc_cksum_flags_to_olinfo(uint64_t ol_flags)
476 {
477         uint32_t tmp = 0;
478         if ((ol_flags & PKT_TX_L4_MASK) != PKT_TX_L4_NO_CKSUM)
479                 tmp |= IXGBE_ADVTXD_POPTS_TXSM;
480         if (ol_flags & PKT_TX_IP_CKSUM)
481                 tmp |= IXGBE_ADVTXD_POPTS_IXSM;
482         if (ol_flags & PKT_TX_TCP_SEG)
483                 tmp |= IXGBE_ADVTXD_POPTS_TXSM;
484         return tmp;
485 }
486
487 static inline uint32_t
488 tx_desc_ol_flags_to_cmdtype(uint64_t ol_flags)
489 {
490         uint32_t cmdtype = 0;
491         if (ol_flags & PKT_TX_VLAN_PKT)
492                 cmdtype |= IXGBE_ADVTXD_DCMD_VLE;
493         if (ol_flags & PKT_TX_TCP_SEG)
494                 cmdtype |= IXGBE_ADVTXD_DCMD_TSE;
495         return cmdtype;
496 }
497
498 /* Default RS bit threshold values */
499 #ifndef DEFAULT_TX_RS_THRESH
500 #define DEFAULT_TX_RS_THRESH   32
501 #endif
502 #ifndef DEFAULT_TX_FREE_THRESH
503 #define DEFAULT_TX_FREE_THRESH 32
504 #endif
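/*
 * Fallback values used at Tx queue setup time when the application leaves
 * tx_rs_thresh / tx_free_thresh at 0 in the queue configuration.
 */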
505
506 /* Reset transmit descriptors after they have been used */
507 static inline int
508 ixgbe_xmit_cleanup(struct ixgbe_tx_queue *txq)
509 {
510         struct ixgbe_tx_entry *sw_ring = txq->sw_ring;
511         volatile union ixgbe_adv_tx_desc *txr = txq->tx_ring;
512         uint16_t last_desc_cleaned = txq->last_desc_cleaned;
513         uint16_t nb_tx_desc = txq->nb_tx_desc;
514         uint16_t desc_to_clean_to;
515         uint16_t nb_tx_to_clean;
516         uint32_t status;
517
518         /* Determine the last descriptor needing to be cleaned */
519         desc_to_clean_to = (uint16_t)(last_desc_cleaned + txq->tx_rs_thresh);
520         if (desc_to_clean_to >= nb_tx_desc)
521                 desc_to_clean_to = (uint16_t)(desc_to_clean_to - nb_tx_desc);
522
523         /* Check to make sure the last descriptor to clean is done */
524         desc_to_clean_to = sw_ring[desc_to_clean_to].last_id;
525         status = txr[desc_to_clean_to].wb.status;
526         if (!(status & rte_cpu_to_le_32(IXGBE_TXD_STAT_DD)))
527         {
528                 PMD_TX_FREE_LOG(DEBUG,
529                                 "TX descriptor %4u is not done "
530                                 "(port=%d queue=%d)",
531                                 desc_to_clean_to,
532                                 txq->port_id, txq->queue_id);
533                 /* Failed to clean any descriptors, better luck next time */
534                 return -(1);
535         }
536
537         /* Figure out how many descriptors will be cleaned */
538         if (last_desc_cleaned > desc_to_clean_to)
539                 nb_tx_to_clean = (uint16_t)((nb_tx_desc - last_desc_cleaned) +
540                                                         desc_to_clean_to);
541         else
542                 nb_tx_to_clean = (uint16_t)(desc_to_clean_to -
543                                                 last_desc_cleaned);
544
545         PMD_TX_FREE_LOG(DEBUG,
546                         "Cleaning %4u TX descriptors: %4u to %4u "
547                         "(port=%d queue=%d)",
548                         nb_tx_to_clean, last_desc_cleaned, desc_to_clean_to,
549                         txq->port_id, txq->queue_id);
550
551         /*
552          * The last descriptor to clean is done, so that means all the
553          * descriptors from the last descriptor that was cleaned
554          * up to the last descriptor with the RS bit set
555          * are done. Only reset the threshold descriptor.
556          */
557         txr[desc_to_clean_to].wb.status = 0;
558
559         /* Update the txq to reflect the last descriptor that was cleaned */
560         txq->last_desc_cleaned = desc_to_clean_to;
561         txq->nb_tx_free = (uint16_t)(txq->nb_tx_free + nb_tx_to_clean);
562
563         /* No Error */
564         return (0);
565 }
566
567 uint16_t
568 ixgbe_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts,
569                 uint16_t nb_pkts)
570 {
571         struct ixgbe_tx_queue *txq;
572         struct ixgbe_tx_entry *sw_ring;
573         struct ixgbe_tx_entry *txe, *txn;
574         volatile union ixgbe_adv_tx_desc *txr;
575         volatile union ixgbe_adv_tx_desc *txd, *txp;
576         struct rte_mbuf     *tx_pkt;
577         struct rte_mbuf     *m_seg;
578         uint64_t buf_dma_addr;
579         uint32_t olinfo_status;
580         uint32_t cmd_type_len;
581         uint32_t pkt_len;
582         uint16_t slen;
583         uint64_t ol_flags;
584         uint16_t tx_id;
585         uint16_t tx_last;
586         uint16_t nb_tx;
587         uint16_t nb_used;
588         uint64_t tx_ol_req;
589         uint32_t ctx = 0;
590         uint32_t new_ctx;
591         union ixgbe_tx_offload tx_offload = {0};
592
593         txq = tx_queue;
594         sw_ring = txq->sw_ring;
595         txr     = txq->tx_ring;
596         tx_id   = txq->tx_tail;
597         txe = &sw_ring[tx_id];
598         txp = NULL;
599
600         /* Determine if the descriptor ring needs to be cleaned. */
601         if (txq->nb_tx_free < txq->tx_free_thresh)
602                 ixgbe_xmit_cleanup(txq);
603
604         rte_prefetch0(&txe->mbuf->pool);
605
606         /* TX loop */
607         for (nb_tx = 0; nb_tx < nb_pkts; nb_tx++) {
608                 new_ctx = 0;
609                 tx_pkt = *tx_pkts++;
610                 pkt_len = tx_pkt->pkt_len;
611
612                 /*
613                  * Determine how many (if any) context descriptors
614                  * are needed for offload functionality.
615                  */
616                 ol_flags = tx_pkt->ol_flags;
617
618                 /* If hardware offload required */
619                 tx_ol_req = ol_flags & IXGBE_TX_OFFLOAD_MASK;
620                 if (tx_ol_req) {
621                         tx_offload.l2_len = tx_pkt->l2_len;
622                         tx_offload.l3_len = tx_pkt->l3_len;
623                         tx_offload.l4_len = tx_pkt->l4_len;
624                         tx_offload.vlan_tci = tx_pkt->vlan_tci;
625                         tx_offload.tso_segsz = tx_pkt->tso_segsz;
626
627                         /* Decide whether a new context must be built or an existing one reused. */
628                         ctx = what_advctx_update(txq, tx_ol_req,
629                                 tx_offload);
630                         /* Only allocate a context descriptor if required */
631                         new_ctx = (ctx == IXGBE_CTX_NUM);
632                         ctx = txq->ctx_curr;
633                 }
634
635                 /*
636                  * Keep track of how many descriptors are used for this packet.
637                  * This is always the number of segments plus the number of
638                  * context descriptors required to transmit the packet.
639                  */
640                 nb_used = (uint16_t)(tx_pkt->nb_segs + new_ctx);
641
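                    /*
                     * The hardware tolerates only a limited distance between two
                     * descriptors with the RS bit set (see the commit subject:
                     * exceeding it can hang the Tx queue).  If this packet would
                     * push the descriptors used since the last RS past
                     * tx_rs_thresh, force RS onto the previous packet's last
                     * descriptor now.
                     */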
642                 if (txp != NULL &&
643                                 nb_used + txq->nb_tx_used >= txq->tx_rs_thresh)
644                         /* set RS on the previous packet in the burst */
645                         txp->read.cmd_type_len |=
646                                 rte_cpu_to_le_32(IXGBE_TXD_CMD_RS);
647
648                 /*
649                  * The number of descriptors that must be allocated for a
650                  * packet is the number of segments of that packet, plus 1
651                  * Context Descriptor for the hardware offload, if any.
652                  * Determine the last TX descriptor to allocate in the TX ring
653                  * for the packet, starting from the current position (tx_id)
654                  * in the ring.
655                  */
656                 tx_last = (uint16_t) (tx_id + nb_used - 1);
657
658                 /* Circular ring */
659                 if (tx_last >= txq->nb_tx_desc)
660                         tx_last = (uint16_t) (tx_last - txq->nb_tx_desc);
661
662                 PMD_TX_LOG(DEBUG, "port_id=%u queue_id=%u pktlen=%u"
663                            " tx_first=%u tx_last=%u",
664                            (unsigned) txq->port_id,
665                            (unsigned) txq->queue_id,
666                            (unsigned) pkt_len,
667                            (unsigned) tx_id,
668                            (unsigned) tx_last);
669
670                 /*
671                  * Make sure there are enough TX descriptors available to
672                  * transmit the entire packet.
673                  * nb_used better be less than or equal to txq->tx_rs_thresh
674                  */
675                 if (nb_used > txq->nb_tx_free) {
676                         PMD_TX_FREE_LOG(DEBUG,
677                                         "Not enough free TX descriptors "
678                                         "nb_used=%4u nb_free=%4u "
679                                         "(port=%d queue=%d)",
680                                         nb_used, txq->nb_tx_free,
681                                         txq->port_id, txq->queue_id);
682
683                         if (ixgbe_xmit_cleanup(txq) != 0) {
684                                 /* Could not clean any descriptors */
685                                 if (nb_tx == 0)
686                                         return (0);
687                                 goto end_of_tx;
688                         }
689
690                         /* nb_used better be <= txq->tx_rs_thresh */
691                         if (unlikely(nb_used > txq->tx_rs_thresh)) {
692                                 PMD_TX_FREE_LOG(DEBUG,
693                                         "The number of descriptors needed to "
694                                         "transmit the packet exceeds the "
695                                         "RS bit threshold. This will impact "
696                                         "performance. "
697                                         "nb_used=%4u nb_free=%4u "
698                                         "tx_rs_thresh=%4u. "
699                                         "(port=%d queue=%d)",
700                                         nb_used, txq->nb_tx_free,
701                                         txq->tx_rs_thresh,
702                                         txq->port_id, txq->queue_id);
703                                 /*
704                                  * Loop here until there are enough TX
705                                  * descriptors or until the ring cannot be
706                                  * cleaned.
707                                  */
708                                 while (nb_used > txq->nb_tx_free) {
709                                         if (ixgbe_xmit_cleanup(txq) != 0) {
710                                                 /*
711                                                  * Could not clean any
712                                                  * descriptors
713                                                  */
714                                                 if (nb_tx == 0)
715                                                         return (0);
716                                                 goto end_of_tx;
717                                         }
718                                 }
719                         }
720                 }
721
722                 /*
723                  * By now there are enough free TX descriptors to transmit
724                  * the packet.
725                  */
726
727                 /*
728                  * Set common flags of all TX Data Descriptors.
729                  *
730                  * The following bits must be set in all Data Descriptors:
731                  *   - IXGBE_ADVTXD_DTYP_DATA
732                  *   - IXGBE_ADVTXD_DCMD_DEXT
733                  *
734                  * The following bits must be set in the first Data Descriptor
735                  * and are ignored in the other ones:
736                  *   - IXGBE_ADVTXD_DCMD_IFCS
737                  *   - IXGBE_ADVTXD_MAC_1588
738                  *   - IXGBE_ADVTXD_DCMD_VLE
739                  *
740                  * The following bits must only be set in the last Data
741                  * Descriptor:
742                  *   - IXGBE_TXD_CMD_EOP
743                  *
744                  * The following bits can be set in any Data Descriptor, but
745                  * are only set in the last Data Descriptor:
746                  *   - IXGBE_TXD_CMD_RS
747                  */
748                 cmd_type_len = IXGBE_ADVTXD_DTYP_DATA |
749                         IXGBE_ADVTXD_DCMD_IFCS | IXGBE_ADVTXD_DCMD_DEXT;
750
751 #ifdef RTE_LIBRTE_IEEE1588
752                 if (ol_flags & PKT_TX_IEEE1588_TMST)
753                         cmd_type_len |= IXGBE_ADVTXD_MAC_1588;
754 #endif
755
756                 olinfo_status = 0;
757                 if (tx_ol_req) {
758
759                         if (ol_flags & PKT_TX_TCP_SEG) {
760                                 /* when TSO is on, the paylen in the descriptor is
761                                  * not the packet length but the TCP payload length */
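                                    /* e.g. 14 + 20 + 20 header bytes on a
                                     * 9014-byte TSO packet give an 8960-byte
                                     * paylen */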
762                                 pkt_len -= (tx_offload.l2_len +
763                                         tx_offload.l3_len + tx_offload.l4_len);
764                         }
765
766                         /*
767                          * Setup the TX Advanced Context Descriptor if required
768                          */
769                         if (new_ctx) {
770                                 volatile struct ixgbe_adv_tx_context_desc *
771                                     ctx_txd;
772
773                                 ctx_txd = (volatile struct
774                                     ixgbe_adv_tx_context_desc *)
775                                     &txr[tx_id];
776
777                                 txn = &sw_ring[txe->next_id];
778                                 rte_prefetch0(&txn->mbuf->pool);
779
780                                 if (txe->mbuf != NULL) {
781                                         rte_pktmbuf_free_seg(txe->mbuf);
782                                         txe->mbuf = NULL;
783                                 }
784
785                                 ixgbe_set_xmit_ctx(txq, ctx_txd, tx_ol_req,
786                                         tx_offload);
787
788                                 txe->last_id = tx_last;
789                                 tx_id = txe->next_id;
790                                 txe = txn;
791                         }
792
793                         /*
794                          * Set up the TX Advanced Data Descriptor.
795                          * This path is taken whether the context
796                          * descriptor is newly built or reused.
797                          */
798                         cmd_type_len  |= tx_desc_ol_flags_to_cmdtype(ol_flags);
799                         olinfo_status |= tx_desc_cksum_flags_to_olinfo(ol_flags);
800                         olinfo_status |= ctx << IXGBE_ADVTXD_IDX_SHIFT;
801                 }
802
803                 olinfo_status |= (pkt_len << IXGBE_ADVTXD_PAYLEN_SHIFT);
804
805                 m_seg = tx_pkt;
806                 do {
807                         txd = &txr[tx_id];
808                         txn = &sw_ring[txe->next_id];
809                         rte_prefetch0(&txn->mbuf->pool);
810
811                         if (txe->mbuf != NULL)
812                                 rte_pktmbuf_free_seg(txe->mbuf);
813                         txe->mbuf = m_seg;
814
815                         /*
816                          * Set up Transmit Data Descriptor.
817                          */
818                         slen = m_seg->data_len;
819                         buf_dma_addr = RTE_MBUF_DATA_DMA_ADDR(m_seg);
820                         txd->read.buffer_addr =
821                                 rte_cpu_to_le_64(buf_dma_addr);
822                         txd->read.cmd_type_len =
823                                 rte_cpu_to_le_32(cmd_type_len | slen);
824                         txd->read.olinfo_status =
825                                 rte_cpu_to_le_32(olinfo_status);
826                         txe->last_id = tx_last;
827                         tx_id = txe->next_id;
828                         txe = txn;
829                         m_seg = m_seg->next;
830                 } while (m_seg != NULL);
831
832                 /*
833                  * The last packet data descriptor needs End Of Packet (EOP)
834                  */
835                 cmd_type_len |= IXGBE_TXD_CMD_EOP;
836                 txq->nb_tx_used = (uint16_t)(txq->nb_tx_used + nb_used);
837                 txq->nb_tx_free = (uint16_t)(txq->nb_tx_free - nb_used);
838
839                 /* Set RS bit only on threshold packets' last descriptor */
840                 if (txq->nb_tx_used >= txq->tx_rs_thresh) {
841                         PMD_TX_FREE_LOG(DEBUG,
842                                         "Setting RS bit on TXD id="
843                                         "%4u (port=%d queue=%d)",
844                                         tx_last, txq->port_id, txq->queue_id);
845
846                         cmd_type_len |= IXGBE_TXD_CMD_RS;
847
848                         /* Update txq RS bit counters */
849                         txq->nb_tx_used = 0;
850                         txp = NULL;
851                 } else
852                         txp = txd;
853
854                 txd->read.cmd_type_len |= rte_cpu_to_le_32(cmd_type_len);
855         }
856
857 end_of_tx:
858         /* set RS on last packet in the burst */
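            /*
             * txp is local to this call, so a later invocation could not reach
             * back to flag this burst's final packet; setting RS here keeps the
             * distance between RS descriptors bounded across bursts and ensures
             * these descriptors are eventually written back and reclaimed.
             */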
859         if (txp != NULL)
860                 txp->read.cmd_type_len |= rte_cpu_to_le_32(IXGBE_TXD_CMD_RS);
861
862         rte_wmb();
863
864         /*
865          * Set the Transmit Descriptor Tail (TDT)
866          */
867         PMD_TX_LOG(DEBUG, "port_id=%u queue_id=%u tx_tail=%u nb_tx=%u",
868                    (unsigned) txq->port_id, (unsigned) txq->queue_id,
869                    (unsigned) tx_id, (unsigned) nb_tx);
870         IXGBE_PCI_REG_WRITE(txq->tdt_reg_addr, tx_id);
871         txq->tx_tail = tx_id;
872
873         return (nb_tx);
874 }
875
876 /*********************************************************************
877  *
878  *  RX functions
879  *
880  **********************************************************************/
881 #define IXGBE_PACKET_TYPE_IPV4              0X01
882 #define IXGBE_PACKET_TYPE_IPV4_TCP          0X11
883 #define IXGBE_PACKET_TYPE_IPV4_UDP          0X21
884 #define IXGBE_PACKET_TYPE_IPV4_SCTP         0X41
885 #define IXGBE_PACKET_TYPE_IPV4_EXT          0X03
886 #define IXGBE_PACKET_TYPE_IPV4_EXT_SCTP     0X43
887 #define IXGBE_PACKET_TYPE_IPV6              0X04
888 #define IXGBE_PACKET_TYPE_IPV6_TCP          0X14
889 #define IXGBE_PACKET_TYPE_IPV6_UDP          0X24
890 #define IXGBE_PACKET_TYPE_IPV6_EXT          0X0C
891 #define IXGBE_PACKET_TYPE_IPV6_EXT_TCP      0X1C
892 #define IXGBE_PACKET_TYPE_IPV6_EXT_UDP      0X2C
893 #define IXGBE_PACKET_TYPE_IPV4_IPV6         0X05
894 #define IXGBE_PACKET_TYPE_IPV4_IPV6_TCP     0X15
895 #define IXGBE_PACKET_TYPE_IPV4_IPV6_UDP     0X25
896 #define IXGBE_PACKET_TYPE_IPV4_IPV6_EXT     0X0D
897 #define IXGBE_PACKET_TYPE_IPV4_IPV6_EXT_TCP 0X1D
898 #define IXGBE_PACKET_TYPE_IPV4_IPV6_EXT_UDP 0X2D
899 #define IXGBE_PACKET_TYPE_MAX               0X80
900 #define IXGBE_PACKET_TYPE_MASK              0X7F
901 #define IXGBE_PACKET_TYPE_SHIFT             0X04
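/*
 * In the values above, bit 0 flags IPv4, bit 1 IPv4 with extension headers,
 * bit 2 IPv6, bit 3 IPv6 with extensions, bit 4 TCP, bit 5 UDP and bit 6 SCTP,
 * mirroring the packet-type bits the hardware reports in the Rx descriptor's
 * pkt_info field (decoded below).
 */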
902 static inline uint32_t
903 ixgbe_rxd_pkt_info_to_pkt_type(uint16_t pkt_info)
904 {
905         static const uint32_t
906                 ptype_table[IXGBE_PACKET_TYPE_MAX] __rte_cache_aligned = {
907                 [IXGBE_PACKET_TYPE_IPV4] = RTE_PTYPE_L2_ETHER |
908                         RTE_PTYPE_L3_IPV4,
909                 [IXGBE_PACKET_TYPE_IPV4_EXT] = RTE_PTYPE_L2_ETHER |
910                         RTE_PTYPE_L3_IPV4_EXT,
911                 [IXGBE_PACKET_TYPE_IPV6] = RTE_PTYPE_L2_ETHER |
912                         RTE_PTYPE_L3_IPV6,
913                 [IXGBE_PACKET_TYPE_IPV4_IPV6] = RTE_PTYPE_L2_ETHER |
914                         RTE_PTYPE_L3_IPV4 | RTE_PTYPE_TUNNEL_IP |
915                         RTE_PTYPE_INNER_L3_IPV6,
916                 [IXGBE_PACKET_TYPE_IPV6_EXT] = RTE_PTYPE_L2_ETHER |
917                         RTE_PTYPE_L3_IPV6_EXT,
918                 [IXGBE_PACKET_TYPE_IPV4_IPV6_EXT] = RTE_PTYPE_L2_ETHER |
919                         RTE_PTYPE_L3_IPV4 | RTE_PTYPE_TUNNEL_IP |
920                         RTE_PTYPE_INNER_L3_IPV6_EXT,
921                 [IXGBE_PACKET_TYPE_IPV4_TCP] = RTE_PTYPE_L2_ETHER |
922                         RTE_PTYPE_L3_IPV4 | RTE_PTYPE_L4_TCP,
923                 [IXGBE_PACKET_TYPE_IPV6_TCP] = RTE_PTYPE_L2_ETHER |
924                         RTE_PTYPE_L3_IPV6 | RTE_PTYPE_L4_TCP,
925                 [IXGBE_PACKET_TYPE_IPV4_IPV6_TCP] = RTE_PTYPE_L2_ETHER |
926                         RTE_PTYPE_L3_IPV4 | RTE_PTYPE_TUNNEL_IP |
927                         RTE_PTYPE_INNER_L3_IPV6 | RTE_PTYPE_INNER_L4_TCP,
928                 [IXGBE_PACKET_TYPE_IPV6_EXT_TCP] = RTE_PTYPE_L2_ETHER |
929                         RTE_PTYPE_L3_IPV6_EXT | RTE_PTYPE_L4_TCP,
930                 [IXGBE_PACKET_TYPE_IPV4_IPV6_EXT_TCP] = RTE_PTYPE_L2_ETHER |
931                         RTE_PTYPE_L3_IPV4 | RTE_PTYPE_TUNNEL_IP |
932                         RTE_PTYPE_INNER_L3_IPV6_EXT | RTE_PTYPE_INNER_L4_TCP,
933                 [IXGBE_PACKET_TYPE_IPV4_UDP] = RTE_PTYPE_L2_ETHER |
934                         RTE_PTYPE_L3_IPV4 | RTE_PTYPE_L4_UDP,
935                 [IXGBE_PACKET_TYPE_IPV6_UDP] = RTE_PTYPE_L2_ETHER |
936                         RTE_PTYPE_L3_IPV6 | RTE_PTYPE_L4_UDP,
937                 [IXGBE_PACKET_TYPE_IPV4_IPV6_UDP] = RTE_PTYPE_L2_ETHER |
938                         RTE_PTYPE_L3_IPV4 | RTE_PTYPE_TUNNEL_IP |
939                         RTE_PTYPE_INNER_L3_IPV6 | RTE_PTYPE_INNER_L4_UDP,
940                 [IXGBE_PACKET_TYPE_IPV6_EXT_UDP] = RTE_PTYPE_L2_ETHER |
941                         RTE_PTYPE_L3_IPV6_EXT | RTE_PTYPE_L4_UDP,
942                 [IXGBE_PACKET_TYPE_IPV4_IPV6_EXT_UDP] = RTE_PTYPE_L2_ETHER |
943                         RTE_PTYPE_L3_IPV4 | RTE_PTYPE_TUNNEL_IP |
944                         RTE_PTYPE_INNER_L3_IPV6_EXT | RTE_PTYPE_INNER_L4_UDP,
945                 [IXGBE_PACKET_TYPE_IPV4_SCTP] = RTE_PTYPE_L2_ETHER |
946                         RTE_PTYPE_L3_IPV4 | RTE_PTYPE_L4_SCTP,
947                 [IXGBE_PACKET_TYPE_IPV4_EXT_SCTP] = RTE_PTYPE_L2_ETHER |
948                         RTE_PTYPE_L3_IPV4_EXT | RTE_PTYPE_L4_SCTP,
949         };
950         if (unlikely(pkt_info & IXGBE_RXDADV_PKTTYPE_ETQF))
951                 return RTE_PTYPE_UNKNOWN;
952
953         pkt_info = (pkt_info >> IXGBE_PACKET_TYPE_SHIFT) &
954                                 IXGBE_PACKET_TYPE_MASK;
955
956         return ptype_table[pkt_info];
957 }
958
959 static inline uint64_t
960 ixgbe_rxd_pkt_info_to_pkt_flags(uint16_t pkt_info)
961 {
962         static uint64_t ip_rss_types_map[16] __rte_cache_aligned = {
963                 0, PKT_RX_RSS_HASH, PKT_RX_RSS_HASH, PKT_RX_RSS_HASH,
964                 0, PKT_RX_RSS_HASH, 0, PKT_RX_RSS_HASH,
965                 PKT_RX_RSS_HASH, 0, 0, 0,
966                 0, 0, 0,  PKT_RX_FDIR,
967         };
968 #ifdef RTE_LIBRTE_IEEE1588
969         static uint64_t ip_pkt_etqf_map[8] = {
970                 0, 0, 0, PKT_RX_IEEE1588_PTP,
971                 0, 0, 0, 0,
972         };
973
974         if (likely(pkt_info & IXGBE_RXDADV_PKTTYPE_ETQF))
975                 return ip_pkt_etqf_map[(pkt_info >> 4) & 0X07] |
976                                 ip_rss_types_map[pkt_info & 0XF];
977         else
978                 return ip_rss_types_map[pkt_info & 0XF];
979 #else
980         return ip_rss_types_map[pkt_info & 0XF];
981 #endif
982 }
983
984 static inline uint64_t
985 rx_desc_status_to_pkt_flags(uint32_t rx_status)
986 {
987         uint64_t pkt_flags;
988
989         /*
990          * Only check whether a VLAN tag is present.
991          * Whether the NIC performed the L3/L4 Rx checksum is not checked
992          * here; that is indicated by the rte_eth_rxmode.hw_ip_checksum flag.
993          */
994         pkt_flags = (rx_status & IXGBE_RXD_STAT_VP) ?  PKT_RX_VLAN_PKT : 0;
995
996 #ifdef RTE_LIBRTE_IEEE1588
997         if (rx_status & IXGBE_RXD_STAT_TMST)
998                 pkt_flags = pkt_flags | PKT_RX_IEEE1588_TMST;
999 #endif
1000         return pkt_flags;
1001 }
1002
1003 static inline uint64_t
1004 rx_desc_error_to_pkt_flags(uint32_t rx_status)
1005 {
1006         /*
1007          * Bit 31: IPE, IPv4 checksum error
1008          * Bit 30: L4I, L4I integrity error
1009          */
1010         static uint64_t error_to_pkt_flags_map[4] = {
1011                 0,  PKT_RX_L4_CKSUM_BAD, PKT_RX_IP_CKSUM_BAD,
1012                 PKT_RX_IP_CKSUM_BAD | PKT_RX_L4_CKSUM_BAD
1013         };
1014         return error_to_pkt_flags_map[(rx_status >>
1015                 IXGBE_RXDADV_ERR_CKSUM_BIT) & IXGBE_RXDADV_ERR_CKSUM_MSK];
1016 }
1017
1018 /*
1019  * LOOK_AHEAD defines how many desc statuses to check beyond the
1020  * current descriptor.
1021  * It must be a pound define for optimal performance.
1022  * Do not change the value of LOOK_AHEAD, as the ixgbe_rx_scan_hw_ring
1023  * function only works with LOOK_AHEAD=8.
1024  */
1025 #define LOOK_AHEAD 8
1026 #if (LOOK_AHEAD != 8)
1027 #error "PMD IXGBE: LOOK_AHEAD must be 8\n"
1028 #endif
1029 static inline int
1030 ixgbe_rx_scan_hw_ring(struct ixgbe_rx_queue *rxq)
1031 {
1032         volatile union ixgbe_adv_rx_desc *rxdp;
1033         struct ixgbe_rx_entry *rxep;
1034         struct rte_mbuf *mb;
1035         uint16_t pkt_len;
1036         uint64_t pkt_flags;
1037         int nb_dd;
1038         uint32_t s[LOOK_AHEAD];
1039         uint16_t pkt_info[LOOK_AHEAD];
1040         int i, j, nb_rx = 0;
1041         uint32_t status;
1042
1043         /* get references to current descriptor and S/W ring entry */
1044         rxdp = &rxq->rx_ring[rxq->rx_tail];
1045         rxep = &rxq->sw_ring[rxq->rx_tail];
1046
1047         status = rxdp->wb.upper.status_error;
1048         /* check to make sure there is at least 1 packet to receive */
1049         if (!(status & rte_cpu_to_le_32(IXGBE_RXDADV_STAT_DD)))
1050                 return 0;
1051
1052         /*
1053          * Scan LOOK_AHEAD descriptors at a time to determine which descriptors
1054          * reference packets that are ready to be received.
1055          */
1056         for (i = 0; i < RTE_PMD_IXGBE_RX_MAX_BURST;
1057              i += LOOK_AHEAD, rxdp += LOOK_AHEAD, rxep += LOOK_AHEAD)
1058         {
1059                 /* Read desc statuses backwards to avoid race condition */
1060                 for (j = LOOK_AHEAD-1; j >= 0; --j)
1061                         s[j] = rte_le_to_cpu_32(rxdp[j].wb.upper.status_error);
1062
1063                 for (j = LOOK_AHEAD - 1; j >= 0; --j)
1064                         pkt_info[j] = rxdp[j].wb.lower.lo_dword.
1065                                                 hs_rss.pkt_info;
1066
1067                 /* Compute how many status bits were set */
1068                 nb_dd = 0;
1069                 for (j = 0; j < LOOK_AHEAD; ++j)
1070                         nb_dd += s[j] & IXGBE_RXDADV_STAT_DD;
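                     /*
                      * IXGBE_RXDADV_STAT_DD is bit 0, so the sum above simply
                      * counts descriptors whose DD bit is set; descriptors
                      * complete in order, so these are the first nb_dd entries
                      * of the group.
                      */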
1071
1072                 nb_rx += nb_dd;
1073
1074                 /* Translate descriptor info to mbuf format */
1075                 for (j = 0; j < nb_dd; ++j) {
1076                         mb = rxep[j].mbuf;
1077                         pkt_len = rte_le_to_cpu_16(rxdp[j].wb.upper.length) -
1078                                   rxq->crc_len;
1079                         mb->data_len = pkt_len;
1080                         mb->pkt_len = pkt_len;
1081                         mb->vlan_tci = rte_le_to_cpu_16(rxdp[j].wb.upper.vlan);
1082
1083                         /* convert descriptor fields to rte mbuf flags */
1084                         pkt_flags = rx_desc_status_to_pkt_flags(s[j]);
1085                         pkt_flags |= rx_desc_error_to_pkt_flags(s[j]);
1086                         pkt_flags |=
1087                                 ixgbe_rxd_pkt_info_to_pkt_flags(pkt_info[j]);
1088                         mb->ol_flags = pkt_flags;
1089                         mb->packet_type =
1090                                 ixgbe_rxd_pkt_info_to_pkt_type(pkt_info[j]);
1091
1092                         if (likely(pkt_flags & PKT_RX_RSS_HASH))
1093                                 mb->hash.rss = rte_le_to_cpu_32(
1094                                     rxdp[j].wb.lower.hi_dword.rss);
1095                         else if (pkt_flags & PKT_RX_FDIR) {
1096                                 mb->hash.fdir.hash = rte_le_to_cpu_16(
1097                                     rxdp[j].wb.lower.hi_dword.csum_ip.csum) &
1098                                     IXGBE_ATR_HASH_MASK;
1099                                 mb->hash.fdir.id = rte_le_to_cpu_16(
1100                                     rxdp[j].wb.lower.hi_dword.csum_ip.ip_id);
1101                         }
1102                 }
1103
1104                 /* Move mbuf pointers from the S/W ring to the stage */
1105                 for (j = 0; j < LOOK_AHEAD; ++j) {
1106                         rxq->rx_stage[i + j] = rxep[j].mbuf;
1107                 }
1108
1109                 /* stop if all requested packets could not be received */
1110                 /* stop if this group of LOOK_AHEAD descriptors was not fully done */
1111                         break;
1112         }
1113
1114         /* clear software ring entries so we can cleanup correctly */
1115         for (i = 0; i < nb_rx; ++i) {
1116                 rxq->sw_ring[rxq->rx_tail + i].mbuf = NULL;
1117         }
1118
1119
1120         return nb_rx;
1121 }
1122
1123 static inline int
1124 ixgbe_rx_alloc_bufs(struct ixgbe_rx_queue *rxq, bool reset_mbuf)
1125 {
1126         volatile union ixgbe_adv_rx_desc *rxdp;
1127         struct ixgbe_rx_entry *rxep;
1128         struct rte_mbuf *mb;
1129         uint16_t alloc_idx;
1130         __le64 dma_addr;
1131         int diag, i;
1132
1133         /* allocate buffers in bulk directly into the S/W ring */
1134         alloc_idx = rxq->rx_free_trigger - (rxq->rx_free_thresh - 1);
1135         rxep = &rxq->sw_ring[alloc_idx];
1136         diag = rte_mempool_get_bulk(rxq->mb_pool, (void *)rxep,
1137                                     rxq->rx_free_thresh);
1138         if (unlikely(diag != 0))
1139                 return (-ENOMEM);
1140
1141         rxdp = &rxq->rx_ring[alloc_idx];
1142         for (i = 0; i < rxq->rx_free_thresh; ++i) {
1143                 /* populate the static rte mbuf fields */
1144                 mb = rxep[i].mbuf;
1145                 if (reset_mbuf) {
1146                         mb->next = NULL;
1147                         mb->nb_segs = 1;
1148                         mb->port = rxq->port_id;
1149                 }
1150
1151                 rte_mbuf_refcnt_set(mb, 1);
1152                 mb->data_off = RTE_PKTMBUF_HEADROOM;
1153
1154                 /* populate the descriptors */
1155                 dma_addr = rte_cpu_to_le_64(RTE_MBUF_DATA_DMA_ADDR_DEFAULT(mb));
1156                 rxdp[i].read.hdr_addr = 0;
1157                 rxdp[i].read.pkt_addr = dma_addr;
1158         }
1159
1160         /* update state of internal queue structure */
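             /*
              * rx_free_trigger marks the last S/W ring slot of the next block
              * to refill; rx_recv_pkts() invokes this function and bumps the
              * RDT register once rx_tail moves past that mark.
              */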
1161         rxq->rx_free_trigger = rxq->rx_free_trigger + rxq->rx_free_thresh;
1162         if (rxq->rx_free_trigger >= rxq->nb_rx_desc)
1163                 rxq->rx_free_trigger = rxq->rx_free_thresh - 1;
1164
1165         /* no errors */
1166         return 0;
1167 }
1168
1169 static inline uint16_t
1170 ixgbe_rx_fill_from_stage(struct ixgbe_rx_queue *rxq, struct rte_mbuf **rx_pkts,
1171                          uint16_t nb_pkts)
1172 {
1173         struct rte_mbuf **stage = &rxq->rx_stage[rxq->rx_next_avail];
1174         int i;
1175
1176         /* how many packets are ready to return? */
1177         nb_pkts = (uint16_t)RTE_MIN(nb_pkts, rxq->rx_nb_avail);
1178
1179         /* copy mbuf pointers to the application's packet list */
1180         for (i = 0; i < nb_pkts; ++i)
1181                 rx_pkts[i] = stage[i];
1182
1183         /* update internal queue state */
1184         rxq->rx_nb_avail = (uint16_t)(rxq->rx_nb_avail - nb_pkts);
1185         rxq->rx_next_avail = (uint16_t)(rxq->rx_next_avail + nb_pkts);
1186
1187         return nb_pkts;
1188 }
1189
1190 static inline uint16_t
1191 rx_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts,
1192              uint16_t nb_pkts)
1193 {
1194         struct ixgbe_rx_queue *rxq = (struct ixgbe_rx_queue *)rx_queue;
1195         uint16_t nb_rx = 0;
1196
1197         /* Any previously recv'd pkts will be returned from the Rx stage */
1198         if (rxq->rx_nb_avail)
1199                 return ixgbe_rx_fill_from_stage(rxq, rx_pkts, nb_pkts);
1200
1201         /* Scan the H/W ring for packets to receive */
1202         nb_rx = (uint16_t)ixgbe_rx_scan_hw_ring(rxq);
1203
1204         /* update internal queue state */
1205         rxq->rx_next_avail = 0;
1206         rxq->rx_nb_avail = nb_rx;
1207         rxq->rx_tail = (uint16_t)(rxq->rx_tail + nb_rx);
1208
1209         /* if required, allocate new buffers to replenish descriptors */
1210         if (rxq->rx_tail > rxq->rx_free_trigger) {
1211                 uint16_t cur_free_trigger = rxq->rx_free_trigger;
1212
1213                 if (ixgbe_rx_alloc_bufs(rxq, true) != 0) {
1214                         int i, j;
1215                         PMD_RX_LOG(DEBUG, "RX mbuf alloc failed port_id=%u "
1216                                    "queue_id=%u", (unsigned) rxq->port_id,
1217                                    (unsigned) rxq->queue_id);
1218
1219                         rte_eth_devices[rxq->port_id].data->rx_mbuf_alloc_failed +=
1220                                 rxq->rx_free_thresh;
1221
1222                         /*
1223                          * Need to rewind any previous receives if we cannot
1224                          * allocate new buffers to replenish the old ones.
1225                          */
1226                         rxq->rx_nb_avail = 0;
1227                         rxq->rx_tail = (uint16_t)(rxq->rx_tail - nb_rx);
1228                         for (i = 0, j = rxq->rx_tail; i < nb_rx; ++i, ++j)
1229                                 rxq->sw_ring[j].mbuf = rxq->rx_stage[i];
1230
1231                         return 0;
1232                 }
1233
1234                 /* update tail pointer */
1235                 rte_wmb();
1236                 IXGBE_PCI_REG_WRITE(rxq->rdt_reg_addr, cur_free_trigger);
1237         }
1238
1239         if (rxq->rx_tail >= rxq->nb_rx_desc)
1240                 rxq->rx_tail = 0;
1241
1242         /* received any packets this loop? */
1243         if (rxq->rx_nb_avail)
1244                 return ixgbe_rx_fill_from_stage(rxq, rx_pkts, nb_pkts);
1245
1246         return 0;
1247 }
1248
1249 /* split requests into chunks of size RTE_PMD_IXGBE_RX_MAX_BURST */
1250 static uint16_t
1251 ixgbe_recv_pkts_bulk_alloc(void *rx_queue, struct rte_mbuf **rx_pkts,
1252                            uint16_t nb_pkts)
1253 {
1254         uint16_t nb_rx;
1255
1256         if (unlikely(nb_pkts == 0))
1257                 return 0;
1258
1259         if (likely(nb_pkts <= RTE_PMD_IXGBE_RX_MAX_BURST))
1260                 return rx_recv_pkts(rx_queue, rx_pkts, nb_pkts);
1261
1262         /* request is relatively large, chunk it up */
1263         nb_rx = 0;
1264         while (nb_pkts) {
1265                 uint16_t ret, n;
1266                 n = (uint16_t)RTE_MIN(nb_pkts, RTE_PMD_IXGBE_RX_MAX_BURST);
1267                 ret = rx_recv_pkts(rx_queue, &rx_pkts[nb_rx], n);
1268                 nb_rx = (uint16_t)(nb_rx + ret);
1269                 nb_pkts = (uint16_t)(nb_pkts - ret);
1270                 if (ret < n)
1271                         break;
1272         }
1273
1274         return nb_rx;
1275 }
1276
1277 uint16_t
1278 ixgbe_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts,
1279                 uint16_t nb_pkts)
1280 {
1281         struct ixgbe_rx_queue *rxq;
1282         volatile union ixgbe_adv_rx_desc *rx_ring;
1283         volatile union ixgbe_adv_rx_desc *rxdp;
1284         struct ixgbe_rx_entry *sw_ring;
1285         struct ixgbe_rx_entry *rxe;
1286         struct rte_mbuf *rxm;
1287         struct rte_mbuf *nmb;
1288         union ixgbe_adv_rx_desc rxd;
1289         uint64_t dma_addr;
1290         uint32_t staterr;
1291         uint32_t pkt_info;
1292         uint16_t pkt_len;
1293         uint16_t rx_id;
1294         uint16_t nb_rx;
1295         uint16_t nb_hold;
1296         uint64_t pkt_flags;
1297
1298         nb_rx = 0;
1299         nb_hold = 0;
1300         rxq = rx_queue;
1301         rx_id = rxq->rx_tail;
1302         rx_ring = rxq->rx_ring;
1303         sw_ring = rxq->sw_ring;
1304         while (nb_rx < nb_pkts) {
1305                 /*
1306                  * The order of operations here is important as the DD status
1307                  * bit must not be read after any other descriptor fields.
1308                  * rx_ring and rxdp are pointing to volatile data so the order
1309                  * of accesses cannot be reordered by the compiler. If they were
1310                  * not volatile, they could be reordered which could lead to
1311                  * using invalid descriptor fields when read from rxd.
1312                  */
1313                 rxdp = &rx_ring[rx_id];
1314                 staterr = rxdp->wb.upper.status_error;
1315                 if (!(staterr & rte_cpu_to_le_32(IXGBE_RXDADV_STAT_DD)))
1316                         break;
1317                 rxd = *rxdp;
1318
1319                 /*
1320                  * End of packet.
1321                  *
1322                  * If the IXGBE_RXDADV_STAT_EOP flag is not set, the RX packet
1323                  * is likely to be invalid and to be dropped by the various
1324                  * validation checks performed by the network stack.
1325                  *
1326                  * Allocate a new mbuf to replenish the RX ring descriptor.
1327                  * If the allocation fails:
1328                  *    - arrange for that RX descriptor to be the first one
1329                  *      being parsed the next time the receive function is
1330                  *      invoked [on the same queue].
1331                  *
1332                  *    - Stop parsing the RX ring and return immediately.
1333                  *
1334                  * This policy does not drop the packet received in the RX
1335                  * descriptor for which the allocation of a new mbuf failed.
1336                  * Thus, it allows that packet to be retrieved later, once
1337                  * mbufs have been freed in the meantime.
1338                  * As a side effect, holding RX descriptors instead of
1339                  * systematically giving them back to the NIC may lead to
1340                  * RX ring exhaustion situations.
1341                  * However, the NIC can gracefully prevent such situations
1342                  * from happening by sending specific "back-pressure" flow
1343                  * control frames to its peer(s).
1344                  */
1345                 PMD_RX_LOG(DEBUG, "port_id=%u queue_id=%u rx_id=%u "
1346                            "ext_err_stat=0x%08x pkt_len=%u",
1347                            (unsigned) rxq->port_id, (unsigned) rxq->queue_id,
1348                            (unsigned) rx_id, (unsigned) staterr,
1349                            (unsigned) rte_le_to_cpu_16(rxd.wb.upper.length));
1350
1351                 nmb = rte_rxmbuf_alloc(rxq->mb_pool);
1352                 if (nmb == NULL) {
1353                         PMD_RX_LOG(DEBUG, "RX mbuf alloc failed port_id=%u "
1354                                    "queue_id=%u", (unsigned) rxq->port_id,
1355                                    (unsigned) rxq->queue_id);
1356                         rte_eth_devices[rxq->port_id].data->rx_mbuf_alloc_failed++;
1357                         break;
1358                 }
1359
1360                 nb_hold++;
1361                 rxe = &sw_ring[rx_id];
1362                 rx_id++;
1363                 if (rx_id == rxq->nb_rx_desc)
1364                         rx_id = 0;
1365
1366                 /* Prefetch next mbuf while processing current one. */
1367                 rte_ixgbe_prefetch(sw_ring[rx_id].mbuf);
1368
1369                 /*
1370                  * When the next RX descriptor is on a cache-line boundary,
1371                  * prefetch the next 4 RX descriptors and the next 8 pointers
1372                  * to mbufs.
1373                  */
1374                 if ((rx_id & 0x3) == 0) {
1375                         rte_ixgbe_prefetch(&rx_ring[rx_id]);
1376                         rte_ixgbe_prefetch(&sw_ring[rx_id]);
1377                 }
1378
1379                 rxm = rxe->mbuf;
1380                 rxe->mbuf = nmb;
1381                 dma_addr =
1382                         rte_cpu_to_le_64(RTE_MBUF_DATA_DMA_ADDR_DEFAULT(nmb));
1383                 rxdp->read.hdr_addr = 0;
1384                 rxdp->read.pkt_addr = dma_addr;
1385
1386                 /*
1387                  * Initialize the returned mbuf.
1388                  * 1) setup generic mbuf fields:
1389                  *    - number of segments,
1390                  *    - next segment,
1391                  *    - packet length,
1392                  *    - RX port identifier.
1393                  * 2) integrate hardware offload data, if any:
1394                  *    - RSS flag & hash,
1395                  *    - IP checksum flag,
1396                  *    - VLAN TCI, if any,
1397                  *    - error flags.
1398                  */
1399                 pkt_len = (uint16_t) (rte_le_to_cpu_16(rxd.wb.upper.length) -
1400                                       rxq->crc_len);
1401                 rxm->data_off = RTE_PKTMBUF_HEADROOM;
1402                 rte_packet_prefetch((char *)rxm->buf_addr + rxm->data_off);
1403                 rxm->nb_segs = 1;
1404                 rxm->next = NULL;
1405                 rxm->pkt_len = pkt_len;
1406                 rxm->data_len = pkt_len;
1407                 rxm->port = rxq->port_id;
1408
1409                 pkt_info = rte_le_to_cpu_32(rxd.wb.lower.lo_dword.hs_rss.
1410                                                                 pkt_info);
1411                 /* Only valid if PKT_RX_VLAN_PKT is set in pkt_flags */
1412                 rxm->vlan_tci = rte_le_to_cpu_16(rxd.wb.upper.vlan);
1413
1414                 pkt_flags = rx_desc_status_to_pkt_flags(staterr);
1415                 pkt_flags = pkt_flags | rx_desc_error_to_pkt_flags(staterr);
1416                 pkt_flags = pkt_flags |
1417                         ixgbe_rxd_pkt_info_to_pkt_flags(pkt_info);
1418                 rxm->ol_flags = pkt_flags;
1419                 rxm->packet_type = ixgbe_rxd_pkt_info_to_pkt_type(pkt_info);
1420
1421                 if (likely(pkt_flags & PKT_RX_RSS_HASH))
1422                         rxm->hash.rss = rte_le_to_cpu_32(
1423                                                 rxd.wb.lower.hi_dword.rss);
1424                 else if (pkt_flags & PKT_RX_FDIR) {
1425                         rxm->hash.fdir.hash = rte_le_to_cpu_16(
1426                                         rxd.wb.lower.hi_dword.csum_ip.csum) &
1427                                         IXGBE_ATR_HASH_MASK;
1428                         rxm->hash.fdir.id = rte_le_to_cpu_16(
1429                                         rxd.wb.lower.hi_dword.csum_ip.ip_id);
1430                 }
1431                 /*
1432                  * Store the mbuf address into the next entry of the array
1433                  * of returned packets.
1434                  */
1435                 rx_pkts[nb_rx++] = rxm;
1436         }
1437         rxq->rx_tail = rx_id;
1438
1439         /*
1440          * If the number of free RX descriptors is greater than the RX free
1441          * threshold of the queue, advance the Receive Descriptor Tail (RDT)
1442          * register.
1443          * Update the RDT with the value of the last processed RX descriptor
1444          * minus 1, to guarantee that the RDT register is never equal to the
1445          * RDH register, which creates a "full" ring situtation from the
1446          * RDH register, which creates a "full" ring situation from the
1447          */
1448         nb_hold = (uint16_t) (nb_hold + rxq->nb_rx_hold);
1449         if (nb_hold > rxq->rx_free_thresh) {
1450                 PMD_RX_LOG(DEBUG, "port_id=%u queue_id=%u rx_tail=%u "
1451                            "nb_hold=%u nb_rx=%u",
1452                            (unsigned) rxq->port_id, (unsigned) rxq->queue_id,
1453                            (unsigned) rx_id, (unsigned) nb_hold,
1454                            (unsigned) nb_rx);
1455                 rx_id = (uint16_t) ((rx_id == 0) ?
1456                                      (rxq->nb_rx_desc - 1) : (rx_id - 1));
1457                 IXGBE_PCI_REG_WRITE(rxq->rdt_reg_addr, rx_id);
1458                 nb_hold = 0;
1459         }
1460         rxq->nb_rx_hold = nb_hold;
1461         return (nb_rx);
1462 }
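/*
 * Editor's note: illustrative sketch only, not part of the driver. It restates
 * the RDT update rule used above: the tail register is written with the index
 * of the last descriptor handed back to the hardware minus one (with ring
 * wrap-around), so that RDT never becomes equal to RDH and the ring never
 * looks full to the hardware. example_rdt_value() is a hypothetical helper.
 */
#if 0   /* illustrative example, not compiled */
static inline uint16_t
example_rdt_value(uint16_t rx_id, uint16_t nb_rx_desc)
{
	return (uint16_t)((rx_id == 0) ? (nb_rx_desc - 1) : (rx_id - 1));
}
#endif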
1463
1464 /**
1465  * Return the RSC count of a descriptor (non-zero for an RSC descriptor).
1466  */
1467 static inline uint32_t
1468 ixgbe_rsc_count(union ixgbe_adv_rx_desc *rx)
1469 {
1470         return (rte_le_to_cpu_32(rx->wb.lower.lo_dword.data) &
1471                 IXGBE_RXDADV_RSCCNT_MASK) >> IXGBE_RXDADV_RSCCNT_SHIFT;
1472 }
1473
1474 /**
1475  * ixgbe_fill_cluster_head_buf - fill the first mbuf of the returned packet
1476  *
1477  * Fill the following info in the HEAD buffer of the Rx cluster:
1478  *    - RX port identifier
1479  *    - hardware offload data, if any:
1480  *      - RSS flag & hash
1481  *      - IP checksum flag
1482  *      - VLAN TCI, if any
1483  *      - error flags
1484  * @head HEAD of the packet cluster
1485  * @desc HW descriptor to get data from
1486  * @port_id Port ID of the Rx queue
1487  */
1488 static inline void
1489 ixgbe_fill_cluster_head_buf(
1490         struct rte_mbuf *head,
1491         union ixgbe_adv_rx_desc *desc,
1492         uint8_t port_id,
1493         uint32_t staterr)
1494 {
1495         uint16_t pkt_info;
1496         uint64_t pkt_flags;
1497
1498         head->port = port_id;
1499
1500         /* The vlan_tci field is only valid when PKT_RX_VLAN_PKT is
1501          * set in the pkt_flags field.
1502          */
1503         head->vlan_tci = rte_le_to_cpu_16(desc->wb.upper.vlan);
1504         pkt_info = rte_le_to_cpu_32(desc->wb.lower.lo_dword.hs_rss.pkt_info);
1505         pkt_flags = rx_desc_status_to_pkt_flags(staterr);
1506         pkt_flags |= rx_desc_error_to_pkt_flags(staterr);
1507         pkt_flags |= ixgbe_rxd_pkt_info_to_pkt_flags(pkt_info);
1508         head->ol_flags = pkt_flags;
1509         head->packet_type = ixgbe_rxd_pkt_info_to_pkt_type(pkt_info);
1510
1511         if (likely(pkt_flags & PKT_RX_RSS_HASH))
1512                 head->hash.rss = rte_le_to_cpu_32(desc->wb.lower.hi_dword.rss);
1513         else if (pkt_flags & PKT_RX_FDIR) {
1514                 head->hash.fdir.hash =
1515                         rte_le_to_cpu_16(desc->wb.lower.hi_dword.csum_ip.csum)
1516                                                           & IXGBE_ATR_HASH_MASK;
1517                 head->hash.fdir.id =
1518                         rte_le_to_cpu_16(desc->wb.lower.hi_dword.csum_ip.ip_id);
1519         }
1520 }
1521
1522 /**
1523  * ixgbe_recv_pkts_lro - receive handler for the LRO case.
1524  *
1525  * @rx_queue Rx queue handle
1526  * @rx_pkts table of received packets
1527  * @nb_pkts size of rx_pkts table
1528  * @bulk_alloc if TRUE, bulk allocation is used for HW ring refilling
1529  *
1530  * Handles the Rx HW ring completions when the RSC feature is configured. Uses an
1531  * additional ring of ixgbe_scattered_rx_entry's that holds the relevant RSC info.
1532  *
1533  * We use the same logic as in Linux and in FreeBSD ixgbe drivers:
1534  * 1) When non-EOP RSC completion arrives:
1535  *    a) Update the HEAD of the current RSC aggregation cluster with the new
1536  *       segment's data length.
1537  *    b) Set the "next" pointer of the current segment to point to the segment
1538  *       at the NEXTP index.
1539  *    c) Pass the HEAD of RSC aggregation cluster on to the next NEXTP entry
1540  *       in the sw_sc_ring.
1541  * 2) When EOP arrives we just update the cluster's total length and offload
1542  *    flags and deliver the cluster up to the upper layers. In our case - put it
1543  *    in the rx_pkts table.
1544  *
1545  * Returns the number of received packets/clusters (according to the "bulk
1546  * receive" interface).
1547  */
1548 static inline uint16_t
1549 ixgbe_recv_pkts_lro(void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t nb_pkts,
1550                     bool bulk_alloc)
1551 {
1552         struct ixgbe_rx_queue *rxq = rx_queue;
1553         volatile union ixgbe_adv_rx_desc *rx_ring = rxq->rx_ring;
1554         struct ixgbe_rx_entry *sw_ring = rxq->sw_ring;
1555         struct ixgbe_scattered_rx_entry *sw_sc_ring = rxq->sw_sc_ring;
1556         uint16_t rx_id = rxq->rx_tail;
1557         uint16_t nb_rx = 0;
1558         uint16_t nb_hold = rxq->nb_rx_hold;
1559         uint16_t prev_id = rxq->rx_tail;
1560
1561         while (nb_rx < nb_pkts) {
1562                 bool eop;
1563                 struct ixgbe_rx_entry *rxe;
1564                 struct ixgbe_scattered_rx_entry *sc_entry;
1565                 struct ixgbe_scattered_rx_entry *next_sc_entry;
1566                 struct ixgbe_rx_entry *next_rxe;
1567                 struct rte_mbuf *first_seg;
1568                 struct rte_mbuf *rxm;
1569                 struct rte_mbuf *nmb;
1570                 union ixgbe_adv_rx_desc rxd;
1571                 uint16_t data_len;
1572                 uint16_t next_id;
1573                 volatile union ixgbe_adv_rx_desc *rxdp;
1574                 uint32_t staterr;
1575
1576 next_desc:
1577                 /*
1578                  * The code throughout this file relies on volatile pointers to
1579                  * ensure the read ordering of the status word and the rest of the
1580                  * descriptor fields (at the compiler level only). A compiler
1581                  * barrier would express the same intent more clearly; DPDK even
1582                  * provides rte_compiler_barrier() for that.
1583                  *
1584                  * More importantly, this is not correct in the general case: it
1585                  * does not guarantee memory ordering at the hardware level. For
1586                  * instance, DPDK is supposed to work on Power CPUs, where a
1587                  * compiler barrier alone may not be enough.
1588                  *
1589                  * An attempt was made to fix only this function as a starting
1590                  * point (as part of an LRO/RSC series), but casting away the
1591                  * "volatile" qualifier from rx_ring (which is volatile as well)
1592                  * does not compile cleanly, so the code is kept the way it is
1593                  * for now.
1594                  *
1595                  * The code in this file has similar issues in other places and
1596                  * will not work on a big-endian CPU anyway, therefore the lines
1597                  * below will have to be revisited together with the rest of the
1598                  * ixgbe PMD.
1599                  *
1600                  * TODO:
1601                  *    - Get rid of the "volatile" qualifiers and let the compiler
1602                  *      do its job.
1603                  *    - Use a proper memory barrier (rte_rmb()) to ensure the
1604                  *      memory ordering below.
1605                  */
1606                 rxdp = &rx_ring[rx_id];
1607                 staterr = rte_le_to_cpu_32(rxdp->wb.upper.status_error);
1608
1609                 if (!(staterr & IXGBE_RXDADV_STAT_DD))
1610                         break;
1611
1612                 rxd = *rxdp;
1613
1614                 PMD_RX_LOG(DEBUG, "port_id=%u queue_id=%u rx_id=%u "
1615                                   "staterr=0x%x data_len=%u",
1616                            rxq->port_id, rxq->queue_id, rx_id, staterr,
1617                            rte_le_to_cpu_16(rxd.wb.upper.length));
1618
1619                 if (!bulk_alloc) {
1620                         nmb = rte_rxmbuf_alloc(rxq->mb_pool);
1621                         if (nmb == NULL) {
1622                                 PMD_RX_LOG(DEBUG, "RX mbuf alloc failed "
1623                                                   "port_id=%u queue_id=%u",
1624                                            rxq->port_id, rxq->queue_id);
1625
1626                                 rte_eth_devices[rxq->port_id].data->
1627                                                         rx_mbuf_alloc_failed++;
1628                                 break;
1629                         }
1630                 }
1631                 else if (nb_hold > rxq->rx_free_thresh) {
1632                         uint16_t next_rdt = rxq->rx_free_trigger;
1633
1634                         if (!ixgbe_rx_alloc_bufs(rxq, false)) {
1635                                 rte_wmb();
1636                                 IXGBE_PCI_REG_WRITE(rxq->rdt_reg_addr,
1637                                                     next_rdt);
1638                                 nb_hold -= rxq->rx_free_thresh;
1639                         } else {
1640                                 PMD_RX_LOG(DEBUG, "RX bulk alloc failed "
1641                                                   "port_id=%u queue_id=%u",
1642                                            rxq->port_id, rxq->queue_id);
1643
1644                                 rte_eth_devices[rxq->port_id].data->
1645                                                         rx_mbuf_alloc_failed++;
1646                                 break;
1647                         }
1648                 }
1649
1650                 nb_hold++;
1651                 rxe = &sw_ring[rx_id];
1652                 eop = staterr & IXGBE_RXDADV_STAT_EOP;
1653
1654                 next_id = rx_id + 1;
1655                 if (next_id == rxq->nb_rx_desc)
1656                         next_id = 0;
1657
1658                 /* Prefetch next mbuf while processing current one. */
1659                 rte_ixgbe_prefetch(sw_ring[next_id].mbuf);
1660
1661                 /*
1662                  * When the next RX descriptor is on a cache-line boundary,
1663                  * prefetch the next 4 RX descriptors and the next 8 pointers
1664                  * to mbufs.
1665                  */
1666                 if ((next_id & 0x3) == 0) {
1667                         rte_ixgbe_prefetch(&rx_ring[next_id]);
1668                         rte_ixgbe_prefetch(&sw_ring[next_id]);
1669                 }
1670
1671                 rxm = rxe->mbuf;
1672
1673                 if (!bulk_alloc) {
1674                         __le64 dma =
1675                           rte_cpu_to_le_64(RTE_MBUF_DATA_DMA_ADDR_DEFAULT(nmb));
1676                         /*
1677                          * Update RX descriptor with the physical address of the
1678                          * new data buffer of the newly allocated mbuf.
1679                          */
1680                         rxe->mbuf = nmb;
1681
1682                         rxm->data_off = RTE_PKTMBUF_HEADROOM;
1683                         rxdp->read.hdr_addr = 0;
1684                         rxdp->read.pkt_addr = dma;
1685                 } else
1686                         rxe->mbuf = NULL;
1687
1688                 /*
1689                  * Set the data length of the mbuf.
1690                  */
1691                 data_len = rte_le_to_cpu_16(rxd.wb.upper.length);
1692                 rxm->data_len = data_len;
1693
1694                 if (!eop) {
1695                         uint16_t nextp_id;
1696                         /*
1697                          * Get next descriptor index:
1698                          *  - For RSC it's in the NEXTP field.
1699                          *  - For a scattered packet - it's just a following
1700                          *    descriptor.
1701                          */
1702                         if (ixgbe_rsc_count(&rxd))
1703                                 nextp_id =
1704                                         (staterr & IXGBE_RXDADV_NEXTP_MASK) >>
1705                                                        IXGBE_RXDADV_NEXTP_SHIFT;
1706                         else
1707                                 nextp_id = next_id;
1708
1709                         next_sc_entry = &sw_sc_ring[nextp_id];
1710                         next_rxe = &sw_ring[nextp_id];
1711                         rte_ixgbe_prefetch(next_rxe);
1712                 }
1713
1714                 sc_entry = &sw_sc_ring[rx_id];
1715                 first_seg = sc_entry->fbuf;
1716                 sc_entry->fbuf = NULL;
1717
1718                 /*
1719                  * If this is the first buffer of the received packet,
1720                  * set the pointer to the first mbuf of the packet and
1721                  * initialize its context.
1722                  * Otherwise, update the total length and the number of segments
1723                  * of the current scattered packet, and update the pointer to
1724                  * the last mbuf of the current packet.
1725                  */
1726                 if (first_seg == NULL) {
1727                         first_seg = rxm;
1728                         first_seg->pkt_len = data_len;
1729                         first_seg->nb_segs = 1;
1730                 } else {
1731                         first_seg->pkt_len += data_len;
1732                         first_seg->nb_segs++;
1733                 }
1734
1735                 prev_id = rx_id;
1736                 rx_id = next_id;
1737
1738                 /*
1739                  * If this is not the last buffer of the received packet, update
1740                  * the pointer to the first mbuf at the NEXTP entry in the
1741                  * sw_sc_ring and continue to parse the RX ring.
1742                  */
1743                 if (!eop) {
1744                         rxm->next = next_rxe->mbuf;
1745                         next_sc_entry->fbuf = first_seg;
1746                         goto next_desc;
1747                 }
1748
1749                 /*
1750                  * This is the last buffer of the received packet - return
1751                  * the current cluster to the user.
1752                  */
1753                 rxm->next = NULL;
1754
1755                 /* Initialize the first mbuf of the returned packet */
1756                 ixgbe_fill_cluster_head_buf(first_seg, &rxd, rxq->port_id,
1757                                             staterr);
1758
1759                 /*
1760                  * Deal with the case when HW CRC strip is disabled.
1761                  * That cannot happen when LRO is enabled, but it still can
1762                  * happen in scattered Rx mode.
1763                  */
1764                 first_seg->pkt_len -= rxq->crc_len;
1765                 if (unlikely(rxm->data_len <= rxq->crc_len)) {
1766                         struct rte_mbuf *lp;
1767
1768                         for (lp = first_seg; lp->next != rxm; lp = lp->next)
1769                                 ;
1770
1771                         first_seg->nb_segs--;
1772                         lp->data_len -= rxq->crc_len - rxm->data_len;
1773                         lp->next = NULL;
1774                         rte_pktmbuf_free_seg(rxm);
1775                 } else
1776                         rxm->data_len -= rxq->crc_len;
1777
1778                 /* Prefetch data of first segment, if configured to do so. */
1779                 rte_packet_prefetch((char *)first_seg->buf_addr +
1780                         first_seg->data_off);
1781
1782                 /*
1783                  * Store the mbuf address into the next entry of the array
1784                  * of returned packets.
1785                  */
1786                 rx_pkts[nb_rx++] = first_seg;
1787         }
1788
1789         /*
1790          * Record index of the next RX descriptor to probe.
1791          */
1792         rxq->rx_tail = rx_id;
1793
1794         /*
1795          * If the number of free RX descriptors is greater than the RX free
1796          * threshold of the queue, advance the Receive Descriptor Tail (RDT)
1797          * register.
1798          * Update the RDT with the value of the last processed RX descriptor
1799          * minus 1, to guarantee that the RDT register is never equal to the
1800          * RDH register, which creates a "full" ring situation from the
1801          * hardware point of view...
1802          */
1803         if (!bulk_alloc && nb_hold > rxq->rx_free_thresh) {
1804                 PMD_RX_LOG(DEBUG, "port_id=%u queue_id=%u rx_tail=%u "
1805                            "nb_hold=%u nb_rx=%u",
1806                            rxq->port_id, rxq->queue_id, rx_id, nb_hold, nb_rx);
1807
1808                 rte_wmb();
1809                 IXGBE_PCI_REG_WRITE(rxq->rdt_reg_addr, prev_id);
1810                 nb_hold = 0;
1811         }
1812
1813         rxq->nb_rx_hold = nb_hold;
1814         return nb_rx;
1815 }
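/*
 * Editor's note: illustrative sketch only, not part of the driver. It isolates
 * the "next descriptor" selection used by ixgbe_recv_pkts_lro() above: RSC
 * completions carry the index of the next buffer of the cluster in the NEXTP
 * field of the descriptor, while plain scattered Rx simply continues with the
 * following ring entry. example_next_desc_id() is a hypothetical helper.
 */
#if 0   /* illustrative example, not compiled */
static inline uint16_t
example_next_desc_id(uint32_t staterr, uint32_t rsc_cnt,
		     uint16_t cur_id, uint16_t nb_rx_desc)
{
	if (rsc_cnt)
		return (uint16_t)((staterr & IXGBE_RXDADV_NEXTP_MASK) >>
				  IXGBE_RXDADV_NEXTP_SHIFT);
	return (uint16_t)((cur_id + 1 == nb_rx_desc) ? 0 : cur_id + 1);
}
#endif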
1816
1817 uint16_t
1818 ixgbe_recv_pkts_lro_single_alloc(void *rx_queue, struct rte_mbuf **rx_pkts,
1819                                  uint16_t nb_pkts)
1820 {
1821         return ixgbe_recv_pkts_lro(rx_queue, rx_pkts, nb_pkts, false);
1822 }
1823
1824 uint16_t
1825 ixgbe_recv_pkts_lro_bulk_alloc(void *rx_queue, struct rte_mbuf **rx_pkts,
1826                                uint16_t nb_pkts)
1827 {
1828         return ixgbe_recv_pkts_lro(rx_queue, rx_pkts, nb_pkts, true);
1829 }
1830
1831 /*********************************************************************
1832  *
1833  *  Queue management functions
1834  *
1835  **********************************************************************/
1836
1837 /*
1838  * Create memzone for HW rings. malloc can't be used as the physical address is
1839  * needed. If the memzone is already created, then this function returns a ptr
1840  * to the old one.
1841  */
1842 static const struct rte_memzone * __attribute__((cold))
1843 ring_dma_zone_reserve(struct rte_eth_dev *dev, const char *ring_name,
1844                       uint16_t queue_id, uint32_t ring_size, int socket_id)
1845 {
1846         char z_name[RTE_MEMZONE_NAMESIZE];
1847         const struct rte_memzone *mz;
1848
1849         snprintf(z_name, sizeof(z_name), "%s_%s_%d_%d",
1850                         dev->driver->pci_drv.name, ring_name,
1851                         dev->data->port_id, queue_id);
1852
1853         mz = rte_memzone_lookup(z_name);
1854         if (mz)
1855                 return mz;
1856
1857 #ifdef RTE_LIBRTE_XEN_DOM0
1858         return rte_memzone_reserve_bounded(z_name, ring_size,
1859                 socket_id, 0, IXGBE_ALIGN, RTE_PGSIZE_2M);
1860 #else
1861         return rte_memzone_reserve_aligned(z_name, ring_size,
1862                 socket_id, 0, IXGBE_ALIGN);
1863 #endif
1864 }
1865
1866 static void __attribute__((cold))
1867 ixgbe_tx_queue_release_mbufs(struct ixgbe_tx_queue *txq)
1868 {
1869         unsigned i;
1870
1871         if (txq->sw_ring != NULL) {
1872                 for (i = 0; i < txq->nb_tx_desc; i++) {
1873                         if (txq->sw_ring[i].mbuf != NULL) {
1874                                 rte_pktmbuf_free_seg(txq->sw_ring[i].mbuf);
1875                                 txq->sw_ring[i].mbuf = NULL;
1876                         }
1877                 }
1878         }
1879 }
1880
1881 static void __attribute__((cold))
1882 ixgbe_tx_free_swring(struct ixgbe_tx_queue *txq)
1883 {
1884         if (txq != NULL &&
1885             txq->sw_ring != NULL)
1886                 rte_free(txq->sw_ring);
1887 }
1888
1889 static void __attribute__((cold))
1890 ixgbe_tx_queue_release(struct ixgbe_tx_queue *txq)
1891 {
1892         if (txq != NULL && txq->ops != NULL) {
1893                 txq->ops->release_mbufs(txq);
1894                 txq->ops->free_swring(txq);
1895                 rte_free(txq);
1896         }
1897 }
1898
1899 void __attribute__((cold))
1900 ixgbe_dev_tx_queue_release(void *txq)
1901 {
1902         ixgbe_tx_queue_release(txq);
1903 }
1904
1905 /* (Re)set dynamic ixgbe_tx_queue fields to defaults */
1906 static void __attribute__((cold))
1907 ixgbe_reset_tx_queue(struct ixgbe_tx_queue *txq)
1908 {
1909         static const union ixgbe_adv_tx_desc zeroed_desc = {{0}};
1910         struct ixgbe_tx_entry *txe = txq->sw_ring;
1911         uint16_t prev, i;
1912
1913         /* Zero out HW ring memory */
1914         for (i = 0; i < txq->nb_tx_desc; i++) {
1915                 txq->tx_ring[i] = zeroed_desc;
1916         }
1917
1918         /* Initialize SW ring entries */
1919         prev = (uint16_t) (txq->nb_tx_desc - 1);
1920         for (i = 0; i < txq->nb_tx_desc; i++) {
1921                 volatile union ixgbe_adv_tx_desc *txd = &txq->tx_ring[i];
1922                 txd->wb.status = rte_cpu_to_le_32(IXGBE_TXD_STAT_DD);
1923                 txe[i].mbuf = NULL;
1924                 txe[i].last_id = i;
1925                 txe[prev].next_id = i;
1926                 prev = i;
1927         }
1928
1929         txq->tx_next_dd = (uint16_t)(txq->tx_rs_thresh - 1);
1930         txq->tx_next_rs = (uint16_t)(txq->tx_rs_thresh - 1);
1931
1932         txq->tx_tail = 0;
1933         txq->nb_tx_used = 0;
1934         /*
1935          * Always allow 1 descriptor to be un-allocated to avoid
1936          * a H/W race condition
1937          */
1938         txq->last_desc_cleaned = (uint16_t)(txq->nb_tx_desc - 1);
1939         txq->nb_tx_free = (uint16_t)(txq->nb_tx_desc - 1);
1940         txq->ctx_curr = 0;
1941         memset((void*)&txq->ctx_cache, 0,
1942                 IXGBE_CTX_NUM * sizeof(struct ixgbe_advctx_info));
1943 }
1944
1945 static const struct ixgbe_txq_ops def_txq_ops = {
1946         .release_mbufs = ixgbe_tx_queue_release_mbufs,
1947         .free_swring = ixgbe_tx_free_swring,
1948         .reset = ixgbe_reset_tx_queue,
1949 };
1950
1951 /* Takes an ethdev and a queue and sets up the tx function to be used based on
1952  * the queue parameters. Used in tx_queue_setup by primary process and then
1953  * in dev_init by secondary process when attaching to an existing ethdev.
1954  */
1955 void __attribute__((cold))
1956 ixgbe_set_tx_function(struct rte_eth_dev *dev, struct ixgbe_tx_queue *txq)
1957 {
1958         /* Use a simple Tx queue (no offloads, no multi segs) if possible */
1959         if (((txq->txq_flags & IXGBE_SIMPLE_FLAGS) == IXGBE_SIMPLE_FLAGS)
1960                         && (txq->tx_rs_thresh >= RTE_PMD_IXGBE_TX_MAX_BURST)) {
1961                 PMD_INIT_LOG(DEBUG, "Using simple tx code path");
1962 #ifdef RTE_IXGBE_INC_VECTOR
1963                 if (txq->tx_rs_thresh <= RTE_IXGBE_TX_MAX_FREE_BUF_SZ &&
1964                                 (rte_eal_process_type() != RTE_PROC_PRIMARY ||
1965                                         ixgbe_txq_vec_setup(txq) == 0)) {
1966                         PMD_INIT_LOG(DEBUG, "Vector tx enabled.");
1967                         dev->tx_pkt_burst = ixgbe_xmit_pkts_vec;
1968                 } else
1969 #endif
1970                 dev->tx_pkt_burst = ixgbe_xmit_pkts_simple;
1971         } else {
1972                 PMD_INIT_LOG(DEBUG, "Using full-featured tx code path");
1973                 PMD_INIT_LOG(DEBUG,
1974                                 " - txq_flags = %lx " "[IXGBE_SIMPLE_FLAGS=%lx]",
1975                                 (unsigned long)txq->txq_flags,
1976                                 (unsigned long)IXGBE_SIMPLE_FLAGS);
1977                 PMD_INIT_LOG(DEBUG,
1978                                 " - tx_rs_thresh = %lu " "[RTE_PMD_IXGBE_TX_MAX_BURST=%lu]",
1979                                 (unsigned long)txq->tx_rs_thresh,
1980                                 (unsigned long)RTE_PMD_IXGBE_TX_MAX_BURST);
1981                 dev->tx_pkt_burst = ixgbe_xmit_pkts;
1982         }
1983 }
1984
1985 int __attribute__((cold))
1986 ixgbe_dev_tx_queue_setup(struct rte_eth_dev *dev,
1987                          uint16_t queue_idx,
1988                          uint16_t nb_desc,
1989                          unsigned int socket_id,
1990                          const struct rte_eth_txconf *tx_conf)
1991 {
1992         const struct rte_memzone *tz;
1993         struct ixgbe_tx_queue *txq;
1994         struct ixgbe_hw     *hw;
1995         uint16_t tx_rs_thresh, tx_free_thresh;
1996
1997         PMD_INIT_FUNC_TRACE();
1998         hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
1999
2000         /*
2001          * Validate number of transmit descriptors.
2002          * It must not exceed the hardware maximum and must be a multiple
2003          * of IXGBE_TXD_ALIGN.
2004          */
2005         if (nb_desc % IXGBE_TXD_ALIGN != 0 ||
2006                         (nb_desc > IXGBE_MAX_RING_DESC) ||
2007                         (nb_desc < IXGBE_MIN_RING_DESC)) {
2008                 return -EINVAL;
2009         }
2010
2011         /*
2012          * The following two parameters control the setting of the RS bit on
2013          * transmit descriptors.
2014          * TX descriptors will have their RS bit set after txq->tx_rs_thresh
2015          * descriptors have been used.
2016          * The TX descriptor ring will be cleaned after txq->tx_free_thresh
2017          * descriptors are used or if the number of descriptors required
2018          * to transmit a packet is greater than the number of free TX
2019          * descriptors.
2020          * The following constraints must be satisfied:
2021          *  tx_rs_thresh must be greater than 0.
2022          *  tx_rs_thresh must be less than the size of the ring minus 2.
2023          *  tx_rs_thresh must be less than or equal to tx_free_thresh.
2024          *  tx_rs_thresh must be a divisor of the ring size.
2025          *  tx_free_thresh must be greater than 0.
2026          *  tx_free_thresh must be less than the size of the ring minus 3.
2027          * One descriptor in the TX ring is used as a sentinel to avoid a
2028          * H/W race condition, hence the maximum threshold constraints.
2029          * When set to zero use default values.
2030          */
2031         tx_rs_thresh = (uint16_t)((tx_conf->tx_rs_thresh) ?
2032                         tx_conf->tx_rs_thresh : DEFAULT_TX_RS_THRESH);
2033         tx_free_thresh = (uint16_t)((tx_conf->tx_free_thresh) ?
2034                         tx_conf->tx_free_thresh : DEFAULT_TX_FREE_THRESH);
2035         if (tx_rs_thresh >= (nb_desc - 2)) {
2036                 PMD_INIT_LOG(ERR, "tx_rs_thresh must be less than the number "
2037                         "of TX descriptors minus 2. (tx_rs_thresh=%u "
2038                         "port=%d queue=%d)", (unsigned int)tx_rs_thresh,
2039                         (int)dev->data->port_id, (int)queue_idx);
2040                 return -(EINVAL);
2041         }
2042         if (tx_rs_thresh > DEFAULT_TX_RS_THRESH) {
2043                 PMD_INIT_LOG(ERR, "tx_rs_thresh must be less than or equal to %u. "
2044                         "(tx_rs_thresh=%u port=%d queue=%d)",
2045                         DEFAULT_TX_RS_THRESH, (unsigned int)tx_rs_thresh,
2046                         (int)dev->data->port_id, (int)queue_idx);
2047                 return -(EINVAL);
2048         }
2049         if (tx_free_thresh >= (nb_desc - 3)) {
2050                 PMD_INIT_LOG(ERR, "tx_free_thresh must be less than the "
2051                              "number of "
2052                              "TX descriptors minus 3. (tx_free_thresh=%u "
2053                              "port=%d queue=%d)",
2054                              (unsigned int)tx_free_thresh,
2055                              (int)dev->data->port_id, (int)queue_idx);
2056                 return -(EINVAL);
2057         }
2058         if (tx_rs_thresh > tx_free_thresh) {
2059                 PMD_INIT_LOG(ERR, "tx_rs_thresh must be less than or equal to "
2060                              "tx_free_thresh. (tx_free_thresh=%u "
2061                              "tx_rs_thresh=%u port=%d queue=%d)",
2062                              (unsigned int)tx_free_thresh,
2063                              (unsigned int)tx_rs_thresh,
2064                              (int)dev->data->port_id,
2065                              (int)queue_idx);
2066                 return -(EINVAL);
2067         }
2068         if ((nb_desc % tx_rs_thresh) != 0) {
2069                 PMD_INIT_LOG(ERR, "tx_rs_thresh must be a divisor of the "
2070                              "number of TX descriptors. (tx_rs_thresh=%u "
2071                              "port=%d queue=%d)", (unsigned int)tx_rs_thresh,
2072                              (int)dev->data->port_id, (int)queue_idx);
2073                 return -(EINVAL);
2074         }
2075
2076         /*
2077          * If tx_rs_thresh is greater than 1, then TX WTHRESH should be
2078          * set to 0. If WTHRESH is greater than zero, the RS bit is ignored
2079          * by the NIC and all descriptors are written back after the NIC
2080          * accumulates WTHRESH descriptors.
2081          */
2082         if ((tx_rs_thresh > 1) && (tx_conf->tx_thresh.wthresh != 0)) {
2083                 PMD_INIT_LOG(ERR, "TX WTHRESH must be set to 0 if "
2084                              "tx_rs_thresh is greater than 1. (tx_rs_thresh=%u "
2085                              "port=%d queue=%d)", (unsigned int)tx_rs_thresh,
2086                              (int)dev->data->port_id, (int)queue_idx);
2087                 return -(EINVAL);
2088         }
2089
2090         /* Free memory prior to re-allocation if needed... */
2091         if (dev->data->tx_queues[queue_idx] != NULL) {
2092                 ixgbe_tx_queue_release(dev->data->tx_queues[queue_idx]);
2093                 dev->data->tx_queues[queue_idx] = NULL;
2094         }
2095
2096         /* First allocate the tx queue data structure */
2097         txq = rte_zmalloc_socket("ethdev TX queue", sizeof(struct ixgbe_tx_queue),
2098                                  RTE_CACHE_LINE_SIZE, socket_id);
2099         if (txq == NULL)
2100                 return (-ENOMEM);
2101
2102         /*
2103          * Allocate TX ring hardware descriptors. A memzone large enough to
2104          * handle the maximum ring size is allocated in order to allow for
2105          * resizing in later calls to the queue setup function.
2106          */
2107         tz = ring_dma_zone_reserve(dev, "tx_ring", queue_idx,
2108                         sizeof(union ixgbe_adv_tx_desc) * IXGBE_MAX_RING_DESC,
2109                         socket_id);
2110         if (tz == NULL) {
2111                 ixgbe_tx_queue_release(txq);
2112                 return (-ENOMEM);
2113         }
2114
2115         txq->nb_tx_desc = nb_desc;
2116         txq->tx_rs_thresh = tx_rs_thresh;
2117         txq->tx_free_thresh = tx_free_thresh;
2118         txq->pthresh = tx_conf->tx_thresh.pthresh;
2119         txq->hthresh = tx_conf->tx_thresh.hthresh;
2120         txq->wthresh = tx_conf->tx_thresh.wthresh;
2121         txq->queue_id = queue_idx;
2122         txq->reg_idx = (uint16_t)((RTE_ETH_DEV_SRIOV(dev).active == 0) ?
2123                 queue_idx : RTE_ETH_DEV_SRIOV(dev).def_pool_q_idx + queue_idx);
2124         txq->port_id = dev->data->port_id;
2125         txq->txq_flags = tx_conf->txq_flags;
2126         txq->ops = &def_txq_ops;
2127         txq->tx_deferred_start = tx_conf->tx_deferred_start;
2128
2129         /*
2130          * Use VFTDT instead of TDT when running on a virtual function.
2131          */
2132         if (hw->mac.type == ixgbe_mac_82599_vf ||
2133             hw->mac.type == ixgbe_mac_X540_vf ||
2134             hw->mac.type == ixgbe_mac_X550_vf ||
2135             hw->mac.type == ixgbe_mac_X550EM_x_vf)
2136                 txq->tdt_reg_addr = IXGBE_PCI_REG_ADDR(hw, IXGBE_VFTDT(queue_idx));
2137         else
2138                 txq->tdt_reg_addr = IXGBE_PCI_REG_ADDR(hw, IXGBE_TDT(txq->reg_idx));
2139 #ifndef RTE_LIBRTE_XEN_DOM0
2140         txq->tx_ring_phys_addr = (uint64_t) tz->phys_addr;
2141 #else
2142         txq->tx_ring_phys_addr = rte_mem_phy2mch(tz->memseg_id, tz->phys_addr);
2143 #endif
2144         txq->tx_ring = (union ixgbe_adv_tx_desc *) tz->addr;
2145
2146         /* Allocate software ring */
2147         txq->sw_ring = rte_zmalloc_socket("txq->sw_ring",
2148                                 sizeof(struct ixgbe_tx_entry) * nb_desc,
2149                                 RTE_CACHE_LINE_SIZE, socket_id);
2150         if (txq->sw_ring == NULL) {
2151                 ixgbe_tx_queue_release(txq);
2152                 return (-ENOMEM);
2153         }
2154         PMD_INIT_LOG(DEBUG, "sw_ring=%p hw_ring=%p dma_addr=0x%"PRIx64,
2155                      txq->sw_ring, txq->tx_ring, txq->tx_ring_phys_addr);
2156
2157         /* set up vector or scalar TX function as appropriate */
2158         ixgbe_set_tx_function(dev, txq);
2159
2160         txq->ops->reset(txq);
2161
2162         dev->data->tx_queues[queue_idx] = txq;
2163
2164
2165         return (0);
2166 }
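/*
 * Editor's note: illustrative sketch only, not part of the driver. It collects
 * the tx_rs_thresh/tx_free_thresh constraints enforced by
 * ixgbe_dev_tx_queue_setup() above into a single predicate. For example,
 * nb_desc = 512, tx_rs_thresh = 32, tx_free_thresh = 32 and wthresh = 0
 * satisfies all of them. example_tx_thresh_ok() is a hypothetical helper.
 */
#if 0   /* illustrative example, not compiled */
static inline int
example_tx_thresh_ok(uint16_t nb_desc, uint16_t tx_rs_thresh,
		     uint16_t tx_free_thresh, uint16_t wthresh)
{
	return tx_rs_thresh > 0 &&
	       tx_rs_thresh < nb_desc - 2 &&
	       tx_rs_thresh <= DEFAULT_TX_RS_THRESH &&
	       tx_free_thresh > 0 &&
	       tx_free_thresh < nb_desc - 3 &&
	       tx_rs_thresh <= tx_free_thresh &&
	       (nb_desc % tx_rs_thresh) == 0 &&
	       (tx_rs_thresh == 1 || wthresh == 0);
}
#endif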
2167
2168 /**
2169  * ixgbe_free_sc_cluster - free the not-yet-completed scattered cluster
2170  *
2171  * The "next" pointer of the last segment of (not-yet-completed) RSC clusters
2172  * in the sw_sc_ring is not set to NULL but rather points to the next
2173  * mbuf of this RSC aggregation (that has not been completed yet and still
2174  * resides on the HW ring). So, instead of calling rte_pktmbuf_free(), we
2175  * just free the first "nb_segs" segments of the cluster explicitly by
2176  * calling rte_pktmbuf_free_seg().
2177  *
2178  * @m scattered cluster head
2179  */
2180 static void __attribute__((cold))
2181 ixgbe_free_sc_cluster(struct rte_mbuf *m)
2182 {
2183         uint8_t i, nb_segs = m->nb_segs;
2184         struct rte_mbuf *next_seg;
2185
2186         for (i = 0; i < nb_segs; i++) {
2187                 next_seg = m->next;
2188                 rte_pktmbuf_free_seg(m);
2189                 m = next_seg;
2190         }
2191 }
2192
2193 static void __attribute__((cold))
2194 ixgbe_rx_queue_release_mbufs(struct ixgbe_rx_queue *rxq)
2195 {
2196         unsigned i;
2197
2198 #ifdef RTE_IXGBE_INC_VECTOR
2199         /* SSE Vector driver has a different way of releasing mbufs. */
2200         if (rxq->rx_using_sse) {
2201                 ixgbe_rx_queue_release_mbufs_vec(rxq);
2202                 return;
2203         }
2204 #endif
2205
2206         if (rxq->sw_ring != NULL) {
2207                 for (i = 0; i < rxq->nb_rx_desc; i++) {
2208                         if (rxq->sw_ring[i].mbuf != NULL) {
2209                                 rte_pktmbuf_free_seg(rxq->sw_ring[i].mbuf);
2210                                 rxq->sw_ring[i].mbuf = NULL;
2211                         }
2212                 }
2213                 if (rxq->rx_nb_avail) {
2214                         for (i = 0; i < rxq->rx_nb_avail; ++i) {
2215                                 struct rte_mbuf *mb;
2216                                 mb = rxq->rx_stage[rxq->rx_next_avail + i];
2217                                 rte_pktmbuf_free_seg(mb);
2218                         }
2219                         rxq->rx_nb_avail = 0;
2220                 }
2221         }
2222
2223         if (rxq->sw_sc_ring)
2224                 for (i = 0; i < rxq->nb_rx_desc; i++)
2225                         if (rxq->sw_sc_ring[i].fbuf) {
2226                                 ixgbe_free_sc_cluster(rxq->sw_sc_ring[i].fbuf);
2227                                 rxq->sw_sc_ring[i].fbuf = NULL;
2228                         }
2229 }
2230
2231 static void __attribute__((cold))
2232 ixgbe_rx_queue_release(struct ixgbe_rx_queue *rxq)
2233 {
2234         if (rxq != NULL) {
2235                 ixgbe_rx_queue_release_mbufs(rxq);
2236                 rte_free(rxq->sw_ring);
2237                 rte_free(rxq->sw_sc_ring);
2238                 rte_free(rxq);
2239         }
2240 }
2241
2242 void __attribute__((cold))
2243 ixgbe_dev_rx_queue_release(void *rxq)
2244 {
2245         ixgbe_rx_queue_release(rxq);
2246 }
2247
2248 /*
2249  * Check if Rx Burst Bulk Alloc function can be used.
2250  * Return
2251  *        0: the preconditions are satisfied and the bulk allocation function
2252  *           can be used.
2253  *  -EINVAL: the preconditions are NOT satisfied and the default Rx burst
2254  *           function must be used.
2255  */
2256 static inline int __attribute__((cold))
2257 check_rx_burst_bulk_alloc_preconditions(struct ixgbe_rx_queue *rxq)
2258 {
2259         int ret = 0;
2260
2261         /*
2262          * Make sure the following pre-conditions are satisfied:
2263          *   rxq->rx_free_thresh >= RTE_PMD_IXGBE_RX_MAX_BURST
2264          *   rxq->rx_free_thresh < rxq->nb_rx_desc
2265          *   (rxq->nb_rx_desc % rxq->rx_free_thresh) == 0
2266          *   rxq->nb_rx_desc<(IXGBE_MAX_RING_DESC-RTE_PMD_IXGBE_RX_MAX_BURST)
2267          * Scattered packets are not supported.  This should be checked
2268          * outside of this function.
2269          */
2270         if (!(rxq->rx_free_thresh >= RTE_PMD_IXGBE_RX_MAX_BURST)) {
2271                 PMD_INIT_LOG(DEBUG, "Rx Burst Bulk Alloc Preconditions: "
2272                              "rxq->rx_free_thresh=%d, "
2273                              "RTE_PMD_IXGBE_RX_MAX_BURST=%d",
2274                              rxq->rx_free_thresh, RTE_PMD_IXGBE_RX_MAX_BURST);
2275                 ret = -EINVAL;
2276         } else if (!(rxq->rx_free_thresh < rxq->nb_rx_desc)) {
2277                 PMD_INIT_LOG(DEBUG, "Rx Burst Bulk Alloc Preconditions: "
2278                              "rxq->rx_free_thresh=%d, "
2279                              "rxq->nb_rx_desc=%d",
2280                              rxq->rx_free_thresh, rxq->nb_rx_desc);
2281                 ret = -EINVAL;
2282         } else if (!((rxq->nb_rx_desc % rxq->rx_free_thresh) == 0)) {
2283                 PMD_INIT_LOG(DEBUG, "Rx Burst Bulk Alloc Preconditions: "
2284                              "rxq->nb_rx_desc=%d, "
2285                              "rxq->rx_free_thresh=%d",
2286                              rxq->nb_rx_desc, rxq->rx_free_thresh);
2287                 ret = -EINVAL;
2288         } else if (!(rxq->nb_rx_desc <
2289                (IXGBE_MAX_RING_DESC - RTE_PMD_IXGBE_RX_MAX_BURST))) {
2290                 PMD_INIT_LOG(DEBUG, "Rx Burst Bulk Alloc Preconditions: "
2291                              "rxq->nb_rx_desc=%d, "
2292                              "IXGBE_MAX_RING_DESC=%d, "
2293                              "RTE_PMD_IXGBE_RX_MAX_BURST=%d",
2294                              rxq->nb_rx_desc, IXGBE_MAX_RING_DESC,
2295                              RTE_PMD_IXGBE_RX_MAX_BURST);
2296                 ret = -EINVAL;
2297         }
2298
2299         return ret;
2300 }
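/*
 * Editor's note: illustrative sketch only, not part of the driver. It shows a
 * queue configuration that satisfies all the bulk-alloc preconditions checked
 * above, assuming the usual RTE_PMD_IXGBE_RX_MAX_BURST of 32: rx_free_thresh
 * >= 32, rx_free_thresh < nb_rx_desc, nb_rx_desc a multiple of rx_free_thresh,
 * and nb_rx_desc < IXGBE_MAX_RING_DESC - 32. The example values are arbitrary.
 */
#if 0   /* illustrative example, not compiled */
static const struct {
	uint16_t nb_rx_desc;
	uint16_t rx_free_thresh;
} example_bulk_alloc_ok = {
	.nb_rx_desc = 512,      /* 512 % 32 == 0 and 512 < 4096 - 32 */
	.rx_free_thresh = 32,   /* >= RTE_PMD_IXGBE_RX_MAX_BURST, < 512 */
};
#endif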
2301
2302 /* Reset dynamic ixgbe_rx_queue fields back to defaults */
2303 static void __attribute__((cold))
2304 ixgbe_reset_rx_queue(struct ixgbe_adapter *adapter, struct ixgbe_rx_queue *rxq)
2305 {
2306         static const union ixgbe_adv_rx_desc zeroed_desc = {{0}};
2307         unsigned i;
2308         uint16_t len = rxq->nb_rx_desc;
2309
2310         /*
2311          * By default, the Rx queue setup function allocates enough memory for
2312          * IXGBE_MAX_RING_DESC.  The Rx Burst bulk allocation function requires
2313          * extra memory at the end of the descriptor ring to be zero'd out. A
2314          * pre-condition for using the Rx burst bulk alloc function is that the
2315          * number of descriptors is less than or equal to
2316          * (IXGBE_MAX_RING_DESC - RTE_PMD_IXGBE_RX_MAX_BURST). Check all the
2317          * constraints here to see if we need to zero out memory after the end
2318          * of the H/W descriptor ring.
2319          */
2320         if (adapter->rx_bulk_alloc_allowed)
2321                 /* zero out extra memory */
2322                 len += RTE_PMD_IXGBE_RX_MAX_BURST;
2323
2324         /*
2325          * Zero out HW ring memory. Zero out extra memory at the end of
2326          * the H/W ring so look-ahead logic in Rx Burst bulk alloc function
2327          * reads extra memory as zeros.
2328          */
2329         for (i = 0; i < len; i++) {
2330                 rxq->rx_ring[i] = zeroed_desc;
2331         }
2332
2333         /*
2334          * Initialize the extra software ring entries. Space for these
2335          * extra entries is always allocated.
2336          */
2337         memset(&rxq->fake_mbuf, 0x0, sizeof(rxq->fake_mbuf));
2338         for (i = rxq->nb_rx_desc; i < len; ++i) {
2339                 rxq->sw_ring[i].mbuf = &rxq->fake_mbuf;
2340         }
2341
2342         rxq->rx_nb_avail = 0;
2343         rxq->rx_next_avail = 0;
2344         rxq->rx_free_trigger = (uint16_t)(rxq->rx_free_thresh - 1);
2345         rxq->rx_tail = 0;
2346         rxq->nb_rx_hold = 0;
2347         rxq->pkt_first_seg = NULL;
2348         rxq->pkt_last_seg = NULL;
2349
2350 #ifdef RTE_IXGBE_INC_VECTOR
2351         rxq->rxrearm_start = 0;
2352         rxq->rxrearm_nb = 0;
2353 #endif
2354 }
2355
2356 int __attribute__((cold))
2357 ixgbe_dev_rx_queue_setup(struct rte_eth_dev *dev,
2358                          uint16_t queue_idx,
2359                          uint16_t nb_desc,
2360                          unsigned int socket_id,
2361                          const struct rte_eth_rxconf *rx_conf,
2362                          struct rte_mempool *mp)
2363 {
2364         const struct rte_memzone *rz;
2365         struct ixgbe_rx_queue *rxq;
2366         struct ixgbe_hw     *hw;
2367         uint16_t len;
2368         struct ixgbe_adapter *adapter =
2369                 (struct ixgbe_adapter *)dev->data->dev_private;
2370
2371         PMD_INIT_FUNC_TRACE();
2372         hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
2373
2374         /*
2375          * Validate number of receive descriptors.
2376          * It must not exceed the hardware maximum and must be a multiple
2377          * of IXGBE_RXD_ALIGN.
2378          */
2379         if (nb_desc % IXGBE_RXD_ALIGN != 0 ||
2380                         (nb_desc > IXGBE_MAX_RING_DESC) ||
2381                         (nb_desc < IXGBE_MIN_RING_DESC)) {
2382                 return (-EINVAL);
2383         }
2384
2385         /* Free memory prior to re-allocation if needed... */
2386         if (dev->data->rx_queues[queue_idx] != NULL) {
2387                 ixgbe_rx_queue_release(dev->data->rx_queues[queue_idx]);
2388                 dev->data->rx_queues[queue_idx] = NULL;
2389         }
2390
2391         /* First allocate the rx queue data structure */
2392         rxq = rte_zmalloc_socket("ethdev RX queue", sizeof(struct ixgbe_rx_queue),
2393                                  RTE_CACHE_LINE_SIZE, socket_id);
2394         if (rxq == NULL)
2395                 return (-ENOMEM);
2396         rxq->mb_pool = mp;
2397         rxq->nb_rx_desc = nb_desc;
2398         rxq->rx_free_thresh = rx_conf->rx_free_thresh;
2399         rxq->queue_id = queue_idx;
2400         rxq->reg_idx = (uint16_t)((RTE_ETH_DEV_SRIOV(dev).active == 0) ?
2401                 queue_idx : RTE_ETH_DEV_SRIOV(dev).def_pool_q_idx + queue_idx);
2402         rxq->port_id = dev->data->port_id;
2403         rxq->crc_len = (uint8_t) ((dev->data->dev_conf.rxmode.hw_strip_crc) ?
2404                                                         0 : ETHER_CRC_LEN);
2405         rxq->drop_en = rx_conf->rx_drop_en;
2406         rxq->rx_deferred_start = rx_conf->rx_deferred_start;
2407
2408         /*
2409          * Allocate RX ring hardware descriptors. A memzone large enough to
2410          * handle the maximum ring size is allocated in order to allow for
2411          * resizing in later calls to the queue setup function.
2412          */
2413         rz = ring_dma_zone_reserve(dev, "rx_ring", queue_idx,
2414                                    RX_RING_SZ, socket_id);
2415         if (rz == NULL) {
2416                 ixgbe_rx_queue_release(rxq);
2417                 return (-ENOMEM);
2418         }
2419
2420         /*
2421          * Zero init all the descriptors in the ring.
2422          */
2423         memset(rz->addr, 0, RX_RING_SZ);
2424
2425         /*
2426          * Use VFRDT and VFRDH instead of RDT and RDH for a virtual function.
2427          */
2428         if (hw->mac.type == ixgbe_mac_82599_vf ||
2429             hw->mac.type == ixgbe_mac_X540_vf ||
2430             hw->mac.type == ixgbe_mac_X550_vf ||
2431             hw->mac.type == ixgbe_mac_X550EM_x_vf) {
2432                 rxq->rdt_reg_addr =
2433                         IXGBE_PCI_REG_ADDR(hw, IXGBE_VFRDT(queue_idx));
2434                 rxq->rdh_reg_addr =
2435                         IXGBE_PCI_REG_ADDR(hw, IXGBE_VFRDH(queue_idx));
2436         }
2437         else {
2438                 rxq->rdt_reg_addr =
2439                         IXGBE_PCI_REG_ADDR(hw, IXGBE_RDT(rxq->reg_idx));
2440                 rxq->rdh_reg_addr =
2441                         IXGBE_PCI_REG_ADDR(hw, IXGBE_RDH(rxq->reg_idx));
2442         }
2443 #ifndef RTE_LIBRTE_XEN_DOM0
2444         rxq->rx_ring_phys_addr = (uint64_t) rz->phys_addr;
2445 #else
2446         rxq->rx_ring_phys_addr = rte_mem_phy2mch(rz->memseg_id, rz->phys_addr);
2447 #endif
2448         rxq->rx_ring = (union ixgbe_adv_rx_desc *) rz->addr;
2449
2450         /*
2451          * Certain constraints must be met in order to use the bulk buffer
2452          * allocation Rx burst function. If any of the Rx queues doesn't meet
2453          * them, the feature should be disabled for the whole port.
2454          */
2455         if (check_rx_burst_bulk_alloc_preconditions(rxq)) {
2456                 PMD_INIT_LOG(DEBUG, "queue[%d] doesn't meet Rx Bulk Alloc "
2457                                     "preconditions - canceling the feature for "
2458                                     "the whole port[%d]",
2459                              rxq->queue_id, rxq->port_id);
2460                 adapter->rx_bulk_alloc_allowed = false;
2461         }
2462
2463         /*
2464          * Allocate software ring. Allow for space at the end of the
2465          * S/W ring to make sure look-ahead logic in bulk alloc Rx burst
2466          * function does not access an invalid memory region.
2467          */
2468         len = nb_desc;
2469         if (adapter->rx_bulk_alloc_allowed)
2470                 len += RTE_PMD_IXGBE_RX_MAX_BURST;
2471
2472         rxq->sw_ring = rte_zmalloc_socket("rxq->sw_ring",
2473                                           sizeof(struct ixgbe_rx_entry) * len,
2474                                           RTE_CACHE_LINE_SIZE, socket_id);
2475         if (!rxq->sw_ring) {
2476                 ixgbe_rx_queue_release(rxq);
2477                 return (-ENOMEM);
2478         }
2479
2480         /*
2481          * Always allocate even if it's not going to be needed in order to
2482          * simplify the code.
2483          *
2484          * This ring is used in LRO and Scattered Rx cases and Scattered Rx may
2485          * be requested in ixgbe_dev_rx_init(), which is called later from
2486          * dev_start() flow.
2487          */
2488         rxq->sw_sc_ring =
2489                 rte_zmalloc_socket("rxq->sw_sc_ring",
2490                                    sizeof(struct ixgbe_scattered_rx_entry) * len,
2491                                    RTE_CACHE_LINE_SIZE, socket_id);
2492         if (!rxq->sw_sc_ring) {
2493                 ixgbe_rx_queue_release(rxq);
2494                 return (-ENOMEM);
2495         }
2496
2497         PMD_INIT_LOG(DEBUG, "sw_ring=%p sw_sc_ring=%p hw_ring=%p "
2498                             "dma_addr=0x%"PRIx64,
2499                      rxq->sw_ring, rxq->sw_sc_ring, rxq->rx_ring,
2500                      rxq->rx_ring_phys_addr);
2501
2502         if (!rte_is_power_of_2(nb_desc)) {
2503                 PMD_INIT_LOG(DEBUG, "queue[%d] doesn't meet Vector Rx "
2504                                     "preconditions - canceling the feature for "
2505                                     "the whole port[%d]",
2506                              rxq->queue_id, rxq->port_id);
2507                 adapter->rx_vec_allowed = false;
2508         } else
2509                 ixgbe_rxq_vec_setup(rxq);
2510
2511         dev->data->rx_queues[queue_idx] = rxq;
2512
2513         ixgbe_reset_rx_queue(adapter, rxq);
2514
2515         return 0;
2516 }
2517
2518 uint32_t
2519 ixgbe_dev_rx_queue_count(struct rte_eth_dev *dev, uint16_t rx_queue_id)
2520 {
2521 #define IXGBE_RXQ_SCAN_INTERVAL 4
2522         volatile union ixgbe_adv_rx_desc *rxdp;
2523         struct ixgbe_rx_queue *rxq;
2524         uint32_t desc = 0;
2525
2526         if (rx_queue_id >= dev->data->nb_rx_queues) {
2527                 PMD_RX_LOG(ERR, "Invalid RX queue id=%d", rx_queue_id);
2528                 return 0;
2529         }
2530
2531         rxq = dev->data->rx_queues[rx_queue_id];
2532         rxdp = &(rxq->rx_ring[rxq->rx_tail]);
2533
2534         while ((desc < rxq->nb_rx_desc) &&
2535                 (rxdp->wb.upper.status_error &
2536                         rte_cpu_to_le_32(IXGBE_RXDADV_STAT_DD))) {
2537                 desc += IXGBE_RXQ_SCAN_INTERVAL;
2538                 rxdp += IXGBE_RXQ_SCAN_INTERVAL;
2539                 if (rxq->rx_tail + desc >= rxq->nb_rx_desc)
2540                         rxdp = &(rxq->rx_ring[rxq->rx_tail +
2541                                 desc - rxq->nb_rx_desc]);
2542         }
2543
2544         return desc;
2545 }
2546
2547 int
2548 ixgbe_dev_rx_descriptor_done(void *rx_queue, uint16_t offset)
2549 {
2550         volatile union ixgbe_adv_rx_desc *rxdp;
2551         struct ixgbe_rx_queue *rxq = rx_queue;
2552         uint32_t desc;
2553
2554         if (unlikely(offset >= rxq->nb_rx_desc))
2555                 return 0;
2556         desc = rxq->rx_tail + offset;
2557         if (desc >= rxq->nb_rx_desc)
2558                 desc -= rxq->nb_rx_desc;
2559
2560         rxdp = &rxq->rx_ring[desc];
2561         return !!(rxdp->wb.upper.status_error &
2562                         rte_cpu_to_le_32(IXGBE_RXDADV_STAT_DD));
2563 }
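
/*
 * Usage sketch (editorial example, not part of the driver): applications
 * normally reach the two helpers above through the generic ethdev wrappers
 * rte_eth_rx_queue_count() and rte_eth_rx_descriptor_done(); the port id,
 * queue id and offset below are arbitrary illustrative values.
 */
#if 0   /* example only, not compiled */
static void
example_poll_rx_backlog(uint8_t port_id, uint16_t queue_id)
{
        /* Descriptors with the DD bit set, counted in steps of 4 */
        uint32_t backlog = rte_eth_rx_queue_count(port_id, queue_id);

        /* Has the descriptor 32 entries past the current tail completed? */
        int done = rte_eth_rx_descriptor_done(port_id, queue_id, 32);

        printf("port %d queue %d: backlog=%u, desc[32] done=%d\n",
               port_id, queue_id, (unsigned int)backlog, done);
}
#endif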
2564
2565 void __attribute__((cold))
2566 ixgbe_dev_clear_queues(struct rte_eth_dev *dev)
2567 {
2568         unsigned i;
2569         struct ixgbe_adapter *adapter =
2570                 (struct ixgbe_adapter *)dev->data->dev_private;
2571
2572         PMD_INIT_FUNC_TRACE();
2573
2574         for (i = 0; i < dev->data->nb_tx_queues; i++) {
2575                 struct ixgbe_tx_queue *txq = dev->data->tx_queues[i];
2576                 if (txq != NULL) {
2577                         txq->ops->release_mbufs(txq);
2578                         txq->ops->reset(txq);
2579                 }
2580         }
2581
2582         for (i = 0; i < dev->data->nb_rx_queues; i++) {
2583                 struct ixgbe_rx_queue *rxq = dev->data->rx_queues[i];
2584                 if (rxq != NULL) {
2585                         ixgbe_rx_queue_release_mbufs(rxq);
2586                         ixgbe_reset_rx_queue(adapter, rxq);
2587                 }
2588         }
2589 }
2590
2591 void
2592 ixgbe_dev_free_queues(struct rte_eth_dev *dev)
2593 {
2594         unsigned i;
2595
2596         PMD_INIT_FUNC_TRACE();
2597
2598         for (i = 0; i < dev->data->nb_rx_queues; i++) {
2599                 ixgbe_dev_rx_queue_release(dev->data->rx_queues[i]);
2600                 dev->data->rx_queues[i] = NULL;
2601         }
2602         dev->data->nb_rx_queues = 0;
2603
2604         for (i = 0; i < dev->data->nb_tx_queues; i++) {
2605                 ixgbe_dev_tx_queue_release(dev->data->tx_queues[i]);
2606                 dev->data->tx_queues[i] = NULL;
2607         }
2608         dev->data->nb_tx_queues = 0;
2609 }
2610
2611 /*********************************************************************
2612  *
2613  *  Device RX/TX init functions
2614  *
2615  **********************************************************************/
2616
2617 /**
2618  * Receive Side Scaling (RSS)
2619  * See section 7.1.2.8 in the following document:
2620  *     "Intel 82599 10 GbE Controller Datasheet" - Revision 2.1 October 2009
2621  *
2622  * Principles:
2623  * The source and destination IP addresses of the IP header and the source
2624  * and destination ports of TCP/UDP headers, if any, of received packets are
2625  * hashed against a configurable random key to compute a 32-bit RSS hash result.
2626  * The seven (7) LSBs of the 32-bit hash result are used as an index into a
2627  * 128-entry redirection table (RETA).  Each entry of the RETA provides a 4-bit
2628  * RSS output index, which is used as the index of the RX queue in which to
2629  * store the received packets.
2630  * The following output is supplied in the RX write-back descriptor:
2631  *     - 32-bit result of the Microsoft RSS hash function,
2632  *     - 4-bit RSS type field.
2633  */
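
/*
 * Illustrative sketch (editorial addition, not part of the driver): this is
 * how the RX queue is derived from the 32-bit hash described above.  The
 * "reta" array stands in for the contents of the 128-entry redirection table.
 */
#if 0   /* example only, not compiled */
static inline uint8_t
example_rss_queue_from_hash(uint32_t rss_hash, const uint8_t reta[128])
{
        /* The seven LSBs of the hash index the redirection table */
        return reta[rss_hash & 0x7F];
}
#endif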
2634
2635 /*
2636  * RSS random key supplied in section 7.1.2.8.3 of the Intel 82599 datasheet.
2637  * Used as the default key.
2638  */
2639 static uint8_t rss_intel_key[40] = {
2640         0x6D, 0x5A, 0x56, 0xDA, 0x25, 0x5B, 0x0E, 0xC2,
2641         0x41, 0x67, 0x25, 0x3D, 0x43, 0xA3, 0x8F, 0xB0,
2642         0xD0, 0xCA, 0x2B, 0xCB, 0xAE, 0x7B, 0x30, 0xB4,
2643         0x77, 0xCB, 0x2D, 0xA3, 0x80, 0x30, 0xF2, 0x0C,
2644         0x6A, 0x42, 0xB7, 0x3B, 0xBE, 0xAC, 0x01, 0xFA,
2645 };
2646
2647 static void
2648 ixgbe_rss_disable(struct rte_eth_dev *dev)
2649 {
2650         struct ixgbe_hw *hw;
2651         uint32_t mrqc;
2652         uint32_t mrqc_reg;
2653
2654         hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
2655         mrqc_reg = ixgbe_mrqc_reg_get(hw->mac.type);
2656         mrqc = IXGBE_READ_REG(hw, mrqc_reg);
2657         mrqc &= ~IXGBE_MRQC_RSSEN;
2658         IXGBE_WRITE_REG(hw, mrqc_reg, mrqc);
2659 }
2660
2661 static void
2662 ixgbe_hw_rss_hash_set(struct ixgbe_hw *hw, struct rte_eth_rss_conf *rss_conf)
2663 {
2664         uint8_t  *hash_key;
2665         uint32_t mrqc;
2666         uint32_t rss_key;
2667         uint64_t rss_hf;
2668         uint16_t i;
2669         uint32_t mrqc_reg;
2670         uint32_t rssrk_reg;
2671
2672         mrqc_reg = ixgbe_mrqc_reg_get(hw->mac.type);
2673         rssrk_reg = ixgbe_rssrk_reg_get(hw->mac.type, 0);
2674
2675         hash_key = rss_conf->rss_key;
2676         if (hash_key != NULL) {
2677                 /* Fill in RSS hash key */
2678                 for (i = 0; i < 10; i++) {
2679                         rss_key  = hash_key[(i * 4)];
2680                         rss_key |= hash_key[(i * 4) + 1] << 8;
2681                         rss_key |= hash_key[(i * 4) + 2] << 16;
2682                         rss_key |= hash_key[(i * 4) + 3] << 24;
2683                         IXGBE_WRITE_REG_ARRAY(hw, rssrk_reg, i, rss_key);
2684                 }
2685         }
2686
2687         /* Set configured hashing protocols in MRQC register */
2688         rss_hf = rss_conf->rss_hf;
2689         mrqc = IXGBE_MRQC_RSSEN; /* Enable RSS */
2690         if (rss_hf & ETH_RSS_IPV4)
2691                 mrqc |= IXGBE_MRQC_RSS_FIELD_IPV4;
2692         if (rss_hf & ETH_RSS_NONFRAG_IPV4_TCP)
2693                 mrqc |= IXGBE_MRQC_RSS_FIELD_IPV4_TCP;
2694         if (rss_hf & ETH_RSS_IPV6)
2695                 mrqc |= IXGBE_MRQC_RSS_FIELD_IPV6;
2696         if (rss_hf & ETH_RSS_IPV6_EX)
2697                 mrqc |= IXGBE_MRQC_RSS_FIELD_IPV6_EX;
2698         if (rss_hf & ETH_RSS_NONFRAG_IPV6_TCP)
2699                 mrqc |= IXGBE_MRQC_RSS_FIELD_IPV6_TCP;
2700         if (rss_hf & ETH_RSS_IPV6_TCP_EX)
2701                 mrqc |= IXGBE_MRQC_RSS_FIELD_IPV6_EX_TCP;
2702         if (rss_hf & ETH_RSS_NONFRAG_IPV4_UDP)
2703                 mrqc |= IXGBE_MRQC_RSS_FIELD_IPV4_UDP;
2704         if (rss_hf & ETH_RSS_NONFRAG_IPV6_UDP)
2705                 mrqc |= IXGBE_MRQC_RSS_FIELD_IPV6_UDP;
2706         if (rss_hf & ETH_RSS_IPV6_UDP_EX)
2707                 mrqc |= IXGBE_MRQC_RSS_FIELD_IPV6_EX_UDP;
2708         IXGBE_WRITE_REG(hw, mrqc_reg, mrqc);
2709 }
2710
2711 int
2712 ixgbe_dev_rss_hash_update(struct rte_eth_dev *dev,
2713                           struct rte_eth_rss_conf *rss_conf)
2714 {
2715         struct ixgbe_hw *hw;
2716         uint32_t mrqc;
2717         uint64_t rss_hf;
2718         uint32_t mrqc_reg;
2719
2720         hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
2721
2722         if (!ixgbe_rss_update_sp(hw->mac.type)) {
2723                 PMD_DRV_LOG(ERR, "RSS hash update is not supported on this "
2724                         "NIC.");
2725                 return -ENOTSUP;
2726         }
2727         mrqc_reg = ixgbe_mrqc_reg_get(hw->mac.type);
2728
2729         /*
2730          * Excerpt from section 7.1.2.8 Receive-Side Scaling (RSS):
2731          *     "RSS enabling cannot be done dynamically while it must be
2732          *      preceded by a software reset"
2733          * Before changing anything, first check that the update RSS operation
2734          * does not attempt to disable RSS, if RSS was enabled at
2735          * initialization time, or does not attempt to enable RSS, if RSS was
2736          * disabled at initialization time.
2737          */
2738         rss_hf = rss_conf->rss_hf & IXGBE_RSS_OFFLOAD_ALL;
2739         mrqc = IXGBE_READ_REG(hw, mrqc_reg);
2740         if (!(mrqc & IXGBE_MRQC_RSSEN)) { /* RSS disabled */
2741                 if (rss_hf != 0) /* Enable RSS */
2742                         return -(EINVAL);
2743                 return 0; /* Nothing to do */
2744         }
2745         /* RSS enabled */
2746         if (rss_hf == 0) /* Disable RSS */
2747                 return -(EINVAL);
2748         ixgbe_hw_rss_hash_set(hw, rss_conf);
2749         return 0;
2750 }
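
/*
 * Usage sketch (editorial example, not part of the driver): the update above
 * is normally reached through rte_eth_dev_rss_hash_update().  The port id and
 * the hash-function selection are arbitrary illustrative values; the default
 * key from rss_intel_key is reused here.
 */
#if 0   /* example only, not compiled */
static int
example_enable_ipv4_tcp_rss(uint8_t port_id)
{
        static uint8_t key[40];
        struct rte_eth_rss_conf conf = {
                .rss_key = key,
                .rss_key_len = sizeof(key),
                .rss_hf = ETH_RSS_IPV4 | ETH_RSS_NONFRAG_IPV4_TCP,
        };

        memcpy(key, rss_intel_key, sizeof(key));
        return rte_eth_dev_rss_hash_update(port_id, &conf);
}
#endif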
2751
2752 int
2753 ixgbe_dev_rss_hash_conf_get(struct rte_eth_dev *dev,
2754                             struct rte_eth_rss_conf *rss_conf)
2755 {
2756         struct ixgbe_hw *hw;
2757         uint8_t *hash_key;
2758         uint32_t mrqc;
2759         uint32_t rss_key;
2760         uint64_t rss_hf;
2761         uint16_t i;
2762         uint32_t mrqc_reg;
2763         uint32_t rssrk_reg;
2764
2765         hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
2766         mrqc_reg = ixgbe_mrqc_reg_get(hw->mac.type);
2767         rssrk_reg = ixgbe_rssrk_reg_get(hw->mac.type, 0);
2768         hash_key = rss_conf->rss_key;
2769         if (hash_key != NULL) {
2770                 /* Return RSS hash key */
2771                 for (i = 0; i < 10; i++) {
2772                         rss_key = IXGBE_READ_REG_ARRAY(hw, rssrk_reg, i);
2773                         hash_key[(i * 4)] = rss_key & 0x000000FF;
2774                         hash_key[(i * 4) + 1] = (rss_key >> 8) & 0x000000FF;
2775                         hash_key[(i * 4) + 2] = (rss_key >> 16) & 0x000000FF;
2776                         hash_key[(i * 4) + 3] = (rss_key >> 24) & 0x000000FF;
2777                 }
2778         }
2779
2780         /* Get RSS functions configured in MRQC register */
2781         mrqc = IXGBE_READ_REG(hw, mrqc_reg);
2782         if ((mrqc & IXGBE_MRQC_RSSEN) == 0) { /* RSS is disabled */
2783                 rss_conf->rss_hf = 0;
2784                 return 0;
2785         }
2786         rss_hf = 0;
2787         if (mrqc & IXGBE_MRQC_RSS_FIELD_IPV4)
2788                 rss_hf |= ETH_RSS_IPV4;
2789         if (mrqc & IXGBE_MRQC_RSS_FIELD_IPV4_TCP)
2790                 rss_hf |= ETH_RSS_NONFRAG_IPV4_TCP;
2791         if (mrqc & IXGBE_MRQC_RSS_FIELD_IPV6)
2792                 rss_hf |= ETH_RSS_IPV6;
2793         if (mrqc & IXGBE_MRQC_RSS_FIELD_IPV6_EX)
2794                 rss_hf |= ETH_RSS_IPV6_EX;
2795         if (mrqc & IXGBE_MRQC_RSS_FIELD_IPV6_TCP)
2796                 rss_hf |= ETH_RSS_NONFRAG_IPV6_TCP;
2797         if (mrqc & IXGBE_MRQC_RSS_FIELD_IPV6_EX_TCP)
2798                 rss_hf |= ETH_RSS_IPV6_TCP_EX;
2799         if (mrqc & IXGBE_MRQC_RSS_FIELD_IPV4_UDP)
2800                 rss_hf |= ETH_RSS_NONFRAG_IPV4_UDP;
2801         if (mrqc & IXGBE_MRQC_RSS_FIELD_IPV6_UDP)
2802                 rss_hf |= ETH_RSS_NONFRAG_IPV6_UDP;
2803         if (mrqc & IXGBE_MRQC_RSS_FIELD_IPV6_EX_UDP)
2804                 rss_hf |= ETH_RSS_IPV6_UDP_EX;
2805         rss_conf->rss_hf = rss_hf;
2806         return 0;
2807 }
2808
2809 static void
2810 ixgbe_rss_configure(struct rte_eth_dev *dev)
2811 {
2812         struct rte_eth_rss_conf rss_conf;
2813         struct ixgbe_hw *hw;
2814         uint32_t reta;
2815         uint16_t i;
2816         uint16_t j;
2817         uint16_t sp_reta_size;
2818         uint32_t reta_reg;
2819
2820         PMD_INIT_FUNC_TRACE();
2821         hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
2822
2823         sp_reta_size = ixgbe_reta_size_get(hw->mac.type);
2824
2825         /*
2826          * Fill in redirection table
2827          * The byte-swap is needed because NIC registers are in
2828          * little-endian order.
2829          */
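        /*
         * Worked example (illustrative): with four RX queues, j cycles through
         * 0,1,2,3, so after the first four iterations reta == 0x00010203 and
         * rte_bswap32(reta) == 0x03020100 is written, i.e. RETA entries 0-3
         * map to queues 0-3 in the register's low-to-high bytes.
         */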
2830         reta = 0;
2831         for (i = 0, j = 0; i < sp_reta_size; i++, j++) {
2832                 reta_reg = ixgbe_reta_reg_get(hw->mac.type, i);
2833
2834                 if (j == dev->data->nb_rx_queues)
2835                         j = 0;
2836                 reta = (reta << 8) | j;
2837                 if ((i & 3) == 3)
2838                         IXGBE_WRITE_REG(hw, reta_reg,
2839                                         rte_bswap32(reta));
2840         }
2841
2842         /*
2843          * Configure the RSS key and the RSS protocols used to compute
2844          * the RSS hash of input packets.
2845          */
2846         rss_conf = dev->data->dev_conf.rx_adv_conf.rss_conf;
2847         if ((rss_conf.rss_hf & IXGBE_RSS_OFFLOAD_ALL) == 0) {
2848                 ixgbe_rss_disable(dev);
2849                 return;
2850         }
2851         if (rss_conf.rss_key == NULL)
2852                 rss_conf.rss_key = rss_intel_key; /* Default hash key */
2853         ixgbe_hw_rss_hash_set(hw, &rss_conf);
2854 }
2855
2856 #define NUM_VFTA_REGISTERS 128
2857 #define NIC_RX_BUFFER_SIZE 0x200
2858 #define X550_RX_BUFFER_SIZE 0x180
2859
2860 static void
2861 ixgbe_vmdq_dcb_configure(struct rte_eth_dev *dev)
2862 {
2863         struct rte_eth_vmdq_dcb_conf *cfg;
2864         struct ixgbe_hw *hw;
2865         enum rte_eth_nb_pools num_pools;
2866         uint32_t mrqc, vt_ctl, queue_mapping, vlanctrl;
2867         uint16_t pbsize;
2868         uint8_t nb_tcs; /* number of traffic classes */
2869         int i;
2870
2871         PMD_INIT_FUNC_TRACE();
2872         hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
2873         cfg = &dev->data->dev_conf.rx_adv_conf.vmdq_dcb_conf;
2874         num_pools = cfg->nb_queue_pools;
2875         /* Check we have a valid number of pools */
2876         if (num_pools != ETH_16_POOLS && num_pools != ETH_32_POOLS) {
2877                 ixgbe_rss_disable(dev);
2878                 return;
2879         }
2880         /* 16 pools -> 8 traffic classes, 32 pools -> 4 traffic classes */
2881         nb_tcs = (uint8_t)(ETH_VMDQ_DCB_NUM_QUEUES / (int)num_pools);
2882
2883         /*
2884          * RXPBSIZE
2885          * split rx buffer up into sections, each for 1 traffic class
2886          */
2887         switch (hw->mac.type) {
2888         case ixgbe_mac_X550:
2889         case ixgbe_mac_X550EM_x:
2890                 pbsize = (uint16_t)(X550_RX_BUFFER_SIZE / nb_tcs);
2891                 break;
2892         default:
2893                 pbsize = (uint16_t)(NIC_RX_BUFFER_SIZE / nb_tcs);
2894                 break;
2895         }
2896         for (i = 0 ; i < nb_tcs; i++) {
2897                 uint32_t rxpbsize = IXGBE_READ_REG(hw, IXGBE_RXPBSIZE(i));
2898                 rxpbsize &= (~(0x3FF << IXGBE_RXPBSIZE_SHIFT));
2899                 /* clear 10 bits. */
2900                 rxpbsize |= (pbsize << IXGBE_RXPBSIZE_SHIFT); /* set value */
2901                 IXGBE_WRITE_REG(hw, IXGBE_RXPBSIZE(i), rxpbsize);
2902         }
2903         /* zero alloc all unused TCs */
2904         for (i = nb_tcs; i < ETH_DCB_NUM_USER_PRIORITIES; i++) {
2905                 uint32_t rxpbsize = IXGBE_READ_REG(hw, IXGBE_RXPBSIZE(i));
2906                 rxpbsize &= (~( 0x3FF << IXGBE_RXPBSIZE_SHIFT ));
2907                 /* clear 10 bits. */
2908                 IXGBE_WRITE_REG(hw, IXGBE_RXPBSIZE(i), rxpbsize);
2909         }
2910
2911         /* MRQC: enable vmdq and dcb */
2912         mrqc = ((num_pools == ETH_16_POOLS) ? \
2913                 IXGBE_MRQC_VMDQRT8TCEN : IXGBE_MRQC_VMDQRT4TCEN );
2914         IXGBE_WRITE_REG(hw, IXGBE_MRQC, mrqc);
2915
2916         /* PFVTCTL: turn on virtualisation and set the default pool */
2917         vt_ctl = IXGBE_VT_CTL_VT_ENABLE | IXGBE_VT_CTL_REPLEN;
2918         if (cfg->enable_default_pool) {
2919                 vt_ctl |= (cfg->default_pool << IXGBE_VT_CTL_POOL_SHIFT);
2920         } else {
2921                 vt_ctl |= IXGBE_VT_CTL_DIS_DEFPL;
2922         }
2923
2924         IXGBE_WRITE_REG(hw, IXGBE_VT_CTL, vt_ctl);
2925
2926         /* RTRUP2TC: mapping user priorities to traffic classes (TCs) */
2927         queue_mapping = 0;
2928         for (i = 0; i < ETH_DCB_NUM_USER_PRIORITIES; i++)
2929                 /*
2930                  * mapping is done with 3 bits per priority,
2931                  * so shift by i*3 each time
2932                  */
2933                 queue_mapping |= ((cfg->dcb_tc[i] & 0x07) << (i * 3));
2934
2935         IXGBE_WRITE_REG(hw, IXGBE_RTRUP2TC, queue_mapping);
2936
2937         /* RTRPCS: DCB related */
2938         IXGBE_WRITE_REG(hw, IXGBE_RTRPCS, IXGBE_RMCS_RRM);
2939
2940         /* VLNCTRL: enable vlan filtering and allow all vlan tags through */
2941         vlanctrl = IXGBE_READ_REG(hw, IXGBE_VLNCTRL);
2942         vlanctrl |= IXGBE_VLNCTRL_VFE ; /* enable vlan filters */
2943         IXGBE_WRITE_REG(hw, IXGBE_VLNCTRL, vlanctrl);
2944
2945         /* VFTA - enable all vlan filters */
2946         for (i = 0; i < NUM_VFTA_REGISTERS; i++) {
2947                 IXGBE_WRITE_REG(hw, IXGBE_VFTA(i), 0xFFFFFFFF);
2948         }
2949
2950         /* VFRE: pool enabling for receive - 16 or 32 */
2951         IXGBE_WRITE_REG(hw, IXGBE_VFRE(0), \
2952                         num_pools == ETH_16_POOLS ? 0xFFFF : 0xFFFFFFFF);
2953
2954         /*
2955          * MPSAR - allow pools to read specific mac addresses
2956          * In this case, all pools should be able to read from mac addr 0
2957          */
2958         IXGBE_WRITE_REG(hw, IXGBE_MPSAR_LO(0), 0xFFFFFFFF);
2959         IXGBE_WRITE_REG(hw, IXGBE_MPSAR_HI(0), 0xFFFFFFFF);
2960
2961         /* PFVLVF, PFVLVFB: set up filters for vlan tags as configured */
2962         for (i = 0; i < cfg->nb_pool_maps; i++) {
2963                 /* set vlan id in VF register and set the valid bit */
2964                 IXGBE_WRITE_REG(hw, IXGBE_VLVF(i), (IXGBE_VLVF_VIEN | \
2965                                 (cfg->pool_map[i].vlan_id & 0xFFF)));
2966                 /*
2967                  * Put the allowed pools in VFB reg. As we only have 16 or 32
2968                  * pools, we only need to use the first half of the register
2969                  * i.e. bits 0-31
2970                  */
2971                 IXGBE_WRITE_REG(hw, IXGBE_VLVFB(i*2), cfg->pool_map[i].pools);
2972         }
2973 }
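
/*
 * Usage sketch (editorial example, not part of the driver): the vmdq_dcb_conf
 * consumed above is filled in by the application before dev_start(); the VLAN
 * id, pool selection and priority mapping below are illustrative values only.
 */
#if 0   /* example only, not compiled */
static void
example_fill_vmdq_dcb_conf(struct rte_eth_conf *conf)
{
        struct rte_eth_vmdq_dcb_conf *cfg =
                &conf->rx_adv_conf.vmdq_dcb_conf;
        int i;

        conf->rxmode.mq_mode = ETH_MQ_RX_VMDQ_DCB;
        cfg->nb_queue_pools = ETH_16_POOLS;     /* 16 pools -> 8 TCs */
        cfg->enable_default_pool = 0;
        cfg->nb_pool_maps = 1;
        cfg->pool_map[0].vlan_id = 100;         /* VLAN 100 ... */
        cfg->pool_map[0].pools = 1 << 0;        /* ... goes to pool 0 */
        for (i = 0; i < ETH_DCB_NUM_USER_PRIORITIES; i++)
                cfg->dcb_tc[i] = i;             /* priority i -> TC i */
}
#endif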
2974
2975 /**
2976  * ixgbe_dcb_tx_hw_config - Configure general DCB TX parameters
2977  * @hw: pointer to hardware structure
2978  * @dcb_config: pointer to ixgbe_dcb_config structure
2979  */
2980 static void
2981 ixgbe_dcb_tx_hw_config(struct ixgbe_hw *hw,
2982                struct ixgbe_dcb_config *dcb_config)
2983 {
2984         uint32_t reg;
2985         uint32_t q;
2986
2987         PMD_INIT_FUNC_TRACE();
2988         if (hw->mac.type != ixgbe_mac_82598EB) {
2989                 /* Disable the Tx desc arbiter so that MTQC can be changed */
2990                 reg = IXGBE_READ_REG(hw, IXGBE_RTTDCS);
2991                 reg |= IXGBE_RTTDCS_ARBDIS;
2992                 IXGBE_WRITE_REG(hw, IXGBE_RTTDCS, reg);
2993
2994                 /* Enable DCB for Tx with 8 TCs */
2995                 if (dcb_config->num_tcs.pg_tcs == 8) {
2996                         reg = IXGBE_MTQC_RT_ENA | IXGBE_MTQC_8TC_8TQ;
2997                 }
2998                 else {
2999                         reg = IXGBE_MTQC_RT_ENA | IXGBE_MTQC_4TC_4TQ;
3000                 }
3001                 if (dcb_config->vt_mode)
3002                     reg |= IXGBE_MTQC_VT_ENA;
3003                 IXGBE_WRITE_REG(hw, IXGBE_MTQC, reg);
3004
3005                 /* Disable drop for all queues */
3006                 for (q = 0; q < 128; q++)
3007                         IXGBE_WRITE_REG(hw, IXGBE_QDE,
3008                      (IXGBE_QDE_WRITE | (q << IXGBE_QDE_IDX_SHIFT)));
3009
3010                 /* Enable the Tx desc arbiter */
3011                 reg = IXGBE_READ_REG(hw, IXGBE_RTTDCS);
3012                 reg &= ~IXGBE_RTTDCS_ARBDIS;
3013                 IXGBE_WRITE_REG(hw, IXGBE_RTTDCS, reg);
3014
3015                 /* Enable Security TX Buffer IFG for DCB */
3016                 reg = IXGBE_READ_REG(hw, IXGBE_SECTXMINIFG);
3017                 reg |= IXGBE_SECTX_DCB;
3018                 IXGBE_WRITE_REG(hw, IXGBE_SECTXMINIFG, reg);
3019         }
3020         return;
3021 }
3022
3023 /**
3024  * ixgbe_vmdq_dcb_hw_tx_config - Configure general VMDQ+DCB TX parameters
3025  * @dev: pointer to rte_eth_dev structure
3026  * @dcb_config: pointer to ixgbe_dcb_config structure
3027  */
3028 static void
3029 ixgbe_vmdq_dcb_hw_tx_config(struct rte_eth_dev *dev,
3030                         struct ixgbe_dcb_config *dcb_config)
3031 {
3032         struct rte_eth_vmdq_dcb_tx_conf *vmdq_tx_conf =
3033                         &dev->data->dev_conf.tx_adv_conf.vmdq_dcb_tx_conf;
3034         struct ixgbe_hw *hw =
3035                         IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
3036
3037         PMD_INIT_FUNC_TRACE();
3038         if (hw->mac.type != ixgbe_mac_82598EB)
3039                 /* PF VF Transmit Enable */
3040                 IXGBE_WRITE_REG(hw, IXGBE_VFTE(0),
3041                         vmdq_tx_conf->nb_queue_pools == ETH_16_POOLS ? 0xFFFF : 0xFFFFFFFF);
3042
3043         /* Configure general DCB TX parameters */
3044         ixgbe_dcb_tx_hw_config(hw, dcb_config);
3045         return;
3046 }
3047
3048 static void
3049 ixgbe_vmdq_dcb_rx_config(struct rte_eth_dev *dev,
3050                         struct ixgbe_dcb_config *dcb_config)
3051 {
3052         struct rte_eth_vmdq_dcb_conf *vmdq_rx_conf =
3053                         &dev->data->dev_conf.rx_adv_conf.vmdq_dcb_conf;
3054         struct ixgbe_dcb_tc_config *tc;
3055         uint8_t i,j;
3056
3057         /* convert rte_eth_conf.rx_adv_conf to struct ixgbe_dcb_config */
3058         if (vmdq_rx_conf->nb_queue_pools == ETH_16_POOLS ) {
3059                 dcb_config->num_tcs.pg_tcs = ETH_8_TCS;
3060                 dcb_config->num_tcs.pfc_tcs = ETH_8_TCS;
3061         }
3062         else {
3063                 dcb_config->num_tcs.pg_tcs = ETH_4_TCS;
3064                 dcb_config->num_tcs.pfc_tcs = ETH_4_TCS;
3065         }
3066         /* User Priority to Traffic Class mapping */
3067         for (i = 0; i < ETH_DCB_NUM_USER_PRIORITIES; i++) {
3068                 j = vmdq_rx_conf->dcb_tc[i];
3069                 tc = &dcb_config->tc_config[j];
3070                 tc->path[IXGBE_DCB_RX_CONFIG].up_to_tc_bitmap =
3071                                                 (uint8_t)(1 << j);
3072         }
3073 }
3074
3075 static void
3076 ixgbe_dcb_vt_tx_config(struct rte_eth_dev *dev,
3077                         struct ixgbe_dcb_config *dcb_config)
3078 {
3079         struct rte_eth_vmdq_dcb_tx_conf *vmdq_tx_conf =
3080                         &dev->data->dev_conf.tx_adv_conf.vmdq_dcb_tx_conf;
3081         struct ixgbe_dcb_tc_config *tc;
3082         uint8_t i,j;
3083
3084         /* convert rte_eth_conf.rx_adv_conf to struct ixgbe_dcb_config */
3085         if (vmdq_tx_conf->nb_queue_pools == ETH_16_POOLS ) {
3086                 dcb_config->num_tcs.pg_tcs = ETH_8_TCS;
3087                 dcb_config->num_tcs.pfc_tcs = ETH_8_TCS;
3088         }
3089         else {
3090                 dcb_config->num_tcs.pg_tcs = ETH_4_TCS;
3091                 dcb_config->num_tcs.pfc_tcs = ETH_4_TCS;
3092         }
3093
3094         /* User Priority to Traffic Class mapping */
3095         for (i = 0; i < ETH_DCB_NUM_USER_PRIORITIES; i++) {
3096                 j = vmdq_tx_conf->dcb_tc[i];
3097                 tc = &dcb_config->tc_config[j];
3098                 tc->path[IXGBE_DCB_TX_CONFIG].up_to_tc_bitmap =
3099                                                 (uint8_t)(1 << j);
3100         }
3101         return;
3102 }
3103
3104 static void
3105 ixgbe_dcb_rx_config(struct rte_eth_dev *dev,
3106                 struct ixgbe_dcb_config *dcb_config)
3107 {
3108         struct rte_eth_dcb_rx_conf *rx_conf =
3109                         &dev->data->dev_conf.rx_adv_conf.dcb_rx_conf;
3110         struct ixgbe_dcb_tc_config *tc;
3111         uint8_t i,j;
3112
3113         dcb_config->num_tcs.pg_tcs = (uint8_t)rx_conf->nb_tcs;
3114         dcb_config->num_tcs.pfc_tcs = (uint8_t)rx_conf->nb_tcs;
3115
3116         /* User Priority to Traffic Class mapping */
3117         for (i = 0; i < ETH_DCB_NUM_USER_PRIORITIES; i++) {
3118                 j = rx_conf->dcb_tc[i];
3119                 tc = &dcb_config->tc_config[j];
3120                 tc->path[IXGBE_DCB_RX_CONFIG].up_to_tc_bitmap =
3121                                                 (uint8_t)(1 << j);
3122         }
3123 }
3124
3125 static void
3126 ixgbe_dcb_tx_config(struct rte_eth_dev *dev,
3127                 struct ixgbe_dcb_config *dcb_config)
3128 {
3129         struct rte_eth_dcb_tx_conf *tx_conf =
3130                         &dev->data->dev_conf.tx_adv_conf.dcb_tx_conf;
3131         struct ixgbe_dcb_tc_config *tc;
3132         uint8_t i,j;
3133
3134         dcb_config->num_tcs.pg_tcs = (uint8_t)tx_conf->nb_tcs;
3135         dcb_config->num_tcs.pfc_tcs = (uint8_t)tx_conf->nb_tcs;
3136
3137         /* User Priority to Traffic Class mapping */
3138         for (i = 0; i < ETH_DCB_NUM_USER_PRIORITIES; i++) {
3139                 j = tx_conf->dcb_tc[i];
3140                 tc = &dcb_config->tc_config[j];
3141                 tc->path[IXGBE_DCB_TX_CONFIG].up_to_tc_bitmap =
3142                                                 (uint8_t)(1 << j);
3143         }
3144 }
3145
3146 /**
3147  * ixgbe_dcb_rx_hw_config - Configure general DCB RX HW parameters
3148  * @hw: pointer to hardware structure
3149  * @dcb_config: pointer to ixgbe_dcb_config structure
3150  */
3151 static void
3152 ixgbe_dcb_rx_hw_config(struct ixgbe_hw *hw,
3153                struct ixgbe_dcb_config *dcb_config)
3154 {
3155         uint32_t reg;
3156         uint32_t vlanctrl;
3157         uint8_t i;
3158
3159         PMD_INIT_FUNC_TRACE();
3160         /*
3161          * Disable the arbiter before changing parameters
3162          * (always enable recycle mode; WSP)
3163          */
3164         reg = IXGBE_RTRPCS_RRM | IXGBE_RTRPCS_RAC | IXGBE_RTRPCS_ARBDIS;
3165         IXGBE_WRITE_REG(hw, IXGBE_RTRPCS, reg);
3166
3167         if (hw->mac.type != ixgbe_mac_82598EB) {
3168                 reg = IXGBE_READ_REG(hw, IXGBE_MRQC);
3169                 if (dcb_config->num_tcs.pg_tcs == 4) {
3170                         if (dcb_config->vt_mode)
3171                                 reg = (reg & ~IXGBE_MRQC_MRQE_MASK) |
3172                                         IXGBE_MRQC_VMDQRT4TCEN;
3173                         else {
3174                                 /* whether the mode is DCB or DCB_RSS, just
3175                                  * set MRQE to RTRSSxTCEN; RSS itself is
3176                                  * controlled by the RSS_FIELD bits
3177                                  */
3178                                 IXGBE_WRITE_REG(hw, IXGBE_VT_CTL, 0);
3179                                 reg = (reg & ~IXGBE_MRQC_MRQE_MASK) |
3180                                         IXGBE_MRQC_RTRSS4TCEN;
3181                         }
3182                 }
3183                 if (dcb_config->num_tcs.pg_tcs == 8) {
3184                         if (dcb_config->vt_mode)
3185                                 reg = (reg & ~IXGBE_MRQC_MRQE_MASK) |
3186                                         IXGBE_MRQC_VMDQRT8TCEN;
3187                         else {
3188                                 IXGBE_WRITE_REG(hw, IXGBE_VT_CTL, 0);
3189                                 reg = (reg & ~IXGBE_MRQC_MRQE_MASK) |
3190                                         IXGBE_MRQC_RTRSS8TCEN;
3191                         }
3192                 }
3193
3194                 IXGBE_WRITE_REG(hw, IXGBE_MRQC, reg);
3195         }
3196
3197         /* VLNCTRL: enable vlan filtering and allow all vlan tags through */
3198         vlanctrl = IXGBE_READ_REG(hw, IXGBE_VLNCTRL);
3199         vlanctrl |= IXGBE_VLNCTRL_VFE ; /* enable vlan filters */
3200         IXGBE_WRITE_REG(hw, IXGBE_VLNCTRL, vlanctrl);
3201
3202         /* VFTA - enable all vlan filters */
3203         for (i = 0; i < NUM_VFTA_REGISTERS; i++) {
3204                 IXGBE_WRITE_REG(hw, IXGBE_VFTA(i), 0xFFFFFFFF);
3205         }
3206
3207         /*
3208          * Configure Rx packet plane (recycle mode; WSP) and
3209          * enable arbiter
3210          */
3211         reg = IXGBE_RTRPCS_RRM | IXGBE_RTRPCS_RAC;
3212         IXGBE_WRITE_REG(hw, IXGBE_RTRPCS, reg);
3213
3214         return;
3215 }
3216
3217 static void
3218 ixgbe_dcb_hw_arbite_rx_config(struct ixgbe_hw *hw, uint16_t *refill,
3219                         uint16_t *max,uint8_t *bwg_id, uint8_t *tsa, uint8_t *map)
3220 {
3221         switch (hw->mac.type) {
3222         case ixgbe_mac_82598EB:
3223                 ixgbe_dcb_config_rx_arbiter_82598(hw, refill, max, tsa);
3224                 break;
3225         case ixgbe_mac_82599EB:
3226         case ixgbe_mac_X540:
3227         case ixgbe_mac_X550:
3228         case ixgbe_mac_X550EM_x:
3229                 ixgbe_dcb_config_rx_arbiter_82599(hw, refill, max, bwg_id,
3230                                                   tsa, map);
3231                 break;
3232         default:
3233                 break;
3234         }
3235 }
3236
3237 static void
3238 ixgbe_dcb_hw_arbite_tx_config(struct ixgbe_hw *hw, uint16_t *refill, uint16_t *max,
3239                             uint8_t *bwg_id, uint8_t *tsa, uint8_t *map)
3240 {
3241         switch (hw->mac.type) {
3242         case ixgbe_mac_82598EB:
3243                 ixgbe_dcb_config_tx_desc_arbiter_82598(hw, refill, max, bwg_id,tsa);
3244                 ixgbe_dcb_config_tx_data_arbiter_82598(hw, refill, max, bwg_id,tsa);
3245                 break;
3246         case ixgbe_mac_82599EB:
3247         case ixgbe_mac_X540:
3248         case ixgbe_mac_X550:
3249         case ixgbe_mac_X550EM_x:
3250                 ixgbe_dcb_config_tx_desc_arbiter_82599(hw, refill, max, bwg_id,tsa);
3251                 ixgbe_dcb_config_tx_data_arbiter_82599(hw, refill, max, bwg_id,tsa, map);
3252                 break;
3253         default:
3254                 break;
3255         }
3256 }
3257
3258 #define DCB_RX_CONFIG  1
3259 #define DCB_TX_CONFIG  1
3260 #define DCB_TX_PB      1024
3261 /**
3262  * ixgbe_dcb_hw_configure - Enable DCB and configure
3263  * general DCB in VT mode and non-VT mode parameters
3264  * @dev: pointer to rte_eth_dev structure
3265  * @dcb_config: pointer to ixgbe_dcb_config structure
3266  */
3267 static int
3268 ixgbe_dcb_hw_configure(struct rte_eth_dev *dev,
3269                         struct ixgbe_dcb_config *dcb_config)
3270 {
3271         int     ret = 0;
3272         uint8_t i,pfc_en,nb_tcs;
3273         uint16_t pbsize, rx_buffer_size;
3274         uint8_t config_dcb_rx = 0;
3275         uint8_t config_dcb_tx = 0;
3276         uint8_t tsa[IXGBE_DCB_MAX_TRAFFIC_CLASS] = {0};
3277         uint8_t bwgid[IXGBE_DCB_MAX_TRAFFIC_CLASS] = {0};
3278         uint16_t refill[IXGBE_DCB_MAX_TRAFFIC_CLASS] = {0};
3279         uint16_t max[IXGBE_DCB_MAX_TRAFFIC_CLASS] = {0};
3280         uint8_t map[IXGBE_DCB_MAX_TRAFFIC_CLASS] = {0};
3281         struct ixgbe_dcb_tc_config *tc;
3282         uint32_t max_frame = dev->data->mtu + ETHER_HDR_LEN + ETHER_CRC_LEN;
3283         struct ixgbe_hw *hw =
3284                         IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
3285
3286         switch(dev->data->dev_conf.rxmode.mq_mode){
3287         case ETH_MQ_RX_VMDQ_DCB:
3288                 dcb_config->vt_mode = true;
3289                 if (hw->mac.type != ixgbe_mac_82598EB) {
3290                         config_dcb_rx = DCB_RX_CONFIG;
3291                         /*
3292                          * Get DCB and VT RX configuration parameters
3293                          * from rte_eth_conf.
3294                          */
3295                         ixgbe_vmdq_dcb_rx_config(dev, dcb_config);
3296                         /* Configure general VMDQ and DCB RX parameters */
3297                         ixgbe_vmdq_dcb_configure(dev);
3298                 }
3299                 break;
3300         case ETH_MQ_RX_DCB:
3301         case ETH_MQ_RX_DCB_RSS:
3302                 dcb_config->vt_mode = false;
3303                 config_dcb_rx = DCB_RX_CONFIG;
3304                 /* Get DCB RX configuration parameters from rte_eth_conf */
3305                 ixgbe_dcb_rx_config(dev, dcb_config);
3306                 /* Configure general DCB RX parameters */
3307                 ixgbe_dcb_rx_hw_config(hw, dcb_config);
3308                 break;
3309         default:
3310                 PMD_INIT_LOG(ERR, "Incorrect DCB RX mode configuration");
3311                 break;
3312         }
3313         switch (dev->data->dev_conf.txmode.mq_mode) {
3314         case ETH_MQ_TX_VMDQ_DCB:
3315                 dcb_config->vt_mode = true;
3316                 config_dcb_tx = DCB_TX_CONFIG;
3317                 /* Get DCB and VT TX configuration parameters from rte_eth_conf */
3318                 ixgbe_dcb_vt_tx_config(dev, dcb_config);
3319                 /* Configure general VMDQ and DCB TX parameters */
3320                 ixgbe_vmdq_dcb_hw_tx_config(dev, dcb_config);
3321                 break;
3322
3323         case ETH_MQ_TX_DCB:
3324                 dcb_config->vt_mode = false;
3325                 config_dcb_tx = DCB_TX_CONFIG;
3326                 /* Get DCB TX configuration parameters from rte_eth_conf */
3327                 ixgbe_dcb_tx_config(dev, dcb_config);
3328                 /* Configure general DCB TX parameters */
3329                 ixgbe_dcb_tx_hw_config(hw, dcb_config);
3330                 break;
3331         default:
3332                 PMD_INIT_LOG(ERR, "Incorrect DCB TX mode configuration");
3333                 break;
3334         }
3335
3336         nb_tcs = dcb_config->num_tcs.pfc_tcs;
3337         /* Unpack map */
3338         ixgbe_dcb_unpack_map_cee(dcb_config, IXGBE_DCB_RX_CONFIG, map);
3339         if(nb_tcs == ETH_4_TCS) {
3340                 /* Avoid un-configured priority mapping to TC0 */
3341                 uint8_t j = 4;
3342                 uint8_t mask = 0xFF;
3343                 for (i = 0; i < ETH_DCB_NUM_USER_PRIORITIES - 4; i++)
3344                         mask = (uint8_t)(mask & (~ (1 << map[i])));
3345                 for (i = 0; mask && (i < IXGBE_DCB_MAX_TRAFFIC_CLASS); i++) {
3346                         if ((mask & 0x1) && (j < ETH_DCB_NUM_USER_PRIORITIES))
3347                                 map[j++] = i;
3348                         mask >>= 1;
3349                 }
3350                 /* Re-configure 4 TCs BW */
3351                 for (i = 0; i < nb_tcs; i++) {
3352                         tc = &dcb_config->tc_config[i];
3353                         tc->path[IXGBE_DCB_TX_CONFIG].bwg_percent =
3354                                                 (uint8_t)(100 / nb_tcs);
3355                         tc->path[IXGBE_DCB_RX_CONFIG].bwg_percent =
3356                                                 (uint8_t)(100 / nb_tcs);
3357                 }
3358                 for (; i < IXGBE_DCB_MAX_TRAFFIC_CLASS; i++) {
3359                         tc = &dcb_config->tc_config[i];
3360                         tc->path[IXGBE_DCB_TX_CONFIG].bwg_percent = 0;
3361                         tc->path[IXGBE_DCB_RX_CONFIG].bwg_percent = 0;
3362                 }
3363         }
3364
3365         switch (hw->mac.type) {
3366         case ixgbe_mac_X550:
3367         case ixgbe_mac_X550EM_x:
3368                 rx_buffer_size = X550_RX_BUFFER_SIZE;
3369                 break;
3370         default:
3371                 rx_buffer_size = NIC_RX_BUFFER_SIZE;
3372                 break;
3373         }
3374
3375         if(config_dcb_rx) {
3376                 /* Set RX buffer size */
3377                 pbsize = (uint16_t)(rx_buffer_size / nb_tcs);
3378                 uint32_t rxpbsize = pbsize << IXGBE_RXPBSIZE_SHIFT;
3379                 for (i = 0 ; i < nb_tcs; i++) {
3380                         IXGBE_WRITE_REG(hw, IXGBE_RXPBSIZE(i), rxpbsize);
3381                 }
3382                 /* zero alloc all unused TCs */
3383                 for (; i < ETH_DCB_NUM_USER_PRIORITIES; i++) {
3384                         IXGBE_WRITE_REG(hw, IXGBE_RXPBSIZE(i), 0);
3385                 }
3386         }
3387         if(config_dcb_tx) {
3388                 /* Only support an equally distributed Tx packet buffer strategy. */
3389                 uint32_t txpktsize = IXGBE_TXPBSIZE_MAX / nb_tcs;
3390                 uint32_t txpbthresh = (txpktsize / DCB_TX_PB) - IXGBE_TXPKT_SIZE_MAX;
3391                 for (i = 0; i < nb_tcs; i++) {
3392                         IXGBE_WRITE_REG(hw, IXGBE_TXPBSIZE(i), txpktsize);
3393                         IXGBE_WRITE_REG(hw, IXGBE_TXPBTHRESH(i), txpbthresh);
3394                 }
3395                 /* Clear unused TCs, if any, to zero buffer size*/
3396                 for (; i < ETH_DCB_NUM_USER_PRIORITIES; i++) {
3397                         IXGBE_WRITE_REG(hw, IXGBE_TXPBSIZE(i), 0);
3398                         IXGBE_WRITE_REG(hw, IXGBE_TXPBTHRESH(i), 0);
3399                 }
3400         }
3401
3402         /*Calculates traffic class credits*/
3403         ixgbe_dcb_calculate_tc_credits_cee(hw, dcb_config,max_frame,
3404                                 IXGBE_DCB_TX_CONFIG);
3405         ixgbe_dcb_calculate_tc_credits_cee(hw, dcb_config,max_frame,
3406                                 IXGBE_DCB_RX_CONFIG);
3407
3408         if(config_dcb_rx) {
3409                 /* Unpack CEE standard containers */
3410                 ixgbe_dcb_unpack_refill_cee(dcb_config, IXGBE_DCB_RX_CONFIG, refill);
3411                 ixgbe_dcb_unpack_max_cee(dcb_config, max);
3412                 ixgbe_dcb_unpack_bwgid_cee(dcb_config, IXGBE_DCB_RX_CONFIG, bwgid);
3413                 ixgbe_dcb_unpack_tsa_cee(dcb_config, IXGBE_DCB_RX_CONFIG, tsa);
3414                 /* Configure PG(ETS) RX */
3415                 ixgbe_dcb_hw_arbite_rx_config(hw,refill,max,bwgid,tsa,map);
3416         }
3417
3418         if(config_dcb_tx) {
3419                 /* Unpack CEE standard containers */
3420                 ixgbe_dcb_unpack_refill_cee(dcb_config, IXGBE_DCB_TX_CONFIG, refill);
3421                 ixgbe_dcb_unpack_max_cee(dcb_config, max);
3422                 ixgbe_dcb_unpack_bwgid_cee(dcb_config, IXGBE_DCB_TX_CONFIG, bwgid);
3423                 ixgbe_dcb_unpack_tsa_cee(dcb_config, IXGBE_DCB_TX_CONFIG, tsa);
3424                 /* Configure PG(ETS) TX */
3425                 ixgbe_dcb_hw_arbite_tx_config(hw,refill,max,bwgid,tsa,map);
3426         }
3427
3428         /*Configure queue statistics registers*/
3429         ixgbe_dcb_config_tc_stats_82599(hw, dcb_config);
3430
3431         /* Check if the PFC is supported */
3432         if(dev->data->dev_conf.dcb_capability_en & ETH_DCB_PFC_SUPPORT) {
3433                 pbsize = (uint16_t)(rx_buffer_size / nb_tcs);
3434                 for (i = 0; i < nb_tcs; i++) {
3435                         /*
3436                          * e.g. with 8 TCs, pbsize is 64, so the default
3437                          * high_water is 48 and the low_water is 16.
3438                          */
3439                         hw->fc.high_water[i] = (pbsize * 3 ) / 4;
3440                         hw->fc.low_water[i] = pbsize / 4;
3441                         /* Enable pfc for this TC */
3442                         tc = &dcb_config->tc_config[i];
3443                         tc->pfc = ixgbe_dcb_pfc_enabled;
3444                 }
3445                 ixgbe_dcb_unpack_pfc_cee(dcb_config, map, &pfc_en);
3446                 if(dcb_config->num_tcs.pfc_tcs == ETH_4_TCS)
3447                         pfc_en &= 0x0F;
3448                 ret = ixgbe_dcb_config_pfc(hw, pfc_en, map);
3449         }
3450
3451         return ret;
3452 }
3453
3454 /**
3455  * ixgbe_configure_dcb - Configure DCB  Hardware
3456  * @dev: pointer to rte_eth_dev
3457  */
3458 void ixgbe_configure_dcb(struct rte_eth_dev *dev)
3459 {
3460         struct ixgbe_dcb_config *dcb_cfg =
3461                         IXGBE_DEV_PRIVATE_TO_DCB_CFG(dev->data->dev_private);
3462         struct rte_eth_conf *dev_conf = &(dev->data->dev_conf);
3463
3464         PMD_INIT_FUNC_TRACE();
3465
3466         /* check support mq_mode for DCB */
3467         if ((dev_conf->rxmode.mq_mode != ETH_MQ_RX_VMDQ_DCB) &&
3468             (dev_conf->rxmode.mq_mode != ETH_MQ_RX_DCB) &&
3469             (dev_conf->rxmode.mq_mode != ETH_MQ_RX_DCB_RSS))
3470                 return;
3471
3472         if (dev->data->nb_rx_queues != ETH_DCB_NUM_QUEUES)
3473                 return;
3474
3475         /** Configure DCB hardware **/
3476         ixgbe_dcb_hw_configure(dev, dcb_cfg);
3477
3478         return;
3479 }
3480
3481 /*
3482  * VMDq is only supported on 10 GbE NICs.
3483  */
3484 static void
3485 ixgbe_vmdq_rx_hw_configure(struct rte_eth_dev *dev)
3486 {
3487         struct rte_eth_vmdq_rx_conf *cfg;
3488         struct ixgbe_hw *hw;
3489         enum rte_eth_nb_pools num_pools;
3490         uint32_t mrqc, vt_ctl, vlanctrl;
3491         uint32_t vmolr = 0;
3492         int i;
3493
3494         PMD_INIT_FUNC_TRACE();
3495         hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
3496         cfg = &dev->data->dev_conf.rx_adv_conf.vmdq_rx_conf;
3497         num_pools = cfg->nb_queue_pools;
3498
3499         ixgbe_rss_disable(dev);
3500
3501         /* MRQC: enable vmdq */
3502         mrqc = IXGBE_MRQC_VMDQEN;
3503         IXGBE_WRITE_REG(hw, IXGBE_MRQC, mrqc);
3504
3505         /* PFVTCTL: turn on virtualisation and set the default pool */
3506         vt_ctl = IXGBE_VT_CTL_VT_ENABLE | IXGBE_VT_CTL_REPLEN;
3507         if (cfg->enable_default_pool)
3508                 vt_ctl |= (cfg->default_pool << IXGBE_VT_CTL_POOL_SHIFT);
3509         else
3510                 vt_ctl |= IXGBE_VT_CTL_DIS_DEFPL;
3511
3512         IXGBE_WRITE_REG(hw, IXGBE_VT_CTL, vt_ctl);
3513
3514         for (i = 0; i < (int)num_pools; i++) {
3515                 vmolr = ixgbe_convert_vm_rx_mask_to_val(cfg->rx_mode, vmolr);
3516                 IXGBE_WRITE_REG(hw, IXGBE_VMOLR(i), vmolr);
3517         }
3518
3519         /* VLNCTRL: enable vlan filtering and allow all vlan tags through */
3520         vlanctrl = IXGBE_READ_REG(hw, IXGBE_VLNCTRL);
3521         vlanctrl |= IXGBE_VLNCTRL_VFE ; /* enable vlan filters */
3522         IXGBE_WRITE_REG(hw, IXGBE_VLNCTRL, vlanctrl);
3523
3524         /* VFTA - enable all vlan filters */
3525         for (i = 0; i < NUM_VFTA_REGISTERS; i++)
3526                 IXGBE_WRITE_REG(hw, IXGBE_VFTA(i), UINT32_MAX);
3527
3528         /* VFRE: pool enabling for receive - 64 */
3529         IXGBE_WRITE_REG(hw, IXGBE_VFRE(0), UINT32_MAX);
3530         if (num_pools == ETH_64_POOLS)
3531                 IXGBE_WRITE_REG(hw, IXGBE_VFRE(1), UINT32_MAX);
3532
3533         /*
3534          * MPSAR - allow pools to read specific mac addresses
3535          * In this case, all pools should be able to read from mac addr 0
3536          */
3537         IXGBE_WRITE_REG(hw, IXGBE_MPSAR_LO(0), UINT32_MAX);
3538         IXGBE_WRITE_REG(hw, IXGBE_MPSAR_HI(0), UINT32_MAX);
3539
3540         /* PFVLVF, PFVLVFB: set up filters for vlan tags as configured */
3541         for (i = 0; i < cfg->nb_pool_maps; i++) {
3542                 /* set vlan id in VF register and set the valid bit */
3543                 IXGBE_WRITE_REG(hw, IXGBE_VLVF(i), (IXGBE_VLVF_VIEN | \
3544                                 (cfg->pool_map[i].vlan_id & IXGBE_RXD_VLAN_ID_MASK)));
3545                 /*
3546                  * Put the allowed pools in the VFB reg: pools 0-31 live in
3547                  * the even register, pools 32-63 in the odd one.  Only one
3548                  * of the two halves is written below.
3549                  */
3550                 if (((cfg->pool_map[i].pools >> 32) & UINT32_MAX) == 0)
3551                         IXGBE_WRITE_REG(hw, IXGBE_VLVFB(i*2), \
3552                                         (cfg->pool_map[i].pools & UINT32_MAX));
3553                 else
3554                         IXGBE_WRITE_REG(hw, IXGBE_VLVFB((i*2+1)), \
3555                                         ((cfg->pool_map[i].pools >> 32) \
3556                                         & UINT32_MAX));
3557
3558         }
3559
3560         /* PFDMA Tx General Switch Control Enables VMDQ loopback */
3561         if (cfg->enable_loop_back) {
3562                 IXGBE_WRITE_REG(hw, IXGBE_PFDTXGSWC, IXGBE_PFDTXGSWC_VT_LBEN);
3563                 for (i = 0; i < RTE_IXGBE_VMTXSW_REGISTER_COUNT; i++)
3564                         IXGBE_WRITE_REG(hw, IXGBE_VMTXSW(i), UINT32_MAX);
3565         }
3566
3567         IXGBE_WRITE_FLUSH(hw);
3568 }
3569
3570 /*
3571  * ixgbe_vmdq_tx_hw_configure - Configure general VMDq TX parameters
3572  * @hw: pointer to hardware structure
3573  */
3574 static void
3575 ixgbe_vmdq_tx_hw_configure(struct ixgbe_hw *hw)
3576 {
3577         uint32_t reg;
3578         uint32_t q;
3579
3580         PMD_INIT_FUNC_TRACE();
3581         /*PF VF Transmit Enable*/
3582         IXGBE_WRITE_REG(hw, IXGBE_VFTE(0), UINT32_MAX);
3583         IXGBE_WRITE_REG(hw, IXGBE_VFTE(1), UINT32_MAX);
3584
3585         /* Disable the Tx desc arbiter so that MTQC can be changed */
3586         reg = IXGBE_READ_REG(hw, IXGBE_RTTDCS);
3587         reg |= IXGBE_RTTDCS_ARBDIS;
3588         IXGBE_WRITE_REG(hw, IXGBE_RTTDCS, reg);
3589
3590         reg = IXGBE_MTQC_VT_ENA | IXGBE_MTQC_64VF;
3591         IXGBE_WRITE_REG(hw, IXGBE_MTQC, reg);
3592
3593         /* Disable drop for all queues */
3594         for (q = 0; q < IXGBE_MAX_RX_QUEUE_NUM; q++)
3595                 IXGBE_WRITE_REG(hw, IXGBE_QDE,
3596                   (IXGBE_QDE_WRITE | (q << IXGBE_QDE_IDX_SHIFT)));
3597
3598         /* Enable the Tx desc arbiter */
3599         reg = IXGBE_READ_REG(hw, IXGBE_RTTDCS);
3600         reg &= ~IXGBE_RTTDCS_ARBDIS;
3601         IXGBE_WRITE_REG(hw, IXGBE_RTTDCS, reg);
3602
3603         IXGBE_WRITE_FLUSH(hw);
3604
3605         return;
3606 }
3607
3608 static int __attribute__((cold))
3609 ixgbe_alloc_rx_queue_mbufs(struct ixgbe_rx_queue *rxq)
3610 {
3611         struct ixgbe_rx_entry *rxe = rxq->sw_ring;
3612         uint64_t dma_addr;
3613         unsigned i;
3614
3615         /* Initialize software ring entries */
3616         for (i = 0; i < rxq->nb_rx_desc; i++) {
3617                 volatile union ixgbe_adv_rx_desc *rxd;
3618                 struct rte_mbuf *mbuf = rte_rxmbuf_alloc(rxq->mb_pool);
3619                 if (mbuf == NULL) {
3620                         PMD_INIT_LOG(ERR, "RX mbuf alloc failed queue_id=%u",
3621                                      (unsigned) rxq->queue_id);
3622                         return (-ENOMEM);
3623                 }
3624
3625                 rte_mbuf_refcnt_set(mbuf, 1);
3626                 mbuf->next = NULL;
3627                 mbuf->data_off = RTE_PKTMBUF_HEADROOM;
3628                 mbuf->nb_segs = 1;
3629                 mbuf->port = rxq->port_id;
3630
3631                 dma_addr =
3632                         rte_cpu_to_le_64(RTE_MBUF_DATA_DMA_ADDR_DEFAULT(mbuf));
3633                 rxd = &rxq->rx_ring[i];
3634                 rxd->read.hdr_addr = 0;
3635                 rxd->read.pkt_addr = dma_addr;
3636                 rxe[i].mbuf = mbuf;
3637         }
3638
3639         return 0;
3640 }
3641
3642 static int
3643 ixgbe_config_vf_rss(struct rte_eth_dev *dev)
3644 {
3645         struct ixgbe_hw *hw;
3646         uint32_t mrqc;
3647
3648         ixgbe_rss_configure(dev);
3649
3650         hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
3651
3652         /* MRQC: enable VF RSS */
3653         mrqc = IXGBE_READ_REG(hw, IXGBE_MRQC);
3654         mrqc &= ~IXGBE_MRQC_MRQE_MASK;
3655         switch (RTE_ETH_DEV_SRIOV(dev).active) {
3656         case ETH_64_POOLS:
3657                 mrqc |= IXGBE_MRQC_VMDQRSS64EN;
3658                 break;
3659
3660         case ETH_32_POOLS:
3661                 mrqc |= IXGBE_MRQC_VMDQRSS32EN;
3662                 break;
3663
3664         default:
3665                 PMD_INIT_LOG(ERR, "Invalid pool number in IOV mode with VMDQ RSS");
3666                 return -EINVAL;
3667         }
3668
3669         IXGBE_WRITE_REG(hw, IXGBE_MRQC, mrqc);
3670
3671         return 0;
3672 }
3673
3674 static int
3675 ixgbe_config_vf_default(struct rte_eth_dev *dev)
3676 {
3677         struct ixgbe_hw *hw =
3678                 IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
3679
3680         switch (RTE_ETH_DEV_SRIOV(dev).active) {
3681         case ETH_64_POOLS:
3682                 IXGBE_WRITE_REG(hw, IXGBE_MRQC,
3683                         IXGBE_MRQC_VMDQEN);
3684                 break;
3685
3686         case ETH_32_POOLS:
3687                 IXGBE_WRITE_REG(hw, IXGBE_MRQC,
3688                         IXGBE_MRQC_VMDQRT4TCEN);
3689                 break;
3690
3691         case ETH_16_POOLS:
3692                 IXGBE_WRITE_REG(hw, IXGBE_MRQC,
3693                         IXGBE_MRQC_VMDQRT8TCEN);
3694                 break;
3695         default:
3696                 PMD_INIT_LOG(ERR,
3697                         "invalid pool number in IOV mode");
3698                 break;
3699         }
3700         return 0;
3701 }
3702
3703 static int
3704 ixgbe_dev_mq_rx_configure(struct rte_eth_dev *dev)
3705 {
3706         struct ixgbe_hw *hw =
3707                 IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
3708
3709         if (hw->mac.type == ixgbe_mac_82598EB)
3710                 return 0;
3711
3712         if (RTE_ETH_DEV_SRIOV(dev).active == 0) {
3713                 /*
3714                  * SRIOV inactive scheme
3715                  * any DCB/RSS w/o VMDq multi-queue setting
3716                  */
3717                 switch (dev->data->dev_conf.rxmode.mq_mode) {
3718                 case ETH_MQ_RX_RSS:
3719                 case ETH_MQ_RX_DCB_RSS:
3720                 case ETH_MQ_RX_VMDQ_RSS:
3721                         ixgbe_rss_configure(dev);
3722                         break;
3723
3724                 case ETH_MQ_RX_VMDQ_DCB:
3725                         ixgbe_vmdq_dcb_configure(dev);
3726                         break;
3727
3728                 case ETH_MQ_RX_VMDQ_ONLY:
3729                         ixgbe_vmdq_rx_hw_configure(dev);
3730                         break;
3731
3732                 case ETH_MQ_RX_NONE:
3733                 default:
3734                         /* if mq_mode is none, disable rss mode.*/
3735                         ixgbe_rss_disable(dev);
3736                         break;
3737                 }
3738         } else {
3739                 /*
3740                  * SRIOV active scheme
3741                  * Support RSS together with VMDq & SRIOV
3742                  */
3743                 switch (dev->data->dev_conf.rxmode.mq_mode) {
3744                 case ETH_MQ_RX_RSS:
3745                 case ETH_MQ_RX_VMDQ_RSS:
3746                         ixgbe_config_vf_rss(dev);
3747                         break;
3748
3749                 /* FIXME if support DCB/RSS together with VMDq & SRIOV */
3750                 case ETH_MQ_RX_VMDQ_DCB:
3751                 case ETH_MQ_RX_VMDQ_DCB_RSS:
3752                         PMD_INIT_LOG(ERR,
3753                                 "Could not support DCB with VMDq & SRIOV");
3754                         return -1;
3755                 default:
3756                         ixgbe_config_vf_default(dev);
3757                         break;
3758                 }
3759         }
3760
3761         return 0;
3762 }
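
/*
 * Usage sketch (editorial example, not part of the driver): the mq_mode
 * dispatched on above comes from the rte_eth_conf passed by the application
 * to rte_eth_dev_configure().  Port id, queue counts and the rss_hf value
 * are arbitrary illustrative choices that land in the ETH_MQ_RX_RSS branch.
 */
#if 0   /* example only, not compiled */
static int
example_configure_rss_port(uint8_t port_id)
{
        struct rte_eth_conf conf = {
                .rxmode = { .mq_mode = ETH_MQ_RX_RSS },
                .rx_adv_conf = {
                        .rss_conf = {
                                .rss_hf = ETH_RSS_IPV4 |
                                          ETH_RSS_NONFRAG_IPV4_TCP,
                        },
                },
        };

        /* 4 RX and 4 TX queues; RSS spreads flows across the 4 RX queues */
        return rte_eth_dev_configure(port_id, 4, 4, &conf);
}
#endif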
3763
3764 static int
3765 ixgbe_dev_mq_tx_configure(struct rte_eth_dev *dev)
3766 {
3767         struct ixgbe_hw *hw =
3768                 IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
3769         uint32_t mtqc;
3770         uint32_t rttdcs;
3771
3772         if (hw->mac.type == ixgbe_mac_82598EB)
3773                 return 0;
3774
3775         /* disable arbiter before setting MTQC */
3776         rttdcs = IXGBE_READ_REG(hw, IXGBE_RTTDCS);
3777         rttdcs |= IXGBE_RTTDCS_ARBDIS;
3778         IXGBE_WRITE_REG(hw, IXGBE_RTTDCS, rttdcs);
3779
3780         if (RTE_ETH_DEV_SRIOV(dev).active == 0) {
3781                 /*
3782                  * SRIOV inactive scheme
3783                  * any DCB w/o VMDq multi-queue setting
3784                  */
3785                 if (dev->data->dev_conf.txmode.mq_mode == ETH_MQ_TX_VMDQ_ONLY)
3786                         ixgbe_vmdq_tx_hw_configure(hw);
3787                 else {
3788                         mtqc = IXGBE_MTQC_64Q_1PB;
3789                         IXGBE_WRITE_REG(hw, IXGBE_MTQC, mtqc);
3790                 }
3791         } else {
3792                 switch (RTE_ETH_DEV_SRIOV(dev).active) {
3793
3794                 /*
3795                  * SRIOV active scheme
3796                  * FIXME if support DCB together with VMDq & SRIOV
3797                  */
3798                 case ETH_64_POOLS:
3799                         mtqc = IXGBE_MTQC_VT_ENA | IXGBE_MTQC_64VF;
3800                         break;
3801                 case ETH_32_POOLS:
3802                         mtqc = IXGBE_MTQC_VT_ENA | IXGBE_MTQC_32VF;
3803                         break;
3804                 case ETH_16_POOLS:
3805                         mtqc = IXGBE_MTQC_VT_ENA | IXGBE_MTQC_RT_ENA |
3806                                 IXGBE_MTQC_8TC_8TQ;
3807                         break;
3808                 default:
3809                         mtqc = IXGBE_MTQC_64Q_1PB;
3810                         PMD_INIT_LOG(ERR, "invalid pool number in IOV mode");
3811                 }
3812                 IXGBE_WRITE_REG(hw, IXGBE_MTQC, mtqc);
3813         }
3814
3815         /* re-enable arbiter */
3816         rttdcs &= ~IXGBE_RTTDCS_ARBDIS;
3817         IXGBE_WRITE_REG(hw, IXGBE_RTTDCS, rttdcs);
3818
3819         return 0;
3820 }
3821
3822 /**
3823  * ixgbe_get_rscctl_maxdesc - Calculate the RSCCTL[n].MAXDESC for PF
3824  *
3825  * Return the RSCCTL[n].MAXDESC for 82599 and x540 PF devices according to the
3826  * spec rev. 3.0 chapter 8.2.3.8.13.
3827  *
3828  * @pool Memory pool of the Rx queue
3829  */
3830 static inline uint32_t
3831 ixgbe_get_rscctl_maxdesc(struct rte_mempool *pool)
3832 {
3833         struct rte_pktmbuf_pool_private *mp_priv = rte_mempool_get_priv(pool);
3834
3835         /* MAXDESC * SRRCTL.BSIZEPKT must not exceed 64 KB minus one */
3836         uint16_t maxdesc =
3837                 IPV4_MAX_PKT_LEN /
3838                         (mp_priv->mbuf_data_room_size - RTE_PKTMBUF_HEADROOM);
3839
3840         if (maxdesc >= 16)
3841                 return IXGBE_RSCCTL_MAXDESC_16;
3842         else if (maxdesc >= 8)
3843                 return IXGBE_RSCCTL_MAXDESC_8;
3844         else if (maxdesc >= 4)
3845                 return IXGBE_RSCCTL_MAXDESC_4;
3846         else
3847                 return IXGBE_RSCCTL_MAXDESC_1;
3848 }
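
/*
 * Worked example for ixgbe_get_rscctl_maxdesc() above (editorial note,
 * assuming the common 2 KB mbuf data room left after RTE_PKTMBUF_HEADROOM):
 * 65535 / 2048 = 31, which is >= 16, so IXGBE_RSCCTL_MAXDESC_16 is returned.
 */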
3849
3850 /**
3851  * ixgbe_set_ivar - Setup the correct IVAR register for a particular MSIX
3852  * interrupt
3853  *
3854  * (Taken from FreeBSD tree)
3855  * (yes this is all very magic and confusing :)
3856  *
3857  * @dev port handle
3858  * @entry the register array entry
3859  * @vector the MSIX vector for this queue
3860  * @type RX/TX/MISC
3861  */
3862 static void
3863 ixgbe_set_ivar(struct rte_eth_dev *dev, u8 entry, u8 vector, s8 type)
3864 {
3865         struct ixgbe_hw *hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
3866         u32 ivar, index;
3867
3868         vector |= IXGBE_IVAR_ALLOC_VAL;
3869
3870         switch (hw->mac.type) {
3871
3872         case ixgbe_mac_82598EB:
3873                 if (type == -1)
3874                         entry = IXGBE_IVAR_OTHER_CAUSES_INDEX;
3875                 else
3876                         entry += (type * 64);
3877                 index = (entry >> 2) & 0x1F;
3878                 ivar = IXGBE_READ_REG(hw, IXGBE_IVAR(index));
3879                 ivar &= ~(0xFF << (8 * (entry & 0x3)));
3880                 ivar |= (vector << (8 * (entry & 0x3)));
3881                 IXGBE_WRITE_REG(hw, IXGBE_IVAR(index), ivar);
3882                 break;
3883
3884         case ixgbe_mac_82599EB:
3885         case ixgbe_mac_X540:
3886                 if (type == -1) { /* MISC IVAR */
3887                         index = (entry & 1) * 8;
3888                         ivar = IXGBE_READ_REG(hw, IXGBE_IVAR_MISC);
3889                         ivar &= ~(0xFF << index);
3890                         ivar |= (vector << index);
3891                         IXGBE_WRITE_REG(hw, IXGBE_IVAR_MISC, ivar);
3892                 } else {        /* RX/TX IVARS */
3893                         index = (16 * (entry & 1)) + (8 * type);
3894                         ivar = IXGBE_READ_REG(hw, IXGBE_IVAR(entry >> 1));
3895                         ivar &= ~(0xFF << index);
3896                         ivar |= (vector << index);
3897                         IXGBE_WRITE_REG(hw, IXGBE_IVAR(entry >> 1), ivar);
3898                 }
3899
3900                 break;
3901
3902         default:
3903                 break;
3904         }
3905 }
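
/*
 * Usage sketch (editorial example, values are illustrative): with the
 * convention above (type 0 = RX, 1 = TX, -1 = other causes), mapping RX
 * queue 0 to MSI-X vector 1 and the misc causes to vector 0 would look like:
 *
 *     ixgbe_set_ivar(dev, 0, 1, 0);      RX queue 0   -> vector 1
 *     ixgbe_set_ivar(dev, 0, 0, -1);     misc causes  -> vector 0
 */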
3906
3907 void __attribute__((cold))
3908 ixgbe_set_rx_function(struct rte_eth_dev *dev)
3909 {
3910         uint16_t i, rx_using_sse;
3911         struct ixgbe_adapter *adapter =
3912                 (struct ixgbe_adapter *)dev->data->dev_private;
3913
3914         /*
3915          * In order to allow Vector Rx there are a few configuration
3916          * conditions to be met and Rx Bulk Allocation should be allowed.
3917          */
3918         if (ixgbe_rx_vec_dev_conf_condition_check(dev) ||
3919             !adapter->rx_bulk_alloc_allowed) {
3920                 PMD_INIT_LOG(DEBUG, "Port[%d] doesn't meet Vector Rx "
3921                                     "preconditions or RTE_IXGBE_INC_VECTOR is "
3922                                     "not enabled",
3923                              dev->data->port_id);
3924
3925                 adapter->rx_vec_allowed = false;
3926         }
3927
3928         /*
3929          * Initialize the appropriate LRO callback.
3930          *
3931          * If all queues satisfy the bulk allocation preconditions
3932          * (adapter->rx_bulk_alloc_allowed is TRUE) then we may use bulk allocation.
3933          * Otherwise use a single allocation version.
3934          */
3935         if (dev->data->lro) {
3936                 if (adapter->rx_bulk_alloc_allowed) {
3937                         PMD_INIT_LOG(DEBUG, "LRO is requested. Using a bulk "
3938                                            "allocation version");
3939                         dev->rx_pkt_burst = ixgbe_recv_pkts_lro_bulk_alloc;
3940                 } else {
3941                         PMD_INIT_LOG(DEBUG, "LRO is requested. Using a single "
3942                                            "allocation version");
3943                         dev->rx_pkt_burst = ixgbe_recv_pkts_lro_single_alloc;
3944                 }
3945         } else if (dev->data->scattered_rx) {
3946                 /*
3947                  * Set the non-LRO scattered callback: there are Vector and
3948                  * single allocation versions.
3949                  */
3950                 if (adapter->rx_vec_allowed) {
3951                         PMD_INIT_LOG(DEBUG, "Using Vector Scattered Rx "
3952                                             "callback (port=%d).",
3953                                      dev->data->port_id);
3954
3955                         dev->rx_pkt_burst = ixgbe_recv_scattered_pkts_vec;
3956                 } else if (adapter->rx_bulk_alloc_allowed) {
3957                         PMD_INIT_LOG(DEBUG, "Using a Scattered Rx callback "
3958                                            "with bulk allocation (port=%d).",
3959                                      dev->data->port_id);
3960                         dev->rx_pkt_burst = ixgbe_recv_pkts_lro_bulk_alloc;
3961                 } else {
3962                         PMD_INIT_LOG(DEBUG, "Using Regular (non-vector, "
3963                                             "single allocation) "
3964                                             "Scattered Rx callback "
3965                                             "(port=%d).",
3966                                      dev->data->port_id);
3967
3968                         dev->rx_pkt_burst = ixgbe_recv_pkts_lro_single_alloc;
3969                 }
3970         /*
3971          * Below we set "simple" callbacks according to port/queues parameters.
3972          * If parameters allow we are going to choose between the following
3973          * callbacks:
3974          *    - Vector
3975          *    - Bulk Allocation
3976          *    - Single buffer allocation (the simplest one)
3977          */
3978         } else if (adapter->rx_vec_allowed) {
3979                 PMD_INIT_LOG(DEBUG, "Vector rx enabled, please make sure RX "
3980                                     "burst size is no less than %d (port=%d).",
3981                              RTE_IXGBE_DESCS_PER_LOOP,
3982                              dev->data->port_id);
3983
3984                 dev->rx_pkt_burst = ixgbe_recv_pkts_vec;
3985         } else if (adapter->rx_bulk_alloc_allowed) {
3986                 PMD_INIT_LOG(DEBUG, "Rx Burst Bulk Alloc Preconditions are "
3987                                     "satisfied. Rx Burst Bulk Alloc function "
3988                                     "will be used on port=%d.",
3989                              dev->data->port_id);
3990
3991                 dev->rx_pkt_burst = ixgbe_recv_pkts_bulk_alloc;
3992         } else {
3993                 PMD_INIT_LOG(DEBUG, "Rx Burst Bulk Alloc Preconditions are not "
3994                                     "satisfied, or Scattered Rx is requested "
3995                                     "(port=%d).",
3996                              dev->data->port_id);
3997
3998                 dev->rx_pkt_burst = ixgbe_recv_pkts;
3999         }
4000
4001         /* Propagate information about RX function choice through all queues. */
4002
4003         rx_using_sse =
4004                 (dev->rx_pkt_burst == ixgbe_recv_scattered_pkts_vec ||
4005                 dev->rx_pkt_burst == ixgbe_recv_pkts_vec);
4006
4007         for (i = 0; i < dev->data->nb_rx_queues; i++) {
4008                 struct ixgbe_rx_queue *rxq = dev->data->rx_queues[i];
4009                 rxq->rx_using_sse = rx_using_sse;
4010         }
4011 }
4012
4013 /**
4014  * ixgbe_set_rsc - configure RSC related port HW registers
4015  *
4016  * Configures the port's RSC related registers according to chapter 4.6.7.2
4017  * of the 82599 Spec (x540 configuration is virtually the same).
4018  *
4019  * @dev port handle
4020  *
4021  * Returns 0 in case of success or a non-zero error code
4022  */
4023 static int
4024 ixgbe_set_rsc(struct rte_eth_dev *dev)
4025 {
4026         struct rte_eth_rxmode *rx_conf = &dev->data->dev_conf.rxmode;
4027         struct ixgbe_hw *hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
4028         struct rte_eth_dev_info dev_info = { 0 };
4029         bool rsc_capable = false;
4030         uint16_t i;
4031         uint32_t rdrxctl;
4032
4033         /* Sanity check */
4034         dev->dev_ops->dev_infos_get(dev, &dev_info);
4035         if (dev_info.rx_offload_capa & DEV_RX_OFFLOAD_TCP_LRO)
4036                 rsc_capable = true;
4037
4038         if (!rsc_capable && rx_conf->enable_lro) {
4039                 PMD_INIT_LOG(CRIT, "LRO is requested on HW that doesn't "
4040                                    "support it");
4041                 return -EINVAL;
4042         }
4043
4044         /* RSC global configuration (chapter 4.6.7.2.1 of 82599 Spec) */
4045
4046         if (!rx_conf->hw_strip_crc && rx_conf->enable_lro) {
4047                 /*
4048                  * According to chapter 4.6.7.2.1 of the Spec Rev. 3.0,
4049                  * RSC configuration requires HW CRC stripping to be
4050                  * enabled. If the user requested both HW CRC stripping off
4051                  * and RSC on, return an error.
4052                  */
4053                 PMD_INIT_LOG(CRIT, "LRO can't be enabled when HW CRC "
4054                                     "is disabled");
4055                 return -EINVAL;
4056         }
4057
4058         /* RFCTL configuration  */
4059         if (rsc_capable) {
4060                 uint32_t rfctl = IXGBE_READ_REG(hw, IXGBE_RFCTL);
4061                 if (rx_conf->enable_lro)
4062                         /*
4063                          * Since NFS packet coalescing is not supported, clear
4064                          * RFCTL.NFSW_DIS and RFCTL.NFSR_DIS when RSC is
4065                          * enabled.
4066                          */
4067                         rfctl &= ~(IXGBE_RFCTL_RSC_DIS | IXGBE_RFCTL_NFSW_DIS |
4068                                    IXGBE_RFCTL_NFSR_DIS);
4069                 else
4070                         rfctl |= IXGBE_RFCTL_RSC_DIS;
4071
4072                 IXGBE_WRITE_REG(hw, IXGBE_RFCTL, rfctl);
4073         }
4074
4075         /* If LRO hasn't been requested - we are done here. */
4076         if (!rx_conf->enable_lro)
4077                 return 0;
4078
4079         /* Set RDRXCTL.RSCACKC bit */
4080         rdrxctl = IXGBE_READ_REG(hw, IXGBE_RDRXCTL);
4081         rdrxctl |= IXGBE_RDRXCTL_RSCACKC;
4082         IXGBE_WRITE_REG(hw, IXGBE_RDRXCTL, rdrxctl);
4083
4084         /* Per-queue RSC configuration (chapter 4.6.7.2.2 of 82599 Spec) */
4085         for (i = 0; i < dev->data->nb_rx_queues; i++) {
4086                 struct ixgbe_rx_queue *rxq = dev->data->rx_queues[i];
4087                 uint32_t srrctl =
4088                         IXGBE_READ_REG(hw, IXGBE_SRRCTL(rxq->reg_idx));
4089                 uint32_t rscctl =
4090                         IXGBE_READ_REG(hw, IXGBE_RSCCTL(rxq->reg_idx));
4091                 uint32_t psrtype =
4092                         IXGBE_READ_REG(hw, IXGBE_PSRTYPE(rxq->reg_idx));
4093                 uint32_t eitr =
4094                         IXGBE_READ_REG(hw, IXGBE_EITR(rxq->reg_idx));
4095
4096                 /*
4097                  * ixgbe PMD doesn't support header-split at the moment.
4098                  *
4099                  * Following chapter 4.6.7.2.1 of the 82599/x540 Spec,
4100                  * if RSC is enabled the SRRCTL[n].BSIZEHEADER field
4101                  * must be configured even if header split is not
4102                  * enabled. Configure it to 128 bytes, as recommended
4103                  * by the spec.
4104                  */
4105                 srrctl &= ~IXGBE_SRRCTL_BSIZEHDR_MASK;
4106                 srrctl |= (128 << IXGBE_SRRCTL_BSIZEHDRSIZE_SHIFT) &
4107                                             IXGBE_SRRCTL_BSIZEHDR_MASK;
4108
4109                 /*
4110                  * TODO: Consider setting the Receive Descriptor Minimum
4111                  * Threshold Size for the RSC case. This is not an obviously
4112                  * beneficial option, but one worth considering...
4113                  */
4114
4115                 rscctl |= IXGBE_RSCCTL_RSCEN;
4116                 rscctl |= ixgbe_get_rscctl_maxdesc(rxq->mb_pool);
4117                 psrtype |= IXGBE_PSRTYPE_TCPHDR;
4118
4119                 /*
4120                  * RSC: Set ITR interval corresponding to 2K ints/s.
4121                  *
4122                  * Full-sized RSC aggregations for a 10Gb/s link will
4123                  * arrive at about a 20K aggregations/s rate.
4124                  *
4125                  * A 2K ints/s rate will cause only 10% of the
4126                  * aggregations to be closed due to interrupt timer
4127                  * expiration when streaming at wire speed.
4128                  *
4129                  * For a sparse streaming case this setting yields
4130                  * at most 500us of latency for a single RSC aggregation.
4131                  */
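                /*
                 * IXGBE_EITR_INTERVAL_US(500) programs a 500us interval;
                 * CNT_WDIS avoids disturbing the internal ITR counter when
                 * the register is written.
                 */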
4132                 eitr &= ~IXGBE_EITR_ITR_INT_MASK;
4133                 eitr |= IXGBE_EITR_INTERVAL_US(500) | IXGBE_EITR_CNT_WDIS;
4134
4135                 IXGBE_WRITE_REG(hw, IXGBE_SRRCTL(rxq->reg_idx), srrctl);
4136                 IXGBE_WRITE_REG(hw, IXGBE_RSCCTL(rxq->reg_idx), rscctl);
4137                 IXGBE_WRITE_REG(hw, IXGBE_PSRTYPE(rxq->reg_idx), psrtype);
4138                 IXGBE_WRITE_REG(hw, IXGBE_EITR(rxq->reg_idx), eitr);
4139
4140                 /*
4141                  * RSC requires the mapping of the queue to the
4142                  * interrupt vector.
4143                  */
4144                 ixgbe_set_ivar(dev, rxq->reg_idx, i, 0);
4145         }
4146
4147         dev->data->lro = 1;
4148
4149         PMD_INIT_LOG(DEBUG, "enabling LRO mode");
4150
4151         return 0;
4152 }
4153
4154 /*
4155  * Initializes Receive Unit.
4156  */
4157 int __attribute__((cold))
4158 ixgbe_dev_rx_init(struct rte_eth_dev *dev)
4159 {
4160         struct ixgbe_hw     *hw;
4161         struct ixgbe_rx_queue *rxq;
4162         uint64_t bus_addr;
4163         uint32_t rxctrl;
4164         uint32_t fctrl;
4165         uint32_t hlreg0;
4166         uint32_t maxfrs;
4167         uint32_t srrctl;
4168         uint32_t rdrxctl;
4169         uint32_t rxcsum;
4170         uint16_t buf_size;
4171         uint16_t i;
4172         struct rte_eth_rxmode *rx_conf = &dev->data->dev_conf.rxmode;
4173         int rc;
4174
4175         PMD_INIT_FUNC_TRACE();
4176         hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
4177
4178         /*
4179          * Make sure receives are disabled while setting
4180          * up the RX context (registers, descriptor rings, etc.).
4181          */
4182         rxctrl = IXGBE_READ_REG(hw, IXGBE_RXCTRL);
4183         IXGBE_WRITE_REG(hw, IXGBE_RXCTRL, rxctrl & ~IXGBE_RXCTRL_RXEN);
4184
4185         /* Enable receipt of broadcast frames */
4186         fctrl = IXGBE_READ_REG(hw, IXGBE_FCTRL);
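        /*
         * BAM accepts broadcast packets, DPF discards PAUSE frames and
         * PMCF passes MAC control frames up to the host.
         */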
4187         fctrl |= IXGBE_FCTRL_BAM;
4188         fctrl |= IXGBE_FCTRL_DPF;
4189         fctrl |= IXGBE_FCTRL_PMCF;
4190         IXGBE_WRITE_REG(hw, IXGBE_FCTRL, fctrl);
4191
4192         /*
4193          * Configure CRC stripping, if any.
4194          */
4195         hlreg0 = IXGBE_READ_REG(hw, IXGBE_HLREG0);
4196         if (rx_conf->hw_strip_crc)
4197                 hlreg0 |= IXGBE_HLREG0_RXCRCSTRP;
4198         else
4199                 hlreg0 &= ~IXGBE_HLREG0_RXCRCSTRP;
4200
4201         /*
4202          * Configure jumbo frame support, if any.
4203          */
4204         if (rx_conf->jumbo_frame == 1) {
4205                 hlreg0 |= IXGBE_HLREG0_JUMBOEN;
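                /* MAXFRS[31:16] (MFS) holds the maximum expected frame size. */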
4206                 maxfrs = IXGBE_READ_REG(hw, IXGBE_MAXFRS);
4207                 maxfrs &= 0x0000FFFF;
4208                 maxfrs |= (rx_conf->max_rx_pkt_len << 16);
4209                 IXGBE_WRITE_REG(hw, IXGBE_MAXFRS, maxfrs);
4210         } else
4211                 hlreg0 &= ~IXGBE_HLREG0_JUMBOEN;
4212
4213         /*
4214          * If loopback mode is configured for 82599, set LPBK bit.
4215          */
4216         if (hw->mac.type == ixgbe_mac_82599EB &&
4217                         dev->data->dev_conf.lpbk_mode == IXGBE_LPBK_82599_TX_RX)
4218                 hlreg0 |= IXGBE_HLREG0_LPBK;
4219         else
4220                 hlreg0 &= ~IXGBE_HLREG0_LPBK;
4221
4222         IXGBE_WRITE_REG(hw, IXGBE_HLREG0, hlreg0);
4223
4224         /* Setup RX queues */
4225         for (i = 0; i < dev->data->nb_rx_queues; i++) {
4226                 rxq = dev->data->rx_queues[i];
4227
4228                 /*
4229                  * Reset crc_len in case it was changed after queue setup by a
4230                  * call to configure.
4231                  */
4232                 rxq->crc_len = rx_conf->hw_strip_crc ? 0 : ETHER_CRC_LEN;
4233
4234                 /* Setup the Base and Length of the Rx Descriptor Rings */
4235                 bus_addr = rxq->rx_ring_phys_addr;
4236                 IXGBE_WRITE_REG(hw, IXGBE_RDBAL(rxq->reg_idx),
4237                                 (uint32_t)(bus_addr & 0x00000000ffffffffULL));
4238                 IXGBE_WRITE_REG(hw, IXGBE_RDBAH(rxq->reg_idx),
4239                                 (uint32_t)(bus_addr >> 32));
4240                 IXGBE_WRITE_REG(hw, IXGBE_RDLEN(rxq->reg_idx),
4241                                 rxq->nb_rx_desc * sizeof(union ixgbe_adv_rx_desc));
4242                 IXGBE_WRITE_REG(hw, IXGBE_RDH(rxq->reg_idx), 0);
4243                 IXGBE_WRITE_REG(hw, IXGBE_RDT(rxq->reg_idx), 0);
4244
4245                 /* Configure the SRRCTL register */
4246 #ifdef RTE_HEADER_SPLIT_ENABLE
4247                 /*
4248                  * Configure Header Split
4249                  */
4250                 if (rx_conf->header_split) {
4251                         if (hw->mac.type == ixgbe_mac_82599EB) {
4252                                 /* Must setup the PSRTYPE register */
4253                                 uint32_t psrtype;
4254                                 psrtype = IXGBE_PSRTYPE_TCPHDR |
4255                                         IXGBE_PSRTYPE_UDPHDR   |
4256                                         IXGBE_PSRTYPE_IPV4HDR  |
4257                                         IXGBE_PSRTYPE_IPV6HDR;
4258                                 IXGBE_WRITE_REG(hw, IXGBE_PSRTYPE(rxq->reg_idx), psrtype);
4259                         }
4260                         srrctl = ((rx_conf->split_hdr_size <<
4261                                 IXGBE_SRRCTL_BSIZEHDRSIZE_SHIFT) &
4262                                 IXGBE_SRRCTL_BSIZEHDR_MASK);
4263                         srrctl |= IXGBE_SRRCTL_DESCTYPE_HDR_SPLIT_ALWAYS;
4264                 } else
4265 #endif
4266                         srrctl = IXGBE_SRRCTL_DESCTYPE_ADV_ONEBUF;
4267
4268                 /* Set if packets are dropped when no descriptors available */
4269                 if (rxq->drop_en)
4270                         srrctl |= IXGBE_SRRCTL_DROP_EN;
4271
4272                 /*
4273                  * Configure the RX buffer size in the BSIZEPACKET field of
4274                  * the SRRCTL register of the queue.
4275                  * The value is in 1 KB resolution. Valid values can be from
4276                  * 1 KB to 16 KB.
4277                  */
4278                 buf_size = (uint16_t)(rte_pktmbuf_data_room_size(rxq->mb_pool) -
4279                         RTE_PKTMBUF_HEADROOM);
4280                 srrctl |= ((buf_size >> IXGBE_SRRCTL_BSIZEPKT_SHIFT) &
4281                            IXGBE_SRRCTL_BSIZEPKT_MASK);
4282
4283                 IXGBE_WRITE_REG(hw, IXGBE_SRRCTL(rxq->reg_idx), srrctl);
4284
4285                 buf_size = (uint16_t) ((srrctl & IXGBE_SRRCTL_BSIZEPKT_MASK) <<
4286                                        IXGBE_SRRCTL_BSIZEPKT_SHIFT);
4287
4288                 /* Add dual VLAN tag length to account for double VLAN frames */
4289                 if (dev->data->dev_conf.rxmode.max_rx_pkt_len +
4290                                             2 * IXGBE_VLAN_TAG_SIZE > buf_size)
4291                         dev->data->scattered_rx = 1;
4292         }
4293
4294         if (rx_conf->enable_scatter)
4295                 dev->data->scattered_rx = 1;
4296
4297         /*
4298          * Device configured with multiple RX queues.
4299          */
4300         ixgbe_dev_mq_rx_configure(dev);
4301
4302         /*
4303          * Setup the Checksum Register.
4304          * Disable Full-Packet Checksum which is mutually exclusive with RSS.
4305          * Enable IP/L4 checksum computation by hardware if requested to do so.
4306          */
4307         rxcsum = IXGBE_READ_REG(hw, IXGBE_RXCSUM);
4308         rxcsum |= IXGBE_RXCSUM_PCSD;
4309         if (rx_conf->hw_ip_checksum)
4310                 rxcsum |= IXGBE_RXCSUM_IPPCSE;
4311         else
4312                 rxcsum &= ~IXGBE_RXCSUM_IPPCSE;
4313
4314         IXGBE_WRITE_REG(hw, IXGBE_RXCSUM, rxcsum);
4315
4316         if (hw->mac.type == ixgbe_mac_82599EB ||
4317             hw->mac.type == ixgbe_mac_X540) {
4318                 rdrxctl = IXGBE_READ_REG(hw, IXGBE_RDRXCTL);
4319                 if (rx_conf->hw_strip_crc)
4320                         rdrxctl |= IXGBE_RDRXCTL_CRCSTRIP;
4321                 else
4322                         rdrxctl &= ~IXGBE_RDRXCTL_CRCSTRIP;
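                /* RSCFRSTSIZE is expected to be written as zero by software. */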
4323                 rdrxctl &= ~IXGBE_RDRXCTL_RSCFRSTSIZE;
4324                 IXGBE_WRITE_REG(hw, IXGBE_RDRXCTL, rdrxctl);
4325         }
4326
4327         rc = ixgbe_set_rsc(dev);
4328         if (rc)
4329                 return rc;
4330
4331         ixgbe_set_rx_function(dev);
4332
4333         return 0;
4334 }
4335
4336 /*
4337  * Initializes Transmit Unit.
4338  */
4339 void __attribute__((cold))
4340 ixgbe_dev_tx_init(struct rte_eth_dev *dev)
4341 {
4342         struct ixgbe_hw     *hw;
4343         struct ixgbe_tx_queue *txq;
4344         uint64_t bus_addr;
4345         uint32_t hlreg0;
4346         uint32_t txctrl;
4347         uint16_t i;
4348
4349         PMD_INIT_FUNC_TRACE();
4350         hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
4351
4352         /* Enable TX CRC (checksum offload requirement) and hw padding
4353          * (TSO requirement) */
4354         hlreg0 = IXGBE_READ_REG(hw, IXGBE_HLREG0);
4355         hlreg0 |= (IXGBE_HLREG0_TXCRCEN | IXGBE_HLREG0_TXPADEN);
4356         IXGBE_WRITE_REG(hw, IXGBE_HLREG0, hlreg0);
4357
4358         /* Setup the Base and Length of the Tx Descriptor Rings */
4359         for (i = 0; i < dev->data->nb_tx_queues; i++) {
4360                 txq = dev->data->tx_queues[i];
4361
4362                 bus_addr = txq->tx_ring_phys_addr;
4363                 IXGBE_WRITE_REG(hw, IXGBE_TDBAL(txq->reg_idx),
4364                                 (uint32_t)(bus_addr & 0x00000000ffffffffULL));
4365                 IXGBE_WRITE_REG(hw, IXGBE_TDBAH(txq->reg_idx),
4366                                 (uint32_t)(bus_addr >> 32));
4367                 IXGBE_WRITE_REG(hw, IXGBE_TDLEN(txq->reg_idx),
4368                                 txq->nb_tx_desc * sizeof(union ixgbe_adv_tx_desc));
4369                 /* Setup the HW Tx Head and TX Tail descriptor pointers */
4370                 IXGBE_WRITE_REG(hw, IXGBE_TDH(txq->reg_idx), 0);
4371                 IXGBE_WRITE_REG(hw, IXGBE_TDT(txq->reg_idx), 0);
4372
4373                 /*
4374                  * Disable Tx Head Writeback RO bit, since this hoses
4375                  * bookkeeping if things aren't delivered in order.
4376                  */
4377                 switch (hw->mac.type) {
4378                         case ixgbe_mac_82598EB:
4379                                 txctrl = IXGBE_READ_REG(hw,
4380                                                         IXGBE_DCA_TXCTRL(txq->reg_idx));
4381                                 txctrl &= ~IXGBE_DCA_TXCTRL_DESC_WRO_EN;
4382                                 IXGBE_WRITE_REG(hw, IXGBE_DCA_TXCTRL(txq->reg_idx),
4383                                                 txctrl);
4384                                 break;
4385
4386                         case ixgbe_mac_82599EB:
4387                         case ixgbe_mac_X540:
4388                         case ixgbe_mac_X550:
4389                         case ixgbe_mac_X550EM_x:
4390                         default:
4391                                 txctrl = IXGBE_READ_REG(hw,
4392                                                 IXGBE_DCA_TXCTRL_82599(txq->reg_idx));
4393                                 txctrl &= ~IXGBE_DCA_TXCTRL_DESC_WRO_EN;
4394                                 IXGBE_WRITE_REG(hw, IXGBE_DCA_TXCTRL_82599(txq->reg_idx),
4395                                                 txctrl);
4396                                 break;
4397                 }
4398         }
4399
4400         /* Device configured with multiple TX queues. */
4401         ixgbe_dev_mq_tx_configure(dev);
4402 }
4403
4404 /*
4405  * Set up link for 82599 loopback mode Tx->Rx.
4406  */
4407 static inline void __attribute__((cold))
4408 ixgbe_setup_loopback_link_82599(struct ixgbe_hw *hw)
4409 {
4410         PMD_INIT_FUNC_TRACE();
4411
4412         if (ixgbe_verify_lesm_fw_enabled_82599(hw)) {
4413                 if (hw->mac.ops.acquire_swfw_sync(hw, IXGBE_GSSR_MAC_CSR_SM) !=
4414                                 IXGBE_SUCCESS) {
4415                         PMD_INIT_LOG(ERR, "Could not enable loopback mode");
4416                         /* ignore error */
4417                         return;
4418                 }
4419         }
4420
4421         /* Restart link */
4422         IXGBE_WRITE_REG(hw,
4423                         IXGBE_AUTOC,
4424                         IXGBE_AUTOC_LMS_10G_LINK_NO_AN | IXGBE_AUTOC_FLU);
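        /* Reset the Tx/Rx pipeline so that the new AUTOC value takes effect. */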
4425         ixgbe_reset_pipeline_82599(hw);
4426
4427         hw->mac.ops.release_swfw_sync(hw, IXGBE_GSSR_MAC_CSR_SM);
4428         msec_delay(50);
4429 }
4430
4431
4432 /*
4433  * Start Transmit and Receive Units.
4434  */
4435 int __attribute__((cold))
4436 ixgbe_dev_rxtx_start(struct rte_eth_dev *dev)
4437 {
4438         struct ixgbe_hw     *hw;
4439         struct ixgbe_tx_queue *txq;
4440         struct ixgbe_rx_queue *rxq;
4441         uint32_t txdctl;
4442         uint32_t dmatxctl;
4443         uint32_t rxctrl;
4444         uint16_t i;
4445         int ret = 0;
4446
4447         PMD_INIT_FUNC_TRACE();
4448         hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
4449
4450         for (i = 0; i < dev->data->nb_tx_queues; i++) {
4451                 txq = dev->data->tx_queues[i];
4452                 /* Setup Transmit Threshold Registers */
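                /* TXDCTL: PTHRESH in bits 6:0, HTHRESH in 14:8, WTHRESH in 22:16 */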
4453                 txdctl = IXGBE_READ_REG(hw, IXGBE_TXDCTL(txq->reg_idx));
4454                 txdctl |= txq->pthresh & 0x7F;
4455                 txdctl |= ((txq->hthresh & 0x7F) << 8);
4456                 txdctl |= ((txq->wthresh & 0x7F) << 16);
4457                 IXGBE_WRITE_REG(hw, IXGBE_TXDCTL(txq->reg_idx), txdctl);
4458         }
4459
4460         if (hw->mac.type != ixgbe_mac_82598EB) {
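                /*
                 * On 82599 and later the global Tx DMA engine (DMATXCTL.TE)
                 * must be enabled before any individual Tx queue is enabled.
                 */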
4461                 dmatxctl = IXGBE_READ_REG(hw, IXGBE_DMATXCTL);
4462                 dmatxctl |= IXGBE_DMATXCTL_TE;
4463                 IXGBE_WRITE_REG(hw, IXGBE_DMATXCTL, dmatxctl);
4464         }
4465
4466         for (i = 0; i < dev->data->nb_tx_queues; i++) {
4467                 txq = dev->data->tx_queues[i];
4468                 if (!txq->tx_deferred_start) {
4469                         ret = ixgbe_dev_tx_queue_start(dev, i);
4470                         if (ret < 0)
4471                                 return ret;
4472                 }
4473         }
4474
4475         for (i = 0; i < dev->data->nb_rx_queues; i++) {
4476                 rxq = dev->data->rx_queues[i];
4477                 if (!rxq->rx_deferred_start) {
4478                         ret = ixgbe_dev_rx_queue_start(dev, i);
4479                         if (ret < 0)
4480                                 return ret;
4481                 }
4482         }
4483
4484         /* Enable Receive engine */
4485         rxctrl = IXGBE_READ_REG(hw, IXGBE_RXCTRL);
4486         if (hw->mac.type == ixgbe_mac_82598EB)
4487                 rxctrl |= IXGBE_RXCTRL_DMBYPS;
4488         rxctrl |= IXGBE_RXCTRL_RXEN;
4489         hw->mac.ops.enable_rx_dma(hw, rxctrl);
4490
4491         /* If loopback mode is enabled for 82599, set up the link accordingly */
4492         if (hw->mac.type == ixgbe_mac_82599EB &&
4493                         dev->data->dev_conf.lpbk_mode == IXGBE_LPBK_82599_TX_RX)
4494                 ixgbe_setup_loopback_link_82599(hw);
4495
4496         return 0;
4497 }
4498
4499 /*
4500  * Start Receive Units for specified queue.
4501  */
4502 int __attribute__((cold))
4503 ixgbe_dev_rx_queue_start(struct rte_eth_dev *dev, uint16_t rx_queue_id)
4504 {
4505         struct ixgbe_hw     *hw;
4506         struct ixgbe_rx_queue *rxq;
4507         uint32_t rxdctl;
4508         int poll_ms;
4509
4510         PMD_INIT_FUNC_TRACE();
4511         hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
4512
4513         if (rx_queue_id < dev->data->nb_rx_queues) {
4514                 rxq = dev->data->rx_queues[rx_queue_id];
4515
4516                 /* Allocate buffers for descriptor rings */
4517                 if (ixgbe_alloc_rx_queue_mbufs(rxq) != 0) {
4518                         PMD_INIT_LOG(ERR, "Could not alloc mbuf for queue:%d",
4519                                      rx_queue_id);
4520                         return -1;
4521                 }
4522                 rxdctl = IXGBE_READ_REG(hw, IXGBE_RXDCTL(rxq->reg_idx));
4523                 rxdctl |= IXGBE_RXDCTL_ENABLE;
4524                 IXGBE_WRITE_REG(hw, IXGBE_RXDCTL(rxq->reg_idx), rxdctl);
4525
4526                 /* Wait until RX Enable ready */
4527                 poll_ms = RTE_IXGBE_REGISTER_POLL_WAIT_10_MS;
4528                 do {
4529                         rte_delay_ms(1);
4530                         rxdctl = IXGBE_READ_REG(hw, IXGBE_RXDCTL(rxq->reg_idx));
4531                 } while (--poll_ms && !(rxdctl & IXGBE_RXDCTL_ENABLE));
4532                 if (!poll_ms)
4533                         PMD_INIT_LOG(ERR, "Could not enable Rx Queue %d",
4534                                      rx_queue_id);
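                /*
                 * Make the descriptor ring writes globally visible before the
                 * tail update exposes the descriptors to the hardware.
                 */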
4535                 rte_wmb();
4536                 IXGBE_WRITE_REG(hw, IXGBE_RDH(rxq->reg_idx), 0);
4537                 IXGBE_WRITE_REG(hw, IXGBE_RDT(rxq->reg_idx), rxq->nb_rx_desc - 1);
4538                 dev->data->rx_queue_state[rx_queue_id] = RTE_ETH_QUEUE_STATE_STARTED;
4539         } else
4540                 return -1;
4541
4542         return 0;
4543 }
4544
4545 /*
4546  * Stop Receive Units for specified queue.
4547  */
4548 int __attribute__((cold))
4549 ixgbe_dev_rx_queue_stop(struct rte_eth_dev *dev, uint16_t rx_queue_id)
4550 {
4551         struct ixgbe_hw     *hw;
4552         struct ixgbe_adapter *adapter =
4553                 (struct ixgbe_adapter *)dev->data->dev_private;
4554         struct ixgbe_rx_queue *rxq;
4555         uint32_t rxdctl;
4556         int poll_ms;
4557
4558         PMD_INIT_FUNC_TRACE();
4559         hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
4560
4561         if (rx_queue_id < dev->data->nb_rx_queues) {
4562                 rxq = dev->data->rx_queues[rx_queue_id];
4563
4564                 rxdctl = IXGBE_READ_REG(hw, IXGBE_RXDCTL(rxq->reg_idx));
4565                 rxdctl &= ~IXGBE_RXDCTL_ENABLE;
4566                 IXGBE_WRITE_REG(hw, IXGBE_RXDCTL(rxq->reg_idx), rxdctl);
4567
4568                 /* Wait until RX Enable ready */
4569                 poll_ms = RTE_IXGBE_REGISTER_POLL_WAIT_10_MS;
4570                 do {
4571                         rte_delay_ms(1);
4572                         rxdctl = IXGBE_READ_REG(hw, IXGBE_RXDCTL(rxq->reg_idx));
4573                 } while (--poll_ms && (rxdctl & IXGBE_RXDCTL_ENABLE));
4574                 if (!poll_ms)
4575                         PMD_INIT_LOG(ERR, "Could not disable Rx Queue %d",
4576                                      rx_queue_id);
4577
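                /* Give in-flight DMA time to complete before freeing the mbufs. */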
4578                 rte_delay_us(RTE_IXGBE_WAIT_100_US);
4579
4580                 ixgbe_rx_queue_release_mbufs(rxq);
4581                 ixgbe_reset_rx_queue(adapter, rxq);
4582                 dev->data->rx_queue_state[rx_queue_id] = RTE_ETH_QUEUE_STATE_STOPPED;
4583         } else
4584                 return -1;
4585
4586         return 0;
4587 }
4588
4589
4590 /*
4591  * Start Transmit Units for specified queue.
4592  */
4593 int __attribute__((cold))
4594 ixgbe_dev_tx_queue_start(struct rte_eth_dev *dev, uint16_t tx_queue_id)
4595 {
4596         struct ixgbe_hw     *hw;
4597         struct ixgbe_tx_queue *txq;
4598         uint32_t txdctl;
4599         int poll_ms;
4600
4601         PMD_INIT_FUNC_TRACE();
4602         hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
4603
4604         if (tx_queue_id < dev->data->nb_tx_queues) {
4605                 txq = dev->data->tx_queues[tx_queue_id];
4606                 txdctl = IXGBE_READ_REG(hw, IXGBE_TXDCTL(txq->reg_idx));
4607                 txdctl |= IXGBE_TXDCTL_ENABLE;
4608                 IXGBE_WRITE_REG(hw, IXGBE_TXDCTL(txq->reg_idx), txdctl);
4609
4610                 /* Wait until TX Enable ready */
4611                 if (hw->mac.type == ixgbe_mac_82599EB) {
4612                         poll_ms = RTE_IXGBE_REGISTER_POLL_WAIT_10_MS;
4613                         do {
4614                                 rte_delay_ms(1);
4615                                 txdctl = IXGBE_READ_REG(hw,
4616                                         IXGBE_TXDCTL(txq->reg_idx));
4617                         } while (--poll_ms && !(txdctl & IXGBE_TXDCTL_ENABLE));
4618                         if (!poll_ms)
4619                                 PMD_INIT_LOG(ERR, "Could not enable "
4620                                              "Tx Queue %d", tx_queue_id);
4621                 }
4622                 rte_wmb();
4623                 IXGBE_WRITE_REG(hw, IXGBE_TDH(txq->reg_idx), 0);
4624                 IXGBE_WRITE_REG(hw, IXGBE_TDT(txq->reg_idx), 0);
4625                 dev->data->tx_queue_state[tx_queue_id] = RTE_ETH_QUEUE_STATE_STARTED;
4626         } else
4627                 return -1;
4628
4629         return 0;
4630 }
4631
4632 /*
4633  * Stop Transmit Units for specified queue.
4634  */
4635 int __attribute__((cold))
4636 ixgbe_dev_tx_queue_stop(struct rte_eth_dev *dev, uint16_t tx_queue_id)
4637 {
4638         struct ixgbe_hw     *hw;
4639         struct ixgbe_tx_queue *txq;
4640         uint32_t txdctl;
4641         uint32_t txtdh, txtdt;
4642         int poll_ms;
4643
4644         PMD_INIT_FUNC_TRACE();
4645         hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
4646
4647         if (tx_queue_id < dev->data->nb_tx_queues) {
4648                 txq = dev->data->tx_queues[tx_queue_id];
4649
4650                 /* Wait until TX queue is empty */
4651                 if (hw->mac.type == ixgbe_mac_82599EB) {
4652                         poll_ms = RTE_IXGBE_REGISTER_POLL_WAIT_10_MS;
4653                         do {
4654                                 rte_delay_us(RTE_IXGBE_WAIT_100_US);
4655                                 txtdh = IXGBE_READ_REG(hw,
4656                                                 IXGBE_TDH(txq->reg_idx));
4657                                 txtdt = IXGBE_READ_REG(hw,
4658                                                 IXGBE_TDT(txq->reg_idx));
4659                         } while (--poll_ms && (txtdh != txtdt));
4660                         if (!poll_ms)
4661                                 PMD_INIT_LOG(ERR, "Tx Queue %d is not empty "
4662                                              "when stopping.", tx_queue_id);
4663                 }
4664
4665                 txdctl = IXGBE_READ_REG(hw, IXGBE_TXDCTL(txq->reg_idx));
4666                 txdctl &= ~IXGBE_TXDCTL_ENABLE;
4667                 IXGBE_WRITE_REG(hw, IXGBE_TXDCTL(txq->reg_idx), txdctl);
4668
4669                 /* Wait until TX Enable ready */
4670                 if (hw->mac.type == ixgbe_mac_82599EB) {
4671                         poll_ms = RTE_IXGBE_REGISTER_POLL_WAIT_10_MS;
4672                         do {
4673                                 rte_delay_ms(1);
4674                                 txdctl = IXGBE_READ_REG(hw,
4675                                                 IXGBE_TXDCTL(txq->reg_idx));
4676                         } while (--poll_ms && (txdctl & IXGBE_TXDCTL_ENABLE));
4677                         if (!poll_ms)
4678                                 PMD_INIT_LOG(ERR, "Could not disable "
4679                                              "Tx Queue %d", tx_queue_id);
4680                 }
4681
4682                 if (txq->ops != NULL) {
4683                         txq->ops->release_mbufs(txq);
4684                         txq->ops->reset(txq);
4685                 }
4686                 dev->data->tx_queue_state[tx_queue_id] = RTE_ETH_QUEUE_STATE_STOPPED;
4687         } else
4688                 return -1;
4689
4690         return 0;
4691 }
4692
4693 void
4694 ixgbe_rxq_info_get(struct rte_eth_dev *dev, uint16_t queue_id,
4695         struct rte_eth_rxq_info *qinfo)
4696 {
4697         struct ixgbe_rx_queue *rxq;
4698
4699         rxq = dev->data->rx_queues[queue_id];
4700
4701         qinfo->mp = rxq->mb_pool;
4702         qinfo->scattered_rx = dev->data->scattered_rx;
4703         qinfo->nb_desc = rxq->nb_rx_desc;
4704
4705         qinfo->conf.rx_free_thresh = rxq->rx_free_thresh;
4706         qinfo->conf.rx_drop_en = rxq->drop_en;
4707         qinfo->conf.rx_deferred_start = rxq->rx_deferred_start;
4708 }
4709
4710 void
4711 ixgbe_txq_info_get(struct rte_eth_dev *dev, uint16_t queue_id,
4712         struct rte_eth_txq_info *qinfo)
4713 {
4714         struct ixgbe_tx_queue *txq;
4715
4716         txq = dev->data->tx_queues[queue_id];
4717
4718         qinfo->nb_desc = txq->nb_tx_desc;
4719
4720         qinfo->conf.tx_thresh.pthresh = txq->pthresh;
4721         qinfo->conf.tx_thresh.hthresh = txq->hthresh;
4722         qinfo->conf.tx_thresh.wthresh = txq->wthresh;
4723
4724         qinfo->conf.tx_free_thresh = txq->tx_free_thresh;
4725         qinfo->conf.tx_rs_thresh = txq->tx_rs_thresh;
4726         qinfo->conf.txq_flags = txq->txq_flags;
4727         qinfo->conf.tx_deferred_start = txq->tx_deferred_start;
4728 }
4729
4730 /*
4731  * [VF] Initializes Receive Unit.
4732  */
4733 int __attribute__((cold))
4734 ixgbevf_dev_rx_init(struct rte_eth_dev *dev)
4735 {
4736         struct ixgbe_hw     *hw;
4737         struct ixgbe_rx_queue *rxq;
4738         uint64_t bus_addr;
4739         uint32_t srrctl, psrtype = 0;
4740         uint16_t buf_size;
4741         uint16_t i;
4742         int ret;
4743
4744         PMD_INIT_FUNC_TRACE();
4745         hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
4746
4747         if (rte_is_power_of_2(dev->data->nb_rx_queues) == 0) {
4748                 PMD_INIT_LOG(ERR, "The number of Rx queues is invalid, "
4749                         "it must be a power of 2");
4750                 return -1;
4751         }
4752
4753         if (dev->data->nb_rx_queues > hw->mac.max_rx_queues) {
4754                 PMD_INIT_LOG(ERR, "The number of Rx queues is invalid, "
4755                         "it must be equal to or less than %d",
4756                         hw->mac.max_rx_queues);
4757                 return -1;
4758         }
4759
4760         /*
4761          * When the VF driver issues an IXGBE_VF_RESET request, the PF driver
4762          * disables the VF receipt of packets if the PF MTU is > 1500.
4763          * This is done to deal with a limitation of the 82599 that requires
4764          * the PF and all VFs to share the same MTU.
4765          * The PF driver then re-enables VF packet receipt when the VF driver
4766          * issues an IXGBE_VF_SET_LPE request.
4767          * In the meantime, the VF device cannot be used, even if the VF driver
4768          * and the Guest VM network stack are ready to accept packets with a
4769          * size up to the PF MTU.
4770          * As a work-around to this PF behaviour, force the call to
4771          * ixgbevf_rlpml_set_vf even if jumbo frames are not used. This way,
4772          * received VF packets work in all cases.
4773          */
4774         ixgbevf_rlpml_set_vf(hw,
4775                 (uint16_t)dev->data->dev_conf.rxmode.max_rx_pkt_len);
4776
4777         /* Setup RX queues */
4778         for (i = 0; i < dev->data->nb_rx_queues; i++) {
4779                 rxq = dev->data->rx_queues[i];
4780
4781                 /* Allocate buffers for descriptor rings */
4782                 ret = ixgbe_alloc_rx_queue_mbufs(rxq);
4783                 if (ret)
4784                         return ret;
4785
4786                 /* Setup the Base and Length of the Rx Descriptor Rings */
4787                 bus_addr = rxq->rx_ring_phys_addr;
4788
4789                 IXGBE_WRITE_REG(hw, IXGBE_VFRDBAL(i),
4790                                 (uint32_t)(bus_addr & 0x00000000ffffffffULL));
4791                 IXGBE_WRITE_REG(hw, IXGBE_VFRDBAH(i),
4792                                 (uint32_t)(bus_addr >> 32));
4793                 IXGBE_WRITE_REG(hw, IXGBE_VFRDLEN(i),
4794                                 rxq->nb_rx_desc * sizeof(union ixgbe_adv_rx_desc));
4795                 IXGBE_WRITE_REG(hw, IXGBE_VFRDH(i), 0);
4796                 IXGBE_WRITE_REG(hw, IXGBE_VFRDT(i), 0);
4797
4798
4799                 /* Configure the SRRCTL register */
4800 #ifdef RTE_HEADER_SPLIT_ENABLE
4801                 /*
4802                  * Configure Header Split
4803                  */
4804                 if (dev->data->dev_conf.rxmode.header_split) {
4805                         srrctl = ((dev->data->dev_conf.rxmode.split_hdr_size <<
4806                                 IXGBE_SRRCTL_BSIZEHDRSIZE_SHIFT) &
4807                                 IXGBE_SRRCTL_BSIZEHDR_MASK);
4808                         srrctl |= IXGBE_SRRCTL_DESCTYPE_HDR_SPLIT_ALWAYS;
4809                 } else
4810 #endif
4811                         srrctl = IXGBE_SRRCTL_DESCTYPE_ADV_ONEBUF;
4812
4813                 /* Set if packets are dropped when no descriptors available */
4814                 if (rxq->drop_en)
4815                         srrctl |= IXGBE_SRRCTL_DROP_EN;
4816
4817                 /*
4818                  * Configure the RX buffer size in the BSIZEPACKET field of
4819                  * the SRRCTL register of the queue.
4820                  * The value is in 1 KB resolution. Valid values can be from
4821                  * 1 KB to 16 KB.
4822                  */
4823                 buf_size = (uint16_t)(rte_pktmbuf_data_room_size(rxq->mb_pool) -
4824                         RTE_PKTMBUF_HEADROOM);
4825                 srrctl |= ((buf_size >> IXGBE_SRRCTL_BSIZEPKT_SHIFT) &
4826                            IXGBE_SRRCTL_BSIZEPKT_MASK);
4827
4828                 /*
4829                  * VF modification to write virtual function SRRCTL register
4830                  */
4831                 IXGBE_WRITE_REG(hw, IXGBE_VFSRRCTL(i), srrctl);
4832
4833                 buf_size = (uint16_t) ((srrctl & IXGBE_SRRCTL_BSIZEPKT_MASK) <<
4834                                        IXGBE_SRRCTL_BSIZEPKT_SHIFT);
4835
4836                 if (dev->data->dev_conf.rxmode.enable_scatter ||
4837                     /* Add dual VLAN tag length to account for double VLAN frames */
4838                     (dev->data->dev_conf.rxmode.max_rx_pkt_len +
4839                                 2 * IXGBE_VLAN_TAG_SIZE) > buf_size) {
4840                         if (!dev->data->scattered_rx)
4841                                 PMD_INIT_LOG(DEBUG, "forcing scatter mode");
4842                         dev->data->scattered_rx = 1;
4843                 }
4844         }
4845
4846 #ifdef RTE_HEADER_SPLIT_ENABLE
4847         if (dev->data->dev_conf.rxmode.header_split)
4848                 /* Must setup the PSRTYPE register */
4849                 psrtype = IXGBE_PSRTYPE_TCPHDR |
4850                         IXGBE_PSRTYPE_UDPHDR   |
4851                         IXGBE_PSRTYPE_IPV4HDR  |
4852                         IXGBE_PSRTYPE_IPV6HDR;
4853 #endif
4854
4855         /* Set RQPL for VF RSS according to max Rx queue */
4856         psrtype |= (dev->data->nb_rx_queues >> 1) <<
4857                 IXGBE_PSRTYPE_RQPL_SHIFT;
4858         IXGBE_WRITE_REG(hw, IXGBE_VFPSRTYPE, psrtype);
4859
4860         ixgbe_set_rx_function(dev);
4861
4862         return 0;
4863 }
4864
4865 /*
4866  * [VF] Initializes Transmit Unit.
4867  */
4868 void __attribute__((cold))
4869 ixgbevf_dev_tx_init(struct rte_eth_dev *dev)
4870 {
4871         struct ixgbe_hw     *hw;
4872         struct ixgbe_tx_queue *txq;
4873         uint64_t bus_addr;
4874         uint32_t txctrl;
4875         uint16_t i;
4876
4877         PMD_INIT_FUNC_TRACE();
4878         hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
4879
4880         /* Setup the Base and Length of the Tx Descriptor Rings */
4881         for (i = 0; i < dev->data->nb_tx_queues; i++) {
4882                 txq = dev->data->tx_queues[i];
4883                 bus_addr = txq->tx_ring_phys_addr;
4884                 IXGBE_WRITE_REG(hw, IXGBE_VFTDBAL(i),
4885                                 (uint32_t)(bus_addr & 0x00000000ffffffffULL));
4886                 IXGBE_WRITE_REG(hw, IXGBE_VFTDBAH(i),
4887                                 (uint32_t)(bus_addr >> 32));
4888                 IXGBE_WRITE_REG(hw, IXGBE_VFTDLEN(i),
4889                                 txq->nb_tx_desc * sizeof(union ixgbe_adv_tx_desc));
4890                 /* Setup the HW Tx Head and TX Tail descriptor pointers */
4891                 IXGBE_WRITE_REG(hw, IXGBE_VFTDH(i), 0);
4892                 IXGBE_WRITE_REG(hw, IXGBE_VFTDT(i), 0);
4893
4894                 /*
4895                  * Disable Tx Head Writeback RO bit, since this hoses
4896                  * bookkeeping if things aren't delivered in order.
4897                  */
4898                 txctrl = IXGBE_READ_REG(hw,
4899                                 IXGBE_VFDCA_TXCTRL(i));
4900                 txctrl &= ~IXGBE_DCA_TXCTRL_DESC_WRO_EN;
4901                 IXGBE_WRITE_REG(hw, IXGBE_VFDCA_TXCTRL(i),
4902                                 txctrl);
4903         }
4904 }
4905
4906 /*
4907  * [VF] Start Transmit and Receive Units.
4908  */
4909 void __attribute__((cold))
4910 ixgbevf_dev_rxtx_start(struct rte_eth_dev *dev)
4911 {
4912         struct ixgbe_hw     *hw;
4913         struct ixgbe_tx_queue *txq;
4914         struct ixgbe_rx_queue *rxq;
4915         uint32_t txdctl;
4916         uint32_t rxdctl;
4917         uint16_t i;
4918         int poll_ms;
4919
4920         PMD_INIT_FUNC_TRACE();
4921         hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
4922
4923         for (i = 0; i < dev->data->nb_tx_queues; i++) {
4924                 txq = dev->data->tx_queues[i];
4925                 /* Setup Transmit Threshold Registers */
4926                 txdctl = IXGBE_READ_REG(hw, IXGBE_VFTXDCTL(i));
4927                 txdctl |= txq->pthresh & 0x7F;
4928                 txdctl |= ((txq->hthresh & 0x7F) << 8);
4929                 txdctl |= ((txq->wthresh & 0x7F) << 16);
4930                 IXGBE_WRITE_REG(hw, IXGBE_VFTXDCTL(i), txdctl);
4931         }
4932
4933         for (i = 0; i < dev->data->nb_tx_queues; i++) {
4934
4935                 txdctl = IXGBE_READ_REG(hw, IXGBE_VFTXDCTL(i));
4936                 txdctl |= IXGBE_TXDCTL_ENABLE;
4937                 IXGBE_WRITE_REG(hw, IXGBE_VFTXDCTL(i), txdctl);
4938
4939                 poll_ms = 10;
4940                 /* Wait until TX Enable ready */
4941                 do {
4942                         rte_delay_ms(1);
4943                         txdctl = IXGBE_READ_REG(hw, IXGBE_VFTXDCTL(i));
4944                 } while (--poll_ms && !(txdctl & IXGBE_TXDCTL_ENABLE));
4945                 if (!poll_ms)
4946                         PMD_INIT_LOG(ERR, "Could not enable Tx Queue %d", i);
4947         }
4948         for (i = 0; i < dev->data->nb_rx_queues; i++) {
4949
4950                 rxq = dev->data->rx_queues[i];
4951
4952                 rxdctl = IXGBE_READ_REG(hw, IXGBE_VFRXDCTL(i));
4953                 rxdctl |= IXGBE_RXDCTL_ENABLE;
4954                 IXGBE_WRITE_REG(hw, IXGBE_VFRXDCTL(i), rxdctl);
4955
4956                 /* Wait until RX Enable ready */
4957                 poll_ms = 10;
4958                 do {
4959                         rte_delay_ms(1);
4960                         rxdctl = IXGBE_READ_REG(hw, IXGBE_VFRXDCTL(i));
4961                 } while (--poll_ms && !(rxdctl & IXGBE_RXDCTL_ENABLE));
4962                 if (!poll_ms)
4963                         PMD_INIT_LOG(ERR, "Could not enable Rx Queue %d", i);
4964                 rte_wmb();
4965                 IXGBE_WRITE_REG(hw, IXGBE_VFRDT(i), rxq->nb_rx_desc - 1);
4966
4967         }
4968 }
4969
4970 /* Stubs needed for linkage when CONFIG_RTE_IXGBE_INC_VECTOR is set to 'n' */
4971 int __attribute__((weak))
4972 ixgbe_rx_vec_dev_conf_condition_check(struct rte_eth_dev __rte_unused *dev)
4973 {
4974         return -1;
4975 }
4976
4977 uint16_t __attribute__((weak))
4978 ixgbe_recv_pkts_vec(
4979         void __rte_unused *rx_queue,
4980         struct rte_mbuf __rte_unused **rx_pkts,
4981         uint16_t __rte_unused nb_pkts)
4982 {
4983         return 0;
4984 }
4985
4986 uint16_t __attribute__((weak))
4987 ixgbe_recv_scattered_pkts_vec(
4988         void __rte_unused *rx_queue,
4989         struct rte_mbuf __rte_unused **rx_pkts,
4990         uint16_t __rte_unused nb_pkts)
4991 {
4992         return 0;
4993 }
4994
4995 int __attribute__((weak))
4996 ixgbe_rxq_vec_setup(struct ixgbe_rx_queue __rte_unused *rxq)
4997 {
4998         return -1;
4999 }