ixgbe: offload VxLAN and NVGRE Rx checksum on X550
[dpdk.git] drivers/net/ixgbe/ixgbe_rxtx.c
1 /*-
2  *   BSD LICENSE
3  *
4  *   Copyright(c) 2010-2015 Intel Corporation. All rights reserved.
5  *   Copyright 2014 6WIND S.A.
6  *   All rights reserved.
7  *
8  *   Redistribution and use in source and binary forms, with or without
9  *   modification, are permitted provided that the following conditions
10  *   are met:
11  *
12  *     * Redistributions of source code must retain the above copyright
13  *       notice, this list of conditions and the following disclaimer.
14  *     * Redistributions in binary form must reproduce the above copyright
15  *       notice, this list of conditions and the following disclaimer in
16  *       the documentation and/or other materials provided with the
17  *       distribution.
18  *     * Neither the name of Intel Corporation nor the names of its
19  *       contributors may be used to endorse or promote products derived
20  *       from this software without specific prior written permission.
21  *
22  *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
23  *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
24  *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
25  *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
26  *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
27  *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
28  *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
29  *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
30  *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
31  *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
32  *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
33  */
34
35 #include <sys/queue.h>
36
37 #include <stdio.h>
38 #include <stdlib.h>
39 #include <string.h>
40 #include <errno.h>
41 #include <stdint.h>
42 #include <stdarg.h>
43 #include <unistd.h>
44 #include <inttypes.h>
45
46 #include <rte_byteorder.h>
47 #include <rte_common.h>
48 #include <rte_cycles.h>
49 #include <rte_log.h>
50 #include <rte_debug.h>
51 #include <rte_interrupts.h>
52 #include <rte_pci.h>
53 #include <rte_memory.h>
54 #include <rte_memzone.h>
55 #include <rte_launch.h>
56 #include <rte_eal.h>
57 #include <rte_per_lcore.h>
58 #include <rte_lcore.h>
59 #include <rte_atomic.h>
60 #include <rte_branch_prediction.h>
61 #include <rte_ring.h>
62 #include <rte_mempool.h>
63 #include <rte_malloc.h>
64 #include <rte_mbuf.h>
65 #include <rte_ether.h>
66 #include <rte_ethdev.h>
67 #include <rte_prefetch.h>
68 #include <rte_udp.h>
69 #include <rte_tcp.h>
70 #include <rte_sctp.h>
71 #include <rte_string_fns.h>
72 #include <rte_errno.h>
73 #include <rte_ip.h>
74
75 #include "ixgbe_logs.h"
76 #include "base/ixgbe_api.h"
77 #include "base/ixgbe_vf.h"
78 #include "ixgbe_ethdev.h"
79 #include "base/ixgbe_dcb.h"
80 #include "base/ixgbe_common.h"
81 #include "ixgbe_rxtx.h"
82
83 /* Bit mask of the ol_flags bits that require a TX context descriptor to be set up */
84 #define IXGBE_TX_OFFLOAD_MASK (                  \
85                 PKT_TX_VLAN_PKT |                \
86                 PKT_TX_IP_CKSUM |                \
87                 PKT_TX_L4_MASK |                 \
88                 PKT_TX_TCP_SEG)
89
90 static inline struct rte_mbuf *
91 rte_rxmbuf_alloc(struct rte_mempool *mp)
92 {
93         struct rte_mbuf *m;
94
95         m = __rte_mbuf_raw_alloc(mp);
96         __rte_mbuf_sanity_check_raw(m, 0);
97         return m;
98 }
99
100
101 #if 1
102 #define RTE_PMD_USE_PREFETCH
103 #endif
104
105 #ifdef RTE_PMD_USE_PREFETCH
106 /*
107  * Prefetch a cache line into all cache levels.
108  */
109 #define rte_ixgbe_prefetch(p)   rte_prefetch0(p)
110 #else
111 #define rte_ixgbe_prefetch(p)   do {} while (0)
112 #endif
113
114 /*********************************************************************
115  *
116  *  TX functions
117  *
118  **********************************************************************/
119
120 /*
121  * Check for descriptors with their DD bit set and free mbufs.
122  * Return the total number of buffers freed.
123  */
124 static inline int __attribute__((always_inline))
125 ixgbe_tx_free_bufs(struct ixgbe_tx_queue *txq)
126 {
127         struct ixgbe_tx_entry *txep;
128         uint32_t status;
129         int i;
130
131         /* check DD bit on threshold descriptor */
132         status = txq->tx_ring[txq->tx_next_dd].wb.status;
133         if (!(status & rte_cpu_to_le_32(IXGBE_ADVTXD_STAT_DD)))
134                 return 0;
135
136         /*
137          * first buffer to free from S/W ring is at index
138          * tx_next_dd - (tx_rs_thresh-1)
139          */
140         txep = &(txq->sw_ring[txq->tx_next_dd - (txq->tx_rs_thresh - 1)]);
141
142         /* free buffers one at a time */
143         if ((txq->txq_flags & (uint32_t)ETH_TXQ_FLAGS_NOREFCOUNT) != 0) {
144                 for (i = 0; i < txq->tx_rs_thresh; ++i, ++txep) {
145                         txep->mbuf->next = NULL;
146                         rte_mempool_put(txep->mbuf->pool, txep->mbuf);
147                         txep->mbuf = NULL;
148                 }
149         } else {
150                 for (i = 0; i < txq->tx_rs_thresh; ++i, ++txep) {
151                         rte_pktmbuf_free_seg(txep->mbuf);
152                         txep->mbuf = NULL;
153                 }
154         }
155
156         /* buffers were freed, update counters */
157         txq->nb_tx_free = (uint16_t)(txq->nb_tx_free + txq->tx_rs_thresh);
158         txq->tx_next_dd = (uint16_t)(txq->tx_next_dd + txq->tx_rs_thresh);
159         if (txq->tx_next_dd >= txq->nb_tx_desc)
160                 txq->tx_next_dd = (uint16_t)(txq->tx_rs_thresh - 1);
161
162         return txq->tx_rs_thresh;
163 }
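/*
 * Worked example of the batch free above (threshold values assumed, not
 * taken from this file): with tx_rs_thresh = 32 and nb_tx_desc = 512,
 * tx_next_dd starts at 31.  Once descriptor 31 reports DD, the 32 mbufs in
 * sw_ring[0..31] are released and tx_next_dd advances to 63; after the last
 * threshold descriptor (511) it wraps back to 31.  Polling only the single
 * threshold descriptor lets one status read cover a whole batch of buffers.
 */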
164
165 /* Populate 4 descriptors with data from 4 mbufs */
166 static inline void
167 tx4(volatile union ixgbe_adv_tx_desc *txdp, struct rte_mbuf **pkts)
168 {
169         uint64_t buf_dma_addr;
170         uint32_t pkt_len;
171         int i;
172
173         for (i = 0; i < 4; ++i, ++txdp, ++pkts) {
174                 buf_dma_addr = rte_mbuf_data_dma_addr(*pkts);
175                 pkt_len = (*pkts)->data_len;
176
177                 /* write data to descriptor */
178                 txdp->read.buffer_addr = rte_cpu_to_le_64(buf_dma_addr);
179
180                 txdp->read.cmd_type_len =
181                         rte_cpu_to_le_32((uint32_t)DCMD_DTYP_FLAGS | pkt_len);
182
183                 txdp->read.olinfo_status =
184                         rte_cpu_to_le_32(pkt_len << IXGBE_ADVTXD_PAYLEN_SHIFT);
185
186                 rte_prefetch0(&(*pkts)->pool);
187         }
188 }
189
190 /* Populate 1 descriptor with data from 1 mbuf */
191 static inline void
192 tx1(volatile union ixgbe_adv_tx_desc *txdp, struct rte_mbuf **pkts)
193 {
194         uint64_t buf_dma_addr;
195         uint32_t pkt_len;
196
197         buf_dma_addr = rte_mbuf_data_dma_addr(*pkts);
198         pkt_len = (*pkts)->data_len;
199
200         /* write data to descriptor */
201         txdp->read.buffer_addr = rte_cpu_to_le_64(buf_dma_addr);
202         txdp->read.cmd_type_len =
203                         rte_cpu_to_le_32((uint32_t)DCMD_DTYP_FLAGS | pkt_len);
204         txdp->read.olinfo_status =
205                         rte_cpu_to_le_32(pkt_len << IXGBE_ADVTXD_PAYLEN_SHIFT);
206         rte_prefetch0(&(*pkts)->pool);
207 }
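/*
 * Note that tx4() and tx1() use data_len as the full packet length and emit
 * no context descriptor: they rely on the simple-path assumption (spelled
 * out in tx_xmit_pkts() below) that every mbuf is a single segment and that
 * no offloads requiring a new context are in use.
 */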
208
209 /*
210  * Fill H/W descriptor ring with mbuf data.
211  * Copy mbuf pointers to the S/W ring.
212  */
213 static inline void
214 ixgbe_tx_fill_hw_ring(struct ixgbe_tx_queue *txq, struct rte_mbuf **pkts,
215                       uint16_t nb_pkts)
216 {
217         volatile union ixgbe_adv_tx_desc *txdp = &(txq->tx_ring[txq->tx_tail]);
218         struct ixgbe_tx_entry *txep = &(txq->sw_ring[txq->tx_tail]);
219         const int N_PER_LOOP = 4;
220         const int N_PER_LOOP_MASK = N_PER_LOOP-1;
221         int mainpart, leftover;
222         int i, j;
223
224         /*
225          * Process most of the packets in chunks of N pkts.  Any
226          * leftover packets will get processed one at a time.
227          */
228         mainpart = (nb_pkts & ((uint32_t) ~N_PER_LOOP_MASK));
229         leftover = (nb_pkts & ((uint32_t)  N_PER_LOOP_MASK));
230         for (i = 0; i < mainpart; i += N_PER_LOOP) {
231                 /* Copy N mbuf pointers to the S/W ring */
232                 for (j = 0; j < N_PER_LOOP; ++j) {
233                         (txep + i + j)->mbuf = *(pkts + i + j);
234                 }
235                 tx4(txdp + i, pkts + i);
236         }
237
238         if (unlikely(leftover > 0)) {
239                 for (i = 0; i < leftover; ++i) {
240                         (txep + mainpart + i)->mbuf = *(pkts + mainpart + i);
241                         tx1(txdp + mainpart + i, pkts + mainpart + i);
242                 }
243         }
244 }
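/*
 * Example of the mainpart/leftover split above: for nb_pkts = 13 and
 * N_PER_LOOP = 4, mainpart = 13 & ~3 = 12 and leftover = 13 & 3 = 1, so
 * three tx4() calls fill descriptors 0-11 and one tx1() call fills
 * descriptor 12.
 */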
245
246 static inline uint16_t
247 tx_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts,
248              uint16_t nb_pkts)
249 {
250         struct ixgbe_tx_queue *txq = (struct ixgbe_tx_queue *)tx_queue;
251         volatile union ixgbe_adv_tx_desc *tx_r = txq->tx_ring;
252         uint16_t n = 0;
253
254         /*
255          * Begin scanning the H/W ring for done descriptors when the
256          * number of available descriptors drops below tx_free_thresh.  For
257          * each done descriptor, free the associated buffer.
258          */
259         if (txq->nb_tx_free < txq->tx_free_thresh)
260                 ixgbe_tx_free_bufs(txq);
261
262         /* Only use descriptors that are available */
263         nb_pkts = (uint16_t)RTE_MIN(txq->nb_tx_free, nb_pkts);
264         if (unlikely(nb_pkts == 0))
265                 return 0;
266
267         /* Use exactly nb_pkts descriptors */
268         txq->nb_tx_free = (uint16_t)(txq->nb_tx_free - nb_pkts);
269
270         /*
271          * At this point, we know there are enough descriptors in the
272          * ring to transmit all the packets.  This assumes that each
273          * mbuf contains a single segment, and that no new offloads
274          * are expected, which would require a new context descriptor.
275          */
276
277         /*
278          * See if we're going to wrap-around. If so, handle the top
279          * of the descriptor ring first, then do the bottom.  If not,
280          * the processing looks just like the "bottom" part anyway...
281          */
282         if ((txq->tx_tail + nb_pkts) > txq->nb_tx_desc) {
283                 n = (uint16_t)(txq->nb_tx_desc - txq->tx_tail);
284                 ixgbe_tx_fill_hw_ring(txq, tx_pkts, n);
285
286                 /*
287                  * We know that the last descriptor in the ring will need to
288                  * have its RS bit set because tx_rs_thresh has to be
289                  * a divisor of the ring size
290                  */
291                 tx_r[txq->tx_next_rs].read.cmd_type_len |=
292                         rte_cpu_to_le_32(IXGBE_ADVTXD_DCMD_RS);
293                 txq->tx_next_rs = (uint16_t)(txq->tx_rs_thresh - 1);
294
295                 txq->tx_tail = 0;
296         }
297
298         /* Fill H/W descriptor ring with mbuf data */
299         ixgbe_tx_fill_hw_ring(txq, tx_pkts + n, (uint16_t)(nb_pkts - n));
300         txq->tx_tail = (uint16_t)(txq->tx_tail + (nb_pkts - n));
301
302         /*
303          * Determine if RS bit should be set
304          * This is what we actually want:
305          *   if ((txq->tx_tail - 1) >= txq->tx_next_rs)
306          * but instead of subtracting 1 and doing >=, we can just do
307          * greater than without subtracting.
308          */
309         if (txq->tx_tail > txq->tx_next_rs) {
310                 tx_r[txq->tx_next_rs].read.cmd_type_len |=
311                         rte_cpu_to_le_32(IXGBE_ADVTXD_DCMD_RS);
312                 txq->tx_next_rs = (uint16_t)(txq->tx_next_rs +
313                                                 txq->tx_rs_thresh);
314                 if (txq->tx_next_rs >= txq->nb_tx_desc)
315                         txq->tx_next_rs = (uint16_t)(txq->tx_rs_thresh - 1);
316         }
317
318         /*
319          * Check for wrap-around. This would only happen if we used
320          * up to the last descriptor in the ring, no more, no less.
321          */
322         if (txq->tx_tail >= txq->nb_tx_desc)
323                 txq->tx_tail = 0;
324
325         /* update tail pointer */
326         rte_wmb();
327         IXGBE_PCI_REG_WRITE(txq->tdt_reg_addr, txq->tx_tail);
328
329         return nb_pkts;
330 }
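/*
 * Wrap-around sketch for tx_xmit_pkts() (ring size assumed, not from this
 * file): with nb_tx_desc = 128, tx_tail = 120 and nb_pkts = 16, the first
 * ixgbe_tx_fill_hw_ring() call writes descriptors 120-127, RS is forced on
 * the pending tx_next_rs descriptor (which falls at the end of the ring
 * because tx_rs_thresh divides the ring size), tx_tail is reset to 0, and
 * the second call writes descriptors 0-7 before the tail register is bumped.
 */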
331
332 uint16_t
333 ixgbe_xmit_pkts_simple(void *tx_queue, struct rte_mbuf **tx_pkts,
334                        uint16_t nb_pkts)
335 {
336         uint16_t nb_tx;
337
338         /* If the burst fits within TX_MAX_BURST packets, transmit it in one call */
339         if (likely(nb_pkts <= RTE_PMD_IXGBE_TX_MAX_BURST))
340                 return tx_xmit_pkts(tx_queue, tx_pkts, nb_pkts);
341
342         /* transmit more than the max burst, in chunks of TX_MAX_BURST */
343         nb_tx = 0;
344         while (nb_pkts) {
345                 uint16_t ret, n;
346                 n = (uint16_t)RTE_MIN(nb_pkts, RTE_PMD_IXGBE_TX_MAX_BURST);
347                 ret = tx_xmit_pkts(tx_queue, &(tx_pkts[nb_tx]), n);
348                 nb_tx = (uint16_t)(nb_tx + ret);
349                 nb_pkts = (uint16_t)(nb_pkts - ret);
350                 if (ret < n)
351                         break;
352         }
353
354         return nb_tx;
355 }
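/*
 * Burst-splitting example for ixgbe_xmit_pkts_simple(): assuming
 * RTE_PMD_IXGBE_TX_MAX_BURST is 32, a request for 100 packets is issued as
 * chunks of 32, 32, 32 and 4; the loop stops early, returning a short
 * count, as soon as tx_xmit_pkts() cannot accept a full chunk.
 */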
356
357 static inline void
358 ixgbe_set_xmit_ctx(struct ixgbe_tx_queue *txq,
359                 volatile struct ixgbe_adv_tx_context_desc *ctx_txd,
360                 uint64_t ol_flags, union ixgbe_tx_offload tx_offload)
361 {
362         uint32_t type_tucmd_mlhl;
363         uint32_t mss_l4len_idx = 0;
364         uint32_t ctx_idx;
365         uint32_t vlan_macip_lens;
366         union ixgbe_tx_offload tx_offload_mask;
367
368         ctx_idx = txq->ctx_curr;
369         tx_offload_mask.data = 0;
370         type_tucmd_mlhl = 0;
371
372         /* Specify which HW CTX to upload. */
373         mss_l4len_idx |= (ctx_idx << IXGBE_ADVTXD_IDX_SHIFT);
374
375         if (ol_flags & PKT_TX_VLAN_PKT) {
376                 tx_offload_mask.vlan_tci |= ~0;
377         }
378
379         /* check if TCP segmentation required for this packet */
380         if (ol_flags & PKT_TX_TCP_SEG) {
381                 /* implies IP cksum in IPv4 */
382                 if (ol_flags & PKT_TX_IP_CKSUM)
383                         type_tucmd_mlhl = IXGBE_ADVTXD_TUCMD_IPV4 |
384                                 IXGBE_ADVTXD_TUCMD_L4T_TCP |
385                                 IXGBE_ADVTXD_DTYP_CTXT | IXGBE_ADVTXD_DCMD_DEXT;
386                 else
387                         type_tucmd_mlhl = IXGBE_ADVTXD_TUCMD_IPV6 |
388                                 IXGBE_ADVTXD_TUCMD_L4T_TCP |
389                                 IXGBE_ADVTXD_DTYP_CTXT | IXGBE_ADVTXD_DCMD_DEXT;
390
391                 tx_offload_mask.l2_len |= ~0;
392                 tx_offload_mask.l3_len |= ~0;
393                 tx_offload_mask.l4_len |= ~0;
394                 tx_offload_mask.tso_segsz |= ~0;
395                 mss_l4len_idx |= tx_offload.tso_segsz << IXGBE_ADVTXD_MSS_SHIFT;
396                 mss_l4len_idx |= tx_offload.l4_len << IXGBE_ADVTXD_L4LEN_SHIFT;
397         } else { /* no TSO, check if hardware checksum is needed */
398                 if (ol_flags & PKT_TX_IP_CKSUM) {
399                         type_tucmd_mlhl = IXGBE_ADVTXD_TUCMD_IPV4;
400                         tx_offload_mask.l2_len |= ~0;
401                         tx_offload_mask.l3_len |= ~0;
402                 }
403
404                 switch (ol_flags & PKT_TX_L4_MASK) {
405                 case PKT_TX_UDP_CKSUM:
406                         type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_L4T_UDP |
407                                 IXGBE_ADVTXD_DTYP_CTXT | IXGBE_ADVTXD_DCMD_DEXT;
408                         mss_l4len_idx |= sizeof(struct udp_hdr) << IXGBE_ADVTXD_L4LEN_SHIFT;
409                         tx_offload_mask.l2_len |= ~0;
410                         tx_offload_mask.l3_len |= ~0;
411                         break;
412                 case PKT_TX_TCP_CKSUM:
413                         type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_L4T_TCP |
414                                 IXGBE_ADVTXD_DTYP_CTXT | IXGBE_ADVTXD_DCMD_DEXT;
415                         mss_l4len_idx |= sizeof(struct tcp_hdr) << IXGBE_ADVTXD_L4LEN_SHIFT;
416                         tx_offload_mask.l2_len |= ~0;
417                         tx_offload_mask.l3_len |= ~0;
418                         break;
419                 case PKT_TX_SCTP_CKSUM:
420                         type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_L4T_SCTP |
421                                 IXGBE_ADVTXD_DTYP_CTXT | IXGBE_ADVTXD_DCMD_DEXT;
422                         mss_l4len_idx |= sizeof(struct sctp_hdr) << IXGBE_ADVTXD_L4LEN_SHIFT;
423                         tx_offload_mask.l2_len |= ~0;
424                         tx_offload_mask.l3_len |= ~0;
425                         break;
426                 default:
427                         type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_L4T_RSV |
428                                 IXGBE_ADVTXD_DTYP_CTXT | IXGBE_ADVTXD_DCMD_DEXT;
429                         break;
430                 }
431         }
432
433         txq->ctx_cache[ctx_idx].flags = ol_flags;
434         txq->ctx_cache[ctx_idx].tx_offload.data  =
435                 tx_offload_mask.data & tx_offload.data;
436         txq->ctx_cache[ctx_idx].tx_offload_mask    = tx_offload_mask;
437
438         ctx_txd->type_tucmd_mlhl = rte_cpu_to_le_32(type_tucmd_mlhl);
439         vlan_macip_lens = tx_offload.l3_len;
440         vlan_macip_lens |= (tx_offload.l2_len << IXGBE_ADVTXD_MACLEN_SHIFT);
441         vlan_macip_lens |= ((uint32_t)tx_offload.vlan_tci << IXGBE_ADVTXD_VLAN_SHIFT);
442         ctx_txd->vlan_macip_lens = rte_cpu_to_le_32(vlan_macip_lens);
443         ctx_txd->mss_l4len_idx   = rte_cpu_to_le_32(mss_l4len_idx);
444         ctx_txd->seqnum_seed     = 0;
445 }
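/*
 * For reference, the field packing performed above: vlan_macip_lens holds
 * l3_len in its low bits, l2_len shifted by IXGBE_ADVTXD_MACLEN_SHIFT and
 * vlan_tci shifted by IXGBE_ADVTXD_VLAN_SHIFT; mss_l4len_idx holds the
 * context slot index, the MSS (for TSO) and the L4 header length.
 */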
446
447 /*
448  * Check which hardware context can be used. Use the existing match
449  * or create a new context descriptor.
450  */
451 static inline uint32_t
452 what_advctx_update(struct ixgbe_tx_queue *txq, uint64_t flags,
453                 union ixgbe_tx_offload tx_offload)
454 {
455         /* If match with the current used context */
456         if (likely((txq->ctx_cache[txq->ctx_curr].flags == flags) &&
457                 (txq->ctx_cache[txq->ctx_curr].tx_offload.data ==
458                 (txq->ctx_cache[txq->ctx_curr].tx_offload_mask.data & tx_offload.data)))) {
459                         return txq->ctx_curr;
460         }
461
462         /* Otherwise, check whether the other cached context matches */
463         txq->ctx_curr ^= 1;
464         if (likely((txq->ctx_cache[txq->ctx_curr].flags == flags) &&
465                 (txq->ctx_cache[txq->ctx_curr].tx_offload.data ==
466                 (txq->ctx_cache[txq->ctx_curr].tx_offload_mask.data & tx_offload.data)))) {
467                         return txq->ctx_curr;
468         }
469
470         /* Mismatch: neither cached context matches, a new one must be built */
471         return IXGBE_CTX_NUM;
472 }
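/*
 * The queue keeps a two-slot context cache, toggled above with ctx_curr ^= 1.
 * A return value of IXGBE_CTX_NUM therefore means that neither cached slot
 * matches; the caller reacts by building a fresh context descriptor in the
 * slot that ctx_curr now points at.
 */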
473
474 static inline uint32_t
475 tx_desc_cksum_flags_to_olinfo(uint64_t ol_flags)
476 {
477         uint32_t tmp = 0;
478         if ((ol_flags & PKT_TX_L4_MASK) != PKT_TX_L4_NO_CKSUM)
479                 tmp |= IXGBE_ADVTXD_POPTS_TXSM;
480         if (ol_flags & PKT_TX_IP_CKSUM)
481                 tmp |= IXGBE_ADVTXD_POPTS_IXSM;
482         if (ol_flags & PKT_TX_TCP_SEG)
483                 tmp |= IXGBE_ADVTXD_POPTS_TXSM;
484         return tmp;
485 }
486
487 static inline uint32_t
488 tx_desc_ol_flags_to_cmdtype(uint64_t ol_flags)
489 {
490         uint32_t cmdtype = 0;
491         if (ol_flags & PKT_TX_VLAN_PKT)
492                 cmdtype |= IXGBE_ADVTXD_DCMD_VLE;
493         if (ol_flags & PKT_TX_TCP_SEG)
494                 cmdtype |= IXGBE_ADVTXD_DCMD_TSE;
495         return cmdtype;
496 }
497
498 /* Default RS bit threshold values */
499 #ifndef DEFAULT_TX_RS_THRESH
500 #define DEFAULT_TX_RS_THRESH   32
501 #endif
502 #ifndef DEFAULT_TX_FREE_THRESH
503 #define DEFAULT_TX_FREE_THRESH 32
504 #endif
505
506 /* Reset transmit descriptors after they have been used */
507 static inline int
508 ixgbe_xmit_cleanup(struct ixgbe_tx_queue *txq)
509 {
510         struct ixgbe_tx_entry *sw_ring = txq->sw_ring;
511         volatile union ixgbe_adv_tx_desc *txr = txq->tx_ring;
512         uint16_t last_desc_cleaned = txq->last_desc_cleaned;
513         uint16_t nb_tx_desc = txq->nb_tx_desc;
514         uint16_t desc_to_clean_to;
515         uint16_t nb_tx_to_clean;
516         uint32_t status;
517
518         /* Determine the last descriptor needing to be cleaned */
519         desc_to_clean_to = (uint16_t)(last_desc_cleaned + txq->tx_rs_thresh);
520         if (desc_to_clean_to >= nb_tx_desc)
521                 desc_to_clean_to = (uint16_t)(desc_to_clean_to - nb_tx_desc);
522
523         /* Check to make sure the last descriptor to clean is done */
524         desc_to_clean_to = sw_ring[desc_to_clean_to].last_id;
525         status = txr[desc_to_clean_to].wb.status;
526         if (!(status & rte_cpu_to_le_32(IXGBE_TXD_STAT_DD)))
527         {
528                 PMD_TX_FREE_LOG(DEBUG,
529                                 "TX descriptor %4u is not done "
530                                 "(port=%d queue=%d)",
531                                 desc_to_clean_to,
532                                 txq->port_id, txq->queue_id);
533                 /* Failed to clean any descriptors, better luck next time */
534                 return -(1);
535         }
536
537         /* Figure out how many descriptors will be cleaned */
538         if (last_desc_cleaned > desc_to_clean_to)
539                 nb_tx_to_clean = (uint16_t)((nb_tx_desc - last_desc_cleaned) +
540                                                         desc_to_clean_to);
541         else
542                 nb_tx_to_clean = (uint16_t)(desc_to_clean_to -
543                                                 last_desc_cleaned);
544
545         PMD_TX_FREE_LOG(DEBUG,
546                         "Cleaning %4u TX descriptors: %4u to %4u "
547                         "(port=%d queue=%d)",
548                         nb_tx_to_clean, last_desc_cleaned, desc_to_clean_to,
549                         txq->port_id, txq->queue_id);
550
551         /*
552          * The last descriptor to clean is done, so that means all the
553          * descriptors from the last descriptor that was cleaned
554          * up to the last descriptor with the RS bit set
555          * are done. Only reset the threshold descriptor.
556          */
557         txr[desc_to_clean_to].wb.status = 0;
558
559         /* Update the txq to reflect the last descriptor that was cleaned */
560         txq->last_desc_cleaned = desc_to_clean_to;
561         txq->nb_tx_free = (uint16_t)(txq->nb_tx_free + nb_tx_to_clean);
562
563         /* No Error */
564         return 0;
565 }
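/*
 * Cleanup arithmetic example (sizes assumed, single-segment traffic so the
 * last_id lookup is a no-op): with nb_tx_desc = 512, tx_rs_thresh = 32 and
 * last_desc_cleaned = 495, desc_to_clean_to wraps to 527 - 512 = 15, and
 * once descriptor 15 reports DD, nb_tx_to_clean = (512 - 495) + 15 = 32
 * descriptors are returned to nb_tx_free.
 */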
566
567 uint16_t
568 ixgbe_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts,
569                 uint16_t nb_pkts)
570 {
571         struct ixgbe_tx_queue *txq;
572         struct ixgbe_tx_entry *sw_ring;
573         struct ixgbe_tx_entry *txe, *txn;
574         volatile union ixgbe_adv_tx_desc *txr;
575         volatile union ixgbe_adv_tx_desc *txd, *txp;
576         struct rte_mbuf     *tx_pkt;
577         struct rte_mbuf     *m_seg;
578         uint64_t buf_dma_addr;
579         uint32_t olinfo_status;
580         uint32_t cmd_type_len;
581         uint32_t pkt_len;
582         uint16_t slen;
583         uint64_t ol_flags;
584         uint16_t tx_id;
585         uint16_t tx_last;
586         uint16_t nb_tx;
587         uint16_t nb_used;
588         uint64_t tx_ol_req;
589         uint32_t ctx = 0;
590         uint32_t new_ctx;
591         union ixgbe_tx_offload tx_offload = {0};
592
593         txq = tx_queue;
594         sw_ring = txq->sw_ring;
595         txr     = txq->tx_ring;
596         tx_id   = txq->tx_tail;
597         txe = &sw_ring[tx_id];
598         txp = NULL;
599
600         /* Determine if the descriptor ring needs to be cleaned. */
601         if (txq->nb_tx_free < txq->tx_free_thresh)
602                 ixgbe_xmit_cleanup(txq);
603
604         rte_prefetch0(&txe->mbuf->pool);
605
606         /* TX loop */
607         for (nb_tx = 0; nb_tx < nb_pkts; nb_tx++) {
608                 new_ctx = 0;
609                 tx_pkt = *tx_pkts++;
610                 pkt_len = tx_pkt->pkt_len;
611
612                 /*
613                  * Determine how many (if any) context descriptors
614                  * are needed for offload functionality.
615                  */
616                 ol_flags = tx_pkt->ol_flags;
617
618                 /* If hardware offload required */
619                 tx_ol_req = ol_flags & IXGBE_TX_OFFLOAD_MASK;
620                 if (tx_ol_req) {
621                         tx_offload.l2_len = tx_pkt->l2_len;
622                         tx_offload.l3_len = tx_pkt->l3_len;
623                         tx_offload.l4_len = tx_pkt->l4_len;
624                         tx_offload.vlan_tci = tx_pkt->vlan_tci;
625                         tx_offload.tso_segsz = tx_pkt->tso_segsz;
626
627                         /* Decide whether a new context must be built or an existing one reused. */
628                         ctx = what_advctx_update(txq, tx_ol_req,
629                                 tx_offload);
630                         /* Only allocate a context descriptor if required */
631                         new_ctx = (ctx == IXGBE_CTX_NUM);
632                         ctx = txq->ctx_curr;
633                 }
634
635                 /*
636                  * Keep track of how many descriptors are used this loop
637                  * This will always be the number of segments + the number of
638                  * Context descriptors required to transmit the packet
639                  */
640                 nb_used = (uint16_t)(tx_pkt->nb_segs + new_ctx);
641
642                 if (txp != NULL &&
643                                 nb_used + txq->nb_tx_used >= txq->tx_rs_thresh)
644                         /* set RS on the previous packet in the burst */
645                         txp->read.cmd_type_len |=
646                                 rte_cpu_to_le_32(IXGBE_TXD_CMD_RS);
647
648                 /*
649                  * The number of descriptors that must be allocated for a
650                  * packet is the number of segments of that packet, plus 1
651                  * Context Descriptor for the hardware offload, if any.
652                  * Determine the last TX descriptor to allocate in the TX ring
653                  * for the packet, starting from the current position (tx_id)
654                  * in the ring.
655                  */
656                 tx_last = (uint16_t) (tx_id + nb_used - 1);
657
658                 /* Circular ring */
659                 if (tx_last >= txq->nb_tx_desc)
660                         tx_last = (uint16_t) (tx_last - txq->nb_tx_desc);
661
662                 PMD_TX_LOG(DEBUG, "port_id=%u queue_id=%u pktlen=%u"
663                            " tx_first=%u tx_last=%u",
664                            (unsigned) txq->port_id,
665                            (unsigned) txq->queue_id,
666                            (unsigned) pkt_len,
667                            (unsigned) tx_id,
668                            (unsigned) tx_last);
669
670                 /*
671                  * Make sure there are enough TX descriptors available to
672                  * transmit the entire packet.
673                  * nb_used better be less than or equal to txq->tx_rs_thresh
674                  */
675                 if (nb_used > txq->nb_tx_free) {
676                         PMD_TX_FREE_LOG(DEBUG,
677                                         "Not enough free TX descriptors "
678                                         "nb_used=%4u nb_free=%4u "
679                                         "(port=%d queue=%d)",
680                                         nb_used, txq->nb_tx_free,
681                                         txq->port_id, txq->queue_id);
682
683                         if (ixgbe_xmit_cleanup(txq) != 0) {
684                                 /* Could not clean any descriptors */
685                                 if (nb_tx == 0)
686                                         return 0;
687                                 goto end_of_tx;
688                         }
689
690                         /* nb_used better be <= txq->tx_rs_thresh */
691                         if (unlikely(nb_used > txq->tx_rs_thresh)) {
692                                 PMD_TX_FREE_LOG(DEBUG,
693                                         "The number of descriptors needed to "
694                                         "transmit the packet exceeds the "
695                                         "RS bit threshold. This will impact "
696                                         "performance. "
697                                         "nb_used=%4u nb_free=%4u "
698                                         "tx_rs_thresh=%4u. "
699                                         "(port=%d queue=%d)",
700                                         nb_used, txq->nb_tx_free,
701                                         txq->tx_rs_thresh,
702                                         txq->port_id, txq->queue_id);
703                                 /*
704                                  * Loop here until there are enough TX
705                                  * descriptors or until the ring cannot be
706                                  * cleaned.
707                                  */
708                                 while (nb_used > txq->nb_tx_free) {
709                                         if (ixgbe_xmit_cleanup(txq) != 0) {
710                                                 /*
711                                                  * Could not clean any
712                                                  * descriptors
713                                                  */
714                                                 if (nb_tx == 0)
715                                                         return 0;
716                                                 goto end_of_tx;
717                                         }
718                                 }
719                         }
720                 }
721
722                 /*
723                  * By now there are enough free TX descriptors to transmit
724                  * the packet.
725                  */
726
727                 /*
728                  * Set common flags of all TX Data Descriptors.
729                  *
730                  * The following bits must be set in all Data Descriptors:
731                  *   - IXGBE_ADVTXD_DTYP_DATA
732                  *   - IXGBE_ADVTXD_DCMD_DEXT
733                  *
734                  * The following bits must be set in the first Data Descriptor
735                  * and are ignored in the other ones:
736                  *   - IXGBE_ADVTXD_DCMD_IFCS
737                  *   - IXGBE_ADVTXD_MAC_1588
738                  *   - IXGBE_ADVTXD_DCMD_VLE
739                  *
740                  * The following bits must only be set in the last Data
741                  * Descriptor:
742                  *   - IXGBE_TXD_CMD_EOP
743                  *
744                  * The following bits can be set in any Data Descriptor, but
745                  * are only set in the last Data Descriptor:
746                  *   - IXGBE_TXD_CMD_RS
747                  */
748                 cmd_type_len = IXGBE_ADVTXD_DTYP_DATA |
749                         IXGBE_ADVTXD_DCMD_IFCS | IXGBE_ADVTXD_DCMD_DEXT;
750
751 #ifdef RTE_LIBRTE_IEEE1588
752                 if (ol_flags & PKT_TX_IEEE1588_TMST)
753                         cmd_type_len |= IXGBE_ADVTXD_MAC_1588;
754 #endif
755
756                 olinfo_status = 0;
757                 if (tx_ol_req) {
758
759                         if (ol_flags & PKT_TX_TCP_SEG) {
760                                 /* when TSO is on, the paylen in the descriptor is
761                                  * not the packet length but the TCP payload length */
762                                 pkt_len -= (tx_offload.l2_len +
763                                         tx_offload.l3_len + tx_offload.l4_len);
764                         }
765
766                         /*
767                          * Setup the TX Advanced Context Descriptor if required
768                          */
769                         if (new_ctx) {
770                                 volatile struct ixgbe_adv_tx_context_desc *
771                                     ctx_txd;
772
773                                 ctx_txd = (volatile struct
774                                     ixgbe_adv_tx_context_desc *)
775                                     &txr[tx_id];
776
777                                 txn = &sw_ring[txe->next_id];
778                                 rte_prefetch0(&txn->mbuf->pool);
779
780                                 if (txe->mbuf != NULL) {
781                                         rte_pktmbuf_free_seg(txe->mbuf);
782                                         txe->mbuf = NULL;
783                                 }
784
785                                 ixgbe_set_xmit_ctx(txq, ctx_txd, tx_ol_req,
786                                         tx_offload);
787
788                                 txe->last_id = tx_last;
789                                 tx_id = txe->next_id;
790                                 txe = txn;
791                         }
792
793                         /*
794                          * Set up the TX Advanced Data Descriptor.
795                          * This path is taken whether the context
796                          * descriptor is newly built or reused.
797                          */
798                         cmd_type_len  |= tx_desc_ol_flags_to_cmdtype(ol_flags);
799                         olinfo_status |= tx_desc_cksum_flags_to_olinfo(ol_flags);
800                         olinfo_status |= ctx << IXGBE_ADVTXD_IDX_SHIFT;
801                 }
802
803                 olinfo_status |= (pkt_len << IXGBE_ADVTXD_PAYLEN_SHIFT);
804
805                 m_seg = tx_pkt;
806                 do {
807                         txd = &txr[tx_id];
808                         txn = &sw_ring[txe->next_id];
809                         rte_prefetch0(&txn->mbuf->pool);
810
811                         if (txe->mbuf != NULL)
812                                 rte_pktmbuf_free_seg(txe->mbuf);
813                         txe->mbuf = m_seg;
814
815                         /*
816                          * Set up Transmit Data Descriptor.
817                          */
818                         slen = m_seg->data_len;
819                         buf_dma_addr = rte_mbuf_data_dma_addr(m_seg);
820                         txd->read.buffer_addr =
821                                 rte_cpu_to_le_64(buf_dma_addr);
822                         txd->read.cmd_type_len =
823                                 rte_cpu_to_le_32(cmd_type_len | slen);
824                         txd->read.olinfo_status =
825                                 rte_cpu_to_le_32(olinfo_status);
826                         txe->last_id = tx_last;
827                         tx_id = txe->next_id;
828                         txe = txn;
829                         m_seg = m_seg->next;
830                 } while (m_seg != NULL);
831
832                 /*
833                  * The last packet data descriptor needs End Of Packet (EOP)
834                  */
835                 cmd_type_len |= IXGBE_TXD_CMD_EOP;
836                 txq->nb_tx_used = (uint16_t)(txq->nb_tx_used + nb_used);
837                 txq->nb_tx_free = (uint16_t)(txq->nb_tx_free - nb_used);
838
839                 /* Set RS bit only on threshold packets' last descriptor */
840                 if (txq->nb_tx_used >= txq->tx_rs_thresh) {
841                         PMD_TX_FREE_LOG(DEBUG,
842                                         "Setting RS bit on TXD id="
843                                         "%4u (port=%d queue=%d)",
844                                         tx_last, txq->port_id, txq->queue_id);
845
846                         cmd_type_len |= IXGBE_TXD_CMD_RS;
847
848                         /* Update txq RS bit counters */
849                         txq->nb_tx_used = 0;
850                         txp = NULL;
851                 } else
852                         txp = txd;
853
854                 txd->read.cmd_type_len |= rte_cpu_to_le_32(cmd_type_len);
855         }
856
857 end_of_tx:
858         /* set RS on last packet in the burst */
859         if (txp != NULL)
860                 txp->read.cmd_type_len |= rte_cpu_to_le_32(IXGBE_TXD_CMD_RS);
861
862         rte_wmb();
863
864         /*
865          * Set the Transmit Descriptor Tail (TDT)
866          */
867         PMD_TX_LOG(DEBUG, "port_id=%u queue_id=%u tx_tail=%u nb_tx=%u",
868                    (unsigned) txq->port_id, (unsigned) txq->queue_id,
869                    (unsigned) tx_id, (unsigned) nb_tx);
870         IXGBE_PCI_REG_WRITE(txq->tdt_reg_addr, tx_id);
871         txq->tx_tail = tx_id;
872
873         return nb_tx;
874 }
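/*
 * Summary of the RS-bit policy in ixgbe_xmit_pkts() above: RS is requested
 * on a packet's last data descriptor once nb_tx_used reaches tx_rs_thresh,
 * on the previous packet (txp) when the next one would overshoot that
 * threshold, and on the final packet of the burst before the tail register
 * is written, so ixgbe_xmit_cleanup() can reclaim descriptors in
 * tx_rs_thresh-sized chunks.
 */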
875
876 /*********************************************************************
877  *
878  *  RX functions
879  *
880  **********************************************************************/
881 #define IXGBE_PACKET_TYPE_IPV4              0X01
882 #define IXGBE_PACKET_TYPE_IPV4_TCP          0X11
883 #define IXGBE_PACKET_TYPE_IPV4_UDP          0X21
884 #define IXGBE_PACKET_TYPE_IPV4_SCTP         0X41
885 #define IXGBE_PACKET_TYPE_IPV4_EXT          0X03
886 #define IXGBE_PACKET_TYPE_IPV4_EXT_SCTP     0X43
887 #define IXGBE_PACKET_TYPE_IPV6              0X04
888 #define IXGBE_PACKET_TYPE_IPV6_TCP          0X14
889 #define IXGBE_PACKET_TYPE_IPV6_UDP          0X24
890 #define IXGBE_PACKET_TYPE_IPV6_EXT          0X0C
891 #define IXGBE_PACKET_TYPE_IPV6_EXT_TCP      0X1C
892 #define IXGBE_PACKET_TYPE_IPV6_EXT_UDP      0X2C
893 #define IXGBE_PACKET_TYPE_IPV4_IPV6         0X05
894 #define IXGBE_PACKET_TYPE_IPV4_IPV6_TCP     0X15
895 #define IXGBE_PACKET_TYPE_IPV4_IPV6_UDP     0X25
896 #define IXGBE_PACKET_TYPE_IPV4_IPV6_EXT     0X0D
897 #define IXGBE_PACKET_TYPE_IPV4_IPV6_EXT_TCP 0X1D
898 #define IXGBE_PACKET_TYPE_IPV4_IPV6_EXT_UDP 0X2D
899 #define IXGBE_PACKET_TYPE_MAX               0X80
900 #define IXGBE_PACKET_TYPE_MASK              0X7F
901 #define IXGBE_PACKET_TYPE_SHIFT             0X04
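/*
 * Bit layout implied by the IXGBE_PACKET_TYPE_* values above (pkt_info
 * after the 4-bit shift): bit 0 = IPv4, bit 1 = IPv4 extensions,
 * bit 2 = IPv6, bit 3 = IPv6 extensions, bit 4 = TCP, bit 5 = UDP,
 * bit 6 = SCTP.  For example, 0x11 is IPv4 + TCP and 0x2D is an IPv4
 * outer / IPv6-ext inner tunnel carrying UDP.
 */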
902 static inline uint32_t
903 ixgbe_rxd_pkt_info_to_pkt_type(uint16_t pkt_info)
904 {
905         static const uint32_t
906                 ptype_table[IXGBE_PACKET_TYPE_MAX] __rte_cache_aligned = {
907                 [IXGBE_PACKET_TYPE_IPV4] = RTE_PTYPE_L2_ETHER |
908                         RTE_PTYPE_L3_IPV4,
909                 [IXGBE_PACKET_TYPE_IPV4_EXT] = RTE_PTYPE_L2_ETHER |
910                         RTE_PTYPE_L3_IPV4_EXT,
911                 [IXGBE_PACKET_TYPE_IPV6] = RTE_PTYPE_L2_ETHER |
912                         RTE_PTYPE_L3_IPV6,
913                 [IXGBE_PACKET_TYPE_IPV4_IPV6] = RTE_PTYPE_L2_ETHER |
914                         RTE_PTYPE_L3_IPV4 | RTE_PTYPE_TUNNEL_IP |
915                         RTE_PTYPE_INNER_L3_IPV6,
916                 [IXGBE_PACKET_TYPE_IPV6_EXT] = RTE_PTYPE_L2_ETHER |
917                         RTE_PTYPE_L3_IPV6_EXT,
918                 [IXGBE_PACKET_TYPE_IPV4_IPV6_EXT] = RTE_PTYPE_L2_ETHER |
919                         RTE_PTYPE_L3_IPV4 | RTE_PTYPE_TUNNEL_IP |
920                         RTE_PTYPE_INNER_L3_IPV6_EXT,
921                 [IXGBE_PACKET_TYPE_IPV4_TCP] = RTE_PTYPE_L2_ETHER |
922                         RTE_PTYPE_L3_IPV4 | RTE_PTYPE_L4_TCP,
923                 [IXGBE_PACKET_TYPE_IPV6_TCP] = RTE_PTYPE_L2_ETHER |
924                         RTE_PTYPE_L3_IPV6 | RTE_PTYPE_L4_TCP,
925                 [IXGBE_PACKET_TYPE_IPV4_IPV6_TCP] = RTE_PTYPE_L2_ETHER |
926                         RTE_PTYPE_L3_IPV4 | RTE_PTYPE_TUNNEL_IP |
927                         RTE_PTYPE_INNER_L3_IPV6 | RTE_PTYPE_INNER_L4_TCP,
928                 [IXGBE_PACKET_TYPE_IPV6_EXT_TCP] = RTE_PTYPE_L2_ETHER |
929                         RTE_PTYPE_L3_IPV6_EXT | RTE_PTYPE_L4_TCP,
930                 [IXGBE_PACKET_TYPE_IPV4_IPV6_EXT_TCP] = RTE_PTYPE_L2_ETHER |
931                         RTE_PTYPE_L3_IPV4 | RTE_PTYPE_TUNNEL_IP |
932                         RTE_PTYPE_INNER_L3_IPV6_EXT | RTE_PTYPE_INNER_L4_TCP,
933                 [IXGBE_PACKET_TYPE_IPV4_UDP] = RTE_PTYPE_L2_ETHER |
934                         RTE_PTYPE_L3_IPV4 | RTE_PTYPE_L4_UDP,
935                 [IXGBE_PACKET_TYPE_IPV6_UDP] = RTE_PTYPE_L2_ETHER |
936                         RTE_PTYPE_L3_IPV6 | RTE_PTYPE_L4_UDP,
937                 [IXGBE_PACKET_TYPE_IPV4_IPV6_UDP] = RTE_PTYPE_L2_ETHER |
938                         RTE_PTYPE_L3_IPV4 | RTE_PTYPE_TUNNEL_IP |
939                         RTE_PTYPE_INNER_L3_IPV6 | RTE_PTYPE_INNER_L4_UDP,
940                 [IXGBE_PACKET_TYPE_IPV6_EXT_UDP] = RTE_PTYPE_L2_ETHER |
941                         RTE_PTYPE_L3_IPV6_EXT | RTE_PTYPE_L4_UDP,
942                 [IXGBE_PACKET_TYPE_IPV4_IPV6_EXT_UDP] = RTE_PTYPE_L2_ETHER |
943                         RTE_PTYPE_L3_IPV4 | RTE_PTYPE_TUNNEL_IP |
944                         RTE_PTYPE_INNER_L3_IPV6_EXT | RTE_PTYPE_INNER_L4_UDP,
945                 [IXGBE_PACKET_TYPE_IPV4_SCTP] = RTE_PTYPE_L2_ETHER |
946                         RTE_PTYPE_L3_IPV4 | RTE_PTYPE_L4_SCTP,
947                 [IXGBE_PACKET_TYPE_IPV4_EXT_SCTP] = RTE_PTYPE_L2_ETHER |
948                         RTE_PTYPE_L3_IPV4_EXT | RTE_PTYPE_L4_SCTP,
949         };
950         if (unlikely(pkt_info & IXGBE_RXDADV_PKTTYPE_ETQF))
951                 return RTE_PTYPE_UNKNOWN;
952
953         pkt_info = (pkt_info >> IXGBE_PACKET_TYPE_SHIFT) &
954                                 IXGBE_PACKET_TYPE_MASK;
955
956         return ptype_table[pkt_info];
957 }
958
959 static inline uint64_t
960 ixgbe_rxd_pkt_info_to_pkt_flags(uint16_t pkt_info)
961 {
962         static uint64_t ip_rss_types_map[16] __rte_cache_aligned = {
963                 0, PKT_RX_RSS_HASH, PKT_RX_RSS_HASH, PKT_RX_RSS_HASH,
964                 0, PKT_RX_RSS_HASH, 0, PKT_RX_RSS_HASH,
965                 PKT_RX_RSS_HASH, 0, 0, 0,
966                 0, 0, 0,  PKT_RX_FDIR,
967         };
968 #ifdef RTE_LIBRTE_IEEE1588
969         static uint64_t ip_pkt_etqf_map[8] = {
970                 0, 0, 0, PKT_RX_IEEE1588_PTP,
971                 0, 0, 0, 0,
972         };
973
974         if (likely(pkt_info & IXGBE_RXDADV_PKTTYPE_ETQF))
975                 return ip_pkt_etqf_map[(pkt_info >> 4) & 0X07] |
976                                 ip_rss_types_map[pkt_info & 0XF];
977         else
978                 return ip_rss_types_map[pkt_info & 0XF];
979 #else
980         return ip_rss_types_map[pkt_info & 0XF];
981 #endif
982 }
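/*
 * In the function above, the low 4 bits of pkt_info carry the RSS type
 * reported by the hardware: the non-zero ip_rss_types_map entries translate
 * the hash-capable RSS types into PKT_RX_RSS_HASH, while index 15 marks a
 * flow-director match and yields PKT_RX_FDIR.
 */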
983
984 static inline uint64_t
985 rx_desc_status_to_pkt_flags(uint32_t rx_status)
986 {
987         uint64_t pkt_flags;
988
989         /*
990          * Check only whether a VLAN tag is present.
991          * Do not check here whether the NIC performed the L3/L4 Rx checksum;
992          * that is indicated by the rte_eth_rxmode.hw_ip_checksum flag.
993          */
994         pkt_flags = (rx_status & IXGBE_RXD_STAT_VP) ?  PKT_RX_VLAN_PKT : 0;
995
996 #ifdef RTE_LIBRTE_IEEE1588
997         if (rx_status & IXGBE_RXD_STAT_TMST)
998                 pkt_flags = pkt_flags | PKT_RX_IEEE1588_TMST;
999 #endif
1000         return pkt_flags;
1001 }
1002
1003 static inline uint64_t
1004 rx_desc_error_to_pkt_flags(uint32_t rx_status)
1005 {
1006         uint64_t pkt_flags;
1007
1008         /*
1009          * Bit 31: IPE, IPv4 checksum error
1010          * Bit 30: L4I, L4 integrity error
1011          */
1012         static uint64_t error_to_pkt_flags_map[4] = {
1013                 0,  PKT_RX_L4_CKSUM_BAD, PKT_RX_IP_CKSUM_BAD,
1014                 PKT_RX_IP_CKSUM_BAD | PKT_RX_L4_CKSUM_BAD
1015         };
1016         pkt_flags = error_to_pkt_flags_map[(rx_status >>
1017                 IXGBE_RXDADV_ERR_CKSUM_BIT) & IXGBE_RXDADV_ERR_CKSUM_MSK];
1018
1019         if ((rx_status & IXGBE_RXD_STAT_OUTERIPCS) &&
1020             (rx_status & IXGBE_RXDADV_ERR_OUTERIPER)) {
1021                 pkt_flags |= PKT_RX_EIP_CKSUM_BAD;
1022         }
1023
1024         return pkt_flags;
1025 }
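/*
 * Decoding note for rx_desc_error_to_pkt_flags() above: status bits 30
 * (L4 integrity) and 31 (IPv4 checksum) form the 2-bit index into
 * error_to_pkt_flags_map, so index 1 reports a bad L4 checksum, 2 a bad IP
 * checksum and 3 both.  The extra OUTERIPCS/OUTERIPER test is the X550
 * outer-IP checksum report for VxLAN/NVGRE tunnels referenced in the commit
 * subject, surfaced to the application as PKT_RX_EIP_CKSUM_BAD.
 */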
1026
1027 /*
1028  * LOOK_AHEAD defines how many desc statuses to check beyond the
1029  * current descriptor.
1030  * It must be a pound define for optimal performance.
1031  * Do not change the value of LOOK_AHEAD, as the ixgbe_rx_scan_hw_ring
1032  * function only works with LOOK_AHEAD=8.
1033  */
1034 #define LOOK_AHEAD 8
1035 #if (LOOK_AHEAD != 8)
1036 #error "PMD IXGBE: LOOK_AHEAD must be 8\n"
1037 #endif
1038 static inline int
1039 ixgbe_rx_scan_hw_ring(struct ixgbe_rx_queue *rxq)
1040 {
1041         volatile union ixgbe_adv_rx_desc *rxdp;
1042         struct ixgbe_rx_entry *rxep;
1043         struct rte_mbuf *mb;
1044         uint16_t pkt_len;
1045         uint64_t pkt_flags;
1046         int nb_dd;
1047         uint32_t s[LOOK_AHEAD];
1048         uint16_t pkt_info[LOOK_AHEAD];
1049         int i, j, nb_rx = 0;
1050         uint32_t status;
1051
1052         /* get references to current descriptor and S/W ring entry */
1053         rxdp = &rxq->rx_ring[rxq->rx_tail];
1054         rxep = &rxq->sw_ring[rxq->rx_tail];
1055
1056         status = rxdp->wb.upper.status_error;
1057         /* check to make sure there is at least 1 packet to receive */
1058         if (!(status & rte_cpu_to_le_32(IXGBE_RXDADV_STAT_DD)))
1059                 return 0;
1060
1061         /*
1062          * Scan LOOK_AHEAD descriptors at a time to determine which descriptors
1063          * reference packets that are ready to be received.
1064          */
1065         for (i = 0; i < RTE_PMD_IXGBE_RX_MAX_BURST;
1066              i += LOOK_AHEAD, rxdp += LOOK_AHEAD, rxep += LOOK_AHEAD)
1067         {
1068                 /* Read desc statuses backwards to avoid race condition */
1069                 for (j = LOOK_AHEAD-1; j >= 0; --j)
1070                         s[j] = rte_le_to_cpu_32(rxdp[j].wb.upper.status_error);
1071
1072                 for (j = LOOK_AHEAD - 1; j >= 0; --j)
1073                         pkt_info[j] = rxdp[j].wb.lower.lo_dword.
1074                                                 hs_rss.pkt_info;
1075
1076                 /* Compute how many status bits were set */
1077                 nb_dd = 0;
1078                 for (j = 0; j < LOOK_AHEAD; ++j)
1079                         nb_dd += s[j] & IXGBE_RXDADV_STAT_DD;
1080
1081                 nb_rx += nb_dd;
1082
1083                 /* Translate descriptor info to mbuf format */
1084                 for (j = 0; j < nb_dd; ++j) {
1085                         mb = rxep[j].mbuf;
1086                         pkt_len = rte_le_to_cpu_16(rxdp[j].wb.upper.length) -
1087                                   rxq->crc_len;
1088                         mb->data_len = pkt_len;
1089                         mb->pkt_len = pkt_len;
1090                         mb->vlan_tci = rte_le_to_cpu_16(rxdp[j].wb.upper.vlan);
1091
1092                         /* convert descriptor fields to rte mbuf flags */
1093                         pkt_flags = rx_desc_status_to_pkt_flags(s[j]);
1094                         pkt_flags |= rx_desc_error_to_pkt_flags(s[j]);
1095                         pkt_flags |=
1096                                 ixgbe_rxd_pkt_info_to_pkt_flags(pkt_info[j]);
1097                         mb->ol_flags = pkt_flags;
1098                         mb->packet_type =
1099                                 ixgbe_rxd_pkt_info_to_pkt_type(pkt_info[j]);
1100
1101                         if (likely(pkt_flags & PKT_RX_RSS_HASH))
1102                                 mb->hash.rss = rte_le_to_cpu_32(
1103                                     rxdp[j].wb.lower.hi_dword.rss);
1104                         else if (pkt_flags & PKT_RX_FDIR) {
1105                                 mb->hash.fdir.hash = rte_le_to_cpu_16(
1106                                     rxdp[j].wb.lower.hi_dword.csum_ip.csum) &
1107                                     IXGBE_ATR_HASH_MASK;
1108                                 mb->hash.fdir.id = rte_le_to_cpu_16(
1109                                     rxdp[j].wb.lower.hi_dword.csum_ip.ip_id);
1110                         }
1111                 }
1112
1113                 /* Move mbuf pointers from the S/W ring to the stage */
1114                 for (j = 0; j < LOOK_AHEAD; ++j) {
1115                         rxq->rx_stage[i + j] = rxep[j].mbuf;
1116                 }
1117
1118                 /* stop if all requested packets could not be received */
1119                 if (nb_dd != LOOK_AHEAD)
1120                         break;
1121         }
1122
1123         /* clear software ring entries so we can cleanup correctly */
1124         for (i = 0; i < nb_rx; ++i) {
1125                 rxq->sw_ring[rxq->rx_tail + i].mbuf = NULL;
1126         }
1127
1128
1129         return nb_rx;
1130 }
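/*
 * Scan behaviour above, in short: statuses are sampled LOOK_AHEAD (8)
 * descriptors at a time, read in reverse order so a descriptor is never
 * observed as done while an earlier one still appears pending, and the scan
 * stops at the first group that is not fully done.  Summing the DD bits to
 * obtain nb_dd works only because IXGBE_RXDADV_STAT_DD is bit 0 of the
 * status word.
 */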
1131
1132 static inline int
1133 ixgbe_rx_alloc_bufs(struct ixgbe_rx_queue *rxq, bool reset_mbuf)
1134 {
1135         volatile union ixgbe_adv_rx_desc *rxdp;
1136         struct ixgbe_rx_entry *rxep;
1137         struct rte_mbuf *mb;
1138         uint16_t alloc_idx;
1139         __le64 dma_addr;
1140         int diag, i;
1141
1142         /* allocate buffers in bulk directly into the S/W ring */
1143         alloc_idx = rxq->rx_free_trigger - (rxq->rx_free_thresh - 1);
1144         rxep = &rxq->sw_ring[alloc_idx];
1145         diag = rte_mempool_get_bulk(rxq->mb_pool, (void *)rxep,
1146                                     rxq->rx_free_thresh);
1147         if (unlikely(diag != 0))
1148                 return -ENOMEM;
1149
1150         rxdp = &rxq->rx_ring[alloc_idx];
1151         for (i = 0; i < rxq->rx_free_thresh; ++i) {
1152                 /* populate the static rte mbuf fields */
1153                 mb = rxep[i].mbuf;
1154                 if (reset_mbuf) {
1155                         mb->next = NULL;
1156                         mb->nb_segs = 1;
1157                         mb->port = rxq->port_id;
1158                 }
1159
1160                 rte_mbuf_refcnt_set(mb, 1);
1161                 mb->data_off = RTE_PKTMBUF_HEADROOM;
1162
1163                 /* populate the descriptors */
1164                 dma_addr = rte_cpu_to_le_64(rte_mbuf_data_dma_addr_default(mb));
1165                 rxdp[i].read.hdr_addr = 0;
1166                 rxdp[i].read.pkt_addr = dma_addr;
1167         }
1168
1169         /* update state of internal queue structure */
1170         rxq->rx_free_trigger = rxq->rx_free_trigger + rxq->rx_free_thresh;
1171         if (rxq->rx_free_trigger >= rxq->nb_rx_desc)
1172                 rxq->rx_free_trigger = rxq->rx_free_thresh - 1;
1173
1174         /* no errors */
1175         return 0;
1176 }
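/*
 * Replenish example (thresholds assumed): with rx_free_thresh = 32 and
 * rx_free_trigger = 31, the bulk allocation above refills sw_ring[0..31]
 * and advances rx_free_trigger to 63; once the trigger passes the last
 * descriptor it wraps back to rx_free_thresh - 1.  On allocation failure
 * nothing is written to the ring and -ENOMEM is returned, letting the
 * caller rewind its staged receives.
 */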
1177
1178 static inline uint16_t
1179 ixgbe_rx_fill_from_stage(struct ixgbe_rx_queue *rxq, struct rte_mbuf **rx_pkts,
1180                          uint16_t nb_pkts)
1181 {
1182         struct rte_mbuf **stage = &rxq->rx_stage[rxq->rx_next_avail];
1183         int i;
1184
1185         /* how many packets are ready to return? */
1186         nb_pkts = (uint16_t)RTE_MIN(nb_pkts, rxq->rx_nb_avail);
1187
1188         /* copy mbuf pointers to the application's packet list */
1189         for (i = 0; i < nb_pkts; ++i)
1190                 rx_pkts[i] = stage[i];
1191
1192         /* update internal queue state */
1193         rxq->rx_nb_avail = (uint16_t)(rxq->rx_nb_avail - nb_pkts);
1194         rxq->rx_next_avail = (uint16_t)(rxq->rx_next_avail + nb_pkts);
1195
1196         return nb_pkts;
1197 }
1198
1199 static inline uint16_t
1200 rx_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts,
1201              uint16_t nb_pkts)
1202 {
1203         struct ixgbe_rx_queue *rxq = (struct ixgbe_rx_queue *)rx_queue;
1204         uint16_t nb_rx = 0;
1205
1206         /* Any previously recv'd pkts will be returned from the Rx stage */
1207         if (rxq->rx_nb_avail)
1208                 return ixgbe_rx_fill_from_stage(rxq, rx_pkts, nb_pkts);
1209
1210         /* Scan the H/W ring for packets to receive */
1211         nb_rx = (uint16_t)ixgbe_rx_scan_hw_ring(rxq);
1212
1213         /* update internal queue state */
1214         rxq->rx_next_avail = 0;
1215         rxq->rx_nb_avail = nb_rx;
1216         rxq->rx_tail = (uint16_t)(rxq->rx_tail + nb_rx);
1217
1218         /* if required, allocate new buffers to replenish descriptors */
1219         if (rxq->rx_tail > rxq->rx_free_trigger) {
1220                 uint16_t cur_free_trigger = rxq->rx_free_trigger;
1221
1222                 if (ixgbe_rx_alloc_bufs(rxq, true) != 0) {
1223                         int i, j;
1224                         PMD_RX_LOG(DEBUG, "RX mbuf alloc failed port_id=%u "
1225                                    "queue_id=%u", (unsigned) rxq->port_id,
1226                                    (unsigned) rxq->queue_id);
1227
1228                         rte_eth_devices[rxq->port_id].data->rx_mbuf_alloc_failed +=
1229                                 rxq->rx_free_thresh;
1230
1231                         /*
1232                          * Need to rewind any previous receives if we cannot
1233                          * allocate new buffers to replenish the old ones.
1234                          */
1235                         rxq->rx_nb_avail = 0;
1236                         rxq->rx_tail = (uint16_t)(rxq->rx_tail - nb_rx);
1237                         for (i = 0, j = rxq->rx_tail; i < nb_rx; ++i, ++j)
1238                                 rxq->sw_ring[j].mbuf = rxq->rx_stage[i];
1239
1240                         return 0;
1241                 }
1242
1243                 /* update tail pointer */
1244                 rte_wmb();
1245                 IXGBE_PCI_REG_WRITE(rxq->rdt_reg_addr, cur_free_trigger);
1246         }
1247
1248         if (rxq->rx_tail >= rxq->nb_rx_desc)
1249                 rxq->rx_tail = 0;
1250
1251         /* received any packets this loop? */
1252         if (rxq->rx_nb_avail)
1253                 return ixgbe_rx_fill_from_stage(rxq, rx_pkts, nb_pkts);
1254
1255         return 0;
1256 }
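/*
 * Flow of the bulk-alloc receive path above: previously scanned packets are
 * served from rx_stage first; otherwise the H/W ring is scanned, fresh
 * buffers are allocated once rx_tail passes rx_free_trigger, the RDT tail
 * register is written only after a successful refill, and the newly staged
 * packets are then handed to the application.
 */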
1257
1258 /* split requests into chunks of size RTE_PMD_IXGBE_RX_MAX_BURST */
1259 static uint16_t
1260 ixgbe_recv_pkts_bulk_alloc(void *rx_queue, struct rte_mbuf **rx_pkts,
1261                            uint16_t nb_pkts)
1262 {
1263         uint16_t nb_rx;
1264
1265         if (unlikely(nb_pkts == 0))
1266                 return 0;
1267
1268         if (likely(nb_pkts <= RTE_PMD_IXGBE_RX_MAX_BURST))
1269                 return rx_recv_pkts(rx_queue, rx_pkts, nb_pkts);
1270
1271         /* request is relatively large, chunk it up */
1272         nb_rx = 0;
1273         while (nb_pkts) {
1274                 uint16_t ret, n;
1275                 n = (uint16_t)RTE_MIN(nb_pkts, RTE_PMD_IXGBE_RX_MAX_BURST);
1276                 ret = rx_recv_pkts(rx_queue, &rx_pkts[nb_rx], n);
1277                 nb_rx = (uint16_t)(nb_rx + ret);
1278                 nb_pkts = (uint16_t)(nb_pkts - ret);
1279                 if (ret < n)
1280                         break;
1281         }
1282
1283         return nb_rx;
1284 }
1285
1286 uint16_t
1287 ixgbe_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts,
1288                 uint16_t nb_pkts)
1289 {
1290         struct ixgbe_rx_queue *rxq;
1291         volatile union ixgbe_adv_rx_desc *rx_ring;
1292         volatile union ixgbe_adv_rx_desc *rxdp;
1293         struct ixgbe_rx_entry *sw_ring;
1294         struct ixgbe_rx_entry *rxe;
1295         struct rte_mbuf *rxm;
1296         struct rte_mbuf *nmb;
1297         union ixgbe_adv_rx_desc rxd;
1298         uint64_t dma_addr;
1299         uint32_t staterr;
1300         uint32_t pkt_info;
1301         uint16_t pkt_len;
1302         uint16_t rx_id;
1303         uint16_t nb_rx;
1304         uint16_t nb_hold;
1305         uint64_t pkt_flags;
1306
1307         nb_rx = 0;
1308         nb_hold = 0;
1309         rxq = rx_queue;
1310         rx_id = rxq->rx_tail;
1311         rx_ring = rxq->rx_ring;
1312         sw_ring = rxq->sw_ring;
1313         while (nb_rx < nb_pkts) {
1314                 /*
1315                  * The order of operations here is important as the DD status
1316                  * bit must not be read after any other descriptor fields.
1317                  * rx_ring and rxdp are pointing to volatile data so the order
1318                  * of accesses cannot be reordered by the compiler. If they were
1319                  * not volatile, they could be reordered which could lead to
1320                  * using invalid descriptor fields when read from rxd.
1321                  */
1322                 rxdp = &rx_ring[rx_id];
1323                 staterr = rxdp->wb.upper.status_error;
1324                 if (!(staterr & rte_cpu_to_le_32(IXGBE_RXDADV_STAT_DD)))
1325                         break;
1326                 rxd = *rxdp;
1327
1328                 /*
1329                  * End of packet.
1330                  *
1331                  * If the IXGBE_RXDADV_STAT_EOP flag is not set, the RX packet
1332                  * is likely to be invalid and to be dropped by the various
1333                  * validation checks performed by the network stack.
1334                  *
1335                  * Allocate a new mbuf to replenish the RX ring descriptor.
1336                  * If the allocation fails:
1337                  *    - arrange for that RX descriptor to be the first one
1338                  *      being parsed the next time the receive function is
1339                  *      invoked [on the same queue].
1340                  *
1341                  *    - Stop parsing the RX ring and return immediately.
1342                  *
1343                  * This policy does not drop the packet received in the RX
1344                  * descriptor for which the allocation of a new mbuf failed.
1345                  * Thus, it allows that packet to be retrieved later once
1346                  * mbufs have been freed in the meantime.
1347                  * As a side effect, holding RX descriptors instead of
1348                  * systematically giving them back to the NIC may lead to
1349                  * RX ring exhaustion situations.
1350                  * However, the NIC can gracefully prevent such situations
1351                  * from happening by sending specific "back-pressure" flow
1352                  * control frames to its peer(s).
1353                  */
1354                 PMD_RX_LOG(DEBUG, "port_id=%u queue_id=%u rx_id=%u "
1355                            "ext_err_stat=0x%08x pkt_len=%u",
1356                            (unsigned) rxq->port_id, (unsigned) rxq->queue_id,
1357                            (unsigned) rx_id, (unsigned) staterr,
1358                            (unsigned) rte_le_to_cpu_16(rxd.wb.upper.length));
1359
1360                 nmb = rte_rxmbuf_alloc(rxq->mb_pool);
1361                 if (nmb == NULL) {
1362                         PMD_RX_LOG(DEBUG, "RX mbuf alloc failed port_id=%u "
1363                                    "queue_id=%u", (unsigned) rxq->port_id,
1364                                    (unsigned) rxq->queue_id);
1365                         rte_eth_devices[rxq->port_id].data->rx_mbuf_alloc_failed++;
1366                         break;
1367                 }
1368
1369                 nb_hold++;
1370                 rxe = &sw_ring[rx_id];
1371                 rx_id++;
1372                 if (rx_id == rxq->nb_rx_desc)
1373                         rx_id = 0;
1374
1375                 /* Prefetch next mbuf while processing current one. */
1376                 rte_ixgbe_prefetch(sw_ring[rx_id].mbuf);
1377
1378                 /*
1379                  * When next RX descriptor is on a cache-line boundary,
1380                  * prefetch the next 4 RX descriptors and the next 8 pointers
1381                  * to mbufs.
1382                  */
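                /*
                 * With the usual 64-byte cache line and a 64-bit build this
                 * amounts to 4 descriptors (16 bytes each) and 8 sw_ring
                 * entries (one mbuf pointer each) per prefetched line.
                 */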
1383                 if ((rx_id & 0x3) == 0) {
1384                         rte_ixgbe_prefetch(&rx_ring[rx_id]);
1385                         rte_ixgbe_prefetch(&sw_ring[rx_id]);
1386                 }
1387
1388                 rxm = rxe->mbuf;
1389                 rxe->mbuf = nmb;
1390                 dma_addr =
1391                         rte_cpu_to_le_64(rte_mbuf_data_dma_addr_default(nmb));
1392                 rxdp->read.hdr_addr = 0;
1393                 rxdp->read.pkt_addr = dma_addr;
1394
1395                 /*
1396                  * Initialize the returned mbuf.
1397                  * 1) setup generic mbuf fields:
1398                  *    - number of segments,
1399                  *    - next segment,
1400                  *    - packet length,
1401                  *    - RX port identifier.
1402                  * 2) integrate hardware offload data, if any:
1403                  *    - RSS flag & hash,
1404                  *    - IP checksum flag,
1405                  *    - VLAN TCI, if any,
1406                  *    - error flags.
1407                  */
1408                 pkt_len = (uint16_t) (rte_le_to_cpu_16(rxd.wb.upper.length) -
1409                                       rxq->crc_len);
1410                 rxm->data_off = RTE_PKTMBUF_HEADROOM;
1411                 rte_packet_prefetch((char *)rxm->buf_addr + rxm->data_off);
1412                 rxm->nb_segs = 1;
1413                 rxm->next = NULL;
1414                 rxm->pkt_len = pkt_len;
1415                 rxm->data_len = pkt_len;
1416                 rxm->port = rxq->port_id;
1417
1418                 pkt_info = rte_le_to_cpu_32(rxd.wb.lower.lo_dword.hs_rss.
1419                                                                 pkt_info);
1420                 /* Only valid if PKT_RX_VLAN_PKT is set in pkt_flags */
1421                 rxm->vlan_tci = rte_le_to_cpu_16(rxd.wb.upper.vlan);
1422
1423                 pkt_flags = rx_desc_status_to_pkt_flags(staterr);
1424                 pkt_flags = pkt_flags | rx_desc_error_to_pkt_flags(staterr);
1425                 pkt_flags = pkt_flags |
1426                         ixgbe_rxd_pkt_info_to_pkt_flags(pkt_info);
1427                 rxm->ol_flags = pkt_flags;
1428                 rxm->packet_type = ixgbe_rxd_pkt_info_to_pkt_type(pkt_info);
1429
1430                 if (likely(pkt_flags & PKT_RX_RSS_HASH))
1431                         rxm->hash.rss = rte_le_to_cpu_32(
1432                                                 rxd.wb.lower.hi_dword.rss);
1433                 else if (pkt_flags & PKT_RX_FDIR) {
1434                         rxm->hash.fdir.hash = rte_le_to_cpu_16(
1435                                         rxd.wb.lower.hi_dword.csum_ip.csum) &
1436                                         IXGBE_ATR_HASH_MASK;
1437                         rxm->hash.fdir.id = rte_le_to_cpu_16(
1438                                         rxd.wb.lower.hi_dword.csum_ip.ip_id);
1439                 }
1440                 /*
1441                  * Store the mbuf address into the next entry of the array
1442                  * of returned packets.
1443                  */
1444                 rx_pkts[nb_rx++] = rxm;
1445         }
1446         rxq->rx_tail = rx_id;
1447
1448         /*
1449          * If the number of free RX descriptors is greater than the RX free
1450          * threshold of the queue, advance the Receive Descriptor Tail (RDT)
1451          * register.
1452          * Update the RDT with the value of the last processed RX descriptor
1453          * minus 1, to guarantee that the RDT register is never equal to the
1454          * RDH register, which creates a "full" ring situation from the
1455          * hardware point of view...
1456          */
1457         nb_hold = (uint16_t) (nb_hold + rxq->nb_rx_hold);
1458         if (nb_hold > rxq->rx_free_thresh) {
1459                 PMD_RX_LOG(DEBUG, "port_id=%u queue_id=%u rx_tail=%u "
1460                            "nb_hold=%u nb_rx=%u",
1461                            (unsigned) rxq->port_id, (unsigned) rxq->queue_id,
1462                            (unsigned) rx_id, (unsigned) nb_hold,
1463                            (unsigned) nb_rx);
1464                 rx_id = (uint16_t) ((rx_id == 0) ?
1465                                      (rxq->nb_rx_desc - 1) : (rx_id - 1));
1466                 IXGBE_PCI_REG_WRITE(rxq->rdt_reg_addr, rx_id);
1467                 nb_hold = 0;
1468         }
1469         rxq->nb_rx_hold = nb_hold;
1470         return nb_rx;
1471 }
1472
1473 /**
1474  * Detect an RSC descriptor.
1475  */
1476 static inline uint32_t
1477 ixgbe_rsc_count(union ixgbe_adv_rx_desc *rx)
1478 {
1479         return (rte_le_to_cpu_32(rx->wb.lower.lo_dword.data) &
1480                 IXGBE_RXDADV_RSCCNT_MASK) >> IXGBE_RXDADV_RSCCNT_SHIFT;
1481 }
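/*
 * A non-zero RSCCNT means the descriptor belongs to a hardware-coalesced
 * (RSC) chain, in which case the index of the next buffer of the chain is
 * taken from the NEXTP field rather than from the next sequential
 * descriptor (see the !eop handling in ixgbe_recv_pkts_lro() below).
 */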
1482
1483 /**
1484  * ixgbe_fill_cluster_head_buf - fill the first mbuf of the returned packet
1485  *
1486  * Fill the following info in the HEAD buffer of the Rx cluster:
1487  *    - RX port identifier
1488  *    - hardware offload data, if any:
1489  *      - RSS flag & hash
1490  *      - IP checksum flag
1491  *      - VLAN TCI, if any
1492  *      - error flags
1493  * @head HEAD of the packet cluster
1494  * @desc HW descriptor to get data from
1495  * @port_id Port ID of the Rx queue
1496  */
1497 static inline void
1498 ixgbe_fill_cluster_head_buf(
1499         struct rte_mbuf *head,
1500         union ixgbe_adv_rx_desc *desc,
1501         uint8_t port_id,
1502         uint32_t staterr)
1503 {
1504         uint16_t pkt_info;
1505         uint64_t pkt_flags;
1506
1507         head->port = port_id;
1508
1509         /* The vlan_tci field is only valid when PKT_RX_VLAN_PKT is
1510          * set in the pkt_flags field.
1511          */
1512         head->vlan_tci = rte_le_to_cpu_16(desc->wb.upper.vlan);
1513         pkt_info = rte_le_to_cpu_32(desc->wb.lower.lo_dword.hs_rss.pkt_info);
1514         pkt_flags = rx_desc_status_to_pkt_flags(staterr);
1515         pkt_flags |= rx_desc_error_to_pkt_flags(staterr);
1516         pkt_flags |= ixgbe_rxd_pkt_info_to_pkt_flags(pkt_info);
1517         head->ol_flags = pkt_flags;
1518         head->packet_type = ixgbe_rxd_pkt_info_to_pkt_type(pkt_info);
1519
1520         if (likely(pkt_flags & PKT_RX_RSS_HASH))
1521                 head->hash.rss = rte_le_to_cpu_32(desc->wb.lower.hi_dword.rss);
1522         else if (pkt_flags & PKT_RX_FDIR) {
1523                 head->hash.fdir.hash =
1524                         rte_le_to_cpu_16(desc->wb.lower.hi_dword.csum_ip.csum)
1525                                                           & IXGBE_ATR_HASH_MASK;
1526                 head->hash.fdir.id =
1527                         rte_le_to_cpu_16(desc->wb.lower.hi_dword.csum_ip.ip_id);
1528         }
1529 }
1530
1531 /**
1532  * ixgbe_recv_pkts_lro - receive handler for the LRO case.
1533  *
1534  * @rx_queue Rx queue handle
1535  * @rx_pkts table of received packets
1536  * @nb_pkts size of rx_pkts table
1537  * @bulk_alloc if TRUE, bulk allocation is used to refill the HW ring
1538  *
1539  * Handles the Rx HW ring completions when RSC feature is configured. Uses an
1540  * additional ring of ixgbe_rsc_entry's that will hold the relevant RSC info.
1541  *
1542  * We use the same logic as the Linux and FreeBSD ixgbe drivers:
1543  * 1) When non-EOP RSC completion arrives:
1544  *    a) Update the HEAD of the current RSC aggregation cluster with the new
1545  *       segment's data length.
1546  *    b) Set the "next" pointer of the current segment to point to the segment
1547  *       at the NEXTP index.
1548  *    c) Pass the HEAD of RSC aggregation cluster on to the next NEXTP entry
1549  *       in the sw_rsc_ring.
1550  * 2) When EOP arrives we just update the cluster's total length and offload
1551  *    flags and deliver the cluster up to the upper layers. In our case - put it
1552  *    in the rx_pkts table.
1553  *
1554  * Returns the number of received packets/clusters (according to the "bulk
1555  * receive" interface).
1556  */
1557 static inline uint16_t
1558 ixgbe_recv_pkts_lro(void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t nb_pkts,
1559                     bool bulk_alloc)
1560 {
1561         struct ixgbe_rx_queue *rxq = rx_queue;
1562         volatile union ixgbe_adv_rx_desc *rx_ring = rxq->rx_ring;
1563         struct ixgbe_rx_entry *sw_ring = rxq->sw_ring;
1564         struct ixgbe_scattered_rx_entry *sw_sc_ring = rxq->sw_sc_ring;
1565         uint16_t rx_id = rxq->rx_tail;
1566         uint16_t nb_rx = 0;
1567         uint16_t nb_hold = rxq->nb_rx_hold;
1568         uint16_t prev_id = rxq->rx_tail;
1569
1570         while (nb_rx < nb_pkts) {
1571                 bool eop;
1572                 struct ixgbe_rx_entry *rxe;
1573                 struct ixgbe_scattered_rx_entry *sc_entry;
1574                 struct ixgbe_scattered_rx_entry *next_sc_entry;
1575                 struct ixgbe_rx_entry *next_rxe;
1576                 struct rte_mbuf *first_seg;
1577                 struct rte_mbuf *rxm;
1578                 struct rte_mbuf *nmb;
1579                 union ixgbe_adv_rx_desc rxd;
1580                 uint16_t data_len;
1581                 uint16_t next_id;
1582                 volatile union ixgbe_adv_rx_desc *rxdp;
1583                 uint32_t staterr;
1584
1585 next_desc:
1586                 /*
1587                  * The code in this whole file uses the volatile pointer to
1588                  * ensure the read ordering of the status and the rest of the
1589                  * descriptor fields (on the compiler level only!!!). This is so
1590                  * UGLY - why not just use the compiler barrier instead? DPDK
1591                  * even has the rte_compiler_barrier() for that.
1592                  *
1593                  * But most importantly this is just wrong because this doesn't
1594                  * ensure memory ordering in a general case at all. For
1595                  * instance, DPDK is supposed to work on Power CPUs where
1596                  * compiler barrier may just not be enough!
1597                  *
1598                  * I tried to write only this function properly to have a
1599                  * starting point (as a part of an LRO/RSC series) but the
1600                  * compiler cursed at me when I tried to cast away the
1601                  * "volatile" from rx_ring (yes, it's volatile too!!!). So, I'm
1602                  * keeping it the way it is for now.
1603                  *
1604                  * The code in this file is broken in so many other places and
1605                  * will just not work on a big endian CPU anyway therefore the
1606                  * lines below will have to be revisited together with the rest
1607                  * of the ixgbe PMD.
1608                  *
1609                  * TODO:
1610                  *    - Get rid of "volatile" crap and let the compiler do its
1611                  *      job.
1612                  *    - Use the proper memory barrier (rte_rmb()) to ensure the
1613                  *      memory ordering below.
1614                  */
1615                 rxdp = &rx_ring[rx_id];
1616                 staterr = rte_le_to_cpu_32(rxdp->wb.upper.status_error);
1617
1618                 if (!(staterr & IXGBE_RXDADV_STAT_DD))
1619                         break;
1620
1621                 rxd = *rxdp;
1622
1623                 PMD_RX_LOG(DEBUG, "port_id=%u queue_id=%u rx_id=%u "
1624                                   "staterr=0x%x data_len=%u",
1625                            rxq->port_id, rxq->queue_id, rx_id, staterr,
1626                            rte_le_to_cpu_16(rxd.wb.upper.length));
1627
1628                 if (!bulk_alloc) {
1629                         nmb = rte_rxmbuf_alloc(rxq->mb_pool);
1630                         if (nmb == NULL) {
1631                                 PMD_RX_LOG(DEBUG, "RX mbuf alloc failed "
1632                                                   "port_id=%u queue_id=%u",
1633                                            rxq->port_id, rxq->queue_id);
1634
1635                                 rte_eth_devices[rxq->port_id].data->
1636                                                         rx_mbuf_alloc_failed++;
1637                                 break;
1638                         }
1639                 }
1640                 else if (nb_hold > rxq->rx_free_thresh) {
1641                         uint16_t next_rdt = rxq->rx_free_trigger;
1642
1643                         if (!ixgbe_rx_alloc_bufs(rxq, false)) {
1644                                 rte_wmb();
1645                                 IXGBE_PCI_REG_WRITE(rxq->rdt_reg_addr,
1646                                                     next_rdt);
1647                                 nb_hold -= rxq->rx_free_thresh;
1648                         } else {
1649                                 PMD_RX_LOG(DEBUG, "RX bulk alloc failed "
1650                                                   "port_id=%u queue_id=%u",
1651                                            rxq->port_id, rxq->queue_id);
1652
1653                                 rte_eth_devices[rxq->port_id].data->
1654                                                         rx_mbuf_alloc_failed++;
1655                                 break;
1656                         }
1657                 }
1658
1659                 nb_hold++;
1660                 rxe = &sw_ring[rx_id];
1661                 eop = staterr & IXGBE_RXDADV_STAT_EOP;
1662
1663                 next_id = rx_id + 1;
1664                 if (next_id == rxq->nb_rx_desc)
1665                         next_id = 0;
1666
1667                 /* Prefetch next mbuf while processing current one. */
1668                 rte_ixgbe_prefetch(sw_ring[next_id].mbuf);
1669
1670                 /*
1671                  * When next RX descriptor is on a cache-line boundary,
1672                  * prefetch the next 4 RX descriptors and the next 8 pointers
1673                  * to mbufs.
1674                  */
1675                 if ((next_id & 0x3) == 0) {
1676                         rte_ixgbe_prefetch(&rx_ring[next_id]);
1677                         rte_ixgbe_prefetch(&sw_ring[next_id]);
1678                 }
1679
1680                 rxm = rxe->mbuf;
1681
1682                 if (!bulk_alloc) {
1683                         __le64 dma =
1684                           rte_cpu_to_le_64(rte_mbuf_data_dma_addr_default(nmb));
1685                         /*
1686                          * Update RX descriptor with the physical address of the
1687                          * new data buffer of the new allocated mbuf.
1688                          */
1689                         rxe->mbuf = nmb;
1690
1691                         rxm->data_off = RTE_PKTMBUF_HEADROOM;
1692                         rxdp->read.hdr_addr = 0;
1693                         rxdp->read.pkt_addr = dma;
1694                 } else
1695                         rxe->mbuf = NULL;
1696
1697                 /*
1698                  * Set data length & data buffer address of mbuf.
1699                  */
1700                 data_len = rte_le_to_cpu_16(rxd.wb.upper.length);
1701                 rxm->data_len = data_len;
1702
1703                 if (!eop) {
1704                         uint16_t nextp_id;
1705                         /*
1706                          * Get next descriptor index:
1707                          *  - For RSC it's in the NEXTP field.
1708                          *  - For a scattered packet - it's just a following
1709                          *    descriptor.
1710                          */
1711                         if (ixgbe_rsc_count(&rxd))
1712                                 nextp_id =
1713                                         (staterr & IXGBE_RXDADV_NEXTP_MASK) >>
1714                                                        IXGBE_RXDADV_NEXTP_SHIFT;
1715                         else
1716                                 nextp_id = next_id;
1717
1718                         next_sc_entry = &sw_sc_ring[nextp_id];
1719                         next_rxe = &sw_ring[nextp_id];
1720                         rte_ixgbe_prefetch(next_rxe);
1721                 }
1722
1723                 sc_entry = &sw_sc_ring[rx_id];
1724                 first_seg = sc_entry->fbuf;
1725                 sc_entry->fbuf = NULL;
1726
1727                 /*
1728                  * If this is the first buffer of the received packet,
1729                  * set the pointer to the first mbuf of the packet and
1730                  * initialize its context.
1731                  * Otherwise, update the total length and the number of segments
1732                  * of the current scattered packet, and update the pointer to
1733                  * the last mbuf of the current packet.
1734                  */
1735                 if (first_seg == NULL) {
1736                         first_seg = rxm;
1737                         first_seg->pkt_len = data_len;
1738                         first_seg->nb_segs = 1;
1739                 } else {
1740                         first_seg->pkt_len += data_len;
1741                         first_seg->nb_segs++;
1742                 }
1743
1744                 prev_id = rx_id;
1745                 rx_id = next_id;
1746
1747                 /*
1748                  * If this is not the last buffer of the received packet, update
1749                  * the pointer to the first mbuf at the NEXTP entry in the
1750                  * sw_sc_ring and continue to parse the RX ring.
1751                  */
1752                 if (!eop) {
1753                         rxm->next = next_rxe->mbuf;
1754                         next_sc_entry->fbuf = first_seg;
1755                         goto next_desc;
1756                 }
1757
1758                 /*
1759                  * This is the last buffer of the received packet - return
1760                  * the current cluster to the user.
1761                  */
1762                 rxm->next = NULL;
1763
1764                 /* Initialize the first mbuf of the returned packet */
1765                 ixgbe_fill_cluster_head_buf(first_seg, &rxd, rxq->port_id,
1766                                             staterr);
1767
1768                 /*
1769                  * Deal with the case when HW CRC strip is disabled.
1770                  * That can't happen when LRO is enabled, but it still could
1771                  * happen in scattered RX mode.
1772                  */
1773                 first_seg->pkt_len -= rxq->crc_len;
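                /*
                 * If the last segment carries nothing but (part of) the CRC,
                 * drop that segment entirely and trim the remaining CRC bytes
                 * from the previous segment; otherwise just trim the CRC from
                 * the last segment.
                 */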
1774                 if (unlikely(rxm->data_len <= rxq->crc_len)) {
1775                         struct rte_mbuf *lp;
1776
1777                         for (lp = first_seg; lp->next != rxm; lp = lp->next)
1778                                 ;
1779
1780                         first_seg->nb_segs--;
1781                         lp->data_len -= rxq->crc_len - rxm->data_len;
1782                         lp->next = NULL;
1783                         rte_pktmbuf_free_seg(rxm);
1784                 } else
1785                         rxm->data_len -= rxq->crc_len;
1786
1787                 /* Prefetch data of first segment, if configured to do so. */
1788                 rte_packet_prefetch((char *)first_seg->buf_addr +
1789                         first_seg->data_off);
1790
1791                 /*
1792                  * Store the mbuf address into the next entry of the array
1793                  * of returned packets.
1794                  */
1795                 rx_pkts[nb_rx++] = first_seg;
1796         }
1797
1798         /*
1799          * Record index of the next RX descriptor to probe.
1800          */
1801         rxq->rx_tail = rx_id;
1802
1803         /*
1804          * If the number of free RX descriptors is greater than the RX free
1805          * threshold of the queue, advance the Receive Descriptor Tail (RDT)
1806          * register.
1807          * Update the RDT with the value of the last processed RX descriptor
1808          * minus 1, to guarantee that the RDT register is never equal to the
1809          * RDH register, which creates a "full" ring situation from the
1810          * hardware point of view...
1811          */
1812         if (!bulk_alloc && nb_hold > rxq->rx_free_thresh) {
1813                 PMD_RX_LOG(DEBUG, "port_id=%u queue_id=%u rx_tail=%u "
1814                            "nb_hold=%u nb_rx=%u",
1815                            rxq->port_id, rxq->queue_id, rx_id, nb_hold, nb_rx);
1816
1817                 rte_wmb();
1818                 IXGBE_PCI_REG_WRITE(rxq->rdt_reg_addr, prev_id);
1819                 nb_hold = 0;
1820         }
1821
1822         rxq->nb_rx_hold = nb_hold;
1823         return nb_rx;
1824 }
1825
1826 uint16_t
1827 ixgbe_recv_pkts_lro_single_alloc(void *rx_queue, struct rte_mbuf **rx_pkts,
1828                                  uint16_t nb_pkts)
1829 {
1830         return ixgbe_recv_pkts_lro(rx_queue, rx_pkts, nb_pkts, false);
1831 }
1832
1833 uint16_t
1834 ixgbe_recv_pkts_lro_bulk_alloc(void *rx_queue, struct rte_mbuf **rx_pkts,
1835                                uint16_t nb_pkts)
1836 {
1837         return ixgbe_recv_pkts_lro(rx_queue, rx_pkts, nb_pkts, true);
1838 }
1839
1840 /*********************************************************************
1841  *
1842  *  Queue management functions
1843  *
1844  **********************************************************************/
1845
1846 static void __attribute__((cold))
1847 ixgbe_tx_queue_release_mbufs(struct ixgbe_tx_queue *txq)
1848 {
1849         unsigned i;
1850
1851         if (txq->sw_ring != NULL) {
1852                 for (i = 0; i < txq->nb_tx_desc; i++) {
1853                         if (txq->sw_ring[i].mbuf != NULL) {
1854                                 rte_pktmbuf_free_seg(txq->sw_ring[i].mbuf);
1855                                 txq->sw_ring[i].mbuf = NULL;
1856                         }
1857                 }
1858         }
1859 }
1860
1861 static void __attribute__((cold))
1862 ixgbe_tx_free_swring(struct ixgbe_tx_queue *txq)
1863 {
1864         if (txq != NULL &&
1865             txq->sw_ring != NULL)
1866                 rte_free(txq->sw_ring);
1867 }
1868
1869 static void __attribute__((cold))
1870 ixgbe_tx_queue_release(struct ixgbe_tx_queue *txq)
1871 {
1872         if (txq != NULL && txq->ops != NULL) {
1873                 txq->ops->release_mbufs(txq);
1874                 txq->ops->free_swring(txq);
1875                 rte_free(txq);
1876         }
1877 }
1878
1879 void __attribute__((cold))
1880 ixgbe_dev_tx_queue_release(void *txq)
1881 {
1882         ixgbe_tx_queue_release(txq);
1883 }
1884
1885 /* (Re)set dynamic ixgbe_tx_queue fields to defaults */
1886 static void __attribute__((cold))
1887 ixgbe_reset_tx_queue(struct ixgbe_tx_queue *txq)
1888 {
1889         static const union ixgbe_adv_tx_desc zeroed_desc = {{0}};
1890         struct ixgbe_tx_entry *txe = txq->sw_ring;
1891         uint16_t prev, i;
1892
1893         /* Zero out HW ring memory */
1894         for (i = 0; i < txq->nb_tx_desc; i++) {
1895                 txq->tx_ring[i] = zeroed_desc;
1896         }
1897
1898         /* Initialize SW ring entries */
1899         prev = (uint16_t) (txq->nb_tx_desc - 1);
1900         for (i = 0; i < txq->nb_tx_desc; i++) {
1901                 volatile union ixgbe_adv_tx_desc *txd = &txq->tx_ring[i];
1902                 txd->wb.status = rte_cpu_to_le_32(IXGBE_TXD_STAT_DD);
1903                 txe[i].mbuf = NULL;
1904                 txe[i].last_id = i;
1905                 txe[prev].next_id = i;
1906                 prev = i;
1907         }
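        /*
         * Every descriptor was written above with the DD bit set, so the
         * transmit cleanup logic sees the whole ring as already completed
         * and can reclaim entries from the very first transmit.
         */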
1908
1909         txq->tx_next_dd = (uint16_t)(txq->tx_rs_thresh - 1);
1910         txq->tx_next_rs = (uint16_t)(txq->tx_rs_thresh - 1);
1911
1912         txq->tx_tail = 0;
1913         txq->nb_tx_used = 0;
1914         /*
1915          * Always allow 1 descriptor to be un-allocated to avoid
1916          * a H/W race condition
1917          */
1918         txq->last_desc_cleaned = (uint16_t)(txq->nb_tx_desc - 1);
1919         txq->nb_tx_free = (uint16_t)(txq->nb_tx_desc - 1);
1920         txq->ctx_curr = 0;
1921         memset((void*)&txq->ctx_cache, 0,
1922                 IXGBE_CTX_NUM * sizeof(struct ixgbe_advctx_info));
1923 }
1924
1925 static const struct ixgbe_txq_ops def_txq_ops = {
1926         .release_mbufs = ixgbe_tx_queue_release_mbufs,
1927         .free_swring = ixgbe_tx_free_swring,
1928         .reset = ixgbe_reset_tx_queue,
1929 };
1930
1931 /* Takes an ethdev and a queue and sets up the tx function to be used based on
1932  * the queue parameters. Used in tx_queue_setup by primary process and then
1933  * in dev_init by secondary process when attaching to an existing ethdev.
1934  */
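/*
 * Selection summary (derived from the checks below): the simple Tx path
 * requires that no Tx offloads are requested (IXGBE_SIMPLE_FLAGS) and that
 * tx_rs_thresh >= RTE_PMD_IXGBE_TX_MAX_BURST; the vector path additionally
 * requires tx_rs_thresh <= RTE_IXGBE_TX_MAX_FREE_BUF_SZ and a successful
 * ixgbe_txq_vec_setup(). Otherwise the full-featured ixgbe_xmit_pkts() is
 * used.
 */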
1935 void __attribute__((cold))
1936 ixgbe_set_tx_function(struct rte_eth_dev *dev, struct ixgbe_tx_queue *txq)
1937 {
1938         /* Use a simple Tx queue (no offloads, no multi segs) if possible */
1939         if (((txq->txq_flags & IXGBE_SIMPLE_FLAGS) == IXGBE_SIMPLE_FLAGS)
1940                         && (txq->tx_rs_thresh >= RTE_PMD_IXGBE_TX_MAX_BURST)) {
1941                 PMD_INIT_LOG(DEBUG, "Using simple tx code path");
1942 #ifdef RTE_IXGBE_INC_VECTOR
1943                 if (txq->tx_rs_thresh <= RTE_IXGBE_TX_MAX_FREE_BUF_SZ &&
1944                                 (rte_eal_process_type() != RTE_PROC_PRIMARY ||
1945                                         ixgbe_txq_vec_setup(txq) == 0)) {
1946                         PMD_INIT_LOG(DEBUG, "Vector tx enabled.");
1947                         dev->tx_pkt_burst = ixgbe_xmit_pkts_vec;
1948                 } else
1949 #endif
1950                 dev->tx_pkt_burst = ixgbe_xmit_pkts_simple;
1951         } else {
1952                 PMD_INIT_LOG(DEBUG, "Using full-featured tx code path");
1953                 PMD_INIT_LOG(DEBUG,
1954                                 " - txq_flags = %lx " "[IXGBE_SIMPLE_FLAGS=%lx]",
1955                                 (unsigned long)txq->txq_flags,
1956                                 (unsigned long)IXGBE_SIMPLE_FLAGS);
1957                 PMD_INIT_LOG(DEBUG,
1958                                 " - tx_rs_thresh = %lu " "[RTE_PMD_IXGBE_TX_MAX_BURST=%lu]",
1959                                 (unsigned long)txq->tx_rs_thresh,
1960                                 (unsigned long)RTE_PMD_IXGBE_TX_MAX_BURST);
1961                 dev->tx_pkt_burst = ixgbe_xmit_pkts;
1962         }
1963 }
1964
1965 int __attribute__((cold))
1966 ixgbe_dev_tx_queue_setup(struct rte_eth_dev *dev,
1967                          uint16_t queue_idx,
1968                          uint16_t nb_desc,
1969                          unsigned int socket_id,
1970                          const struct rte_eth_txconf *tx_conf)
1971 {
1972         const struct rte_memzone *tz;
1973         struct ixgbe_tx_queue *txq;
1974         struct ixgbe_hw     *hw;
1975         uint16_t tx_rs_thresh, tx_free_thresh;
1976
1977         PMD_INIT_FUNC_TRACE();
1978         hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
1979
1980         /*
1981          * Validate number of transmit descriptors.
1982          * It must not exceed hardware maximum, and must be multiple
1983          * of IXGBE_ALIGN.
1984          */
1985         if (nb_desc % IXGBE_TXD_ALIGN != 0 ||
1986                         (nb_desc > IXGBE_MAX_RING_DESC) ||
1987                         (nb_desc < IXGBE_MIN_RING_DESC)) {
1988                 return -EINVAL;
1989         }
1990
1991         /*
1992          * The following two parameters control the setting of the RS bit on
1993          * transmit descriptors.
1994          * TX descriptors will have their RS bit set after txq->tx_rs_thresh
1995          * descriptors have been used.
1996          * The TX descriptor ring will be cleaned after txq->tx_free_thresh
1997          * descriptors are used or if the number of descriptors required
1998          * to transmit a packet is greater than the number of free TX
1999          * descriptors.
2000          * The following constraints must be satisfied:
2001          *  tx_rs_thresh must be greater than 0.
2002          *  tx_rs_thresh must be less than the size of the ring minus 2.
2003          *  tx_rs_thresh must be less than or equal to tx_free_thresh.
2004          *  tx_rs_thresh must be a divisor of the ring size.
2005          *  tx_free_thresh must be greater than 0.
2006          *  tx_free_thresh must be less than the size of the ring minus 3.
2007          * One descriptor in the TX ring is used as a sentinel to avoid a
2008          * H/W race condition, hence the maximum threshold constraints.
2009          * When set to zero use default values.
2010          */
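        /*
         * Worked example (values assumed for illustration only): with a
         * 512-descriptor ring and tx_rs_thresh = tx_free_thresh = 32 (the
         * DEFAULT_* values used below when the user passes 0), every
         * constraint above holds: 32 > 0, 32 < 510, 32 <= 32,
         * 512 % 32 == 0 and 32 < 509.
         */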
2011         tx_rs_thresh = (uint16_t)((tx_conf->tx_rs_thresh) ?
2012                         tx_conf->tx_rs_thresh : DEFAULT_TX_RS_THRESH);
2013         tx_free_thresh = (uint16_t)((tx_conf->tx_free_thresh) ?
2014                         tx_conf->tx_free_thresh : DEFAULT_TX_FREE_THRESH);
2015         if (tx_rs_thresh >= (nb_desc - 2)) {
2016                 PMD_INIT_LOG(ERR, "tx_rs_thresh must be less than the number "
2017                         "of TX descriptors minus 2. (tx_rs_thresh=%u "
2018                         "port=%d queue=%d)", (unsigned int)tx_rs_thresh,
2019                         (int)dev->data->port_id, (int)queue_idx);
2020                 return -(EINVAL);
2021         }
2022         if (tx_rs_thresh > DEFAULT_TX_RS_THRESH) {
2023                 PMD_INIT_LOG(ERR, "tx_rs_thresh must be less than or equal to %u. "
2024                         "(tx_rs_thresh=%u port=%d queue=%d)",
2025                         DEFAULT_TX_RS_THRESH, (unsigned int)tx_rs_thresh,
2026                         (int)dev->data->port_id, (int)queue_idx);
2027                 return -(EINVAL);
2028         }
2029         if (tx_free_thresh >= (nb_desc - 3)) {
2030                 PMD_INIT_LOG(ERR, "tx_free_thresh must be less than the "
2031                              "number of TX descriptors minus 3. "
2032                              "(tx_free_thresh=%u "
2033                              "port=%d queue=%d)",
2034                              (unsigned int)tx_free_thresh,
2035                              (int)dev->data->port_id, (int)queue_idx);
2036                 return -(EINVAL);
2037         }
2038         if (tx_rs_thresh > tx_free_thresh) {
2039                 PMD_INIT_LOG(ERR, "tx_rs_thresh must be less than or equal to "
2040                              "tx_free_thresh. (tx_free_thresh=%u "
2041                              "tx_rs_thresh=%u port=%d queue=%d)",
2042                              (unsigned int)tx_free_thresh,
2043                              (unsigned int)tx_rs_thresh,
2044                              (int)dev->data->port_id,
2045                              (int)queue_idx);
2046                 return -(EINVAL);
2047         }
2048         if ((nb_desc % tx_rs_thresh) != 0) {
2049                 PMD_INIT_LOG(ERR, "tx_rs_thresh must be a divisor of the "
2050                              "number of TX descriptors. (tx_rs_thresh=%u "
2051                              "port=%d queue=%d)", (unsigned int)tx_rs_thresh,
2052                              (int)dev->data->port_id, (int)queue_idx);
2053                 return -(EINVAL);
2054         }
2055
2056         /*
2057          * If tx_rs_thresh is greater than 1, then TX WTHRESH should be
2058          * set to 0. If WTHRESH is greater than zero, the RS bit is ignored
2059          * by the NIC and all descriptors are written back after the NIC
2060          * accumulates WTHRESH descriptors.
2061          */
2062         if ((tx_rs_thresh > 1) && (tx_conf->tx_thresh.wthresh != 0)) {
2063                 PMD_INIT_LOG(ERR, "TX WTHRESH must be set to 0 if "
2064                              "tx_rs_thresh is greater than 1. (tx_rs_thresh=%u "
2065                              "port=%d queue=%d)", (unsigned int)tx_rs_thresh,
2066                              (int)dev->data->port_id, (int)queue_idx);
2067                 return -(EINVAL);
2068         }
2069
2070         /* Free memory prior to re-allocation if needed... */
2071         if (dev->data->tx_queues[queue_idx] != NULL) {
2072                 ixgbe_tx_queue_release(dev->data->tx_queues[queue_idx]);
2073                 dev->data->tx_queues[queue_idx] = NULL;
2074         }
2075
2076         /* First allocate the tx queue data structure */
2077         txq = rte_zmalloc_socket("ethdev TX queue", sizeof(struct ixgbe_tx_queue),
2078                                  RTE_CACHE_LINE_SIZE, socket_id);
2079         if (txq == NULL)
2080                 return -ENOMEM;
2081
2082         /*
2083          * Allocate TX ring hardware descriptors. A memzone large enough to
2084          * handle the maximum ring size is allocated in order to allow for
2085          * resizing in later calls to the queue setup function.
2086          */
2087         tz = rte_eth_dma_zone_reserve(dev, "tx_ring", queue_idx,
2088                         sizeof(union ixgbe_adv_tx_desc) * IXGBE_MAX_RING_DESC,
2089                         IXGBE_ALIGN, socket_id);
2090         if (tz == NULL) {
2091                 ixgbe_tx_queue_release(txq);
2092                 return -ENOMEM;
2093         }
2094
2095         txq->nb_tx_desc = nb_desc;
2096         txq->tx_rs_thresh = tx_rs_thresh;
2097         txq->tx_free_thresh = tx_free_thresh;
2098         txq->pthresh = tx_conf->tx_thresh.pthresh;
2099         txq->hthresh = tx_conf->tx_thresh.hthresh;
2100         txq->wthresh = tx_conf->tx_thresh.wthresh;
2101         txq->queue_id = queue_idx;
2102         txq->reg_idx = (uint16_t)((RTE_ETH_DEV_SRIOV(dev).active == 0) ?
2103                 queue_idx : RTE_ETH_DEV_SRIOV(dev).def_pool_q_idx + queue_idx);
2104         txq->port_id = dev->data->port_id;
2105         txq->txq_flags = tx_conf->txq_flags;
2106         txq->ops = &def_txq_ops;
2107         txq->tx_deferred_start = tx_conf->tx_deferred_start;
2108
2109         /*
2110          * Use VFTDT as the TX tail register if a virtual function is detected
2111          */
2112         if (hw->mac.type == ixgbe_mac_82599_vf ||
2113             hw->mac.type == ixgbe_mac_X540_vf ||
2114             hw->mac.type == ixgbe_mac_X550_vf ||
2115             hw->mac.type == ixgbe_mac_X550EM_x_vf)
2116                 txq->tdt_reg_addr = IXGBE_PCI_REG_ADDR(hw, IXGBE_VFTDT(queue_idx));
2117         else
2118                 txq->tdt_reg_addr = IXGBE_PCI_REG_ADDR(hw, IXGBE_TDT(txq->reg_idx));
2119
2120         txq->tx_ring_phys_addr = rte_mem_phy2mch(tz->memseg_id, tz->phys_addr);
2121         txq->tx_ring = (union ixgbe_adv_tx_desc *) tz->addr;
2122
2123         /* Allocate software ring */
2124         txq->sw_ring = rte_zmalloc_socket("txq->sw_ring",
2125                                 sizeof(struct ixgbe_tx_entry) * nb_desc,
2126                                 RTE_CACHE_LINE_SIZE, socket_id);
2127         if (txq->sw_ring == NULL) {
2128                 ixgbe_tx_queue_release(txq);
2129                 return -ENOMEM;
2130         }
2131         PMD_INIT_LOG(DEBUG, "sw_ring=%p hw_ring=%p dma_addr=0x%"PRIx64,
2132                      txq->sw_ring, txq->tx_ring, txq->tx_ring_phys_addr);
2133
2134         /* set up vector or scalar TX function as appropriate */
2135         ixgbe_set_tx_function(dev, txq);
2136
2137         txq->ops->reset(txq);
2138
2139         dev->data->tx_queues[queue_idx] = txq;
2140
2141
2142         return 0;
2143 }
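/*
 * Illustrative usage (not part of the driver): this setup function is not
 * called directly; it is reached through the ethdev API. A hypothetical
 * call, with port 0, queue 0, 512 descriptors and the default thresholds
 * (NULL tx_conf), could look like:
 *
 *     ret = rte_eth_tx_queue_setup(0, 0, 512, rte_socket_id(), NULL);
 *     if (ret < 0)
 *             rte_exit(EXIT_FAILURE, "tx queue setup failed\n");
 */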
2144
2145 /**
2146  * ixgbe_free_sc_cluster - free the not-yet-completed scattered cluster
2147  *
2148  * The "next" pointer of the last segment of (not-yet-completed) RSC clusters
2149  * in the sw_rsc_ring is not set to NULL but rather points to the next
2150  * mbuf of this RSC aggregation (that has not been completed yet and still
2151  * resides on the HW ring). So, instead of calling rte_pktmbuf_free(), we
2152  * just free the first "nb_segs" segments of the cluster explicitly by calling
2153  * rte_pktmbuf_free_seg() on each of them.
2154  *
2155  * @m scattered cluster head
2156  */
2157 static void __attribute__((cold))
2158 ixgbe_free_sc_cluster(struct rte_mbuf *m)
2159 {
2160         uint8_t i, nb_segs = m->nb_segs;
2161         struct rte_mbuf *next_seg;
2162
2163         for (i = 0; i < nb_segs; i++) {
2164                 next_seg = m->next;
2165                 rte_pktmbuf_free_seg(m);
2166                 m = next_seg;
2167         }
2168 }
2169
2170 static void __attribute__((cold))
2171 ixgbe_rx_queue_release_mbufs(struct ixgbe_rx_queue *rxq)
2172 {
2173         unsigned i;
2174
2175 #ifdef RTE_IXGBE_INC_VECTOR
2176         /* SSE Vector driver has a different way of releasing mbufs. */
2177         if (rxq->rx_using_sse) {
2178                 ixgbe_rx_queue_release_mbufs_vec(rxq);
2179                 return;
2180         }
2181 #endif
2182
2183         if (rxq->sw_ring != NULL) {
2184                 for (i = 0; i < rxq->nb_rx_desc; i++) {
2185                         if (rxq->sw_ring[i].mbuf != NULL) {
2186                                 rte_pktmbuf_free_seg(rxq->sw_ring[i].mbuf);
2187                                 rxq->sw_ring[i].mbuf = NULL;
2188                         }
2189                 }
2190                 if (rxq->rx_nb_avail) {
2191                         for (i = 0; i < rxq->rx_nb_avail; ++i) {
2192                                 struct rte_mbuf *mb;
2193                                 mb = rxq->rx_stage[rxq->rx_next_avail + i];
2194                                 rte_pktmbuf_free_seg(mb);
2195                         }
2196                         rxq->rx_nb_avail = 0;
2197                 }
2198         }
2199
2200         if (rxq->sw_sc_ring)
2201                 for (i = 0; i < rxq->nb_rx_desc; i++)
2202                         if (rxq->sw_sc_ring[i].fbuf) {
2203                                 ixgbe_free_sc_cluster(rxq->sw_sc_ring[i].fbuf);
2204                                 rxq->sw_sc_ring[i].fbuf = NULL;
2205                         }
2206 }
2207
2208 static void __attribute__((cold))
2209 ixgbe_rx_queue_release(struct ixgbe_rx_queue *rxq)
2210 {
2211         if (rxq != NULL) {
2212                 ixgbe_rx_queue_release_mbufs(rxq);
2213                 rte_free(rxq->sw_ring);
2214                 rte_free(rxq->sw_sc_ring);
2215                 rte_free(rxq);
2216         }
2217 }
2218
2219 void __attribute__((cold))
2220 ixgbe_dev_rx_queue_release(void *rxq)
2221 {
2222         ixgbe_rx_queue_release(rxq);
2223 }
2224
2225 /*
2226  * Check if Rx Burst Bulk Alloc function can be used.
2227  * Return
2228  *        0: the preconditions are satisfied and the bulk allocation function
2229  *           can be used.
2230  *  -EINVAL: the preconditions are NOT satisfied and the default Rx burst
2231  *           function must be used.
2232  */
2233 static inline int __attribute__((cold))
2234 check_rx_burst_bulk_alloc_preconditions(struct ixgbe_rx_queue *rxq)
2235 {
2236         int ret = 0;
2237
2238         /*
2239          * Make sure the following pre-conditions are satisfied:
2240          *   rxq->rx_free_thresh >= RTE_PMD_IXGBE_RX_MAX_BURST
2241          *   rxq->rx_free_thresh < rxq->nb_rx_desc
2242          *   (rxq->nb_rx_desc % rxq->rx_free_thresh) == 0
2243          *   rxq->nb_rx_desc<(IXGBE_MAX_RING_DESC-RTE_PMD_IXGBE_RX_MAX_BURST)
2244          * Scattered packets are not supported.  This should be checked
2245          * outside of this function.
2246          */
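        /*
         * For example (illustrative values only), a queue with
         * nb_rx_desc = 128 and rx_free_thresh = 32 satisfies all of the
         * above, assuming RTE_PMD_IXGBE_RX_MAX_BURST is 32 and
         * IXGBE_MAX_RING_DESC is 4096.
         */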
2247         if (!(rxq->rx_free_thresh >= RTE_PMD_IXGBE_RX_MAX_BURST)) {
2248                 PMD_INIT_LOG(DEBUG, "Rx Burst Bulk Alloc Preconditions: "
2249                              "rxq->rx_free_thresh=%d, "
2250                              "RTE_PMD_IXGBE_RX_MAX_BURST=%d",
2251                              rxq->rx_free_thresh, RTE_PMD_IXGBE_RX_MAX_BURST);
2252                 ret = -EINVAL;
2253         } else if (!(rxq->rx_free_thresh < rxq->nb_rx_desc)) {
2254                 PMD_INIT_LOG(DEBUG, "Rx Burst Bulk Alloc Preconditions: "
2255                              "rxq->rx_free_thresh=%d, "
2256                              "rxq->nb_rx_desc=%d",
2257                              rxq->rx_free_thresh, rxq->nb_rx_desc);
2258                 ret = -EINVAL;
2259         } else if (!((rxq->nb_rx_desc % rxq->rx_free_thresh) == 0)) {
2260                 PMD_INIT_LOG(DEBUG, "Rx Burst Bulk Alloc Preconditions: "
2261                              "rxq->nb_rx_desc=%d, "
2262                              "rxq->rx_free_thresh=%d",
2263                              rxq->nb_rx_desc, rxq->rx_free_thresh);
2264                 ret = -EINVAL;
2265         } else if (!(rxq->nb_rx_desc <
2266                (IXGBE_MAX_RING_DESC - RTE_PMD_IXGBE_RX_MAX_BURST))) {
2267                 PMD_INIT_LOG(DEBUG, "Rx Burst Bulk Alloc Preconditions: "
2268                              "rxq->nb_rx_desc=%d, "
2269                              "IXGBE_MAX_RING_DESC=%d, "
2270                              "RTE_PMD_IXGBE_RX_MAX_BURST=%d",
2271                              rxq->nb_rx_desc, IXGBE_MAX_RING_DESC,
2272                              RTE_PMD_IXGBE_RX_MAX_BURST);
2273                 ret = -EINVAL;
2274         }
2275
2276         return ret;
2277 }
2278
2279 /* Reset dynamic ixgbe_rx_queue fields back to defaults */
2280 static void __attribute__((cold))
2281 ixgbe_reset_rx_queue(struct ixgbe_adapter *adapter, struct ixgbe_rx_queue *rxq)
2282 {
2283         static const union ixgbe_adv_rx_desc zeroed_desc = {{0}};
2284         unsigned i;
2285         uint16_t len = rxq->nb_rx_desc;
2286
2287         /*
2288          * By default, the Rx queue setup function allocates enough memory for
2289          * IXGBE_MAX_RING_DESC.  The Rx Burst bulk allocation function requires
2290          * extra memory at the end of the descriptor ring to be zero'd out. A
2291          * pre-condition for using the Rx burst bulk alloc function is that the
2292          * number of descriptors is less than or equal to
2293          * (IXGBE_MAX_RING_DESC - RTE_PMD_IXGBE_RX_MAX_BURST). Check all the
2294          * constraints here to see if we need to zero out memory after the end
2295          * of the H/W descriptor ring.
2296          */
2297         if (adapter->rx_bulk_alloc_allowed)
2298                 /* zero out extra memory */
2299                 len += RTE_PMD_IXGBE_RX_MAX_BURST;
2300
2301         /*
2302          * Zero out HW ring memory. Zero out extra memory at the end of
2303          * the H/W ring so look-ahead logic in Rx Burst bulk alloc function
2304          * reads extra memory as zeros.
2305          */
2306         for (i = 0; i < len; i++) {
2307                 rxq->rx_ring[i] = zeroed_desc;
2308         }
2309
2310         /*
2311          * initialize extra software ring entries. Space for these extra
2312          * entries is always allocated
2313          */
2314         memset(&rxq->fake_mbuf, 0x0, sizeof(rxq->fake_mbuf));
2315         for (i = rxq->nb_rx_desc; i < len; ++i) {
2316                 rxq->sw_ring[i].mbuf = &rxq->fake_mbuf;
2317         }
2318
2319         rxq->rx_nb_avail = 0;
2320         rxq->rx_next_avail = 0;
2321         rxq->rx_free_trigger = (uint16_t)(rxq->rx_free_thresh - 1);
2322         rxq->rx_tail = 0;
2323         rxq->nb_rx_hold = 0;
2324         rxq->pkt_first_seg = NULL;
2325         rxq->pkt_last_seg = NULL;
2326
2327 #ifdef RTE_IXGBE_INC_VECTOR
2328         rxq->rxrearm_start = 0;
2329         rxq->rxrearm_nb = 0;
2330 #endif
2331 }
2332
2333 int __attribute__((cold))
2334 ixgbe_dev_rx_queue_setup(struct rte_eth_dev *dev,
2335                          uint16_t queue_idx,
2336                          uint16_t nb_desc,
2337                          unsigned int socket_id,
2338                          const struct rte_eth_rxconf *rx_conf,
2339                          struct rte_mempool *mp)
2340 {
2341         const struct rte_memzone *rz;
2342         struct ixgbe_rx_queue *rxq;
2343         struct ixgbe_hw     *hw;
2344         uint16_t len;
2345         struct ixgbe_adapter *adapter =
2346                 (struct ixgbe_adapter *)dev->data->dev_private;
2347
2348         PMD_INIT_FUNC_TRACE();
2349         hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
2350
2351         /*
2352          * Validate number of receive descriptors.
2353          * It must not exceed hardware maximum, and must be multiple
2354          * of IXGBE_ALIGN.
2355          */
2356         if (nb_desc % IXGBE_RXD_ALIGN != 0 ||
2357                         (nb_desc > IXGBE_MAX_RING_DESC) ||
2358                         (nb_desc < IXGBE_MIN_RING_DESC)) {
2359                 return -EINVAL;
2360         }
2361
2362         /* Free memory prior to re-allocation if needed... */
2363         if (dev->data->rx_queues[queue_idx] != NULL) {
2364                 ixgbe_rx_queue_release(dev->data->rx_queues[queue_idx]);
2365                 dev->data->rx_queues[queue_idx] = NULL;
2366         }
2367
2368         /* First allocate the rx queue data structure */
2369         rxq = rte_zmalloc_socket("ethdev RX queue", sizeof(struct ixgbe_rx_queue),
2370                                  RTE_CACHE_LINE_SIZE, socket_id);
2371         if (rxq == NULL)
2372                 return -ENOMEM;
2373         rxq->mb_pool = mp;
2374         rxq->nb_rx_desc = nb_desc;
2375         rxq->rx_free_thresh = rx_conf->rx_free_thresh;
2376         rxq->queue_id = queue_idx;
2377         rxq->reg_idx = (uint16_t)((RTE_ETH_DEV_SRIOV(dev).active == 0) ?
2378                 queue_idx : RTE_ETH_DEV_SRIOV(dev).def_pool_q_idx + queue_idx);
2379         rxq->port_id = dev->data->port_id;
2380         rxq->crc_len = (uint8_t) ((dev->data->dev_conf.rxmode.hw_strip_crc) ?
2381                                                         0 : ETHER_CRC_LEN);
2382         rxq->drop_en = rx_conf->rx_drop_en;
2383         rxq->rx_deferred_start = rx_conf->rx_deferred_start;
2384
2385         /*
2386          * Allocate RX ring hardware descriptors. A memzone large enough to
2387          * handle the maximum ring size is allocated in order to allow for
2388          * resizing in later calls to the queue setup function.
2389          */
2390         rz = rte_eth_dma_zone_reserve(dev, "rx_ring", queue_idx,
2391                                       RX_RING_SZ, IXGBE_ALIGN, socket_id);
2392         if (rz == NULL) {
2393                 ixgbe_rx_queue_release(rxq);
2394                 return -ENOMEM;
2395         }
2396
2397         /*
2398          * Zero init all the descriptors in the ring.
2399          */
2400         memset (rz->addr, 0, RX_RING_SZ);
2401
2402         /*
2403          * Use VFRDT/VFRDH as the RX ring registers if a virtual function is detected
2404          */
2405         if (hw->mac.type == ixgbe_mac_82599_vf ||
2406             hw->mac.type == ixgbe_mac_X540_vf ||
2407             hw->mac.type == ixgbe_mac_X550_vf ||
2408             hw->mac.type == ixgbe_mac_X550EM_x_vf) {
2409                 rxq->rdt_reg_addr =
2410                         IXGBE_PCI_REG_ADDR(hw, IXGBE_VFRDT(queue_idx));
2411                 rxq->rdh_reg_addr =
2412                         IXGBE_PCI_REG_ADDR(hw, IXGBE_VFRDH(queue_idx));
2413         }
2414         else {
2415                 rxq->rdt_reg_addr =
2416                         IXGBE_PCI_REG_ADDR(hw, IXGBE_RDT(rxq->reg_idx));
2417                 rxq->rdh_reg_addr =
2418                         IXGBE_PCI_REG_ADDR(hw, IXGBE_RDH(rxq->reg_idx));
2419         }
2420
2421         rxq->rx_ring_phys_addr = rte_mem_phy2mch(rz->memseg_id, rz->phys_addr);
2422         rxq->rx_ring = (union ixgbe_adv_rx_desc *) rz->addr;
2423
2424         /*
2425          * Certain constraints must be met in order to use the bulk buffer
2426          * allocation Rx burst function. If any of the Rx queues doesn't meet
2427          * them, the feature is disabled for the whole port.
2428          */
2429         if (check_rx_burst_bulk_alloc_preconditions(rxq)) {
2430                 PMD_INIT_LOG(DEBUG, "queue[%d] doesn't meet Rx Bulk Alloc "
2431                                     "preconditions - canceling the feature for "
2432                                     "the whole port[%d]",
2433                              rxq->queue_id, rxq->port_id);
2434                 adapter->rx_bulk_alloc_allowed = false;
2435         }
2436
2437         /*
2438          * Allocate software ring. Allow for space at the end of the
2439          * S/W ring to make sure look-ahead logic in bulk alloc Rx burst
2440          * function does not access an invalid memory region.
2441          */
2442         len = nb_desc;
2443         if (adapter->rx_bulk_alloc_allowed)
2444                 len += RTE_PMD_IXGBE_RX_MAX_BURST;
2445
2446         rxq->sw_ring = rte_zmalloc_socket("rxq->sw_ring",
2447                                           sizeof(struct ixgbe_rx_entry) * len,
2448                                           RTE_CACHE_LINE_SIZE, socket_id);
2449         if (!rxq->sw_ring) {
2450                 ixgbe_rx_queue_release(rxq);
2451                 return -ENOMEM;
2452         }
2453
2454         /*
2455          * Always allocate even if it's not going to be needed in order to
2456          * simplify the code.
2457          *
2458          * This ring is used in LRO and Scattered Rx cases and Scattered Rx may
2459          * be requested in ixgbe_dev_rx_init(), which is called later from
2460          * dev_start() flow.
2461          */
2462         rxq->sw_sc_ring =
2463                 rte_zmalloc_socket("rxq->sw_sc_ring",
2464                                    sizeof(struct ixgbe_scattered_rx_entry) * len,
2465                                    RTE_CACHE_LINE_SIZE, socket_id);
2466         if (!rxq->sw_sc_ring) {
2467                 ixgbe_rx_queue_release(rxq);
2468                 return -ENOMEM;
2469         }
2470
2471         PMD_INIT_LOG(DEBUG, "sw_ring=%p sw_sc_ring=%p hw_ring=%p "
2472                             "dma_addr=0x%"PRIx64,
2473                      rxq->sw_ring, rxq->sw_sc_ring, rxq->rx_ring,
2474                      rxq->rx_ring_phys_addr);
2475
2476         if (!rte_is_power_of_2(nb_desc)) {
2477                 PMD_INIT_LOG(DEBUG, "queue[%d] doesn't meet Vector Rx "
2478                                     "preconditions - canceling the feature for "
2479                                     "the whole port[%d]",
2480                              rxq->queue_id, rxq->port_id);
2481                 adapter->rx_vec_allowed = false;
2482         } else
2483                 ixgbe_rxq_vec_setup(rxq);
2484
2485         dev->data->rx_queues[queue_idx] = rxq;
2486
2487         ixgbe_reset_rx_queue(adapter, rxq);
2488
2489         return 0;
2490 }
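/*
 * Illustrative usage (not part of the driver): like the Tx counterpart,
 * this function is reached through the ethdev API. A hypothetical call,
 * with port 0, queue 0, 128 descriptors, default rx_conf (NULL) and an
 * existing mbuf pool 'mp', could look like:
 *
 *     ret = rte_eth_rx_queue_setup(0, 0, 128, rte_socket_id(), NULL, mp);
 *     if (ret < 0)
 *             rte_exit(EXIT_FAILURE, "rx queue setup failed\n");
 */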
2491
2492 uint32_t
2493 ixgbe_dev_rx_queue_count(struct rte_eth_dev *dev, uint16_t rx_queue_id)
2494 {
2495 #define IXGBE_RXQ_SCAN_INTERVAL 4
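        /*
         * The ring is scanned in steps of IXGBE_RXQ_SCAN_INTERVAL descriptors
         * to bound the cost of the walk, so the returned count is an
         * approximation rounded to a multiple of that step.
         */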
2496         volatile union ixgbe_adv_rx_desc *rxdp;
2497         struct ixgbe_rx_queue *rxq;
2498         uint32_t desc = 0;
2499
2500         if (rx_queue_id >= dev->data->nb_rx_queues) {
2501                 PMD_RX_LOG(ERR, "Invalid RX queue id=%d", rx_queue_id);
2502                 return 0;
2503         }
2504
2505         rxq = dev->data->rx_queues[rx_queue_id];
2506         rxdp = &(rxq->rx_ring[rxq->rx_tail]);
2507
2508         while ((desc < rxq->nb_rx_desc) &&
2509                 (rxdp->wb.upper.status_error &
2510                         rte_cpu_to_le_32(IXGBE_RXDADV_STAT_DD))) {
2511                 desc += IXGBE_RXQ_SCAN_INTERVAL;
2512                 rxdp += IXGBE_RXQ_SCAN_INTERVAL;
2513                 if (rxq->rx_tail + desc >= rxq->nb_rx_desc)
2514                         rxdp = &(rxq->rx_ring[rxq->rx_tail +
2515                                 desc - rxq->nb_rx_desc]);
2516         }
2517
2518         return desc;
2519 }
2520
2521 int
2522 ixgbe_dev_rx_descriptor_done(void *rx_queue, uint16_t offset)
2523 {
2524         volatile union ixgbe_adv_rx_desc *rxdp;
2525         struct ixgbe_rx_queue *rxq = rx_queue;
2526         uint32_t desc;
2527
2528         if (unlikely(offset >= rxq->nb_rx_desc))
2529                 return 0;
2530         desc = rxq->rx_tail + offset;
2531         if (desc >= rxq->nb_rx_desc)
2532                 desc -= rxq->nb_rx_desc;
2533
2534         rxdp = &rxq->rx_ring[desc];
2535         return !!(rxdp->wb.upper.status_error &
2536                         rte_cpu_to_le_32(IXGBE_RXDADV_STAT_DD));
2537 }
2538
2539 void __attribute__((cold))
2540 ixgbe_dev_clear_queues(struct rte_eth_dev *dev)
2541 {
2542         unsigned i;
2543         struct ixgbe_adapter *adapter =
2544                 (struct ixgbe_adapter *)dev->data->dev_private;
2545
2546         PMD_INIT_FUNC_TRACE();
2547
2548         for (i = 0; i < dev->data->nb_tx_queues; i++) {
2549                 struct ixgbe_tx_queue *txq = dev->data->tx_queues[i];
2550                 if (txq != NULL) {
2551                         txq->ops->release_mbufs(txq);
2552                         txq->ops->reset(txq);
2553                 }
2554         }
2555
2556         for (i = 0; i < dev->data->nb_rx_queues; i++) {
2557                 struct ixgbe_rx_queue *rxq = dev->data->rx_queues[i];
2558                 if (rxq != NULL) {
2559                         ixgbe_rx_queue_release_mbufs(rxq);
2560                         ixgbe_reset_rx_queue(adapter, rxq);
2561                 }
2562         }
2563 }
2564
2565 void
2566 ixgbe_dev_free_queues(struct rte_eth_dev *dev)
2567 {
2568         unsigned i;
2569
2570         PMD_INIT_FUNC_TRACE();
2571
2572         for (i = 0; i < dev->data->nb_rx_queues; i++) {
2573                 ixgbe_dev_rx_queue_release(dev->data->rx_queues[i]);
2574                 dev->data->rx_queues[i] = NULL;
2575         }
2576         dev->data->nb_rx_queues = 0;
2577
2578         for (i = 0; i < dev->data->nb_tx_queues; i++) {
2579                 ixgbe_dev_tx_queue_release(dev->data->tx_queues[i]);
2580                 dev->data->tx_queues[i] = NULL;
2581         }
2582         dev->data->nb_tx_queues = 0;
2583 }
2584
2585 /*********************************************************************
2586  *
2587  *  Device RX/TX init functions
2588  *
2589  **********************************************************************/
2590
2591 /**
2592  * Receive Side Scaling (RSS)
2593  * See section 7.1.2.8 in the following document:
2594  *     "Intel 82599 10 GbE Controller Datasheet" - Revision 2.1 October 2009
2595  *
2596  * Principles:
2597  * The source and destination IP addresses of the IP header and the source
2598  * and destination ports of TCP/UDP headers, if any, of received packets are
2599  * hashed against a configurable random key to compute a 32-bit RSS hash result.
2600  * The seven (7) LSBs of the 32-bit hash result are used as an index into a
2601  * 128-entry redirection table (RETA).  Each entry of the RETA provides a 3-bit
2602  * RSS output index which is used as the RX queue index where to store the
2603  * received packets.
2604  * The following output is supplied in the RX write-back descriptor:
2605  *     - 32-bit result of the Microsoft RSS hash function,
2606  *     - 4-bit RSS type field.
2607  */
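/*
 * For example, a packet hashing to 0x1234ABCD would use
 * 0x1234ABCD & 0x7F = 0x4D (entry 77) as its RETA index, and the queue
 * number stored in that entry selects the Rx queue for the packet.
 */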
2608
2609 /*
2610  * RSS random key supplied in section 7.1.2.8.3 of the Intel 82599 datasheet.
2611  * Used as the default key.
2612  */
2613 static uint8_t rss_intel_key[40] = {
2614         0x6D, 0x5A, 0x56, 0xDA, 0x25, 0x5B, 0x0E, 0xC2,
2615         0x41, 0x67, 0x25, 0x3D, 0x43, 0xA3, 0x8F, 0xB0,
2616         0xD0, 0xCA, 0x2B, 0xCB, 0xAE, 0x7B, 0x30, 0xB4,
2617         0x77, 0xCB, 0x2D, 0xA3, 0x80, 0x30, 0xF2, 0x0C,
2618         0x6A, 0x42, 0xB7, 0x3B, 0xBE, 0xAC, 0x01, 0xFA,
2619 };
2620
2621 static void
2622 ixgbe_rss_disable(struct rte_eth_dev *dev)
2623 {
2624         struct ixgbe_hw *hw;
2625         uint32_t mrqc;
2626         uint32_t mrqc_reg;
2627
2628         hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
2629         mrqc_reg = ixgbe_mrqc_reg_get(hw->mac.type);
2630         mrqc = IXGBE_READ_REG(hw, mrqc_reg);
2631         mrqc &= ~IXGBE_MRQC_RSSEN;
2632         IXGBE_WRITE_REG(hw, mrqc_reg, mrqc);
2633 }
2634
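/*
 * Program the 40-byte RSS hash key, when one is supplied, into the ten
 * 32-bit RSSRK registers (4 key bytes per register, little-endian) and
 * enable the hash functions requested in rss_conf->rss_hf through MRQC.
 */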
2635 static void
2636 ixgbe_hw_rss_hash_set(struct ixgbe_hw *hw, struct rte_eth_rss_conf *rss_conf)
2637 {
2638         uint8_t  *hash_key;
2639         uint32_t mrqc;
2640         uint32_t rss_key;
2641         uint64_t rss_hf;
2642         uint16_t i;
2643         uint32_t mrqc_reg;
2644         uint32_t rssrk_reg;
2645
2646         mrqc_reg = ixgbe_mrqc_reg_get(hw->mac.type);
2647         rssrk_reg = ixgbe_rssrk_reg_get(hw->mac.type, 0);
2648
2649         hash_key = rss_conf->rss_key;
2650         if (hash_key != NULL) {
2651                 /* Fill in RSS hash key */
2652                 for (i = 0; i < 10; i++) {
2653                         rss_key  = hash_key[(i * 4)];
2654                         rss_key |= hash_key[(i * 4) + 1] << 8;
2655                         rss_key |= hash_key[(i * 4) + 2] << 16;
2656                         rss_key |= hash_key[(i * 4) + 3] << 24;
2657                         IXGBE_WRITE_REG_ARRAY(hw, rssrk_reg, i, rss_key);
2658                 }
2659         }
2660
2661         /* Set configured hashing protocols in MRQC register */
2662         rss_hf = rss_conf->rss_hf;
2663         mrqc = IXGBE_MRQC_RSSEN; /* Enable RSS */
2664         if (rss_hf & ETH_RSS_IPV4)
2665                 mrqc |= IXGBE_MRQC_RSS_FIELD_IPV4;
2666         if (rss_hf & ETH_RSS_NONFRAG_IPV4_TCP)
2667                 mrqc |= IXGBE_MRQC_RSS_FIELD_IPV4_TCP;
2668         if (rss_hf & ETH_RSS_IPV6)
2669                 mrqc |= IXGBE_MRQC_RSS_FIELD_IPV6;
2670         if (rss_hf & ETH_RSS_IPV6_EX)
2671                 mrqc |= IXGBE_MRQC_RSS_FIELD_IPV6_EX;
2672         if (rss_hf & ETH_RSS_NONFRAG_IPV6_TCP)
2673                 mrqc |= IXGBE_MRQC_RSS_FIELD_IPV6_TCP;
2674         if (rss_hf & ETH_RSS_IPV6_TCP_EX)
2675                 mrqc |= IXGBE_MRQC_RSS_FIELD_IPV6_EX_TCP;
2676         if (rss_hf & ETH_RSS_NONFRAG_IPV4_UDP)
2677                 mrqc |= IXGBE_MRQC_RSS_FIELD_IPV4_UDP;
2678         if (rss_hf & ETH_RSS_NONFRAG_IPV6_UDP)
2679                 mrqc |= IXGBE_MRQC_RSS_FIELD_IPV6_UDP;
2680         if (rss_hf & ETH_RSS_IPV6_UDP_EX)
2681                 mrqc |= IXGBE_MRQC_RSS_FIELD_IPV6_EX_UDP;
2682         IXGBE_WRITE_REG(hw, mrqc_reg, mrqc);
2683 }
2684
2685 int
2686 ixgbe_dev_rss_hash_update(struct rte_eth_dev *dev,
2687                           struct rte_eth_rss_conf *rss_conf)
2688 {
2689         struct ixgbe_hw *hw;
2690         uint32_t mrqc;
2691         uint64_t rss_hf;
2692         uint32_t mrqc_reg;
2693
2694         hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
2695
2696         if (!ixgbe_rss_update_sp(hw->mac.type)) {
2697                 PMD_DRV_LOG(ERR, "RSS hash update is not supported on this "
2698                         "NIC.");
2699                 return -ENOTSUP;
2700         }
2701         mrqc_reg = ixgbe_mrqc_reg_get(hw->mac.type);
2702
2703         /*
2704          * Excerpt from section 7.1.2.8 Receive-Side Scaling (RSS):
2705          *     "RSS enabling cannot be done dynamically while it must be
2706          *      preceded by a software reset"
2707          * Before changing anything, check that the update does not
2708          * attempt to disable RSS if RSS was enabled at initialization
2709          * time, nor to enable RSS if it was disabled at initialization
2710          * time.
2711          */
2712         rss_hf = rss_conf->rss_hf & IXGBE_RSS_OFFLOAD_ALL;
2713         mrqc = IXGBE_READ_REG(hw, mrqc_reg);
2714         if (!(mrqc & IXGBE_MRQC_RSSEN)) { /* RSS disabled */
2715                 if (rss_hf != 0) /* Enable RSS */
2716                         return -(EINVAL);
2717                 return 0; /* Nothing to do */
2718         }
2719         /* RSS enabled */
2720         if (rss_hf == 0) /* Disable RSS */
2721                 return -(EINVAL);
2722         ixgbe_hw_rss_hash_set(hw, rss_conf);
2723         return 0;
2724 }
2725
2726 int
2727 ixgbe_dev_rss_hash_conf_get(struct rte_eth_dev *dev,
2728                             struct rte_eth_rss_conf *rss_conf)
2729 {
2730         struct ixgbe_hw *hw;
2731         uint8_t *hash_key;
2732         uint32_t mrqc;
2733         uint32_t rss_key;
2734         uint64_t rss_hf;
2735         uint16_t i;
2736         uint32_t mrqc_reg;
2737         uint32_t rssrk_reg;
2738
2739         hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
2740         mrqc_reg = ixgbe_mrqc_reg_get(hw->mac.type);
2741         rssrk_reg = ixgbe_rssrk_reg_get(hw->mac.type, 0);
2742         hash_key = rss_conf->rss_key;
2743         if (hash_key != NULL) {
2744                 /* Return RSS hash key */
2745                 for (i = 0; i < 10; i++) {
2746                         rss_key = IXGBE_READ_REG_ARRAY(hw, rssrk_reg, i);
2747                         hash_key[(i * 4)] = rss_key & 0x000000FF;
2748                         hash_key[(i * 4) + 1] = (rss_key >> 8) & 0x000000FF;
2749                         hash_key[(i * 4) + 2] = (rss_key >> 16) & 0x000000FF;
2750                         hash_key[(i * 4) + 3] = (rss_key >> 24) & 0x000000FF;
2751                 }
2752         }
2753
2754         /* Get RSS functions configured in MRQC register */
2755         mrqc = IXGBE_READ_REG(hw, mrqc_reg);
2756         if ((mrqc & IXGBE_MRQC_RSSEN) == 0) { /* RSS is disabled */
2757                 rss_conf->rss_hf = 0;
2758                 return 0;
2759         }
2760         rss_hf = 0;
2761         if (mrqc & IXGBE_MRQC_RSS_FIELD_IPV4)
2762                 rss_hf |= ETH_RSS_IPV4;
2763         if (mrqc & IXGBE_MRQC_RSS_FIELD_IPV4_TCP)
2764                 rss_hf |= ETH_RSS_NONFRAG_IPV4_TCP;
2765         if (mrqc & IXGBE_MRQC_RSS_FIELD_IPV6)
2766                 rss_hf |= ETH_RSS_IPV6;
2767         if (mrqc & IXGBE_MRQC_RSS_FIELD_IPV6_EX)
2768                 rss_hf |= ETH_RSS_IPV6_EX;
2769         if (mrqc & IXGBE_MRQC_RSS_FIELD_IPV6_TCP)
2770                 rss_hf |= ETH_RSS_NONFRAG_IPV6_TCP;
2771         if (mrqc & IXGBE_MRQC_RSS_FIELD_IPV6_EX_TCP)
2772                 rss_hf |= ETH_RSS_IPV6_TCP_EX;
2773         if (mrqc & IXGBE_MRQC_RSS_FIELD_IPV4_UDP)
2774                 rss_hf |= ETH_RSS_NONFRAG_IPV4_UDP;
2775         if (mrqc & IXGBE_MRQC_RSS_FIELD_IPV6_UDP)
2776                 rss_hf |= ETH_RSS_NONFRAG_IPV6_UDP;
2777         if (mrqc & IXGBE_MRQC_RSS_FIELD_IPV6_EX_UDP)
2778                 rss_hf |= ETH_RSS_IPV6_UDP_EX;
2779         rss_conf->rss_hf = rss_hf;
2780         return 0;
2781 }
2782
2783 static void
2784 ixgbe_rss_configure(struct rte_eth_dev *dev)
2785 {
2786         struct rte_eth_rss_conf rss_conf;
2787         struct ixgbe_hw *hw;
2788         uint32_t reta;
2789         uint16_t i;
2790         uint16_t j;
2791         uint16_t sp_reta_size;
2792         uint32_t reta_reg;
2793
2794         PMD_INIT_FUNC_TRACE();
2795         hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
2796
2797         sp_reta_size = ixgbe_reta_size_get(hw->mac.type);
2798
2799         /*
2800          * Fill in redirection table
2801          * The byte-swap is needed because NIC registers are in
2802          * little-endian order.
2803          */
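        /*
         * Four 8-bit RETA entries are packed into each 32-bit register, so a
         * write is issued on every fourth iteration ((i & 3) == 3), assigning
         * Rx queue indices round-robin across the configured queues.
         */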
2804         reta = 0;
2805         for (i = 0, j = 0; i < sp_reta_size; i++, j++) {
2806                 reta_reg = ixgbe_reta_reg_get(hw->mac.type, i);
2807
2808                 if (j == dev->data->nb_rx_queues)
2809                         j = 0;
2810                 reta = (reta << 8) | j;
2811                 if ((i & 3) == 3)
2812                         IXGBE_WRITE_REG(hw, reta_reg,
2813                                         rte_bswap32(reta));
2814         }
2815
2816         /*
2817          * Configure the RSS key and the RSS protocols used to compute
2818          * the RSS hash of input packets.
2819          */
2820         rss_conf = dev->data->dev_conf.rx_adv_conf.rss_conf;
2821         if ((rss_conf.rss_hf & IXGBE_RSS_OFFLOAD_ALL) == 0) {
2822                 ixgbe_rss_disable(dev);
2823                 return;
2824         }
2825         if (rss_conf.rss_key == NULL)
2826                 rss_conf.rss_key = rss_intel_key; /* Default hash key */
2827         ixgbe_hw_rss_hash_set(hw, &rss_conf);
2828 }
2829
2830 #define NUM_VFTA_REGISTERS 128
2831 #define NIC_RX_BUFFER_SIZE 0x200
2832 #define X550_RX_BUFFER_SIZE 0x180
2833
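/*
 * Configure the Rx path for combined VMDq + DCB: split the Rx packet buffer
 * between traffic classes, select the VMDq/DCB mode in MRQC, map user
 * priorities to traffic classes and set up per-pool VLAN filters.
 */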
2834 static void
2835 ixgbe_vmdq_dcb_configure(struct rte_eth_dev *dev)
2836 {
2837         struct rte_eth_vmdq_dcb_conf *cfg;
2838         struct ixgbe_hw *hw;
2839         enum rte_eth_nb_pools num_pools;
2840         uint32_t mrqc, vt_ctl, queue_mapping, vlanctrl;
2841         uint16_t pbsize;
2842         uint8_t nb_tcs; /* number of traffic classes */
2843         int i;
2844
2845         PMD_INIT_FUNC_TRACE();
2846         hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
2847         cfg = &dev->data->dev_conf.rx_adv_conf.vmdq_dcb_conf;
2848         num_pools = cfg->nb_queue_pools;
2849         /* Check we have a valid number of pools */
2850         if (num_pools != ETH_16_POOLS && num_pools != ETH_32_POOLS) {
2851                 ixgbe_rss_disable(dev);
2852                 return;
2853         }
2854         /* 16 pools -> 8 traffic classes, 32 pools -> 4 traffic classes */
2855         nb_tcs = (uint8_t)(ETH_VMDQ_DCB_NUM_QUEUES / (int)num_pools);
2856
2857         /*
2858          * RXPBSIZE
2859          * split rx buffer up into sections, each for 1 traffic class
2860          */
2861         switch (hw->mac.type) {
2862         case ixgbe_mac_X550:
2863         case ixgbe_mac_X550EM_x:
2864                 pbsize = (uint16_t)(X550_RX_BUFFER_SIZE / nb_tcs);
2865                 break;
2866         default:
2867                 pbsize = (uint16_t)(NIC_RX_BUFFER_SIZE / nb_tcs);
2868                 break;
2869         }
2870         for (i = 0 ; i < nb_tcs; i++) {
2871                 uint32_t rxpbsize = IXGBE_READ_REG(hw, IXGBE_RXPBSIZE(i));
2872                 /* clear the 10-bit size field before setting the new value */
2873                 rxpbsize &= (~(0x3FF << IXGBE_RXPBSIZE_SHIFT));
2874                 rxpbsize |= (pbsize << IXGBE_RXPBSIZE_SHIFT); /* set value */
2875                 IXGBE_WRITE_REG(hw, IXGBE_RXPBSIZE(i), rxpbsize);
2876         }
2877         /* zero alloc all unused TCs */
2878         for (i = nb_tcs; i < ETH_DCB_NUM_USER_PRIORITIES; i++) {
2879                 uint32_t rxpbsize = IXGBE_READ_REG(hw, IXGBE_RXPBSIZE(i));
2880                 /* clear the 10-bit size field; unused TCs get no buffer */
2881                 rxpbsize &= (~(0x3FF << IXGBE_RXPBSIZE_SHIFT));
2882                 IXGBE_WRITE_REG(hw, IXGBE_RXPBSIZE(i), rxpbsize);
2883         }
2884
2885         /* MRQC: enable vmdq and dcb */
2886         mrqc = ((num_pools == ETH_16_POOLS) ? \
2887                 IXGBE_MRQC_VMDQRT8TCEN : IXGBE_MRQC_VMDQRT4TCEN );
2888         IXGBE_WRITE_REG(hw, IXGBE_MRQC, mrqc);
2889
2890         /* PFVTCTL: turn on virtualisation and set the default pool */
2891         vt_ctl = IXGBE_VT_CTL_VT_ENABLE | IXGBE_VT_CTL_REPLEN;
2892         if (cfg->enable_default_pool) {
2893                 vt_ctl |= (cfg->default_pool << IXGBE_VT_CTL_POOL_SHIFT);
2894         } else {
2895                 vt_ctl |= IXGBE_VT_CTL_DIS_DEFPL;
2896         }
2897
2898         IXGBE_WRITE_REG(hw, IXGBE_VT_CTL, vt_ctl);
2899
2900         /* RTRUP2TC: mapping user priorities to traffic classes (TCs) */
2901         queue_mapping = 0;
2902         for (i = 0; i < ETH_DCB_NUM_USER_PRIORITIES; i++)
2903                 /*
2904                  * mapping is done with 3 bits per priority,
2905                  * so shift by i*3 each time
2906                  */
2907                 queue_mapping |= ((cfg->dcb_tc[i] & 0x07) << (i * 3));
2908
2909         IXGBE_WRITE_REG(hw, IXGBE_RTRUP2TC, queue_mapping);
2910
2911         /* RTRPCS: DCB related */
2912         IXGBE_WRITE_REG(hw, IXGBE_RTRPCS, IXGBE_RMCS_RRM);
2913
2914         /* VLNCTRL: enable vlan filtering and allow all vlan tags through */
2915         vlanctrl = IXGBE_READ_REG(hw, IXGBE_VLNCTRL);
2916         vlanctrl |= IXGBE_VLNCTRL_VFE ; /* enable vlan filters */
2917         IXGBE_WRITE_REG(hw, IXGBE_VLNCTRL, vlanctrl);
2918
2919         /* VFTA - enable all vlan filters */
2920         for (i = 0; i < NUM_VFTA_REGISTERS; i++) {
2921                 IXGBE_WRITE_REG(hw, IXGBE_VFTA(i), 0xFFFFFFFF);
2922         }
2923
2924         /* VFRE: pool enabling for receive - 16 or 32 */
2925         IXGBE_WRITE_REG(hw, IXGBE_VFRE(0), \
2926                         num_pools == ETH_16_POOLS ? 0xFFFF : 0xFFFFFFFF);
2927
2928         /*
2929          * MPSAR - allow pools to read specific mac addresses
2930          * In this case, all pools should be able to read from mac addr 0
2931          */
2932         IXGBE_WRITE_REG(hw, IXGBE_MPSAR_LO(0), 0xFFFFFFFF);
2933         IXGBE_WRITE_REG(hw, IXGBE_MPSAR_HI(0), 0xFFFFFFFF);
2934
2935         /* PFVLVF, PFVLVFB: set up filters for vlan tags as configured */
2936         for (i = 0; i < cfg->nb_pool_maps; i++) {
2937                 /* set vlan id in VF register and set the valid bit */
2938                 IXGBE_WRITE_REG(hw, IXGBE_VLVF(i), (IXGBE_VLVF_VIEN | \
2939                                 (cfg->pool_map[i].vlan_id & 0xFFF)));
2940                 /*
2941                  * Put the allowed pools in VFB reg. As we only have 16 or 32
2942                  * pools, we only need to use the first half of the register
2943                  * i.e. bits 0-31
2944                  */
2945                 IXGBE_WRITE_REG(hw, IXGBE_VLVFB(i*2), cfg->pool_map[i].pools);
2946         }
2947 }
2948
2949 /**
2950  * ixgbe_dcb_config_tx_hw_config - Configure general DCB TX parameters
2951  * @hw: pointer to hardware structure
2952  * @dcb_config: pointer to ixgbe_dcb_config structure
2953  */
2954 static void
2955 ixgbe_dcb_tx_hw_config(struct ixgbe_hw *hw,
2956                struct ixgbe_dcb_config *dcb_config)
2957 {
2958         uint32_t reg;
2959         uint32_t q;
2960
2961         PMD_INIT_FUNC_TRACE();
2962         if (hw->mac.type != ixgbe_mac_82598EB) {
2963                 /* Disable the Tx desc arbiter so that MTQC can be changed */
2964                 reg = IXGBE_READ_REG(hw, IXGBE_RTTDCS);
2965                 reg |= IXGBE_RTTDCS_ARBDIS;
2966                 IXGBE_WRITE_REG(hw, IXGBE_RTTDCS, reg);
2967
2968                 /* Enable DCB for Tx with 8 TCs */
2969                 if (dcb_config->num_tcs.pg_tcs == 8) {
2970                         reg = IXGBE_MTQC_RT_ENA | IXGBE_MTQC_8TC_8TQ;
2971                 }
2972                 else {
2973                         reg = IXGBE_MTQC_RT_ENA | IXGBE_MTQC_4TC_4TQ;
2974                 }
2975                 if (dcb_config->vt_mode)
2976                     reg |= IXGBE_MTQC_VT_ENA;
2977                 IXGBE_WRITE_REG(hw, IXGBE_MTQC, reg);
2978
2979                 /* Disable drop for all queues */
2980                 for (q = 0; q < 128; q++)
2981                         IXGBE_WRITE_REG(hw, IXGBE_QDE,
2982                      (IXGBE_QDE_WRITE | (q << IXGBE_QDE_IDX_SHIFT)));
2983
2984                 /* Enable the Tx desc arbiter */
2985                 reg = IXGBE_READ_REG(hw, IXGBE_RTTDCS);
2986                 reg &= ~IXGBE_RTTDCS_ARBDIS;
2987                 IXGBE_WRITE_REG(hw, IXGBE_RTTDCS, reg);
2988
2989                 /* Enable Security TX Buffer IFG for DCB */
2990                 reg = IXGBE_READ_REG(hw, IXGBE_SECTXMINIFG);
2991                 reg |= IXGBE_SECTX_DCB;
2992                 IXGBE_WRITE_REG(hw, IXGBE_SECTXMINIFG, reg);
2993         }
2994         return;
2995 }
2996
2997 /**
2998  * ixgbe_vmdq_dcb_hw_tx_config - Configure general VMDQ+DCB TX parameters
2999  * @dev: pointer to rte_eth_dev structure
3000  * @dcb_config: pointer to ixgbe_dcb_config structure
3001  */
3002 static void
3003 ixgbe_vmdq_dcb_hw_tx_config(struct rte_eth_dev *dev,
3004                         struct ixgbe_dcb_config *dcb_config)
3005 {
3006         struct rte_eth_vmdq_dcb_tx_conf *vmdq_tx_conf =
3007                         &dev->data->dev_conf.tx_adv_conf.vmdq_dcb_tx_conf;
3008         struct ixgbe_hw *hw =
3009                         IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
3010
3011         PMD_INIT_FUNC_TRACE();
3012         if (hw->mac.type != ixgbe_mac_82598EB)
3013                 /*PF VF Transmit Enable*/
3014                 IXGBE_WRITE_REG(hw, IXGBE_VFTE(0),
3015                         vmdq_tx_conf->nb_queue_pools == ETH_16_POOLS ? 0xFFFF : 0xFFFFFFFF);
3016
3017         /*Configure general DCB TX parameters*/
3018         ixgbe_dcb_tx_hw_config(hw,dcb_config);
3019         return;
3020 }
3021
3022 static void
3023 ixgbe_vmdq_dcb_rx_config(struct rte_eth_dev *dev,
3024                         struct ixgbe_dcb_config *dcb_config)
3025 {
3026         struct rte_eth_vmdq_dcb_conf *vmdq_rx_conf =
3027                         &dev->data->dev_conf.rx_adv_conf.vmdq_dcb_conf;
3028         struct ixgbe_dcb_tc_config *tc;
3029         uint8_t i,j;
3030
3031         /* convert rte_eth_conf.rx_adv_conf to struct ixgbe_dcb_config */
3032         if (vmdq_rx_conf->nb_queue_pools == ETH_16_POOLS ) {
3033                 dcb_config->num_tcs.pg_tcs = ETH_8_TCS;
3034                 dcb_config->num_tcs.pfc_tcs = ETH_8_TCS;
3035         }
3036         else {
3037                 dcb_config->num_tcs.pg_tcs = ETH_4_TCS;
3038                 dcb_config->num_tcs.pfc_tcs = ETH_4_TCS;
3039         }
3040         /* User Priority to Traffic Class mapping */
3041         for (i = 0; i < ETH_DCB_NUM_USER_PRIORITIES; i++) {
3042                 j = vmdq_rx_conf->dcb_tc[i];
3043                 tc = &dcb_config->tc_config[j];
3044                 tc->path[IXGBE_DCB_RX_CONFIG].up_to_tc_bitmap =
3045                                                 (uint8_t)(1 << j);
3046         }
3047 }
3048
3049 static void
3050 ixgbe_dcb_vt_tx_config(struct rte_eth_dev *dev,
3051                         struct ixgbe_dcb_config *dcb_config)
3052 {
3053         struct rte_eth_vmdq_dcb_tx_conf *vmdq_tx_conf =
3054                         &dev->data->dev_conf.tx_adv_conf.vmdq_dcb_tx_conf;
3055         struct ixgbe_dcb_tc_config *tc;
3056         uint8_t i,j;
3057
3058         /* convert rte_eth_conf.rx_adv_conf to struct ixgbe_dcb_config */
3059         if (vmdq_tx_conf->nb_queue_pools == ETH_16_POOLS ) {
3060                 dcb_config->num_tcs.pg_tcs = ETH_8_TCS;
3061                 dcb_config->num_tcs.pfc_tcs = ETH_8_TCS;
3062         }
3063         else {
3064                 dcb_config->num_tcs.pg_tcs = ETH_4_TCS;
3065                 dcb_config->num_tcs.pfc_tcs = ETH_4_TCS;
3066         }
3067
3068         /* User Priority to Traffic Class mapping */
3069         for (i = 0; i < ETH_DCB_NUM_USER_PRIORITIES; i++) {
3070                 j = vmdq_tx_conf->dcb_tc[i];
3071                 tc = &dcb_config->tc_config[j];
3072                 tc->path[IXGBE_DCB_TX_CONFIG].up_to_tc_bitmap =
3073                                                 (uint8_t)(1 << j);
3074         }
3075         return;
3076 }
3077
3078 static void
3079 ixgbe_dcb_rx_config(struct rte_eth_dev *dev,
3080                 struct ixgbe_dcb_config *dcb_config)
3081 {
3082         struct rte_eth_dcb_rx_conf *rx_conf =
3083                         &dev->data->dev_conf.rx_adv_conf.dcb_rx_conf;
3084         struct ixgbe_dcb_tc_config *tc;
3085         uint8_t i,j;
3086
3087         dcb_config->num_tcs.pg_tcs = (uint8_t)rx_conf->nb_tcs;
3088         dcb_config->num_tcs.pfc_tcs = (uint8_t)rx_conf->nb_tcs;
3089
3090         /* User Priority to Traffic Class mapping */
3091         for (i = 0; i < ETH_DCB_NUM_USER_PRIORITIES; i++) {
3092                 j = rx_conf->dcb_tc[i];
3093                 tc = &dcb_config->tc_config[j];
3094                 tc->path[IXGBE_DCB_RX_CONFIG].up_to_tc_bitmap =
3095                                                 (uint8_t)(1 << j);
3096         }
3097 }
3098
3099 static void
3100 ixgbe_dcb_tx_config(struct rte_eth_dev *dev,
3101                 struct ixgbe_dcb_config *dcb_config)
3102 {
3103         struct rte_eth_dcb_tx_conf *tx_conf =
3104                         &dev->data->dev_conf.tx_adv_conf.dcb_tx_conf;
3105         struct ixgbe_dcb_tc_config *tc;
3106         uint8_t i,j;
3107
3108         dcb_config->num_tcs.pg_tcs = (uint8_t)tx_conf->nb_tcs;
3109         dcb_config->num_tcs.pfc_tcs = (uint8_t)tx_conf->nb_tcs;
3110
3111         /* User Priority to Traffic Class mapping */
3112         for (i = 0; i < ETH_DCB_NUM_USER_PRIORITIES; i++) {
3113                 j = tx_conf->dcb_tc[i];
3114                 tc = &dcb_config->tc_config[j];
3115                 tc->path[IXGBE_DCB_TX_CONFIG].up_to_tc_bitmap =
3116                                                 (uint8_t)(1 << j);
3117         }
3118 }
3119
3120 /**
3121  * ixgbe_dcb_rx_hw_config - Configure general DCB RX HW parameters
3122  * @hw: pointer to hardware structure
3123  * @dcb_config: pointer to ixgbe_dcb_config structure
3124  */
3125 static void
3126 ixgbe_dcb_rx_hw_config(struct ixgbe_hw *hw,
3127                struct ixgbe_dcb_config *dcb_config)
3128 {
3129         uint32_t reg;
3130         uint32_t vlanctrl;
3131         uint8_t i;
3132
3133         PMD_INIT_FUNC_TRACE();
3134         /*
3135          * Disable the arbiter before changing parameters
3136          * (always enable recycle mode; WSP)
3137          */
3138         reg = IXGBE_RTRPCS_RRM | IXGBE_RTRPCS_RAC | IXGBE_RTRPCS_ARBDIS;
3139         IXGBE_WRITE_REG(hw, IXGBE_RTRPCS, reg);
3140
3141         if (hw->mac.type != ixgbe_mac_82598EB) {
3142                 reg = IXGBE_READ_REG(hw, IXGBE_MRQC);
3143                 if (dcb_config->num_tcs.pg_tcs == 4) {
3144                         if (dcb_config->vt_mode)
3145                                 reg = (reg & ~IXGBE_MRQC_MRQE_MASK) |
3146                                         IXGBE_MRQC_VMDQRT4TCEN;
3147                         else {
3148                                 /* whether the mode is DCB or DCB_RSS, set
3149                                  * MRQE to RSSXTCEN. RSS itself is controlled
3150                                  * by the RSS_FIELD bits
3151                                  */
3152                                 IXGBE_WRITE_REG(hw, IXGBE_VT_CTL, 0);
3153                                 reg = (reg & ~IXGBE_MRQC_MRQE_MASK) |
3154                                         IXGBE_MRQC_RTRSS4TCEN;
3155                         }
3156                 }
3157                 if (dcb_config->num_tcs.pg_tcs == 8) {
3158                         if (dcb_config->vt_mode)
3159                                 reg = (reg & ~IXGBE_MRQC_MRQE_MASK) |
3160                                         IXGBE_MRQC_VMDQRT8TCEN;
3161                         else {
3162                                 IXGBE_WRITE_REG(hw, IXGBE_VT_CTL, 0);
3163                                 reg = (reg & ~IXGBE_MRQC_MRQE_MASK) |
3164                                         IXGBE_MRQC_RTRSS8TCEN;
3165                         }
3166                 }
3167
3168                 IXGBE_WRITE_REG(hw, IXGBE_MRQC, reg);
3169         }
3170
3171         /* VLNCTRL: enable vlan filtering and allow all vlan tags through */
3172         vlanctrl = IXGBE_READ_REG(hw, IXGBE_VLNCTRL);
3173         vlanctrl |= IXGBE_VLNCTRL_VFE ; /* enable vlan filters */
3174         IXGBE_WRITE_REG(hw, IXGBE_VLNCTRL, vlanctrl);
3175
3176         /* VFTA - enable all vlan filters */
3177         for (i = 0; i < NUM_VFTA_REGISTERS; i++) {
3178                 IXGBE_WRITE_REG(hw, IXGBE_VFTA(i), 0xFFFFFFFF);
3179         }
3180
3181         /*
3182          * Configure Rx packet plane (recycle mode; WSP) and
3183          * enable arbiter
3184          */
3185         reg = IXGBE_RTRPCS_RRM | IXGBE_RTRPCS_RAC;
3186         IXGBE_WRITE_REG(hw, IXGBE_RTRPCS, reg);
3187
3188         return;
3189 }
3190
3191 static void
3192 ixgbe_dcb_hw_arbite_rx_config(struct ixgbe_hw *hw, uint16_t *refill,
3193                         uint16_t *max,uint8_t *bwg_id, uint8_t *tsa, uint8_t *map)
3194 {
3195         switch (hw->mac.type) {
3196         case ixgbe_mac_82598EB:
3197                 ixgbe_dcb_config_rx_arbiter_82598(hw, refill, max, tsa);
3198                 break;
3199         case ixgbe_mac_82599EB:
3200         case ixgbe_mac_X540:
3201         case ixgbe_mac_X550:
3202         case ixgbe_mac_X550EM_x:
3203                 ixgbe_dcb_config_rx_arbiter_82599(hw, refill, max, bwg_id,
3204                                                   tsa, map);
3205                 break;
3206         default:
3207                 break;
3208         }
3209 }
3210
3211 static void
3212 ixgbe_dcb_hw_arbite_tx_config(struct ixgbe_hw *hw, uint16_t *refill, uint16_t *max,
3213                             uint8_t *bwg_id, uint8_t *tsa, uint8_t *map)
3214 {
3215         switch (hw->mac.type) {
3216         case ixgbe_mac_82598EB:
3217                 ixgbe_dcb_config_tx_desc_arbiter_82598(hw, refill, max, bwg_id,tsa);
3218                 ixgbe_dcb_config_tx_data_arbiter_82598(hw, refill, max, bwg_id,tsa);
3219                 break;
3220         case ixgbe_mac_82599EB:
3221         case ixgbe_mac_X540:
3222         case ixgbe_mac_X550:
3223         case ixgbe_mac_X550EM_x:
3224                 ixgbe_dcb_config_tx_desc_arbiter_82599(hw, refill, max, bwg_id,tsa);
3225                 ixgbe_dcb_config_tx_data_arbiter_82599(hw, refill, max, bwg_id,tsa, map);
3226                 break;
3227         default:
3228                 break;
3229         }
3230 }
3231
3232 #define DCB_RX_CONFIG  1
3233 #define DCB_TX_CONFIG  1
3234 #define DCB_TX_PB      1024
3235 /**
3236  * ixgbe_dcb_hw_configure - Enable DCB and configure
3237  * general DCB in VT mode and non-VT mode parameters
3238  * @dev: pointer to rte_eth_dev structure
3239  * @dcb_config: pointer to ixgbe_dcb_config structure
3240  */
3241 static int
3242 ixgbe_dcb_hw_configure(struct rte_eth_dev *dev,
3243                         struct ixgbe_dcb_config *dcb_config)
3244 {
3245         int     ret = 0;
3246         uint8_t i,pfc_en,nb_tcs;
3247         uint16_t pbsize, rx_buffer_size;
3248         uint8_t config_dcb_rx = 0;
3249         uint8_t config_dcb_tx = 0;
3250         uint8_t tsa[IXGBE_DCB_MAX_TRAFFIC_CLASS] = {0};
3251         uint8_t bwgid[IXGBE_DCB_MAX_TRAFFIC_CLASS] = {0};
3252         uint16_t refill[IXGBE_DCB_MAX_TRAFFIC_CLASS] = {0};
3253         uint16_t max[IXGBE_DCB_MAX_TRAFFIC_CLASS] = {0};
3254         uint8_t map[IXGBE_DCB_MAX_TRAFFIC_CLASS] = {0};
3255         struct ixgbe_dcb_tc_config *tc;
3256         uint32_t max_frame = dev->data->mtu + ETHER_HDR_LEN + ETHER_CRC_LEN;
3257         struct ixgbe_hw *hw =
3258                         IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
3259
3260         switch(dev->data->dev_conf.rxmode.mq_mode){
3261         case ETH_MQ_RX_VMDQ_DCB:
3262                 dcb_config->vt_mode = true;
3263                 if (hw->mac.type != ixgbe_mac_82598EB) {
3264                         config_dcb_rx = DCB_RX_CONFIG;
3265                         /*
3266                          *get dcb and VT rx configuration parameters
3267                          *from rte_eth_conf
3268                          */
3269                         ixgbe_vmdq_dcb_rx_config(dev, dcb_config);
3270                         /*Configure general VMDQ and DCB RX parameters*/
3271                         ixgbe_vmdq_dcb_configure(dev);
3272                 }
3273                 break;
3274         case ETH_MQ_RX_DCB:
3275         case ETH_MQ_RX_DCB_RSS:
3276                 dcb_config->vt_mode = false;
3277                 config_dcb_rx = DCB_RX_CONFIG;
3278                 /* Get DCB RX configuration parameters from rte_eth_conf */
3279                 ixgbe_dcb_rx_config(dev, dcb_config);
3280                 /*Configure general DCB RX parameters*/
3281                 ixgbe_dcb_rx_hw_config(hw, dcb_config);
3282                 break;
3283         default:
3284                 PMD_INIT_LOG(ERR, "Incorrect DCB RX mode configuration");
3285                 break;
3286         }
3287         switch (dev->data->dev_conf.txmode.mq_mode) {
3288         case ETH_MQ_TX_VMDQ_DCB:
3289                 dcb_config->vt_mode = true;
3290                 config_dcb_tx = DCB_TX_CONFIG;
3291                 /* get DCB and VT TX configuration parameters from rte_eth_conf */
3292                 ixgbe_dcb_vt_tx_config(dev,dcb_config);
3293                 /*Configure general VMDQ and DCB TX parameters*/
3294                 ixgbe_vmdq_dcb_hw_tx_config(dev,dcb_config);
3295                 break;
3296
3297         case ETH_MQ_TX_DCB:
3298                 dcb_config->vt_mode = false;
3299                 config_dcb_tx = DCB_TX_CONFIG;
3300                 /*get DCB TX configuration parameters from rte_eth_conf*/
3301                 ixgbe_dcb_tx_config(dev, dcb_config);
3302                 /*Configure general DCB TX parameters*/
3303                 ixgbe_dcb_tx_hw_config(hw, dcb_config);
3304                 break;
3305         default:
3306                 PMD_INIT_LOG(ERR, "Incorrect DCB TX mode configuration");
3307                 break;
3308         }
3309
3310         nb_tcs = dcb_config->num_tcs.pfc_tcs;
3311         /* Unpack map */
3312         ixgbe_dcb_unpack_map_cee(dcb_config, IXGBE_DCB_RX_CONFIG, map);
3313         if(nb_tcs == ETH_4_TCS) {
3314                 /* Avoid un-configured priority mapping to TC0 */
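                /*
                 * 'mask' is left with one bit set for each TC that priorities
                 * 0-3 do not use; priorities 4-7 are then remapped onto those
                 * unused TCs instead of all falling back to TC0.
                 */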
3315                 uint8_t j = 4;
3316                 uint8_t mask = 0xFF;
3317                 for (i = 0; i < ETH_DCB_NUM_USER_PRIORITIES - 4; i++)
3318                         mask = (uint8_t)(mask & (~ (1 << map[i])));
3319                 for (i = 0; mask && (i < IXGBE_DCB_MAX_TRAFFIC_CLASS); i++) {
3320                         if ((mask & 0x1) && (j < ETH_DCB_NUM_USER_PRIORITIES))
3321                                 map[j++] = i;
3322                         mask >>= 1;
3323                 }
3324                 /* Re-configure 4 TCs BW */
3325                 for (i = 0; i < nb_tcs; i++) {
3326                         tc = &dcb_config->tc_config[i];
3327                         tc->path[IXGBE_DCB_TX_CONFIG].bwg_percent =
3328                                                 (uint8_t)(100 / nb_tcs);
3329                         tc->path[IXGBE_DCB_RX_CONFIG].bwg_percent =
3330                                                 (uint8_t)(100 / nb_tcs);
3331                 }
3332                 for (; i < IXGBE_DCB_MAX_TRAFFIC_CLASS; i++) {
3333                         tc = &dcb_config->tc_config[i];
3334                         tc->path[IXGBE_DCB_TX_CONFIG].bwg_percent = 0;
3335                         tc->path[IXGBE_DCB_RX_CONFIG].bwg_percent = 0;
3336                 }
3337         }
3338
3339         switch (hw->mac.type) {
3340         case ixgbe_mac_X550:
3341         case ixgbe_mac_X550EM_x:
3342                 rx_buffer_size = X550_RX_BUFFER_SIZE;
3343                 break;
3344         default:
3345                 rx_buffer_size = NIC_RX_BUFFER_SIZE;
3346                 break;
3347         }
3348
3349         if(config_dcb_rx) {
3350                 /* Set RX buffer size */
3351                 pbsize = (uint16_t)(rx_buffer_size / nb_tcs);
3352                 uint32_t rxpbsize = pbsize << IXGBE_RXPBSIZE_SHIFT;
3353                 for (i = 0 ; i < nb_tcs; i++) {
3354                         IXGBE_WRITE_REG(hw, IXGBE_RXPBSIZE(i), rxpbsize);
3355                 }
3356                 /* zero alloc all unused TCs */
3357                 for (; i < ETH_DCB_NUM_USER_PRIORITIES; i++) {
3358                         IXGBE_WRITE_REG(hw, IXGBE_RXPBSIZE(i), 0);
3359                 }
3360         }
3361         if(config_dcb_tx) {
3362                 /* Only an equally distributed Tx packet buffer allocation is supported. */
3363                 uint32_t txpktsize = IXGBE_TXPBSIZE_MAX / nb_tcs;
3364                 uint32_t txpbthresh = (txpktsize / DCB_TX_PB) - IXGBE_TXPKT_SIZE_MAX;
3365                 for (i = 0; i < nb_tcs; i++) {
3366                         IXGBE_WRITE_REG(hw, IXGBE_TXPBSIZE(i), txpktsize);
3367                         IXGBE_WRITE_REG(hw, IXGBE_TXPBTHRESH(i), txpbthresh);
3368                 }
3369                 /* Clear unused TCs, if any, to zero buffer size*/
3370                 for (; i < ETH_DCB_NUM_USER_PRIORITIES; i++) {
3371                         IXGBE_WRITE_REG(hw, IXGBE_TXPBSIZE(i), 0);
3372                         IXGBE_WRITE_REG(hw, IXGBE_TXPBTHRESH(i), 0);
3373                 }
3374         }
3375
3376         /*Calculates traffic class credits*/
3377         ixgbe_dcb_calculate_tc_credits_cee(hw, dcb_config,max_frame,
3378                                 IXGBE_DCB_TX_CONFIG);
3379         ixgbe_dcb_calculate_tc_credits_cee(hw, dcb_config,max_frame,
3380                                 IXGBE_DCB_RX_CONFIG);
3381
3382         if(config_dcb_rx) {
3383                 /* Unpack CEE standard containers */
3384                 ixgbe_dcb_unpack_refill_cee(dcb_config, IXGBE_DCB_RX_CONFIG, refill);
3385                 ixgbe_dcb_unpack_max_cee(dcb_config, max);
3386                 ixgbe_dcb_unpack_bwgid_cee(dcb_config, IXGBE_DCB_RX_CONFIG, bwgid);
3387                 ixgbe_dcb_unpack_tsa_cee(dcb_config, IXGBE_DCB_RX_CONFIG, tsa);
3388                 /* Configure PG(ETS) RX */
3389                 ixgbe_dcb_hw_arbite_rx_config(hw,refill,max,bwgid,tsa,map);
3390         }
3391
3392         if(config_dcb_tx) {
3393                 /* Unpack CEE standard containers */
3394                 ixgbe_dcb_unpack_refill_cee(dcb_config, IXGBE_DCB_TX_CONFIG, refill);
3395                 ixgbe_dcb_unpack_max_cee(dcb_config, max);
3396                 ixgbe_dcb_unpack_bwgid_cee(dcb_config, IXGBE_DCB_TX_CONFIG, bwgid);
3397                 ixgbe_dcb_unpack_tsa_cee(dcb_config, IXGBE_DCB_TX_CONFIG, tsa);
3398                 /* Configure PG(ETS) TX */
3399                 ixgbe_dcb_hw_arbite_tx_config(hw,refill,max,bwgid,tsa,map);
3400         }
3401
3402         /*Configure queue statistics registers*/
3403         ixgbe_dcb_config_tc_stats_82599(hw, dcb_config);
3404
3405         /* Check if the PFC is supported */
3406         if(dev->data->dev_conf.dcb_capability_en & ETH_DCB_PFC_SUPPORT) {
3407                 pbsize = (uint16_t)(rx_buffer_size / nb_tcs);
3408                 for (i = 0; i < nb_tcs; i++) {
3409                         /*
3410                          * high_water is 3/4 and low_water 1/4 of the per-TC buffer:
3411                          * e.g. with 8 TCs, pbsize is 64, giving 48 and 16 by default.
3412                          */
3413                         hw->fc.high_water[i] = (pbsize * 3 ) / 4;
3414                         hw->fc.low_water[i] = pbsize / 4;
3415                         /* Enable pfc for this TC */
3416                         tc = &dcb_config->tc_config[i];
3417                         tc->pfc = ixgbe_dcb_pfc_enabled;
3418                 }
3419                 ixgbe_dcb_unpack_pfc_cee(dcb_config, map, &pfc_en);
3420                 if(dcb_config->num_tcs.pfc_tcs == ETH_4_TCS)
3421                         pfc_en &= 0x0F;
3422                 ret = ixgbe_dcb_config_pfc(hw, pfc_en, map);
3423         }
3424
3425         return ret;
3426 }
3427
3428 /**
3429  * ixgbe_configure_dcb - Configure DCB  Hardware
3430  * @dev: pointer to rte_eth_dev
3431  */
3432 void ixgbe_configure_dcb(struct rte_eth_dev *dev)
3433 {
3434         struct ixgbe_dcb_config *dcb_cfg =
3435                         IXGBE_DEV_PRIVATE_TO_DCB_CFG(dev->data->dev_private);
3436         struct rte_eth_conf *dev_conf = &(dev->data->dev_conf);
3437
3438         PMD_INIT_FUNC_TRACE();
3439
3440         /* check support mq_mode for DCB */
3441         if ((dev_conf->rxmode.mq_mode != ETH_MQ_RX_VMDQ_DCB) &&
3442             (dev_conf->rxmode.mq_mode != ETH_MQ_RX_DCB) &&
3443             (dev_conf->rxmode.mq_mode != ETH_MQ_RX_DCB_RSS))
3444                 return;
3445
3446         if (dev->data->nb_rx_queues != ETH_DCB_NUM_QUEUES)
3447                 return;
3448
3449         /** Configure DCB hardware **/
3450         ixgbe_dcb_hw_configure(dev, dcb_cfg);
3451
3452         return;
3453 }
3454
3455 /*
3456  * VMDq is only supported on 10 GbE NICs.
3457  */
3458 static void
3459 ixgbe_vmdq_rx_hw_configure(struct rte_eth_dev *dev)
3460 {
3461         struct rte_eth_vmdq_rx_conf *cfg;
3462         struct ixgbe_hw *hw;
3463         enum rte_eth_nb_pools num_pools;
3464         uint32_t mrqc, vt_ctl, vlanctrl;
3465         uint32_t vmolr = 0;
3466         int i;
3467
3468         PMD_INIT_FUNC_TRACE();
3469         hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
3470         cfg = &dev->data->dev_conf.rx_adv_conf.vmdq_rx_conf;
3471         num_pools = cfg->nb_queue_pools;
3472
3473         ixgbe_rss_disable(dev);
3474
3475         /* MRQC: enable vmdq */
3476         mrqc = IXGBE_MRQC_VMDQEN;
3477         IXGBE_WRITE_REG(hw, IXGBE_MRQC, mrqc);
3478
3479         /* PFVTCTL: turn on virtualisation and set the default pool */
3480         vt_ctl = IXGBE_VT_CTL_VT_ENABLE | IXGBE_VT_CTL_REPLEN;
3481         if (cfg->enable_default_pool)
3482                 vt_ctl |= (cfg->default_pool << IXGBE_VT_CTL_POOL_SHIFT);
3483         else
3484                 vt_ctl |= IXGBE_VT_CTL_DIS_DEFPL;
3485
3486         IXGBE_WRITE_REG(hw, IXGBE_VT_CTL, vt_ctl);
3487
3488         for (i = 0; i < (int)num_pools; i++) {
3489                 vmolr = ixgbe_convert_vm_rx_mask_to_val(cfg->rx_mode, vmolr);
3490                 IXGBE_WRITE_REG(hw, IXGBE_VMOLR(i), vmolr);
3491         }
3492
3493         /* VLNCTRL: enable vlan filtering and allow all vlan tags through */
3494         vlanctrl = IXGBE_READ_REG(hw, IXGBE_VLNCTRL);
3495         vlanctrl |= IXGBE_VLNCTRL_VFE ; /* enable vlan filters */
3496         IXGBE_WRITE_REG(hw, IXGBE_VLNCTRL, vlanctrl);
3497
3498         /* VFTA - enable all vlan filters */
3499         for (i = 0; i < NUM_VFTA_REGISTERS; i++)
3500                 IXGBE_WRITE_REG(hw, IXGBE_VFTA(i), UINT32_MAX);
3501
3502         /* VFRE: pool enabling for receive - 64 */
3503         IXGBE_WRITE_REG(hw, IXGBE_VFRE(0), UINT32_MAX);
3504         if (num_pools == ETH_64_POOLS)
3505                 IXGBE_WRITE_REG(hw, IXGBE_VFRE(1), UINT32_MAX);
3506
3507         /*
3508          * MPSAR - allow pools to read specific mac addresses
3509          * In this case, all pools should be able to read from mac addr 0
3510          */
3511         IXGBE_WRITE_REG(hw, IXGBE_MPSAR_LO(0), UINT32_MAX);
3512         IXGBE_WRITE_REG(hw, IXGBE_MPSAR_HI(0), UINT32_MAX);
3513
3514         /* PFVLVF, PFVLVFB: set up filters for vlan tags as configured */
3515         for (i = 0; i < cfg->nb_pool_maps; i++) {
3516                 /* set vlan id in VF register and set the valid bit */
3517                 IXGBE_WRITE_REG(hw, IXGBE_VLVF(i), (IXGBE_VLVF_VIEN | \
3518                                 (cfg->pool_map[i].vlan_id & IXGBE_RXD_VLAN_ID_MASK)));
3519                 /*
3520                  * Put the allowed pools into the VLVFB register pair: bits 0-31
3521                  * of the pool mask map to VLVFB(2i), bits 32-63 to VLVFB(2i+1);
3522                  * the half that is written depends on which bits are set.
3523                  */
3524                 if (((cfg->pool_map[i].pools >> 32) & UINT32_MAX) == 0)
3525                         IXGBE_WRITE_REG(hw, IXGBE_VLVFB(i*2), \
3526                                         (cfg->pool_map[i].pools & UINT32_MAX));
3527                 else
3528                         IXGBE_WRITE_REG(hw, IXGBE_VLVFB((i*2+1)), \
3529                                         ((cfg->pool_map[i].pools >> 32) \
3530                                         & UINT32_MAX));
3531
3532         }
3533
3534         /* PFDMA Tx General Switch Control Enables VMDQ loopback */
3535         if (cfg->enable_loop_back) {
3536                 IXGBE_WRITE_REG(hw, IXGBE_PFDTXGSWC, IXGBE_PFDTXGSWC_VT_LBEN);
3537                 for (i = 0; i < RTE_IXGBE_VMTXSW_REGISTER_COUNT; i++)
3538                         IXGBE_WRITE_REG(hw, IXGBE_VMTXSW(i), UINT32_MAX);
3539         }
3540
3541         IXGBE_WRITE_FLUSH(hw);
3542 }
3543
3544 /*
3545  * ixgbe_dcb_config_tx_hw_config - Configure general VMDq TX parameters
3546  * @hw: pointer to hardware structure
3547  */
3548 static void
3549 ixgbe_vmdq_tx_hw_configure(struct ixgbe_hw *hw)
3550 {
3551         uint32_t reg;
3552         uint32_t q;
3553
3554         PMD_INIT_FUNC_TRACE();
3555         /*PF VF Transmit Enable*/
3556         IXGBE_WRITE_REG(hw, IXGBE_VFTE(0), UINT32_MAX);
3557         IXGBE_WRITE_REG(hw, IXGBE_VFTE(1), UINT32_MAX);
3558
3559         /* Disable the Tx desc arbiter so that MTQC can be changed */
3560         reg = IXGBE_READ_REG(hw, IXGBE_RTTDCS);
3561         reg |= IXGBE_RTTDCS_ARBDIS;
3562         IXGBE_WRITE_REG(hw, IXGBE_RTTDCS, reg);
3563
3564         reg = IXGBE_MTQC_VT_ENA | IXGBE_MTQC_64VF;
3565         IXGBE_WRITE_REG(hw, IXGBE_MTQC, reg);
3566
3567         /* Disable drop for all queues */
3568         for (q = 0; q < IXGBE_MAX_RX_QUEUE_NUM; q++)
3569                 IXGBE_WRITE_REG(hw, IXGBE_QDE,
3570                   (IXGBE_QDE_WRITE | (q << IXGBE_QDE_IDX_SHIFT)));
3571
3572         /* Enable the Tx desc arbiter */
3573         reg = IXGBE_READ_REG(hw, IXGBE_RTTDCS);
3574         reg &= ~IXGBE_RTTDCS_ARBDIS;
3575         IXGBE_WRITE_REG(hw, IXGBE_RTTDCS, reg);
3576
3577         IXGBE_WRITE_FLUSH(hw);
3578
3579         return;
3580 }
3581
3582 static int __attribute__((cold))
3583 ixgbe_alloc_rx_queue_mbufs(struct ixgbe_rx_queue *rxq)
3584 {
3585         struct ixgbe_rx_entry *rxe = rxq->sw_ring;
3586         uint64_t dma_addr;
3587         unsigned i;
3588
3589         /* Initialize software ring entries */
3590         for (i = 0; i < rxq->nb_rx_desc; i++) {
3591                 volatile union ixgbe_adv_rx_desc *rxd;
3592                 struct rte_mbuf *mbuf = rte_rxmbuf_alloc(rxq->mb_pool);
3593                 if (mbuf == NULL) {
3594                         PMD_INIT_LOG(ERR, "RX mbuf alloc failed queue_id=%u",
3595                                      (unsigned) rxq->queue_id);
3596                         return -ENOMEM;
3597                 }
3598
3599                 rte_mbuf_refcnt_set(mbuf, 1);
3600                 mbuf->next = NULL;
3601                 mbuf->data_off = RTE_PKTMBUF_HEADROOM;
3602                 mbuf->nb_segs = 1;
3603                 mbuf->port = rxq->port_id;
3604
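                /* Attach the mbuf to the software ring entry and hand its
                 * buffer address to the corresponding hardware descriptor. */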
3605                 dma_addr =
3606                         rte_cpu_to_le_64(rte_mbuf_data_dma_addr_default(mbuf));
3607                 rxd = &rxq->rx_ring[i];
3608                 rxd->read.hdr_addr = 0;
3609                 rxd->read.pkt_addr = dma_addr;
3610                 rxe[i].mbuf = mbuf;
3611         }
3612
3613         return 0;
3614 }
3615
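/*
 * Enable RSS when SR-IOV is active: program the RSS key and redirection
 * table via ixgbe_rss_configure(), then switch MRQC to the VMDq+RSS mode
 * that matches the number of active pools (64 or 32).
 */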
3616 static int
3617 ixgbe_config_vf_rss(struct rte_eth_dev *dev)
3618 {
3619         struct ixgbe_hw *hw;
3620         uint32_t mrqc;
3621
3622         ixgbe_rss_configure(dev);
3623
3624         hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
3625
3626         /* MRQC: enable VF RSS */
3627         mrqc = IXGBE_READ_REG(hw, IXGBE_MRQC);
3628         mrqc &= ~IXGBE_MRQC_MRQE_MASK;
3629         switch (RTE_ETH_DEV_SRIOV(dev).active) {
3630         case ETH_64_POOLS:
3631                 mrqc |= IXGBE_MRQC_VMDQRSS64EN;
3632                 break;
3633
3634         case ETH_32_POOLS:
3635                 mrqc |= IXGBE_MRQC_VMDQRSS32EN;
3636                 break;
3637
3638         default:
3639                 PMD_INIT_LOG(ERR, "Invalid pool number in IOV mode with VMDQ RSS");
3640                 return -EINVAL;
3641         }
3642
3643         IXGBE_WRITE_REG(hw, IXGBE_MRQC, mrqc);
3644
3645         return 0;
3646 }
3647
3648 static int
3649 ixgbe_config_vf_default(struct rte_eth_dev *dev)
3650 {
3651         struct ixgbe_hw *hw =
3652                 IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
3653
3654         switch (RTE_ETH_DEV_SRIOV(dev).active) {
3655         case ETH_64_POOLS:
3656                 IXGBE_WRITE_REG(hw, IXGBE_MRQC,
3657                         IXGBE_MRQC_VMDQEN);
3658                 break;
3659
3660         case ETH_32_POOLS:
3661                 IXGBE_WRITE_REG(hw, IXGBE_MRQC,
3662                         IXGBE_MRQC_VMDQRT4TCEN);
3663                 break;
3664
3665         case ETH_16_POOLS:
3666                 IXGBE_WRITE_REG(hw, IXGBE_MRQC,
3667                         IXGBE_MRQC_VMDQRT8TCEN);
3668                 break;
3669         default:
3670                 PMD_INIT_LOG(ERR,
3671                         "invalid pool number in IOV mode");
3672                 break;
3673         }
3674         return 0;
3675 }
3676
3677 static int
3678 ixgbe_dev_mq_rx_configure(struct rte_eth_dev *dev)
3679 {
3680         struct ixgbe_hw *hw =
3681                 IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
3682
3683         if (hw->mac.type == ixgbe_mac_82598EB)
3684                 return 0;
3685
3686         if (RTE_ETH_DEV_SRIOV(dev).active == 0) {
3687                 /*
3688                  * SRIOV inactive scheme
3689                  * any DCB/RSS w/o VMDq multi-queue setting
3690                  */
3691                 switch (dev->data->dev_conf.rxmode.mq_mode) {
3692                 case ETH_MQ_RX_RSS:
3693                 case ETH_MQ_RX_DCB_RSS:
3694                 case ETH_MQ_RX_VMDQ_RSS:
3695                         ixgbe_rss_configure(dev);
3696                         break;
3697
3698                 case ETH_MQ_RX_VMDQ_DCB:
3699                         ixgbe_vmdq_dcb_configure(dev);
3700                         break;
3701
3702                 case ETH_MQ_RX_VMDQ_ONLY:
3703                         ixgbe_vmdq_rx_hw_configure(dev);
3704                         break;
3705
3706                 case ETH_MQ_RX_NONE:
3707                 default:
3708                         /* if mq_mode is none, disable RSS. */
3709                         ixgbe_rss_disable(dev);
3710                         break;
3711                 }
3712         } else {
3713                 /*
3714                  * SRIOV active scheme
3715                  * Support RSS together with VMDq & SRIOV
3716                  */
3717                 switch (dev->data->dev_conf.rxmode.mq_mode) {
3718                 case ETH_MQ_RX_RSS:
3719                 case ETH_MQ_RX_VMDQ_RSS:
3720                         ixgbe_config_vf_rss(dev);
3721                         break;
3722
3723                 /* FIXME if support DCB/RSS together with VMDq & SRIOV */
3724                 case ETH_MQ_RX_VMDQ_DCB:
3725                 case ETH_MQ_RX_VMDQ_DCB_RSS:
3726                         PMD_INIT_LOG(ERR,
3727                                 "Could not support DCB with VMDq & SRIOV");
3728                         return -1;
3729                 default:
3730                         ixgbe_config_vf_default(dev);
3731                         break;
3732                 }
3733         }
3734
3735         return 0;
3736 }
3737
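/*
 * Program MTQC according to the Tx multi-queue mode: VMDq-only, plain
 * 64 queue / 1 packet buffer, or a VT mode sized by the number of active
 * SR-IOV pools.  The Tx descriptor arbiter is disabled around the update.
 */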
3738 static int
3739 ixgbe_dev_mq_tx_configure(struct rte_eth_dev *dev)
3740 {
3741         struct ixgbe_hw *hw =
3742                 IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
3743         uint32_t mtqc;
3744         uint32_t rttdcs;
3745
3746         if (hw->mac.type == ixgbe_mac_82598EB)
3747                 return 0;
3748
3749         /* disable arbiter before setting MTQC */
3750         rttdcs = IXGBE_READ_REG(hw, IXGBE_RTTDCS);
3751         rttdcs |= IXGBE_RTTDCS_ARBDIS;
3752         IXGBE_WRITE_REG(hw, IXGBE_RTTDCS, rttdcs);
3753
3754         if (RTE_ETH_DEV_SRIOV(dev).active == 0) {
3755                 /*
3756                  * SRIOV inactive scheme
3757                  * any DCB w/o VMDq multi-queue setting
3758                  */
3759                 if (dev->data->dev_conf.txmode.mq_mode == ETH_MQ_TX_VMDQ_ONLY)
3760                         ixgbe_vmdq_tx_hw_configure(hw);
3761                 else {
3762                         mtqc = IXGBE_MTQC_64Q_1PB;
3763                         IXGBE_WRITE_REG(hw, IXGBE_MTQC, mtqc);
3764                 }
3765         } else {
3766                 switch (RTE_ETH_DEV_SRIOV(dev).active) {
3767
3768                 /*
3769                  * SRIOV active scheme
3770                  * FIXME if support DCB together with VMDq & SRIOV
3771                  */
3772                 case ETH_64_POOLS:
3773                         mtqc = IXGBE_MTQC_VT_ENA | IXGBE_MTQC_64VF;
3774                         break;
3775                 case ETH_32_POOLS:
3776                         mtqc = IXGBE_MTQC_VT_ENA | IXGBE_MTQC_32VF;
3777                         break;
3778                 case ETH_16_POOLS:
3779                         mtqc = IXGBE_MTQC_VT_ENA | IXGBE_MTQC_RT_ENA |
3780                                 IXGBE_MTQC_8TC_8TQ;
3781                         break;
3782                 default:
3783                         mtqc = IXGBE_MTQC_64Q_1PB;
3784                         PMD_INIT_LOG(ERR, "invalid pool number in IOV mode");
3785                 }
3786                 IXGBE_WRITE_REG(hw, IXGBE_MTQC, mtqc);
3787         }
3788
3789         /* re-enable arbiter */
3790         rttdcs &= ~IXGBE_RTTDCS_ARBDIS;
3791         IXGBE_WRITE_REG(hw, IXGBE_RTTDCS, rttdcs);
3792
3793         return 0;
3794 }
3795
3796 /**
3797  * ixgbe_get_rscctl_maxdesc - Calculate the RSCCTL[n].MAXDESC for PF
3798  *
3799  * Return the RSCCTL[n].MAXDESC for 82599 and x540 PF devices according to the
3800  * spec rev. 3.0 chapter 8.2.3.8.13.
3801  *
3802  * @pool Memory pool of the Rx queue
3803  */
3804 static inline uint32_t
3805 ixgbe_get_rscctl_maxdesc(struct rte_mempool *pool)
3806 {
3807         struct rte_pktmbuf_pool_private *mp_priv = rte_mempool_get_priv(pool);
3808
3809         /* MAXDESC * SRRCTL.BSIZEPKT must not exceed 64 KB minus one */
3810         uint16_t maxdesc =
3811                 IPV4_MAX_PKT_LEN /
3812                         (mp_priv->mbuf_data_room_size - RTE_PKTMBUF_HEADROOM);
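        /*
         * For example, assuming a typical 2048-byte data room left after the
         * headroom, 65535 / 2048 = 31, which falls in the >= 16 range, so
         * IXGBE_RSCCTL_MAXDESC_16 is returned.
         */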
3813
3814         if (maxdesc >= 16)
3815                 return IXGBE_RSCCTL_MAXDESC_16;
3816         else if (maxdesc >= 8)
3817                 return IXGBE_RSCCTL_MAXDESC_8;
3818         else if (maxdesc >= 4)
3819                 return IXGBE_RSCCTL_MAXDESC_4;
3820         else
3821                 return IXGBE_RSCCTL_MAXDESC_1;
3822 }
3823
3824 /**
3825  * ixgbe_set_ivar - Setup the correct IVAR register for a particular MSIX
3826  * interrupt
3827  *
3828  * (Taken from FreeBSD tree)
3829  * (yes this is all very magic and confusing :)
3830  *
3831  * @dev port handle
3832  * @entry the register array entry
3833  * @vector the MSIX vector for this queue
3834  * @type RX/TX/MISC
3835  */
3836 static void
3837 ixgbe_set_ivar(struct rte_eth_dev *dev, u8 entry, u8 vector, s8 type)
3838 {
3839         struct ixgbe_hw *hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
3840         u32 ivar, index;
3841
3842         vector |= IXGBE_IVAR_ALLOC_VAL;
3843
3844         switch (hw->mac.type) {
3845
3846         case ixgbe_mac_82598EB:
3847                 if (type == -1)
3848                         entry = IXGBE_IVAR_OTHER_CAUSES_INDEX;
3849                 else
3850                         entry += (type * 64);
3851                 index = (entry >> 2) & 0x1F;
3852                 ivar = IXGBE_READ_REG(hw, IXGBE_IVAR(index));
3853                 ivar &= ~(0xFF << (8 * (entry & 0x3)));
3854                 ivar |= (vector << (8 * (entry & 0x3)));
3855                 IXGBE_WRITE_REG(hw, IXGBE_IVAR(index), ivar);
3856                 break;
3857
3858         case ixgbe_mac_82599EB:
3859         case ixgbe_mac_X540:
3860                 if (type == -1) { /* MISC IVAR */
3861                         index = (entry & 1) * 8;
3862                         ivar = IXGBE_READ_REG(hw, IXGBE_IVAR_MISC);
3863                         ivar &= ~(0xFF << index);
3864                         ivar |= (vector << index);
3865                         IXGBE_WRITE_REG(hw, IXGBE_IVAR_MISC, ivar);
3866                 } else {        /* RX/TX IVARS */
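                        /*
                         * Each IVAR register packs four 8-bit entries: Rx and
                         * Tx for queue 2n in the low 16 bits, and Rx and Tx
                         * for queue 2n+1 in the high 16 bits, hence the
                         * 16 * (entry & 1) + 8 * type bit offset.
                         */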
3867                         index = (16 * (entry & 1)) + (8 * type);
3868                         ivar = IXGBE_READ_REG(hw, IXGBE_IVAR(entry >> 1));
3869                         ivar &= ~(0xFF << index);
3870                         ivar |= (vector << index);
3871                         IXGBE_WRITE_REG(hw, IXGBE_IVAR(entry >> 1), ivar);
3872                 }
3873
3874                 break;
3875
3876         default:
3877                 break;
3878         }
3879 }
3880
3881 void __attribute__((cold))
3882 ixgbe_set_rx_function(struct rte_eth_dev *dev)
3883 {
3884         uint16_t i, rx_using_sse;
3885         struct ixgbe_adapter *adapter =
3886                 (struct ixgbe_adapter *)dev->data->dev_private;
3887
3888         /*
3889          * In order to allow Vector Rx there are a few configuration
3890          * conditions to be met and Rx Bulk Allocation should be allowed.
3891          */
3892         if (ixgbe_rx_vec_dev_conf_condition_check(dev) ||
3893             !adapter->rx_bulk_alloc_allowed) {
3894                 PMD_INIT_LOG(DEBUG, "Port[%d] doesn't meet Vector Rx "
3895                                     "preconditions or RTE_IXGBE_INC_VECTOR is "
3896                                     "not enabled",
3897                              dev->data->port_id);
3898
3899                 adapter->rx_vec_allowed = false;
3900         }
3901
3902         /*
3903          * Initialize the appropriate LRO callback.
3904          *
3905          * If all queues satisfy the bulk allocation preconditions
3906          * (adapter->rx_bulk_alloc_allowed is TRUE) then we may use bulk allocation.
3907          * Otherwise use a single allocation version.
3908          */
3909         if (dev->data->lro) {
3910                 if (adapter->rx_bulk_alloc_allowed) {
3911                         PMD_INIT_LOG(DEBUG, "LRO is requested. Using a bulk "
3912                                            "allocation version");
3913                         dev->rx_pkt_burst = ixgbe_recv_pkts_lro_bulk_alloc;
3914                 } else {
3915                         PMD_INIT_LOG(DEBUG, "LRO is requested. Using a single "
3916                                            "allocation version");
3917                         dev->rx_pkt_burst = ixgbe_recv_pkts_lro_single_alloc;
3918                 }
3919         } else if (dev->data->scattered_rx) {
3920                 /*
3921                  * Set the non-LRO scattered Rx callback: there are Vector,
3922                  * bulk allocation and single allocation versions.
3923                  */
3924                 if (adapter->rx_vec_allowed) {
3925                         PMD_INIT_LOG(DEBUG, "Using Vector Scattered Rx "
3926                                             "callback (port=%d).",
3927                                      dev->data->port_id);
3928
3929                         dev->rx_pkt_burst = ixgbe_recv_scattered_pkts_vec;
3930                 } else if (adapter->rx_bulk_alloc_allowed) {
3931                         PMD_INIT_LOG(DEBUG, "Using a Scattered Rx callback with "
3932                                            "bulk allocation (port=%d).",
3933                                      dev->data->port_id);
3934                         dev->rx_pkt_burst = ixgbe_recv_pkts_lro_bulk_alloc;
3935                 } else {
3936                         PMD_INIT_LOG(DEBUG, "Using Regular (non-vector, "
3937                                             "single allocation) "
3938                                             "Scattered Rx callback "
3939                                             "(port=%d).",
3940                                      dev->data->port_id);
3941
3942                         dev->rx_pkt_burst = ixgbe_recv_pkts_lro_single_alloc;
3943                 }
3944         /*
3945          * Below we set "simple" callbacks according to port/queues parameters.
3946          * If parameters allow we are going to choose between the following
3947          * callbacks:
3948          *    - Vector
3949          *    - Bulk Allocation
3950          *    - Single buffer allocation (the simplest one)
3951          */
3952         } else if (adapter->rx_vec_allowed) {
3953                 PMD_INIT_LOG(DEBUG, "Vector Rx enabled, please make sure the Rx "
3954                                     "burst size is no less than %d (port=%d).",
3955                              RTE_IXGBE_DESCS_PER_LOOP,
3956                              dev->data->port_id);
3957
3958                 dev->rx_pkt_burst = ixgbe_recv_pkts_vec;
3959         } else if (adapter->rx_bulk_alloc_allowed) {
3960                 PMD_INIT_LOG(DEBUG, "Rx Burst Bulk Alloc Preconditions are "
3961                                     "satisfied. Rx Burst Bulk Alloc function "
3962                                     "will be used on port=%d.",
3963                              dev->data->port_id);
3964
3965                 dev->rx_pkt_burst = ixgbe_recv_pkts_bulk_alloc;
3966         } else {
3967                 PMD_INIT_LOG(DEBUG, "Rx Burst Bulk Alloc Preconditions are not "
3968                                     "satisfied, or Scattered Rx is requested "
3969                                     "(port=%d).",
3970                              dev->data->port_id);
3971
3972                 dev->rx_pkt_burst = ixgbe_recv_pkts;
3973         }
3974
3975         /* Propagate information about RX function choice through all queues. */
3976
3977         rx_using_sse =
3978                 (dev->rx_pkt_burst == ixgbe_recv_scattered_pkts_vec ||
3979                 dev->rx_pkt_burst == ixgbe_recv_pkts_vec);
3980
3981         for (i = 0; i < dev->data->nb_rx_queues; i++) {
3982                 struct ixgbe_rx_queue *rxq = dev->data->rx_queues[i];
3983                 rxq->rx_using_sse = rx_using_sse;
3984         }
3985 }
3986
3987 /**
3988  * ixgbe_set_rsc - configure RSC related port HW registers
3989  *
3990  * Configures the port's RSC related registers according to the 4.6.7.2 chapter
3991  * of 82599 Spec (x540 configuration is virtually the same).
3992  *
3993  * @dev port handle
3994  *
3995  * Returns 0 in case of success or a non-zero error code
3996  */
3997 static int
3998 ixgbe_set_rsc(struct rte_eth_dev *dev)
3999 {
4000         struct rte_eth_rxmode *rx_conf = &dev->data->dev_conf.rxmode;
4001         struct ixgbe_hw *hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
4002         struct rte_eth_dev_info dev_info = { 0 };
4003         bool rsc_capable = false;
4004         uint16_t i;
4005         uint32_t rdrxctl;
4006
4007         /* Sanity check */
4008         dev->dev_ops->dev_infos_get(dev, &dev_info);
4009         if (dev_info.rx_offload_capa & DEV_RX_OFFLOAD_TCP_LRO)
4010                 rsc_capable = true;
4011
4012         if (!rsc_capable && rx_conf->enable_lro) {
4013                 PMD_INIT_LOG(CRIT, "LRO is requested on HW that doesn't "
4014                                    "support it");
4015                 return -EINVAL;
4016         }
4017
4018         /* RSC global configuration (chapter 4.6.7.2.1 of 82599 Spec) */
4019
4020         if (!rx_conf->hw_strip_crc && rx_conf->enable_lro) {
4021                 /*
4022                  * According to chapter 4.6.7.2.1 of the Spec Rev.
4023                  * 3.0, RSC configuration requires HW CRC stripping to be
4024                  * enabled. If the user requested both HW CRC stripping off
4025                  * and RSC on, return an error.
4026                  */
4027                 PMD_INIT_LOG(CRIT, "LRO can't be enabled when HW CRC "
4028                                     "is disabled");
4029                 return -EINVAL;
4030         }
4031
4032         /* RFCTL configuration  */
4033         if (rsc_capable) {
4034                 uint32_t rfctl = IXGBE_READ_REG(hw, IXGBE_RFCTL);
4035                 if (rx_conf->enable_lro)
4036                         /*
4037                          * Since coalescing of NFS packets is not supported, clear
4038                          * RFCTL.NFSW_DIS and RFCTL.NFSR_DIS when RSC is
4039                          * enabled.
4040                          */
4041                         rfctl &= ~(IXGBE_RFCTL_RSC_DIS | IXGBE_RFCTL_NFSW_DIS |
4042                                    IXGBE_RFCTL_NFSR_DIS);
4043                 else
4044                         rfctl |= IXGBE_RFCTL_RSC_DIS;
4045
4046                 IXGBE_WRITE_REG(hw, IXGBE_RFCTL, rfctl);
4047         }
4048
4049         /* If LRO hasn't been requested - we are done here. */
4050         if (!rx_conf->enable_lro)
4051                 return 0;
4052
4053         /* Set RDRXCTL.RSCACKC bit */
4054         rdrxctl = IXGBE_READ_REG(hw, IXGBE_RDRXCTL);
4055         rdrxctl |= IXGBE_RDRXCTL_RSCACKC;
4056         IXGBE_WRITE_REG(hw, IXGBE_RDRXCTL, rdrxctl);
4057
4058         /* Per-queue RSC configuration (chapter 4.6.7.2.2 of 82599 Spec) */
4059         for (i = 0; i < dev->data->nb_rx_queues; i++) {
4060                 struct ixgbe_rx_queue *rxq = dev->data->rx_queues[i];
4061                 uint32_t srrctl =
4062                         IXGBE_READ_REG(hw, IXGBE_SRRCTL(rxq->reg_idx));
4063                 uint32_t rscctl =
4064                         IXGBE_READ_REG(hw, IXGBE_RSCCTL(rxq->reg_idx));
4065                 uint32_t psrtype =
4066                         IXGBE_READ_REG(hw, IXGBE_PSRTYPE(rxq->reg_idx));
4067                 uint32_t eitr =
4068                         IXGBE_READ_REG(hw, IXGBE_EITR(rxq->reg_idx));
4069
4070                 /*
4071                  * ixgbe PMD doesn't support header-split at the moment.
4072                  *
4073                  * Following chapter 4.6.7.2.1 of the 82599/x540
4074                  * Spec, if RSC is enabled the SRRCTL[n].BSIZEHEADER
4075                  * field should be configured even if header split is not
4076                  * enabled. We configure it to 128 bytes, following the
4077                  * recommendation in the spec.
4078                  */
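                /*
                 * Added note: BSIZEHEADER is expressed in 64-byte units in
                 * SRRCTL (per the datasheet as understood here), so the
                 * shift/mask below encode the 128-byte value into that field.
                 */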
4079                 srrctl &= ~IXGBE_SRRCTL_BSIZEHDR_MASK;
4080                 srrctl |= (128 << IXGBE_SRRCTL_BSIZEHDRSIZE_SHIFT) &
4081                                             IXGBE_SRRCTL_BSIZEHDR_MASK;
4082
4083                 /*
4084                  * TODO: Consider setting the Receive Descriptor Minimum
4085                  * Threshold Size for the RSC case. This is not an obviously
4086                  * beneficial option, but one worth considering...
4087                  */
4088
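                /*
                 * Added note: MAXDESC bounds how many descriptors a single
                 * RSC aggregation may span; ixgbe_get_rscctl_maxdesc()
                 * derives it from the mempool buffer size so an aggregation
                 * stays within the HW limit. PSRTYPE.TCPHDR lets the HW
                 * recognize TCP headers for coalescing (assumed from the
                 * RSC chapter of the spec).
                 */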
4089                 rscctl |= IXGBE_RSCCTL_RSCEN;
4090                 rscctl |= ixgbe_get_rscctl_maxdesc(rxq->mb_pool);
4091                 psrtype |= IXGBE_PSRTYPE_TCPHDR;
4092
4093                 /*
4094                  * RSC: Set ITR interval corresponding to 2K ints/s.
4095                  *
4096                  * Full-sized RSC aggregations for a 10Gb/s link will
4097                  * arrive at a rate of about 20K aggregations/s.
4098                  *
4099                  * A 2K ints/s rate will cause only about 10% of the
4100                  * aggregations to be closed due to interrupt timer
4101                  * expiration when streaming at wire speed.
4102                  *
4103                  * For a sparse streaming case this setting will yield
4104                  * at most 500us latency for a single RSC aggregation.
4105                  */
4106                 eitr &= ~IXGBE_EITR_ITR_INT_MASK;
4107                 eitr |= IXGBE_EITR_INTERVAL_US(500) | IXGBE_EITR_CNT_WDIS;
4108
4109                 IXGBE_WRITE_REG(hw, IXGBE_SRRCTL(rxq->reg_idx), srrctl);
4110                 IXGBE_WRITE_REG(hw, IXGBE_RSCCTL(rxq->reg_idx), rscctl);
4111                 IXGBE_WRITE_REG(hw, IXGBE_PSRTYPE(rxq->reg_idx), psrtype);
4112                 IXGBE_WRITE_REG(hw, IXGBE_EITR(rxq->reg_idx), eitr);
4113
4114                 /*
4115                  * RSC requires the mapping of the queue to the
4116                  * interrupt vector.
4117                  */
4118                 ixgbe_set_ivar(dev, rxq->reg_idx, i, 0);
4119         }
4120
4121         dev->data->lro = 1;
4122
4123         PMD_INIT_LOG(DEBUG, "enabling LRO mode");
4124
4125         return 0;
4126 }
4127
4128 /*
4129  * Initializes Receive Unit.
4130  */
4131 int __attribute__((cold))
4132 ixgbe_dev_rx_init(struct rte_eth_dev *dev)
4133 {
4134         struct ixgbe_hw     *hw;
4135         struct ixgbe_rx_queue *rxq;
4136         uint64_t bus_addr;
4137         uint32_t rxctrl;
4138         uint32_t fctrl;
4139         uint32_t hlreg0;
4140         uint32_t maxfrs;
4141         uint32_t srrctl;
4142         uint32_t rdrxctl;
4143         uint32_t rxcsum;
4144         uint16_t buf_size;
4145         uint16_t i;
4146         struct rte_eth_rxmode *rx_conf = &dev->data->dev_conf.rxmode;
4147         int rc;
4148
4149         PMD_INIT_FUNC_TRACE();
4150         hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
4151
4152         /*
4153          * Make sure receives are disabled while setting
4154          * up the RX context (registers, descriptor rings, etc.).
4155          */
4156         rxctrl = IXGBE_READ_REG(hw, IXGBE_RXCTRL);
4157         IXGBE_WRITE_REG(hw, IXGBE_RXCTRL, rxctrl & ~IXGBE_RXCTRL_RXEN);
4158
4159         /* Enable receipt of broadcast frames */
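        /*
         * Added note: BAM accepts broadcast frames, DPF discards pause
         * frames instead of forwarding them, and PMCF passes MAC control
         * frames (bit meanings as understood from the datasheet).
         */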
4160         fctrl = IXGBE_READ_REG(hw, IXGBE_FCTRL);
4161         fctrl |= IXGBE_FCTRL_BAM;
4162         fctrl |= IXGBE_FCTRL_DPF;
4163         fctrl |= IXGBE_FCTRL_PMCF;
4164         IXGBE_WRITE_REG(hw, IXGBE_FCTRL, fctrl);
4165
4166         /*
4167          * Configure CRC stripping, if any.
4168          */
4169         hlreg0 = IXGBE_READ_REG(hw, IXGBE_HLREG0);
4170         if (rx_conf->hw_strip_crc)
4171                 hlreg0 |= IXGBE_HLREG0_RXCRCSTRP;
4172         else
4173                 hlreg0 &= ~IXGBE_HLREG0_RXCRCSTRP;
4174
4175         /*
4176          * Configure jumbo frame support, if any.
4177          */
4178         if (rx_conf->jumbo_frame == 1) {
4179                 hlreg0 |= IXGBE_HLREG0_JUMBOEN;
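                /*
                 * Added note: MAXFRS[31:16] holds the maximum frame size in
                 * bytes; the read-modify-write below preserves the lower
                 * 16 bits of the register.
                 */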
4180                 maxfrs = IXGBE_READ_REG(hw, IXGBE_MAXFRS);
4181                 maxfrs &= 0x0000FFFF;
4182                 maxfrs |= (rx_conf->max_rx_pkt_len << 16);
4183                 IXGBE_WRITE_REG(hw, IXGBE_MAXFRS, maxfrs);
4184         } else
4185                 hlreg0 &= ~IXGBE_HLREG0_JUMBOEN;
4186
4187         /*
4188          * If loopback mode is configured for 82599, set LPBK bit.
4189          */
4190         if (hw->mac.type == ixgbe_mac_82599EB &&
4191                         dev->data->dev_conf.lpbk_mode == IXGBE_LPBK_82599_TX_RX)
4192                 hlreg0 |= IXGBE_HLREG0_LPBK;
4193         else
4194                 hlreg0 &= ~IXGBE_HLREG0_LPBK;
4195
4196         IXGBE_WRITE_REG(hw, IXGBE_HLREG0, hlreg0);
4197
4198         /* Setup RX queues */
4199         for (i = 0; i < dev->data->nb_rx_queues; i++) {
4200                 rxq = dev->data->rx_queues[i];
4201
4202                 /*
4203                  * Reset crc_len in case it was changed after queue setup by a
4204                  * call to configure.
4205                  */
4206                 rxq->crc_len = rx_conf->hw_strip_crc ? 0 : ETHER_CRC_LEN;
4207
4208                 /* Setup the Base and Length of the Rx Descriptor Rings */
4209                 bus_addr = rxq->rx_ring_phys_addr;
4210                 IXGBE_WRITE_REG(hw, IXGBE_RDBAL(rxq->reg_idx),
4211                                 (uint32_t)(bus_addr & 0x00000000ffffffffULL));
4212                 IXGBE_WRITE_REG(hw, IXGBE_RDBAH(rxq->reg_idx),
4213                                 (uint32_t)(bus_addr >> 32));
4214                 IXGBE_WRITE_REG(hw, IXGBE_RDLEN(rxq->reg_idx),
4215                                 rxq->nb_rx_desc * sizeof(union ixgbe_adv_rx_desc));
4216                 IXGBE_WRITE_REG(hw, IXGBE_RDH(rxq->reg_idx), 0);
4217                 IXGBE_WRITE_REG(hw, IXGBE_RDT(rxq->reg_idx), 0);
4218
4219                 /* Configure the SRRCTL register */
4220 #ifdef RTE_HEADER_SPLIT_ENABLE
4221                 /*
4222                  * Configure Header Split
4223                  */
4224                 if (rx_conf->header_split) {
4225                         if (hw->mac.type == ixgbe_mac_82599EB) {
4226                                 /* Must setup the PSRTYPE register */
4227                                 uint32_t psrtype;
4228                                 psrtype = IXGBE_PSRTYPE_TCPHDR |
4229                                         IXGBE_PSRTYPE_UDPHDR   |
4230                                         IXGBE_PSRTYPE_IPV4HDR  |
4231                                         IXGBE_PSRTYPE_IPV6HDR;
4232                                 IXGBE_WRITE_REG(hw, IXGBE_PSRTYPE(rxq->reg_idx), psrtype);
4233                         }
4234                         srrctl = ((rx_conf->split_hdr_size <<
4235                                 IXGBE_SRRCTL_BSIZEHDRSIZE_SHIFT) &
4236                                 IXGBE_SRRCTL_BSIZEHDR_MASK);
4237                         srrctl |= IXGBE_SRRCTL_DESCTYPE_HDR_SPLIT_ALWAYS;
4238                 } else
4239 #endif
4240                         srrctl = IXGBE_SRRCTL_DESCTYPE_ADV_ONEBUF;
4241
4242                 /* Set if packets are dropped when no descriptors available */
4243                 if (rxq->drop_en)
4244                         srrctl |= IXGBE_SRRCTL_DROP_EN;
4245
4246                 /*
4247                  * Configure the RX buffer size in the BSIZEPACKET field of
4248                  * the SRRCTL register of the queue.
4249                  * The value is in 1 KB resolution. Valid values can be from
4250                  * 1 KB to 16 KB.
4251                  */
4252                 buf_size = (uint16_t)(rte_pktmbuf_data_room_size(rxq->mb_pool) -
4253                         RTE_PKTMBUF_HEADROOM);
4254                 srrctl |= ((buf_size >> IXGBE_SRRCTL_BSIZEPKT_SHIFT) &
4255                            IXGBE_SRRCTL_BSIZEPKT_MASK);
4256
4257                 IXGBE_WRITE_REG(hw, IXGBE_SRRCTL(rxq->reg_idx), srrctl);
4258
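                /*
                 * Added note: re-derive the buffer size actually programmed
                 * into SRRCTL (rounded down to the register's 1 KB
                 * granularity) before the scattered Rx check below.
                 */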
4259                 buf_size = (uint16_t) ((srrctl & IXGBE_SRRCTL_BSIZEPKT_MASK) <<
4260                                        IXGBE_SRRCTL_BSIZEPKT_SHIFT);
4261
4262                 /* Account for the extra dual VLAN (QinQ) tag length */
4263                 if (dev->data->dev_conf.rxmode.max_rx_pkt_len +
4264                                             2 * IXGBE_VLAN_TAG_SIZE > buf_size)
4265                         dev->data->scattered_rx = 1;
4266         }
4267
4268         if (rx_conf->enable_scatter)
4269                 dev->data->scattered_rx = 1;
4270
4271         /*
4272          * Device configured with multiple RX queues.
4273          */
4274         ixgbe_dev_mq_rx_configure(dev);
4275
4276         /*
4277          * Setup the Checksum Register.
4278          * Disable Full-Packet Checksum, which is mutually exclusive with RSS.
4279          * Enable IP/L4 checksum computation by hardware if requested to do so.
4280          */
4281         rxcsum = IXGBE_READ_REG(hw, IXGBE_RXCSUM);
4282         rxcsum |= IXGBE_RXCSUM_PCSD;
4283         if (rx_conf->hw_ip_checksum)
4284                 rxcsum |= IXGBE_RXCSUM_IPPCSE;
4285         else
4286                 rxcsum &= ~IXGBE_RXCSUM_IPPCSE;
4287
4288         IXGBE_WRITE_REG(hw, IXGBE_RXCSUM, rxcsum);
4289
4290         if (hw->mac.type == ixgbe_mac_82599EB ||
4291             hw->mac.type == ixgbe_mac_X540) {
4292                 rdrxctl = IXGBE_READ_REG(hw, IXGBE_RDRXCTL);
4293                 if (rx_conf->hw_strip_crc)
4294                         rdrxctl |= IXGBE_RDRXCTL_CRCSTRIP;
4295                 else
4296                         rdrxctl &= ~IXGBE_RDRXCTL_CRCSTRIP;
4297                 rdrxctl &= ~IXGBE_RDRXCTL_RSCFRSTSIZE;
4298                 IXGBE_WRITE_REG(hw, IXGBE_RDRXCTL, rdrxctl);
4299         }
4300
4301         rc = ixgbe_set_rsc(dev);
4302         if (rc)
4303                 return rc;
4304
4305         ixgbe_set_rx_function(dev);
4306
4307         return 0;
4308 }
4309
4310 /*
4311  * Initializes Transmit Unit.
4312  */
4313 void __attribute__((cold))
4314 ixgbe_dev_tx_init(struct rte_eth_dev *dev)
4315 {
4316         struct ixgbe_hw     *hw;
4317         struct ixgbe_tx_queue *txq;
4318         uint64_t bus_addr;
4319         uint32_t hlreg0;
4320         uint32_t txctrl;
4321         uint16_t i;
4322
4323         PMD_INIT_FUNC_TRACE();
4324         hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
4325
4326         /* Enable TX CRC (checksum offload requirement) and hw padding
4327          * (TSO requirement) */
4328         hlreg0 = IXGBE_READ_REG(hw, IXGBE_HLREG0);
4329         hlreg0 |= (IXGBE_HLREG0_TXCRCEN | IXGBE_HLREG0_TXPADEN);
4330         IXGBE_WRITE_REG(hw, IXGBE_HLREG0, hlreg0);
4331
4332         /* Setup the Base and Length of the Tx Descriptor Rings */
4333         for (i = 0; i < dev->data->nb_tx_queues; i++) {
4334                 txq = dev->data->tx_queues[i];
4335
4336                 bus_addr = txq->tx_ring_phys_addr;
4337                 IXGBE_WRITE_REG(hw, IXGBE_TDBAL(txq->reg_idx),
4338                                 (uint32_t)(bus_addr & 0x00000000ffffffffULL));
4339                 IXGBE_WRITE_REG(hw, IXGBE_TDBAH(txq->reg_idx),
4340                                 (uint32_t)(bus_addr >> 32));
4341                 IXGBE_WRITE_REG(hw, IXGBE_TDLEN(txq->reg_idx),
4342                                 txq->nb_tx_desc * sizeof(union ixgbe_adv_tx_desc));
4343                 /* Setup the HW Tx Head and TX Tail descriptor pointers */
4344                 IXGBE_WRITE_REG(hw, IXGBE_TDH(txq->reg_idx), 0);
4345                 IXGBE_WRITE_REG(hw, IXGBE_TDT(txq->reg_idx), 0);
4346
4347                 /*
4348                  * Disable Tx Head Writeback RO bit, since this hoses
4349                  * bookkeeping if things aren't delivered in order.
4350                  */
4351                 switch (hw->mac.type) {
4352                         case ixgbe_mac_82598EB:
4353                                 txctrl = IXGBE_READ_REG(hw,
4354                                                         IXGBE_DCA_TXCTRL(txq->reg_idx));
4355                                 txctrl &= ~IXGBE_DCA_TXCTRL_DESC_WRO_EN;
4356                                 IXGBE_WRITE_REG(hw, IXGBE_DCA_TXCTRL(txq->reg_idx),
4357                                                 txctrl);
4358                                 break;
4359
4360                         case ixgbe_mac_82599EB:
4361                         case ixgbe_mac_X540:
4362                         case ixgbe_mac_X550:
4363                         case ixgbe_mac_X550EM_x:
4364                         default:
4365                                 txctrl = IXGBE_READ_REG(hw,
4366                                                 IXGBE_DCA_TXCTRL_82599(txq->reg_idx));
4367                                 txctrl &= ~IXGBE_DCA_TXCTRL_DESC_WRO_EN;
4368                                 IXGBE_WRITE_REG(hw, IXGBE_DCA_TXCTRL_82599(txq->reg_idx),
4369                                                 txctrl);
4370                                 break;
4371                 }
4372         }
4373
4374         /* Device configured with multiple TX queues. */
4375         ixgbe_dev_mq_tx_configure(dev);
4376 }
4377
4378 /*
4379  * Set up link for 82599 loopback mode Tx->Rx.
4380  */
4381 static inline void __attribute__((cold))
4382 ixgbe_setup_loopback_link_82599(struct ixgbe_hw *hw)
4383 {
4384         PMD_INIT_FUNC_TRACE();
4385
4386         if (ixgbe_verify_lesm_fw_enabled_82599(hw)) {
4387                 if (hw->mac.ops.acquire_swfw_sync(hw, IXGBE_GSSR_MAC_CSR_SM) !=
4388                                 IXGBE_SUCCESS) {
4389                         PMD_INIT_LOG(ERR, "Could not enable loopback mode");
4390                         /* ignore error */
4391                         return;
4392                 }
4393         }
4394
4395         /* Restart link */
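        /*
         * Added note (as understood from the 82599 datasheet): FLU forces
         * the link up and LMS_10G_LINK_NO_AN selects 10G operation without
         * auto-negotiation, which is what the Tx->Rx loopback setup needs.
         */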
4396         IXGBE_WRITE_REG(hw,
4397                         IXGBE_AUTOC,
4398                         IXGBE_AUTOC_LMS_10G_LINK_NO_AN | IXGBE_AUTOC_FLU);
4399         ixgbe_reset_pipeline_82599(hw);
4400
4401         hw->mac.ops.release_swfw_sync(hw, IXGBE_GSSR_MAC_CSR_SM);
4402         msec_delay(50);
4403 }
4404
4405
4406 /*
4407  * Start Transmit and Receive Units.
4408  */
4409 int __attribute__((cold))
4410 ixgbe_dev_rxtx_start(struct rte_eth_dev *dev)
4411 {
4412         struct ixgbe_hw     *hw;
4413         struct ixgbe_tx_queue *txq;
4414         struct ixgbe_rx_queue *rxq;
4415         uint32_t txdctl;
4416         uint32_t dmatxctl;
4417         uint32_t rxctrl;
4418         uint16_t i;
4419         int ret = 0;
4420
4421         PMD_INIT_FUNC_TRACE();
4422         hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
4423
4424         for (i = 0; i < dev->data->nb_tx_queues; i++) {
4425                 txq = dev->data->tx_queues[i];
4426                 /* Setup Transmit Threshold Registers */
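                /*
                 * Added note: TXDCTL packs the prefetch, host and write-back
                 * thresholds into bits [6:0], [14:8] and [22:16]
                 * respectively, matching the shifts below.
                 */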
4427                 txdctl = IXGBE_READ_REG(hw, IXGBE_TXDCTL(txq->reg_idx));
4428                 txdctl |= txq->pthresh & 0x7F;
4429                 txdctl |= ((txq->hthresh & 0x7F) << 8);
4430                 txdctl |= ((txq->wthresh & 0x7F) << 16);
4431                 IXGBE_WRITE_REG(hw, IXGBE_TXDCTL(txq->reg_idx), txdctl);
4432         }
4433
4434         if (hw->mac.type != ixgbe_mac_82598EB) {
4435                 dmatxctl = IXGBE_READ_REG(hw, IXGBE_DMATXCTL);
4436                 dmatxctl |= IXGBE_DMATXCTL_TE;
4437                 IXGBE_WRITE_REG(hw, IXGBE_DMATXCTL, dmatxctl);
4438         }
4439
4440         for (i = 0; i < dev->data->nb_tx_queues; i++) {
4441                 txq = dev->data->tx_queues[i];
4442                 if (!txq->tx_deferred_start) {
4443                         ret = ixgbe_dev_tx_queue_start(dev, i);
4444                         if (ret < 0)
4445                                 return ret;
4446                 }
4447         }
4448
4449         for (i = 0; i < dev->data->nb_rx_queues; i++) {
4450                 rxq = dev->data->rx_queues[i];
4451                 if (!rxq->rx_deferred_start) {
4452                         ret = ixgbe_dev_rx_queue_start(dev, i);
4453                         if (ret < 0)
4454                                 return ret;
4455                 }
4456         }
4457
4458         /* Enable Receive engine */
4459         rxctrl = IXGBE_READ_REG(hw, IXGBE_RXCTRL);
4460         if (hw->mac.type == ixgbe_mac_82598EB)
4461                 rxctrl |= IXGBE_RXCTRL_DMBYPS;
4462         rxctrl |= IXGBE_RXCTRL_RXEN;
4463         hw->mac.ops.enable_rx_dma(hw, rxctrl);
4464
4465         /* If loopback mode is enabled for 82599, set up the link accordingly */
4466         if (hw->mac.type == ixgbe_mac_82599EB &&
4467                         dev->data->dev_conf.lpbk_mode == IXGBE_LPBK_82599_TX_RX)
4468                 ixgbe_setup_loopback_link_82599(hw);
4469
4470         return 0;
4471 }
4472
4473 /*
4474  * Start Receive Units for specified queue.
4475  */
4476 int __attribute__((cold))
4477 ixgbe_dev_rx_queue_start(struct rte_eth_dev *dev, uint16_t rx_queue_id)
4478 {
4479         struct ixgbe_hw     *hw;
4480         struct ixgbe_rx_queue *rxq;
4481         uint32_t rxdctl;
4482         int poll_ms;
4483
4484         PMD_INIT_FUNC_TRACE();
4485         hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
4486
4487         if (rx_queue_id < dev->data->nb_rx_queues) {
4488                 rxq = dev->data->rx_queues[rx_queue_id];
4489
4490                 /* Allocate buffers for descriptor rings */
4491                 if (ixgbe_alloc_rx_queue_mbufs(rxq) != 0) {
4492                         PMD_INIT_LOG(ERR, "Could not alloc mbuf for queue:%d",
4493                                      rx_queue_id);
4494                         return -1;
4495                 }
4496                 rxdctl = IXGBE_READ_REG(hw, IXGBE_RXDCTL(rxq->reg_idx));
4497                 rxdctl |= IXGBE_RXDCTL_ENABLE;
4498                 IXGBE_WRITE_REG(hw, IXGBE_RXDCTL(rxq->reg_idx), rxdctl);
4499
4500                 /* Wait until RX Enable ready */
4501                 poll_ms = RTE_IXGBE_REGISTER_POLL_WAIT_10_MS;
4502                 do {
4503                         rte_delay_ms(1);
4504                         rxdctl = IXGBE_READ_REG(hw, IXGBE_RXDCTL(rxq->reg_idx));
4505                 } while (--poll_ms && !(rxdctl & IXGBE_RXDCTL_ENABLE));
4506                 if (!poll_ms)
4507                         PMD_INIT_LOG(ERR, "Could not enable Rx Queue %d",
4508                                      rx_queue_id);
4509                 rte_wmb();
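                /*
                 * Added note: writing RDT to nb_rx_desc - 1 hands every
                 * freshly allocated descriptor except one to the hardware,
                 * making the whole ring available for reception.
                 */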
4510                 IXGBE_WRITE_REG(hw, IXGBE_RDH(rxq->reg_idx), 0);
4511                 IXGBE_WRITE_REG(hw, IXGBE_RDT(rxq->reg_idx), rxq->nb_rx_desc - 1);
4512                 dev->data->rx_queue_state[rx_queue_id] = RTE_ETH_QUEUE_STATE_STARTED;
4513         } else
4514                 return -1;
4515
4516         return 0;
4517 }
4518
4519 /*
4520  * Stop Receive Units for specified queue.
4521  */
4522 int __attribute__((cold))
4523 ixgbe_dev_rx_queue_stop(struct rte_eth_dev *dev, uint16_t rx_queue_id)
4524 {
4525         struct ixgbe_hw     *hw;
4526         struct ixgbe_adapter *adapter =
4527                 (struct ixgbe_adapter *)dev->data->dev_private;
4528         struct ixgbe_rx_queue *rxq;
4529         uint32_t rxdctl;
4530         int poll_ms;
4531
4532         PMD_INIT_FUNC_TRACE();
4533         hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
4534
4535         if (rx_queue_id < dev->data->nb_rx_queues) {
4536                 rxq = dev->data->rx_queues[rx_queue_id];
4537
4538                 rxdctl = IXGBE_READ_REG(hw, IXGBE_RXDCTL(rxq->reg_idx));
4539                 rxdctl &= ~IXGBE_RXDCTL_ENABLE;
4540                 IXGBE_WRITE_REG(hw, IXGBE_RXDCTL(rxq->reg_idx), rxdctl);
4541
4542                 /* Wait until the RX Enable bit clears */
4543                 poll_ms = RTE_IXGBE_REGISTER_POLL_WAIT_10_MS;
4544                 do {
4545                         rte_delay_ms(1);
4546                         rxdctl = IXGBE_READ_REG(hw, IXGBE_RXDCTL(rxq->reg_idx));
4547                 } while (--poll_ms && (rxdctl & IXGBE_RXDCTL_ENABLE));
4548                 if (!poll_ms)
4549                         PMD_INIT_LOG(ERR, "Could not disable Rx Queue %d",
4550                                      rx_queue_id);
4551
4552                 rte_delay_us(RTE_IXGBE_WAIT_100_US);
4553
4554                 ixgbe_rx_queue_release_mbufs(rxq);
4555                 ixgbe_reset_rx_queue(adapter, rxq);
4556                 dev->data->rx_queue_state[rx_queue_id] = RTE_ETH_QUEUE_STATE_STOPPED;
4557         } else
4558                 return -1;
4559
4560         return 0;
4561 }
4562
4563
4564 /*
4565  * Start Transmit Units for specified queue.
4566  */
4567 int __attribute__((cold))
4568 ixgbe_dev_tx_queue_start(struct rte_eth_dev *dev, uint16_t tx_queue_id)
4569 {
4570         struct ixgbe_hw     *hw;
4571         struct ixgbe_tx_queue *txq;
4572         uint32_t txdctl;
4573         int poll_ms;
4574
4575         PMD_INIT_FUNC_TRACE();
4576         hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
4577
4578         if (tx_queue_id < dev->data->nb_tx_queues) {
4579                 txq = dev->data->tx_queues[tx_queue_id];
4580                 txdctl = IXGBE_READ_REG(hw, IXGBE_TXDCTL(txq->reg_idx));
4581                 txdctl |= IXGBE_TXDCTL_ENABLE;
4582                 IXGBE_WRITE_REG(hw, IXGBE_TXDCTL(txq->reg_idx), txdctl);
4583
4584                 /* Wait until TX Enable ready */
4585                 if (hw->mac.type == ixgbe_mac_82599EB) {
4586                         poll_ms = RTE_IXGBE_REGISTER_POLL_WAIT_10_MS;
4587                         do {
4588                                 rte_delay_ms(1);
4589                                 txdctl = IXGBE_READ_REG(hw,
4590                                         IXGBE_TXDCTL(txq->reg_idx));
4591                         } while (--poll_ms && !(txdctl & IXGBE_TXDCTL_ENABLE));
4592                         if (!poll_ms)
4593                                 PMD_INIT_LOG(ERR, "Could not enable "
4594                                              "Tx Queue %d", tx_queue_id);
4595                 }
4596                 rte_wmb();
4597                 IXGBE_WRITE_REG(hw, IXGBE_TDH(txq->reg_idx), 0);
4598                 IXGBE_WRITE_REG(hw, IXGBE_TDT(txq->reg_idx), 0);
4599                 dev->data->tx_queue_state[tx_queue_id] = RTE_ETH_QUEUE_STATE_STARTED;
4600         } else
4601                 return -1;
4602
4603         return 0;
4604 }
4605
4606 /*
4607  * Stop Transmit Units for specified queue.
4608  */
4609 int __attribute__((cold))
4610 ixgbe_dev_tx_queue_stop(struct rte_eth_dev *dev, uint16_t tx_queue_id)
4611 {
4612         struct ixgbe_hw     *hw;
4613         struct ixgbe_tx_queue *txq;
4614         uint32_t txdctl;
4615         uint32_t txtdh, txtdt;
4616         int poll_ms;
4617
4618         PMD_INIT_FUNC_TRACE();
4619         hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
4620
4621         if (tx_queue_id < dev->data->nb_tx_queues) {
4622                 txq = dev->data->tx_queues[tx_queue_id];
4623
4624                 /* Wait until TX queue is empty */
4625                 if (hw->mac.type == ixgbe_mac_82599EB) {
4626                         poll_ms = RTE_IXGBE_REGISTER_POLL_WAIT_10_MS;
4627                         do {
4628                                 rte_delay_us(RTE_IXGBE_WAIT_100_US);
4629                                 txtdh = IXGBE_READ_REG(hw,
4630                                                 IXGBE_TDH(txq->reg_idx));
4631                                 txtdt = IXGBE_READ_REG(hw,
4632                                                 IXGBE_TDT(txq->reg_idx));
4633                         } while (--poll_ms && (txtdh != txtdt));
4634                         if (!poll_ms)
4635                                 PMD_INIT_LOG(ERR, "Tx Queue %d is not empty "
4636                                              "when stopping.", tx_queue_id);
4637                 }
4638
4639                 txdctl = IXGBE_READ_REG(hw, IXGBE_TXDCTL(txq->reg_idx));
4640                 txdctl &= ~IXGBE_TXDCTL_ENABLE;
4641                 IXGBE_WRITE_REG(hw, IXGBE_TXDCTL(txq->reg_idx), txdctl);
4642
4643                 /* Wait until the TX Enable bit clears */
4644                 if (hw->mac.type == ixgbe_mac_82599EB) {
4645                         poll_ms = RTE_IXGBE_REGISTER_POLL_WAIT_10_MS;
4646                         do {
4647                                 rte_delay_ms(1);
4648                                 txdctl = IXGBE_READ_REG(hw,
4649                                                 IXGBE_TXDCTL(txq->reg_idx));
4650                         } while (--poll_ms && (txdctl & IXGBE_TXDCTL_ENABLE));
4651                         if (!poll_ms)
4652                                 PMD_INIT_LOG(ERR, "Could not disable "
4653                                              "Tx Queue %d", tx_queue_id);
4654                 }
4655
4656                 if (txq->ops != NULL) {
4657                         txq->ops->release_mbufs(txq);
4658                         txq->ops->reset(txq);
4659                 }
4660                 dev->data->tx_queue_state[tx_queue_id] = RTE_ETH_QUEUE_STATE_STOPPED;
4661         } else
4662                 return -1;
4663
4664         return 0;
4665 }
4666
4667 void
4668 ixgbe_rxq_info_get(struct rte_eth_dev *dev, uint16_t queue_id,
4669         struct rte_eth_rxq_info *qinfo)
4670 {
4671         struct ixgbe_rx_queue *rxq;
4672
4673         rxq = dev->data->rx_queues[queue_id];
4674
4675         qinfo->mp = rxq->mb_pool;
4676         qinfo->scattered_rx = dev->data->scattered_rx;
4677         qinfo->nb_desc = rxq->nb_rx_desc;
4678
4679         qinfo->conf.rx_free_thresh = rxq->rx_free_thresh;
4680         qinfo->conf.rx_drop_en = rxq->drop_en;
4681         qinfo->conf.rx_deferred_start = rxq->rx_deferred_start;
4682 }
4683
4684 void
4685 ixgbe_txq_info_get(struct rte_eth_dev *dev, uint16_t queue_id,
4686         struct rte_eth_txq_info *qinfo)
4687 {
4688         struct ixgbe_tx_queue *txq;
4689
4690         txq = dev->data->tx_queues[queue_id];
4691
4692         qinfo->nb_desc = txq->nb_tx_desc;
4693
4694         qinfo->conf.tx_thresh.pthresh = txq->pthresh;
4695         qinfo->conf.tx_thresh.hthresh = txq->hthresh;
4696         qinfo->conf.tx_thresh.wthresh = txq->wthresh;
4697
4698         qinfo->conf.tx_free_thresh = txq->tx_free_thresh;
4699         qinfo->conf.tx_rs_thresh = txq->tx_rs_thresh;
4700         qinfo->conf.txq_flags = txq->txq_flags;
4701         qinfo->conf.tx_deferred_start = txq->tx_deferred_start;
4702 }
4703
4704 /*
4705  * [VF] Initializes Receive Unit.
4706  */
4707 int __attribute__((cold))
4708 ixgbevf_dev_rx_init(struct rte_eth_dev *dev)
4709 {
4710         struct ixgbe_hw     *hw;
4711         struct ixgbe_rx_queue *rxq;
4712         uint64_t bus_addr;
4713         uint32_t srrctl, psrtype = 0;
4714         uint16_t buf_size;
4715         uint16_t i;
4716         int ret;
4717
4718         PMD_INIT_FUNC_TRACE();
4719         hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
4720
4721         if (rte_is_power_of_2(dev->data->nb_rx_queues) == 0) {
4722                 PMD_INIT_LOG(ERR, "The number of Rx queues is invalid; "
4723                         "it should be a power of 2");
4724                 return -1;
4725         }
4726
4727         if (dev->data->nb_rx_queues > hw->mac.max_rx_queues) {
4728                 PMD_INIT_LOG(ERR, "The number of Rx queues is invalid; "
4729                         "it should be less than or equal to %d",
4730                         hw->mac.max_rx_queues);
4731                 return -1;
4732         }
4733
4734         /*
4735          * When the VF driver issues an IXGBE_VF_RESET request, the PF driver
4736          * disables the VF receipt of packets if the PF MTU is > 1500.
4737          * This is done to deal with an 82599 limitation that forces
4738          * the PF and all VFs to share the same MTU.
4739          * The PF driver then re-enables VF packet receipt when the
4740          * VF driver issues an IXGBE_VF_SET_LPE request.
4741          * In the meantime, the VF device cannot be used, even if the VF driver
4742          * and the Guest VM network stack are ready to accept packets with a
4743          * size up to the PF MTU.
4744          * As a work-around to this PF behaviour, force the call to
4745          * ixgbevf_rlpml_set_vf even if jumbo frames are not used. This way,
4746          * VF packet reception works in all cases.
4747          */
4748         ixgbevf_rlpml_set_vf(hw,
4749                 (uint16_t)dev->data->dev_conf.rxmode.max_rx_pkt_len);
4750
4751         /* Setup RX queues */
4752         for (i = 0; i < dev->data->nb_rx_queues; i++) {
4753                 rxq = dev->data->rx_queues[i];
4754
4755                 /* Allocate buffers for descriptor rings */
4756                 ret = ixgbe_alloc_rx_queue_mbufs(rxq);
4757                 if (ret)
4758                         return ret;
4759
4760                 /* Setup the Base and Length of the Rx Descriptor Rings */
4761                 bus_addr = rxq->rx_ring_phys_addr;
4762
4763                 IXGBE_WRITE_REG(hw, IXGBE_VFRDBAL(i),
4764                                 (uint32_t)(bus_addr & 0x00000000ffffffffULL));
4765                 IXGBE_WRITE_REG(hw, IXGBE_VFRDBAH(i),
4766                                 (uint32_t)(bus_addr >> 32));
4767                 IXGBE_WRITE_REG(hw, IXGBE_VFRDLEN(i),
4768                                 rxq->nb_rx_desc * sizeof(union ixgbe_adv_rx_desc));
4769                 IXGBE_WRITE_REG(hw, IXGBE_VFRDH(i), 0);
4770                 IXGBE_WRITE_REG(hw, IXGBE_VFRDT(i), 0);
4771
4772
4773                 /* Configure the SRRCTL register */
4774 #ifdef RTE_HEADER_SPLIT_ENABLE
4775                 /*
4776                  * Configure Header Split
4777                  */
4778                 if (dev->data->dev_conf.rxmode.header_split) {
4779                         srrctl = ((dev->data->dev_conf.rxmode.split_hdr_size <<
4780                                 IXGBE_SRRCTL_BSIZEHDRSIZE_SHIFT) &
4781                                 IXGBE_SRRCTL_BSIZEHDR_MASK);
4782                         srrctl |= IXGBE_SRRCTL_DESCTYPE_HDR_SPLIT_ALWAYS;
4783                 } else
4784 #endif
4785                         srrctl = IXGBE_SRRCTL_DESCTYPE_ADV_ONEBUF;
4786
4787                 /* Set if packets are dropped when no descriptors available */
4788                 if (rxq->drop_en)
4789                         srrctl |= IXGBE_SRRCTL_DROP_EN;
4790
4791                 /*
4792                  * Configure the RX buffer size in the BSIZEPACKET field of
4793                  * the SRRCTL register of the queue.
4794                  * The value is in 1 KB resolution. Valid values can be from
4795                  * 1 KB to 16 KB.
4796                  */
4797                 buf_size = (uint16_t)(rte_pktmbuf_data_room_size(rxq->mb_pool) -
4798                         RTE_PKTMBUF_HEADROOM);
4799                 srrctl |= ((buf_size >> IXGBE_SRRCTL_BSIZEPKT_SHIFT) &
4800                            IXGBE_SRRCTL_BSIZEPKT_MASK);
4801
4802                 /*
4803                  * VF modification to write virtual function SRRCTL register
4804                  */
4805                 IXGBE_WRITE_REG(hw, IXGBE_VFSRRCTL(i), srrctl);
4806
4807                 buf_size = (uint16_t) ((srrctl & IXGBE_SRRCTL_BSIZEPKT_MASK) <<
4808                                        IXGBE_SRRCTL_BSIZEPKT_SHIFT);
4809
4810                 if (dev->data->dev_conf.rxmode.enable_scatter ||
4811                     /* Account for the extra dual VLAN (QinQ) tag length */
4812                     (dev->data->dev_conf.rxmode.max_rx_pkt_len +
4813                                 2 * IXGBE_VLAN_TAG_SIZE) > buf_size) {
4814                         if (!dev->data->scattered_rx)
4815                                 PMD_INIT_LOG(DEBUG, "forcing scatter mode");
4816                         dev->data->scattered_rx = 1;
4817                 }
4818         }
4819
4820 #ifdef RTE_HEADER_SPLIT_ENABLE
4821         if (dev->data->dev_conf.rxmode.header_split)
4822                 /* Must setup the PSRTYPE register */
4823                 psrtype = IXGBE_PSRTYPE_TCPHDR |
4824                         IXGBE_PSRTYPE_UDPHDR   |
4825                         IXGBE_PSRTYPE_IPV4HDR  |
4826                         IXGBE_PSRTYPE_IPV6HDR;
4827 #endif
4828
4829         /* Set RQPL for VF RSS according to the max number of Rx queues */
4830         psrtype |= (dev->data->nb_rx_queues >> 1) <<
4831                 IXGBE_PSRTYPE_RQPL_SHIFT;
4832         IXGBE_WRITE_REG(hw, IXGBE_VFPSRTYPE, psrtype);
4833
4834         ixgbe_set_rx_function(dev);
4835
4836         return 0;
4837 }
4838
4839 /*
4840  * [VF] Initializes Transmit Unit.
4841  */
4842 void __attribute__((cold))
4843 ixgbevf_dev_tx_init(struct rte_eth_dev *dev)
4844 {
4845         struct ixgbe_hw     *hw;
4846         struct ixgbe_tx_queue *txq;
4847         uint64_t bus_addr;
4848         uint32_t txctrl;
4849         uint16_t i;
4850
4851         PMD_INIT_FUNC_TRACE();
4852         hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
4853
4854         /* Setup the Base and Length of the Tx Descriptor Rings */
4855         for (i = 0; i < dev->data->nb_tx_queues; i++) {
4856                 txq = dev->data->tx_queues[i];
4857                 bus_addr = txq->tx_ring_phys_addr;
4858                 IXGBE_WRITE_REG(hw, IXGBE_VFTDBAL(i),
4859                                 (uint32_t)(bus_addr & 0x00000000ffffffffULL));
4860                 IXGBE_WRITE_REG(hw, IXGBE_VFTDBAH(i),
4861                                 (uint32_t)(bus_addr >> 32));
4862                 IXGBE_WRITE_REG(hw, IXGBE_VFTDLEN(i),
4863                                 txq->nb_tx_desc * sizeof(union ixgbe_adv_tx_desc));
4864                 /* Setup the HW Tx Head and TX Tail descriptor pointers */
4865                 IXGBE_WRITE_REG(hw, IXGBE_VFTDH(i), 0);
4866                 IXGBE_WRITE_REG(hw, IXGBE_VFTDT(i), 0);
4867
4868                 /*
4869                  * Disable Tx Head Writeback RO bit, since this hoses
4870                  * bookkeeping if things aren't delivered in order.
4871                  */
4872                 txctrl = IXGBE_READ_REG(hw,
4873                                 IXGBE_VFDCA_TXCTRL(i));
4874                 txctrl &= ~IXGBE_DCA_TXCTRL_DESC_WRO_EN;
4875                 IXGBE_WRITE_REG(hw, IXGBE_VFDCA_TXCTRL(i),
4876                                 txctrl);
4877         }
4878 }
4879
4880 /*
4881  * [VF] Start Transmit and Receive Units.
4882  */
4883 void __attribute__((cold))
4884 ixgbevf_dev_rxtx_start(struct rte_eth_dev *dev)
4885 {
4886         struct ixgbe_hw     *hw;
4887         struct ixgbe_tx_queue *txq;
4888         struct ixgbe_rx_queue *rxq;
4889         uint32_t txdctl;
4890         uint32_t rxdctl;
4891         uint16_t i;
4892         int poll_ms;
4893
4894         PMD_INIT_FUNC_TRACE();
4895         hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
4896
4897         for (i = 0; i < dev->data->nb_tx_queues; i++) {
4898                 txq = dev->data->tx_queues[i];
4899                 /* Setup Transmit Threshold Registers */
4900                 txdctl = IXGBE_READ_REG(hw, IXGBE_VFTXDCTL(i));
4901                 txdctl |= txq->pthresh & 0x7F;
4902                 txdctl |= ((txq->hthresh & 0x7F) << 8);
4903                 txdctl |= ((txq->wthresh & 0x7F) << 16);
4904                 IXGBE_WRITE_REG(hw, IXGBE_VFTXDCTL(i), txdctl);
4905         }
4906
4907         for (i = 0; i < dev->data->nb_tx_queues; i++) {
4908
4909                 txdctl = IXGBE_READ_REG(hw, IXGBE_VFTXDCTL(i));
4910                 txdctl |= IXGBE_TXDCTL_ENABLE;
4911                 IXGBE_WRITE_REG(hw, IXGBE_VFTXDCTL(i), txdctl);
4912
4913                 poll_ms = 10;
4914                 /* Wait until TX Enable ready */
4915                 do {
4916                         rte_delay_ms(1);
4917                         txdctl = IXGBE_READ_REG(hw, IXGBE_VFTXDCTL(i));
4918                 } while (--poll_ms && !(txdctl & IXGBE_TXDCTL_ENABLE));
4919                 if (!poll_ms)
4920                         PMD_INIT_LOG(ERR, "Could not enable Tx Queue %d", i);
4921         }
4922         for (i = 0; i < dev->data->nb_rx_queues; i++) {
4923
4924                 rxq = dev->data->rx_queues[i];
4925
4926                 rxdctl = IXGBE_READ_REG(hw, IXGBE_VFRXDCTL(i));
4927                 rxdctl |= IXGBE_RXDCTL_ENABLE;
4928                 IXGBE_WRITE_REG(hw, IXGBE_VFRXDCTL(i), rxdctl);
4929
4930                 /* Wait until RX Enable ready */
4931                 poll_ms = 10;
4932                 do {
4933                         rte_delay_ms(1);
4934                         rxdctl = IXGBE_READ_REG(hw, IXGBE_VFRXDCTL(i));
4935                 } while (--poll_ms && !(rxdctl & IXGBE_RXDCTL_ENABLE));
4936                 if (!poll_ms)
4937                         PMD_INIT_LOG(ERR, "Could not enable Rx Queue %d", i);
4938                 rte_wmb();
4939                 IXGBE_WRITE_REG(hw, IXGBE_VFRDT(i), rxq->nb_rx_desc - 1);
4940
4941         }
4942 }
4943
4944 /* Stubs needed for linkage when CONFIG_RTE_IXGBE_INC_VECTOR is set to 'n' */
4945 int __attribute__((weak))
4946 ixgbe_rx_vec_dev_conf_condition_check(struct rte_eth_dev __rte_unused *dev)
4947 {
4948         return -1;
4949 }
4950
4951 uint16_t __attribute__((weak))
4952 ixgbe_recv_pkts_vec(
4953         void __rte_unused *rx_queue,
4954         struct rte_mbuf __rte_unused **rx_pkts,
4955         uint16_t __rte_unused nb_pkts)
4956 {
4957         return 0;
4958 }
4959
4960 uint16_t __attribute__((weak))
4961 ixgbe_recv_scattered_pkts_vec(
4962         void __rte_unused *rx_queue,
4963         struct rte_mbuf __rte_unused **rx_pkts,
4964         uint16_t __rte_unused nb_pkts)
4965 {
4966         return 0;
4967 }
4968
4969 int __attribute__((weak))
4970 ixgbe_rxq_vec_setup(struct ixgbe_rx_queue __rte_unused *rxq)
4971 {
4972         return -1;
4973 }