ixgbe: offload VxLAN and NVGRE Tx checksum on X550
[dpdk.git] / drivers / net / ixgbe / ixgbe_rxtx.c
1 /*-
2  *   BSD LICENSE
3  *
4  *   Copyright(c) 2010-2015 Intel Corporation. All rights reserved.
5  *   Copyright 2014 6WIND S.A.
6  *   All rights reserved.
7  *
8  *   Redistribution and use in source and binary forms, with or without
9  *   modification, are permitted provided that the following conditions
10  *   are met:
11  *
12  *     * Redistributions of source code must retain the above copyright
13  *       notice, this list of conditions and the following disclaimer.
14  *     * Redistributions in binary form must reproduce the above copyright
15  *       notice, this list of conditions and the following disclaimer in
16  *       the documentation and/or other materials provided with the
17  *       distribution.
18  *     * Neither the name of Intel Corporation nor the names of its
19  *       contributors may be used to endorse or promote products derived
20  *       from this software without specific prior written permission.
21  *
22  *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
23  *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
24  *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
25  *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
26  *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
27  *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
28  *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
29  *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
30  *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
31  *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
32  *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
33  */
34
35 #include <sys/queue.h>
36
37 #include <stdio.h>
38 #include <stdlib.h>
39 #include <string.h>
40 #include <errno.h>
41 #include <stdint.h>
42 #include <stdarg.h>
43 #include <unistd.h>
44 #include <inttypes.h>
45
46 #include <rte_byteorder.h>
47 #include <rte_common.h>
48 #include <rte_cycles.h>
49 #include <rte_log.h>
50 #include <rte_debug.h>
51 #include <rte_interrupts.h>
52 #include <rte_pci.h>
53 #include <rte_memory.h>
54 #include <rte_memzone.h>
55 #include <rte_launch.h>
56 #include <rte_eal.h>
57 #include <rte_per_lcore.h>
58 #include <rte_lcore.h>
59 #include <rte_atomic.h>
60 #include <rte_branch_prediction.h>
61 #include <rte_ring.h>
62 #include <rte_mempool.h>
63 #include <rte_malloc.h>
64 #include <rte_mbuf.h>
65 #include <rte_ether.h>
66 #include <rte_ethdev.h>
67 #include <rte_prefetch.h>
68 #include <rte_udp.h>
69 #include <rte_tcp.h>
70 #include <rte_sctp.h>
71 #include <rte_string_fns.h>
72 #include <rte_errno.h>
73 #include <rte_ip.h>
74
75 #include "ixgbe_logs.h"
76 #include "base/ixgbe_api.h"
77 #include "base/ixgbe_vf.h"
78 #include "ixgbe_ethdev.h"
79 #include "base/ixgbe_dcb.h"
80 #include "base/ixgbe_common.h"
81 #include "ixgbe_rxtx.h"
82
83 /* Bit mask of the offload flags that require building a TX context descriptor */
84 #define IXGBE_TX_OFFLOAD_MASK (                  \
85                 PKT_TX_VLAN_PKT |                \
86                 PKT_TX_IP_CKSUM |                \
87                 PKT_TX_L4_MASK |                 \
88                 PKT_TX_TCP_SEG |                 \
89                 PKT_TX_OUTER_IP_CKSUM)
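
/*
 * Illustrative sketch (not part of the driver, example header lengths): the
 * mbuf fields an application might set so that its ol_flags fall under
 * IXGBE_TX_OFFLOAD_MASK and are handled by the offload path below.
 *
 *   m->ol_flags |= PKT_TX_IP_CKSUM | PKT_TX_UDP_CKSUM;
 *   m->l2_len = 14;    Ethernet header
 *   m->l3_len = 20;    IPv4 header, checksum filled in by the NIC
 */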
90
91 static inline struct rte_mbuf *
92 rte_rxmbuf_alloc(struct rte_mempool *mp)
93 {
94         struct rte_mbuf *m;
95
96         m = __rte_mbuf_raw_alloc(mp);
97         __rte_mbuf_sanity_check_raw(m, 0);
98         return m;
99 }
100
101
102 #if 1
103 #define RTE_PMD_USE_PREFETCH
104 #endif
105
106 #ifdef RTE_PMD_USE_PREFETCH
107 /*
108  * Prefetch a cache line into all cache levels.
109  */
110 #define rte_ixgbe_prefetch(p)   rte_prefetch0(p)
111 #else
112 #define rte_ixgbe_prefetch(p)   do {} while (0)
113 #endif
114
115 /*********************************************************************
116  *
117  *  TX functions
118  *
119  **********************************************************************/
120
121 /*
122  * Check for descriptors with their DD bit set and free mbufs.
123  * Return the total number of buffers freed.
124  */
125 static inline int __attribute__((always_inline))
126 ixgbe_tx_free_bufs(struct ixgbe_tx_queue *txq)
127 {
128         struct ixgbe_tx_entry *txep;
129         uint32_t status;
130         int i;
131
132         /* check DD bit on threshold descriptor */
133         status = txq->tx_ring[txq->tx_next_dd].wb.status;
134         if (!(status & rte_cpu_to_le_32(IXGBE_ADVTXD_STAT_DD)))
135                 return 0;
136
137         /*
138          * first buffer to free from S/W ring is at index
139          * tx_next_dd - (tx_rs_thresh-1)
140          */
141         txep = &(txq->sw_ring[txq->tx_next_dd - (txq->tx_rs_thresh - 1)]);
142
143         /* free buffers one at a time */
144         if ((txq->txq_flags & (uint32_t)ETH_TXQ_FLAGS_NOREFCOUNT) != 0) {
145                 for (i = 0; i < txq->tx_rs_thresh; ++i, ++txep) {
146                         txep->mbuf->next = NULL;
147                         rte_mempool_put(txep->mbuf->pool, txep->mbuf);
148                         txep->mbuf = NULL;
149                 }
150         } else {
151                 for (i = 0; i < txq->tx_rs_thresh; ++i, ++txep) {
152                         rte_pktmbuf_free_seg(txep->mbuf);
153                         txep->mbuf = NULL;
154                 }
155         }
156
157         /* buffers were freed, update counters */
158         txq->nb_tx_free = (uint16_t)(txq->nb_tx_free + txq->tx_rs_thresh);
159         txq->tx_next_dd = (uint16_t)(txq->tx_next_dd + txq->tx_rs_thresh);
160         if (txq->tx_next_dd >= txq->nb_tx_desc)
161                 txq->tx_next_dd = (uint16_t)(txq->tx_rs_thresh - 1);
162
163         return txq->tx_rs_thresh;
164 }
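
/*
 * Worked example (assuming the default tx_rs_thresh of 32 and a 512-entry
 * ring): tx_next_dd starts at 31, so the first group freed is S/W ring
 * entries 0..31; nb_tx_free grows by 32 and tx_next_dd advances to 63.
 * After the group ending at descriptor 511, tx_next_dd wraps back to 31.
 */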
165
166 /* Populate 4 descriptors with data from 4 mbufs */
167 static inline void
168 tx4(volatile union ixgbe_adv_tx_desc *txdp, struct rte_mbuf **pkts)
169 {
170         uint64_t buf_dma_addr;
171         uint32_t pkt_len;
172         int i;
173
174         for (i = 0; i < 4; ++i, ++txdp, ++pkts) {
175                 buf_dma_addr = rte_mbuf_data_dma_addr(*pkts);
176                 pkt_len = (*pkts)->data_len;
177
178                 /* write data to descriptor */
179                 txdp->read.buffer_addr = rte_cpu_to_le_64(buf_dma_addr);
180
181                 txdp->read.cmd_type_len =
182                         rte_cpu_to_le_32((uint32_t)DCMD_DTYP_FLAGS | pkt_len);
183
184                 txdp->read.olinfo_status =
185                         rte_cpu_to_le_32(pkt_len << IXGBE_ADVTXD_PAYLEN_SHIFT);
186
187                 rte_prefetch0(&(*pkts)->pool);
188         }
189 }
190
191 /* Populate 1 descriptor with data from 1 mbuf */
192 static inline void
193 tx1(volatile union ixgbe_adv_tx_desc *txdp, struct rte_mbuf **pkts)
194 {
195         uint64_t buf_dma_addr;
196         uint32_t pkt_len;
197
198         buf_dma_addr = rte_mbuf_data_dma_addr(*pkts);
199         pkt_len = (*pkts)->data_len;
200
201         /* write data to descriptor */
202         txdp->read.buffer_addr = rte_cpu_to_le_64(buf_dma_addr);
203         txdp->read.cmd_type_len =
204                         rte_cpu_to_le_32((uint32_t)DCMD_DTYP_FLAGS | pkt_len);
205         txdp->read.olinfo_status =
206                         rte_cpu_to_le_32(pkt_len << IXGBE_ADVTXD_PAYLEN_SHIFT);
207         rte_prefetch0(&(*pkts)->pool);
208 }
209
210 /*
211  * Fill H/W descriptor ring with mbuf data.
212  * Copy mbuf pointers to the S/W ring.
213  */
214 static inline void
215 ixgbe_tx_fill_hw_ring(struct ixgbe_tx_queue *txq, struct rte_mbuf **pkts,
216                       uint16_t nb_pkts)
217 {
218         volatile union ixgbe_adv_tx_desc *txdp = &(txq->tx_ring[txq->tx_tail]);
219         struct ixgbe_tx_entry *txep = &(txq->sw_ring[txq->tx_tail]);
220         const int N_PER_LOOP = 4;
221         const int N_PER_LOOP_MASK = N_PER_LOOP-1;
222         int mainpart, leftover;
223         int i, j;
224
225         /*
226          * Process most of the packets in chunks of N pkts.  Any
227          * leftover packets will get processed one at a time.
228          */
229         mainpart = (nb_pkts & ((uint32_t) ~N_PER_LOOP_MASK));
230         leftover = (nb_pkts & ((uint32_t)  N_PER_LOOP_MASK));
231         for (i = 0; i < mainpart; i += N_PER_LOOP) {
232                 /* Copy N mbuf pointers to the S/W ring */
233                 for (j = 0; j < N_PER_LOOP; ++j) {
234                         (txep + i + j)->mbuf = *(pkts + i + j);
235                 }
236                 tx4(txdp + i, pkts + i);
237         }
238
239         if (unlikely(leftover > 0)) {
240                 for (i = 0; i < leftover; ++i) {
241                         (txep + mainpart + i)->mbuf = *(pkts + mainpart + i);
242                         tx1(txdp + mainpart + i, pkts + mainpart + i);
243                 }
244         }
245 }
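
/*
 * Example: for nb_pkts == 11, mainpart == 8 (two tx4() calls covering
 * packets 0-7) and leftover == 3 (three tx1() calls for packets 8-10).
 */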
246
247 static inline uint16_t
248 tx_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts,
249              uint16_t nb_pkts)
250 {
251         struct ixgbe_tx_queue *txq = (struct ixgbe_tx_queue *)tx_queue;
252         volatile union ixgbe_adv_tx_desc *tx_r = txq->tx_ring;
253         uint16_t n = 0;
254
255         /*
256          * Begin scanning the H/W ring for done descriptors when the
257          * number of available descriptors drops below tx_free_thresh.  For
258          * each done descriptor, free the associated buffer.
259          */
260         if (txq->nb_tx_free < txq->tx_free_thresh)
261                 ixgbe_tx_free_bufs(txq);
262
263         /* Only use descriptors that are available */
264         nb_pkts = (uint16_t)RTE_MIN(txq->nb_tx_free, nb_pkts);
265         if (unlikely(nb_pkts == 0))
266                 return 0;
267
268         /* Use exactly nb_pkts descriptors */
269         txq->nb_tx_free = (uint16_t)(txq->nb_tx_free - nb_pkts);
270
271         /*
272          * At this point, we know there are enough descriptors in the
273          * ring to transmit all the packets.  This assumes that each
274          * mbuf contains a single segment, and that no new offloads
275          * are expected, which would require a new context descriptor.
276          */
277
278         /*
279          * See if we're going to wrap-around. If so, handle the top
280          * of the descriptor ring first, then do the bottom.  If not,
281          * the processing looks just like the "bottom" part anyway...
282          */
283         if ((txq->tx_tail + nb_pkts) > txq->nb_tx_desc) {
284                 n = (uint16_t)(txq->nb_tx_desc - txq->tx_tail);
285                 ixgbe_tx_fill_hw_ring(txq, tx_pkts, n);
286
287                 /*
288                  * We know that the last descriptor in the ring will need to
289                  * have its RS bit set because tx_rs_thresh has to be
290                  * a divisor of the ring size
291                  */
292                 tx_r[txq->tx_next_rs].read.cmd_type_len |=
293                         rte_cpu_to_le_32(IXGBE_ADVTXD_DCMD_RS);
294                 txq->tx_next_rs = (uint16_t)(txq->tx_rs_thresh - 1);
295
296                 txq->tx_tail = 0;
297         }
298
299         /* Fill H/W descriptor ring with mbuf data */
300         ixgbe_tx_fill_hw_ring(txq, tx_pkts + n, (uint16_t)(nb_pkts - n));
301         txq->tx_tail = (uint16_t)(txq->tx_tail + (nb_pkts - n));
302
303         /*
304          * Determine if RS bit should be set
305          * This is what we actually want:
306          *   if ((txq->tx_tail - 1) >= txq->tx_next_rs)
307          * but instead of subtracting 1 and doing >=, we can just do
308          * greater than without subtracting.
309          */
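        /*
         * Worked example (assuming tx_rs_thresh == 32, so tx_next_rs starts
         * at 31): if a burst moves tx_tail from 20 to 35, then 35 > 31 is
         * equivalent to (35 - 1) >= 31, so descriptor 31 gets the RS bit and
         * tx_next_rs advances to 63.
         */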
310         if (txq->tx_tail > txq->tx_next_rs) {
311                 tx_r[txq->tx_next_rs].read.cmd_type_len |=
312                         rte_cpu_to_le_32(IXGBE_ADVTXD_DCMD_RS);
313                 txq->tx_next_rs = (uint16_t)(txq->tx_next_rs +
314                                                 txq->tx_rs_thresh);
315                 if (txq->tx_next_rs >= txq->nb_tx_desc)
316                         txq->tx_next_rs = (uint16_t)(txq->tx_rs_thresh - 1);
317         }
318
319         /*
320          * Check for wrap-around. This would only happen if we used
321          * up to the last descriptor in the ring, no more, no less.
322          */
323         if (txq->tx_tail >= txq->nb_tx_desc)
324                 txq->tx_tail = 0;
325
326         /* update tail pointer */
327         rte_wmb();
328         IXGBE_PCI_REG_WRITE(txq->tdt_reg_addr, txq->tx_tail);
329
330         return nb_pkts;
331 }
332
333 uint16_t
334 ixgbe_xmit_pkts_simple(void *tx_queue, struct rte_mbuf **tx_pkts,
335                        uint16_t nb_pkts)
336 {
337         uint16_t nb_tx;
338
339         /* Transmit the whole burst directly if it fits within TX_MAX_BURST pkts */
340         if (likely(nb_pkts <= RTE_PMD_IXGBE_TX_MAX_BURST))
341                 return tx_xmit_pkts(tx_queue, tx_pkts, nb_pkts);
342
343         /* transmit more than the max burst, in chunks of TX_MAX_BURST */
344         nb_tx = 0;
345         while (nb_pkts) {
346                 uint16_t ret, n;
347                 n = (uint16_t)RTE_MIN(nb_pkts, RTE_PMD_IXGBE_TX_MAX_BURST);
348                 ret = tx_xmit_pkts(tx_queue, &(tx_pkts[nb_tx]), n);
349                 nb_tx = (uint16_t)(nb_tx + ret);
350                 nb_pkts = (uint16_t)(nb_pkts - ret);
351                 if (ret < n)
352                         break;
353         }
354
355         return nb_tx;
356 }
357
358 static inline void
359 ixgbe_set_xmit_ctx(struct ixgbe_tx_queue *txq,
360                 volatile struct ixgbe_adv_tx_context_desc *ctx_txd,
361                 uint64_t ol_flags, union ixgbe_tx_offload tx_offload)
362 {
363         uint32_t type_tucmd_mlhl;
364         uint32_t mss_l4len_idx = 0;
365         uint32_t ctx_idx;
366         uint32_t vlan_macip_lens;
367         union ixgbe_tx_offload tx_offload_mask;
368         uint32_t seqnum_seed = 0;
369
370         ctx_idx = txq->ctx_curr;
371         tx_offload_mask.data[0] = 0;
372         tx_offload_mask.data[1] = 0;
373         type_tucmd_mlhl = 0;
374
375         /* Specify which HW CTX to upload. */
376         mss_l4len_idx |= (ctx_idx << IXGBE_ADVTXD_IDX_SHIFT);
377
378         if (ol_flags & PKT_TX_VLAN_PKT) {
379                 tx_offload_mask.vlan_tci |= ~0;
380         }
381
382         /* check if TCP segmentation is required for this packet */
383         if (ol_flags & PKT_TX_TCP_SEG) {
384                 /* implies IP cksum in IPv4 */
385                 if (ol_flags & PKT_TX_IP_CKSUM)
386                         type_tucmd_mlhl = IXGBE_ADVTXD_TUCMD_IPV4 |
387                                 IXGBE_ADVTXD_TUCMD_L4T_TCP |
388                                 IXGBE_ADVTXD_DTYP_CTXT | IXGBE_ADVTXD_DCMD_DEXT;
389                 else
390                         type_tucmd_mlhl = IXGBE_ADVTXD_TUCMD_IPV6 |
391                                 IXGBE_ADVTXD_TUCMD_L4T_TCP |
392                                 IXGBE_ADVTXD_DTYP_CTXT | IXGBE_ADVTXD_DCMD_DEXT;
393
394                 tx_offload_mask.l2_len |= ~0;
395                 tx_offload_mask.l3_len |= ~0;
396                 tx_offload_mask.l4_len |= ~0;
397                 tx_offload_mask.tso_segsz |= ~0;
398                 mss_l4len_idx |= tx_offload.tso_segsz << IXGBE_ADVTXD_MSS_SHIFT;
399                 mss_l4len_idx |= tx_offload.l4_len << IXGBE_ADVTXD_L4LEN_SHIFT;
400         } else { /* no TSO, check if hardware checksum is needed */
401                 if (ol_flags & PKT_TX_IP_CKSUM) {
402                         type_tucmd_mlhl = IXGBE_ADVTXD_TUCMD_IPV4;
403                         tx_offload_mask.l2_len |= ~0;
404                         tx_offload_mask.l3_len |= ~0;
405                 }
406
407                 switch (ol_flags & PKT_TX_L4_MASK) {
408                 case PKT_TX_UDP_CKSUM:
409                         type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_L4T_UDP |
410                                 IXGBE_ADVTXD_DTYP_CTXT | IXGBE_ADVTXD_DCMD_DEXT;
411                         mss_l4len_idx |= sizeof(struct udp_hdr) << IXGBE_ADVTXD_L4LEN_SHIFT;
412                         tx_offload_mask.l2_len |= ~0;
413                         tx_offload_mask.l3_len |= ~0;
414                         break;
415                 case PKT_TX_TCP_CKSUM:
416                         type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_L4T_TCP |
417                                 IXGBE_ADVTXD_DTYP_CTXT | IXGBE_ADVTXD_DCMD_DEXT;
418                         mss_l4len_idx |= sizeof(struct tcp_hdr) << IXGBE_ADVTXD_L4LEN_SHIFT;
419                         tx_offload_mask.l2_len |= ~0;
420                         tx_offload_mask.l3_len |= ~0;
421                         break;
422                 case PKT_TX_SCTP_CKSUM:
423                         type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_L4T_SCTP |
424                                 IXGBE_ADVTXD_DTYP_CTXT | IXGBE_ADVTXD_DCMD_DEXT;
425                         mss_l4len_idx |= sizeof(struct sctp_hdr) << IXGBE_ADVTXD_L4LEN_SHIFT;
426                         tx_offload_mask.l2_len |= ~0;
427                         tx_offload_mask.l3_len |= ~0;
428                         break;
429                 default:
430                         type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_L4T_RSV |
431                                 IXGBE_ADVTXD_DTYP_CTXT | IXGBE_ADVTXD_DCMD_DEXT;
432                         break;
433                 }
434         }
435
436         if (ol_flags & PKT_TX_OUTER_IP_CKSUM) {
437                 tx_offload_mask.outer_l2_len |= ~0;
438                 tx_offload_mask.outer_l3_len |= ~0;
439                 tx_offload_mask.l2_len |= ~0;
440                 seqnum_seed |= tx_offload.outer_l3_len
441                                << IXGBE_ADVTXD_OUTER_IPLEN;
442                 seqnum_seed |= tx_offload.l2_len
443                                << IXGBE_ADVTXD_TUNNEL_LEN;
444         }
445
446         txq->ctx_cache[ctx_idx].flags = ol_flags;
447         txq->ctx_cache[ctx_idx].tx_offload.data[0]  =
448                 tx_offload_mask.data[0] & tx_offload.data[0];
449         txq->ctx_cache[ctx_idx].tx_offload.data[1]  =
450                 tx_offload_mask.data[1] & tx_offload.data[1];
451         txq->ctx_cache[ctx_idx].tx_offload_mask    = tx_offload_mask;
452
453         ctx_txd->type_tucmd_mlhl = rte_cpu_to_le_32(type_tucmd_mlhl);
454         vlan_macip_lens = tx_offload.l3_len;
455         if (ol_flags & PKT_TX_OUTER_IP_CKSUM)
456                 vlan_macip_lens |= (tx_offload.outer_l2_len <<
457                                     IXGBE_ADVTXD_MACLEN_SHIFT);
458         else
459                 vlan_macip_lens |= (tx_offload.l2_len <<
460                                     IXGBE_ADVTXD_MACLEN_SHIFT);
461         vlan_macip_lens |= ((uint32_t)tx_offload.vlan_tci << IXGBE_ADVTXD_VLAN_SHIFT);
462         ctx_txd->vlan_macip_lens = rte_cpu_to_le_32(vlan_macip_lens);
463         ctx_txd->mss_l4len_idx   = rte_cpu_to_le_32(mss_l4len_idx);
464         ctx_txd->seqnum_seed     = seqnum_seed;
465 }
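
/*
 * Illustrative sketch (assumed header sizes, not part of the driver): field
 * layout for a VxLAN-encapsulated packet using PKT_TX_OUTER_IP_CKSUM as
 * handled above. Note that l2_len carries the outer UDP + VxLAN + inner
 * Ethernet headers, so it lands in the tunnel-length field of seqnum_seed.
 *
 *   m->outer_l2_len = 14;          outer Ethernet
 *   m->outer_l3_len = 20;          outer IPv4
 *   m->l2_len = 8 + 8 + 14;        outer UDP + VxLAN + inner Ethernet
 *   m->l3_len = 20;                inner IPv4
 *   m->ol_flags |= PKT_TX_OUTER_IP_CKSUM | PKT_TX_IP_CKSUM | PKT_TX_UDP_CKSUM;
 */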
466
467 /*
468  * Check which hardware context can be used. Use the existing match
469  * or create a new context descriptor.
470  */
471 static inline uint32_t
472 what_advctx_update(struct ixgbe_tx_queue *txq, uint64_t flags,
473                 union ixgbe_tx_offload tx_offload)
474 {
475         /* Check for a match with the currently used context */
476         if (likely((txq->ctx_cache[txq->ctx_curr].flags == flags) &&
477                 (txq->ctx_cache[txq->ctx_curr].tx_offload.data[0] ==
478                 (txq->ctx_cache[txq->ctx_curr].tx_offload_mask.data[0]
479                  & tx_offload.data[0])) &&
480                 (txq->ctx_cache[txq->ctx_curr].tx_offload.data[1] ==
481                 (txq->ctx_cache[txq->ctx_curr].tx_offload_mask.data[1]
482                  & tx_offload.data[1])))) {
483                         return txq->ctx_curr;
484         }
485
486         /* Otherwise check whether the other cached context matches */
487         txq->ctx_curr ^= 1;
488         if (likely((txq->ctx_cache[txq->ctx_curr].flags == flags) &&
489                 (txq->ctx_cache[txq->ctx_curr].tx_offload.data[0] ==
490                 (txq->ctx_cache[txq->ctx_curr].tx_offload_mask.data[0]
491                  & tx_offload.data[0])) &&
492                 (txq->ctx_cache[txq->ctx_curr].tx_offload.data[1] ==
493                 (txq->ctx_cache[txq->ctx_curr].tx_offload_mask.data[1]
494                  & tx_offload.data[1])))) {
495                         return txq->ctx_curr;
496         }
497
498         /* Neither cached context matches: a new one must be built */
499         return IXGBE_CTX_NUM;
500 }
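
/*
 * The queue caches IXGBE_CTX_NUM (two) contexts and toggles ctx_curr between
 * them. A sketch of the effect, assuming traffic that alternates between two
 * offload layouts (e.g. plain and VLAN-tagged): the first packet of each kind
 * builds one slot, and every later packet reuses a cached slot instead of
 * spending an extra context descriptor.
 */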
501
502 static inline uint32_t
503 tx_desc_cksum_flags_to_olinfo(uint64_t ol_flags)
504 {
505         uint32_t tmp = 0;
506         if ((ol_flags & PKT_TX_L4_MASK) != PKT_TX_L4_NO_CKSUM)
507                 tmp |= IXGBE_ADVTXD_POPTS_TXSM;
508         if (ol_flags & PKT_TX_IP_CKSUM)
509                 tmp |= IXGBE_ADVTXD_POPTS_IXSM;
510         if (ol_flags & PKT_TX_TCP_SEG)
511                 tmp |= IXGBE_ADVTXD_POPTS_TXSM;
512         return tmp;
513 }
514
515 static inline uint32_t
516 tx_desc_ol_flags_to_cmdtype(uint64_t ol_flags)
517 {
518         uint32_t cmdtype = 0;
519         if (ol_flags & PKT_TX_VLAN_PKT)
520                 cmdtype |= IXGBE_ADVTXD_DCMD_VLE;
521         if (ol_flags & PKT_TX_TCP_SEG)
522                 cmdtype |= IXGBE_ADVTXD_DCMD_TSE;
523         if (ol_flags & PKT_TX_OUTER_IP_CKSUM)
524                 cmdtype |= (1 << IXGBE_ADVTXD_OUTERIPCS_SHIFT);
525         return cmdtype;
526 }
527
528 /* Default RS bit threshold values */
529 #ifndef DEFAULT_TX_RS_THRESH
530 #define DEFAULT_TX_RS_THRESH   32
531 #endif
532 #ifndef DEFAULT_TX_FREE_THRESH
533 #define DEFAULT_TX_FREE_THRESH 32
534 #endif
535
536 /* Reset transmit descriptors after they have been used */
537 static inline int
538 ixgbe_xmit_cleanup(struct ixgbe_tx_queue *txq)
539 {
540         struct ixgbe_tx_entry *sw_ring = txq->sw_ring;
541         volatile union ixgbe_adv_tx_desc *txr = txq->tx_ring;
542         uint16_t last_desc_cleaned = txq->last_desc_cleaned;
543         uint16_t nb_tx_desc = txq->nb_tx_desc;
544         uint16_t desc_to_clean_to;
545         uint16_t nb_tx_to_clean;
546         uint32_t status;
547
548         /* Determine the last descriptor needing to be cleaned */
549         desc_to_clean_to = (uint16_t)(last_desc_cleaned + txq->tx_rs_thresh);
550         if (desc_to_clean_to >= nb_tx_desc)
551                 desc_to_clean_to = (uint16_t)(desc_to_clean_to - nb_tx_desc);
552
553         /* Check to make sure the last descriptor to clean is done */
554         desc_to_clean_to = sw_ring[desc_to_clean_to].last_id;
555         status = txr[desc_to_clean_to].wb.status;
556         if (!(status & rte_cpu_to_le_32(IXGBE_TXD_STAT_DD)))
557         {
558                 PMD_TX_FREE_LOG(DEBUG,
559                                 "TX descriptor %4u is not done "
560                                 "(port=%d queue=%d)",
561                                 desc_to_clean_to,
562                                 txq->port_id, txq->queue_id);
563                 /* Failed to clean any descriptors, better luck next time */
564                 return -(1);
565         }
566
567         /* Figure out how many descriptors will be cleaned */
568         if (last_desc_cleaned > desc_to_clean_to)
569                 nb_tx_to_clean = (uint16_t)((nb_tx_desc - last_desc_cleaned) +
570                                                         desc_to_clean_to);
571         else
572                 nb_tx_to_clean = (uint16_t)(desc_to_clean_to -
573                                                 last_desc_cleaned);
574
575         PMD_TX_FREE_LOG(DEBUG,
576                         "Cleaning %4u TX descriptors: %4u to %4u "
577                         "(port=%d queue=%d)",
578                         nb_tx_to_clean, last_desc_cleaned, desc_to_clean_to,
579                         txq->port_id, txq->queue_id);
580
581         /*
582          * The last descriptor to clean is done, so that means all the
583          * descriptors from the last descriptor that was cleaned
584          * up to the last descriptor with the RS bit set
585          * are done. Only reset the threshold descriptor.
586          */
587         txr[desc_to_clean_to].wb.status = 0;
588
589         /* Update the txq to reflect the last descriptor that was cleaned */
590         txq->last_desc_cleaned = desc_to_clean_to;
591         txq->nb_tx_free = (uint16_t)(txq->nb_tx_free + nb_tx_to_clean);
592
593         /* No Error */
594         return 0;
595 }
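
/*
 * Worked example (assuming tx_rs_thresh == 32, a 512-descriptor ring and
 * ignoring the last_id indirection): with last_desc_cleaned == 500,
 * desc_to_clean_to becomes 532 and wraps to 20; if that descriptor reports
 * DD, nb_tx_to_clean is (512 - 500) + 20 == 32.
 */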
596
597 uint16_t
598 ixgbe_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts,
599                 uint16_t nb_pkts)
600 {
601         struct ixgbe_tx_queue *txq;
602         struct ixgbe_tx_entry *sw_ring;
603         struct ixgbe_tx_entry *txe, *txn;
604         volatile union ixgbe_adv_tx_desc *txr;
605         volatile union ixgbe_adv_tx_desc *txd, *txp;
606         struct rte_mbuf     *tx_pkt;
607         struct rte_mbuf     *m_seg;
608         uint64_t buf_dma_addr;
609         uint32_t olinfo_status;
610         uint32_t cmd_type_len;
611         uint32_t pkt_len;
612         uint16_t slen;
613         uint64_t ol_flags;
614         uint16_t tx_id;
615         uint16_t tx_last;
616         uint16_t nb_tx;
617         uint16_t nb_used;
618         uint64_t tx_ol_req;
619         uint32_t ctx = 0;
620         uint32_t new_ctx;
621         union ixgbe_tx_offload tx_offload;
622
623         tx_offload.data[0] = 0;
624         tx_offload.data[1] = 0;
625         txq = tx_queue;
626         sw_ring = txq->sw_ring;
627         txr     = txq->tx_ring;
628         tx_id   = txq->tx_tail;
629         txe = &sw_ring[tx_id];
630         txp = NULL;
631
632         /* Determine if the descriptor ring needs to be cleaned. */
633         if (txq->nb_tx_free < txq->tx_free_thresh)
634                 ixgbe_xmit_cleanup(txq);
635
636         rte_prefetch0(&txe->mbuf->pool);
637
638         /* TX loop */
639         for (nb_tx = 0; nb_tx < nb_pkts; nb_tx++) {
640                 new_ctx = 0;
641                 tx_pkt = *tx_pkts++;
642                 pkt_len = tx_pkt->pkt_len;
643
644                 /*
645                  * Determine how many (if any) context descriptors
646                  * are needed for offload functionality.
647                  */
648                 ol_flags = tx_pkt->ol_flags;
649
650                 /* If hardware offload required */
651                 tx_ol_req = ol_flags & IXGBE_TX_OFFLOAD_MASK;
652                 if (tx_ol_req) {
653                         tx_offload.l2_len = tx_pkt->l2_len;
654                         tx_offload.l3_len = tx_pkt->l3_len;
655                         tx_offload.l4_len = tx_pkt->l4_len;
656                         tx_offload.vlan_tci = tx_pkt->vlan_tci;
657                         tx_offload.tso_segsz = tx_pkt->tso_segsz;
658                         tx_offload.outer_l2_len = tx_pkt->outer_l2_len;
659                         tx_offload.outer_l3_len = tx_pkt->outer_l3_len;
660
661                         /* Decide whether a new context must be built or an existing one reused. */
662                         ctx = what_advctx_update(txq, tx_ol_req,
663                                 tx_offload);
664                         /* Only allocate a context descriptor if required */
665                         new_ctx = (ctx == IXGBE_CTX_NUM);
666                         ctx = txq->ctx_curr;
667                 }
668
669                 /*
670                  * Keep track of how many descriptors are used for this packet.
671                  * This is always the number of segments plus the context
672                  * descriptor (if any) required to transmit the packet.
673                  */
674                 nb_used = (uint16_t)(tx_pkt->nb_segs + new_ctx);
675
676                 if (txp != NULL &&
677                                 nb_used + txq->nb_tx_used >= txq->tx_rs_thresh)
678                         /* set RS on the previous packet in the burst */
679                         txp->read.cmd_type_len |=
680                                 rte_cpu_to_le_32(IXGBE_TXD_CMD_RS);
681
682                 /*
683                  * The number of descriptors that must be allocated for a
684                  * packet is the number of segments of that packet, plus 1
685                  * Context Descriptor for the hardware offload, if any.
686                  * Determine the last TX descriptor to allocate in the TX ring
687                  * for the packet, starting from the current position (tx_id)
688                  * in the ring.
689                  */
690                 tx_last = (uint16_t) (tx_id + nb_used - 1);
691
692                 /* Circular ring */
693                 if (tx_last >= txq->nb_tx_desc)
694                         tx_last = (uint16_t) (tx_last - txq->nb_tx_desc);
695
696                 PMD_TX_LOG(DEBUG, "port_id=%u queue_id=%u pktlen=%u"
697                            " tx_first=%u tx_last=%u",
698                            (unsigned) txq->port_id,
699                            (unsigned) txq->queue_id,
700                            (unsigned) pkt_len,
701                            (unsigned) tx_id,
702                            (unsigned) tx_last);
703
704                 /*
705                  * Make sure there are enough TX descriptors available to
706                  * transmit the entire packet.
707                  * nb_used better be less than or equal to txq->tx_rs_thresh
708                  */
709                 if (nb_used > txq->nb_tx_free) {
710                         PMD_TX_FREE_LOG(DEBUG,
711                                         "Not enough free TX descriptors "
712                                         "nb_used=%4u nb_free=%4u "
713                                         "(port=%d queue=%d)",
714                                         nb_used, txq->nb_tx_free,
715                                         txq->port_id, txq->queue_id);
716
717                         if (ixgbe_xmit_cleanup(txq) != 0) {
718                                 /* Could not clean any descriptors */
719                                 if (nb_tx == 0)
720                                         return 0;
721                                 goto end_of_tx;
722                         }
723
724                         /* nb_used better be <= txq->tx_rs_thresh */
725                         if (unlikely(nb_used > txq->tx_rs_thresh)) {
726                                 PMD_TX_FREE_LOG(DEBUG,
727                                         "The number of descriptors needed to "
728                                         "transmit the packet exceeds the "
729                                         "RS bit threshold. This will impact "
730                                         "performance. "
731                                         "nb_used=%4u nb_free=%4u "
732                                         "tx_rs_thresh=%4u. "
733                                         "(port=%d queue=%d)",
734                                         nb_used, txq->nb_tx_free,
735                                         txq->tx_rs_thresh,
736                                         txq->port_id, txq->queue_id);
737                                 /*
738                                  * Loop here until there are enough TX
739                                  * descriptors or until the ring cannot be
740                                  * cleaned.
741                                  */
742                                 while (nb_used > txq->nb_tx_free) {
743                                         if (ixgbe_xmit_cleanup(txq) != 0) {
744                                                 /*
745                                                  * Could not clean any
746                                                  * descriptors
747                                                  */
748                                                 if (nb_tx == 0)
749                                                         return 0;
750                                                 goto end_of_tx;
751                                         }
752                                 }
753                         }
754                 }
755
756                 /*
757                  * By now there are enough free TX descriptors to transmit
758                  * the packet.
759                  */
760
761                 /*
762                  * Set common flags of all TX Data Descriptors.
763                  *
764                  * The following bits must be set in all Data Descriptors:
765                  *   - IXGBE_ADVTXD_DTYP_DATA
766                  *   - IXGBE_ADVTXD_DCMD_DEXT
767                  *
768                  * The following bits must be set in the first Data Descriptor
769                  * and are ignored in the other ones:
770                  *   - IXGBE_ADVTXD_DCMD_IFCS
771                  *   - IXGBE_ADVTXD_MAC_1588
772                  *   - IXGBE_ADVTXD_DCMD_VLE
773                  *
774                  * The following bits must only be set in the last Data
775                  * Descriptor:
776                  *   - IXGBE_TXD_CMD_EOP
777                  *
778                  * The following bits can be set in any Data Descriptor, but
779                  * are only set in the last Data Descriptor:
780                  *   - IXGBE_TXD_CMD_RS
781                  */
782                 cmd_type_len = IXGBE_ADVTXD_DTYP_DATA |
783                         IXGBE_ADVTXD_DCMD_IFCS | IXGBE_ADVTXD_DCMD_DEXT;
784
785 #ifdef RTE_LIBRTE_IEEE1588
786                 if (ol_flags & PKT_TX_IEEE1588_TMST)
787                         cmd_type_len |= IXGBE_ADVTXD_MAC_1588;
788 #endif
789
790                 olinfo_status = 0;
791                 if (tx_ol_req) {
792
793                         if (ol_flags & PKT_TX_TCP_SEG) {
794                                 /* when TSO is on, the paylen in the descriptor is
795                                  * not the packet len but the TCP payload len */
796                                 pkt_len -= (tx_offload.l2_len +
797                                         tx_offload.l3_len + tx_offload.l4_len);
798                         }
799
800                         /*
801                          * Setup the TX Advanced Context Descriptor if required
802                          */
803                         if (new_ctx) {
804                                 volatile struct ixgbe_adv_tx_context_desc *
805                                     ctx_txd;
806
807                                 ctx_txd = (volatile struct
808                                     ixgbe_adv_tx_context_desc *)
809                                     &txr[tx_id];
810
811                                 txn = &sw_ring[txe->next_id];
812                                 rte_prefetch0(&txn->mbuf->pool);
813
814                                 if (txe->mbuf != NULL) {
815                                         rte_pktmbuf_free_seg(txe->mbuf);
816                                         txe->mbuf = NULL;
817                                 }
818
819                                 ixgbe_set_xmit_ctx(txq, ctx_txd, tx_ol_req,
820                                         tx_offload);
821
822                                 txe->last_id = tx_last;
823                                 tx_id = txe->next_id;
824                                 txe = txn;
825                         }
826
827                         /*
828                          * Set up the TX Advanced Data Descriptor.
829                          * This path is taken whether a new context descriptor
830                          * was built or an existing one is reused.
831                          */
832                         cmd_type_len  |= tx_desc_ol_flags_to_cmdtype(ol_flags);
833                         olinfo_status |= tx_desc_cksum_flags_to_olinfo(ol_flags);
834                         olinfo_status |= ctx << IXGBE_ADVTXD_IDX_SHIFT;
835                 }
836
837                 olinfo_status |= (pkt_len << IXGBE_ADVTXD_PAYLEN_SHIFT);
838
839                 m_seg = tx_pkt;
840                 do {
841                         txd = &txr[tx_id];
842                         txn = &sw_ring[txe->next_id];
843                         rte_prefetch0(&txn->mbuf->pool);
844
845                         if (txe->mbuf != NULL)
846                                 rte_pktmbuf_free_seg(txe->mbuf);
847                         txe->mbuf = m_seg;
848
849                         /*
850                          * Set up Transmit Data Descriptor.
851                          */
852                         slen = m_seg->data_len;
853                         buf_dma_addr = rte_mbuf_data_dma_addr(m_seg);
854                         txd->read.buffer_addr =
855                                 rte_cpu_to_le_64(buf_dma_addr);
856                         txd->read.cmd_type_len =
857                                 rte_cpu_to_le_32(cmd_type_len | slen);
858                         txd->read.olinfo_status =
859                                 rte_cpu_to_le_32(olinfo_status);
860                         txe->last_id = tx_last;
861                         tx_id = txe->next_id;
862                         txe = txn;
863                         m_seg = m_seg->next;
864                 } while (m_seg != NULL);
865
866                 /*
867                  * The last packet data descriptor needs End Of Packet (EOP)
868                  */
869                 cmd_type_len |= IXGBE_TXD_CMD_EOP;
870                 txq->nb_tx_used = (uint16_t)(txq->nb_tx_used + nb_used);
871                 txq->nb_tx_free = (uint16_t)(txq->nb_tx_free - nb_used);
872
873                 /* Set RS bit only on threshold packets' last descriptor */
874                 if (txq->nb_tx_used >= txq->tx_rs_thresh) {
875                         PMD_TX_FREE_LOG(DEBUG,
876                                         "Setting RS bit on TXD id="
877                                         "%4u (port=%d queue=%d)",
878                                         tx_last, txq->port_id, txq->queue_id);
879
880                         cmd_type_len |= IXGBE_TXD_CMD_RS;
881
882                         /* Update txq RS bit counters */
883                         txq->nb_tx_used = 0;
884                         txp = NULL;
885                 } else
886                         txp = txd;
887
888                 txd->read.cmd_type_len |= rte_cpu_to_le_32(cmd_type_len);
889         }
890
891 end_of_tx:
892         /* set RS on last packet in the burst */
893         if (txp != NULL)
894                 txp->read.cmd_type_len |= rte_cpu_to_le_32(IXGBE_TXD_CMD_RS);
895
896         rte_wmb();
897
898         /*
899          * Set the Transmit Descriptor Tail (TDT)
900          */
901         PMD_TX_LOG(DEBUG, "port_id=%u queue_id=%u tx_tail=%u nb_tx=%u",
902                    (unsigned) txq->port_id, (unsigned) txq->queue_id,
903                    (unsigned) tx_id, (unsigned) nb_tx);
904         IXGBE_PCI_REG_WRITE(txq->tdt_reg_addr, tx_id);
905         txq->tx_tail = tx_id;
906
907         return nb_tx;
908 }
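
/*
 * Minimal usage sketch (application side, assumed values): queueing one
 * offloaded packet through this path via the generic ethdev API. The flags
 * set here are the ones matched against IXGBE_TX_OFFLOAD_MASK above.
 *
 *   m->l2_len = 14;
 *   m->l3_len = 20;
 *   m->ol_flags |= PKT_TX_IP_CKSUM | PKT_TX_TCP_CKSUM;
 *   struct rte_mbuf *pkts[1] = { m };
 *   uint16_t sent = rte_eth_tx_burst(port_id, queue_id, pkts, 1);
 */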
909
910 /*********************************************************************
911  *
912  *  RX functions
913  *
914  **********************************************************************/
915 #define IXGBE_PACKET_TYPE_IPV4              0X01
916 #define IXGBE_PACKET_TYPE_IPV4_TCP          0X11
917 #define IXGBE_PACKET_TYPE_IPV4_UDP          0X21
918 #define IXGBE_PACKET_TYPE_IPV4_SCTP         0X41
919 #define IXGBE_PACKET_TYPE_IPV4_EXT          0X03
920 #define IXGBE_PACKET_TYPE_IPV4_EXT_SCTP     0X43
921 #define IXGBE_PACKET_TYPE_IPV6              0X04
922 #define IXGBE_PACKET_TYPE_IPV6_TCP          0X14
923 #define IXGBE_PACKET_TYPE_IPV6_UDP          0X24
924 #define IXGBE_PACKET_TYPE_IPV6_EXT          0X0C
925 #define IXGBE_PACKET_TYPE_IPV6_EXT_TCP      0X1C
926 #define IXGBE_PACKET_TYPE_IPV6_EXT_UDP      0X2C
927 #define IXGBE_PACKET_TYPE_IPV4_IPV6         0X05
928 #define IXGBE_PACKET_TYPE_IPV4_IPV6_TCP     0X15
929 #define IXGBE_PACKET_TYPE_IPV4_IPV6_UDP     0X25
930 #define IXGBE_PACKET_TYPE_IPV4_IPV6_EXT     0X0D
931 #define IXGBE_PACKET_TYPE_IPV4_IPV6_EXT_TCP 0X1D
932 #define IXGBE_PACKET_TYPE_IPV4_IPV6_EXT_UDP 0X2D
933 #define IXGBE_PACKET_TYPE_MAX               0X80
934 #define IXGBE_PACKET_TYPE_MASK              0X7F
935 #define IXGBE_PACKET_TYPE_SHIFT             0X04
936 static inline uint32_t
937 ixgbe_rxd_pkt_info_to_pkt_type(uint16_t pkt_info)
938 {
939         static const uint32_t
940                 ptype_table[IXGBE_PACKET_TYPE_MAX] __rte_cache_aligned = {
941                 [IXGBE_PACKET_TYPE_IPV4] = RTE_PTYPE_L2_ETHER |
942                         RTE_PTYPE_L3_IPV4,
943                 [IXGBE_PACKET_TYPE_IPV4_EXT] = RTE_PTYPE_L2_ETHER |
944                         RTE_PTYPE_L3_IPV4_EXT,
945                 [IXGBE_PACKET_TYPE_IPV6] = RTE_PTYPE_L2_ETHER |
946                         RTE_PTYPE_L3_IPV6,
947                 [IXGBE_PACKET_TYPE_IPV4_IPV6] = RTE_PTYPE_L2_ETHER |
948                         RTE_PTYPE_L3_IPV4 | RTE_PTYPE_TUNNEL_IP |
949                         RTE_PTYPE_INNER_L3_IPV6,
950                 [IXGBE_PACKET_TYPE_IPV6_EXT] = RTE_PTYPE_L2_ETHER |
951                         RTE_PTYPE_L3_IPV6_EXT,
952                 [IXGBE_PACKET_TYPE_IPV4_IPV6_EXT] = RTE_PTYPE_L2_ETHER |
953                         RTE_PTYPE_L3_IPV4 | RTE_PTYPE_TUNNEL_IP |
954                         RTE_PTYPE_INNER_L3_IPV6_EXT,
955                 [IXGBE_PACKET_TYPE_IPV4_TCP] = RTE_PTYPE_L2_ETHER |
956                         RTE_PTYPE_L3_IPV4 | RTE_PTYPE_L4_TCP,
957                 [IXGBE_PACKET_TYPE_IPV6_TCP] = RTE_PTYPE_L2_ETHER |
958                         RTE_PTYPE_L3_IPV6 | RTE_PTYPE_L4_TCP,
959                 [IXGBE_PACKET_TYPE_IPV4_IPV6_TCP] = RTE_PTYPE_L2_ETHER |
960                         RTE_PTYPE_L3_IPV4 | RTE_PTYPE_TUNNEL_IP |
961                         RTE_PTYPE_INNER_L3_IPV6 | RTE_PTYPE_INNER_L4_TCP,
962                 [IXGBE_PACKET_TYPE_IPV6_EXT_TCP] = RTE_PTYPE_L2_ETHER |
963                         RTE_PTYPE_L3_IPV6_EXT | RTE_PTYPE_L4_TCP,
964                 [IXGBE_PACKET_TYPE_IPV4_IPV6_EXT_TCP] = RTE_PTYPE_L2_ETHER |
965                         RTE_PTYPE_L3_IPV4 | RTE_PTYPE_TUNNEL_IP |
966                         RTE_PTYPE_INNER_L3_IPV6_EXT | RTE_PTYPE_INNER_L4_TCP,
967                 [IXGBE_PACKET_TYPE_IPV4_UDP] = RTE_PTYPE_L2_ETHER |
968                         RTE_PTYPE_L3_IPV4 | RTE_PTYPE_L4_UDP,
969                 [IXGBE_PACKET_TYPE_IPV6_UDP] = RTE_PTYPE_L2_ETHER |
970                         RTE_PTYPE_L3_IPV6 | RTE_PTYPE_L4_UDP,
971                 [IXGBE_PACKET_TYPE_IPV4_IPV6_UDP] = RTE_PTYPE_L2_ETHER |
972                         RTE_PTYPE_L3_IPV4 | RTE_PTYPE_TUNNEL_IP |
973                         RTE_PTYPE_INNER_L3_IPV6 | RTE_PTYPE_INNER_L4_UDP,
974                 [IXGBE_PACKET_TYPE_IPV6_EXT_UDP] = RTE_PTYPE_L2_ETHER |
975                         RTE_PTYPE_L3_IPV6_EXT | RTE_PTYPE_L4_UDP,
976                 [IXGBE_PACKET_TYPE_IPV4_IPV6_EXT_UDP] = RTE_PTYPE_L2_ETHER |
977                         RTE_PTYPE_L3_IPV4 | RTE_PTYPE_TUNNEL_IP |
978                         RTE_PTYPE_INNER_L3_IPV6_EXT | RTE_PTYPE_INNER_L4_UDP,
979                 [IXGBE_PACKET_TYPE_IPV4_SCTP] = RTE_PTYPE_L2_ETHER |
980                         RTE_PTYPE_L3_IPV4 | RTE_PTYPE_L4_SCTP,
981                 [IXGBE_PACKET_TYPE_IPV4_EXT_SCTP] = RTE_PTYPE_L2_ETHER |
982                         RTE_PTYPE_L3_IPV4_EXT | RTE_PTYPE_L4_SCTP,
983         };
984         if (unlikely(pkt_info & IXGBE_RXDADV_PKTTYPE_ETQF))
985                 return RTE_PTYPE_UNKNOWN;
986
987         pkt_info = (pkt_info >> IXGBE_PACKET_TYPE_SHIFT) &
988                                 IXGBE_PACKET_TYPE_MASK;
989
990         return ptype_table[pkt_info];
991 }
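
/*
 * Example: a hypothetical pkt_info of 0x0110 (hardware reporting an IPv4/TCP
 * packet) shifted right by IXGBE_PACKET_TYPE_SHIFT and masked with
 * IXGBE_PACKET_TYPE_MASK gives 0x11, i.e. IXGBE_PACKET_TYPE_IPV4_TCP, which
 * the table maps to RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4 | RTE_PTYPE_L4_TCP.
 */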
992
993 static inline uint64_t
994 ixgbe_rxd_pkt_info_to_pkt_flags(uint16_t pkt_info)
995 {
996         static uint64_t ip_rss_types_map[16] __rte_cache_aligned = {
997                 0, PKT_RX_RSS_HASH, PKT_RX_RSS_HASH, PKT_RX_RSS_HASH,
998                 0, PKT_RX_RSS_HASH, 0, PKT_RX_RSS_HASH,
999                 PKT_RX_RSS_HASH, 0, 0, 0,
1000                 0, 0, 0,  PKT_RX_FDIR,
1001         };
1002 #ifdef RTE_LIBRTE_IEEE1588
1003         static uint64_t ip_pkt_etqf_map[8] = {
1004                 0, 0, 0, PKT_RX_IEEE1588_PTP,
1005                 0, 0, 0, 0,
1006         };
1007
1008         if (likely(pkt_info & IXGBE_RXDADV_PKTTYPE_ETQF))
1009                 return ip_pkt_etqf_map[(pkt_info >> 4) & 0X07] |
1010                                 ip_rss_types_map[pkt_info & 0XF];
1011         else
1012                 return ip_rss_types_map[pkt_info & 0XF];
1013 #else
1014         return ip_rss_types_map[pkt_info & 0XF];
1015 #endif
1016 }
1017
1018 static inline uint64_t
1019 rx_desc_status_to_pkt_flags(uint32_t rx_status)
1020 {
1021         uint64_t pkt_flags;
1022
1023         /*
1024          * Check only whether a VLAN tag is present.
1025          * Whether the NIC performed the L3/L4 Rx checksum is not checked here;
1026          * that is indicated by the rte_eth_rxmode.hw_ip_checksum flag.
1027          */
1028         pkt_flags = (rx_status & IXGBE_RXD_STAT_VP) ?  PKT_RX_VLAN_PKT : 0;
1029
1030 #ifdef RTE_LIBRTE_IEEE1588
1031         if (rx_status & IXGBE_RXD_STAT_TMST)
1032                 pkt_flags = pkt_flags | PKT_RX_IEEE1588_TMST;
1033 #endif
1034         return pkt_flags;
1035 }
1036
1037 static inline uint64_t
1038 rx_desc_error_to_pkt_flags(uint32_t rx_status)
1039 {
1040         uint64_t pkt_flags;
1041
1042         /*
1043          * Bit 31: IPE, IPv4 checksum error
1044          * Bit 30: L4I, L4 integrity error
1045          */
1046         static uint64_t error_to_pkt_flags_map[4] = {
1047                 0,  PKT_RX_L4_CKSUM_BAD, PKT_RX_IP_CKSUM_BAD,
1048                 PKT_RX_IP_CKSUM_BAD | PKT_RX_L4_CKSUM_BAD
1049         };
1050         pkt_flags = error_to_pkt_flags_map[(rx_status >>
1051                 IXGBE_RXDADV_ERR_CKSUM_BIT) & IXGBE_RXDADV_ERR_CKSUM_MSK];
1052
1053         if ((rx_status & IXGBE_RXD_STAT_OUTERIPCS) &&
1054             (rx_status & IXGBE_RXDADV_ERR_OUTERIPER)) {
1055                 pkt_flags |= PKT_RX_EIP_CKSUM_BAD;
1056         }
1057
1058         return pkt_flags;
1059 }
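
/*
 * With the X550 tunnel offloads, IXGBE_RXD_STAT_OUTERIPCS indicates that the
 * outer IP checksum of a VxLAN/NVGRE packet was checked by hardware, and
 * IXGBE_RXDADV_ERR_OUTERIPER that the check failed; the combination above is
 * reported to the application as PKT_RX_EIP_CKSUM_BAD.
 */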
1060
1061 /*
1062  * LOOK_AHEAD defines how many desc statuses to check beyond the
1063  * current descriptor.
1064  * It must be a pound define for optimal performance.
1065  * Do not change the value of LOOK_AHEAD, as the ixgbe_rx_scan_hw_ring
1066  * function only works with LOOK_AHEAD=8.
1067  */
1068 #define LOOK_AHEAD 8
1069 #if (LOOK_AHEAD != 8)
1070 #error "PMD IXGBE: LOOK_AHEAD must be 8\n"
1071 #endif
1072 static inline int
1073 ixgbe_rx_scan_hw_ring(struct ixgbe_rx_queue *rxq)
1074 {
1075         volatile union ixgbe_adv_rx_desc *rxdp;
1076         struct ixgbe_rx_entry *rxep;
1077         struct rte_mbuf *mb;
1078         uint16_t pkt_len;
1079         uint64_t pkt_flags;
1080         int nb_dd;
1081         uint32_t s[LOOK_AHEAD];
1082         uint16_t pkt_info[LOOK_AHEAD];
1083         int i, j, nb_rx = 0;
1084         uint32_t status;
1085
1086         /* get references to current descriptor and S/W ring entry */
1087         rxdp = &rxq->rx_ring[rxq->rx_tail];
1088         rxep = &rxq->sw_ring[rxq->rx_tail];
1089
1090         status = rxdp->wb.upper.status_error;
1091         /* check to make sure there is at least 1 packet to receive */
1092         if (!(status & rte_cpu_to_le_32(IXGBE_RXDADV_STAT_DD)))
1093                 return 0;
1094
1095         /*
1096          * Scan LOOK_AHEAD descriptors at a time to determine which descriptors
1097          * reference packets that are ready to be received.
1098          */
1099         for (i = 0; i < RTE_PMD_IXGBE_RX_MAX_BURST;
1100              i += LOOK_AHEAD, rxdp += LOOK_AHEAD, rxep += LOOK_AHEAD)
1101         {
1102                 /* Read desc statuses backwards to avoid race condition */
1103                 for (j = LOOK_AHEAD-1; j >= 0; --j)
1104                         s[j] = rte_le_to_cpu_32(rxdp[j].wb.upper.status_error);
1105
1106                 for (j = LOOK_AHEAD - 1; j >= 0; --j)
1107                         pkt_info[j] = rxdp[j].wb.lower.lo_dword.
1108                                                 hs_rss.pkt_info;
1109
1110                 /* Compute how many status bits were set */
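                /*
                 * IXGBE_RXDADV_STAT_DD is bit 0 (0x01), so the AND below adds
                 * exactly 0 or 1 per status word; since descriptors complete
                 * in order, nb_dd counts the leading done descriptors in this
                 * LOOK_AHEAD group.
                 */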
1111                 nb_dd = 0;
1112                 for (j = 0; j < LOOK_AHEAD; ++j)
1113                         nb_dd += s[j] & IXGBE_RXDADV_STAT_DD;
1114
1115                 nb_rx += nb_dd;
1116
1117                 /* Translate descriptor info to mbuf format */
1118                 for (j = 0; j < nb_dd; ++j) {
1119                         mb = rxep[j].mbuf;
1120                         pkt_len = rte_le_to_cpu_16(rxdp[j].wb.upper.length) -
1121                                   rxq->crc_len;
1122                         mb->data_len = pkt_len;
1123                         mb->pkt_len = pkt_len;
1124                         mb->vlan_tci = rte_le_to_cpu_16(rxdp[j].wb.upper.vlan);
1125
1126                         /* convert descriptor fields to rte mbuf flags */
1127                         pkt_flags = rx_desc_status_to_pkt_flags(s[j]);
1128                         pkt_flags |= rx_desc_error_to_pkt_flags(s[j]);
1129                         pkt_flags |=
1130                                 ixgbe_rxd_pkt_info_to_pkt_flags(pkt_info[j]);
1131                         mb->ol_flags = pkt_flags;
1132                         mb->packet_type =
1133                                 ixgbe_rxd_pkt_info_to_pkt_type(pkt_info[j]);
1134
1135                         if (likely(pkt_flags & PKT_RX_RSS_HASH))
1136                                 mb->hash.rss = rte_le_to_cpu_32(
1137                                     rxdp[j].wb.lower.hi_dword.rss);
1138                         else if (pkt_flags & PKT_RX_FDIR) {
1139                                 mb->hash.fdir.hash = rte_le_to_cpu_16(
1140                                     rxdp[j].wb.lower.hi_dword.csum_ip.csum) &
1141                                     IXGBE_ATR_HASH_MASK;
1142                                 mb->hash.fdir.id = rte_le_to_cpu_16(
1143                                     rxdp[j].wb.lower.hi_dword.csum_ip.ip_id);
1144                         }
1145                 }
1146
1147                 /* Move mbuf pointers from the S/W ring to the stage */
1148                 for (j = 0; j < LOOK_AHEAD; ++j) {
1149                         rxq->rx_stage[i + j] = rxep[j].mbuf;
1150                 }
1151
1152                 /* stop if this group of descriptors was not completely done */
1153                 if (nb_dd != LOOK_AHEAD)
1154                         break;
1155         }
1156
1157         /* clear software ring entries so we can cleanup correctly */
1158         for (i = 0; i < nb_rx; ++i) {
1159                 rxq->sw_ring[rxq->rx_tail + i].mbuf = NULL;
1160         }
1161
1162
1163         return nb_rx;
1164 }
1165
1166 static inline int
1167 ixgbe_rx_alloc_bufs(struct ixgbe_rx_queue *rxq, bool reset_mbuf)
1168 {
1169         volatile union ixgbe_adv_rx_desc *rxdp;
1170         struct ixgbe_rx_entry *rxep;
1171         struct rte_mbuf *mb;
1172         uint16_t alloc_idx;
1173         __le64 dma_addr;
1174         int diag, i;
1175
1176         /* allocate buffers in bulk directly into the S/W ring */
1177         alloc_idx = rxq->rx_free_trigger - (rxq->rx_free_thresh - 1);
1178         rxep = &rxq->sw_ring[alloc_idx];
1179         diag = rte_mempool_get_bulk(rxq->mb_pool, (void *)rxep,
1180                                     rxq->rx_free_thresh);
1181         if (unlikely(diag != 0))
1182                 return -ENOMEM;
1183
1184         rxdp = &rxq->rx_ring[alloc_idx];
1185         for (i = 0; i < rxq->rx_free_thresh; ++i) {
1186                 /* populate the static rte mbuf fields */
1187                 mb = rxep[i].mbuf;
1188                 if (reset_mbuf) {
1189                         mb->next = NULL;
1190                         mb->nb_segs = 1;
1191                         mb->port = rxq->port_id;
1192                 }
1193
1194                 rte_mbuf_refcnt_set(mb, 1);
1195                 mb->data_off = RTE_PKTMBUF_HEADROOM;
1196
1197                 /* populate the descriptors */
1198                 dma_addr = rte_cpu_to_le_64(rte_mbuf_data_dma_addr_default(mb));
1199                 rxdp[i].read.hdr_addr = 0;
1200                 rxdp[i].read.pkt_addr = dma_addr;
1201         }
1202
1203         /* update state of internal queue structure */
1204         rxq->rx_free_trigger = rxq->rx_free_trigger + rxq->rx_free_thresh;
1205         if (rxq->rx_free_trigger >= rxq->nb_rx_desc)
1206                 rxq->rx_free_trigger = rxq->rx_free_thresh - 1;
1207
1208         /* no errors */
1209         return 0;
1210 }
1211
1212 static inline uint16_t
1213 ixgbe_rx_fill_from_stage(struct ixgbe_rx_queue *rxq, struct rte_mbuf **rx_pkts,
1214                          uint16_t nb_pkts)
1215 {
1216         struct rte_mbuf **stage = &rxq->rx_stage[rxq->rx_next_avail];
1217         int i;
1218
1219         /* how many packets are ready to return? */
1220         nb_pkts = (uint16_t)RTE_MIN(nb_pkts, rxq->rx_nb_avail);
1221
1222         /* copy mbuf pointers to the application's packet list */
1223         for (i = 0; i < nb_pkts; ++i)
1224                 rx_pkts[i] = stage[i];
1225
1226         /* update internal queue state */
1227         rxq->rx_nb_avail = (uint16_t)(rxq->rx_nb_avail - nb_pkts);
1228         rxq->rx_next_avail = (uint16_t)(rxq->rx_next_avail + nb_pkts);
1229
1230         return nb_pkts;
1231 }
1232
1233 static inline uint16_t
1234 rx_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts,
1235              uint16_t nb_pkts)
1236 {
1237         struct ixgbe_rx_queue *rxq = (struct ixgbe_rx_queue *)rx_queue;
1238         uint16_t nb_rx = 0;
1239
1240         /* Any previously recv'd pkts will be returned from the Rx stage */
1241         if (rxq->rx_nb_avail)
1242                 return ixgbe_rx_fill_from_stage(rxq, rx_pkts, nb_pkts);
1243
1244         /* Scan the H/W ring for packets to receive */
1245         nb_rx = (uint16_t)ixgbe_rx_scan_hw_ring(rxq);
1246
1247         /* update internal queue state */
1248         rxq->rx_next_avail = 0;
1249         rxq->rx_nb_avail = nb_rx;
1250         rxq->rx_tail = (uint16_t)(rxq->rx_tail + nb_rx);
1251
1252         /* if required, allocate new buffers to replenish descriptors */
1253         if (rxq->rx_tail > rxq->rx_free_trigger) {
1254                 uint16_t cur_free_trigger = rxq->rx_free_trigger;
1255
1256                 if (ixgbe_rx_alloc_bufs(rxq, true) != 0) {
1257                         int i, j;
1258                         PMD_RX_LOG(DEBUG, "RX mbuf alloc failed port_id=%u "
1259                                    "queue_id=%u", (unsigned) rxq->port_id,
1260                                    (unsigned) rxq->queue_id);
1261
1262                         rte_eth_devices[rxq->port_id].data->rx_mbuf_alloc_failed +=
1263                                 rxq->rx_free_thresh;
1264
1265                         /*
1266                          * Need to rewind any previous receives if we cannot
1267                          * allocate new buffers to replenish the old ones.
1268                          */
1269                         rxq->rx_nb_avail = 0;
1270                         rxq->rx_tail = (uint16_t)(rxq->rx_tail - nb_rx);
1271                         for (i = 0, j = rxq->rx_tail; i < nb_rx; ++i, ++j)
1272                                 rxq->sw_ring[j].mbuf = rxq->rx_stage[i];
1273
1274                         return 0;
1275                 }
1276
1277                 /* update tail pointer */
1278                 rte_wmb();
1279                 IXGBE_PCI_REG_WRITE(rxq->rdt_reg_addr, cur_free_trigger);
1280         }
1281
1282         if (rxq->rx_tail >= rxq->nb_rx_desc)
1283                 rxq->rx_tail = 0;
1284
1285         /* received any packets this loop? */
1286         if (rxq->rx_nb_avail)
1287                 return ixgbe_rx_fill_from_stage(rxq, rx_pkts, nb_pkts);
1288
1289         return 0;
1290 }
1291
1292 /* split requests into chunks of size RTE_PMD_IXGBE_RX_MAX_BURST */
1293 static uint16_t
1294 ixgbe_recv_pkts_bulk_alloc(void *rx_queue, struct rte_mbuf **rx_pkts,
1295                            uint16_t nb_pkts)
1296 {
1297         uint16_t nb_rx;
1298
1299         if (unlikely(nb_pkts == 0))
1300                 return 0;
1301
1302         if (likely(nb_pkts <= RTE_PMD_IXGBE_RX_MAX_BURST))
1303                 return rx_recv_pkts(rx_queue, rx_pkts, nb_pkts);
1304
1305         /* request is relatively large, chunk it up */
1306         nb_rx = 0;
1307         while (nb_pkts) {
1308                 uint16_t ret, n;
1309                 n = (uint16_t)RTE_MIN(nb_pkts, RTE_PMD_IXGBE_RX_MAX_BURST);
1310                 ret = rx_recv_pkts(rx_queue, &rx_pkts[nb_rx], n);
1311                 nb_rx = (uint16_t)(nb_rx + ret);
1312                 nb_pkts = (uint16_t)(nb_pkts - ret);
1313                 if (ret < n)
1314                         break;
1315         }
1316
1317         return nb_rx;
1318 }
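/*
 * Illustrative call path (an assumption added for clarity, not part of this
 * file): an application invoking
 *
 *     nb = rte_eth_rx_burst(port_id, queue_id, pkts, BURST_SIZE);
 *
 * is dispatched to the rx_pkt_burst handler selected for the port, which is
 * ixgbe_recv_pkts_bulk_alloc() when the bulk-alloc path has been chosen.
 * The names port_id, queue_id, pkts and BURST_SIZE are placeholders.
 */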
1319
1320 uint16_t
1321 ixgbe_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts,
1322                 uint16_t nb_pkts)
1323 {
1324         struct ixgbe_rx_queue *rxq;
1325         volatile union ixgbe_adv_rx_desc *rx_ring;
1326         volatile union ixgbe_adv_rx_desc *rxdp;
1327         struct ixgbe_rx_entry *sw_ring;
1328         struct ixgbe_rx_entry *rxe;
1329         struct rte_mbuf *rxm;
1330         struct rte_mbuf *nmb;
1331         union ixgbe_adv_rx_desc rxd;
1332         uint64_t dma_addr;
1333         uint32_t staterr;
1334         uint32_t pkt_info;
1335         uint16_t pkt_len;
1336         uint16_t rx_id;
1337         uint16_t nb_rx;
1338         uint16_t nb_hold;
1339         uint64_t pkt_flags;
1340
1341         nb_rx = 0;
1342         nb_hold = 0;
1343         rxq = rx_queue;
1344         rx_id = rxq->rx_tail;
1345         rx_ring = rxq->rx_ring;
1346         sw_ring = rxq->sw_ring;
1347         while (nb_rx < nb_pkts) {
1348                 /*
1349                  * The order of operations here is important as the DD status
1350                  * bit must be read before any other descriptor fields.
1351                  * rx_ring and rxdp are pointing to volatile data so the order
1352                  * of accesses cannot be reordered by the compiler. If they were
1353                  * not volatile, they could be reordered which could lead to
1354                  * using invalid descriptor fields when read from rxd.
1355                  */
1356                 rxdp = &rx_ring[rx_id];
1357                 staterr = rxdp->wb.upper.status_error;
1358                 if (!(staterr & rte_cpu_to_le_32(IXGBE_RXDADV_STAT_DD)))
1359                         break;
1360                 rxd = *rxdp;
1361
1362                 /*
1363                  * End of packet.
1364                  *
1365                  * If the IXGBE_RXDADV_STAT_EOP flag is not set, the RX packet
1366                  * is likely to be invalid and to be dropped by the various
1367                  * validation checks performed by the network stack.
1368                  *
1369                  * Allocate a new mbuf to replenish the RX ring descriptor.
1370                  * If the allocation fails:
1371                  *    - arrange for that RX descriptor to be the first one
1372                  *      being parsed the next time the receive function is
1373                  *      invoked [on the same queue].
1374                  *
1375                  *    - Stop parsing the RX ring and return immediately.
1376                  *
1377                  * This policy does not drop the packet received in the RX
1378                  * descriptor for which the allocation of a new mbuf failed.
1379                  * Thus, it allows that packet to be retrieved later, once
1380                  * mbufs have been freed in the meantime.
1381                  * As a side effect, holding RX descriptors instead of
1382                  * systematically giving them back to the NIC may lead to
1383                  * RX ring exhaustion situations.
1384                  * However, the NIC can gracefully prevent such situations
1385                  * from happening by sending specific "back-pressure" flow
1386                  * control frames to its peer(s).
1387                  */
1388                 PMD_RX_LOG(DEBUG, "port_id=%u queue_id=%u rx_id=%u "
1389                            "ext_err_stat=0x%08x pkt_len=%u",
1390                            (unsigned) rxq->port_id, (unsigned) rxq->queue_id,
1391                            (unsigned) rx_id, (unsigned) staterr,
1392                            (unsigned) rte_le_to_cpu_16(rxd.wb.upper.length));
1393
1394                 nmb = rte_rxmbuf_alloc(rxq->mb_pool);
1395                 if (nmb == NULL) {
1396                         PMD_RX_LOG(DEBUG, "RX mbuf alloc failed port_id=%u "
1397                                    "queue_id=%u", (unsigned) rxq->port_id,
1398                                    (unsigned) rxq->queue_id);
1399                         rte_eth_devices[rxq->port_id].data->rx_mbuf_alloc_failed++;
1400                         break;
1401                 }
1402
1403                 nb_hold++;
1404                 rxe = &sw_ring[rx_id];
1405                 rx_id++;
1406                 if (rx_id == rxq->nb_rx_desc)
1407                         rx_id = 0;
1408
1409                 /* Prefetch next mbuf while processing current one. */
1410                 rte_ixgbe_prefetch(sw_ring[rx_id].mbuf);
1411
1412                 /*
1413                  * When next RX descriptor is on a cache-line boundary,
1414                  * prefetch the next 4 RX descriptors and the next 8 pointers
1415                  * to mbufs.
1416                  */
1417                 if ((rx_id & 0x3) == 0) {
1418                         rte_ixgbe_prefetch(&rx_ring[rx_id]);
1419                         rte_ixgbe_prefetch(&sw_ring[rx_id]);
1420                 }
1421
1422                 rxm = rxe->mbuf;
1423                 rxe->mbuf = nmb;
1424                 dma_addr =
1425                         rte_cpu_to_le_64(rte_mbuf_data_dma_addr_default(nmb));
1426                 rxdp->read.hdr_addr = 0;
1427                 rxdp->read.pkt_addr = dma_addr;
1428
1429                 /*
1430                  * Initialize the returned mbuf.
1431                  * 1) setup generic mbuf fields:
1432                  *    - number of segments,
1433                  *    - next segment,
1434                  *    - packet length,
1435                  *    - RX port identifier.
1436                  * 2) integrate hardware offload data, if any:
1437                  *    - RSS flag & hash,
1438                  *    - IP checksum flag,
1439                  *    - VLAN TCI, if any,
1440                  *    - error flags.
1441                  */
1442                 pkt_len = (uint16_t) (rte_le_to_cpu_16(rxd.wb.upper.length) -
1443                                       rxq->crc_len);
1444                 rxm->data_off = RTE_PKTMBUF_HEADROOM;
1445                 rte_packet_prefetch((char *)rxm->buf_addr + rxm->data_off);
1446                 rxm->nb_segs = 1;
1447                 rxm->next = NULL;
1448                 rxm->pkt_len = pkt_len;
1449                 rxm->data_len = pkt_len;
1450                 rxm->port = rxq->port_id;
1451
1452                 pkt_info = rte_le_to_cpu_32(rxd.wb.lower.lo_dword.hs_rss.
1453                                                                 pkt_info);
1454                 /* Only valid if PKT_RX_VLAN_PKT is set in pkt_flags */
1455                 rxm->vlan_tci = rte_le_to_cpu_16(rxd.wb.upper.vlan);
1456
1457                 pkt_flags = rx_desc_status_to_pkt_flags(staterr);
1458                 pkt_flags = pkt_flags | rx_desc_error_to_pkt_flags(staterr);
1459                 pkt_flags = pkt_flags |
1460                         ixgbe_rxd_pkt_info_to_pkt_flags(pkt_info);
1461                 rxm->ol_flags = pkt_flags;
1462                 rxm->packet_type = ixgbe_rxd_pkt_info_to_pkt_type(pkt_info);
1463
1464                 if (likely(pkt_flags & PKT_RX_RSS_HASH))
1465                         rxm->hash.rss = rte_le_to_cpu_32(
1466                                                 rxd.wb.lower.hi_dword.rss);
1467                 else if (pkt_flags & PKT_RX_FDIR) {
1468                         rxm->hash.fdir.hash = rte_le_to_cpu_16(
1469                                         rxd.wb.lower.hi_dword.csum_ip.csum) &
1470                                         IXGBE_ATR_HASH_MASK;
1471                         rxm->hash.fdir.id = rte_le_to_cpu_16(
1472                                         rxd.wb.lower.hi_dword.csum_ip.ip_id);
1473                 }
1474                 /*
1475                  * Store the mbuf address into the next entry of the array
1476                  * of returned packets.
1477                  */
1478                 rx_pkts[nb_rx++] = rxm;
1479         }
1480         rxq->rx_tail = rx_id;
1481
1482         /*
1483          * If the number of free RX descriptors is greater than the RX free
1484          * threshold of the queue, advance the Receive Descriptor Tail (RDT)
1485          * register.
1486          * Update the RDT with the value of the last processed RX descriptor
1487          * minus 1, to guarantee that the RDT register is never equal to the
1488          * RDH register, which creates a "full" ring situation from the
1489          * hardware point of view...
1490          */
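        /*
         * For example, on a 128-descriptor ring, if the last processed
         * descriptor index wrapped around to 0, the RDT below is written
         * as 127 (one entry behind), so it never equals the RDH.
         */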
1491         nb_hold = (uint16_t) (nb_hold + rxq->nb_rx_hold);
1492         if (nb_hold > rxq->rx_free_thresh) {
1493                 PMD_RX_LOG(DEBUG, "port_id=%u queue_id=%u rx_tail=%u "
1494                            "nb_hold=%u nb_rx=%u",
1495                            (unsigned) rxq->port_id, (unsigned) rxq->queue_id,
1496                            (unsigned) rx_id, (unsigned) nb_hold,
1497                            (unsigned) nb_rx);
1498                 rx_id = (uint16_t) ((rx_id == 0) ?
1499                                      (rxq->nb_rx_desc - 1) : (rx_id - 1));
1500                 IXGBE_PCI_REG_WRITE(rxq->rdt_reg_addr, rx_id);
1501                 nb_hold = 0;
1502         }
1503         rxq->nb_rx_hold = nb_hold;
1504         return nb_rx;
1505 }
1506
1507 /**
1508  * Detect an RSC descriptor.
1509  */
1510 static inline uint32_t
1511 ixgbe_rsc_count(union ixgbe_adv_rx_desc *rx)
1512 {
1513         return (rte_le_to_cpu_32(rx->wb.lower.lo_dword.data) &
1514                 IXGBE_RXDADV_RSCCNT_MASK) >> IXGBE_RXDADV_RSCCNT_SHIFT;
1515 }
1516
1517 /**
1518  * ixgbe_fill_cluster_head_buf - fill the first mbuf of the returned packet
1519  *
1520  * Fill the following info in the HEAD buffer of the Rx cluster:
1521  *    - RX port identifier
1522  *    - hardware offload data, if any:
1523  *      - RSS flag & hash
1524  *      - IP checksum flag
1525  *      - VLAN TCI, if any
1526  *      - error flags
1527  * @head HEAD of the packet cluster
1528  * @desc HW descriptor to get data from
1529  * @port_id Port ID of the Rx queue
1530  */
1531 static inline void
1532 ixgbe_fill_cluster_head_buf(
1533         struct rte_mbuf *head,
1534         union ixgbe_adv_rx_desc *desc,
1535         uint8_t port_id,
1536         uint32_t staterr)
1537 {
1538         uint16_t pkt_info;
1539         uint64_t pkt_flags;
1540
1541         head->port = port_id;
1542
1543         /* The vlan_tci field is only valid when PKT_RX_VLAN_PKT is
1544          * set in the pkt_flags field.
1545          */
1546         head->vlan_tci = rte_le_to_cpu_16(desc->wb.upper.vlan);
1547         pkt_info = rte_le_to_cpu_32(desc->wb.lower.lo_dword.hs_rss.pkt_info);
1548         pkt_flags = rx_desc_status_to_pkt_flags(staterr);
1549         pkt_flags |= rx_desc_error_to_pkt_flags(staterr);
1550         pkt_flags |= ixgbe_rxd_pkt_info_to_pkt_flags(pkt_info);
1551         head->ol_flags = pkt_flags;
1552         head->packet_type = ixgbe_rxd_pkt_info_to_pkt_type(pkt_info);
1553
1554         if (likely(pkt_flags & PKT_RX_RSS_HASH))
1555                 head->hash.rss = rte_le_to_cpu_32(desc->wb.lower.hi_dword.rss);
1556         else if (pkt_flags & PKT_RX_FDIR) {
1557                 head->hash.fdir.hash =
1558                         rte_le_to_cpu_16(desc->wb.lower.hi_dword.csum_ip.csum)
1559                                                           & IXGBE_ATR_HASH_MASK;
1560                 head->hash.fdir.id =
1561                         rte_le_to_cpu_16(desc->wb.lower.hi_dword.csum_ip.ip_id);
1562         }
1563 }
1564
1565 /**
1566  * ixgbe_recv_pkts_lro - receive handler for the LRO case.
1567  *
1568  * @rx_queue Rx queue handle
1569  * @rx_pkts table of received packets
1570  * @nb_pkts size of rx_pkts table
1571  * @bulk_alloc if TRUE bulk allocation is used for HW ring refilling
1572  *
1573  * Handles the Rx HW ring completions when RSC feature is configured. Uses an
1574  * additional ring of ixgbe_scattered_rx_entry's that will hold the relevant RSC info.
1575  *
1576  * We use the same logic as in Linux and in FreeBSD ixgbe drivers:
1577  * 1) When non-EOP RSC completion arrives:
1578  *    a) Update the HEAD of the current RSC aggregation cluster with the new
1579  *       segment's data length.
1580  *    b) Set the "next" pointer of the current segment to point to the segment
1581  *       at the NEXTP index.
1582  *    c) Pass the HEAD of RSC aggregation cluster on to the next NEXTP entry
1583  *       in the sw_sc_ring.
1584  * 2) When EOP arrives we just update the cluster's total length and offload
1585  *    flags and deliver the cluster up to the upper layers. In our case - put it
1586  *    in the rx_pkts table.
1587  *
1588  * Returns the number of received packets/clusters (according to the "bulk
1589  * receive" interface).
1590  */
1591 static inline uint16_t
1592 ixgbe_recv_pkts_lro(void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t nb_pkts,
1593                     bool bulk_alloc)
1594 {
1595         struct ixgbe_rx_queue *rxq = rx_queue;
1596         volatile union ixgbe_adv_rx_desc *rx_ring = rxq->rx_ring;
1597         struct ixgbe_rx_entry *sw_ring = rxq->sw_ring;
1598         struct ixgbe_scattered_rx_entry *sw_sc_ring = rxq->sw_sc_ring;
1599         uint16_t rx_id = rxq->rx_tail;
1600         uint16_t nb_rx = 0;
1601         uint16_t nb_hold = rxq->nb_rx_hold;
1602         uint16_t prev_id = rxq->rx_tail;
1603
1604         while (nb_rx < nb_pkts) {
1605                 bool eop;
1606                 struct ixgbe_rx_entry *rxe;
1607                 struct ixgbe_scattered_rx_entry *sc_entry;
1608                 struct ixgbe_scattered_rx_entry *next_sc_entry;
1609                 struct ixgbe_rx_entry *next_rxe;
1610                 struct rte_mbuf *first_seg;
1611                 struct rte_mbuf *rxm;
1612                 struct rte_mbuf *nmb;
1613                 union ixgbe_adv_rx_desc rxd;
1614                 uint16_t data_len;
1615                 uint16_t next_id;
1616                 volatile union ixgbe_adv_rx_desc *rxdp;
1617                 uint32_t staterr;
1618
1619 next_desc:
1620                 /*
1621                  * The code in this whole file uses the volatile pointer to
1622                  * ensure the read ordering of the status and the rest of the
1623                  * descriptor fields (on the compiler level only!!!). This is so
1624                  * UGLY - why not just use the compiler barrier instead? DPDK
1625                  * even has the rte_compiler_barrier() for that.
1626                  *
1627                  * But most importantly this is just wrong because this doesn't
1628                  * ensure memory ordering in a general case at all. For
1629                  * instance, DPDK is supposed to work on Power CPUs where a
1630                  * compiler barrier may just not be enough!
1631                  *
1632                  * I tried to write only this function properly to have a
1633                  * starting point (as a part of an LRO/RSC series) but the
1634                  * compiler cursed at me when I tried to cast away the
1635                  * "volatile" from rx_ring (yes, it's volatile too!!!). So, I'm
1636                  * keeping it the way it is for now.
1637                  *
1638                  * The code in this file is broken in so many other places and
1639                  * will just not work on a big endian CPU anyway therefore the
1640                  * lines below will have to be revisited together with the rest
1641                  * of the ixgbe PMD.
1642                  *
1643                  * TODO:
1644                  *    - Get rid of the "volatile" crap and let the compiler do its
1645                  *      job.
1646                  *    - Use the proper memory barrier (rte_rmb()) to ensure the
1647                  *      memory ordering below.
1648                  */
1649                 rxdp = &rx_ring[rx_id];
1650                 staterr = rte_le_to_cpu_32(rxdp->wb.upper.status_error);
1651
1652                 if (!(staterr & IXGBE_RXDADV_STAT_DD))
1653                         break;
1654
1655                 rxd = *rxdp;
1656
1657                 PMD_RX_LOG(DEBUG, "port_id=%u queue_id=%u rx_id=%u "
1658                                   "staterr=0x%x data_len=%u",
1659                            rxq->port_id, rxq->queue_id, rx_id, staterr,
1660                            rte_le_to_cpu_16(rxd.wb.upper.length));
1661
1662                 if (!bulk_alloc) {
1663                         nmb = rte_rxmbuf_alloc(rxq->mb_pool);
1664                         if (nmb == NULL) {
1665                                 PMD_RX_LOG(DEBUG, "RX mbuf alloc failed "
1666                                                   "port_id=%u queue_id=%u",
1667                                            rxq->port_id, rxq->queue_id);
1668
1669                                 rte_eth_devices[rxq->port_id].data->
1670                                                         rx_mbuf_alloc_failed++;
1671                                 break;
1672                         }
1673                 } else if (nb_hold > rxq->rx_free_thresh) {
1675                         uint16_t next_rdt = rxq->rx_free_trigger;
1676
1677                         if (!ixgbe_rx_alloc_bufs(rxq, false)) {
1678                                 rte_wmb();
1679                                 IXGBE_PCI_REG_WRITE(rxq->rdt_reg_addr,
1680                                                     next_rdt);
1681                                 nb_hold -= rxq->rx_free_thresh;
1682                         } else {
1683                                 PMD_RX_LOG(DEBUG, "RX bulk alloc failed "
1684                                                   "port_id=%u queue_id=%u",
1685                                            rxq->port_id, rxq->queue_id);
1686
1687                                 rte_eth_devices[rxq->port_id].data->
1688                                                         rx_mbuf_alloc_failed++;
1689                                 break;
1690                         }
1691                 }
1692
1693                 nb_hold++;
1694                 rxe = &sw_ring[rx_id];
1695                 eop = staterr & IXGBE_RXDADV_STAT_EOP;
1696
1697                 next_id = rx_id + 1;
1698                 if (next_id == rxq->nb_rx_desc)
1699                         next_id = 0;
1700
1701                 /* Prefetch next mbuf while processing current one. */
1702                 rte_ixgbe_prefetch(sw_ring[next_id].mbuf);
1703
1704                 /*
1705                  * When next RX descriptor is on a cache-line boundary,
1706                  * prefetch the next 4 RX descriptors and the next 8 pointers
1707                  * to mbufs.
1708                  */
1709                 if ((next_id & 0x3) == 0) {
1710                         rte_ixgbe_prefetch(&rx_ring[next_id]);
1711                         rte_ixgbe_prefetch(&sw_ring[next_id]);
1712                 }
1713
1714                 rxm = rxe->mbuf;
1715
1716                 if (!bulk_alloc) {
1717                         __le64 dma =
1718                           rte_cpu_to_le_64(rte_mbuf_data_dma_addr_default(nmb));
1719                         /*
1720                          * Update RX descriptor with the physical address of the
1721                          * new data buffer of the newly allocated mbuf.
1722                          */
1723                         rxe->mbuf = nmb;
1724
1725                         rxm->data_off = RTE_PKTMBUF_HEADROOM;
1726                         rxdp->read.hdr_addr = 0;
1727                         rxdp->read.pkt_addr = dma;
1728                 } else
1729                         rxe->mbuf = NULL;
1730
1731                 /*
1732                  * Set data length & data buffer address of mbuf.
1733                  */
1734                 data_len = rte_le_to_cpu_16(rxd.wb.upper.length);
1735                 rxm->data_len = data_len;
1736
1737                 if (!eop) {
1738                         uint16_t nextp_id;
1739                         /*
1740                          * Get next descriptor index:
1741                          *  - For RSC it's in the NEXTP field.
1742                          *  - For a scattered packet - it's just a following
1743                          *    descriptor.
1744                          */
1745                         if (ixgbe_rsc_count(&rxd))
1746                                 nextp_id =
1747                                         (staterr & IXGBE_RXDADV_NEXTP_MASK) >>
1748                                                        IXGBE_RXDADV_NEXTP_SHIFT;
1749                         else
1750                                 nextp_id = next_id;
1751
1752                         next_sc_entry = &sw_sc_ring[nextp_id];
1753                         next_rxe = &sw_ring[nextp_id];
1754                         rte_ixgbe_prefetch(next_rxe);
1755                 }
1756
1757                 sc_entry = &sw_sc_ring[rx_id];
1758                 first_seg = sc_entry->fbuf;
1759                 sc_entry->fbuf = NULL;
1760
1761                 /*
1762                  * If this is the first buffer of the received packet,
1763                  * set the pointer to the first mbuf of the packet and
1764                  * initialize its context.
1765                  * Otherwise, update the total length and the number of segments
1766                  * of the current scattered packet, and update the pointer to
1767                  * the last mbuf of the current packet.
1768                  */
1769                 if (first_seg == NULL) {
1770                         first_seg = rxm;
1771                         first_seg->pkt_len = data_len;
1772                         first_seg->nb_segs = 1;
1773                 } else {
1774                         first_seg->pkt_len += data_len;
1775                         first_seg->nb_segs++;
1776                 }
1777
1778                 prev_id = rx_id;
1779                 rx_id = next_id;
1780
1781                 /*
1782                  * If this is not the last buffer of the received packet, update
1783                  * the pointer to the first mbuf at the NEXTP entry in the
1784                  * sw_sc_ring and continue to parse the RX ring.
1785                  */
1786                 if (!eop) {
1787                         rxm->next = next_rxe->mbuf;
1788                         next_sc_entry->fbuf = first_seg;
1789                         goto next_desc;
1790                 }
1791
1792                 /*
1793                  * This is the last buffer of the received packet - return
1794                  * the current cluster to the user.
1795                  */
1796                 rxm->next = NULL;
1797
1798                 /* Initialize the first mbuf of the returned packet */
1799                 ixgbe_fill_cluster_head_buf(first_seg, &rxd, rxq->port_id,
1800                                             staterr);
1801
1802                 /*
1803                  * Deal with the case when HW CRC strip is disabled.
1804                  * That can't happen when LRO is enabled, but still could
1805                  * happen for scattered RX mode.
1806                  */
1807                 first_seg->pkt_len -= rxq->crc_len;
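                /*
                 * If the last segment carries nothing but (part of) the CRC,
                 * drop that segment entirely and trim the remaining CRC bytes
                 * from the new last segment; otherwise simply strip the CRC
                 * from the last segment.
                 */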
1808                 if (unlikely(rxm->data_len <= rxq->crc_len)) {
1809                         struct rte_mbuf *lp;
1810
1811                         for (lp = first_seg; lp->next != rxm; lp = lp->next)
1812                                 ;
1813
1814                         first_seg->nb_segs--;
1815                         lp->data_len -= rxq->crc_len - rxm->data_len;
1816                         lp->next = NULL;
1817                         rte_pktmbuf_free_seg(rxm);
1818                 } else
1819                         rxm->data_len -= rxq->crc_len;
1820
1821                 /* Prefetch data of first segment, if configured to do so. */
1822                 rte_packet_prefetch((char *)first_seg->buf_addr +
1823                         first_seg->data_off);
1824
1825                 /*
1826                  * Store the mbuf address into the next entry of the array
1827                  * of returned packets.
1828                  */
1829                 rx_pkts[nb_rx++] = first_seg;
1830         }
1831
1832         /*
1833          * Record index of the next RX descriptor to probe.
1834          */
1835         rxq->rx_tail = rx_id;
1836
1837         /*
1838          * If the number of free RX descriptors is greater than the RX free
1839          * threshold of the queue, advance the Receive Descriptor Tail (RDT)
1840          * register.
1841          * Update the RDT with the value of the last processed RX descriptor
1842          * minus 1, to guarantee that the RDT register is never equal to the
1843          * RDH register, which creates a "full" ring situation from the
1844          * hardware point of view...
1845          */
1846         if (!bulk_alloc && nb_hold > rxq->rx_free_thresh) {
1847                 PMD_RX_LOG(DEBUG, "port_id=%u queue_id=%u rx_tail=%u "
1848                            "nb_hold=%u nb_rx=%u",
1849                            rxq->port_id, rxq->queue_id, rx_id, nb_hold, nb_rx);
1850
1851                 rte_wmb();
1852                 IXGBE_PCI_REG_WRITE(rxq->rdt_reg_addr, prev_id);
1853                 nb_hold = 0;
1854         }
1855
1856         rxq->nb_rx_hold = nb_hold;
1857         return nb_rx;
1858 }
1859
1860 uint16_t
1861 ixgbe_recv_pkts_lro_single_alloc(void *rx_queue, struct rte_mbuf **rx_pkts,
1862                                  uint16_t nb_pkts)
1863 {
1864         return ixgbe_recv_pkts_lro(rx_queue, rx_pkts, nb_pkts, false);
1865 }
1866
1867 uint16_t
1868 ixgbe_recv_pkts_lro_bulk_alloc(void *rx_queue, struct rte_mbuf **rx_pkts,
1869                                uint16_t nb_pkts)
1870 {
1871         return ixgbe_recv_pkts_lro(rx_queue, rx_pkts, nb_pkts, true);
1872 }
1873
1874 /*********************************************************************
1875  *
1876  *  Queue management functions
1877  *
1878  **********************************************************************/
1879
1880 static void __attribute__((cold))
1881 ixgbe_tx_queue_release_mbufs(struct ixgbe_tx_queue *txq)
1882 {
1883         unsigned i;
1884
1885         if (txq->sw_ring != NULL) {
1886                 for (i = 0; i < txq->nb_tx_desc; i++) {
1887                         if (txq->sw_ring[i].mbuf != NULL) {
1888                                 rte_pktmbuf_free_seg(txq->sw_ring[i].mbuf);
1889                                 txq->sw_ring[i].mbuf = NULL;
1890                         }
1891                 }
1892         }
1893 }
1894
1895 static void __attribute__((cold))
1896 ixgbe_tx_free_swring(struct ixgbe_tx_queue *txq)
1897 {
1898         if (txq != NULL &&
1899             txq->sw_ring != NULL)
1900                 rte_free(txq->sw_ring);
1901 }
1902
1903 static void __attribute__((cold))
1904 ixgbe_tx_queue_release(struct ixgbe_tx_queue *txq)
1905 {
1906         if (txq != NULL && txq->ops != NULL) {
1907                 txq->ops->release_mbufs(txq);
1908                 txq->ops->free_swring(txq);
1909                 rte_free(txq);
1910         }
1911 }
1912
1913 void __attribute__((cold))
1914 ixgbe_dev_tx_queue_release(void *txq)
1915 {
1916         ixgbe_tx_queue_release(txq);
1917 }
1918
1919 /* (Re)set dynamic ixgbe_tx_queue fields to defaults */
1920 static void __attribute__((cold))
1921 ixgbe_reset_tx_queue(struct ixgbe_tx_queue *txq)
1922 {
1923         static const union ixgbe_adv_tx_desc zeroed_desc = {{0}};
1924         struct ixgbe_tx_entry *txe = txq->sw_ring;
1925         uint16_t prev, i;
1926
1927         /* Zero out HW ring memory */
1928         for (i = 0; i < txq->nb_tx_desc; i++) {
1929                 txq->tx_ring[i] = zeroed_desc;
1930         }
1931
1932         /* Initialize SW ring entries */
1933         prev = (uint16_t) (txq->nb_tx_desc - 1);
1934         for (i = 0; i < txq->nb_tx_desc; i++) {
1935                 volatile union ixgbe_adv_tx_desc *txd = &txq->tx_ring[i];
1936                 txd->wb.status = rte_cpu_to_le_32(IXGBE_TXD_STAT_DD);
1937                 txe[i].mbuf = NULL;
1938                 txe[i].last_id = i;
1939                 txe[prev].next_id = i;
1940                 prev = i;
1941         }
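        /*
         * Note: the write-back status of every descriptor was pre-set with
         * IXGBE_TXD_STAT_DD above, so each entry initially appears "done"
         * and can be reclaimed the first time the ring is used.
         */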
1942
1943         txq->tx_next_dd = (uint16_t)(txq->tx_rs_thresh - 1);
1944         txq->tx_next_rs = (uint16_t)(txq->tx_rs_thresh - 1);
1945
1946         txq->tx_tail = 0;
1947         txq->nb_tx_used = 0;
1948         /*
1949          * Always allow 1 descriptor to be un-allocated to avoid
1950          * a H/W race condition
1951          */
1952         txq->last_desc_cleaned = (uint16_t)(txq->nb_tx_desc - 1);
1953         txq->nb_tx_free = (uint16_t)(txq->nb_tx_desc - 1);
1954         txq->ctx_curr = 0;
1955         memset((void*)&txq->ctx_cache, 0,
1956                 IXGBE_CTX_NUM * sizeof(struct ixgbe_advctx_info));
1957 }
1958
1959 static const struct ixgbe_txq_ops def_txq_ops = {
1960         .release_mbufs = ixgbe_tx_queue_release_mbufs,
1961         .free_swring = ixgbe_tx_free_swring,
1962         .reset = ixgbe_reset_tx_queue,
1963 };
1964
1965 /* Takes an ethdev and a queue and sets up the tx function to be used based on
1966  * the queue parameters. Used in tx_queue_setup by primary process and then
1967  * in dev_init by secondary process when attaching to an existing ethdev.
1968  */
1969 void __attribute__((cold))
1970 ixgbe_set_tx_function(struct rte_eth_dev *dev, struct ixgbe_tx_queue *txq)
1971 {
1972         /* Use a simple Tx queue (no offloads, no multi segs) if possible */
1973         if (((txq->txq_flags & IXGBE_SIMPLE_FLAGS) == IXGBE_SIMPLE_FLAGS)
1974                         && (txq->tx_rs_thresh >= RTE_PMD_IXGBE_TX_MAX_BURST)) {
1975                 PMD_INIT_LOG(DEBUG, "Using simple tx code path");
1976 #ifdef RTE_IXGBE_INC_VECTOR
1977                 if (txq->tx_rs_thresh <= RTE_IXGBE_TX_MAX_FREE_BUF_SZ &&
1978                                 (rte_eal_process_type() != RTE_PROC_PRIMARY ||
1979                                         ixgbe_txq_vec_setup(txq) == 0)) {
1980                         PMD_INIT_LOG(DEBUG, "Vector tx enabled.");
1981                         dev->tx_pkt_burst = ixgbe_xmit_pkts_vec;
1982                 } else
1983 #endif
1984                 dev->tx_pkt_burst = ixgbe_xmit_pkts_simple;
1985         } else {
1986                 PMD_INIT_LOG(DEBUG, "Using full-featured tx code path");
1987                 PMD_INIT_LOG(DEBUG,
1988                                 " - txq_flags = %lx " "[IXGBE_SIMPLE_FLAGS=%lx]",
1989                                 (unsigned long)txq->txq_flags,
1990                                 (unsigned long)IXGBE_SIMPLE_FLAGS);
1991                 PMD_INIT_LOG(DEBUG,
1992                                 " - tx_rs_thresh = %lu " "[RTE_PMD_IXGBE_TX_MAX_BURST=%lu]",
1993                                 (unsigned long)txq->tx_rs_thresh,
1994                                 (unsigned long)RTE_PMD_IXGBE_TX_MAX_BURST);
1995                 dev->tx_pkt_burst = ixgbe_xmit_pkts;
1996         }
1997 }
1998
1999 int __attribute__((cold))
2000 ixgbe_dev_tx_queue_setup(struct rte_eth_dev *dev,
2001                          uint16_t queue_idx,
2002                          uint16_t nb_desc,
2003                          unsigned int socket_id,
2004                          const struct rte_eth_txconf *tx_conf)
2005 {
2006         const struct rte_memzone *tz;
2007         struct ixgbe_tx_queue *txq;
2008         struct ixgbe_hw     *hw;
2009         uint16_t tx_rs_thresh, tx_free_thresh;
2010
2011         PMD_INIT_FUNC_TRACE();
2012         hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
2013
2014         /*
2015          * Validate number of transmit descriptors.
2016          * It must not exceed hardware maximum, and must be a multiple
2017          * of IXGBE_TXD_ALIGN.
2018          */
2019         if (nb_desc % IXGBE_TXD_ALIGN != 0 ||
2020                         (nb_desc > IXGBE_MAX_RING_DESC) ||
2021                         (nb_desc < IXGBE_MIN_RING_DESC)) {
2022                 return -EINVAL;
2023         }
2024
2025         /*
2026          * The following two parameters control the setting of the RS bit on
2027          * transmit descriptors.
2028          * TX descriptors will have their RS bit set after txq->tx_rs_thresh
2029          * descriptors have been used.
2030          * The TX descriptor ring will be cleaned after txq->tx_free_thresh
2031          * descriptors are used or if the number of descriptors required
2032          * to transmit a packet is greater than the number of free TX
2033          * descriptors.
2034          * The following constraints must be satisfied:
2035          *  tx_rs_thresh must be greater than 0.
2036          *  tx_rs_thresh must be less than the size of the ring minus 2.
2037          *  tx_rs_thresh must be less than or equal to tx_free_thresh.
2038          *  tx_rs_thresh must be a divisor of the ring size.
2039          *  tx_free_thresh must be greater than 0.
2040          *  tx_free_thresh must be less than the size of the ring minus 3.
2041          * One descriptor in the TX ring is used as a sentinel to avoid a
2042          * H/W race condition, hence the maximum threshold constraints.
2043          * When set to zero use default values.
2044          */
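        /*
         * Worked example (illustrative values, not taken from this file):
         * with nb_desc = 512, tx_rs_thresh = 32 and tx_free_thresh = 32,
         * every constraint above holds: 32 > 0, 32 < 510, 32 <= 32,
         * 512 % 32 == 0 and 32 < 509.
         */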
2045         tx_rs_thresh = (uint16_t)((tx_conf->tx_rs_thresh) ?
2046                         tx_conf->tx_rs_thresh : DEFAULT_TX_RS_THRESH);
2047         tx_free_thresh = (uint16_t)((tx_conf->tx_free_thresh) ?
2048                         tx_conf->tx_free_thresh : DEFAULT_TX_FREE_THRESH);
2049         if (tx_rs_thresh >= (nb_desc - 2)) {
2050                 PMD_INIT_LOG(ERR, "tx_rs_thresh must be less than the number "
2051                         "of TX descriptors minus 2. (tx_rs_thresh=%u "
2052                         "port=%d queue=%d)", (unsigned int)tx_rs_thresh,
2053                         (int)dev->data->port_id, (int)queue_idx);
2054                 return -(EINVAL);
2055         }
2056         if (tx_rs_thresh > DEFAULT_TX_RS_THRESH) {
2057                 PMD_INIT_LOG(ERR, "tx_rs_thresh must be less than or equal to %u. "
2058                         "(tx_rs_thresh=%u port=%d queue=%d)",
2059                         DEFAULT_TX_RS_THRESH, (unsigned int)tx_rs_thresh,
2060                         (int)dev->data->port_id, (int)queue_idx);
2061                 return -(EINVAL);
2062         }
2063         if (tx_free_thresh >= (nb_desc - 3)) {
2064                 PMD_INIT_LOG(ERR, "tx_free_thresh must be less than the "
2065                              "number of TX descriptors minus 3. "
2066                              "(tx_free_thresh=%u "
2067                              "port=%d queue=%d)",
2068                              (unsigned int)tx_free_thresh,
2069                              (int)dev->data->port_id, (int)queue_idx);
2070                 return -(EINVAL);
2071         }
2072         if (tx_rs_thresh > tx_free_thresh) {
2073                 PMD_INIT_LOG(ERR, "tx_rs_thresh must be less than or equal to "
2074                              "tx_free_thresh. (tx_free_thresh=%u "
2075                              "tx_rs_thresh=%u port=%d queue=%d)",
2076                              (unsigned int)tx_free_thresh,
2077                              (unsigned int)tx_rs_thresh,
2078                              (int)dev->data->port_id,
2079                              (int)queue_idx);
2080                 return -(EINVAL);
2081         }
2082         if ((nb_desc % tx_rs_thresh) != 0) {
2083                 PMD_INIT_LOG(ERR, "tx_rs_thresh must be a divisor of the "
2084                              "number of TX descriptors. (tx_rs_thresh=%u "
2085                              "port=%d queue=%d)", (unsigned int)tx_rs_thresh,
2086                              (int)dev->data->port_id, (int)queue_idx);
2087                 return -(EINVAL);
2088         }
2089
2090         /*
2091          * If tx_rs_thresh is greater than 1, then TX WTHRESH should be
2092          * set to 0. If WTHRESH is greater than zero, the RS bit is ignored
2093          * by the NIC and all descriptors are written back after the NIC
2094          * accumulates WTHRESH descriptors.
2095          */
2096         if ((tx_rs_thresh > 1) && (tx_conf->tx_thresh.wthresh != 0)) {
2097                 PMD_INIT_LOG(ERR, "TX WTHRESH must be set to 0 if "
2098                              "tx_rs_thresh is greater than 1. (tx_rs_thresh=%u "
2099                              "port=%d queue=%d)", (unsigned int)tx_rs_thresh,
2100                              (int)dev->data->port_id, (int)queue_idx);
2101                 return -(EINVAL);
2102         }
2103
2104         /* Free memory prior to re-allocation if needed... */
2105         if (dev->data->tx_queues[queue_idx] != NULL) {
2106                 ixgbe_tx_queue_release(dev->data->tx_queues[queue_idx]);
2107                 dev->data->tx_queues[queue_idx] = NULL;
2108         }
2109
2110         /* First allocate the tx queue data structure */
2111         txq = rte_zmalloc_socket("ethdev TX queue", sizeof(struct ixgbe_tx_queue),
2112                                  RTE_CACHE_LINE_SIZE, socket_id);
2113         if (txq == NULL)
2114                 return -ENOMEM;
2115
2116         /*
2117          * Allocate TX ring hardware descriptors. A memzone large enough to
2118          * handle the maximum ring size is allocated in order to allow for
2119          * resizing in later calls to the queue setup function.
2120          */
2121         tz = rte_eth_dma_zone_reserve(dev, "tx_ring", queue_idx,
2122                         sizeof(union ixgbe_adv_tx_desc) * IXGBE_MAX_RING_DESC,
2123                         IXGBE_ALIGN, socket_id);
2124         if (tz == NULL) {
2125                 ixgbe_tx_queue_release(txq);
2126                 return -ENOMEM;
2127         }
2128
2129         txq->nb_tx_desc = nb_desc;
2130         txq->tx_rs_thresh = tx_rs_thresh;
2131         txq->tx_free_thresh = tx_free_thresh;
2132         txq->pthresh = tx_conf->tx_thresh.pthresh;
2133         txq->hthresh = tx_conf->tx_thresh.hthresh;
2134         txq->wthresh = tx_conf->tx_thresh.wthresh;
2135         txq->queue_id = queue_idx;
2136         txq->reg_idx = (uint16_t)((RTE_ETH_DEV_SRIOV(dev).active == 0) ?
2137                 queue_idx : RTE_ETH_DEV_SRIOV(dev).def_pool_q_idx + queue_idx);
2138         txq->port_id = dev->data->port_id;
2139         txq->txq_flags = tx_conf->txq_flags;
2140         txq->ops = &def_txq_ops;
2141         txq->tx_deferred_start = tx_conf->tx_deferred_start;
2142
2143         /*
2144          * Modification to set VFTDT for virtual function if vf is detected
2145          */
2146         if (hw->mac.type == ixgbe_mac_82599_vf ||
2147             hw->mac.type == ixgbe_mac_X540_vf ||
2148             hw->mac.type == ixgbe_mac_X550_vf ||
2149             hw->mac.type == ixgbe_mac_X550EM_x_vf)
2150                 txq->tdt_reg_addr = IXGBE_PCI_REG_ADDR(hw, IXGBE_VFTDT(queue_idx));
2151         else
2152                 txq->tdt_reg_addr = IXGBE_PCI_REG_ADDR(hw, IXGBE_TDT(txq->reg_idx));
2153
2154         txq->tx_ring_phys_addr = rte_mem_phy2mch(tz->memseg_id, tz->phys_addr);
2155         txq->tx_ring = (union ixgbe_adv_tx_desc *) tz->addr;
2156
2157         /* Allocate software ring */
2158         txq->sw_ring = rte_zmalloc_socket("txq->sw_ring",
2159                                 sizeof(struct ixgbe_tx_entry) * nb_desc,
2160                                 RTE_CACHE_LINE_SIZE, socket_id);
2161         if (txq->sw_ring == NULL) {
2162                 ixgbe_tx_queue_release(txq);
2163                 return -ENOMEM;
2164         }
2165         PMD_INIT_LOG(DEBUG, "sw_ring=%p hw_ring=%p dma_addr=0x%"PRIx64,
2166                      txq->sw_ring, txq->tx_ring, txq->tx_ring_phys_addr);
2167
2168         /* set up vector or scalar TX function as appropriate */
2169         ixgbe_set_tx_function(dev, txq);
2170
2171         txq->ops->reset(txq);
2172
2173         dev->data->tx_queues[queue_idx] = txq;
2174
2176         return 0;
2177 }
2178
2179 /**
2180  * ixgbe_free_sc_cluster - free the not-yet-completed scattered cluster
2181  *
2182  * The "next" pointer of the last segment of (not-yet-completed) RSC clusters
2183  * in the sw_sc_ring is not set to NULL but rather points to the next
2184  * mbuf of this RSC aggregation (that has not been completed yet and still
2185  * resides on the HW ring). So, instead of calling rte_pktmbuf_free() we
2186  * just free the first "nb_segs" segments of the cluster explicitly by calling
2187  * rte_pktmbuf_free_seg().
2188  *
2189  * @m scattered cluster head
2190  */
2191 static void __attribute__((cold))
2192 ixgbe_free_sc_cluster(struct rte_mbuf *m)
2193 {
2194         uint8_t i, nb_segs = m->nb_segs;
2195         struct rte_mbuf *next_seg;
2196
2197         for (i = 0; i < nb_segs; i++) {
2198                 next_seg = m->next;
2199                 rte_pktmbuf_free_seg(m);
2200                 m = next_seg;
2201         }
2202 }
2203
2204 static void __attribute__((cold))
2205 ixgbe_rx_queue_release_mbufs(struct ixgbe_rx_queue *rxq)
2206 {
2207         unsigned i;
2208
2209 #ifdef RTE_IXGBE_INC_VECTOR
2210         /* SSE Vector driver has a different way of releasing mbufs. */
2211         if (rxq->rx_using_sse) {
2212                 ixgbe_rx_queue_release_mbufs_vec(rxq);
2213                 return;
2214         }
2215 #endif
2216
2217         if (rxq->sw_ring != NULL) {
2218                 for (i = 0; i < rxq->nb_rx_desc; i++) {
2219                         if (rxq->sw_ring[i].mbuf != NULL) {
2220                                 rte_pktmbuf_free_seg(rxq->sw_ring[i].mbuf);
2221                                 rxq->sw_ring[i].mbuf = NULL;
2222                         }
2223                 }
2224                 if (rxq->rx_nb_avail) {
2225                         for (i = 0; i < rxq->rx_nb_avail; ++i) {
2226                                 struct rte_mbuf *mb;
2227                                 mb = rxq->rx_stage[rxq->rx_next_avail + i];
2228                                 rte_pktmbuf_free_seg(mb);
2229                         }
2230                         rxq->rx_nb_avail = 0;
2231                 }
2232         }
2233
2234         if (rxq->sw_sc_ring)
2235                 for (i = 0; i < rxq->nb_rx_desc; i++)
2236                         if (rxq->sw_sc_ring[i].fbuf) {
2237                                 ixgbe_free_sc_cluster(rxq->sw_sc_ring[i].fbuf);
2238                                 rxq->sw_sc_ring[i].fbuf = NULL;
2239                         }
2240 }
2241
2242 static void __attribute__((cold))
2243 ixgbe_rx_queue_release(struct ixgbe_rx_queue *rxq)
2244 {
2245         if (rxq != NULL) {
2246                 ixgbe_rx_queue_release_mbufs(rxq);
2247                 rte_free(rxq->sw_ring);
2248                 rte_free(rxq->sw_sc_ring);
2249                 rte_free(rxq);
2250         }
2251 }
2252
2253 void __attribute__((cold))
2254 ixgbe_dev_rx_queue_release(void *rxq)
2255 {
2256         ixgbe_rx_queue_release(rxq);
2257 }
2258
2259 /*
2260  * Check if Rx Burst Bulk Alloc function can be used.
2261  * Return
2262  *        0: the preconditions are satisfied and the bulk allocation function
2263  *           can be used.
2264  *  -EINVAL: the preconditions are NOT satisfied and the default Rx burst
2265  *           function must be used.
2266  */
2267 static inline int __attribute__((cold))
2268 check_rx_burst_bulk_alloc_preconditions(struct ixgbe_rx_queue *rxq)
2269 {
2270         int ret = 0;
2271
2272         /*
2273          * Make sure the following pre-conditions are satisfied:
2274          *   rxq->rx_free_thresh >= RTE_PMD_IXGBE_RX_MAX_BURST
2275          *   rxq->rx_free_thresh < rxq->nb_rx_desc
2276          *   (rxq->nb_rx_desc % rxq->rx_free_thresh) == 0
2277          *   rxq->nb_rx_desc<(IXGBE_MAX_RING_DESC-RTE_PMD_IXGBE_RX_MAX_BURST)
2278          * Scattered packets are not supported.  This should be checked
2279          * outside of this function.
2280          * outside of this function.
          */
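        /*
         * Illustrative example (assuming RTE_PMD_IXGBE_RX_MAX_BURST is 32
         * and IXGBE_MAX_RING_DESC is 4096): nb_rx_desc = 256 with
         * rx_free_thresh = 64 satisfies all of the above, since 64 >= 32,
         * 64 < 256, 256 % 64 == 0 and 256 < 4064.
         */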
2281         if (!(rxq->rx_free_thresh >= RTE_PMD_IXGBE_RX_MAX_BURST)) {
2282                 PMD_INIT_LOG(DEBUG, "Rx Burst Bulk Alloc Preconditions: "
2283                              "rxq->rx_free_thresh=%d, "
2284                              "RTE_PMD_IXGBE_RX_MAX_BURST=%d",
2285                              rxq->rx_free_thresh, RTE_PMD_IXGBE_RX_MAX_BURST);
2286                 ret = -EINVAL;
2287         } else if (!(rxq->rx_free_thresh < rxq->nb_rx_desc)) {
2288                 PMD_INIT_LOG(DEBUG, "Rx Burst Bulk Alloc Preconditions: "
2289                              "rxq->rx_free_thresh=%d, "
2290                              "rxq->nb_rx_desc=%d",
2291                              rxq->rx_free_thresh, rxq->nb_rx_desc);
2292                 ret = -EINVAL;
2293         } else if (!((rxq->nb_rx_desc % rxq->rx_free_thresh) == 0)) {
2294                 PMD_INIT_LOG(DEBUG, "Rx Burst Bulk Alloc Preconditions: "
2295                              "rxq->nb_rx_desc=%d, "
2296                              "rxq->rx_free_thresh=%d",
2297                              rxq->nb_rx_desc, rxq->rx_free_thresh);
2298                 ret = -EINVAL;
2299         } else if (!(rxq->nb_rx_desc <
2300                (IXGBE_MAX_RING_DESC - RTE_PMD_IXGBE_RX_MAX_BURST))) {
2301                 PMD_INIT_LOG(DEBUG, "Rx Burst Bulk Alloc Preconditions: "
2302                              "rxq->nb_rx_desc=%d, "
2303                              "IXGBE_MAX_RING_DESC=%d, "
2304                              "RTE_PMD_IXGBE_RX_MAX_BURST=%d",
2305                              rxq->nb_rx_desc, IXGBE_MAX_RING_DESC,
2306                              RTE_PMD_IXGBE_RX_MAX_BURST);
2307                 ret = -EINVAL;
2308         }
2309
2310         return ret;
2311 }
2312
2313 /* Reset dynamic ixgbe_rx_queue fields back to defaults */
2314 static void __attribute__((cold))
2315 ixgbe_reset_rx_queue(struct ixgbe_adapter *adapter, struct ixgbe_rx_queue *rxq)
2316 {
2317         static const union ixgbe_adv_rx_desc zeroed_desc = {{0}};
2318         unsigned i;
2319         uint16_t len = rxq->nb_rx_desc;
2320
2321         /*
2322          * By default, the Rx queue setup function allocates enough memory for
2323          * IXGBE_MAX_RING_DESC.  The Rx Burst bulk allocation function requires
2324          * extra memory at the end of the descriptor ring to be zero'd out. A
2325          * pre-condition for using the Rx burst bulk alloc function is that the
2326          * number of descriptors is less than or equal to
2327          * (IXGBE_MAX_RING_DESC - RTE_PMD_IXGBE_RX_MAX_BURST). Check all the
2328          * constraints here to see if we need to zero out memory after the end
2329          * of the H/W descriptor ring.
2330          */
2331         if (adapter->rx_bulk_alloc_allowed)
2332                 /* zero out extra memory */
2333                 len += RTE_PMD_IXGBE_RX_MAX_BURST;
2334
2335         /*
2336          * Zero out HW ring memory. Zero out extra memory at the end of
2337          * the H/W ring so look-ahead logic in Rx Burst bulk alloc function
2338          * reads extra memory as zeros.
2339          */
2340         for (i = 0; i < len; i++) {
2341                 rxq->rx_ring[i] = zeroed_desc;
2342         }
2343
2344         /*
2345          * initialize extra software ring entries. Space for these extra
2346          * entries is always allocated
2347          */
2348         memset(&rxq->fake_mbuf, 0x0, sizeof(rxq->fake_mbuf));
2349         for (i = rxq->nb_rx_desc; i < len; ++i) {
2350                 rxq->sw_ring[i].mbuf = &rxq->fake_mbuf;
2351         }
2352
2353         rxq->rx_nb_avail = 0;
2354         rxq->rx_next_avail = 0;
2355         rxq->rx_free_trigger = (uint16_t)(rxq->rx_free_thresh - 1);
2356         rxq->rx_tail = 0;
2357         rxq->nb_rx_hold = 0;
2358         rxq->pkt_first_seg = NULL;
2359         rxq->pkt_last_seg = NULL;
2360
2361 #ifdef RTE_IXGBE_INC_VECTOR
2362         rxq->rxrearm_start = 0;
2363         rxq->rxrearm_nb = 0;
2364 #endif
2365 }
2366
2367 int __attribute__((cold))
2368 ixgbe_dev_rx_queue_setup(struct rte_eth_dev *dev,
2369                          uint16_t queue_idx,
2370                          uint16_t nb_desc,
2371                          unsigned int socket_id,
2372                          const struct rte_eth_rxconf *rx_conf,
2373                          struct rte_mempool *mp)
2374 {
2375         const struct rte_memzone *rz;
2376         struct ixgbe_rx_queue *rxq;
2377         struct ixgbe_hw     *hw;
2378         uint16_t len;
2379         struct ixgbe_adapter *adapter =
2380                 (struct ixgbe_adapter *)dev->data->dev_private;
2381
2382         PMD_INIT_FUNC_TRACE();
2383         hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
2384
2385         /*
2386          * Validate number of receive descriptors.
2387          * It must not exceed hardware maximum, and must be a multiple
2388          * of IXGBE_RXD_ALIGN.
2389          */
2390         if (nb_desc % IXGBE_RXD_ALIGN != 0 ||
2391                         (nb_desc > IXGBE_MAX_RING_DESC) ||
2392                         (nb_desc < IXGBE_MIN_RING_DESC)) {
2393                 return -EINVAL;
2394         }
2395
2396         /* Free memory prior to re-allocation if needed... */
2397         if (dev->data->rx_queues[queue_idx] != NULL) {
2398                 ixgbe_rx_queue_release(dev->data->rx_queues[queue_idx]);
2399                 dev->data->rx_queues[queue_idx] = NULL;
2400         }
2401
2402         /* First allocate the rx queue data structure */
2403         rxq = rte_zmalloc_socket("ethdev RX queue", sizeof(struct ixgbe_rx_queue),
2404                                  RTE_CACHE_LINE_SIZE, socket_id);
2405         if (rxq == NULL)
2406                 return -ENOMEM;
2407         rxq->mb_pool = mp;
2408         rxq->nb_rx_desc = nb_desc;
2409         rxq->rx_free_thresh = rx_conf->rx_free_thresh;
2410         rxq->queue_id = queue_idx;
2411         rxq->reg_idx = (uint16_t)((RTE_ETH_DEV_SRIOV(dev).active == 0) ?
2412                 queue_idx : RTE_ETH_DEV_SRIOV(dev).def_pool_q_idx + queue_idx);
2413         rxq->port_id = dev->data->port_id;
2414         rxq->crc_len = (uint8_t) ((dev->data->dev_conf.rxmode.hw_strip_crc) ?
2415                                                         0 : ETHER_CRC_LEN);
2416         rxq->drop_en = rx_conf->rx_drop_en;
2417         rxq->rx_deferred_start = rx_conf->rx_deferred_start;
2418
2419         /*
2420          * Allocate RX ring hardware descriptors. A memzone large enough to
2421          * handle the maximum ring size is allocated in order to allow for
2422          * resizing in later calls to the queue setup function.
2423          */
2424         rz = rte_eth_dma_zone_reserve(dev, "rx_ring", queue_idx,
2425                                       RX_RING_SZ, IXGBE_ALIGN, socket_id);
2426         if (rz == NULL) {
2427                 ixgbe_rx_queue_release(rxq);
2428                 return -ENOMEM;
2429         }
2430
2431         /*
2432          * Zero init all the descriptors in the ring.
2433          */
2434         memset(rz->addr, 0, RX_RING_SZ);
2435
2436         /*
2437          * Modified to setup VFRDT for Virtual Function
2438          */
2439         if (hw->mac.type == ixgbe_mac_82599_vf ||
2440             hw->mac.type == ixgbe_mac_X540_vf ||
2441             hw->mac.type == ixgbe_mac_X550_vf ||
2442             hw->mac.type == ixgbe_mac_X550EM_x_vf) {
2443                 rxq->rdt_reg_addr =
2444                         IXGBE_PCI_REG_ADDR(hw, IXGBE_VFRDT(queue_idx));
2445                 rxq->rdh_reg_addr =
2446                         IXGBE_PCI_REG_ADDR(hw, IXGBE_VFRDH(queue_idx));
2447         } else {
2449                 rxq->rdt_reg_addr =
2450                         IXGBE_PCI_REG_ADDR(hw, IXGBE_RDT(rxq->reg_idx));
2451                 rxq->rdh_reg_addr =
2452                         IXGBE_PCI_REG_ADDR(hw, IXGBE_RDH(rxq->reg_idx));
2453         }
2454
2455         rxq->rx_ring_phys_addr = rte_mem_phy2mch(rz->memseg_id, rz->phys_addr);
2456         rxq->rx_ring = (union ixgbe_adv_rx_desc *) rz->addr;
2457
2458         /*
2459          * Certain constraints must be met in order to use the bulk buffer
2460          * allocation Rx burst function. If any Rx queue doesn't meet them,
2461          * the feature should be disabled for the whole port.
2462          */
2463         if (check_rx_burst_bulk_alloc_preconditions(rxq)) {
2464                 PMD_INIT_LOG(DEBUG, "queue[%d] doesn't meet Rx Bulk Alloc "
2465                                     "preconditions - canceling the feature for "
2466                                     "the whole port[%d]",
2467                              rxq->queue_id, rxq->port_id);
2468                 adapter->rx_bulk_alloc_allowed = false;
2469         }
2470
2471         /*
2472          * Allocate software ring. Allow for space at the end of the
2473          * S/W ring to make sure look-ahead logic in bulk alloc Rx burst
2474          * function does not access an invalid memory region.
2475          */
2476         len = nb_desc;
2477         if (adapter->rx_bulk_alloc_allowed)
2478                 len += RTE_PMD_IXGBE_RX_MAX_BURST;
2479
2480         rxq->sw_ring = rte_zmalloc_socket("rxq->sw_ring",
2481                                           sizeof(struct ixgbe_rx_entry) * len,
2482                                           RTE_CACHE_LINE_SIZE, socket_id);
2483         if (!rxq->sw_ring) {
2484                 ixgbe_rx_queue_release(rxq);
2485                 return -ENOMEM;
2486         }
2487
2488         /*
2489          * Always allocate even if it's not going to be needed in order to
2490          * simplify the code.
2491          *
2492          * This ring is used in LRO and Scattered Rx cases and Scattered Rx may
2493          * be requested in ixgbe_dev_rx_init(), which is called later from
2494          * dev_start() flow.
2495          */
2496         rxq->sw_sc_ring =
2497                 rte_zmalloc_socket("rxq->sw_sc_ring",
2498                                    sizeof(struct ixgbe_scattered_rx_entry) * len,
2499                                    RTE_CACHE_LINE_SIZE, socket_id);
2500         if (!rxq->sw_sc_ring) {
2501                 ixgbe_rx_queue_release(rxq);
2502                 return -ENOMEM;
2503         }
2504
2505         PMD_INIT_LOG(DEBUG, "sw_ring=%p sw_sc_ring=%p hw_ring=%p "
2506                             "dma_addr=0x%"PRIx64,
2507                      rxq->sw_ring, rxq->sw_sc_ring, rxq->rx_ring,
2508                      rxq->rx_ring_phys_addr);
2509
2510         if (!rte_is_power_of_2(nb_desc)) {
2511                 PMD_INIT_LOG(DEBUG, "queue[%d] doesn't meet Vector Rx "
2512                                     "preconditions - canceling the feature for "
2513                                     "the whole port[%d]",
2514                              rxq->queue_id, rxq->port_id);
2515                 adapter->rx_vec_allowed = false;
2516         } else
2517                 ixgbe_rxq_vec_setup(rxq);
2518
2519         dev->data->rx_queues[queue_idx] = rxq;
2520
2521         ixgbe_reset_rx_queue(adapter, rxq);
2522
2523         return 0;
2524 }
2525
2526 uint32_t
2527 ixgbe_dev_rx_queue_count(struct rte_eth_dev *dev, uint16_t rx_queue_id)
2528 {
2529 #define IXGBE_RXQ_SCAN_INTERVAL 4
2530         volatile union ixgbe_adv_rx_desc *rxdp;
2531         struct ixgbe_rx_queue *rxq;
2532         uint32_t desc = 0;
2533
2534         if (rx_queue_id >= dev->data->nb_rx_queues) {
2535                 PMD_RX_LOG(ERR, "Invalid RX queue id=%d", rx_queue_id);
2536                 return 0;
2537         }
2538
2539         rxq = dev->data->rx_queues[rx_queue_id];
2540         rxdp = &(rxq->rx_ring[rxq->rx_tail]);
2541
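        /*
         * Scan forward from the current tail in steps of
         * IXGBE_RXQ_SCAN_INTERVAL descriptors while the sampled descriptor
         * has its DD bit set; the returned count is therefore an estimate
         * with that granularity.
         */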
2542         while ((desc < rxq->nb_rx_desc) &&
2543                 (rxdp->wb.upper.status_error &
2544                         rte_cpu_to_le_32(IXGBE_RXDADV_STAT_DD))) {
2545                 desc += IXGBE_RXQ_SCAN_INTERVAL;
2546                 rxdp += IXGBE_RXQ_SCAN_INTERVAL;
2547                 if (rxq->rx_tail + desc >= rxq->nb_rx_desc)
2548                         rxdp = &(rxq->rx_ring[rxq->rx_tail +
2549                                 desc - rxq->nb_rx_desc]);
2550         }
2551
2552         return desc;
2553 }
2554
2555 int
2556 ixgbe_dev_rx_descriptor_done(void *rx_queue, uint16_t offset)
2557 {
2558         volatile union ixgbe_adv_rx_desc *rxdp;
2559         struct ixgbe_rx_queue *rxq = rx_queue;
2560         uint32_t desc;
2561
2562         if (unlikely(offset >= rxq->nb_rx_desc))
2563                 return 0;
2564         desc = rxq->rx_tail + offset;
2565         if (desc >= rxq->nb_rx_desc)
2566                 desc -= rxq->nb_rx_desc;
2567
2568         rxdp = &rxq->rx_ring[desc];
2569         return !!(rxdp->wb.upper.status_error &
2570                         rte_cpu_to_le_32(IXGBE_RXDADV_STAT_DD));
2571 }
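
/*
 * Worked example for the wrap-around handling above (illustrative values
 * only): with nb_rx_desc = 512, rx_tail = 500 and offset = 50, desc becomes
 * 500 + 50 - 512 = 38, so the DD bit of descriptor 38 is the one checked.
 */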
2572
2573 void __attribute__((cold))
2574 ixgbe_dev_clear_queues(struct rte_eth_dev *dev)
2575 {
2576         unsigned i;
2577         struct ixgbe_adapter *adapter =
2578                 (struct ixgbe_adapter *)dev->data->dev_private;
2579
2580         PMD_INIT_FUNC_TRACE();
2581
2582         for (i = 0; i < dev->data->nb_tx_queues; i++) {
2583                 struct ixgbe_tx_queue *txq = dev->data->tx_queues[i];
2584                 if (txq != NULL) {
2585                         txq->ops->release_mbufs(txq);
2586                         txq->ops->reset(txq);
2587                 }
2588         }
2589
2590         for (i = 0; i < dev->data->nb_rx_queues; i++) {
2591                 struct ixgbe_rx_queue *rxq = dev->data->rx_queues[i];
2592                 if (rxq != NULL) {
2593                         ixgbe_rx_queue_release_mbufs(rxq);
2594                         ixgbe_reset_rx_queue(adapter, rxq);
2595                 }
2596         }
2597 }
2598
2599 void
2600 ixgbe_dev_free_queues(struct rte_eth_dev *dev)
2601 {
2602         unsigned i;
2603
2604         PMD_INIT_FUNC_TRACE();
2605
2606         for (i = 0; i < dev->data->nb_rx_queues; i++) {
2607                 ixgbe_dev_rx_queue_release(dev->data->rx_queues[i]);
2608                 dev->data->rx_queues[i] = NULL;
2609         }
2610         dev->data->nb_rx_queues = 0;
2611
2612         for (i = 0; i < dev->data->nb_tx_queues; i++) {
2613                 ixgbe_dev_tx_queue_release(dev->data->tx_queues[i]);
2614                 dev->data->tx_queues[i] = NULL;
2615         }
2616         dev->data->nb_tx_queues = 0;
2617 }
2618
2619 /*********************************************************************
2620  *
2621  *  Device RX/TX init functions
2622  *
2623  **********************************************************************/
2624
2625 /**
2626  * Receive Side Scaling (RSS)
2627  * See section 7.1.2.8 in the following document:
2628  *     "Intel 82599 10 GbE Controller Datasheet" - Revision 2.1 October 2009
2629  *
2630  * Principles:
2631  * The source and destination IP addresses of the IP header and the source
2632  * and destination ports of TCP/UDP headers, if any, of received packets are
2633  * hashed against a configurable random key to compute a 32-bit RSS hash result.
2634  * The seven (7) LSBs of the 32-bit hash result are used as an index into a
2635  * 128-entry redirection table (RETA).  Each entry of the RETA provides a 3-bit
2636  * RSS output index which is used as the RX queue index where to store the
2637  * received packets.
2638  * The following output is supplied in the RX write-back descriptor:
2639  *     - 32-bit result of the Microsoft RSS hash function,
2640  *     - 4-bit RSS type field.
2641  */
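
/*
 * Illustrative example of the queue selection described above: if the RSS
 * hash of a packet is 0x1234ABCD, its seven LSBs (0x4D = 77) select RETA
 * entry 77, and the queue index stored there becomes the Rx queue, i.e.
 * conceptually:
 *
 *     rx_queue = reta[rss_hash & 0x7F];
 */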
2642
2643 /*
2644  * RSS random key supplied in section 7.1.2.8.3 of the Intel 82599 datasheet.
2645  * Used as the default key.
2646  */
2647 static uint8_t rss_intel_key[40] = {
2648         0x6D, 0x5A, 0x56, 0xDA, 0x25, 0x5B, 0x0E, 0xC2,
2649         0x41, 0x67, 0x25, 0x3D, 0x43, 0xA3, 0x8F, 0xB0,
2650         0xD0, 0xCA, 0x2B, 0xCB, 0xAE, 0x7B, 0x30, 0xB4,
2651         0x77, 0xCB, 0x2D, 0xA3, 0x80, 0x30, 0xF2, 0x0C,
2652         0x6A, 0x42, 0xB7, 0x3B, 0xBE, 0xAC, 0x01, 0xFA,
2653 };
2654
2655 static void
2656 ixgbe_rss_disable(struct rte_eth_dev *dev)
2657 {
2658         struct ixgbe_hw *hw;
2659         uint32_t mrqc;
2660         uint32_t mrqc_reg;
2661
2662         hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
2663         mrqc_reg = ixgbe_mrqc_reg_get(hw->mac.type);
2664         mrqc = IXGBE_READ_REG(hw, mrqc_reg);
2665         mrqc &= ~IXGBE_MRQC_RSSEN;
2666         IXGBE_WRITE_REG(hw, mrqc_reg, mrqc);
2667 }
2668
2669 static void
2670 ixgbe_hw_rss_hash_set(struct ixgbe_hw *hw, struct rte_eth_rss_conf *rss_conf)
2671 {
2672         uint8_t  *hash_key;
2673         uint32_t mrqc;
2674         uint32_t rss_key;
2675         uint64_t rss_hf;
2676         uint16_t i;
2677         uint32_t mrqc_reg;
2678         uint32_t rssrk_reg;
2679
2680         mrqc_reg = ixgbe_mrqc_reg_get(hw->mac.type);
2681         rssrk_reg = ixgbe_rssrk_reg_get(hw->mac.type, 0);
2682
2683         hash_key = rss_conf->rss_key;
2684         if (hash_key != NULL) {
2685                 /* Fill in RSS hash key */
2686                 for (i = 0; i < 10; i++) {
2687                         rss_key  = hash_key[(i * 4)];
2688                         rss_key |= hash_key[(i * 4) + 1] << 8;
2689                         rss_key |= hash_key[(i * 4) + 2] << 16;
2690                         rss_key |= hash_key[(i * 4) + 3] << 24;
2691                         IXGBE_WRITE_REG_ARRAY(hw, rssrk_reg, i, rss_key);
2692                 }
2693         }
2694
2695         /* Set configured hashing protocols in MRQC register */
2696         rss_hf = rss_conf->rss_hf;
2697         mrqc = IXGBE_MRQC_RSSEN; /* Enable RSS */
2698         if (rss_hf & ETH_RSS_IPV4)
2699                 mrqc |= IXGBE_MRQC_RSS_FIELD_IPV4;
2700         if (rss_hf & ETH_RSS_NONFRAG_IPV4_TCP)
2701                 mrqc |= IXGBE_MRQC_RSS_FIELD_IPV4_TCP;
2702         if (rss_hf & ETH_RSS_IPV6)
2703                 mrqc |= IXGBE_MRQC_RSS_FIELD_IPV6;
2704         if (rss_hf & ETH_RSS_IPV6_EX)
2705                 mrqc |= IXGBE_MRQC_RSS_FIELD_IPV6_EX;
2706         if (rss_hf & ETH_RSS_NONFRAG_IPV6_TCP)
2707                 mrqc |= IXGBE_MRQC_RSS_FIELD_IPV6_TCP;
2708         if (rss_hf & ETH_RSS_IPV6_TCP_EX)
2709                 mrqc |= IXGBE_MRQC_RSS_FIELD_IPV6_EX_TCP;
2710         if (rss_hf & ETH_RSS_NONFRAG_IPV4_UDP)
2711                 mrqc |= IXGBE_MRQC_RSS_FIELD_IPV4_UDP;
2712         if (rss_hf & ETH_RSS_NONFRAG_IPV6_UDP)
2713                 mrqc |= IXGBE_MRQC_RSS_FIELD_IPV6_UDP;
2714         if (rss_hf & ETH_RSS_IPV6_UDP_EX)
2715                 mrqc |= IXGBE_MRQC_RSS_FIELD_IPV6_EX_UDP;
2716         IXGBE_WRITE_REG(hw, mrqc_reg, mrqc);
2717 }
2718
2719 int
2720 ixgbe_dev_rss_hash_update(struct rte_eth_dev *dev,
2721                           struct rte_eth_rss_conf *rss_conf)
2722 {
2723         struct ixgbe_hw *hw;
2724         uint32_t mrqc;
2725         uint64_t rss_hf;
2726         uint32_t mrqc_reg;
2727
2728         hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
2729
2730         if (!ixgbe_rss_update_sp(hw->mac.type)) {
2731                 PMD_DRV_LOG(ERR, "RSS hash update is not supported on this "
2732                         "NIC.");
2733                 return -ENOTSUP;
2734         }
2735         mrqc_reg = ixgbe_mrqc_reg_get(hw->mac.type);
2736
2737         /*
2738          * Excerpt from section 7.1.2.8 Receive-Side Scaling (RSS):
2739          *     "RSS enabling cannot be done dynamically while it must be
2740          *      preceded by a software reset"
2741          * Before changing anything, first check that the update RSS operation
2742          * Before changing anything, first check that the requested update
2743          * does not attempt to disable RSS if RSS was enabled at
2744          * initialization time, and does not attempt to enable RSS if RSS
2745          * was disabled at initialization time.
2746         rss_hf = rss_conf->rss_hf & IXGBE_RSS_OFFLOAD_ALL;
2747         mrqc = IXGBE_READ_REG(hw, mrqc_reg);
2748         if (!(mrqc & IXGBE_MRQC_RSSEN)) { /* RSS disabled */
2749                 if (rss_hf != 0) /* Enable RSS */
2750                         return -(EINVAL);
2751                 return 0; /* Nothing to do */
2752         }
2753         /* RSS enabled */
2754         if (rss_hf == 0) /* Disable RSS */
2755                 return -(EINVAL);
2756         ixgbe_hw_rss_hash_set(hw, rss_conf);
2757         return 0;
2758 }
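
/*
 * Minimal application-level sketch of how this callback is reached
 * (illustrative only, error handling omitted; "port_id" stands for the
 * application's port number). The hash protocols are updated through the
 * generic ethdev API, which dispatches to ixgbe_dev_rss_hash_update():
 *
 *     struct rte_eth_rss_conf conf = {
 *             .rss_key = NULL,  (NULL keeps the current 40-byte key)
 *             .rss_hf  = ETH_RSS_IPV4 | ETH_RSS_NONFRAG_IPV4_TCP,
 *     };
 *     rte_eth_dev_rss_hash_update(port_id, &conf);
 */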
2759
2760 int
2761 ixgbe_dev_rss_hash_conf_get(struct rte_eth_dev *dev,
2762                             struct rte_eth_rss_conf *rss_conf)
2763 {
2764         struct ixgbe_hw *hw;
2765         uint8_t *hash_key;
2766         uint32_t mrqc;
2767         uint32_t rss_key;
2768         uint64_t rss_hf;
2769         uint16_t i;
2770         uint32_t mrqc_reg;
2771         uint32_t rssrk_reg;
2772
2773         hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
2774         mrqc_reg = ixgbe_mrqc_reg_get(hw->mac.type);
2775         rssrk_reg = ixgbe_rssrk_reg_get(hw->mac.type, 0);
2776         hash_key = rss_conf->rss_key;
2777         if (hash_key != NULL) {
2778                 /* Return RSS hash key */
2779                 for (i = 0; i < 10; i++) {
2780                         rss_key = IXGBE_READ_REG_ARRAY(hw, rssrk_reg, i);
2781                         hash_key[(i * 4)] = rss_key & 0x000000FF;
2782                         hash_key[(i * 4) + 1] = (rss_key >> 8) & 0x000000FF;
2783                         hash_key[(i * 4) + 2] = (rss_key >> 16) & 0x000000FF;
2784                         hash_key[(i * 4) + 3] = (rss_key >> 24) & 0x000000FF;
2785                 }
2786         }
2787
2788         /* Get RSS functions configured in MRQC register */
2789         mrqc = IXGBE_READ_REG(hw, mrqc_reg);
2790         if ((mrqc & IXGBE_MRQC_RSSEN) == 0) { /* RSS is disabled */
2791                 rss_conf->rss_hf = 0;
2792                 return 0;
2793         }
2794         rss_hf = 0;
2795         if (mrqc & IXGBE_MRQC_RSS_FIELD_IPV4)
2796                 rss_hf |= ETH_RSS_IPV4;
2797         if (mrqc & IXGBE_MRQC_RSS_FIELD_IPV4_TCP)
2798                 rss_hf |= ETH_RSS_NONFRAG_IPV4_TCP;
2799         if (mrqc & IXGBE_MRQC_RSS_FIELD_IPV6)
2800                 rss_hf |= ETH_RSS_IPV6;
2801         if (mrqc & IXGBE_MRQC_RSS_FIELD_IPV6_EX)
2802                 rss_hf |= ETH_RSS_IPV6_EX;
2803         if (mrqc & IXGBE_MRQC_RSS_FIELD_IPV6_TCP)
2804                 rss_hf |= ETH_RSS_NONFRAG_IPV6_TCP;
2805         if (mrqc & IXGBE_MRQC_RSS_FIELD_IPV6_EX_TCP)
2806                 rss_hf |= ETH_RSS_IPV6_TCP_EX;
2807         if (mrqc & IXGBE_MRQC_RSS_FIELD_IPV4_UDP)
2808                 rss_hf |= ETH_RSS_NONFRAG_IPV4_UDP;
2809         if (mrqc & IXGBE_MRQC_RSS_FIELD_IPV6_UDP)
2810                 rss_hf |= ETH_RSS_NONFRAG_IPV6_UDP;
2811         if (mrqc & IXGBE_MRQC_RSS_FIELD_IPV6_EX_UDP)
2812                 rss_hf |= ETH_RSS_IPV6_UDP_EX;
2813         rss_conf->rss_hf = rss_hf;
2814         return 0;
2815 }
2816
2817 static void
2818 ixgbe_rss_configure(struct rte_eth_dev *dev)
2819 {
2820         struct rte_eth_rss_conf rss_conf;
2821         struct ixgbe_hw *hw;
2822         uint32_t reta;
2823         uint16_t i;
2824         uint16_t j;
2825         uint16_t sp_reta_size;
2826         uint32_t reta_reg;
2827
2828         PMD_INIT_FUNC_TRACE();
2829         hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
2830
2831         sp_reta_size = ixgbe_reta_size_get(hw->mac.type);
2832
2833         /*
2834          * Fill in redirection table
2835          * The byte-swap is needed because NIC registers are in
2836          * little-endian order.
2837          */
2838         reta = 0;
2839         for (i = 0, j = 0; i < sp_reta_size; i++, j++) {
2840                 reta_reg = ixgbe_reta_reg_get(hw->mac.type, i);
2841
2842                 if (j == dev->data->nb_rx_queues)
2843                         j = 0;
2844                 reta = (reta << 8) | j;
2845                 if ((i & 3) == 3)
2846                         IXGBE_WRITE_REG(hw, reta_reg,
2847                                         rte_bswap32(reta));
2848         }
2849
2850         /*
2851          * Configure the RSS key and the RSS protocols used to compute
2852          * the RSS hash of input packets.
2853          */
2854         rss_conf = dev->data->dev_conf.rx_adv_conf.rss_conf;
2855         if ((rss_conf.rss_hf & IXGBE_RSS_OFFLOAD_ALL) == 0) {
2856                 ixgbe_rss_disable(dev);
2857                 return;
2858         }
2859         if (rss_conf.rss_key == NULL)
2860                 rss_conf.rss_key = rss_intel_key; /* Default hash key */
2861         ixgbe_hw_rss_hash_set(hw, &rss_conf);
2862 }
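
/*
 * Example of the RETA fill pattern built above (illustrative): with 4 Rx
 * queues configured, the sp_reta_size entries are written round-robin as
 * 0, 1, 2, 3, 0, 1, 2, 3, ... so the RSS hash spreads packets evenly over
 * all configured queues.
 */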
2863
2864 #define NUM_VFTA_REGISTERS 128
2865 #define NIC_RX_BUFFER_SIZE 0x200
2866 #define X550_RX_BUFFER_SIZE 0x180
2867
2868 static void
2869 ixgbe_vmdq_dcb_configure(struct rte_eth_dev *dev)
2870 {
2871         struct rte_eth_vmdq_dcb_conf *cfg;
2872         struct ixgbe_hw *hw;
2873         enum rte_eth_nb_pools num_pools;
2874         uint32_t mrqc, vt_ctl, queue_mapping, vlanctrl;
2875         uint16_t pbsize;
2876         uint8_t nb_tcs; /* number of traffic classes */
2877         int i;
2878
2879         PMD_INIT_FUNC_TRACE();
2880         hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
2881         cfg = &dev->data->dev_conf.rx_adv_conf.vmdq_dcb_conf;
2882         num_pools = cfg->nb_queue_pools;
2883         /* Check we have a valid number of pools */
2884         if (num_pools != ETH_16_POOLS && num_pools != ETH_32_POOLS) {
2885                 ixgbe_rss_disable(dev);
2886                 return;
2887         }
2888         /* 16 pools -> 8 traffic classes, 32 pools -> 4 traffic classes */
2889         nb_tcs = (uint8_t)(ETH_VMDQ_DCB_NUM_QUEUES / (int)num_pools);
2890
2891         /*
2892          * RXPBSIZE
2893          * split rx buffer up into sections, each for 1 traffic class
2894          */
2895         switch (hw->mac.type) {
2896         case ixgbe_mac_X550:
2897         case ixgbe_mac_X550EM_x:
2898                 pbsize = (uint16_t)(X550_RX_BUFFER_SIZE / nb_tcs);
2899                 break;
2900         default:
2901                 pbsize = (uint16_t)(NIC_RX_BUFFER_SIZE / nb_tcs);
2902                 break;
2903         }
2904         for (i = 0 ; i < nb_tcs; i++) {
2905                 uint32_t rxpbsize = IXGBE_READ_REG(hw, IXGBE_RXPBSIZE(i));
2906                 rxpbsize &= (~(0x3FF << IXGBE_RXPBSIZE_SHIFT));
2907                 /* clear 10 bits. */
2908                 rxpbsize |= (pbsize << IXGBE_RXPBSIZE_SHIFT); /* set value */
2909                 IXGBE_WRITE_REG(hw, IXGBE_RXPBSIZE(i), rxpbsize);
2910         }
2911         /* zero alloc all unused TCs */
2912         for (i = nb_tcs; i < ETH_DCB_NUM_USER_PRIORITIES; i++) {
2913                 uint32_t rxpbsize = IXGBE_READ_REG(hw, IXGBE_RXPBSIZE(i));
2914                 rxpbsize &= (~( 0x3FF << IXGBE_RXPBSIZE_SHIFT ));
2915                 /* clear 10 bits. */
2916                 IXGBE_WRITE_REG(hw, IXGBE_RXPBSIZE(i), rxpbsize);
2917         }
2918
2919         /* MRQC: enable vmdq and dcb */
2920         mrqc = ((num_pools == ETH_16_POOLS) ? \
2921                 IXGBE_MRQC_VMDQRT8TCEN : IXGBE_MRQC_VMDQRT4TCEN );
2922         IXGBE_WRITE_REG(hw, IXGBE_MRQC, mrqc);
2923
2924         /* PFVTCTL: turn on virtualisation and set the default pool */
2925         vt_ctl = IXGBE_VT_CTL_VT_ENABLE | IXGBE_VT_CTL_REPLEN;
2926         if (cfg->enable_default_pool) {
2927                 vt_ctl |= (cfg->default_pool << IXGBE_VT_CTL_POOL_SHIFT);
2928         } else {
2929                 vt_ctl |= IXGBE_VT_CTL_DIS_DEFPL;
2930         }
2931
2932         IXGBE_WRITE_REG(hw, IXGBE_VT_CTL, vt_ctl);
2933
2934         /* RTRUP2TC: mapping user priorities to traffic classes (TCs) */
2935         queue_mapping = 0;
2936         for (i = 0; i < ETH_DCB_NUM_USER_PRIORITIES; i++)
2937                 /*
2938                  * mapping is done with 3 bits per priority,
2939                  * so shift by i*3 each time
2940                  */
2941                 queue_mapping |= ((cfg->dcb_tc[i] & 0x07) << (i * 3));
2942
2943         IXGBE_WRITE_REG(hw, IXGBE_RTRUP2TC, queue_mapping);
2944
2945         /* RTRPCS: DCB related */
2946         IXGBE_WRITE_REG(hw, IXGBE_RTRPCS, IXGBE_RMCS_RRM);
2947
2948         /* VLNCTRL: enable vlan filtering and allow all vlan tags through */
2949         vlanctrl = IXGBE_READ_REG(hw, IXGBE_VLNCTRL);
2950         vlanctrl |= IXGBE_VLNCTRL_VFE ; /* enable vlan filters */
2951         IXGBE_WRITE_REG(hw, IXGBE_VLNCTRL, vlanctrl);
2952
2953         /* VFTA - enable all vlan filters */
2954         for (i = 0; i < NUM_VFTA_REGISTERS; i++) {
2955                 IXGBE_WRITE_REG(hw, IXGBE_VFTA(i), 0xFFFFFFFF);
2956         }
2957
2958         /* VFRE: pool enabling for receive - 16 or 32 */
2959         IXGBE_WRITE_REG(hw, IXGBE_VFRE(0), \
2960                         num_pools == ETH_16_POOLS ? 0xFFFF : 0xFFFFFFFF);
2961
2962         /*
2963          * MPSAR - allow pools to read specific mac addresses
2964          * In this case, all pools should be able to read from mac addr 0
2965          */
2966         IXGBE_WRITE_REG(hw, IXGBE_MPSAR_LO(0), 0xFFFFFFFF);
2967         IXGBE_WRITE_REG(hw, IXGBE_MPSAR_HI(0), 0xFFFFFFFF);
2968
2969         /* PFVLVF, PFVLVFB: set up filters for vlan tags as configured */
2970         for (i = 0; i < cfg->nb_pool_maps; i++) {
2971                 /* set vlan id in VF register and set the valid bit */
2972                 IXGBE_WRITE_REG(hw, IXGBE_VLVF(i), (IXGBE_VLVF_VIEN | \
2973                                 (cfg->pool_map[i].vlan_id & 0xFFF)));
2974                 /*
2975                  * Put the allowed pools in the VLVFB register. As we only have
2976                  * 16 or 32 pools, only the first of the two pool-enable
2977                  * registers (pools 0-31) is needed.
2978                  */
2979                 IXGBE_WRITE_REG(hw, IXGBE_VLVFB(i*2), cfg->pool_map[i].pools);
2980         }
2981 }
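
/*
 * Sizing example for the RXPBSIZE split above (illustrative, assuming the
 * register field counts in KB): with 16 pools the configuration maps to
 * 8 TCs, so a non-X550 port divides NIC_RX_BUFFER_SIZE (0x200 = 512) into
 * 512 / 8 = 64 per TC; with 32 pools it is 4 TCs of 128 each and the four
 * unused TC buffers are cleared.
 */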
2982
2983 /**
2984  * ixgbe_dcb_tx_hw_config - Configure general DCB TX parameters
2985  * @hw: pointer to hardware structure
2986  * @dcb_config: pointer to ixgbe_dcb_config structure
2987  */
2988 static void
2989 ixgbe_dcb_tx_hw_config(struct ixgbe_hw *hw,
2990                struct ixgbe_dcb_config *dcb_config)
2991 {
2992         uint32_t reg;
2993         uint32_t q;
2994
2995         PMD_INIT_FUNC_TRACE();
2996         if (hw->mac.type != ixgbe_mac_82598EB) {
2997                 /* Disable the Tx desc arbiter so that MTQC can be changed */
2998                 reg = IXGBE_READ_REG(hw, IXGBE_RTTDCS);
2999                 reg |= IXGBE_RTTDCS_ARBDIS;
3000                 IXGBE_WRITE_REG(hw, IXGBE_RTTDCS, reg);
3001
3002                 /* Enable DCB for Tx with 8 TCs */
3003                 if (dcb_config->num_tcs.pg_tcs == 8) {
3004                         reg = IXGBE_MTQC_RT_ENA | IXGBE_MTQC_8TC_8TQ;
3005                 }
3006                 else {
3007                         reg = IXGBE_MTQC_RT_ENA | IXGBE_MTQC_4TC_4TQ;
3008                 }
3009                 if (dcb_config->vt_mode)
3010                     reg |= IXGBE_MTQC_VT_ENA;
3011                 IXGBE_WRITE_REG(hw, IXGBE_MTQC, reg);
3012
3013                 /* Disable drop for all queues */
3014                 for (q = 0; q < 128; q++)
3015                         IXGBE_WRITE_REG(hw, IXGBE_QDE,
3016                      (IXGBE_QDE_WRITE | (q << IXGBE_QDE_IDX_SHIFT)));
3017
3018                 /* Enable the Tx desc arbiter */
3019                 reg = IXGBE_READ_REG(hw, IXGBE_RTTDCS);
3020                 reg &= ~IXGBE_RTTDCS_ARBDIS;
3021                 IXGBE_WRITE_REG(hw, IXGBE_RTTDCS, reg);
3022
3023                 /* Enable Security TX Buffer IFG for DCB */
3024                 reg = IXGBE_READ_REG(hw, IXGBE_SECTXMINIFG);
3025                 reg |= IXGBE_SECTX_DCB;
3026                 IXGBE_WRITE_REG(hw, IXGBE_SECTXMINIFG, reg);
3027         }
3028         return;
3029 }
3030
3031 /**
3032  * ixgbe_vmdq_dcb_hw_tx_config - Configure general VMDQ+DCB TX parameters
3033  * @dev: pointer to rte_eth_dev structure
3034  * @dcb_config: pointer to ixgbe_dcb_config structure
3035  */
3036 static void
3037 ixgbe_vmdq_dcb_hw_tx_config(struct rte_eth_dev *dev,
3038                         struct ixgbe_dcb_config *dcb_config)
3039 {
3040         struct rte_eth_vmdq_dcb_tx_conf *vmdq_tx_conf =
3041                         &dev->data->dev_conf.tx_adv_conf.vmdq_dcb_tx_conf;
3042         struct ixgbe_hw *hw =
3043                         IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
3044
3045         PMD_INIT_FUNC_TRACE();
3046         if (hw->mac.type != ixgbe_mac_82598EB)
3047                 /*PF VF Transmit Enable*/
3048                 IXGBE_WRITE_REG(hw, IXGBE_VFTE(0),
3049                         vmdq_tx_conf->nb_queue_pools == ETH_16_POOLS ? 0xFFFF : 0xFFFFFFFF);
3050
3051         /*Configure general DCB TX parameters*/
3052         ixgbe_dcb_tx_hw_config(hw,dcb_config);
3053         return;
3054 }
3055
3056 static void
3057 ixgbe_vmdq_dcb_rx_config(struct rte_eth_dev *dev,
3058                         struct ixgbe_dcb_config *dcb_config)
3059 {
3060         struct rte_eth_vmdq_dcb_conf *vmdq_rx_conf =
3061                         &dev->data->dev_conf.rx_adv_conf.vmdq_dcb_conf;
3062         struct ixgbe_dcb_tc_config *tc;
3063         uint8_t i,j;
3064
3065         /* convert rte_eth_conf.rx_adv_conf to struct ixgbe_dcb_config */
3066         if (vmdq_rx_conf->nb_queue_pools == ETH_16_POOLS ) {
3067                 dcb_config->num_tcs.pg_tcs = ETH_8_TCS;
3068                 dcb_config->num_tcs.pfc_tcs = ETH_8_TCS;
3069         }
3070         else {
3071                 dcb_config->num_tcs.pg_tcs = ETH_4_TCS;
3072                 dcb_config->num_tcs.pfc_tcs = ETH_4_TCS;
3073         }
3074         /* User Priority to Traffic Class mapping */
3075         for (i = 0; i < ETH_DCB_NUM_USER_PRIORITIES; i++) {
3076                 j = vmdq_rx_conf->dcb_tc[i];
3077                 tc = &dcb_config->tc_config[j];
3078                 tc->path[IXGBE_DCB_RX_CONFIG].up_to_tc_bitmap =
3079                                                 (uint8_t)(1 << j);
3080         }
3081 }
3082
3083 static void
3084 ixgbe_dcb_vt_tx_config(struct rte_eth_dev *dev,
3085                         struct ixgbe_dcb_config *dcb_config)
3086 {
3087         struct rte_eth_vmdq_dcb_tx_conf *vmdq_tx_conf =
3088                         &dev->data->dev_conf.tx_adv_conf.vmdq_dcb_tx_conf;
3089         struct ixgbe_dcb_tc_config *tc;
3090         uint8_t i,j;
3091
3092         /* convert rte_eth_conf.rx_adv_conf to struct ixgbe_dcb_config */
3093         if (vmdq_tx_conf->nb_queue_pools == ETH_16_POOLS ) {
3094                 dcb_config->num_tcs.pg_tcs = ETH_8_TCS;
3095                 dcb_config->num_tcs.pfc_tcs = ETH_8_TCS;
3096         }
3097         else {
3098                 dcb_config->num_tcs.pg_tcs = ETH_4_TCS;
3099                 dcb_config->num_tcs.pfc_tcs = ETH_4_TCS;
3100         }
3101
3102         /* User Priority to Traffic Class mapping */
3103         for (i = 0; i < ETH_DCB_NUM_USER_PRIORITIES; i++) {
3104                 j = vmdq_tx_conf->dcb_tc[i];
3105                 tc = &dcb_config->tc_config[j];
3106                 tc->path[IXGBE_DCB_TX_CONFIG].up_to_tc_bitmap =
3107                                                 (uint8_t)(1 << j);
3108         }
3109         return;
3110 }
3111
3112 static void
3113 ixgbe_dcb_rx_config(struct rte_eth_dev *dev,
3114                 struct ixgbe_dcb_config *dcb_config)
3115 {
3116         struct rte_eth_dcb_rx_conf *rx_conf =
3117                         &dev->data->dev_conf.rx_adv_conf.dcb_rx_conf;
3118         struct ixgbe_dcb_tc_config *tc;
3119         uint8_t i,j;
3120
3121         dcb_config->num_tcs.pg_tcs = (uint8_t)rx_conf->nb_tcs;
3122         dcb_config->num_tcs.pfc_tcs = (uint8_t)rx_conf->nb_tcs;
3123
3124         /* User Priority to Traffic Class mapping */
3125         for (i = 0; i < ETH_DCB_NUM_USER_PRIORITIES; i++) {
3126                 j = rx_conf->dcb_tc[i];
3127                 tc = &dcb_config->tc_config[j];
3128                 tc->path[IXGBE_DCB_RX_CONFIG].up_to_tc_bitmap =
3129                                                 (uint8_t)(1 << j);
3130         }
3131 }
3132
3133 static void
3134 ixgbe_dcb_tx_config(struct rte_eth_dev *dev,
3135                 struct ixgbe_dcb_config *dcb_config)
3136 {
3137         struct rte_eth_dcb_tx_conf *tx_conf =
3138                         &dev->data->dev_conf.tx_adv_conf.dcb_tx_conf;
3139         struct ixgbe_dcb_tc_config *tc;
3140         uint8_t i,j;
3141
3142         dcb_config->num_tcs.pg_tcs = (uint8_t)tx_conf->nb_tcs;
3143         dcb_config->num_tcs.pfc_tcs = (uint8_t)tx_conf->nb_tcs;
3144
3145         /* User Priority to Traffic Class mapping */
3146         for (i = 0; i < ETH_DCB_NUM_USER_PRIORITIES; i++) {
3147                 j = tx_conf->dcb_tc[i];
3148                 tc = &dcb_config->tc_config[j];
3149                 tc->path[IXGBE_DCB_TX_CONFIG].up_to_tc_bitmap =
3150                                                 (uint8_t)(1 << j);
3151         }
3152 }
3153
3154 /**
3155  * ixgbe_dcb_rx_hw_config - Configure general DCB RX HW parameters
3156  * @hw: pointer to hardware structure
3157  * @dcb_config: pointer to ixgbe_dcb_config structure
3158  */
3159 static void
3160 ixgbe_dcb_rx_hw_config(struct ixgbe_hw *hw,
3161                struct ixgbe_dcb_config *dcb_config)
3162 {
3163         uint32_t reg;
3164         uint32_t vlanctrl;
3165         uint8_t i;
3166
3167         PMD_INIT_FUNC_TRACE();
3168         /*
3169          * Disable the arbiter before changing parameters
3170          * (always enable recycle mode; WSP)
3171          */
3172         reg = IXGBE_RTRPCS_RRM | IXGBE_RTRPCS_RAC | IXGBE_RTRPCS_ARBDIS;
3173         IXGBE_WRITE_REG(hw, IXGBE_RTRPCS, reg);
3174
3175         if (hw->mac.type != ixgbe_mac_82598EB) {
3176                 reg = IXGBE_READ_REG(hw, IXGBE_MRQC);
3177                 if (dcb_config->num_tcs.pg_tcs == 4) {
3178                         if (dcb_config->vt_mode)
3179                                 reg = (reg & ~IXGBE_MRQC_MRQE_MASK) |
3180                                         IXGBE_MRQC_VMDQRT4TCEN;
3181                         else {
3182                                 /* Whether the mode is DCB or DCB_RSS, just
3183                                  * set the MRQE to RSSXTCEN; RSS itself is
3184                                  * controlled by the RSS_FIELD bits.
3185                                  */
3186                                 IXGBE_WRITE_REG(hw, IXGBE_VT_CTL, 0);
3187                                 reg = (reg & ~IXGBE_MRQC_MRQE_MASK) |
3188                                         IXGBE_MRQC_RTRSS4TCEN;
3189                         }
3190                 }
3191                 if (dcb_config->num_tcs.pg_tcs == 8) {
3192                         if (dcb_config->vt_mode)
3193                                 reg = (reg & ~IXGBE_MRQC_MRQE_MASK) |
3194                                         IXGBE_MRQC_VMDQRT8TCEN;
3195                         else {
3196                                 IXGBE_WRITE_REG(hw, IXGBE_VT_CTL, 0);
3197                                 reg = (reg & ~IXGBE_MRQC_MRQE_MASK) |
3198                                         IXGBE_MRQC_RTRSS8TCEN;
3199                         }
3200                 }
3201
3202                 IXGBE_WRITE_REG(hw, IXGBE_MRQC, reg);
3203         }
3204
3205         /* VLNCTRL: enable vlan filtering and allow all vlan tags through */
3206         vlanctrl = IXGBE_READ_REG(hw, IXGBE_VLNCTRL);
3207         vlanctrl |= IXGBE_VLNCTRL_VFE ; /* enable vlan filters */
3208         IXGBE_WRITE_REG(hw, IXGBE_VLNCTRL, vlanctrl);
3209
3210         /* VFTA - enable all vlan filters */
3211         for (i = 0; i < NUM_VFTA_REGISTERS; i++) {
3212                 IXGBE_WRITE_REG(hw, IXGBE_VFTA(i), 0xFFFFFFFF);
3213         }
3214
3215         /*
3216          * Configure Rx packet plane (recycle mode; WSP) and
3217          * enable arbiter
3218          */
3219         reg = IXGBE_RTRPCS_RRM | IXGBE_RTRPCS_RAC;
3220         IXGBE_WRITE_REG(hw, IXGBE_RTRPCS, reg);
3221
3222         return;
3223 }
3224
3225 static void
3226 ixgbe_dcb_hw_arbite_rx_config(struct ixgbe_hw *hw, uint16_t *refill,
3227                         uint16_t *max,uint8_t *bwg_id, uint8_t *tsa, uint8_t *map)
3228 {
3229         switch (hw->mac.type) {
3230         case ixgbe_mac_82598EB:
3231                 ixgbe_dcb_config_rx_arbiter_82598(hw, refill, max, tsa);
3232                 break;
3233         case ixgbe_mac_82599EB:
3234         case ixgbe_mac_X540:
3235         case ixgbe_mac_X550:
3236         case ixgbe_mac_X550EM_x:
3237                 ixgbe_dcb_config_rx_arbiter_82599(hw, refill, max, bwg_id,
3238                                                   tsa, map);
3239                 break;
3240         default:
3241                 break;
3242         }
3243 }
3244
3245 static void
3246 ixgbe_dcb_hw_arbite_tx_config(struct ixgbe_hw *hw, uint16_t *refill, uint16_t *max,
3247                             uint8_t *bwg_id, uint8_t *tsa, uint8_t *map)
3248 {
3249         switch (hw->mac.type) {
3250         case ixgbe_mac_82598EB:
3251                 ixgbe_dcb_config_tx_desc_arbiter_82598(hw, refill, max, bwg_id,tsa);
3252                 ixgbe_dcb_config_tx_data_arbiter_82598(hw, refill, max, bwg_id,tsa);
3253                 break;
3254         case ixgbe_mac_82599EB:
3255         case ixgbe_mac_X540:
3256         case ixgbe_mac_X550:
3257         case ixgbe_mac_X550EM_x:
3258                 ixgbe_dcb_config_tx_desc_arbiter_82599(hw, refill, max, bwg_id,tsa);
3259                 ixgbe_dcb_config_tx_data_arbiter_82599(hw, refill, max, bwg_id,tsa, map);
3260                 break;
3261         default:
3262                 break;
3263         }
3264 }
3265
3266 #define DCB_RX_CONFIG  1
3267 #define DCB_TX_CONFIG  1
3268 #define DCB_TX_PB      1024
3269 /**
3270  * ixgbe_dcb_hw_configure - Enable DCB and configure
3271  * general DCB in VT mode and non-VT mode parameters
3272  * @dev: pointer to rte_eth_dev structure
3273  * @dcb_config: pointer to ixgbe_dcb_config structure
3274  */
3275 static int
3276 ixgbe_dcb_hw_configure(struct rte_eth_dev *dev,
3277                         struct ixgbe_dcb_config *dcb_config)
3278 {
3279         int     ret = 0;
3280         uint8_t i,pfc_en,nb_tcs;
3281         uint16_t pbsize, rx_buffer_size;
3282         uint8_t config_dcb_rx = 0;
3283         uint8_t config_dcb_tx = 0;
3284         uint8_t tsa[IXGBE_DCB_MAX_TRAFFIC_CLASS] = {0};
3285         uint8_t bwgid[IXGBE_DCB_MAX_TRAFFIC_CLASS] = {0};
3286         uint16_t refill[IXGBE_DCB_MAX_TRAFFIC_CLASS] = {0};
3287         uint16_t max[IXGBE_DCB_MAX_TRAFFIC_CLASS] = {0};
3288         uint8_t map[IXGBE_DCB_MAX_TRAFFIC_CLASS] = {0};
3289         struct ixgbe_dcb_tc_config *tc;
3290         uint32_t max_frame = dev->data->mtu + ETHER_HDR_LEN + ETHER_CRC_LEN;
3291         struct ixgbe_hw *hw =
3292                         IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
3293
3294         switch(dev->data->dev_conf.rxmode.mq_mode){
3295         case ETH_MQ_RX_VMDQ_DCB:
3296                 dcb_config->vt_mode = true;
3297                 if (hw->mac.type != ixgbe_mac_82598EB) {
3298                         config_dcb_rx = DCB_RX_CONFIG;
3299                         /*
3300                          * get DCB and VT RX configuration parameters
3301                          * from rte_eth_conf
3302                          */
3303                         ixgbe_vmdq_dcb_rx_config(dev, dcb_config);
3304                         /*Configure general VMDQ and DCB RX parameters*/
3305                         ixgbe_vmdq_dcb_configure(dev);
3306                 }
3307                 break;
3308         case ETH_MQ_RX_DCB:
3309         case ETH_MQ_RX_DCB_RSS:
3310                 dcb_config->vt_mode = false;
3311                 config_dcb_rx = DCB_RX_CONFIG;
3312                 /* Get DCB RX configuration parameters from rte_eth_conf */
3313                 ixgbe_dcb_rx_config(dev, dcb_config);
3314                 /* Configure general DCB RX parameters */
3315                 ixgbe_dcb_rx_hw_config(hw, dcb_config);
3316                 break;
3317         default:
3318                 PMD_INIT_LOG(ERR, "Incorrect DCB RX mode configuration");
3319                 break;
3320         }
3321         switch (dev->data->dev_conf.txmode.mq_mode) {
3322         case ETH_MQ_TX_VMDQ_DCB:
3323                 dcb_config->vt_mode = true;
3324                 config_dcb_tx = DCB_TX_CONFIG;
3325                 /* get DCB and VT TX configuration parameters from rte_eth_conf */
3326                 ixgbe_dcb_vt_tx_config(dev,dcb_config);
3327                 /*Configure general VMDQ and DCB TX parameters*/
3328                 ixgbe_vmdq_dcb_hw_tx_config(dev,dcb_config);
3329                 break;
3330
3331         case ETH_MQ_TX_DCB:
3332                 dcb_config->vt_mode = false;
3333                 config_dcb_tx = DCB_TX_CONFIG;
3334                 /*get DCB TX configuration parameters from rte_eth_conf*/
3335                 ixgbe_dcb_tx_config(dev, dcb_config);
3336                 /*Configure general DCB TX parameters*/
3337                 ixgbe_dcb_tx_hw_config(hw, dcb_config);
3338                 break;
3339         default:
3340                 PMD_INIT_LOG(ERR, "Incorrect DCB TX mode configuration");
3341                 break;
3342         }
3343
3344         nb_tcs = dcb_config->num_tcs.pfc_tcs;
3345         /* Unpack map */
3346         ixgbe_dcb_unpack_map_cee(dcb_config, IXGBE_DCB_RX_CONFIG, map);
3347         if(nb_tcs == ETH_4_TCS) {
3348                 /* Avoid un-configured priority mapping to TC0 */
3349                 uint8_t j = 4;
3350                 uint8_t mask = 0xFF;
3351                 for (i = 0; i < ETH_DCB_NUM_USER_PRIORITIES - 4; i++)
3352                         mask = (uint8_t)(mask & (~ (1 << map[i])));
3353                 for (i = 0; mask && (i < IXGBE_DCB_MAX_TRAFFIC_CLASS); i++) {
3354                         if ((mask & 0x1) && (j < ETH_DCB_NUM_USER_PRIORITIES))
3355                                 map[j++] = i;
3356                         mask >>= 1;
3357                 }
3358                 /* Re-configure 4 TCs BW */
3359                 for (i = 0; i < nb_tcs; i++) {
3360                         tc = &dcb_config->tc_config[i];
3361                         tc->path[IXGBE_DCB_TX_CONFIG].bwg_percent =
3362                                                 (uint8_t)(100 / nb_tcs);
3363                         tc->path[IXGBE_DCB_RX_CONFIG].bwg_percent =
3364                                                 (uint8_t)(100 / nb_tcs);
3365                 }
3366                 for (; i < IXGBE_DCB_MAX_TRAFFIC_CLASS; i++) {
3367                         tc = &dcb_config->tc_config[i];
3368                         tc->path[IXGBE_DCB_TX_CONFIG].bwg_percent = 0;
3369                         tc->path[IXGBE_DCB_RX_CONFIG].bwg_percent = 0;
3370                 }
3371         }
3372
3373         switch (hw->mac.type) {
3374         case ixgbe_mac_X550:
3375         case ixgbe_mac_X550EM_x:
3376                 rx_buffer_size = X550_RX_BUFFER_SIZE;
3377                 break;
3378         default:
3379                 rx_buffer_size = NIC_RX_BUFFER_SIZE;
3380                 break;
3381         }
3382
3383         if(config_dcb_rx) {
3384                 /* Set RX buffer size */
3385                 pbsize = (uint16_t)(rx_buffer_size / nb_tcs);
3386                 uint32_t rxpbsize = pbsize << IXGBE_RXPBSIZE_SHIFT;
3387                 for (i = 0 ; i < nb_tcs; i++) {
3388                         IXGBE_WRITE_REG(hw, IXGBE_RXPBSIZE(i), rxpbsize);
3389                 }
3390                 /* zero alloc all unused TCs */
3391                 for (; i < ETH_DCB_NUM_USER_PRIORITIES; i++) {
3392                         IXGBE_WRITE_REG(hw, IXGBE_RXPBSIZE(i), 0);
3393                 }
3394         }
3395         if(config_dcb_tx) {
3396                 /* Only support an equally distributed Tx packet buffer strategy. */
3397                 uint32_t txpktsize = IXGBE_TXPBSIZE_MAX / nb_tcs;
3398                 uint32_t txpbthresh = (txpktsize / DCB_TX_PB) - IXGBE_TXPKT_SIZE_MAX;
3399                 for (i = 0; i < nb_tcs; i++) {
3400                         IXGBE_WRITE_REG(hw, IXGBE_TXPBSIZE(i), txpktsize);
3401                         IXGBE_WRITE_REG(hw, IXGBE_TXPBTHRESH(i), txpbthresh);
3402                 }
3403                 /* Clear unused TCs, if any, to zero buffer size*/
3404                 for (; i < ETH_DCB_NUM_USER_PRIORITIES; i++) {
3405                         IXGBE_WRITE_REG(hw, IXGBE_TXPBSIZE(i), 0);
3406                         IXGBE_WRITE_REG(hw, IXGBE_TXPBTHRESH(i), 0);
3407                 }
3408         }
3409
3410         /*Calculates traffic class credits*/
3411         ixgbe_dcb_calculate_tc_credits_cee(hw, dcb_config,max_frame,
3412                                 IXGBE_DCB_TX_CONFIG);
3413         ixgbe_dcb_calculate_tc_credits_cee(hw, dcb_config,max_frame,
3414                                 IXGBE_DCB_RX_CONFIG);
3415
3416         if(config_dcb_rx) {
3417                 /* Unpack CEE standard containers */
3418                 ixgbe_dcb_unpack_refill_cee(dcb_config, IXGBE_DCB_RX_CONFIG, refill);
3419                 ixgbe_dcb_unpack_max_cee(dcb_config, max);
3420                 ixgbe_dcb_unpack_bwgid_cee(dcb_config, IXGBE_DCB_RX_CONFIG, bwgid);
3421                 ixgbe_dcb_unpack_tsa_cee(dcb_config, IXGBE_DCB_RX_CONFIG, tsa);
3422                 /* Configure PG(ETS) RX */
3423                 ixgbe_dcb_hw_arbite_rx_config(hw,refill,max,bwgid,tsa,map);
3424         }
3425
3426         if(config_dcb_tx) {
3427                 /* Unpack CEE standard containers */
3428                 ixgbe_dcb_unpack_refill_cee(dcb_config, IXGBE_DCB_TX_CONFIG, refill);
3429                 ixgbe_dcb_unpack_max_cee(dcb_config, max);
3430                 ixgbe_dcb_unpack_bwgid_cee(dcb_config, IXGBE_DCB_TX_CONFIG, bwgid);
3431                 ixgbe_dcb_unpack_tsa_cee(dcb_config, IXGBE_DCB_TX_CONFIG, tsa);
3432                 /* Configure PG(ETS) TX */
3433                 ixgbe_dcb_hw_arbite_tx_config(hw,refill,max,bwgid,tsa,map);
3434         }
3435
3436         /*Configure queue statistics registers*/
3437         ixgbe_dcb_config_tc_stats_82599(hw, dcb_config);
3438
3439         /* Check if the PFC is supported */
3440         if(dev->data->dev_conf.dcb_capability_en & ETH_DCB_PFC_SUPPORT) {
3441                 pbsize = (uint16_t)(rx_buffer_size / nb_tcs);
3442                 for (i = 0; i < nb_tcs; i++) {
3443                         /*
3444                         * If the TC count is 8, the default high_water is 48
3445                         * and the default low_water is 16.
3446                         */
3447                         hw->fc.high_water[i] = (pbsize * 3 ) / 4;
3448                         hw->fc.low_water[i] = pbsize / 4;
3449                         /* Enable pfc for this TC */
3450                         tc = &dcb_config->tc_config[i];
3451                         tc->pfc = ixgbe_dcb_pfc_enabled;
3452                 }
3453                 ixgbe_dcb_unpack_pfc_cee(dcb_config, map, &pfc_en);
3454                 if(dcb_config->num_tcs.pfc_tcs == ETH_4_TCS)
3455                         pfc_en &= 0x0F;
3456                 ret = ixgbe_dcb_config_pfc(hw, pfc_en, map);
3457         }
3458
3459         return ret;
3460 }
3461
3462 /**
3463  * ixgbe_configure_dcb - Configure DCB  Hardware
3464  * @dev: pointer to rte_eth_dev
3465  */
3466 void ixgbe_configure_dcb(struct rte_eth_dev *dev)
3467 {
3468         struct ixgbe_dcb_config *dcb_cfg =
3469                         IXGBE_DEV_PRIVATE_TO_DCB_CFG(dev->data->dev_private);
3470         struct rte_eth_conf *dev_conf = &(dev->data->dev_conf);
3471
3472         PMD_INIT_FUNC_TRACE();
3473
3474         /* check support mq_mode for DCB */
3475         if ((dev_conf->rxmode.mq_mode != ETH_MQ_RX_VMDQ_DCB) &&
3476             (dev_conf->rxmode.mq_mode != ETH_MQ_RX_DCB) &&
3477             (dev_conf->rxmode.mq_mode != ETH_MQ_RX_DCB_RSS))
3478                 return;
3479
3480         if (dev->data->nb_rx_queues != ETH_DCB_NUM_QUEUES)
3481                 return;
3482
3483         /** Configure DCB hardware **/
3484         ixgbe_dcb_hw_configure(dev, dcb_cfg);
3485
3486         return;
3487 }
3488
3489 /*
3490  * VMDq is only supported on 10 GbE NICs.
3491  */
3492 static void
3493 ixgbe_vmdq_rx_hw_configure(struct rte_eth_dev *dev)
3494 {
3495         struct rte_eth_vmdq_rx_conf *cfg;
3496         struct ixgbe_hw *hw;
3497         enum rte_eth_nb_pools num_pools;
3498         uint32_t mrqc, vt_ctl, vlanctrl;
3499         uint32_t vmolr = 0;
3500         int i;
3501
3502         PMD_INIT_FUNC_TRACE();
3503         hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
3504         cfg = &dev->data->dev_conf.rx_adv_conf.vmdq_rx_conf;
3505         num_pools = cfg->nb_queue_pools;
3506
3507         ixgbe_rss_disable(dev);
3508
3509         /* MRQC: enable vmdq */
3510         mrqc = IXGBE_MRQC_VMDQEN;
3511         IXGBE_WRITE_REG(hw, IXGBE_MRQC, mrqc);
3512
3513         /* PFVTCTL: turn on virtualisation and set the default pool */
3514         vt_ctl = IXGBE_VT_CTL_VT_ENABLE | IXGBE_VT_CTL_REPLEN;
3515         if (cfg->enable_default_pool)
3516                 vt_ctl |= (cfg->default_pool << IXGBE_VT_CTL_POOL_SHIFT);
3517         else
3518                 vt_ctl |= IXGBE_VT_CTL_DIS_DEFPL;
3519
3520         IXGBE_WRITE_REG(hw, IXGBE_VT_CTL, vt_ctl);
3521
3522         for (i = 0; i < (int)num_pools; i++) {
3523                 vmolr = ixgbe_convert_vm_rx_mask_to_val(cfg->rx_mode, vmolr);
3524                 IXGBE_WRITE_REG(hw, IXGBE_VMOLR(i), vmolr);
3525         }
3526
3527         /* VLNCTRL: enable vlan filtering and allow all vlan tags through */
3528         vlanctrl = IXGBE_READ_REG(hw, IXGBE_VLNCTRL);
3529         vlanctrl |= IXGBE_VLNCTRL_VFE ; /* enable vlan filters */
3530         IXGBE_WRITE_REG(hw, IXGBE_VLNCTRL, vlanctrl);
3531
3532         /* VFTA - enable all vlan filters */
3533         for (i = 0; i < NUM_VFTA_REGISTERS; i++)
3534                 IXGBE_WRITE_REG(hw, IXGBE_VFTA(i), UINT32_MAX);
3535
3536         /* VFRE: pool enabling for receive - 64 */
3537         IXGBE_WRITE_REG(hw, IXGBE_VFRE(0), UINT32_MAX);
3538         if (num_pools == ETH_64_POOLS)
3539                 IXGBE_WRITE_REG(hw, IXGBE_VFRE(1), UINT32_MAX);
3540
3541         /*
3542          * MPSAR - allow pools to read specific mac addresses
3543          * In this case, all pools should be able to read from mac addr 0
3544          */
3545         IXGBE_WRITE_REG(hw, IXGBE_MPSAR_LO(0), UINT32_MAX);
3546         IXGBE_WRITE_REG(hw, IXGBE_MPSAR_HI(0), UINT32_MAX);
3547
3548         /* PFVLVF, PFVLVFB: set up filters for vlan tags as configured */
3549         for (i = 0; i < cfg->nb_pool_maps; i++) {
3550                 /* set vlan id in VF register and set the valid bit */
3551                 IXGBE_WRITE_REG(hw, IXGBE_VLVF(i), (IXGBE_VLVF_VIEN | \
3552                                 (cfg->pool_map[i].vlan_id & IXGBE_RXD_VLAN_ID_MASK)));
3553                 /*
3554                  * Put the allowed pools in the VLVFB registers: pools 0-31 go
3555                  * in the even (i*2) register and pools 32-63 in the odd
3556                  * (i*2+1) register.
3557                  */
3558                 if (((cfg->pool_map[i].pools >> 32) & UINT32_MAX) == 0)
3559                         IXGBE_WRITE_REG(hw, IXGBE_VLVFB(i*2), \
3560                                         (cfg->pool_map[i].pools & UINT32_MAX));
3561                 else
3562                         IXGBE_WRITE_REG(hw, IXGBE_VLVFB((i*2+1)), \
3563                                         ((cfg->pool_map[i].pools >> 32) \
3564                                         & UINT32_MAX));
3565
3566         }
3567
3568         /* PFDMA Tx General Switch Control Enables VMDQ loopback */
3569         if (cfg->enable_loop_back) {
3570                 IXGBE_WRITE_REG(hw, IXGBE_PFDTXGSWC, IXGBE_PFDTXGSWC_VT_LBEN);
3571                 for (i = 0; i < RTE_IXGBE_VMTXSW_REGISTER_COUNT; i++)
3572                         IXGBE_WRITE_REG(hw, IXGBE_VMTXSW(i), UINT32_MAX);
3573         }
3574
3575         IXGBE_WRITE_FLUSH(hw);
3576 }
3577
3578 /*
3579  * ixgbe_vmdq_tx_hw_configure - Configure general VMDq TX parameters
3580  * @hw: pointer to hardware structure
3581  */
3582 static void
3583 ixgbe_vmdq_tx_hw_configure(struct ixgbe_hw *hw)
3584 {
3585         uint32_t reg;
3586         uint32_t q;
3587
3588         PMD_INIT_FUNC_TRACE();
3589         /*PF VF Transmit Enable*/
3590         IXGBE_WRITE_REG(hw, IXGBE_VFTE(0), UINT32_MAX);
3591         IXGBE_WRITE_REG(hw, IXGBE_VFTE(1), UINT32_MAX);
3592
3593         /* Disable the Tx desc arbiter so that MTQC can be changed */
3594         reg = IXGBE_READ_REG(hw, IXGBE_RTTDCS);
3595         reg |= IXGBE_RTTDCS_ARBDIS;
3596         IXGBE_WRITE_REG(hw, IXGBE_RTTDCS, reg);
3597
3598         reg = IXGBE_MTQC_VT_ENA | IXGBE_MTQC_64VF;
3599         IXGBE_WRITE_REG(hw, IXGBE_MTQC, reg);
3600
3601         /* Disable drop for all queues */
3602         for (q = 0; q < IXGBE_MAX_RX_QUEUE_NUM; q++)
3603                 IXGBE_WRITE_REG(hw, IXGBE_QDE,
3604                   (IXGBE_QDE_WRITE | (q << IXGBE_QDE_IDX_SHIFT)));
3605
3606         /* Enable the Tx desc arbiter */
3607         reg = IXGBE_READ_REG(hw, IXGBE_RTTDCS);
3608         reg &= ~IXGBE_RTTDCS_ARBDIS;
3609         IXGBE_WRITE_REG(hw, IXGBE_RTTDCS, reg);
3610
3611         IXGBE_WRITE_FLUSH(hw);
3612
3613         return;
3614 }
3615
3616 static int __attribute__((cold))
3617 ixgbe_alloc_rx_queue_mbufs(struct ixgbe_rx_queue *rxq)
3618 {
3619         struct ixgbe_rx_entry *rxe = rxq->sw_ring;
3620         uint64_t dma_addr;
3621         unsigned i;
3622
3623         /* Initialize software ring entries */
3624         for (i = 0; i < rxq->nb_rx_desc; i++) {
3625                 volatile union ixgbe_adv_rx_desc *rxd;
3626                 struct rte_mbuf *mbuf = rte_rxmbuf_alloc(rxq->mb_pool);
3627                 if (mbuf == NULL) {
3628                         PMD_INIT_LOG(ERR, "RX mbuf alloc failed queue_id=%u",
3629                                      (unsigned) rxq->queue_id);
3630                         return -ENOMEM;
3631                 }
3632
3633                 rte_mbuf_refcnt_set(mbuf, 1);
3634                 mbuf->next = NULL;
3635                 mbuf->data_off = RTE_PKTMBUF_HEADROOM;
3636                 mbuf->nb_segs = 1;
3637                 mbuf->port = rxq->port_id;
3638
3639                 dma_addr =
3640                         rte_cpu_to_le_64(rte_mbuf_data_dma_addr_default(mbuf));
3641                 rxd = &rxq->rx_ring[i];
3642                 rxd->read.hdr_addr = 0;
3643                 rxd->read.pkt_addr = dma_addr;
3644                 rxe[i].mbuf = mbuf;
3645         }
3646
3647         return 0;
3648 }
3649
3650 static int
3651 ixgbe_config_vf_rss(struct rte_eth_dev *dev)
3652 {
3653         struct ixgbe_hw *hw;
3654         uint32_t mrqc;
3655
3656         ixgbe_rss_configure(dev);
3657
3658         hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
3659
3660         /* MRQC: enable VF RSS */
3661         mrqc = IXGBE_READ_REG(hw, IXGBE_MRQC);
3662         mrqc &= ~IXGBE_MRQC_MRQE_MASK;
3663         switch (RTE_ETH_DEV_SRIOV(dev).active) {
3664         case ETH_64_POOLS:
3665                 mrqc |= IXGBE_MRQC_VMDQRSS64EN;
3666                 break;
3667
3668         case ETH_32_POOLS:
3669                 mrqc |= IXGBE_MRQC_VMDQRSS32EN;
3670                 break;
3671
3672         default:
3673                 PMD_INIT_LOG(ERR, "Invalid pool number in IOV mode with VMDQ RSS");
3674                 return -EINVAL;
3675         }
3676
3677         IXGBE_WRITE_REG(hw, IXGBE_MRQC, mrqc);
3678
3679         return 0;
3680 }
3681
3682 static int
3683 ixgbe_config_vf_default(struct rte_eth_dev *dev)
3684 {
3685         struct ixgbe_hw *hw =
3686                 IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
3687
3688         switch (RTE_ETH_DEV_SRIOV(dev).active) {
3689         case ETH_64_POOLS:
3690                 IXGBE_WRITE_REG(hw, IXGBE_MRQC,
3691                         IXGBE_MRQC_VMDQEN);
3692                 break;
3693
3694         case ETH_32_POOLS:
3695                 IXGBE_WRITE_REG(hw, IXGBE_MRQC,
3696                         IXGBE_MRQC_VMDQRT4TCEN);
3697                 break;
3698
3699         case ETH_16_POOLS:
3700                 IXGBE_WRITE_REG(hw, IXGBE_MRQC,
3701                         IXGBE_MRQC_VMDQRT8TCEN);
3702                 break;
3703         default:
3704                 PMD_INIT_LOG(ERR,
3705                         "invalid pool number in IOV mode");
3706                 break;
3707         }
3708         return 0;
3709 }
3710
3711 static int
3712 ixgbe_dev_mq_rx_configure(struct rte_eth_dev *dev)
3713 {
3714         struct ixgbe_hw *hw =
3715                 IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
3716
3717         if (hw->mac.type == ixgbe_mac_82598EB)
3718                 return 0;
3719
3720         if (RTE_ETH_DEV_SRIOV(dev).active == 0) {
3721                 /*
3722                  * SRIOV inactive scheme
3723                  * any DCB/RSS w/o VMDq multi-queue setting
3724                  */
3725                 switch (dev->data->dev_conf.rxmode.mq_mode) {
3726                 case ETH_MQ_RX_RSS:
3727                 case ETH_MQ_RX_DCB_RSS:
3728                 case ETH_MQ_RX_VMDQ_RSS:
3729                         ixgbe_rss_configure(dev);
3730                         break;
3731
3732                 case ETH_MQ_RX_VMDQ_DCB:
3733                         ixgbe_vmdq_dcb_configure(dev);
3734                         break;
3735
3736                 case ETH_MQ_RX_VMDQ_ONLY:
3737                         ixgbe_vmdq_rx_hw_configure(dev);
3738                         break;
3739
3740                 case ETH_MQ_RX_NONE:
3741                 default:
3742                         /* if mq_mode is none, disable rss mode.*/
3743                         ixgbe_rss_disable(dev);
3744                         break;
3745                 }
3746         } else {
3747                 /*
3748                  * SRIOV active scheme
3749                  * Support RSS together with VMDq & SRIOV
3750                  */
3751                 switch (dev->data->dev_conf.rxmode.mq_mode) {
3752                 case ETH_MQ_RX_RSS:
3753                 case ETH_MQ_RX_VMDQ_RSS:
3754                         ixgbe_config_vf_rss(dev);
3755                         break;
3756
3757                 /* FIXME if support DCB/RSS together with VMDq & SRIOV */
3758                 case ETH_MQ_RX_VMDQ_DCB:
3759                 case ETH_MQ_RX_VMDQ_DCB_RSS:
3760                         PMD_INIT_LOG(ERR,
3761                                 "DCB with VMDq & SRIOV is not supported");
3762                         return -1;
3763                 default:
3764                         ixgbe_config_vf_default(dev);
3765                         break;
3766                 }
3767         }
3768
3769         return 0;
3770 }
3771
3772 static int
3773 ixgbe_dev_mq_tx_configure(struct rte_eth_dev *dev)
3774 {
3775         struct ixgbe_hw *hw =
3776                 IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
3777         uint32_t mtqc;
3778         uint32_t rttdcs;
3779
3780         if (hw->mac.type == ixgbe_mac_82598EB)
3781                 return 0;
3782
3783         /* disable arbiter before setting MTQC */
3784         rttdcs = IXGBE_READ_REG(hw, IXGBE_RTTDCS);
3785         rttdcs |= IXGBE_RTTDCS_ARBDIS;
3786         IXGBE_WRITE_REG(hw, IXGBE_RTTDCS, rttdcs);
3787
3788         if (RTE_ETH_DEV_SRIOV(dev).active == 0) {
3789                 /*
3790                  * SRIOV inactive scheme
3791                  * any DCB w/o VMDq multi-queue setting
3792                  */
3793                 if (dev->data->dev_conf.txmode.mq_mode == ETH_MQ_TX_VMDQ_ONLY)
3794                         ixgbe_vmdq_tx_hw_configure(hw);
3795                 else {
3796                         mtqc = IXGBE_MTQC_64Q_1PB;
3797                         IXGBE_WRITE_REG(hw, IXGBE_MTQC, mtqc);
3798                 }
3799         } else {
3800                 switch (RTE_ETH_DEV_SRIOV(dev).active) {
3801
3802                 /*
3803                  * SRIOV active scheme
3804                  * FIXME if support DCB together with VMDq & SRIOV
3805                  */
3806                 case ETH_64_POOLS:
3807                         mtqc = IXGBE_MTQC_VT_ENA | IXGBE_MTQC_64VF;
3808                         break;
3809                 case ETH_32_POOLS:
3810                         mtqc = IXGBE_MTQC_VT_ENA | IXGBE_MTQC_32VF;
3811                         break;
3812                 case ETH_16_POOLS:
3813                         mtqc = IXGBE_MTQC_VT_ENA | IXGBE_MTQC_RT_ENA |
3814                                 IXGBE_MTQC_8TC_8TQ;
3815                         break;
3816                 default:
3817                         mtqc = IXGBE_MTQC_64Q_1PB;
3818                         PMD_INIT_LOG(ERR, "invalid pool number in IOV mode");
3819                 }
3820                 IXGBE_WRITE_REG(hw, IXGBE_MTQC, mtqc);
3821         }
3822
3823         /* re-enable arbiter */
3824         rttdcs &= ~IXGBE_RTTDCS_ARBDIS;
3825         IXGBE_WRITE_REG(hw, IXGBE_RTTDCS, rttdcs);
3826
3827         return 0;
3828 }
3829
3830 /**
3831  * ixgbe_get_rscctl_maxdesc - Calculate the RSCCTL[n].MAXDESC for PF
3832  *
3833  * Return the RSCCTL[n].MAXDESC for 82599 and x540 PF devices according to the
3834  * spec rev. 3.0 chapter 8.2.3.8.13.
3835  *
3836  * @pool Memory pool of the Rx queue
3837  */
3838 static inline uint32_t
3839 ixgbe_get_rscctl_maxdesc(struct rte_mempool *pool)
3840 {
3841         struct rte_pktmbuf_pool_private *mp_priv = rte_mempool_get_priv(pool);
3842
3843         /* MAXDESC * SRRCTL.BSIZEPKT must not exceed 64 KB minus one */
3844         uint16_t maxdesc =
3845                 IPV4_MAX_PKT_LEN /
3846                         (mp_priv->mbuf_data_room_size - RTE_PKTMBUF_HEADROOM);
3847
3848         if (maxdesc >= 16)
3849                 return IXGBE_RSCCTL_MAXDESC_16;
3850         else if (maxdesc >= 8)
3851                 return IXGBE_RSCCTL_MAXDESC_8;
3852         else if (maxdesc >= 4)
3853                 return IXGBE_RSCCTL_MAXDESC_4;
3854         else
3855                 return IXGBE_RSCCTL_MAXDESC_1;
3856 }
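
/*
 * Worked example (assuming the common 2048-byte mbuf data room and the
 * default 128-byte RTE_PKTMBUF_HEADROOM): each descriptor can then hold
 * 2048 - 128 = 1920 bytes, IPV4_MAX_PKT_LEN / 1920 = 34, and since 34 >= 16
 * the function returns IXGBE_RSCCTL_MAXDESC_16.
 */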
3857
3858 /**
3859  * ixgbe_set_ivar - Setup the correct IVAR register for a particular MSIX
3860  * interrupt
3861  *
3862  * (Taken from FreeBSD tree)
3863  * (yes this is all very magic and confusing :)
3864  *
3865  * @dev port handle
3866  * @entry the register array entry
3867  * @vector the MSIX vector for this queue
3868  * @type RX/TX/MISC
3869  */
3870 static void
3871 ixgbe_set_ivar(struct rte_eth_dev *dev, u8 entry, u8 vector, s8 type)
3872 {
3873         struct ixgbe_hw *hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
3874         u32 ivar, index;
3875
3876         vector |= IXGBE_IVAR_ALLOC_VAL;
3877
3878         switch (hw->mac.type) {
3879
3880         case ixgbe_mac_82598EB:
3881                 if (type == -1)
3882                         entry = IXGBE_IVAR_OTHER_CAUSES_INDEX;
3883                 else
3884                         entry += (type * 64);
3885                 index = (entry >> 2) & 0x1F;
3886                 ivar = IXGBE_READ_REG(hw, IXGBE_IVAR(index));
3887                 ivar &= ~(0xFF << (8 * (entry & 0x3)));
3888                 ivar |= (vector << (8 * (entry & 0x3)));
3889                 IXGBE_WRITE_REG(hw, IXGBE_IVAR(index), ivar);
3890                 break;
3891
3892         case ixgbe_mac_82599EB:
3893         case ixgbe_mac_X540:
3894                 if (type == -1) { /* MISC IVAR */
3895                         index = (entry & 1) * 8;
3896                         ivar = IXGBE_READ_REG(hw, IXGBE_IVAR_MISC);
3897                         ivar &= ~(0xFF << index);
3898                         ivar |= (vector << index);
3899                         IXGBE_WRITE_REG(hw, IXGBE_IVAR_MISC, ivar);
3900                 } else {        /* RX/TX IVARS */
3901                         index = (16 * (entry & 1)) + (8 * type);
3902                         ivar = IXGBE_READ_REG(hw, IXGBE_IVAR(entry >> 1));
3903                         ivar &= ~(0xFF << index);
3904                         ivar |= (vector << index);
3905                         IXGBE_WRITE_REG(hw, IXGBE_IVAR(entry >> 1), ivar);
3906                 }
3907
3908                 break;
3909
3910         default:
3911                 break;
3912         }
3913 }
3914
3915 void __attribute__((cold))
3916 ixgbe_set_rx_function(struct rte_eth_dev *dev)
3917 {
3918         uint16_t i, rx_using_sse;
3919         struct ixgbe_adapter *adapter =
3920                 (struct ixgbe_adapter *)dev->data->dev_private;
3921
3922         /*
3923          * In order to allow Vector Rx there are a few configuration
3924          * conditions to be met and Rx Bulk Allocation should be allowed.
3925          */
3926         if (ixgbe_rx_vec_dev_conf_condition_check(dev) ||
3927             !adapter->rx_bulk_alloc_allowed) {
3928                 PMD_INIT_LOG(DEBUG, "Port[%d] doesn't meet Vector Rx "
3929                                     "preconditions or RTE_IXGBE_INC_VECTOR is "
3930                                     "not enabled",
3931                              dev->data->port_id);
3932
3933                 adapter->rx_vec_allowed = false;
3934         }
3935
3936         /*
3937          * Initialize the appropriate LRO callback.
3938          *
3939          * If all queues satisfy the bulk allocation preconditions
3940  * (adapter->rx_bulk_alloc_allowed is TRUE) then we may use bulk allocation.
3941          * Otherwise use a single allocation version.
3942          */
3943         if (dev->data->lro) {
3944                 if (adapter->rx_bulk_alloc_allowed) {
3945                         PMD_INIT_LOG(DEBUG, "LRO is requested. Using a bulk "
3946                                            "allocation version");
3947                         dev->rx_pkt_burst = ixgbe_recv_pkts_lro_bulk_alloc;
3948                 } else {
3949                         PMD_INIT_LOG(DEBUG, "LRO is requested. Using a single "
3950                                            "allocation version");
3951                         dev->rx_pkt_burst = ixgbe_recv_pkts_lro_single_alloc;
3952                 }
3953         } else if (dev->data->scattered_rx) {
3954                 /*
3955                  * Set the non-LRO scattered callback: there are Vector and
3956                  * single allocation versions.
3957                  */
3958                 if (adapter->rx_vec_allowed) {
3959                         PMD_INIT_LOG(DEBUG, "Using Vector Scattered Rx "
3960                                             "callback (port=%d).",
3961                                      dev->data->port_id);
3962
3963                         dev->rx_pkt_burst = ixgbe_recv_scattered_pkts_vec;
3964                 } else if (adapter->rx_bulk_alloc_allowed) {
3965                         PMD_INIT_LOG(DEBUG, "Using a Scattered Rx callback with "
3966                                            "bulk allocation (port=%d).",
3967                                      dev->data->port_id);
3968                         dev->rx_pkt_burst = ixgbe_recv_pkts_lro_bulk_alloc;
3969                 } else {
3970                         PMD_INIT_LOG(DEBUG, "Using Regular (non-vector, "
3971                                             "single allocation) "
3972                                             "Scattered Rx callback "
3973                                             "(port=%d).",
3974                                      dev->data->port_id);
3975
3976                         dev->rx_pkt_burst = ixgbe_recv_pkts_lro_single_alloc;
3977                 }
3978         /*
3979          * Below we set "simple" callbacks according to port/queues parameters.
3980          * If parameters allow we are going to choose between the following
3981          * callbacks:
3982          *    - Vector
3983          *    - Bulk Allocation
3984          *    - Single buffer allocation (the simplest one)
3985          */
3986         } else if (adapter->rx_vec_allowed) {
3987                 PMD_INIT_LOG(DEBUG, "Vector Rx enabled, please make sure the Rx "
3988                                     "burst size is no less than %d (port=%d).",
3989                              RTE_IXGBE_DESCS_PER_LOOP,
3990                              dev->data->port_id);
3991
3992                 dev->rx_pkt_burst = ixgbe_recv_pkts_vec;
3993         } else if (adapter->rx_bulk_alloc_allowed) {
3994                 PMD_INIT_LOG(DEBUG, "Rx Burst Bulk Alloc Preconditions are "
3995                                     "satisfied. Rx Burst Bulk Alloc function "
3996                                     "will be used on port=%d.",
3997                              dev->data->port_id);
3998
3999                 dev->rx_pkt_burst = ixgbe_recv_pkts_bulk_alloc;
4000         } else {
4001                 PMD_INIT_LOG(DEBUG, "Rx Burst Bulk Alloc Preconditions are not "
4002                                     "satisfied, or Scattered Rx is requested "
4003                                     "(port=%d).",
4004                              dev->data->port_id);
4005
4006                 dev->rx_pkt_burst = ixgbe_recv_pkts;
4007         }
4008
4009         /* Propagate information about RX function choice through all queues. */
4010
4011         rx_using_sse =
4012                 (dev->rx_pkt_burst == ixgbe_recv_scattered_pkts_vec ||
4013                 dev->rx_pkt_burst == ixgbe_recv_pkts_vec);
4014
4015         for (i = 0; i < dev->data->nb_rx_queues; i++) {
4016                 struct ixgbe_rx_queue *rxq = dev->data->rx_queues[i];
4017                 rxq->rx_using_sse = rx_using_sse;
4018         }
4019 }
4020
4021 /**
4022  * ixgbe_set_rsc - configure RSC related port HW registers
4023  *
4024  * Configures the port's RSC related registers according to the 4.6.7.2 chapter
4025  * of 82599 Spec (x540 configuration is virtually the same).
4026  *
4027  * @dev port handle
4028  *
4029  * Returns 0 in case of success or a non-zero error code
4030  */
4031 static int
4032 ixgbe_set_rsc(struct rte_eth_dev *dev)
4033 {
4034         struct rte_eth_rxmode *rx_conf = &dev->data->dev_conf.rxmode;
4035         struct ixgbe_hw *hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
4036         struct rte_eth_dev_info dev_info = { 0 };
4037         bool rsc_capable = false;
4038         uint16_t i;
4039         uint32_t rdrxctl;
4040
4041         /* Sanity check */
4042         dev->dev_ops->dev_infos_get(dev, &dev_info);
4043         if (dev_info.rx_offload_capa & DEV_RX_OFFLOAD_TCP_LRO)
4044                 rsc_capable = true;
4045
4046         if (!rsc_capable && rx_conf->enable_lro) {
4047                 PMD_INIT_LOG(CRIT, "LRO is requested on HW that doesn't "
4048                                    "support it");
4049                 return -EINVAL;
4050         }
4051
4052         /* RSC global configuration (chapter 4.6.7.2.1 of 82599 Spec) */
4053
4054         if (!rx_conf->hw_strip_crc && rx_conf->enable_lro) {
4055                 /*
4056                  * According to chapter 4.6.7.2.1 of the Spec Rev. 3.0,
4057                  * RSC requires HW CRC stripping to be enabled. If the
4058                  * user requested HW CRC stripping off together with RSC,
4059                  * return an error.
4060                  */
4061                 PMD_INIT_LOG(CRIT, "LRO can't be enabled when HW CRC "
4062                                     "is disabled");
4063                 return -EINVAL;
4064         }
4065
4066         /* RFCTL configuration  */
4067         if (rsc_capable) {
4068                 uint32_t rfctl = IXGBE_READ_REG(hw, IXGBE_RFCTL);
4069                 if (rx_conf->enable_lro)
4070                         /*
4071                          * Since NFS packet coalescing is not supported, clear
4072                          * RFCTL.NFSW_DIS and RFCTL.NFSR_DIS when RSC is
4073                          * enabled.
4074                          */
4075                         rfctl &= ~(IXGBE_RFCTL_RSC_DIS | IXGBE_RFCTL_NFSW_DIS |
4076                                    IXGBE_RFCTL_NFSR_DIS);
4077                 else
4078                         rfctl |= IXGBE_RFCTL_RSC_DIS;
4079
4080                 IXGBE_WRITE_REG(hw, IXGBE_RFCTL, rfctl);
4081         }
4082
4083         /* If LRO hasn't been requested - we are done here. */
4084         if (!rx_conf->enable_lro)
4085                 return 0;
4086
4087         /* Set RDRXCTL.RSCACKC bit */
4088         rdrxctl = IXGBE_READ_REG(hw, IXGBE_RDRXCTL);
4089         rdrxctl |= IXGBE_RDRXCTL_RSCACKC;
4090         IXGBE_WRITE_REG(hw, IXGBE_RDRXCTL, rdrxctl);
4091
4092         /* Per-queue RSC configuration (chapter 4.6.7.2.2 of 82599 Spec) */
4093         for (i = 0; i < dev->data->nb_rx_queues; i++) {
4094                 struct ixgbe_rx_queue *rxq = dev->data->rx_queues[i];
4095                 uint32_t srrctl =
4096                         IXGBE_READ_REG(hw, IXGBE_SRRCTL(rxq->reg_idx));
4097                 uint32_t rscctl =
4098                         IXGBE_READ_REG(hw, IXGBE_RSCCTL(rxq->reg_idx));
4099                 uint32_t psrtype =
4100                         IXGBE_READ_REG(hw, IXGBE_PSRTYPE(rxq->reg_idx));
4101                 uint32_t eitr =
4102                         IXGBE_READ_REG(hw, IXGBE_EITR(rxq->reg_idx));
4103
4104                 /*
4105                  * ixgbe PMD doesn't support header-split at the moment.
4106                  *
4107                  * Following the 4.6.7.2.1 chapter of the 82599/x540
4108                  * Spec, if RSC is enabled the SRRCTL[n].BSIZEHEADER
4109                  * should be configured even if header split is not
4110                  * enabled. We configure it to 128 bytes, following the
4111                  * recommendation in the spec.
4112                  */
4113                 srrctl &= ~IXGBE_SRRCTL_BSIZEHDR_MASK;
4114                 srrctl |= (128 << IXGBE_SRRCTL_BSIZEHDRSIZE_SHIFT) &
4115                                             IXGBE_SRRCTL_BSIZEHDR_MASK;
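                /*
                 * Added note (assumption based on the 82599 datasheet): the
                 * BSIZEHEADER field is expressed in 64 byte units, so the
                 * value written above corresponds to the recommended
                 * 128 byte header buffer.
                 */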
4116
4117                 /*
4118                  * TODO: Consider setting the Receive Descriptor Minimum
4119                  * Threshold Size for the RSC case. This is not an obviously
4120                  * beneficial option, but one worth considering...
4121                  */
4122
4123                 rscctl |= IXGBE_RSCCTL_RSCEN;
4124                 rscctl |= ixgbe_get_rscctl_maxdesc(rxq->mb_pool);
4125                 psrtype |= IXGBE_PSRTYPE_TCPHDR;
4126
4127                 /*
4128                  * RSC: Set ITR interval corresponding to 2K ints/s.
4129                  *
4130                  * Full-sized RSC aggregations for a 10Gb/s link will
4131                  * arrive at about 20K aggregation/s rate.
4132                  *
4133                  * A 2K ints/s rate will cause only 10% of the
4134                  * aggregations to be closed due to interrupt timer
4135                  * expiration when streaming at wire speed.
4136                  *
4137                  * For a sparse streaming case this setting will yield
4138                  * at most 500us latency for a single RSC aggregation.
4139                  */
4140                 eitr &= ~IXGBE_EITR_ITR_INT_MASK;
4141                 eitr |= IXGBE_EITR_INTERVAL_US(500) | IXGBE_EITR_CNT_WDIS;
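                /*
                 * For reference (added comment): a 500 us ITR interval
                 * corresponds to 1,000,000 / 500 = 2000 interrupts per
                 * second, i.e. the 2K ints/s rate described above.
                 */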
4142
4143                 IXGBE_WRITE_REG(hw, IXGBE_SRRCTL(rxq->reg_idx), srrctl);
4144                 IXGBE_WRITE_REG(hw, IXGBE_RSCCTL(rxq->reg_idx), rscctl);
4145                 IXGBE_WRITE_REG(hw, IXGBE_PSRTYPE(rxq->reg_idx), psrtype);
4146                 IXGBE_WRITE_REG(hw, IXGBE_EITR(rxq->reg_idx), eitr);
4147
4148                 /*
4149                  * RSC requires the mapping of the queue to the
4150                  * interrupt vector.
4151                  */
4152                 ixgbe_set_ivar(dev, rxq->reg_idx, i, 0);
4153         }
4154
4155         dev->data->lro = 1;
4156
4157         PMD_INIT_LOG(DEBUG, "enabling LRO mode");
4158
4159         return 0;
4160 }
4161
4162 /*
4163  * Initializes Receive Unit.
4164  */
4165 int __attribute__((cold))
4166 ixgbe_dev_rx_init(struct rte_eth_dev *dev)
4167 {
4168         struct ixgbe_hw     *hw;
4169         struct ixgbe_rx_queue *rxq;
4170         uint64_t bus_addr;
4171         uint32_t rxctrl;
4172         uint32_t fctrl;
4173         uint32_t hlreg0;
4174         uint32_t maxfrs;
4175         uint32_t srrctl;
4176         uint32_t rdrxctl;
4177         uint32_t rxcsum;
4178         uint16_t buf_size;
4179         uint16_t i;
4180         struct rte_eth_rxmode *rx_conf = &dev->data->dev_conf.rxmode;
4181         int rc;
4182
4183         PMD_INIT_FUNC_TRACE();
4184         hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
4185
4186         /*
4187          * Make sure receives are disabled while setting
4188          * up the RX context (registers, descriptor rings, etc.).
4189          */
4190         rxctrl = IXGBE_READ_REG(hw, IXGBE_RXCTRL);
4191         IXGBE_WRITE_REG(hw, IXGBE_RXCTRL, rxctrl & ~IXGBE_RXCTRL_RXEN);
4192
4193         /* Enable receipt of broadcast frames */
4194         fctrl = IXGBE_READ_REG(hw, IXGBE_FCTRL);
4195         fctrl |= IXGBE_FCTRL_BAM;
4196         fctrl |= IXGBE_FCTRL_DPF;
4197         fctrl |= IXGBE_FCTRL_PMCF;
4198         IXGBE_WRITE_REG(hw, IXGBE_FCTRL, fctrl);
4199
4200         /*
4201          * Configure CRC stripping, if any.
4202          */
4203         hlreg0 = IXGBE_READ_REG(hw, IXGBE_HLREG0);
4204         if (rx_conf->hw_strip_crc)
4205                 hlreg0 |= IXGBE_HLREG0_RXCRCSTRP;
4206         else
4207                 hlreg0 &= ~IXGBE_HLREG0_RXCRCSTRP;
4208
4209         /*
4210          * Configure jumbo frame support, if any.
4211          */
4212         if (rx_conf->jumbo_frame == 1) {
4213                 hlreg0 |= IXGBE_HLREG0_JUMBOEN;
4214                 maxfrs = IXGBE_READ_REG(hw, IXGBE_MAXFRS);
4215                 maxfrs &= 0x0000FFFF;
4216                 maxfrs |= (rx_conf->max_rx_pkt_len << 16);
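                /*
                 * Added note: the maximum frame size occupies the upper
                 * 16 bits of MAXFRS; e.g. max_rx_pkt_len = 9000 (0x2328)
                 * yields 0x2328 in bits 31:16, while the lower 16 bits
                 * read back above are preserved.
                 */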
4217                 IXGBE_WRITE_REG(hw, IXGBE_MAXFRS, maxfrs);
4218         } else
4219                 hlreg0 &= ~IXGBE_HLREG0_JUMBOEN;
4220
4221         /*
4222          * If loopback mode is configured for 82599, set LPBK bit.
4223          */
4224         if (hw->mac.type == ixgbe_mac_82599EB &&
4225                         dev->data->dev_conf.lpbk_mode == IXGBE_LPBK_82599_TX_RX)
4226                 hlreg0 |= IXGBE_HLREG0_LPBK;
4227         else
4228                 hlreg0 &= ~IXGBE_HLREG0_LPBK;
4229
4230         IXGBE_WRITE_REG(hw, IXGBE_HLREG0, hlreg0);
4231
4232         /* Setup RX queues */
4233         for (i = 0; i < dev->data->nb_rx_queues; i++) {
4234                 rxq = dev->data->rx_queues[i];
4235
4236                 /*
4237                  * Reset crc_len in case it was changed after queue setup by a
4238                  * call to configure.
4239                  */
4240                 rxq->crc_len = rx_conf->hw_strip_crc ? 0 : ETHER_CRC_LEN;
4241
4242                 /* Setup the Base and Length of the Rx Descriptor Rings */
4243                 bus_addr = rxq->rx_ring_phys_addr;
4244                 IXGBE_WRITE_REG(hw, IXGBE_RDBAL(rxq->reg_idx),
4245                                 (uint32_t)(bus_addr & 0x00000000ffffffffULL));
4246                 IXGBE_WRITE_REG(hw, IXGBE_RDBAH(rxq->reg_idx),
4247                                 (uint32_t)(bus_addr >> 32));
4248                 IXGBE_WRITE_REG(hw, IXGBE_RDLEN(rxq->reg_idx),
4249                                 rxq->nb_rx_desc * sizeof(union ixgbe_adv_rx_desc));
4250                 IXGBE_WRITE_REG(hw, IXGBE_RDH(rxq->reg_idx), 0);
4251                 IXGBE_WRITE_REG(hw, IXGBE_RDT(rxq->reg_idx), 0);
4252
4253                 /* Configure the SRRCTL register */
4254 #ifdef RTE_HEADER_SPLIT_ENABLE
4255                 /*
4256                  * Configure Header Split
4257                  */
4258                 if (rx_conf->header_split) {
4259                         if (hw->mac.type == ixgbe_mac_82599EB) {
4260                                 /* Must setup the PSRTYPE register */
4261                                 uint32_t psrtype;
4262                                 psrtype = IXGBE_PSRTYPE_TCPHDR |
4263                                         IXGBE_PSRTYPE_UDPHDR   |
4264                                         IXGBE_PSRTYPE_IPV4HDR  |
4265                                         IXGBE_PSRTYPE_IPV6HDR;
4266                                 IXGBE_WRITE_REG(hw, IXGBE_PSRTYPE(rxq->reg_idx), psrtype);
4267                         }
4268                         srrctl = ((rx_conf->split_hdr_size <<
4269                                 IXGBE_SRRCTL_BSIZEHDRSIZE_SHIFT) &
4270                                 IXGBE_SRRCTL_BSIZEHDR_MASK);
4271                         srrctl |= IXGBE_SRRCTL_DESCTYPE_HDR_SPLIT_ALWAYS;
4272                 } else
4273 #endif
4274                         srrctl = IXGBE_SRRCTL_DESCTYPE_ADV_ONEBUF;
4275
4276                 /* Set if packets are dropped when no descriptors available */
4277                 if (rxq->drop_en)
4278                         srrctl |= IXGBE_SRRCTL_DROP_EN;
4279
4280                 /*
4281                  * Configure the RX buffer size in the BSIZEPACKET field of
4282                  * the SRRCTL register of the queue.
4283                  * The value is in 1 KB resolution. Valid values can be from
4284                  * 1 KB to 16 KB.
4285                  */
4286                 buf_size = (uint16_t)(rte_pktmbuf_data_room_size(rxq->mb_pool) -
4287                         RTE_PKTMBUF_HEADROOM);
4288                 srrctl |= ((buf_size >> IXGBE_SRRCTL_BSIZEPKT_SHIFT) &
4289                            IXGBE_SRRCTL_BSIZEPKT_MASK);
4290
4291                 IXGBE_WRITE_REG(hw, IXGBE_SRRCTL(rxq->reg_idx), srrctl);
4292
4293                 buf_size = (uint16_t) ((srrctl & IXGBE_SRRCTL_BSIZEPKT_MASK) <<
4294                                        IXGBE_SRRCTL_BSIZEPKT_SHIFT);
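                /*
                 * Worked example (illustrative, not from the original
                 * source): with a 2176 byte mempool data room and 128 bytes
                 * of headroom the buffer is 2048 bytes, BSIZEPACKET is
                 * written as 2048 / 1 KB = 2, and the read-back above
                 * yields 2048 again. A standard 1518 byte frame plus
                 * 8 bytes of dual VLAN tags fits in that buffer, so the
                 * check below does not force scattered Rx; a larger
                 * max_rx_pkt_len would.
                 */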
4295
4296                 /* Add the dual VLAN tag length to support dual VLAN (QinQ) */
4297                 if (dev->data->dev_conf.rxmode.max_rx_pkt_len +
4298                                             2 * IXGBE_VLAN_TAG_SIZE > buf_size)
4299                         dev->data->scattered_rx = 1;
4300         }
4301
4302         if (rx_conf->enable_scatter)
4303                 dev->data->scattered_rx = 1;
4304
4305         /*
4306          * Device configured with multiple RX queues.
4307          */
4308         ixgbe_dev_mq_rx_configure(dev);
4309
4310         /*
4311          * Setup the Checksum Register.
4312          * Disable Full-Packet Checksum, which is mutually exclusive with RSS.
4313          * Enable IP/L4 checksum computation by hardware if requested to do so.
4314          */
4315         rxcsum = IXGBE_READ_REG(hw, IXGBE_RXCSUM);
4316         rxcsum |= IXGBE_RXCSUM_PCSD;
4317         if (rx_conf->hw_ip_checksum)
4318                 rxcsum |= IXGBE_RXCSUM_IPPCSE;
4319         else
4320                 rxcsum &= ~IXGBE_RXCSUM_IPPCSE;
4321
4322         IXGBE_WRITE_REG(hw, IXGBE_RXCSUM, rxcsum);
4323
4324         if (hw->mac.type == ixgbe_mac_82599EB ||
4325             hw->mac.type == ixgbe_mac_X540) {
4326                 rdrxctl = IXGBE_READ_REG(hw, IXGBE_RDRXCTL);
4327                 if (rx_conf->hw_strip_crc)
4328                         rdrxctl |= IXGBE_RDRXCTL_CRCSTRIP;
4329                 else
4330                         rdrxctl &= ~IXGBE_RDRXCTL_CRCSTRIP;
4331                 rdrxctl &= ~IXGBE_RDRXCTL_RSCFRSTSIZE;
4332                 IXGBE_WRITE_REG(hw, IXGBE_RDRXCTL, rdrxctl);
4333         }
4334
4335         rc = ixgbe_set_rsc(dev);
4336         if (rc)
4337                 return rc;
4338
4339         ixgbe_set_rx_function(dev);
4340
4341         return 0;
4342 }
4343
4344 /*
4345  * Initializes Transmit Unit.
4346  */
4347 void __attribute__((cold))
4348 ixgbe_dev_tx_init(struct rte_eth_dev *dev)
4349 {
4350         struct ixgbe_hw     *hw;
4351         struct ixgbe_tx_queue *txq;
4352         uint64_t bus_addr;
4353         uint32_t hlreg0;
4354         uint32_t txctrl;
4355         uint16_t i;
4356
4357         PMD_INIT_FUNC_TRACE();
4358         hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
4359
4360         /* Enable TX CRC (checksum offload requirement) and hw padding
4361          * (TSO requirement) */
4362         hlreg0 = IXGBE_READ_REG(hw, IXGBE_HLREG0);
4363         hlreg0 |= (IXGBE_HLREG0_TXCRCEN | IXGBE_HLREG0_TXPADEN);
4364         IXGBE_WRITE_REG(hw, IXGBE_HLREG0, hlreg0);
4365
4366         /* Setup the Base and Length of the Tx Descriptor Rings */
4367         for (i = 0; i < dev->data->nb_tx_queues; i++) {
4368                 txq = dev->data->tx_queues[i];
4369
4370                 bus_addr = txq->tx_ring_phys_addr;
4371                 IXGBE_WRITE_REG(hw, IXGBE_TDBAL(txq->reg_idx),
4372                                 (uint32_t)(bus_addr & 0x00000000ffffffffULL));
4373                 IXGBE_WRITE_REG(hw, IXGBE_TDBAH(txq->reg_idx),
4374                                 (uint32_t)(bus_addr >> 32));
4375                 IXGBE_WRITE_REG(hw, IXGBE_TDLEN(txq->reg_idx),
4376                                 txq->nb_tx_desc * sizeof(union ixgbe_adv_tx_desc));
4377                 /* Setup the HW Tx Head and TX Tail descriptor pointers */
4378                 IXGBE_WRITE_REG(hw, IXGBE_TDH(txq->reg_idx), 0);
4379                 IXGBE_WRITE_REG(hw, IXGBE_TDT(txq->reg_idx), 0);
4380
4381                 /*
4382                  * Disable Tx Head Writeback RO bit, since this hoses
4383                  * bookkeeping if things aren't delivered in order.
4384                  */
4385                 switch (hw->mac.type) {
4386                         case ixgbe_mac_82598EB:
4387                                 txctrl = IXGBE_READ_REG(hw,
4388                                                         IXGBE_DCA_TXCTRL(txq->reg_idx));
4389                                 txctrl &= ~IXGBE_DCA_TXCTRL_DESC_WRO_EN;
4390                                 IXGBE_WRITE_REG(hw, IXGBE_DCA_TXCTRL(txq->reg_idx),
4391                                                 txctrl);
4392                                 break;
4393
4394                         case ixgbe_mac_82599EB:
4395                         case ixgbe_mac_X540:
4396                         case ixgbe_mac_X550:
4397                         case ixgbe_mac_X550EM_x:
4398                         default:
4399                                 txctrl = IXGBE_READ_REG(hw,
4400                                                 IXGBE_DCA_TXCTRL_82599(txq->reg_idx));
4401                                 txctrl &= ~IXGBE_DCA_TXCTRL_DESC_WRO_EN;
4402                                 IXGBE_WRITE_REG(hw, IXGBE_DCA_TXCTRL_82599(txq->reg_idx),
4403                                                 txctrl);
4404                                 break;
4405                 }
4406         }
4407
4408         /* Device configured with multiple TX queues. */
4409         ixgbe_dev_mq_tx_configure(dev);
4410 }
4411
4412 /*
4413  * Set up link for 82599 loopback mode Tx->Rx.
4414  */
4415 static inline void __attribute__((cold))
4416 ixgbe_setup_loopback_link_82599(struct ixgbe_hw *hw)
4417 {
4418         PMD_INIT_FUNC_TRACE();
4419
4420         if (ixgbe_verify_lesm_fw_enabled_82599(hw)) {
4421                 if (hw->mac.ops.acquire_swfw_sync(hw, IXGBE_GSSR_MAC_CSR_SM) !=
4422                                 IXGBE_SUCCESS) {
4423                         PMD_INIT_LOG(ERR, "Could not enable loopback mode");
4424                         /* ignore error */
4425                         return;
4426                 }
4427         }
4428
4429         /* Restart link */
4430         IXGBE_WRITE_REG(hw,
4431                         IXGBE_AUTOC,
4432                         IXGBE_AUTOC_LMS_10G_LINK_NO_AN | IXGBE_AUTOC_FLU);
4433         ixgbe_reset_pipeline_82599(hw);
4434
4435         hw->mac.ops.release_swfw_sync(hw, IXGBE_GSSR_MAC_CSR_SM);
4436         msec_delay(50);
4437 }
4438
4439
4440 /*
4441  * Start Transmit and Receive Units.
4442  */
4443 int __attribute__((cold))
4444 ixgbe_dev_rxtx_start(struct rte_eth_dev *dev)
4445 {
4446         struct ixgbe_hw     *hw;
4447         struct ixgbe_tx_queue *txq;
4448         struct ixgbe_rx_queue *rxq;
4449         uint32_t txdctl;
4450         uint32_t dmatxctl;
4451         uint32_t rxctrl;
4452         uint16_t i;
4453         int ret = 0;
4454
4455         PMD_INIT_FUNC_TRACE();
4456         hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
4457
4458         for (i = 0; i < dev->data->nb_tx_queues; i++) {
4459                 txq = dev->data->tx_queues[i];
4460                 /* Setup Transmit Threshold Registers */
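                /*
                 * Added note: pthresh, hthresh and wthresh are packed into
                 * bits 6:0, 14:8 and 22:16 of TXDCTL respectively, each
                 * masked to 7 bits as done below.
                 */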
4461                 txdctl = IXGBE_READ_REG(hw, IXGBE_TXDCTL(txq->reg_idx));
4462                 txdctl |= txq->pthresh & 0x7F;
4463                 txdctl |= ((txq->hthresh & 0x7F) << 8);
4464                 txdctl |= ((txq->wthresh & 0x7F) << 16);
4465                 IXGBE_WRITE_REG(hw, IXGBE_TXDCTL(txq->reg_idx), txdctl);
4466         }
4467
4468         if (hw->mac.type != ixgbe_mac_82598EB) {
4469                 dmatxctl = IXGBE_READ_REG(hw, IXGBE_DMATXCTL);
4470                 dmatxctl |= IXGBE_DMATXCTL_TE;
4471                 IXGBE_WRITE_REG(hw, IXGBE_DMATXCTL, dmatxctl);
4472         }
4473
4474         for (i = 0; i < dev->data->nb_tx_queues; i++) {
4475                 txq = dev->data->tx_queues[i];
4476                 if (!txq->tx_deferred_start) {
4477                         ret = ixgbe_dev_tx_queue_start(dev, i);
4478                         if (ret < 0)
4479                                 return ret;
4480                 }
4481         }
4482
4483         for (i = 0; i < dev->data->nb_rx_queues; i++) {
4484                 rxq = dev->data->rx_queues[i];
4485                 if (!rxq->rx_deferred_start) {
4486                         ret = ixgbe_dev_rx_queue_start(dev, i);
4487                         if (ret < 0)
4488                                 return ret;
4489                 }
4490         }
4491
4492         /* Enable Receive engine */
4493         rxctrl = IXGBE_READ_REG(hw, IXGBE_RXCTRL);
4494         if (hw->mac.type == ixgbe_mac_82598EB)
4495                 rxctrl |= IXGBE_RXCTRL_DMBYPS;
4496         rxctrl |= IXGBE_RXCTRL_RXEN;
4497         hw->mac.ops.enable_rx_dma(hw, rxctrl);
4498
4499         /* If loopback mode is enabled for 82599, set up the link accordingly */
4500         if (hw->mac.type == ixgbe_mac_82599EB &&
4501                         dev->data->dev_conf.lpbk_mode == IXGBE_LPBK_82599_TX_RX)
4502                 ixgbe_setup_loopback_link_82599(hw);
4503
4504         return 0;
4505 }
4506
4507 /*
4508  * Start Receive Units for specified queue.
4509  */
4510 int __attribute__((cold))
4511 ixgbe_dev_rx_queue_start(struct rte_eth_dev *dev, uint16_t rx_queue_id)
4512 {
4513         struct ixgbe_hw     *hw;
4514         struct ixgbe_rx_queue *rxq;
4515         uint32_t rxdctl;
4516         int poll_ms;
4517
4518         PMD_INIT_FUNC_TRACE();
4519         hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
4520
4521         if (rx_queue_id < dev->data->nb_rx_queues) {
4522                 rxq = dev->data->rx_queues[rx_queue_id];
4523
4524                 /* Allocate buffers for descriptor rings */
4525                 if (ixgbe_alloc_rx_queue_mbufs(rxq) != 0) {
4526                         PMD_INIT_LOG(ERR, "Could not alloc mbuf for queue:%d",
4527                                      rx_queue_id);
4528                         return -1;
4529                 }
4530                 rxdctl = IXGBE_READ_REG(hw, IXGBE_RXDCTL(rxq->reg_idx));
4531                 rxdctl |= IXGBE_RXDCTL_ENABLE;
4532                 IXGBE_WRITE_REG(hw, IXGBE_RXDCTL(rxq->reg_idx), rxdctl);
4533
4534                 /* Wait until RX Enable ready */
4535                 poll_ms = RTE_IXGBE_REGISTER_POLL_WAIT_10_MS;
4536                 do {
4537                         rte_delay_ms(1);
4538                         rxdctl = IXGBE_READ_REG(hw, IXGBE_RXDCTL(rxq->reg_idx));
4539                 } while (--poll_ms && !(rxdctl & IXGBE_RXDCTL_ENABLE));
4540                 if (!poll_ms)
4541                         PMD_INIT_LOG(ERR, "Could not enable Rx Queue %d",
4542                                      rx_queue_id);
4543                 rte_wmb();
4544                 IXGBE_WRITE_REG(hw, IXGBE_RDH(rxq->reg_idx), 0);
4545                 IXGBE_WRITE_REG(hw, IXGBE_RDT(rxq->reg_idx), rxq->nb_rx_desc - 1);
4546                 dev->data->rx_queue_state[rx_queue_id] = RTE_ETH_QUEUE_STATE_STARTED;
4547         } else
4548                 return -1;
4549
4550         return 0;
4551 }
4552
4553 /*
4554  * Stop Receive Units for specified queue.
4555  */
4556 int __attribute__((cold))
4557 ixgbe_dev_rx_queue_stop(struct rte_eth_dev *dev, uint16_t rx_queue_id)
4558 {
4559         struct ixgbe_hw     *hw;
4560         struct ixgbe_adapter *adapter =
4561                 (struct ixgbe_adapter *)dev->data->dev_private;
4562         struct ixgbe_rx_queue *rxq;
4563         uint32_t rxdctl;
4564         int poll_ms;
4565
4566         PMD_INIT_FUNC_TRACE();
4567         hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
4568
4569         if (rx_queue_id < dev->data->nb_rx_queues) {
4570                 rxq = dev->data->rx_queues[rx_queue_id];
4571
4572                 rxdctl = IXGBE_READ_REG(hw, IXGBE_RXDCTL(rxq->reg_idx));
4573                 rxdctl &= ~IXGBE_RXDCTL_ENABLE;
4574                 IXGBE_WRITE_REG(hw, IXGBE_RXDCTL(rxq->reg_idx), rxdctl);
4575
4576                 /* Wait until the RX Enable bit clears */
4577                 poll_ms = RTE_IXGBE_REGISTER_POLL_WAIT_10_MS;
4578                 do {
4579                         rte_delay_ms(1);
4580                         rxdctl = IXGBE_READ_REG(hw, IXGBE_RXDCTL(rxq->reg_idx));
4581                 } while (--poll_ms && (rxdctl & IXGBE_RXDCTL_ENABLE));
4582                 if (!poll_ms)
4583                         PMD_INIT_LOG(ERR, "Could not disable Rx Queue %d",
4584                                      rx_queue_id);
4585
4586                 rte_delay_us(RTE_IXGBE_WAIT_100_US);
4587
4588                 ixgbe_rx_queue_release_mbufs(rxq);
4589                 ixgbe_reset_rx_queue(adapter, rxq);
4590                 dev->data->rx_queue_state[rx_queue_id] = RTE_ETH_QUEUE_STATE_STOPPED;
4591         } else
4592                 return -1;
4593
4594         return 0;
4595 }
4596
4597
4598 /*
4599  * Start Transmit Units for specified queue.
4600  */
4601 int __attribute__((cold))
4602 ixgbe_dev_tx_queue_start(struct rte_eth_dev *dev, uint16_t tx_queue_id)
4603 {
4604         struct ixgbe_hw     *hw;
4605         struct ixgbe_tx_queue *txq;
4606         uint32_t txdctl;
4607         int poll_ms;
4608
4609         PMD_INIT_FUNC_TRACE();
4610         hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
4611
4612         if (tx_queue_id < dev->data->nb_tx_queues) {
4613                 txq = dev->data->tx_queues[tx_queue_id];
4614                 txdctl = IXGBE_READ_REG(hw, IXGBE_TXDCTL(txq->reg_idx));
4615                 txdctl |= IXGBE_TXDCTL_ENABLE;
4616                 IXGBE_WRITE_REG(hw, IXGBE_TXDCTL(txq->reg_idx), txdctl);
4617
4618                 /* Wait until TX Enable ready */
4619                 if (hw->mac.type == ixgbe_mac_82599EB) {
4620                         poll_ms = RTE_IXGBE_REGISTER_POLL_WAIT_10_MS;
4621                         do {
4622                                 rte_delay_ms(1);
4623                                 txdctl = IXGBE_READ_REG(hw,
4624                                         IXGBE_TXDCTL(txq->reg_idx));
4625                         } while (--poll_ms && !(txdctl & IXGBE_TXDCTL_ENABLE));
4626                         if (!poll_ms)
4627                                 PMD_INIT_LOG(ERR, "Could not enable "
4628                                              "Tx Queue %d", tx_queue_id);
4629                 }
4630                 rte_wmb();
4631                 IXGBE_WRITE_REG(hw, IXGBE_TDH(txq->reg_idx), 0);
4632                 IXGBE_WRITE_REG(hw, IXGBE_TDT(txq->reg_idx), 0);
4633                 dev->data->tx_queue_state[tx_queue_id] = RTE_ETH_QUEUE_STATE_STARTED;
4634         } else
4635                 return -1;
4636
4637         return 0;
4638 }
4639
4640 /*
4641  * Stop Transmit Units for specified queue.
4642  */
4643 int __attribute__((cold))
4644 ixgbe_dev_tx_queue_stop(struct rte_eth_dev *dev, uint16_t tx_queue_id)
4645 {
4646         struct ixgbe_hw     *hw;
4647         struct ixgbe_tx_queue *txq;
4648         uint32_t txdctl;
4649         uint32_t txtdh, txtdt;
4650         int poll_ms;
4651
4652         PMD_INIT_FUNC_TRACE();
4653         hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
4654
4655         if (tx_queue_id < dev->data->nb_tx_queues) {
4656                 txq = dev->data->tx_queues[tx_queue_id];
4657
4658                 /* Wait until TX queue is empty */
4659                 if (hw->mac.type == ixgbe_mac_82599EB) {
4660                         poll_ms = RTE_IXGBE_REGISTER_POLL_WAIT_10_MS;
4661                         do {
4662                                 rte_delay_us(RTE_IXGBE_WAIT_100_US);
4663                                 txtdh = IXGBE_READ_REG(hw,
4664                                                 IXGBE_TDH(txq->reg_idx));
4665                                 txtdt = IXGBE_READ_REG(hw,
4666                                                 IXGBE_TDT(txq->reg_idx));
4667                         } while (--poll_ms && (txtdh != txtdt));
4668                         if (!poll_ms)
4669                                 PMD_INIT_LOG(ERR, "Tx Queue %d is not empty "
4670                                              "when stopping.", tx_queue_id);
4671                 }
4672
4673                 txdctl = IXGBE_READ_REG(hw, IXGBE_TXDCTL(txq->reg_idx));
4674                 txdctl &= ~IXGBE_TXDCTL_ENABLE;
4675                 IXGBE_WRITE_REG(hw, IXGBE_TXDCTL(txq->reg_idx), txdctl);
4676
4677                 /* Wait until the TX Enable bit clears */
4678                 if (hw->mac.type == ixgbe_mac_82599EB) {
4679                         poll_ms = RTE_IXGBE_REGISTER_POLL_WAIT_10_MS;
4680                         do {
4681                                 rte_delay_ms(1);
4682                                 txdctl = IXGBE_READ_REG(hw,
4683                                                 IXGBE_TXDCTL(txq->reg_idx));
4684                         } while (--poll_ms && (txdctl & IXGBE_TXDCTL_ENABLE));
4685                         if (!poll_ms)
4686                                 PMD_INIT_LOG(ERR, "Could not disable "
4687                                              "Tx Queue %d", tx_queue_id);
4688                 }
4689
4690                 if (txq->ops != NULL) {
4691                         txq->ops->release_mbufs(txq);
4692                         txq->ops->reset(txq);
4693                 }
4694                 dev->data->tx_queue_state[tx_queue_id] = RTE_ETH_QUEUE_STATE_STOPPED;
4695         } else
4696                 return -1;
4697
4698         return 0;
4699 }
4700
4701 void
4702 ixgbe_rxq_info_get(struct rte_eth_dev *dev, uint16_t queue_id,
4703         struct rte_eth_rxq_info *qinfo)
4704 {
4705         struct ixgbe_rx_queue *rxq;
4706
4707         rxq = dev->data->rx_queues[queue_id];
4708
4709         qinfo->mp = rxq->mb_pool;
4710         qinfo->scattered_rx = dev->data->scattered_rx;
4711         qinfo->nb_desc = rxq->nb_rx_desc;
4712
4713         qinfo->conf.rx_free_thresh = rxq->rx_free_thresh;
4714         qinfo->conf.rx_drop_en = rxq->drop_en;
4715         qinfo->conf.rx_deferred_start = rxq->rx_deferred_start;
4716 }
4717
4718 void
4719 ixgbe_txq_info_get(struct rte_eth_dev *dev, uint16_t queue_id,
4720         struct rte_eth_txq_info *qinfo)
4721 {
4722         struct ixgbe_tx_queue *txq;
4723
4724         txq = dev->data->tx_queues[queue_id];
4725
4726         qinfo->nb_desc = txq->nb_tx_desc;
4727
4728         qinfo->conf.tx_thresh.pthresh = txq->pthresh;
4729         qinfo->conf.tx_thresh.hthresh = txq->hthresh;
4730         qinfo->conf.tx_thresh.wthresh = txq->wthresh;
4731
4732         qinfo->conf.tx_free_thresh = txq->tx_free_thresh;
4733         qinfo->conf.tx_rs_thresh = txq->tx_rs_thresh;
4734         qinfo->conf.txq_flags = txq->txq_flags;
4735         qinfo->conf.tx_deferred_start = txq->tx_deferred_start;
4736 }
4737
4738 /*
4739  * [VF] Initializes Receive Unit.
4740  */
4741 int __attribute__((cold))
4742 ixgbevf_dev_rx_init(struct rte_eth_dev *dev)
4743 {
4744         struct ixgbe_hw     *hw;
4745         struct ixgbe_rx_queue *rxq;
4746         uint64_t bus_addr;
4747         uint32_t srrctl, psrtype = 0;
4748         uint16_t buf_size;
4749         uint16_t i;
4750         int ret;
4751
4752         PMD_INIT_FUNC_TRACE();
4753         hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
4754
4755         if (rte_is_power_of_2(dev->data->nb_rx_queues) == 0) {
4756                 PMD_INIT_LOG(ERR, "The number of Rx queues is invalid, "
4757                         "it must be a power of 2");
4758                 return -1;
4759         }
4760
4761         if (dev->data->nb_rx_queues > hw->mac.max_rx_queues) {
4762                 PMD_INIT_LOG(ERR, "The number of Rx queues is invalid, "
4763                         "it must be less than or equal to %d",
4764                         hw->mac.max_rx_queues);
4765                 return -1;
4766         }
4767
4768         /*
4769          * When the VF driver issues an IXGBE_VF_RESET request, the PF driver
4770          * disables the VF receipt of packets if the PF MTU is > 1500.
4771          * This is done to deal with the 82599 limitation that forces
4772          * the PF and all VFs to share the same MTU.
4773          * The PF driver then re-enables VF packet receipt when the
4774          * VF driver issues an IXGBE_VF_SET_LPE request.
4775          * In the meantime, the VF device cannot be used, even if the VF driver
4776          * and the Guest VM network stack are ready to accept packets with a
4777          * size up to the PF MTU.
4778          * As a workaround for this PF behaviour, force the call to
4779          * ixgbevf_rlpml_set_vf even if jumbo frames are not used. This way,
4780          * VF packet reception works in all cases.
4781          */
4782         ixgbevf_rlpml_set_vf(hw,
4783                 (uint16_t)dev->data->dev_conf.rxmode.max_rx_pkt_len);
4784
4785         /* Setup RX queues */
4786         for (i = 0; i < dev->data->nb_rx_queues; i++) {
4787                 rxq = dev->data->rx_queues[i];
4788
4789                 /* Allocate buffers for descriptor rings */
4790                 ret = ixgbe_alloc_rx_queue_mbufs(rxq);
4791                 if (ret)
4792                         return ret;
4793
4794                 /* Setup the Base and Length of the Rx Descriptor Rings */
4795                 bus_addr = rxq->rx_ring_phys_addr;
4796
4797                 IXGBE_WRITE_REG(hw, IXGBE_VFRDBAL(i),
4798                                 (uint32_t)(bus_addr & 0x00000000ffffffffULL));
4799                 IXGBE_WRITE_REG(hw, IXGBE_VFRDBAH(i),
4800                                 (uint32_t)(bus_addr >> 32));
4801                 IXGBE_WRITE_REG(hw, IXGBE_VFRDLEN(i),
4802                                 rxq->nb_rx_desc * sizeof(union ixgbe_adv_rx_desc));
4803                 IXGBE_WRITE_REG(hw, IXGBE_VFRDH(i), 0);
4804                 IXGBE_WRITE_REG(hw, IXGBE_VFRDT(i), 0);
4805
4806
4807                 /* Configure the SRRCTL register */
4808 #ifdef RTE_HEADER_SPLIT_ENABLE
4809                 /*
4810                  * Configure Header Split
4811                  */
4812                 if (dev->data->dev_conf.rxmode.header_split) {
4813                         srrctl = ((dev->data->dev_conf.rxmode.split_hdr_size <<
4814                                 IXGBE_SRRCTL_BSIZEHDRSIZE_SHIFT) &
4815                                 IXGBE_SRRCTL_BSIZEHDR_MASK);
4816                         srrctl |= IXGBE_SRRCTL_DESCTYPE_HDR_SPLIT_ALWAYS;
4817                 } else
4818 #endif
4819                         srrctl = IXGBE_SRRCTL_DESCTYPE_ADV_ONEBUF;
4820
4821                 /* Set if packets are dropped when no descriptors available */
4822                 if (rxq->drop_en)
4823                         srrctl |= IXGBE_SRRCTL_DROP_EN;
4824
4825                 /*
4826                  * Configure the RX buffer size in the BSIZEPACKET field of
4827                  * the SRRCTL register of the queue.
4828                  * The value is in 1 KB resolution. Valid values can be from
4829                  * 1 KB to 16 KB.
4830                  */
4831                 buf_size = (uint16_t)(rte_pktmbuf_data_room_size(rxq->mb_pool) -
4832                         RTE_PKTMBUF_HEADROOM);
4833                 srrctl |= ((buf_size >> IXGBE_SRRCTL_BSIZEPKT_SHIFT) &
4834                            IXGBE_SRRCTL_BSIZEPKT_MASK);
4835
4836                 /*
4837                  * VF modification to write virtual function SRRCTL register
4838                  */
4839                 IXGBE_WRITE_REG(hw, IXGBE_VFSRRCTL(i), srrctl);
4840
4841                 buf_size = (uint16_t) ((srrctl & IXGBE_SRRCTL_BSIZEPKT_MASK) <<
4842                                        IXGBE_SRRCTL_BSIZEPKT_SHIFT);
4843
4844                 if (dev->data->dev_conf.rxmode.enable_scatter ||
4845                     /* Add the dual VLAN tag length to support dual VLAN (QinQ) */
4846                     (dev->data->dev_conf.rxmode.max_rx_pkt_len +
4847                                 2 * IXGBE_VLAN_TAG_SIZE) > buf_size) {
4848                         if (!dev->data->scattered_rx)
4849                                 PMD_INIT_LOG(DEBUG, "forcing scatter mode");
4850                         dev->data->scattered_rx = 1;
4851                 }
4852         }
4853
4854 #ifdef RTE_HEADER_SPLIT_ENABLE
4855         if (dev->data->dev_conf.rxmode.header_split)
4856                 /* Must setup the PSRTYPE register */
4857                 psrtype = IXGBE_PSRTYPE_TCPHDR |
4858                         IXGBE_PSRTYPE_UDPHDR   |
4859                         IXGBE_PSRTYPE_IPV4HDR  |
4860                         IXGBE_PSRTYPE_IPV6HDR;
4861 #endif
4862
4863         /* Set RQPL for VF RSS according to the max Rx queue number */
4864         psrtype |= (dev->data->nb_rx_queues >> 1) <<
4865                 IXGBE_PSRTYPE_RQPL_SHIFT;
4866         IXGBE_WRITE_REG(hw, IXGBE_VFPSRTYPE, psrtype);
4867
4868         ixgbe_set_rx_function(dev);
4869
4870         return 0;
4871 }
4872
4873 /*
4874  * [VF] Initializes Transmit Unit.
4875  */
4876 void __attribute__((cold))
4877 ixgbevf_dev_tx_init(struct rte_eth_dev *dev)
4878 {
4879         struct ixgbe_hw     *hw;
4880         struct ixgbe_tx_queue *txq;
4881         uint64_t bus_addr;
4882         uint32_t txctrl;
4883         uint16_t i;
4884
4885         PMD_INIT_FUNC_TRACE();
4886         hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
4887
4888         /* Setup the Base and Length of the Tx Descriptor Rings */
4889         for (i = 0; i < dev->data->nb_tx_queues; i++) {
4890                 txq = dev->data->tx_queues[i];
4891                 bus_addr = txq->tx_ring_phys_addr;
4892                 IXGBE_WRITE_REG(hw, IXGBE_VFTDBAL(i),
4893                                 (uint32_t)(bus_addr & 0x00000000ffffffffULL));
4894                 IXGBE_WRITE_REG(hw, IXGBE_VFTDBAH(i),
4895                                 (uint32_t)(bus_addr >> 32));
4896                 IXGBE_WRITE_REG(hw, IXGBE_VFTDLEN(i),
4897                                 txq->nb_tx_desc * sizeof(union ixgbe_adv_tx_desc));
4898                 /* Setup the HW Tx Head and TX Tail descriptor pointers */
4899                 IXGBE_WRITE_REG(hw, IXGBE_VFTDH(i), 0);
4900                 IXGBE_WRITE_REG(hw, IXGBE_VFTDT(i), 0);
4901
4902                 /*
4903                  * Disable Tx Head Writeback RO bit, since this hoses
4904                  * bookkeeping if things aren't delivered in order.
4905                  */
4906                 txctrl = IXGBE_READ_REG(hw,
4907                                 IXGBE_VFDCA_TXCTRL(i));
4908                 txctrl &= ~IXGBE_DCA_TXCTRL_DESC_WRO_EN;
4909                 IXGBE_WRITE_REG(hw, IXGBE_VFDCA_TXCTRL(i),
4910                                 txctrl);
4911         }
4912 }
4913
4914 /*
4915  * [VF] Start Transmit and Receive Units.
4916  */
4917 void __attribute__((cold))
4918 ixgbevf_dev_rxtx_start(struct rte_eth_dev *dev)
4919 {
4920         struct ixgbe_hw     *hw;
4921         struct ixgbe_tx_queue *txq;
4922         struct ixgbe_rx_queue *rxq;
4923         uint32_t txdctl;
4924         uint32_t rxdctl;
4925         uint16_t i;
4926         int poll_ms;
4927
4928         PMD_INIT_FUNC_TRACE();
4929         hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
4930
4931         for (i = 0; i < dev->data->nb_tx_queues; i++) {
4932                 txq = dev->data->tx_queues[i];
4933                 /* Setup Transmit Threshold Registers */
4934                 txdctl = IXGBE_READ_REG(hw, IXGBE_VFTXDCTL(i));
4935                 txdctl |= txq->pthresh & 0x7F;
4936                 txdctl |= ((txq->hthresh & 0x7F) << 8);
4937                 txdctl |= ((txq->wthresh & 0x7F) << 16);
4938                 IXGBE_WRITE_REG(hw, IXGBE_VFTXDCTL(i), txdctl);
4939         }
4940
4941         for (i = 0; i < dev->data->nb_tx_queues; i++) {
4942
4943                 txdctl = IXGBE_READ_REG(hw, IXGBE_VFTXDCTL(i));
4944                 txdctl |= IXGBE_TXDCTL_ENABLE;
4945                 IXGBE_WRITE_REG(hw, IXGBE_VFTXDCTL(i), txdctl);
4946
4947                 poll_ms = 10;
4948                 /* Wait until TX Enable ready */
4949                 do {
4950                         rte_delay_ms(1);
4951                         txdctl = IXGBE_READ_REG(hw, IXGBE_VFTXDCTL(i));
4952                 } while (--poll_ms && !(txdctl & IXGBE_TXDCTL_ENABLE));
4953                 if (!poll_ms)
4954                         PMD_INIT_LOG(ERR, "Could not enable Tx Queue %d", i);
4955         }
4956         for (i = 0; i < dev->data->nb_rx_queues; i++) {
4957
4958                 rxq = dev->data->rx_queues[i];
4959
4960                 rxdctl = IXGBE_READ_REG(hw, IXGBE_VFRXDCTL(i));
4961                 rxdctl |= IXGBE_RXDCTL_ENABLE;
4962                 IXGBE_WRITE_REG(hw, IXGBE_VFRXDCTL(i), rxdctl);
4963
4964                 /* Wait until RX Enable ready */
4965                 poll_ms = 10;
4966                 do {
4967                         rte_delay_ms(1);
4968                         rxdctl = IXGBE_READ_REG(hw, IXGBE_VFRXDCTL(i));
4969                 } while (--poll_ms && !(rxdctl & IXGBE_RXDCTL_ENABLE));
4970                 if (!poll_ms)
4971                         PMD_INIT_LOG(ERR, "Could not enable Rx Queue %d", i);
4972                 rte_wmb();
4973                 IXGBE_WRITE_REG(hw, IXGBE_VFRDT(i), rxq->nb_rx_desc - 1);
4974
4975         }
4976 }
4977
4978 /* Stubs needed for linkage when CONFIG_RTE_IXGBE_INC_VECTOR is set to 'n' */
4979 int __attribute__((weak))
4980 ixgbe_rx_vec_dev_conf_condition_check(struct rte_eth_dev __rte_unused *dev)
4981 {
4982         return -1;
4983 }
4984
4985 uint16_t __attribute__((weak))
4986 ixgbe_recv_pkts_vec(
4987         void __rte_unused *rx_queue,
4988         struct rte_mbuf __rte_unused **rx_pkts,
4989         uint16_t __rte_unused nb_pkts)
4990 {
4991         return 0;
4992 }
4993
4994 uint16_t __attribute__((weak))
4995 ixgbe_recv_scattered_pkts_vec(
4996         void __rte_unused *rx_queue,
4997         struct rte_mbuf __rte_unused **rx_pkts,
4998         uint16_t __rte_unused nb_pkts)
4999 {
5000         return 0;
5001 }
5002
5003 int __attribute__((weak))
5004 ixgbe_rxq_vec_setup(struct ixgbe_rx_queue __rte_unused *rxq)
5005 {
5006         return -1;
5007 }