drivers/net/ixgbe/ixgbe_rxtx.c
1 /*-
2  *   BSD LICENSE
3  *
4  *   Copyright(c) 2010-2015 Intel Corporation. All rights reserved.
5  *   Copyright 2014 6WIND S.A.
6  *   All rights reserved.
7  *
8  *   Redistribution and use in source and binary forms, with or without
9  *   modification, are permitted provided that the following conditions
10  *   are met:
11  *
12  *     * Redistributions of source code must retain the above copyright
13  *       notice, this list of conditions and the following disclaimer.
14  *     * Redistributions in binary form must reproduce the above copyright
15  *       notice, this list of conditions and the following disclaimer in
16  *       the documentation and/or other materials provided with the
17  *       distribution.
18  *     * Neither the name of Intel Corporation nor the names of its
19  *       contributors may be used to endorse or promote products derived
20  *       from this software without specific prior written permission.
21  *
22  *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
23  *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
24  *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
25  *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
26  *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
27  *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
28  *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
29  *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
30  *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
31  *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
32  *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
33  */
34
35 #include <sys/queue.h>
36
37 #include <stdio.h>
38 #include <stdlib.h>
39 #include <string.h>
40 #include <errno.h>
41 #include <stdint.h>
42 #include <stdarg.h>
43 #include <unistd.h>
44 #include <inttypes.h>
45
46 #include <rte_byteorder.h>
47 #include <rte_common.h>
48 #include <rte_cycles.h>
49 #include <rte_log.h>
50 #include <rte_debug.h>
51 #include <rte_interrupts.h>
52 #include <rte_pci.h>
53 #include <rte_memory.h>
54 #include <rte_memzone.h>
55 #include <rte_launch.h>
56 #include <rte_eal.h>
57 #include <rte_per_lcore.h>
58 #include <rte_lcore.h>
59 #include <rte_atomic.h>
60 #include <rte_branch_prediction.h>
61 #include <rte_ring.h>
62 #include <rte_mempool.h>
63 #include <rte_malloc.h>
64 #include <rte_mbuf.h>
65 #include <rte_ether.h>
66 #include <rte_ethdev.h>
67 #include <rte_prefetch.h>
68 #include <rte_udp.h>
69 #include <rte_tcp.h>
70 #include <rte_sctp.h>
71 #include <rte_string_fns.h>
72 #include <rte_errno.h>
73 #include <rte_ip.h>
74
75 #include "ixgbe_logs.h"
76 #include "base/ixgbe_api.h"
77 #include "base/ixgbe_vf.h"
78 #include "ixgbe_ethdev.h"
79 #include "base/ixgbe_dcb.h"
80 #include "base/ixgbe_common.h"
81 #include "ixgbe_rxtx.h"
82
83 /* Bit mask to indicate which bits are required for building the TX context */
84 #define IXGBE_TX_OFFLOAD_MASK (                  \
85                 PKT_TX_VLAN_PKT |                \
86                 PKT_TX_IP_CKSUM |                \
87                 PKT_TX_L4_MASK |                 \
88                 PKT_TX_TCP_SEG)
89
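/*
 * Allocate a raw (uninitialized) mbuf from the given mempool.
 * Used to replenish RX ring descriptors.
 */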
90 static inline struct rte_mbuf *
91 rte_rxmbuf_alloc(struct rte_mempool *mp)
92 {
93         struct rte_mbuf *m;
94
95         m = __rte_mbuf_raw_alloc(mp);
96         __rte_mbuf_sanity_check_raw(m, 0);
97         return m;
98 }
99
100
101 #if 1
102 #define RTE_PMD_USE_PREFETCH
103 #endif
104
105 #ifdef RTE_PMD_USE_PREFETCH
106 /*
107  * Prefetch a cache line into all cache levels.
108  */
109 #define rte_ixgbe_prefetch(p)   rte_prefetch0(p)
110 #else
111 #define rte_ixgbe_prefetch(p)   do {} while(0)
112 #endif
113
114 /*********************************************************************
115  *
116  *  TX functions
117  *
118  **********************************************************************/
119
120 /*
121  * Check for descriptors with their DD bit set and free mbufs.
122  * Return the total number of buffers freed.
123  */
124 static inline int __attribute__((always_inline))
125 ixgbe_tx_free_bufs(struct ixgbe_tx_queue *txq)
126 {
127         struct ixgbe_tx_entry *txep;
128         uint32_t status;
129         int i;
130
131         /* check DD bit on threshold descriptor */
132         status = txq->tx_ring[txq->tx_next_dd].wb.status;
133         if (!(status & rte_cpu_to_le_32(IXGBE_ADVTXD_STAT_DD)))
134                 return 0;
135
136         /*
137          * first buffer to free from S/W ring is at index
138          * tx_next_dd - (tx_rs_thresh-1)
139          */
140         txep = &(txq->sw_ring[txq->tx_next_dd - (txq->tx_rs_thresh - 1)]);
141
142         /* free buffers one at a time */
143         if ((txq->txq_flags & (uint32_t)ETH_TXQ_FLAGS_NOREFCOUNT) != 0) {
144                 for (i = 0; i < txq->tx_rs_thresh; ++i, ++txep) {
145                         txep->mbuf->next = NULL;
146                         rte_mempool_put(txep->mbuf->pool, txep->mbuf);
147                         txep->mbuf = NULL;
148                 }
149         } else {
150                 for (i = 0; i < txq->tx_rs_thresh; ++i, ++txep) {
151                         rte_pktmbuf_free_seg(txep->mbuf);
152                         txep->mbuf = NULL;
153                 }
154         }
155
156         /* buffers were freed, update counters */
157         txq->nb_tx_free = (uint16_t)(txq->nb_tx_free + txq->tx_rs_thresh);
158         txq->tx_next_dd = (uint16_t)(txq->tx_next_dd + txq->tx_rs_thresh);
159         if (txq->tx_next_dd >= txq->nb_tx_desc)
160                 txq->tx_next_dd = (uint16_t)(txq->tx_rs_thresh - 1);
161
162         return txq->tx_rs_thresh;
163 }
164
165 /* Populate 4 descriptors with data from 4 mbufs */
166 static inline void
167 tx4(volatile union ixgbe_adv_tx_desc *txdp, struct rte_mbuf **pkts)
168 {
169         uint64_t buf_dma_addr;
170         uint32_t pkt_len;
171         int i;
172
173         for (i = 0; i < 4; ++i, ++txdp, ++pkts) {
174                 buf_dma_addr = RTE_MBUF_DATA_DMA_ADDR(*pkts);
175                 pkt_len = (*pkts)->data_len;
176
177                 /* write data to descriptor */
178                 txdp->read.buffer_addr = rte_cpu_to_le_64(buf_dma_addr);
179
180                 txdp->read.cmd_type_len =
181                         rte_cpu_to_le_32((uint32_t)DCMD_DTYP_FLAGS | pkt_len);
182
183                 txdp->read.olinfo_status =
184                         rte_cpu_to_le_32(pkt_len << IXGBE_ADVTXD_PAYLEN_SHIFT);
185
186                 rte_prefetch0(&(*pkts)->pool);
187         }
188 }
189
190 /* Populate 1 descriptor with data from 1 mbuf */
191 static inline void
192 tx1(volatile union ixgbe_adv_tx_desc *txdp, struct rte_mbuf **pkts)
193 {
194         uint64_t buf_dma_addr;
195         uint32_t pkt_len;
196
197         buf_dma_addr = RTE_MBUF_DATA_DMA_ADDR(*pkts);
198         pkt_len = (*pkts)->data_len;
199
200         /* write data to descriptor */
201         txdp->read.buffer_addr = rte_cpu_to_le_64(buf_dma_addr);
202         txdp->read.cmd_type_len =
203                         rte_cpu_to_le_32((uint32_t)DCMD_DTYP_FLAGS | pkt_len);
204         txdp->read.olinfo_status =
205                         rte_cpu_to_le_32(pkt_len << IXGBE_ADVTXD_PAYLEN_SHIFT);
206         rte_prefetch0(&(*pkts)->pool);
207 }
208
209 /*
210  * Fill H/W descriptor ring with mbuf data.
211  * Copy mbuf pointers to the S/W ring.
212  */
213 static inline void
214 ixgbe_tx_fill_hw_ring(struct ixgbe_tx_queue *txq, struct rte_mbuf **pkts,
215                       uint16_t nb_pkts)
216 {
217         volatile union ixgbe_adv_tx_desc *txdp = &(txq->tx_ring[txq->tx_tail]);
218         struct ixgbe_tx_entry *txep = &(txq->sw_ring[txq->tx_tail]);
219         const int N_PER_LOOP = 4;
220         const int N_PER_LOOP_MASK = N_PER_LOOP-1;
221         int mainpart, leftover;
222         int i, j;
223
224         /*
225          * Process most of the packets in chunks of N pkts.  Any
226          * leftover packets will get processed one at a time.
227          */
228         mainpart = (nb_pkts & ((uint32_t) ~N_PER_LOOP_MASK));
229         leftover = (nb_pkts & ((uint32_t)  N_PER_LOOP_MASK));
230         for (i = 0; i < mainpart; i += N_PER_LOOP) {
231                 /* Copy N mbuf pointers to the S/W ring */
232                 for (j = 0; j < N_PER_LOOP; ++j) {
233                         (txep + i + j)->mbuf = *(pkts + i + j);
234                 }
235                 tx4(txdp + i, pkts + i);
236         }
237
238         if (unlikely(leftover > 0)) {
239                 for (i = 0; i < leftover; ++i) {
240                         (txep + mainpart + i)->mbuf = *(pkts + mainpart + i);
241                         tx1(txdp + mainpart + i, pkts + mainpart + i);
242                 }
243         }
244 }
245
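/*
 * Simple transmit path: assumes single-segment mbufs and no offloads
 * (no context descriptors). Frees completed buffers when the free count
 * drops below the threshold, fills the descriptor ring in bulk and
 * updates the tail register.
 */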
246 static inline uint16_t
247 tx_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts,
248              uint16_t nb_pkts)
249 {
250         struct ixgbe_tx_queue *txq = (struct ixgbe_tx_queue *)tx_queue;
251         volatile union ixgbe_adv_tx_desc *tx_r = txq->tx_ring;
252         uint16_t n = 0;
253
254         /*
255          * Begin scanning the H/W ring for done descriptors when the
256          * number of available descriptors drops below tx_free_thresh.  For
257          * each done descriptor, free the associated buffer.
258          */
259         if (txq->nb_tx_free < txq->tx_free_thresh)
260                 ixgbe_tx_free_bufs(txq);
261
262         /* Only use descriptors that are available */
263         nb_pkts = (uint16_t)RTE_MIN(txq->nb_tx_free, nb_pkts);
264         if (unlikely(nb_pkts == 0))
265                 return 0;
266
267         /* Use exactly nb_pkts descriptors */
268         txq->nb_tx_free = (uint16_t)(txq->nb_tx_free - nb_pkts);
269
270         /*
271          * At this point, we know there are enough descriptors in the
272          * ring to transmit all the packets.  This assumes that each
273          * mbuf contains a single segment, and that no new offloads
274          * are expected, which would require a new context descriptor.
275          */
276
277         /*
278          * See if we're going to wrap around. If so, handle the top
279          * of the descriptor ring first, then do the bottom.  If not,
280          * the processing looks just like the "bottom" part anyway...
281          */
282         if ((txq->tx_tail + nb_pkts) > txq->nb_tx_desc) {
283                 n = (uint16_t)(txq->nb_tx_desc - txq->tx_tail);
284                 ixgbe_tx_fill_hw_ring(txq, tx_pkts, n);
285
286                 /*
287                  * We know that the last descriptor in the ring will need to
288                  * have its RS bit set because tx_rs_thresh has to be
289                  * a divisor of the ring size
290                  */
291                 tx_r[txq->tx_next_rs].read.cmd_type_len |=
292                         rte_cpu_to_le_32(IXGBE_ADVTXD_DCMD_RS);
293                 txq->tx_next_rs = (uint16_t)(txq->tx_rs_thresh - 1);
294
295                 txq->tx_tail = 0;
296         }
297
298         /* Fill H/W descriptor ring with mbuf data */
299         ixgbe_tx_fill_hw_ring(txq, tx_pkts + n, (uint16_t)(nb_pkts - n));
300         txq->tx_tail = (uint16_t)(txq->tx_tail + (nb_pkts - n));
301
302         /*
303          * Determine if RS bit should be set
304          * This is what we actually want:
305          *   if ((txq->tx_tail - 1) >= txq->tx_next_rs)
306          * but instead of subtracting 1 and doing >=, we can just do
307          * greater than without subtracting.
308          */
309         if (txq->tx_tail > txq->tx_next_rs) {
310                 tx_r[txq->tx_next_rs].read.cmd_type_len |=
311                         rte_cpu_to_le_32(IXGBE_ADVTXD_DCMD_RS);
312                 txq->tx_next_rs = (uint16_t)(txq->tx_next_rs +
313                                                 txq->tx_rs_thresh);
314                 if (txq->tx_next_rs >= txq->nb_tx_desc)
315                         txq->tx_next_rs = (uint16_t)(txq->tx_rs_thresh - 1);
316         }
317
318         /*
319          * Check for wrap-around. This would only happen if we used
320          * up to the last descriptor in the ring, no more, no less.
321          */
322         if (txq->tx_tail >= txq->nb_tx_desc)
323                 txq->tx_tail = 0;
324
325         /* update tail pointer */
326         rte_wmb();
327         IXGBE_PCI_REG_WRITE(txq->tdt_reg_addr, txq->tx_tail);
328
329         return nb_pkts;
330 }
331
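/*
 * Burst-split wrapper around tx_xmit_pkts(): transmit in chunks of at
 * most RTE_PMD_IXGBE_TX_MAX_BURST packets.
 */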
332 uint16_t
333 ixgbe_xmit_pkts_simple(void *tx_queue, struct rte_mbuf **tx_pkts,
334                        uint16_t nb_pkts)
335 {
336         uint16_t nb_tx;
337
338         /* Transmit directly if the burst fits within TX_MAX_BURST pkts */
339         if (likely(nb_pkts <= RTE_PMD_IXGBE_TX_MAX_BURST))
340                 return tx_xmit_pkts(tx_queue, tx_pkts, nb_pkts);
341
342         /* transmit more than the max burst, in chunks of TX_MAX_BURST */
343         nb_tx = 0;
344         while (nb_pkts) {
345                 uint16_t ret, n;
346                 n = (uint16_t)RTE_MIN(nb_pkts, RTE_PMD_IXGBE_TX_MAX_BURST);
347                 ret = tx_xmit_pkts(tx_queue, &(tx_pkts[nb_tx]), n);
348                 nb_tx = (uint16_t)(nb_tx + ret);
349                 nb_pkts = (uint16_t)(nb_pkts - ret);
350                 if (ret < n)
351                         break;
352         }
353
354         return nb_tx;
355 }
356
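/*
 * Build an advanced TX context descriptor for the requested offloads
 * (VLAN insertion, L3/L4 checksum, TSO) and record it in the queue's
 * context cache for later reuse.
 */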
357 static inline void
358 ixgbe_set_xmit_ctx(struct ixgbe_tx_queue *txq,
359                 volatile struct ixgbe_adv_tx_context_desc *ctx_txd,
360                 uint64_t ol_flags, union ixgbe_tx_offload tx_offload)
361 {
362         uint32_t type_tucmd_mlhl;
363         uint32_t mss_l4len_idx = 0;
364         uint32_t ctx_idx;
365         uint32_t vlan_macip_lens;
366         union ixgbe_tx_offload tx_offload_mask;
367
368         ctx_idx = txq->ctx_curr;
369         tx_offload_mask.data = 0;
370         type_tucmd_mlhl = 0;
371
372         /* Specify which HW CTX to upload. */
373         mss_l4len_idx |= (ctx_idx << IXGBE_ADVTXD_IDX_SHIFT);
374
375         if (ol_flags & PKT_TX_VLAN_PKT) {
376                 tx_offload_mask.vlan_tci |= ~0;
377         }
378
379         /* check if TCP segmentation is required for this packet */
380         if (ol_flags & PKT_TX_TCP_SEG) {
381                 /* implies IP cksum in IPv4 */
382                 if (ol_flags & PKT_TX_IP_CKSUM)
383                         type_tucmd_mlhl = IXGBE_ADVTXD_TUCMD_IPV4 |
384                                 IXGBE_ADVTXD_TUCMD_L4T_TCP |
385                                 IXGBE_ADVTXD_DTYP_CTXT | IXGBE_ADVTXD_DCMD_DEXT;
386                 else
387                         type_tucmd_mlhl = IXGBE_ADVTXD_TUCMD_IPV6 |
388                                 IXGBE_ADVTXD_TUCMD_L4T_TCP |
389                                 IXGBE_ADVTXD_DTYP_CTXT | IXGBE_ADVTXD_DCMD_DEXT;
390
391                 tx_offload_mask.l2_len |= ~0;
392                 tx_offload_mask.l3_len |= ~0;
393                 tx_offload_mask.l4_len |= ~0;
394                 tx_offload_mask.tso_segsz |= ~0;
395                 mss_l4len_idx |= tx_offload.tso_segsz << IXGBE_ADVTXD_MSS_SHIFT;
396                 mss_l4len_idx |= tx_offload.l4_len << IXGBE_ADVTXD_L4LEN_SHIFT;
397         } else { /* no TSO, check if hardware checksum is needed */
398                 if (ol_flags & PKT_TX_IP_CKSUM) {
399                         type_tucmd_mlhl = IXGBE_ADVTXD_TUCMD_IPV4;
400                         tx_offload_mask.l2_len |= ~0;
401                         tx_offload_mask.l3_len |= ~0;
402                 }
403
404                 switch (ol_flags & PKT_TX_L4_MASK) {
405                 case PKT_TX_UDP_CKSUM:
406                         type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_L4T_UDP |
407                                 IXGBE_ADVTXD_DTYP_CTXT | IXGBE_ADVTXD_DCMD_DEXT;
408                         mss_l4len_idx |= sizeof(struct udp_hdr) << IXGBE_ADVTXD_L4LEN_SHIFT;
409                         tx_offload_mask.l2_len |= ~0;
410                         tx_offload_mask.l3_len |= ~0;
411                         break;
412                 case PKT_TX_TCP_CKSUM:
413                         type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_L4T_TCP |
414                                 IXGBE_ADVTXD_DTYP_CTXT | IXGBE_ADVTXD_DCMD_DEXT;
415                         mss_l4len_idx |= sizeof(struct tcp_hdr) << IXGBE_ADVTXD_L4LEN_SHIFT;
416                         tx_offload_mask.l2_len |= ~0;
417                         tx_offload_mask.l3_len |= ~0;
418                         break;
419                 case PKT_TX_SCTP_CKSUM:
420                         type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_L4T_SCTP |
421                                 IXGBE_ADVTXD_DTYP_CTXT | IXGBE_ADVTXD_DCMD_DEXT;
422                         mss_l4len_idx |= sizeof(struct sctp_hdr) << IXGBE_ADVTXD_L4LEN_SHIFT;
423                         tx_offload_mask.l2_len |= ~0;
424                         tx_offload_mask.l3_len |= ~0;
425                         break;
426                 default:
427                         type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_L4T_RSV |
428                                 IXGBE_ADVTXD_DTYP_CTXT | IXGBE_ADVTXD_DCMD_DEXT;
429                         break;
430                 }
431         }
432
433         txq->ctx_cache[ctx_idx].flags = ol_flags;
434         txq->ctx_cache[ctx_idx].tx_offload.data  =
435                 tx_offload_mask.data & tx_offload.data;
436         txq->ctx_cache[ctx_idx].tx_offload_mask    = tx_offload_mask;
437
438         ctx_txd->type_tucmd_mlhl = rte_cpu_to_le_32(type_tucmd_mlhl);
439         vlan_macip_lens = tx_offload.l3_len;
440         vlan_macip_lens |= (tx_offload.l2_len << IXGBE_ADVTXD_MACLEN_SHIFT);
441         vlan_macip_lens |= ((uint32_t)tx_offload.vlan_tci << IXGBE_ADVTXD_VLAN_SHIFT);
442         ctx_txd->vlan_macip_lens = rte_cpu_to_le_32(vlan_macip_lens);
443         ctx_txd->mss_l4len_idx   = rte_cpu_to_le_32(mss_l4len_idx);
444         ctx_txd->seqnum_seed     = 0;
445 }
446
447 /*
448  * Check which hardware context can be used. Use the existing match
449  * or create a new context descriptor.
450  */
451 static inline uint32_t
452 what_advctx_update(struct ixgbe_tx_queue *txq, uint64_t flags,
453                 union ixgbe_tx_offload tx_offload)
454 {
455         /* Check for a match with the currently used context */
456         if (likely((txq->ctx_cache[txq->ctx_curr].flags == flags) &&
457                 (txq->ctx_cache[txq->ctx_curr].tx_offload.data ==
458                 (txq->ctx_cache[txq->ctx_curr].tx_offload_mask.data & tx_offload.data)))) {
459                         return txq->ctx_curr;
460         }
461
462         /* Otherwise, check for a match with the other cached context */
463         txq->ctx_curr ^= 1;
464         if (likely((txq->ctx_cache[txq->ctx_curr].flags == flags) &&
465                 (txq->ctx_cache[txq->ctx_curr].tx_offload.data ==
466                 (txq->ctx_cache[txq->ctx_curr].tx_offload_mask.data & tx_offload.data)))) {
467                         return txq->ctx_curr;
468         }
469
470         /* No match: a new context descriptor must be built */
471         return IXGBE_CTX_NUM;
472 }
473
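/* Translate mbuf checksum/TSO offload flags into POPTS bits of olinfo_status. */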
474 static inline uint32_t
475 tx_desc_cksum_flags_to_olinfo(uint64_t ol_flags)
476 {
477         uint32_t tmp = 0;
478         if ((ol_flags & PKT_TX_L4_MASK) != PKT_TX_L4_NO_CKSUM)
479                 tmp |= IXGBE_ADVTXD_POPTS_TXSM;
480         if (ol_flags & PKT_TX_IP_CKSUM)
481                 tmp |= IXGBE_ADVTXD_POPTS_IXSM;
482         if (ol_flags & PKT_TX_TCP_SEG)
483                 tmp |= IXGBE_ADVTXD_POPTS_TXSM;
484         return tmp;
485 }
486
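/* Translate mbuf offload flags into DCMD bits (VLAN insert, TSO) of cmd_type_len. */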
487 static inline uint32_t
488 tx_desc_ol_flags_to_cmdtype(uint64_t ol_flags)
489 {
490         uint32_t cmdtype = 0;
491         if (ol_flags & PKT_TX_VLAN_PKT)
492                 cmdtype |= IXGBE_ADVTXD_DCMD_VLE;
493         if (ol_flags & PKT_TX_TCP_SEG)
494                 cmdtype |= IXGBE_ADVTXD_DCMD_TSE;
495         return cmdtype;
496 }
497
498 /* Default RS bit threshold values */
499 #ifndef DEFAULT_TX_RS_THRESH
500 #define DEFAULT_TX_RS_THRESH   32
501 #endif
502 #ifndef DEFAULT_TX_FREE_THRESH
503 #define DEFAULT_TX_FREE_THRESH 32
504 #endif
505
506 /* Reset transmit descriptors after they have been used */
507 static inline int
508 ixgbe_xmit_cleanup(struct ixgbe_tx_queue *txq)
509 {
510         struct ixgbe_tx_entry *sw_ring = txq->sw_ring;
511         volatile union ixgbe_adv_tx_desc *txr = txq->tx_ring;
512         uint16_t last_desc_cleaned = txq->last_desc_cleaned;
513         uint16_t nb_tx_desc = txq->nb_tx_desc;
514         uint16_t desc_to_clean_to;
515         uint16_t nb_tx_to_clean;
516         uint32_t status;
517
518         /* Determine the last descriptor needing to be cleaned */
519         desc_to_clean_to = (uint16_t)(last_desc_cleaned + txq->tx_rs_thresh);
520         if (desc_to_clean_to >= nb_tx_desc)
521                 desc_to_clean_to = (uint16_t)(desc_to_clean_to - nb_tx_desc);
522
523         /* Check to make sure the last descriptor to clean is done */
524         desc_to_clean_to = sw_ring[desc_to_clean_to].last_id;
525         status = txr[desc_to_clean_to].wb.status;
526         if (!(status & rte_cpu_to_le_32(IXGBE_TXD_STAT_DD)))
527         {
528                 PMD_TX_FREE_LOG(DEBUG,
529                                 "TX descriptor %4u is not done "
530                                 "(port=%d queue=%d)",
531                                 desc_to_clean_to,
532                                 txq->port_id, txq->queue_id);
533                 /* Failed to clean any descriptors, better luck next time */
534                 return -1;
535         }
536
537         /* Figure out how many descriptors will be cleaned */
538         if (last_desc_cleaned > desc_to_clean_to)
539                 nb_tx_to_clean = (uint16_t)((nb_tx_desc - last_desc_cleaned) +
540                                                         desc_to_clean_to);
541         else
542                 nb_tx_to_clean = (uint16_t)(desc_to_clean_to -
543                                                 last_desc_cleaned);
544
545         PMD_TX_FREE_LOG(DEBUG,
546                         "Cleaning %4u TX descriptors: %4u to %4u "
547                         "(port=%d queue=%d)",
548                         nb_tx_to_clean, last_desc_cleaned, desc_to_clean_to,
549                         txq->port_id, txq->queue_id);
550
551         /*
552          * The last descriptor to clean is done, so that means all the
553          * descriptors from the last descriptor that was cleaned
554          * up to the last descriptor with the RS bit set
555          * are done. Only reset the threshold descriptor.
556          */
557         txr[desc_to_clean_to].wb.status = 0;
558
559         /* Update the txq to reflect the last descriptor that was cleaned */
560         txq->last_desc_cleaned = desc_to_clean_to;
561         txq->nb_tx_free = (uint16_t)(txq->nb_tx_free + nb_tx_to_clean);
562
563         /* No Error */
564         return 0;
565 }
566
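/*
 * Full-featured transmit path: handles multi-segment mbufs and hardware
 * offloads, building a context descriptor when the required offload
 * settings are not already cached in the queue.
 */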
567 uint16_t
568 ixgbe_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts,
569                 uint16_t nb_pkts)
570 {
571         struct ixgbe_tx_queue *txq;
572         struct ixgbe_tx_entry *sw_ring;
573         struct ixgbe_tx_entry *txe, *txn;
574         volatile union ixgbe_adv_tx_desc *txr;
575         volatile union ixgbe_adv_tx_desc *txd, *txp;
576         struct rte_mbuf     *tx_pkt;
577         struct rte_mbuf     *m_seg;
578         uint64_t buf_dma_addr;
579         uint32_t olinfo_status;
580         uint32_t cmd_type_len;
581         uint32_t pkt_len;
582         uint16_t slen;
583         uint64_t ol_flags;
584         uint16_t tx_id;
585         uint16_t tx_last;
586         uint16_t nb_tx;
587         uint16_t nb_used;
588         uint64_t tx_ol_req;
589         uint32_t ctx = 0;
590         uint32_t new_ctx;
591         union ixgbe_tx_offload tx_offload = {0};
592
593         txq = tx_queue;
594         sw_ring = txq->sw_ring;
595         txr     = txq->tx_ring;
596         tx_id   = txq->tx_tail;
597         txe = &sw_ring[tx_id];
598         txp = NULL;
599
600         /* Determine if the descriptor ring needs to be cleaned. */
601         if (txq->nb_tx_free < txq->tx_free_thresh)
602                 ixgbe_xmit_cleanup(txq);
603
604         rte_prefetch0(&txe->mbuf->pool);
605
606         /* TX loop */
607         for (nb_tx = 0; nb_tx < nb_pkts; nb_tx++) {
608                 new_ctx = 0;
609                 tx_pkt = *tx_pkts++;
610                 pkt_len = tx_pkt->pkt_len;
611
612                 /*
613                  * Determine how many (if any) context descriptors
614                  * are needed for offload functionality.
615                  */
616                 ol_flags = tx_pkt->ol_flags;
617
618                 /* If hardware offload required */
619                 tx_ol_req = ol_flags & IXGBE_TX_OFFLOAD_MASK;
620                 if (tx_ol_req) {
621                         tx_offload.l2_len = tx_pkt->l2_len;
622                         tx_offload.l3_len = tx_pkt->l3_len;
623                         tx_offload.l4_len = tx_pkt->l4_len;
624                         tx_offload.vlan_tci = tx_pkt->vlan_tci;
625                         tx_offload.tso_segsz = tx_pkt->tso_segsz;
626
627                         /* Decide whether a new context must be built or an existing one reused. */
628                         ctx = what_advctx_update(txq, tx_ol_req,
629                                 tx_offload);
630                         /* Only allocate a context descriptor if required */
631                         new_ctx = (ctx == IXGBE_CTX_NUM);
632                         ctx = txq->ctx_curr;
633                 }
634
635                 /*
636                  * Keep track of how many descriptors are used in this loop.
637                  * This will always be the number of segments plus the number
638                  * of context descriptors required to transmit the packet.
639                  */
640                 nb_used = (uint16_t)(tx_pkt->nb_segs + new_ctx);
641
642                 if (txp != NULL &&
643                                 nb_used + txq->nb_tx_used >= txq->tx_rs_thresh)
644                         /* set RS on the previous packet in the burst */
645                         txp->read.cmd_type_len |=
646                                 rte_cpu_to_le_32(IXGBE_TXD_CMD_RS);
647
648                 /*
649                  * The number of descriptors that must be allocated for a
650                  * packet is the number of segments of that packet, plus 1
651                  * Context Descriptor for the hardware offload, if any.
652                  * Determine the last TX descriptor to allocate in the TX ring
653                  * for the packet, starting from the current position (tx_id)
654                  * in the ring.
655                  */
656                 tx_last = (uint16_t) (tx_id + nb_used - 1);
657
658                 /* Circular ring */
659                 if (tx_last >= txq->nb_tx_desc)
660                         tx_last = (uint16_t) (tx_last - txq->nb_tx_desc);
661
662                 PMD_TX_LOG(DEBUG, "port_id=%u queue_id=%u pktlen=%u"
663                            " tx_first=%u tx_last=%u",
664                            (unsigned) txq->port_id,
665                            (unsigned) txq->queue_id,
666                            (unsigned) pkt_len,
667                            (unsigned) tx_id,
668                            (unsigned) tx_last);
669
670                 /*
671                  * Make sure there are enough TX descriptors available to
672                  * transmit the entire packet.
673                  * nb_used better be less than or equal to txq->tx_rs_thresh
674                  */
675                 if (nb_used > txq->nb_tx_free) {
676                         PMD_TX_FREE_LOG(DEBUG,
677                                         "Not enough free TX descriptors "
678                                         "nb_used=%4u nb_free=%4u "
679                                         "(port=%d queue=%d)",
680                                         nb_used, txq->nb_tx_free,
681                                         txq->port_id, txq->queue_id);
682
683                         if (ixgbe_xmit_cleanup(txq) != 0) {
684                                 /* Could not clean any descriptors */
685                                 if (nb_tx == 0)
686                                         return 0;
687                                 goto end_of_tx;
688                         }
689
690                         /* nb_used better be <= txq->tx_rs_thresh */
691                         if (unlikely(nb_used > txq->tx_rs_thresh)) {
692                                 PMD_TX_FREE_LOG(DEBUG,
693                                         "The number of descriptors needed to "
694                                         "transmit the packet exceeds the "
695                                         "RS bit threshold. This will impact "
696                                         "performance. "
697                                         "nb_used=%4u nb_free=%4u "
698                                         "tx_rs_thresh=%4u. "
699                                         "(port=%d queue=%d)",
700                                         nb_used, txq->nb_tx_free,
701                                         txq->tx_rs_thresh,
702                                         txq->port_id, txq->queue_id);
703                                 /*
704                                  * Loop here until there are enough TX
705                                  * descriptors or until the ring cannot be
706                                  * cleaned.
707                                  */
708                                 while (nb_used > txq->nb_tx_free) {
709                                         if (ixgbe_xmit_cleanup(txq) != 0) {
710                                                 /*
711                                                  * Could not clean any
712                                                  * descriptors
713                                                  */
714                                                 if (nb_tx == 0)
715                                                         return 0;
716                                                 goto end_of_tx;
717                                         }
718                                 }
719                         }
720                 }
721
722                 /*
723                  * By now there are enough free TX descriptors to transmit
724                  * the packet.
725                  */
726
727                 /*
728                  * Set common flags of all TX Data Descriptors.
729                  *
730                  * The following bits must be set in all Data Descriptors:
731                  *   - IXGBE_ADVTXD_DTYP_DATA
732                  *   - IXGBE_ADVTXD_DCMD_DEXT
733                  *
734                  * The following bits must be set in the first Data Descriptor
735                  * and are ignored in the other ones:
736                  *   - IXGBE_ADVTXD_DCMD_IFCS
737                  *   - IXGBE_ADVTXD_MAC_1588
738                  *   - IXGBE_ADVTXD_DCMD_VLE
739                  *
740                  * The following bits must only be set in the last Data
741                  * Descriptor:
742                  *   - IXGBE_TXD_CMD_EOP
743                  *
744                  * The following bits can be set in any Data Descriptor, but
745                  * are only set in the last Data Descriptor:
746                  *   - IXGBE_TXD_CMD_RS
747                  */
748                 cmd_type_len = IXGBE_ADVTXD_DTYP_DATA |
749                         IXGBE_ADVTXD_DCMD_IFCS | IXGBE_ADVTXD_DCMD_DEXT;
750
751 #ifdef RTE_LIBRTE_IEEE1588
752                 if (ol_flags & PKT_TX_IEEE1588_TMST)
753                         cmd_type_len |= IXGBE_ADVTXD_MAC_1588;
754 #endif
755
756                 olinfo_status = 0;
757                 if (tx_ol_req) {
758
759                         if (ol_flags & PKT_TX_TCP_SEG) {
760                                 /* when TSO is on, the paylen in the descriptor is
761                                  * the TCP payload length, not the full packet length */
762                                 pkt_len -= (tx_offload.l2_len +
763                                         tx_offload.l3_len + tx_offload.l4_len);
764                         }
765
766                         /*
767                          * Setup the TX Advanced Context Descriptor if required
768                          */
769                         if (new_ctx) {
770                                 volatile struct ixgbe_adv_tx_context_desc *
771                                     ctx_txd;
772
773                                 ctx_txd = (volatile struct
774                                     ixgbe_adv_tx_context_desc *)
775                                     &txr[tx_id];
776
777                                 txn = &sw_ring[txe->next_id];
778                                 rte_prefetch0(&txn->mbuf->pool);
779
780                                 if (txe->mbuf != NULL) {
781                                         rte_pktmbuf_free_seg(txe->mbuf);
782                                         txe->mbuf = NULL;
783                                 }
784
785                                 ixgbe_set_xmit_ctx(txq, ctx_txd, tx_ol_req,
786                                         tx_offload);
787
788                                 txe->last_id = tx_last;
789                                 tx_id = txe->next_id;
790                                 txe = txn;
791                         }
792
793                         /*
794                          * Set up the TX Advanced Data Descriptor.
795                          * This path is taken whether the context descriptor
796                          * was newly built or an existing one is reused.
797                          */
798                         cmd_type_len  |= tx_desc_ol_flags_to_cmdtype(ol_flags);
799                         olinfo_status |= tx_desc_cksum_flags_to_olinfo(ol_flags);
800                         olinfo_status |= ctx << IXGBE_ADVTXD_IDX_SHIFT;
801                 }
802
803                 olinfo_status |= (pkt_len << IXGBE_ADVTXD_PAYLEN_SHIFT);
804
805                 m_seg = tx_pkt;
806                 do {
807                         txd = &txr[tx_id];
808                         txn = &sw_ring[txe->next_id];
809                         rte_prefetch0(&txn->mbuf->pool);
810
811                         if (txe->mbuf != NULL)
812                                 rte_pktmbuf_free_seg(txe->mbuf);
813                         txe->mbuf = m_seg;
814
815                         /*
816                          * Set up Transmit Data Descriptor.
817                          */
818                         slen = m_seg->data_len;
819                         buf_dma_addr = RTE_MBUF_DATA_DMA_ADDR(m_seg);
820                         txd->read.buffer_addr =
821                                 rte_cpu_to_le_64(buf_dma_addr);
822                         txd->read.cmd_type_len =
823                                 rte_cpu_to_le_32(cmd_type_len | slen);
824                         txd->read.olinfo_status =
825                                 rte_cpu_to_le_32(olinfo_status);
826                         txe->last_id = tx_last;
827                         tx_id = txe->next_id;
828                         txe = txn;
829                         m_seg = m_seg->next;
830                 } while (m_seg != NULL);
831
832                 /*
833                  * The last packet data descriptor needs End Of Packet (EOP)
834                  */
835                 cmd_type_len |= IXGBE_TXD_CMD_EOP;
836                 txq->nb_tx_used = (uint16_t)(txq->nb_tx_used + nb_used);
837                 txq->nb_tx_free = (uint16_t)(txq->nb_tx_free - nb_used);
838
839                 /* Set RS bit only on threshold packets' last descriptor */
840                 if (txq->nb_tx_used >= txq->tx_rs_thresh) {
841                         PMD_TX_FREE_LOG(DEBUG,
842                                         "Setting RS bit on TXD id="
843                                         "%4u (port=%d queue=%d)",
844                                         tx_last, txq->port_id, txq->queue_id);
845
846                         cmd_type_len |= IXGBE_TXD_CMD_RS;
847
848                         /* Update txq RS bit counters */
849                         txq->nb_tx_used = 0;
850                         txp = NULL;
851                 } else
852                         txp = txd;
853
854                 txd->read.cmd_type_len |= rte_cpu_to_le_32(cmd_type_len);
855         }
856
857 end_of_tx:
858         /* set RS on last packet in the burst */
859         if (txp != NULL)
860                 txp->read.cmd_type_len |= rte_cpu_to_le_32(IXGBE_TXD_CMD_RS);
861
862         rte_wmb();
863
864         /*
865          * Set the Transmit Descriptor Tail (TDT)
866          */
867         PMD_TX_LOG(DEBUG, "port_id=%u queue_id=%u tx_tail=%u nb_tx=%u",
868                    (unsigned) txq->port_id, (unsigned) txq->queue_id,
869                    (unsigned) tx_id, (unsigned) nb_tx);
870         IXGBE_PCI_REG_WRITE(txq->tdt_reg_addr, tx_id);
871         txq->tx_tail = tx_id;
872
873         return nb_tx;
874 }
875
876 /*********************************************************************
877  *
878  *  RX functions
879  *
880  **********************************************************************/
881 #define IXGBE_PACKET_TYPE_IPV4              0X01
882 #define IXGBE_PACKET_TYPE_IPV4_TCP          0X11
883 #define IXGBE_PACKET_TYPE_IPV4_UDP          0X21
884 #define IXGBE_PACKET_TYPE_IPV4_SCTP         0X41
885 #define IXGBE_PACKET_TYPE_IPV4_EXT          0X03
886 #define IXGBE_PACKET_TYPE_IPV4_EXT_SCTP     0X43
887 #define IXGBE_PACKET_TYPE_IPV6              0X04
888 #define IXGBE_PACKET_TYPE_IPV6_TCP          0X14
889 #define IXGBE_PACKET_TYPE_IPV6_UDP          0X24
890 #define IXGBE_PACKET_TYPE_IPV6_EXT          0X0C
891 #define IXGBE_PACKET_TYPE_IPV6_EXT_TCP      0X1C
892 #define IXGBE_PACKET_TYPE_IPV6_EXT_UDP      0X2C
893 #define IXGBE_PACKET_TYPE_IPV4_IPV6         0X05
894 #define IXGBE_PACKET_TYPE_IPV4_IPV6_TCP     0X15
895 #define IXGBE_PACKET_TYPE_IPV4_IPV6_UDP     0X25
896 #define IXGBE_PACKET_TYPE_IPV4_IPV6_EXT     0X0D
897 #define IXGBE_PACKET_TYPE_IPV4_IPV6_EXT_TCP 0X1D
898 #define IXGBE_PACKET_TYPE_IPV4_IPV6_EXT_UDP 0X2D
899 #define IXGBE_PACKET_TYPE_MAX               0X80
900 #define IXGBE_PACKET_TYPE_MASK              0X7F
901 #define IXGBE_PACKET_TYPE_SHIFT             0X04
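/* Translate the descriptor's pkt_info field into an RTE_PTYPE_* packet type via a lookup table. */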
902 static inline uint32_t
903 ixgbe_rxd_pkt_info_to_pkt_type(uint16_t pkt_info)
904 {
905         static const uint32_t
906                 ptype_table[IXGBE_PACKET_TYPE_MAX] __rte_cache_aligned = {
907                 [IXGBE_PACKET_TYPE_IPV4] = RTE_PTYPE_L2_ETHER |
908                         RTE_PTYPE_L3_IPV4,
909                 [IXGBE_PACKET_TYPE_IPV4_EXT] = RTE_PTYPE_L2_ETHER |
910                         RTE_PTYPE_L3_IPV4_EXT,
911                 [IXGBE_PACKET_TYPE_IPV6] = RTE_PTYPE_L2_ETHER |
912                         RTE_PTYPE_L3_IPV6,
913                 [IXGBE_PACKET_TYPE_IPV4_IPV6] = RTE_PTYPE_L2_ETHER |
914                         RTE_PTYPE_L3_IPV4 | RTE_PTYPE_TUNNEL_IP |
915                         RTE_PTYPE_INNER_L3_IPV6,
916                 [IXGBE_PACKET_TYPE_IPV6_EXT] = RTE_PTYPE_L2_ETHER |
917                         RTE_PTYPE_L3_IPV6_EXT,
918                 [IXGBE_PACKET_TYPE_IPV4_IPV6_EXT] = RTE_PTYPE_L2_ETHER |
919                         RTE_PTYPE_L3_IPV4 | RTE_PTYPE_TUNNEL_IP |
920                         RTE_PTYPE_INNER_L3_IPV6_EXT,
921                 [IXGBE_PACKET_TYPE_IPV4_TCP] = RTE_PTYPE_L2_ETHER |
922                         RTE_PTYPE_L3_IPV4 | RTE_PTYPE_L4_TCP,
923                 [IXGBE_PACKET_TYPE_IPV6_TCP] = RTE_PTYPE_L2_ETHER |
924                         RTE_PTYPE_L3_IPV6 | RTE_PTYPE_L4_TCP,
925                 [IXGBE_PACKET_TYPE_IPV4_IPV6_TCP] = RTE_PTYPE_L2_ETHER |
926                         RTE_PTYPE_L3_IPV4 | RTE_PTYPE_TUNNEL_IP |
927                         RTE_PTYPE_INNER_L3_IPV6 | RTE_PTYPE_INNER_L4_TCP,
928                 [IXGBE_PACKET_TYPE_IPV6_EXT_TCP] = RTE_PTYPE_L2_ETHER |
929                         RTE_PTYPE_L3_IPV6_EXT | RTE_PTYPE_L4_TCP,
930                 [IXGBE_PACKET_TYPE_IPV4_IPV6_EXT_TCP] = RTE_PTYPE_L2_ETHER |
931                         RTE_PTYPE_L3_IPV4 | RTE_PTYPE_TUNNEL_IP |
932                         RTE_PTYPE_INNER_L3_IPV6_EXT | RTE_PTYPE_INNER_L4_TCP,
933                 [IXGBE_PACKET_TYPE_IPV4_UDP] = RTE_PTYPE_L2_ETHER |
934                         RTE_PTYPE_L3_IPV4 | RTE_PTYPE_L4_UDP,
935                 [IXGBE_PACKET_TYPE_IPV6_UDP] = RTE_PTYPE_L2_ETHER |
936                         RTE_PTYPE_L3_IPV6 | RTE_PTYPE_L4_UDP,
937                 [IXGBE_PACKET_TYPE_IPV4_IPV6_UDP] = RTE_PTYPE_L2_ETHER |
938                         RTE_PTYPE_L3_IPV4 | RTE_PTYPE_TUNNEL_IP |
939                         RTE_PTYPE_INNER_L3_IPV6 | RTE_PTYPE_INNER_L4_UDP,
940                 [IXGBE_PACKET_TYPE_IPV6_EXT_UDP] = RTE_PTYPE_L2_ETHER |
941                         RTE_PTYPE_L3_IPV6_EXT | RTE_PTYPE_L4_UDP,
942                 [IXGBE_PACKET_TYPE_IPV4_IPV6_EXT_UDP] = RTE_PTYPE_L2_ETHER |
943                         RTE_PTYPE_L3_IPV4 | RTE_PTYPE_TUNNEL_IP |
944                         RTE_PTYPE_INNER_L3_IPV6_EXT | RTE_PTYPE_INNER_L4_UDP,
945                 [IXGBE_PACKET_TYPE_IPV4_SCTP] = RTE_PTYPE_L2_ETHER |
946                         RTE_PTYPE_L3_IPV4 | RTE_PTYPE_L4_SCTP,
947                 [IXGBE_PACKET_TYPE_IPV4_EXT_SCTP] = RTE_PTYPE_L2_ETHER |
948                         RTE_PTYPE_L3_IPV4_EXT | RTE_PTYPE_L4_SCTP,
949         };
950         if (unlikely(pkt_info & IXGBE_RXDADV_PKTTYPE_ETQF))
951                 return RTE_PTYPE_UNKNOWN;
952
953         pkt_info = (pkt_info >> IXGBE_PACKET_TYPE_SHIFT) &
954                                 IXGBE_PACKET_TYPE_MASK;
955
956         return ptype_table[pkt_info];
957 }
958
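/* Translate the descriptor's pkt_info field into RSS hash / flow-director (and, with IEEE1588, PTP) mbuf offload flags. */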
959 static inline uint64_t
960 ixgbe_rxd_pkt_info_to_pkt_flags(uint16_t pkt_info)
961 {
962         static uint64_t ip_rss_types_map[16] __rte_cache_aligned = {
963                 0, PKT_RX_RSS_HASH, PKT_RX_RSS_HASH, PKT_RX_RSS_HASH,
964                 0, PKT_RX_RSS_HASH, 0, PKT_RX_RSS_HASH,
965                 PKT_RX_RSS_HASH, 0, 0, 0,
966                 0, 0, 0,  PKT_RX_FDIR,
967         };
968 #ifdef RTE_LIBRTE_IEEE1588
969         static uint64_t ip_pkt_etqf_map[8] = {
970                 0, 0, 0, PKT_RX_IEEE1588_PTP,
971                 0, 0, 0, 0,
972         };
973
974         if (likely(pkt_info & IXGBE_RXDADV_PKTTYPE_ETQF))
975                 return ip_pkt_etqf_map[(pkt_info >> 4) & 0X07] |
976                                 ip_rss_types_map[pkt_info & 0XF];
977         else
978                 return ip_rss_types_map[pkt_info & 0XF];
979 #else
980         return ip_rss_types_map[pkt_info & 0XF];
981 #endif
982 }
983
984 static inline uint64_t
985 rx_desc_status_to_pkt_flags(uint32_t rx_status)
986 {
987         uint64_t pkt_flags;
988
989         /*
990          * Check only whether a VLAN tag is present.
991          * Do not check whether the L3/L4 RX checksum was verified by the NIC;
992          * that is indicated by the rte_eth_rxmode.hw_ip_checksum flag.
993          */
994         pkt_flags = (rx_status & IXGBE_RXD_STAT_VP) ?  PKT_RX_VLAN_PKT : 0;
995
996 #ifdef RTE_LIBRTE_IEEE1588
997         if (rx_status & IXGBE_RXD_STAT_TMST)
998                 pkt_flags = pkt_flags | PKT_RX_IEEE1588_TMST;
999 #endif
1000         return pkt_flags;
1001 }
1002
1003 static inline uint64_t
1004 rx_desc_error_to_pkt_flags(uint32_t rx_status)
1005 {
1006         /*
1007          * Bit 31: IPE, IPv4 checksum error
1008          * Bit 30: L4I, L4 integrity error
1009          */
1010         static uint64_t error_to_pkt_flags_map[4] = {
1011                 0,  PKT_RX_L4_CKSUM_BAD, PKT_RX_IP_CKSUM_BAD,
1012                 PKT_RX_IP_CKSUM_BAD | PKT_RX_L4_CKSUM_BAD
1013         };
1014         return error_to_pkt_flags_map[(rx_status >>
1015                 IXGBE_RXDADV_ERR_CKSUM_BIT) & IXGBE_RXDADV_ERR_CKSUM_MSK];
1016 }
1017
1018 /*
1019  * LOOK_AHEAD defines how many desc statuses to check beyond the
1020  * current descriptor.
1021  * It must be a pound define for optimal performance.
1022  * Do not change the value of LOOK_AHEAD, as the ixgbe_rx_scan_hw_ring
1023  * function only works with LOOK_AHEAD=8.
1024  */
1025 #define LOOK_AHEAD 8
1026 #if (LOOK_AHEAD != 8)
1027 #error "PMD IXGBE: LOOK_AHEAD must be 8\n"
1028 #endif
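/*
 * Scan the RX ring in chunks of LOOK_AHEAD descriptors, convert all
 * completed descriptors into mbufs staged in rx_stage[] and return the
 * number of packets staged.
 */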
1029 static inline int
1030 ixgbe_rx_scan_hw_ring(struct ixgbe_rx_queue *rxq)
1031 {
1032         volatile union ixgbe_adv_rx_desc *rxdp;
1033         struct ixgbe_rx_entry *rxep;
1034         struct rte_mbuf *mb;
1035         uint16_t pkt_len;
1036         uint64_t pkt_flags;
1037         int nb_dd;
1038         uint32_t s[LOOK_AHEAD];
1039         uint16_t pkt_info[LOOK_AHEAD];
1040         int i, j, nb_rx = 0;
1041         uint32_t status;
1042
1043         /* get references to current descriptor and S/W ring entry */
1044         rxdp = &rxq->rx_ring[rxq->rx_tail];
1045         rxep = &rxq->sw_ring[rxq->rx_tail];
1046
1047         status = rxdp->wb.upper.status_error;
1048         /* check to make sure there is at least 1 packet to receive */
1049         if (!(status & rte_cpu_to_le_32(IXGBE_RXDADV_STAT_DD)))
1050                 return 0;
1051
1052         /*
1053          * Scan LOOK_AHEAD descriptors at a time to determine which descriptors
1054          * reference packets that are ready to be received.
1055          */
1056         for (i = 0; i < RTE_PMD_IXGBE_RX_MAX_BURST;
1057              i += LOOK_AHEAD, rxdp += LOOK_AHEAD, rxep += LOOK_AHEAD)
1058         {
1059                 /* Read desc statuses backwards to avoid race condition */
1060                 for (j = LOOK_AHEAD-1; j >= 0; --j)
1061                         s[j] = rte_le_to_cpu_32(rxdp[j].wb.upper.status_error);
1062
1063                 for (j = LOOK_AHEAD - 1; j >= 0; --j)
1064                         pkt_info[j] = rxdp[j].wb.lower.lo_dword.
1065                                                 hs_rss.pkt_info;
1066
1067                 /* Compute how many status bits were set */
1068                 nb_dd = 0;
1069                 for (j = 0; j < LOOK_AHEAD; ++j)
1070                         nb_dd += s[j] & IXGBE_RXDADV_STAT_DD;
1071
1072                 nb_rx += nb_dd;
1073
1074                 /* Translate descriptor info to mbuf format */
1075                 for (j = 0; j < nb_dd; ++j) {
1076                         mb = rxep[j].mbuf;
1077                         pkt_len = rte_le_to_cpu_16(rxdp[j].wb.upper.length) -
1078                                   rxq->crc_len;
1079                         mb->data_len = pkt_len;
1080                         mb->pkt_len = pkt_len;
1081                         mb->vlan_tci = rte_le_to_cpu_16(rxdp[j].wb.upper.vlan);
1082
1083                         /* convert descriptor fields to rte mbuf flags */
1084                         pkt_flags = rx_desc_status_to_pkt_flags(s[j]);
1085                         pkt_flags |= rx_desc_error_to_pkt_flags(s[j]);
1086                         pkt_flags |=
1087                                 ixgbe_rxd_pkt_info_to_pkt_flags(pkt_info[j]);
1088                         mb->ol_flags = pkt_flags;
1089                         mb->packet_type =
1090                                 ixgbe_rxd_pkt_info_to_pkt_type(pkt_info[j]);
1091
1092                         if (likely(pkt_flags & PKT_RX_RSS_HASH))
1093                                 mb->hash.rss = rte_le_to_cpu_32(
1094                                     rxdp[j].wb.lower.hi_dword.rss);
1095                         else if (pkt_flags & PKT_RX_FDIR) {
1096                                 mb->hash.fdir.hash = rte_le_to_cpu_16(
1097                                     rxdp[j].wb.lower.hi_dword.csum_ip.csum) &
1098                                     IXGBE_ATR_HASH_MASK;
1099                                 mb->hash.fdir.id = rte_le_to_cpu_16(
1100                                     rxdp[j].wb.lower.hi_dword.csum_ip.ip_id);
1101                         }
1102                 }
1103
1104                 /* Move mbuf pointers from the S/W ring to the stage */
1105                 for (j = 0; j < LOOK_AHEAD; ++j) {
1106                         rxq->rx_stage[i + j] = rxep[j].mbuf;
1107                 }
1108
1109                 /* stop scanning if not all descriptors in this chunk were done */
1110                 if (nb_dd != LOOK_AHEAD)
1111                         break;
1112         }
1113
1114         /* clear software ring entries so we can cleanup correctly */
1115         for (i = 0; i < nb_rx; ++i) {
1116                 rxq->sw_ring[rxq->rx_tail + i].mbuf = NULL;
1117         }
1118
1119
1120         return nb_rx;
1121 }
1122
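/*
 * Allocate rx_free_thresh mbufs in bulk and use them to refill the block
 * of descriptors ending at the current free trigger.
 */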
1123 static inline int
1124 ixgbe_rx_alloc_bufs(struct ixgbe_rx_queue *rxq, bool reset_mbuf)
1125 {
1126         volatile union ixgbe_adv_rx_desc *rxdp;
1127         struct ixgbe_rx_entry *rxep;
1128         struct rte_mbuf *mb;
1129         uint16_t alloc_idx;
1130         __le64 dma_addr;
1131         int diag, i;
1132
1133         /* allocate buffers in bulk directly into the S/W ring */
1134         alloc_idx = rxq->rx_free_trigger - (rxq->rx_free_thresh - 1);
1135         rxep = &rxq->sw_ring[alloc_idx];
1136         diag = rte_mempool_get_bulk(rxq->mb_pool, (void *)rxep,
1137                                     rxq->rx_free_thresh);
1138         if (unlikely(diag != 0))
1139                 return -ENOMEM;
1140
1141         rxdp = &rxq->rx_ring[alloc_idx];
1142         for (i = 0; i < rxq->rx_free_thresh; ++i) {
1143                 /* populate the static rte mbuf fields */
1144                 mb = rxep[i].mbuf;
1145                 if (reset_mbuf) {
1146                         mb->next = NULL;
1147                         mb->nb_segs = 1;
1148                         mb->port = rxq->port_id;
1149                 }
1150
1151                 rte_mbuf_refcnt_set(mb, 1);
1152                 mb->data_off = RTE_PKTMBUF_HEADROOM;
1153
1154                 /* populate the descriptors */
1155                 dma_addr = rte_cpu_to_le_64(RTE_MBUF_DATA_DMA_ADDR_DEFAULT(mb));
1156                 rxdp[i].read.hdr_addr = 0;
1157                 rxdp[i].read.pkt_addr = dma_addr;
1158         }
1159
1160         /* update state of internal queue structure */
1161         rxq->rx_free_trigger = rxq->rx_free_trigger + rxq->rx_free_thresh;
1162         if (rxq->rx_free_trigger >= rxq->nb_rx_desc)
1163                 rxq->rx_free_trigger = rxq->rx_free_thresh - 1;
1164
1165         /* no errors */
1166         return 0;
1167 }
1168
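/* Copy up to nb_pkts staged mbuf pointers to the caller's array and update the stage state. */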
1169 static inline uint16_t
1170 ixgbe_rx_fill_from_stage(struct ixgbe_rx_queue *rxq, struct rte_mbuf **rx_pkts,
1171                          uint16_t nb_pkts)
1172 {
1173         struct rte_mbuf **stage = &rxq->rx_stage[rxq->rx_next_avail];
1174         int i;
1175
1176         /* how many packets are ready to return? */
1177         nb_pkts = (uint16_t)RTE_MIN(nb_pkts, rxq->rx_nb_avail);
1178
1179         /* copy mbuf pointers to the application's packet list */
1180         for (i = 0; i < nb_pkts; ++i)
1181                 rx_pkts[i] = stage[i];
1182
1183         /* update internal queue state */
1184         rxq->rx_nb_avail = (uint16_t)(rxq->rx_nb_avail - nb_pkts);
1185         rxq->rx_next_avail = (uint16_t)(rxq->rx_next_avail + nb_pkts);
1186
1187         return nb_pkts;
1188 }
1189
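/*
 * Bulk-alloc receive path for bursts of at most RX_MAX_BURST packets:
 * return previously staged packets first, otherwise scan the hardware
 * ring, replenish descriptors when the free trigger is crossed, and
 * hand back packets from the stage.
 */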
1190 static inline uint16_t
1191 rx_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts,
1192              uint16_t nb_pkts)
1193 {
1194         struct ixgbe_rx_queue *rxq = (struct ixgbe_rx_queue *)rx_queue;
1195         uint16_t nb_rx = 0;
1196
1197         /* Any previously recv'd pkts will be returned from the Rx stage */
1198         if (rxq->rx_nb_avail)
1199                 return ixgbe_rx_fill_from_stage(rxq, rx_pkts, nb_pkts);
1200
1201         /* Scan the H/W ring for packets to receive */
1202         nb_rx = (uint16_t)ixgbe_rx_scan_hw_ring(rxq);
1203
1204         /* update internal queue state */
1205         rxq->rx_next_avail = 0;
1206         rxq->rx_nb_avail = nb_rx;
1207         rxq->rx_tail = (uint16_t)(rxq->rx_tail + nb_rx);
1208
1209         /* if required, allocate new buffers to replenish descriptors */
1210         if (rxq->rx_tail > rxq->rx_free_trigger) {
1211                 uint16_t cur_free_trigger = rxq->rx_free_trigger;
1212
1213                 if (ixgbe_rx_alloc_bufs(rxq, true) != 0) {
1214                         int i, j;
1215                         PMD_RX_LOG(DEBUG, "RX mbuf alloc failed port_id=%u "
1216                                    "queue_id=%u", (unsigned) rxq->port_id,
1217                                    (unsigned) rxq->queue_id);
1218
1219                         rte_eth_devices[rxq->port_id].data->rx_mbuf_alloc_failed +=
1220                                 rxq->rx_free_thresh;
1221
1222                         /*
1223                          * Need to rewind any previous receives if we cannot
1224                          * allocate new buffers to replenish the old ones.
1225                          */
1226                         rxq->rx_nb_avail = 0;
1227                         rxq->rx_tail = (uint16_t)(rxq->rx_tail - nb_rx);
1228                         for (i = 0, j = rxq->rx_tail; i < nb_rx; ++i, ++j)
1229                                 rxq->sw_ring[j].mbuf = rxq->rx_stage[i];
1230
1231                         return 0;
1232                 }
1233
1234                 /* update tail pointer */
1235                 rte_wmb();
1236                 IXGBE_PCI_REG_WRITE(rxq->rdt_reg_addr, cur_free_trigger);
1237         }
1238
1239         if (rxq->rx_tail >= rxq->nb_rx_desc)
1240                 rxq->rx_tail = 0;
1241
1242         /* received any packets this loop? */
1243         if (rxq->rx_nb_avail)
1244                 return ixgbe_rx_fill_from_stage(rxq, rx_pkts, nb_pkts);
1245
1246         return 0;
1247 }
1248
1249 /* split requests into chunks of size RTE_PMD_IXGBE_RX_MAX_BURST */
1250 static uint16_t
1251 ixgbe_recv_pkts_bulk_alloc(void *rx_queue, struct rte_mbuf **rx_pkts,
1252                            uint16_t nb_pkts)
1253 {
1254         uint16_t nb_rx;
1255
1256         if (unlikely(nb_pkts == 0))
1257                 return 0;
1258
1259         if (likely(nb_pkts <= RTE_PMD_IXGBE_RX_MAX_BURST))
1260                 return rx_recv_pkts(rx_queue, rx_pkts, nb_pkts);
1261
1262         /* request is relatively large, chunk it up */
1263         nb_rx = 0;
1264         while (nb_pkts) {
1265                 uint16_t ret, n;
1266                 n = (uint16_t)RTE_MIN(nb_pkts, RTE_PMD_IXGBE_RX_MAX_BURST);
1267                 ret = rx_recv_pkts(rx_queue, &rx_pkts[nb_rx], n);
1268                 nb_rx = (uint16_t)(nb_rx + ret);
1269                 nb_pkts = (uint16_t)(nb_pkts - ret);
1270                 if (ret < n)
1271                         break;
1272         }
1273
1274         return nb_rx;
1275 }
1276
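/*
 * Basic (non-bulk-alloc, non-scattered) receive path: one descriptor per
 * packet, allocating a replacement mbuf for every received packet.
 */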
1277 uint16_t
1278 ixgbe_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts,
1279                 uint16_t nb_pkts)
1280 {
1281         struct ixgbe_rx_queue *rxq;
1282         volatile union ixgbe_adv_rx_desc *rx_ring;
1283         volatile union ixgbe_adv_rx_desc *rxdp;
1284         struct ixgbe_rx_entry *sw_ring;
1285         struct ixgbe_rx_entry *rxe;
1286         struct rte_mbuf *rxm;
1287         struct rte_mbuf *nmb;
1288         union ixgbe_adv_rx_desc rxd;
1289         uint64_t dma_addr;
1290         uint32_t staterr;
1291         uint32_t pkt_info;
1292         uint16_t pkt_len;
1293         uint16_t rx_id;
1294         uint16_t nb_rx;
1295         uint16_t nb_hold;
1296         uint64_t pkt_flags;
1297
1298         nb_rx = 0;
1299         nb_hold = 0;
1300         rxq = rx_queue;
1301         rx_id = rxq->rx_tail;
1302         rx_ring = rxq->rx_ring;
1303         sw_ring = rxq->sw_ring;
1304         while (nb_rx < nb_pkts) {
1305                 /*
1306                  * The order of operations here is important as the DD status
1307                  * bit must not be read after any other descriptor fields.
1308                  * rx_ring and rxdp are pointing to volatile data so the order
1309                  * of accesses cannot be reordered by the compiler. If they were
1310                  * not volatile, they could be reordered which could lead to
1311                  * using invalid descriptor fields when read from rxd.
1312                  */
1313                 rxdp = &rx_ring[rx_id];
1314                 staterr = rxdp->wb.upper.status_error;
1315                 if (!(staterr & rte_cpu_to_le_32(IXGBE_RXDADV_STAT_DD)))
1316                         break;
1317                 rxd = *rxdp;
1318
1319                 /*
1320                  * End of packet.
1321                  *
1322                  * If the IXGBE_RXDADV_STAT_EOP flag is not set, the RX packet
1323                  * is likely to be invalid and to be dropped by the various
1324                  * validation checks performed by the network stack.
1325                  *
1326                  * Allocate a new mbuf to replenish the RX ring descriptor.
1327                  * If the allocation fails:
1328                  *    - arrange for that RX descriptor to be the first one
1329                  *      being parsed the next time the receive function is
1330                  *      invoked [on the same queue].
1331                  *
1332                  *    - Stop parsing the RX ring and return immediately.
1333                  *
1334                  * This policy does not drop the packet received in the RX
1335                  * descriptor for which the allocation of a new mbuf failed.
1336                  * Thus, it allows that packet to be later retrieved if
1337                  * mbufs have been freed in the meantime.
1338                  * As a side effect, holding RX descriptors instead of
1339                  * systematically giving them back to the NIC may lead to
1340                  * RX ring exhaustion situations.
1341                  * However, the NIC can gracefully prevent such situations
1342                  * from happening by sending specific "back-pressure" flow control
1343                  * frames to its peer(s).
1344                  */
1345                 PMD_RX_LOG(DEBUG, "port_id=%u queue_id=%u rx_id=%u "
1346                            "ext_err_stat=0x%08x pkt_len=%u",
1347                            (unsigned) rxq->port_id, (unsigned) rxq->queue_id,
1348                            (unsigned) rx_id, (unsigned) staterr,
1349                            (unsigned) rte_le_to_cpu_16(rxd.wb.upper.length));
1350
1351                 nmb = rte_rxmbuf_alloc(rxq->mb_pool);
1352                 if (nmb == NULL) {
1353                         PMD_RX_LOG(DEBUG, "RX mbuf alloc failed port_id=%u "
1354                                    "queue_id=%u", (unsigned) rxq->port_id,
1355                                    (unsigned) rxq->queue_id);
1356                         rte_eth_devices[rxq->port_id].data->rx_mbuf_alloc_failed++;
1357                         break;
1358                 }
1359
1360                 nb_hold++;
1361                 rxe = &sw_ring[rx_id];
1362                 rx_id++;
1363                 if (rx_id == rxq->nb_rx_desc)
1364                         rx_id = 0;
1365
1366                 /* Prefetch next mbuf while processing current one. */
1367                 rte_ixgbe_prefetch(sw_ring[rx_id].mbuf);
1368
1369                 /*
1370                  * When next RX descriptor is on a cache-line boundary,
1371                  * prefetch the next 4 RX descriptors and the next 8 pointers
1372                  * to mbufs.
1373                  */
1374                 if ((rx_id & 0x3) == 0) {
1375                         rte_ixgbe_prefetch(&rx_ring[rx_id]);
1376                         rte_ixgbe_prefetch(&sw_ring[rx_id]);
1377                 }
1378
1379                 rxm = rxe->mbuf;
1380                 rxe->mbuf = nmb;
1381                 dma_addr =
1382                         rte_cpu_to_le_64(RTE_MBUF_DATA_DMA_ADDR_DEFAULT(nmb));
1383                 rxdp->read.hdr_addr = 0;
1384                 rxdp->read.pkt_addr = dma_addr;
1385
1386                 /*
1387                  * Initialize the returned mbuf.
1388                  * 1) setup generic mbuf fields:
1389                  *    - number of segments,
1390                  *    - next segment,
1391                  *    - packet length,
1392                  *    - RX port identifier.
1393                  * 2) integrate hardware offload data, if any:
1394                  *    - RSS flag & hash,
1395                  *    - IP checksum flag,
1396                  *    - VLAN TCI, if any,
1397                  *    - error flags.
1398                  */
1399                 pkt_len = (uint16_t) (rte_le_to_cpu_16(rxd.wb.upper.length) -
1400                                       rxq->crc_len);
1401                 rxm->data_off = RTE_PKTMBUF_HEADROOM;
1402                 rte_packet_prefetch((char *)rxm->buf_addr + rxm->data_off);
1403                 rxm->nb_segs = 1;
1404                 rxm->next = NULL;
1405                 rxm->pkt_len = pkt_len;
1406                 rxm->data_len = pkt_len;
1407                 rxm->port = rxq->port_id;
1408
1409                 pkt_info = rte_le_to_cpu_32(rxd.wb.lower.lo_dword.hs_rss.
1410                                                                 pkt_info);
1411                 /* Only valid if PKT_RX_VLAN_PKT set in pkt_flags */
1412                 rxm->vlan_tci = rte_le_to_cpu_16(rxd.wb.upper.vlan);
1413
1414                 pkt_flags = rx_desc_status_to_pkt_flags(staterr);
1415                 pkt_flags = pkt_flags | rx_desc_error_to_pkt_flags(staterr);
1416                 pkt_flags = pkt_flags |
1417                         ixgbe_rxd_pkt_info_to_pkt_flags(pkt_info);
1418                 rxm->ol_flags = pkt_flags;
1419                 rxm->packet_type = ixgbe_rxd_pkt_info_to_pkt_type(pkt_info);
1420
1421                 if (likely(pkt_flags & PKT_RX_RSS_HASH))
1422                         rxm->hash.rss = rte_le_to_cpu_32(
1423                                                 rxd.wb.lower.hi_dword.rss);
1424                 else if (pkt_flags & PKT_RX_FDIR) {
1425                         rxm->hash.fdir.hash = rte_le_to_cpu_16(
1426                                         rxd.wb.lower.hi_dword.csum_ip.csum) &
1427                                         IXGBE_ATR_HASH_MASK;
1428                         rxm->hash.fdir.id = rte_le_to_cpu_16(
1429                                         rxd.wb.lower.hi_dword.csum_ip.ip_id);
1430                 }
1431                 /*
1432                  * Store the mbuf address into the next entry of the array
1433                  * of returned packets.
1434                  */
1435                 rx_pkts[nb_rx++] = rxm;
1436         }
1437         rxq->rx_tail = rx_id;
1438
1439         /*
1440          * If the number of free RX descriptors is greater than the RX free
1441          * threshold of the queue, advance the Receive Descriptor Tail (RDT)
1442          * register.
1443          * Update the RDT with the value of the last processed RX descriptor
1444          * minus 1, to guarantee that the RDT register is never equal to the
1445          * RDH register, which creates a "full" ring situation from the
1446          * hardware point of view...
1447          */
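        /*
         * Worked example (illustrative values): with rx_free_thresh = 32 and
         * nb_hold reaching 33, the RDT is written with rx_id - 1 (wrapping to
         * nb_rx_desc - 1 when rx_id == 0), i.e. the last descriptor that was
         * refilled with a fresh mbuf above, and nb_hold is reset to 0.
         */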
1448         nb_hold = (uint16_t) (nb_hold + rxq->nb_rx_hold);
1449         if (nb_hold > rxq->rx_free_thresh) {
1450                 PMD_RX_LOG(DEBUG, "port_id=%u queue_id=%u rx_tail=%u "
1451                            "nb_hold=%u nb_rx=%u",
1452                            (unsigned) rxq->port_id, (unsigned) rxq->queue_id,
1453                            (unsigned) rx_id, (unsigned) nb_hold,
1454                            (unsigned) nb_rx);
1455                 rx_id = (uint16_t) ((rx_id == 0) ?
1456                                      (rxq->nb_rx_desc - 1) : (rx_id - 1));
1457                 IXGBE_PCI_REG_WRITE(rxq->rdt_reg_addr, rx_id);
1458                 nb_hold = 0;
1459         }
1460         rxq->nb_rx_hold = nb_hold;
1461         return nb_rx;
1462 }
1463
1464 /**
1465  * Detect an RSC descriptor.
1466  */
1467 static inline uint32_t
1468 ixgbe_rsc_count(union ixgbe_adv_rx_desc *rx)
1469 {
1470         return (rte_le_to_cpu_32(rx->wb.lower.lo_dword.data) &
1471                 IXGBE_RXDADV_RSCCNT_MASK) >> IXGBE_RXDADV_RSCCNT_SHIFT;
1472 }
1473
1474 /**
1475  * ixgbe_fill_cluster_head_buf - fill the first mbuf of the returned packet
1476  *
1477  * Fill the following info in the HEAD buffer of the Rx cluster:
1478  *    - RX port identifier
1479  *    - hardware offload data, if any:
1480  *      - RSS flag & hash
1481  *      - IP checksum flag
1482  *      - VLAN TCI, if any
1483  *      - error flags
1484  * @head HEAD of the packet cluster
1485  * @desc HW descriptor to get data from
1486  * @port_id Port ID of the Rx queue
1487  */
1488 static inline void
1489 ixgbe_fill_cluster_head_buf(
1490         struct rte_mbuf *head,
1491         union ixgbe_adv_rx_desc *desc,
1492         uint8_t port_id,
1493         uint32_t staterr)
1494 {
1495         uint16_t pkt_info;
1496         uint64_t pkt_flags;
1497
1498         head->port = port_id;
1499
1500         /* The vlan_tci field is only valid when PKT_RX_VLAN_PKT is
1501          * set in the pkt_flags field.
1502          */
1503         head->vlan_tci = rte_le_to_cpu_16(desc->wb.upper.vlan);
1504         pkt_info = rte_le_to_cpu_32(desc->wb.lower.lo_dword.hs_rss.pkt_info);
1505         pkt_flags = rx_desc_status_to_pkt_flags(staterr);
1506         pkt_flags |= rx_desc_error_to_pkt_flags(staterr);
1507         pkt_flags |= ixgbe_rxd_pkt_info_to_pkt_flags(pkt_info);
1508         head->ol_flags = pkt_flags;
1509         head->packet_type = ixgbe_rxd_pkt_info_to_pkt_type(pkt_info);
1510
1511         if (likely(pkt_flags & PKT_RX_RSS_HASH))
1512                 head->hash.rss = rte_le_to_cpu_32(desc->wb.lower.hi_dword.rss);
1513         else if (pkt_flags & PKT_RX_FDIR) {
1514                 head->hash.fdir.hash =
1515                         rte_le_to_cpu_16(desc->wb.lower.hi_dword.csum_ip.csum)
1516                                                           & IXGBE_ATR_HASH_MASK;
1517                 head->hash.fdir.id =
1518                         rte_le_to_cpu_16(desc->wb.lower.hi_dword.csum_ip.ip_id);
1519         }
1520 }
1521
1522 /**
1523  * ixgbe_recv_pkts_lro - receive handler for the LRO case.
1524  *
1525  * @rx_queue Rx queue handle
1526  * @rx_pkts table of received packets
1527  * @nb_pkts size of rx_pkts table
1528  * @bulk_alloc if TRUE, bulk allocation is used for HW ring refilling
1529  *
1530  * Handles the Rx HW ring completions when RSC feature is configured. Uses an
1531  * additional ring of ixgbe_rsc_entry's that will hold the relevant RSC info.
1532  *
1533  * We use the same logic as in Linux and in FreeBSD ixgbe drivers:
1534  * 1) When non-EOP RSC completion arrives:
1535  *    a) Update the HEAD of the current RSC aggregation cluster with the new
1536  *       segment's data length.
1537  *    b) Set the "next" pointer of the current segment to point to the segment
1538  *       at the NEXTP index.
1539  *    c) Pass the HEAD of the RSC aggregation cluster on to the next NEXTP
1540  *       entry in the sw_sc_ring.
1541  * 2) When EOP arrives we just update the cluster's total length and offload
1542  *    flags and deliver the cluster up to the upper layers. In our case - put it
1543  *    in the rx_pkts table.
1544  *
1545  * Returns the number of received packets/clusters (according to the "bulk
1546  * receive" interface).
1547  */
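/*
 * Informal note on the bookkeeping used below: sw_sc_ring[i].fbuf, when
 * non-NULL, caches the HEAD mbuf of the cluster whose next segment is
 * expected at descriptor index i, so an aggregation that spans several
 * calls to this function can be resumed where it left off.
 */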
1548 static inline uint16_t
1549 ixgbe_recv_pkts_lro(void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t nb_pkts,
1550                     bool bulk_alloc)
1551 {
1552         struct ixgbe_rx_queue *rxq = rx_queue;
1553         volatile union ixgbe_adv_rx_desc *rx_ring = rxq->rx_ring;
1554         struct ixgbe_rx_entry *sw_ring = rxq->sw_ring;
1555         struct ixgbe_scattered_rx_entry *sw_sc_ring = rxq->sw_sc_ring;
1556         uint16_t rx_id = rxq->rx_tail;
1557         uint16_t nb_rx = 0;
1558         uint16_t nb_hold = rxq->nb_rx_hold;
1559         uint16_t prev_id = rxq->rx_tail;
1560
1561         while (nb_rx < nb_pkts) {
1562                 bool eop;
1563                 struct ixgbe_rx_entry *rxe;
1564                 struct ixgbe_scattered_rx_entry *sc_entry;
1565                 struct ixgbe_scattered_rx_entry *next_sc_entry;
1566                 struct ixgbe_rx_entry *next_rxe;
1567                 struct rte_mbuf *first_seg;
1568                 struct rte_mbuf *rxm;
1569                 struct rte_mbuf *nmb;
1570                 union ixgbe_adv_rx_desc rxd;
1571                 uint16_t data_len;
1572                 uint16_t next_id;
1573                 volatile union ixgbe_adv_rx_desc *rxdp;
1574                 uint32_t staterr;
1575
1576 next_desc:
1577                 /*
1578                  * The code in this whole file uses the volatile pointer to
1579                  * ensure the read ordering of the status and the rest of the
1580                  * descriptor fields (on the compiler level only!!!). This is so
1581                  * UGLY - why not just use the compiler barrier instead? DPDK
1582                  * even has the rte_compiler_barrier() for that.
1583                  *
1584                  * But most importantly this is just wrong because this doesn't
1585                  * ensure memory ordering in a general case at all. For
1586                  * instance, DPDK is supposed to work on Power CPUs where
1587                  * compiler barrier may just not be enough!
1588                  *
1589                  * I tried to write only this function properly to have a
1590                  * starting point (as a part of an LRO/RSC series) but the
1591                  * compiler cursed at me when I tried to cast away the
1592                  * "volatile" from rx_ring (yes, it's volatile too!!!). So, I'm
1593                  * keeping it the way it is for now.
1594                  *
1595                  * The code in this file is broken in so many other places and
1596                  * will just not work on a big endian CPU anyway therefore the
1597                  * lines below will have to be revisited together with the rest
1598                  * of the ixgbe PMD.
1599                  *
1600                  * TODO:
1601                  *    - Get rid of "volatile" crap and let the compiler do its
1602                  *      job.
1603                  *    - Use the proper memory barrier (rte_rmb()) to ensure the
1604                  *      memory ordering below.
1605                  */
1606                 rxdp = &rx_ring[rx_id];
1607                 staterr = rte_le_to_cpu_32(rxdp->wb.upper.status_error);
1608
1609                 if (!(staterr & IXGBE_RXDADV_STAT_DD))
1610                         break;
1611
1612                 rxd = *rxdp;
1613
1614                 PMD_RX_LOG(DEBUG, "port_id=%u queue_id=%u rx_id=%u "
1615                                   "staterr=0x%x data_len=%u",
1616                            rxq->port_id, rxq->queue_id, rx_id, staterr,
1617                            rte_le_to_cpu_16(rxd.wb.upper.length));
1618
1619                 if (!bulk_alloc) {
1620                         nmb = rte_rxmbuf_alloc(rxq->mb_pool);
1621                         if (nmb == NULL) {
1622                                 PMD_RX_LOG(DEBUG, "RX mbuf alloc failed "
1623                                                   "port_id=%u queue_id=%u",
1624                                            rxq->port_id, rxq->queue_id);
1625
1626                                 rte_eth_devices[rxq->port_id].data->
1627                                                         rx_mbuf_alloc_failed++;
1628                                 break;
1629                         }
1630                 }
1631                 else if (nb_hold > rxq->rx_free_thresh) {
1632                         uint16_t next_rdt = rxq->rx_free_trigger;
1633
1634                         if (!ixgbe_rx_alloc_bufs(rxq, false)) {
1635                                 rte_wmb();
1636                                 IXGBE_PCI_REG_WRITE(rxq->rdt_reg_addr,
1637                                                     next_rdt);
1638                                 nb_hold -= rxq->rx_free_thresh;
1639                         } else {
1640                                 PMD_RX_LOG(DEBUG, "RX bulk alloc failed "
1641                                                   "port_id=%u queue_id=%u",
1642                                            rxq->port_id, rxq->queue_id);
1643
1644                                 rte_eth_devices[rxq->port_id].data->
1645                                                         rx_mbuf_alloc_failed++;
1646                                 break;
1647                         }
1648                 }
1649
1650                 nb_hold++;
1651                 rxe = &sw_ring[rx_id];
1652                 eop = staterr & IXGBE_RXDADV_STAT_EOP;
1653
1654                 next_id = rx_id + 1;
1655                 if (next_id == rxq->nb_rx_desc)
1656                         next_id = 0;
1657
1658                 /* Prefetch next mbuf while processing current one. */
1659                 rte_ixgbe_prefetch(sw_ring[next_id].mbuf);
1660
1661                 /*
1662                  * When next RX descriptor is on a cache-line boundary,
1663                  * prefetch the next 4 RX descriptors and the next 4 pointers
1664                  * to mbufs.
1665                  */
1666                 if ((next_id & 0x3) == 0) {
1667                         rte_ixgbe_prefetch(&rx_ring[next_id]);
1668                         rte_ixgbe_prefetch(&sw_ring[next_id]);
1669                 }
1670
1671                 rxm = rxe->mbuf;
1672
1673                 if (!bulk_alloc) {
1674                         __le64 dma =
1675                           rte_cpu_to_le_64(RTE_MBUF_DATA_DMA_ADDR_DEFAULT(nmb));
1676                         /*
1677                          * Update RX descriptor with the physical address of the
1678                          * new data buffer of the new allocated mbuf.
1679                          */
1680                         rxe->mbuf = nmb;
1681
1682                         rxm->data_off = RTE_PKTMBUF_HEADROOM;
1683                         rxdp->read.hdr_addr = 0;
1684                         rxdp->read.pkt_addr = dma;
1685                 } else
1686                         rxe->mbuf = NULL;
1687
1688                 /*
1689                  * Set data length & data buffer address of mbuf.
1690                  */
1691                 data_len = rte_le_to_cpu_16(rxd.wb.upper.length);
1692                 rxm->data_len = data_len;
1693
1694                 if (!eop) {
1695                         uint16_t nextp_id;
1696                         /*
1697                          * Get next descriptor index:
1698                          *  - For RSC it's in the NEXTP field.
1699                          *  - For a scattered packet - it's just a following
1700                          *    descriptor.
1701                          */
1702                         if (ixgbe_rsc_count(&rxd))
1703                                 nextp_id =
1704                                         (staterr & IXGBE_RXDADV_NEXTP_MASK) >>
1705                                                        IXGBE_RXDADV_NEXTP_SHIFT;
1706                         else
1707                                 nextp_id = next_id;
1708
1709                         next_sc_entry = &sw_sc_ring[nextp_id];
1710                         next_rxe = &sw_ring[nextp_id];
1711                         rte_ixgbe_prefetch(next_rxe);
1712                 }
1713
1714                 sc_entry = &sw_sc_ring[rx_id];
1715                 first_seg = sc_entry->fbuf;
1716                 sc_entry->fbuf = NULL;
1717
1718                 /*
1719                  * If this is the first buffer of the received packet,
1720                  * set the pointer to the first mbuf of the packet and
1721                  * initialize its context.
1722                  * Otherwise, update the total length and the number of segments
1723                  * of the current scattered packet, and update the pointer to
1724                  * the last mbuf of the current packet.
1725                  */
1726                 if (first_seg == NULL) {
1727                         first_seg = rxm;
1728                         first_seg->pkt_len = data_len;
1729                         first_seg->nb_segs = 1;
1730                 } else {
1731                         first_seg->pkt_len += data_len;
1732                         first_seg->nb_segs++;
1733                 }
1734
1735                 prev_id = rx_id;
1736                 rx_id = next_id;
1737
1738                 /*
1739                  * If this is not the last buffer of the received packet, update
1740                  * the pointer to the first mbuf at the NEXTP entry in the
1741                  * sw_sc_ring and continue to parse the RX ring.
1742                  */
1743                 if (!eop) {
1744                         rxm->next = next_rxe->mbuf;
1745                         next_sc_entry->fbuf = first_seg;
1746                         goto next_desc;
1747                 }
1748
1749                 /*
1750                  * This is the last buffer of the received packet - return
1751                  * the current cluster to the user.
1752                  */
1753                 rxm->next = NULL;
1754
1755                 /* Initialize the first mbuf of the returned packet */
1756                 ixgbe_fill_cluster_head_buf(first_seg, &rxd, rxq->port_id,
1757                                             staterr);
1758
1759                 /*
1760                  * Deal with the case when HW CRC stripping is disabled.
1761                  * That can't happen when LRO is enabled, but still could
1762                  * happen for scattered RX mode.
1763                  */
1764                 first_seg->pkt_len -= rxq->crc_len;
1765                 if (unlikely(rxm->data_len <= rxq->crc_len)) {
1766                         struct rte_mbuf *lp;
1767
1768                         for (lp = first_seg; lp->next != rxm; lp = lp->next)
1769                                 ;
1770
1771                         first_seg->nb_segs--;
1772                         lp->data_len -= rxq->crc_len - rxm->data_len;
1773                         lp->next = NULL;
1774                         rte_pktmbuf_free_seg(rxm);
1775                 } else
1776                         rxm->data_len -= rxq->crc_len;
1777
1778                 /* Prefetch data of first segment, if configured to do so. */
1779                 rte_packet_prefetch((char *)first_seg->buf_addr +
1780                         first_seg->data_off);
1781
1782                 /*
1783                  * Store the mbuf address into the next entry of the array
1784                  * of returned packets.
1785                  */
1786                 rx_pkts[nb_rx++] = first_seg;
1787         }
1788
1789         /*
1790          * Record index of the next RX descriptor to probe.
1791          */
1792         rxq->rx_tail = rx_id;
1793
1794         /*
1795          * If the number of free RX descriptors is greater than the RX free
1796          * threshold of the queue, advance the Receive Descriptor Tail (RDT)
1797          * register.
1798          * Update the RDT with the value of the last processed RX descriptor
1799          * minus 1, to guarantee that the RDT register is never equal to the
1800          * RDH register, which creates a "full" ring situation from the
1801          * hardware point of view...
1802          */
1803         if (!bulk_alloc && nb_hold > rxq->rx_free_thresh) {
1804                 PMD_RX_LOG(DEBUG, "port_id=%u queue_id=%u rx_tail=%u "
1805                            "nb_hold=%u nb_rx=%u",
1806                            rxq->port_id, rxq->queue_id, rx_id, nb_hold, nb_rx);
1807
1808                 rte_wmb();
1809                 IXGBE_PCI_REG_WRITE(rxq->rdt_reg_addr, prev_id);
1810                 nb_hold = 0;
1811         }
1812
1813         rxq->nb_rx_hold = nb_hold;
1814         return nb_rx;
1815 }
1816
1817 uint16_t
1818 ixgbe_recv_pkts_lro_single_alloc(void *rx_queue, struct rte_mbuf **rx_pkts,
1819                                  uint16_t nb_pkts)
1820 {
1821         return ixgbe_recv_pkts_lro(rx_queue, rx_pkts, nb_pkts, false);
1822 }
1823
1824 uint16_t
1825 ixgbe_recv_pkts_lro_bulk_alloc(void *rx_queue, struct rte_mbuf **rx_pkts,
1826                                uint16_t nb_pkts)
1827 {
1828         return ixgbe_recv_pkts_lro(rx_queue, rx_pkts, nb_pkts, true);
1829 }
1830
1831 /*********************************************************************
1832  *
1833  *  Queue management functions
1834  *
1835  **********************************************************************/
1836
1837 static void __attribute__((cold))
1838 ixgbe_tx_queue_release_mbufs(struct ixgbe_tx_queue *txq)
1839 {
1840         unsigned i;
1841
1842         if (txq->sw_ring != NULL) {
1843                 for (i = 0; i < txq->nb_tx_desc; i++) {
1844                         if (txq->sw_ring[i].mbuf != NULL) {
1845                                 rte_pktmbuf_free_seg(txq->sw_ring[i].mbuf);
1846                                 txq->sw_ring[i].mbuf = NULL;
1847                         }
1848                 }
1849         }
1850 }
1851
1852 static void __attribute__((cold))
1853 ixgbe_tx_free_swring(struct ixgbe_tx_queue *txq)
1854 {
1855         if (txq != NULL &&
1856             txq->sw_ring != NULL)
1857                 rte_free(txq->sw_ring);
1858 }
1859
1860 static void __attribute__((cold))
1861 ixgbe_tx_queue_release(struct ixgbe_tx_queue *txq)
1862 {
1863         if (txq != NULL && txq->ops != NULL) {
1864                 txq->ops->release_mbufs(txq);
1865                 txq->ops->free_swring(txq);
1866                 rte_free(txq);
1867         }
1868 }
1869
1870 void __attribute__((cold))
1871 ixgbe_dev_tx_queue_release(void *txq)
1872 {
1873         ixgbe_tx_queue_release(txq);
1874 }
1875
1876 /* (Re)set dynamic ixgbe_tx_queue fields to defaults */
1877 static void __attribute__((cold))
1878 ixgbe_reset_tx_queue(struct ixgbe_tx_queue *txq)
1879 {
1880         static const union ixgbe_adv_tx_desc zeroed_desc = {{0}};
1881         struct ixgbe_tx_entry *txe = txq->sw_ring;
1882         uint16_t prev, i;
1883
1884         /* Zero out HW ring memory */
1885         for (i = 0; i < txq->nb_tx_desc; i++) {
1886                 txq->tx_ring[i] = zeroed_desc;
1887         }
1888
1889         /* Initialize SW ring entries */
1890         prev = (uint16_t) (txq->nb_tx_desc - 1);
1891         for (i = 0; i < txq->nb_tx_desc; i++) {
1892                 volatile union ixgbe_adv_tx_desc *txd = &txq->tx_ring[i];
1893                 txd->wb.status = rte_cpu_to_le_32(IXGBE_TXD_STAT_DD);
1894                 txe[i].mbuf = NULL;
1895                 txe[i].last_id = i;
1896                 txe[prev].next_id = i;
1897                 prev = i;
1898         }
1899
1900         txq->tx_next_dd = (uint16_t)(txq->tx_rs_thresh - 1);
1901         txq->tx_next_rs = (uint16_t)(txq->tx_rs_thresh - 1);
1902
1903         txq->tx_tail = 0;
1904         txq->nb_tx_used = 0;
1905         /*
1906          * Always allow 1 descriptor to be un-allocated to avoid
1907          * a H/W race condition
1908          */
1909         txq->last_desc_cleaned = (uint16_t)(txq->nb_tx_desc - 1);
1910         txq->nb_tx_free = (uint16_t)(txq->nb_tx_desc - 1);
1911         txq->ctx_curr = 0;
1912         memset((void *)&txq->ctx_cache, 0,
1913                 IXGBE_CTX_NUM * sizeof(struct ixgbe_advctx_info));
1914 }
1915
1916 static const struct ixgbe_txq_ops def_txq_ops = {
1917         .release_mbufs = ixgbe_tx_queue_release_mbufs,
1918         .free_swring = ixgbe_tx_free_swring,
1919         .reset = ixgbe_reset_tx_queue,
1920 };
1921
1922 /* Takes an ethdev and a queue and sets up the tx function to be used based on
1923  * the queue parameters. Used in tx_queue_setup by primary process and then
1924  * in dev_init by secondary process when attaching to an existing ethdev.
1925  */
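/*
 * Informal summary of the selection below: the vector or simple Tx paths are
 * only chosen when no offloads are requested (IXGBE_SIMPLE_FLAGS) and
 * tx_rs_thresh is at least RTE_PMD_IXGBE_TX_MAX_BURST; otherwise the
 * full-featured ixgbe_xmit_pkts() path is used.
 */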
1926 void __attribute__((cold))
1927 ixgbe_set_tx_function(struct rte_eth_dev *dev, struct ixgbe_tx_queue *txq)
1928 {
1929         /* Use a simple Tx queue (no offloads, no multi segs) if possible */
1930         if (((txq->txq_flags & IXGBE_SIMPLE_FLAGS) == IXGBE_SIMPLE_FLAGS)
1931                         && (txq->tx_rs_thresh >= RTE_PMD_IXGBE_TX_MAX_BURST)) {
1932                 PMD_INIT_LOG(DEBUG, "Using simple tx code path");
1933 #ifdef RTE_IXGBE_INC_VECTOR
1934                 if (txq->tx_rs_thresh <= RTE_IXGBE_TX_MAX_FREE_BUF_SZ &&
1935                                 (rte_eal_process_type() != RTE_PROC_PRIMARY ||
1936                                         ixgbe_txq_vec_setup(txq) == 0)) {
1937                         PMD_INIT_LOG(DEBUG, "Vector tx enabled.");
1938                         dev->tx_pkt_burst = ixgbe_xmit_pkts_vec;
1939                 } else
1940 #endif
1941                 dev->tx_pkt_burst = ixgbe_xmit_pkts_simple;
1942         } else {
1943                 PMD_INIT_LOG(DEBUG, "Using full-featured tx code path");
1944                 PMD_INIT_LOG(DEBUG,
1945                                 " - txq_flags = %lx " "[IXGBE_SIMPLE_FLAGS=%lx]",
1946                                 (unsigned long)txq->txq_flags,
1947                                 (unsigned long)IXGBE_SIMPLE_FLAGS);
1948                 PMD_INIT_LOG(DEBUG,
1949                                 " - tx_rs_thresh = %lu " "[RTE_PMD_IXGBE_TX_MAX_BURST=%lu]",
1950                                 (unsigned long)txq->tx_rs_thresh,
1951                                 (unsigned long)RTE_PMD_IXGBE_TX_MAX_BURST);
1952                 dev->tx_pkt_burst = ixgbe_xmit_pkts;
1953         }
1954 }
1955
1956 int __attribute__((cold))
1957 ixgbe_dev_tx_queue_setup(struct rte_eth_dev *dev,
1958                          uint16_t queue_idx,
1959                          uint16_t nb_desc,
1960                          unsigned int socket_id,
1961                          const struct rte_eth_txconf *tx_conf)
1962 {
1963         const struct rte_memzone *tz;
1964         struct ixgbe_tx_queue *txq;
1965         struct ixgbe_hw     *hw;
1966         uint16_t tx_rs_thresh, tx_free_thresh;
1967
1968         PMD_INIT_FUNC_TRACE();
1969         hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
1970
1971         /*
1972          * Validate number of transmit descriptors.
1973          * It must not exceed hardware maximum, and must be multiple
1974          * of IXGBE_ALIGN.
1975          */
1976         if (nb_desc % IXGBE_TXD_ALIGN != 0 ||
1977                         (nb_desc > IXGBE_MAX_RING_DESC) ||
1978                         (nb_desc < IXGBE_MIN_RING_DESC)) {
1979                 return -EINVAL;
1980         }
1981
1982         /*
1983          * The following two parameters control the setting of the RS bit on
1984          * transmit descriptors.
1985          * TX descriptors will have their RS bit set after txq->tx_rs_thresh
1986          * descriptors have been used.
1987          * The TX descriptor ring will be cleaned after txq->tx_free_thresh
1988          * descriptors are used or if the number of descriptors required
1989          * to transmit a packet is greater than the number of free TX
1990          * descriptors.
1991          * The following constraints must be satisfied:
1992          *  tx_rs_thresh must be greater than 0.
1993          *  tx_rs_thresh must be less than the size of the ring minus 2.
1994          *  tx_rs_thresh must be less than or equal to tx_free_thresh.
1995          *  tx_rs_thresh must be a divisor of the ring size.
1996          *  tx_free_thresh must be greater than 0.
1997          *  tx_free_thresh must be less than the size of the ring minus 3.
1998          * One descriptor in the TX ring is used as a sentinel to avoid a
1999          * H/W race condition, hence the maximum threshold constraints.
2000          * When set to zero use default values.
2001          */
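        /*
         * Example satisfying the checks below (illustrative values only):
         * nb_desc = 512, tx_rs_thresh = 32, tx_free_thresh = 64, since
         * 32 < 510, 32 <= 64, 64 < 509, 512 % 32 == 0 and 32 does not
         * exceed DEFAULT_TX_RS_THRESH.
         */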
2002         tx_rs_thresh = (uint16_t)((tx_conf->tx_rs_thresh) ?
2003                         tx_conf->tx_rs_thresh : DEFAULT_TX_RS_THRESH);
2004         tx_free_thresh = (uint16_t)((tx_conf->tx_free_thresh) ?
2005                         tx_conf->tx_free_thresh : DEFAULT_TX_FREE_THRESH);
2006         if (tx_rs_thresh >= (nb_desc - 2)) {
2007                 PMD_INIT_LOG(ERR, "tx_rs_thresh must be less than the number "
2008                         "of TX descriptors minus 2. (tx_rs_thresh=%u "
2009                         "port=%d queue=%d)", (unsigned int)tx_rs_thresh,
2010                         (int)dev->data->port_id, (int)queue_idx);
2011                 return -EINVAL;
2012         }
2013         if (tx_rs_thresh > DEFAULT_TX_RS_THRESH) {
2014                 PMD_INIT_LOG(ERR, "tx_rs_thresh must be less than or equal to %u. "
2015                         "(tx_rs_thresh=%u port=%d queue=%d)",
2016                         DEFAULT_TX_RS_THRESH, (unsigned int)tx_rs_thresh,
2017                         (int)dev->data->port_id, (int)queue_idx);
2018                 return -EINVAL;
2019         }
2020         if (tx_free_thresh >= (nb_desc - 3)) {
2021                 PMD_INIT_LOG(ERR, "tx_free_thresh must be less than the "
2022                              "number of TX descriptors minus 3. "
2023                              "(tx_free_thresh=%u "
2024                              "port=%d queue=%d)",
2025                              (unsigned int)tx_free_thresh,
2026                              (int)dev->data->port_id, (int)queue_idx);
2027                 return -EINVAL;
2028         }
2029         if (tx_rs_thresh > tx_free_thresh) {
2030                 PMD_INIT_LOG(ERR, "tx_rs_thresh must be less than or equal to "
2031                              "tx_free_thresh. (tx_free_thresh=%u "
2032                              "tx_rs_thresh=%u port=%d queue=%d)",
2033                              (unsigned int)tx_free_thresh,
2034                              (unsigned int)tx_rs_thresh,
2035                              (int)dev->data->port_id,
2036                              (int)queue_idx);
2037                 return -EINVAL;
2038         }
2039         if ((nb_desc % tx_rs_thresh) != 0) {
2040                 PMD_INIT_LOG(ERR, "tx_rs_thresh must be a divisor of the "
2041                              "number of TX descriptors. (tx_rs_thresh=%u "
2042                              "port=%d queue=%d)", (unsigned int)tx_rs_thresh,
2043                              (int)dev->data->port_id, (int)queue_idx);
2044                 return -EINVAL;
2045         }
2046
2047         /*
2048          * If tx_rs_thresh is greater than 1, then TX WTHRESH should be
2049          * set to 0. If WTHRESH is greater than zero, the RS bit is ignored
2050          * by the NIC and all descriptors are written back after the NIC
2051          * accumulates WTHRESH descriptors.
2052          */
2053         if ((tx_rs_thresh > 1) && (tx_conf->tx_thresh.wthresh != 0)) {
2054                 PMD_INIT_LOG(ERR, "TX WTHRESH must be set to 0 if "
2055                              "tx_rs_thresh is greater than 1. (tx_rs_thresh=%u "
2056                              "port=%d queue=%d)", (unsigned int)tx_rs_thresh,
2057                              (int)dev->data->port_id, (int)queue_idx);
2058                 return -EINVAL;
2059         }
2060
2061         /* Free memory prior to re-allocation if needed... */
2062         if (dev->data->tx_queues[queue_idx] != NULL) {
2063                 ixgbe_tx_queue_release(dev->data->tx_queues[queue_idx]);
2064                 dev->data->tx_queues[queue_idx] = NULL;
2065         }
2066
2067         /* First allocate the tx queue data structure */
2068         txq = rte_zmalloc_socket("ethdev TX queue", sizeof(struct ixgbe_tx_queue),
2069                                  RTE_CACHE_LINE_SIZE, socket_id);
2070         if (txq == NULL)
2071                 return -ENOMEM;
2072
2073         /*
2074          * Allocate TX ring hardware descriptors. A memzone large enough to
2075          * handle the maximum ring size is allocated in order to allow for
2076          * resizing in later calls to the queue setup function.
2077          */
2078         tz = rte_eth_dma_zone_reserve(dev, "tx_ring", queue_idx,
2079                         sizeof(union ixgbe_adv_tx_desc) * IXGBE_MAX_RING_DESC,
2080                         IXGBE_ALIGN, socket_id);
2081         if (tz == NULL) {
2082                 ixgbe_tx_queue_release(txq);
2083                 return -ENOMEM;
2084         }
2085
2086         txq->nb_tx_desc = nb_desc;
2087         txq->tx_rs_thresh = tx_rs_thresh;
2088         txq->tx_free_thresh = tx_free_thresh;
2089         txq->pthresh = tx_conf->tx_thresh.pthresh;
2090         txq->hthresh = tx_conf->tx_thresh.hthresh;
2091         txq->wthresh = tx_conf->tx_thresh.wthresh;
2092         txq->queue_id = queue_idx;
2093         txq->reg_idx = (uint16_t)((RTE_ETH_DEV_SRIOV(dev).active == 0) ?
2094                 queue_idx : RTE_ETH_DEV_SRIOV(dev).def_pool_q_idx + queue_idx);
2095         txq->port_id = dev->data->port_id;
2096         txq->txq_flags = tx_conf->txq_flags;
2097         txq->ops = &def_txq_ops;
2098         txq->tx_deferred_start = tx_conf->tx_deferred_start;
2099
2100         /*
2101          * Use VFTDT instead of TDT if a virtual function is detected
2102          */
2103         if (hw->mac.type == ixgbe_mac_82599_vf ||
2104             hw->mac.type == ixgbe_mac_X540_vf ||
2105             hw->mac.type == ixgbe_mac_X550_vf ||
2106             hw->mac.type == ixgbe_mac_X550EM_x_vf)
2107                 txq->tdt_reg_addr = IXGBE_PCI_REG_ADDR(hw, IXGBE_VFTDT(queue_idx));
2108         else
2109                 txq->tdt_reg_addr = IXGBE_PCI_REG_ADDR(hw, IXGBE_TDT(txq->reg_idx));
2110
2111         txq->tx_ring_phys_addr = rte_mem_phy2mch(tz->memseg_id, tz->phys_addr);
2112         txq->tx_ring = (union ixgbe_adv_tx_desc *) tz->addr;
2113
2114         /* Allocate software ring */
2115         txq->sw_ring = rte_zmalloc_socket("txq->sw_ring",
2116                                 sizeof(struct ixgbe_tx_entry) * nb_desc,
2117                                 RTE_CACHE_LINE_SIZE, socket_id);
2118         if (txq->sw_ring == NULL) {
2119                 ixgbe_tx_queue_release(txq);
2120                 return -ENOMEM;
2121         }
2122         PMD_INIT_LOG(DEBUG, "sw_ring=%p hw_ring=%p dma_addr=0x%"PRIx64,
2123                      txq->sw_ring, txq->tx_ring, txq->tx_ring_phys_addr);
2124
2125         /* set up vector or scalar TX function as appropriate */
2126         ixgbe_set_tx_function(dev, txq);
2127
2128         txq->ops->reset(txq);
2129
2130         dev->data->tx_queues[queue_idx] = txq;
2131
2132
2133         return 0;
2134 }
2135
2136 /**
2137  * ixgbe_free_sc_cluster - free the not-yet-completed scattered cluster
2138  *
2139  * The "next" pointer of the last segment of (not-yet-completed) RSC clusters
2140  * in the sw_sc_ring is not set to NULL but rather points to the next
2141  * mbuf of this RSC aggregation (that has not been completed yet and still
2142  * resides on the HW ring). So, instead of calling rte_pktmbuf_free() we
2143  * will just free first "nb_segs" segments of the cluster explicitly by calling
2144  * an rte_pktmbuf_free_seg().
2145  *
2146  * @m scattered cluster head
2147  */
2148 static void __attribute__((cold))
2149 ixgbe_free_sc_cluster(struct rte_mbuf *m)
2150 {
2151         uint8_t i, nb_segs = m->nb_segs;
2152         struct rte_mbuf *next_seg;
2153
2154         for (i = 0; i < nb_segs; i++) {
2155                 next_seg = m->next;
2156                 rte_pktmbuf_free_seg(m);
2157                 m = next_seg;
2158         }
2159 }
2160
2161 static void __attribute__((cold))
2162 ixgbe_rx_queue_release_mbufs(struct ixgbe_rx_queue *rxq)
2163 {
2164         unsigned i;
2165
2166 #ifdef RTE_IXGBE_INC_VECTOR
2167         /* SSE Vector driver has a different way of releasing mbufs. */
2168         if (rxq->rx_using_sse) {
2169                 ixgbe_rx_queue_release_mbufs_vec(rxq);
2170                 return;
2171         }
2172 #endif
2173
2174         if (rxq->sw_ring != NULL) {
2175                 for (i = 0; i < rxq->nb_rx_desc; i++) {
2176                         if (rxq->sw_ring[i].mbuf != NULL) {
2177                                 rte_pktmbuf_free_seg(rxq->sw_ring[i].mbuf);
2178                                 rxq->sw_ring[i].mbuf = NULL;
2179                         }
2180                 }
2181                 if (rxq->rx_nb_avail) {
2182                         for (i = 0; i < rxq->rx_nb_avail; ++i) {
2183                                 struct rte_mbuf *mb;
2184                                 mb = rxq->rx_stage[rxq->rx_next_avail + i];
2185                                 rte_pktmbuf_free_seg(mb);
2186                         }
2187                         rxq->rx_nb_avail = 0;
2188                 }
2189         }
2190
2191         if (rxq->sw_sc_ring)
2192                 for (i = 0; i < rxq->nb_rx_desc; i++)
2193                         if (rxq->sw_sc_ring[i].fbuf) {
2194                                 ixgbe_free_sc_cluster(rxq->sw_sc_ring[i].fbuf);
2195                                 rxq->sw_sc_ring[i].fbuf = NULL;
2196                         }
2197 }
2198
2199 static void __attribute__((cold))
2200 ixgbe_rx_queue_release(struct ixgbe_rx_queue *rxq)
2201 {
2202         if (rxq != NULL) {
2203                 ixgbe_rx_queue_release_mbufs(rxq);
2204                 rte_free(rxq->sw_ring);
2205                 rte_free(rxq->sw_sc_ring);
2206                 rte_free(rxq);
2207         }
2208 }
2209
2210 void __attribute__((cold))
2211 ixgbe_dev_rx_queue_release(void *rxq)
2212 {
2213         ixgbe_rx_queue_release(rxq);
2214 }
2215
2216 /*
2217  * Check if Rx Burst Bulk Alloc function can be used.
2218  * Return
2219  *        0: the preconditions are satisfied and the bulk allocation function
2220  *           can be used.
2221  *  -EINVAL: the preconditions are NOT satisfied and the default Rx burst
2222  *           function must be used.
2223  */
2224 static inline int __attribute__((cold))
2225 check_rx_burst_bulk_alloc_preconditions(struct ixgbe_rx_queue *rxq)
2226 {
2227         int ret = 0;
2228
2229         /*
2230          * Make sure the following pre-conditions are satisfied:
2231          *   rxq->rx_free_thresh >= RTE_PMD_IXGBE_RX_MAX_BURST
2232          *   rxq->rx_free_thresh < rxq->nb_rx_desc
2233          *   (rxq->nb_rx_desc % rxq->rx_free_thresh) == 0
2234          *   rxq->nb_rx_desc<(IXGBE_MAX_RING_DESC-RTE_PMD_IXGBE_RX_MAX_BURST)
2235          * Scattered packets are not supported.  This should be checked
2236          * outside of this function.
2237          */
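        /*
         * For example (assuming RTE_PMD_IXGBE_RX_MAX_BURST == 32 and
         * IXGBE_MAX_RING_DESC == 4096): nb_rx_desc = 128 with
         * rx_free_thresh = 32 satisfies every condition below, while
         * rx_free_thresh = 16 would fail the first one.
         */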
2238         if (!(rxq->rx_free_thresh >= RTE_PMD_IXGBE_RX_MAX_BURST)) {
2239                 PMD_INIT_LOG(DEBUG, "Rx Burst Bulk Alloc Preconditions: "
2240                              "rxq->rx_free_thresh=%d, "
2241                              "RTE_PMD_IXGBE_RX_MAX_BURST=%d",
2242                              rxq->rx_free_thresh, RTE_PMD_IXGBE_RX_MAX_BURST);
2243                 ret = -EINVAL;
2244         } else if (!(rxq->rx_free_thresh < rxq->nb_rx_desc)) {
2245                 PMD_INIT_LOG(DEBUG, "Rx Burst Bulk Alloc Preconditions: "
2246                              "rxq->rx_free_thresh=%d, "
2247                              "rxq->nb_rx_desc=%d",
2248                              rxq->rx_free_thresh, rxq->nb_rx_desc);
2249                 ret = -EINVAL;
2250         } else if (!((rxq->nb_rx_desc % rxq->rx_free_thresh) == 0)) {
2251                 PMD_INIT_LOG(DEBUG, "Rx Burst Bulk Alloc Preconditions: "
2252                              "rxq->nb_rx_desc=%d, "
2253                              "rxq->rx_free_thresh=%d",
2254                              rxq->nb_rx_desc, rxq->rx_free_thresh);
2255                 ret = -EINVAL;
2256         } else if (!(rxq->nb_rx_desc <
2257                (IXGBE_MAX_RING_DESC - RTE_PMD_IXGBE_RX_MAX_BURST))) {
2258                 PMD_INIT_LOG(DEBUG, "Rx Burst Bulk Alloc Preconditions: "
2259                              "rxq->nb_rx_desc=%d, "
2260                              "IXGBE_MAX_RING_DESC=%d, "
2261                              "RTE_PMD_IXGBE_RX_MAX_BURST=%d",
2262                              rxq->nb_rx_desc, IXGBE_MAX_RING_DESC,
2263                              RTE_PMD_IXGBE_RX_MAX_BURST);
2264                 ret = -EINVAL;
2265         }
2266
2267         return ret;
2268 }
2269
2270 /* Reset dynamic ixgbe_rx_queue fields back to defaults */
2271 static void __attribute__((cold))
2272 ixgbe_reset_rx_queue(struct ixgbe_adapter *adapter, struct ixgbe_rx_queue *rxq)
2273 {
2274         static const union ixgbe_adv_rx_desc zeroed_desc = {{0}};
2275         unsigned i;
2276         uint16_t len = rxq->nb_rx_desc;
2277
2278         /*
2279          * By default, the Rx queue setup function allocates enough memory for
2280          * IXGBE_MAX_RING_DESC.  The Rx Burst bulk allocation function requires
2281          * extra memory at the end of the descriptor ring to be zero'd out. A
2282          * pre-condition for using the Rx burst bulk alloc function is that the
2283          * number of descriptors is less than or equal to
2284          * (IXGBE_MAX_RING_DESC - RTE_PMD_IXGBE_RX_MAX_BURST). Check all the
2285          * constraints here to see if we need to zero out memory after the end
2286          * of the H/W descriptor ring.
2287          */
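        /*
         * e.g. with nb_rx_desc = 128 and bulk allocation allowed, 128 + 32
         * (assuming RTE_PMD_IXGBE_RX_MAX_BURST is 32) descriptors are zeroed
         * below, so look-ahead reads past the ring end always see DD == 0.
         */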
2288         if (adapter->rx_bulk_alloc_allowed)
2289                 /* zero out extra memory */
2290                 len += RTE_PMD_IXGBE_RX_MAX_BURST;
2291
2292         /*
2293          * Zero out HW ring memory. Zero out extra memory at the end of
2294          * the H/W ring so look-ahead logic in Rx Burst bulk alloc function
2295          * reads extra memory as zeros.
2296          */
2297         for (i = 0; i < len; i++) {
2298                 rxq->rx_ring[i] = zeroed_desc;
2299         }
2300
2301         /*
2302          * initialize extra software ring entries. Space for these extra
2303          * entries is always allocated
2304          */
2305         memset(&rxq->fake_mbuf, 0x0, sizeof(rxq->fake_mbuf));
2306         for (i = rxq->nb_rx_desc; i < len; ++i) {
2307                 rxq->sw_ring[i].mbuf = &rxq->fake_mbuf;
2308         }
2309
2310         rxq->rx_nb_avail = 0;
2311         rxq->rx_next_avail = 0;
2312         rxq->rx_free_trigger = (uint16_t)(rxq->rx_free_thresh - 1);
2313         rxq->rx_tail = 0;
2314         rxq->nb_rx_hold = 0;
2315         rxq->pkt_first_seg = NULL;
2316         rxq->pkt_last_seg = NULL;
2317
2318 #ifdef RTE_IXGBE_INC_VECTOR
2319         rxq->rxrearm_start = 0;
2320         rxq->rxrearm_nb = 0;
2321 #endif
2322 }
2323
2324 int __attribute__((cold))
2325 ixgbe_dev_rx_queue_setup(struct rte_eth_dev *dev,
2326                          uint16_t queue_idx,
2327                          uint16_t nb_desc,
2328                          unsigned int socket_id,
2329                          const struct rte_eth_rxconf *rx_conf,
2330                          struct rte_mempool *mp)
2331 {
2332         const struct rte_memzone *rz;
2333         struct ixgbe_rx_queue *rxq;
2334         struct ixgbe_hw     *hw;
2335         uint16_t len;
2336         struct ixgbe_adapter *adapter =
2337                 (struct ixgbe_adapter *)dev->data->dev_private;
2338
2339         PMD_INIT_FUNC_TRACE();
2340         hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
2341
2342         /*
2343          * Validate number of receive descriptors.
2344          * It must not exceed hardware maximum, and must be multiple
2345          * of IXGBE_ALIGN.
2346          */
2347         if (nb_desc % IXGBE_RXD_ALIGN != 0 ||
2348                         (nb_desc > IXGBE_MAX_RING_DESC) ||
2349                         (nb_desc < IXGBE_MIN_RING_DESC)) {
2350                 return -EINVAL;
2351         }
2352
2353         /* Free memory prior to re-allocation if needed... */
2354         if (dev->data->rx_queues[queue_idx] != NULL) {
2355                 ixgbe_rx_queue_release(dev->data->rx_queues[queue_idx]);
2356                 dev->data->rx_queues[queue_idx] = NULL;
2357         }
2358
2359         /* First allocate the rx queue data structure */
2360         rxq = rte_zmalloc_socket("ethdev RX queue", sizeof(struct ixgbe_rx_queue),
2361                                  RTE_CACHE_LINE_SIZE, socket_id);
2362         if (rxq == NULL)
2363                 return -ENOMEM;
2364         rxq->mb_pool = mp;
2365         rxq->nb_rx_desc = nb_desc;
2366         rxq->rx_free_thresh = rx_conf->rx_free_thresh;
2367         rxq->queue_id = queue_idx;
2368         rxq->reg_idx = (uint16_t)((RTE_ETH_DEV_SRIOV(dev).active == 0) ?
2369                 queue_idx : RTE_ETH_DEV_SRIOV(dev).def_pool_q_idx + queue_idx);
2370         rxq->port_id = dev->data->port_id;
2371         rxq->crc_len = (uint8_t) ((dev->data->dev_conf.rxmode.hw_strip_crc) ?
2372                                                         0 : ETHER_CRC_LEN);
2373         rxq->drop_en = rx_conf->rx_drop_en;
2374         rxq->rx_deferred_start = rx_conf->rx_deferred_start;
2375
2376         /*
2377          * Allocate RX ring hardware descriptors. A memzone large enough to
2378          * handle the maximum ring size is allocated in order to allow for
2379          * resizing in later calls to the queue setup function.
2380          */
2381         rz = rte_eth_dma_zone_reserve(dev, "rx_ring", queue_idx,
2382                                       RX_RING_SZ, IXGBE_ALIGN, socket_id);
2383         if (rz == NULL) {
2384                 ixgbe_rx_queue_release(rxq);
2385                 return -ENOMEM;
2386         }
2387
2388         /*
2389          * Zero init all the descriptors in the ring.
2390          */
2391         memset(rz->addr, 0, RX_RING_SZ);
2392
2393         /*
2394          * Use VFRDT/VFRDH instead of RDT/RDH for a virtual function
2395          */
2396         if (hw->mac.type == ixgbe_mac_82599_vf ||
2397             hw->mac.type == ixgbe_mac_X540_vf ||
2398             hw->mac.type == ixgbe_mac_X550_vf ||
2399             hw->mac.type == ixgbe_mac_X550EM_x_vf) {
2400                 rxq->rdt_reg_addr =
2401                         IXGBE_PCI_REG_ADDR(hw, IXGBE_VFRDT(queue_idx));
2402                 rxq->rdh_reg_addr =
2403                         IXGBE_PCI_REG_ADDR(hw, IXGBE_VFRDH(queue_idx));
2404         }
2405         else {
2406                 rxq->rdt_reg_addr =
2407                         IXGBE_PCI_REG_ADDR(hw, IXGBE_RDT(rxq->reg_idx));
2408                 rxq->rdh_reg_addr =
2409                         IXGBE_PCI_REG_ADDR(hw, IXGBE_RDH(rxq->reg_idx));
2410         }
2411
2412         rxq->rx_ring_phys_addr = rte_mem_phy2mch(rz->memseg_id, rz->phys_addr);
2413         rxq->rx_ring = (union ixgbe_adv_rx_desc *) rz->addr;
2414
2415         /*
2416          * Certain constraints must be met in order to use the bulk buffer
2417          * allocation Rx burst function. If any of Rx queues doesn't meet them
2418          * the feature should be disabled for the whole port.
2419          */
2420         if (check_rx_burst_bulk_alloc_preconditions(rxq)) {
2421                 PMD_INIT_LOG(DEBUG, "queue[%d] doesn't meet Rx Bulk Alloc "
2422                                     "preconditions - canceling the feature for "
2423                                     "the whole port[%d]",
2424                              rxq->queue_id, rxq->port_id);
2425                 adapter->rx_bulk_alloc_allowed = false;
2426         }
2427
2428         /*
2429          * Allocate software ring. Allow for space at the end of the
2430          * S/W ring to make sure look-ahead logic in bulk alloc Rx burst
2431          * function does not access an invalid memory region.
2432          */
2433         len = nb_desc;
2434         if (adapter->rx_bulk_alloc_allowed)
2435                 len += RTE_PMD_IXGBE_RX_MAX_BURST;
2436
2437         rxq->sw_ring = rte_zmalloc_socket("rxq->sw_ring",
2438                                           sizeof(struct ixgbe_rx_entry) * len,
2439                                           RTE_CACHE_LINE_SIZE, socket_id);
2440         if (!rxq->sw_ring) {
2441                 ixgbe_rx_queue_release(rxq);
2442                 return -ENOMEM;
2443         }
2444
2445         /*
2446          * Always allocate even if it's not going to be needed in order to
2447          * simplify the code.
2448          *
2449          * This ring is used in LRO and Scattered Rx cases and Scattered Rx may
2450          * be requested in ixgbe_dev_rx_init(), which is called later from
2451          * dev_start() flow.
2452          */
2453         rxq->sw_sc_ring =
2454                 rte_zmalloc_socket("rxq->sw_sc_ring",
2455                                    sizeof(struct ixgbe_scattered_rx_entry) * len,
2456                                    RTE_CACHE_LINE_SIZE, socket_id);
2457         if (!rxq->sw_sc_ring) {
2458                 ixgbe_rx_queue_release(rxq);
2459                 return -ENOMEM;
2460         }
2461
2462         PMD_INIT_LOG(DEBUG, "sw_ring=%p sw_sc_ring=%p hw_ring=%p "
2463                             "dma_addr=0x%"PRIx64,
2464                      rxq->sw_ring, rxq->sw_sc_ring, rxq->rx_ring,
2465                      rxq->rx_ring_phys_addr);
2466
2467         if (!rte_is_power_of_2(nb_desc)) {
2468                 PMD_INIT_LOG(DEBUG, "queue[%d] doesn't meet Vector Rx "
2469                                     "preconditions - canceling the feature for "
2470                                     "the whole port[%d]",
2471                              rxq->queue_id, rxq->port_id);
2472                 adapter->rx_vec_allowed = false;
2473         } else
2474                 ixgbe_rxq_vec_setup(rxq);
2475
2476         dev->data->rx_queues[queue_idx] = rxq;
2477
2478         ixgbe_reset_rx_queue(adapter, rxq);
2479
2480         return 0;
2481 }
2482
2483 uint32_t
2484 ixgbe_dev_rx_queue_count(struct rte_eth_dev *dev, uint16_t rx_queue_id)
2485 {
2486 #define IXGBE_RXQ_SCAN_INTERVAL 4
2487         volatile union ixgbe_adv_rx_desc *rxdp;
2488         struct ixgbe_rx_queue *rxq;
2489         uint32_t desc = 0;
2490
2491         if (rx_queue_id >= dev->data->nb_rx_queues) {
2492                 PMD_RX_LOG(ERR, "Invalid RX queue id=%d", rx_queue_id);
2493                 return 0;
2494         }
2495
2496         rxq = dev->data->rx_queues[rx_queue_id];
2497         rxdp = &(rxq->rx_ring[rxq->rx_tail]);
2498
2499         while ((desc < rxq->nb_rx_desc) &&
2500                 (rxdp->wb.upper.status_error &
2501                         rte_cpu_to_le_32(IXGBE_RXDADV_STAT_DD))) {
2502                 desc += IXGBE_RXQ_SCAN_INTERVAL;
2503                 rxdp += IXGBE_RXQ_SCAN_INTERVAL;
2504                 if (rxq->rx_tail + desc >= rxq->nb_rx_desc)
2505                         rxdp = &(rxq->rx_ring[rxq->rx_tail +
2506                                 desc - rxq->nb_rx_desc]);
2507         }
2508
2509         return desc;
2510 }
2511
2512 int
2513 ixgbe_dev_rx_descriptor_done(void *rx_queue, uint16_t offset)
2514 {
2515         volatile union ixgbe_adv_rx_desc *rxdp;
2516         struct ixgbe_rx_queue *rxq = rx_queue;
2517         uint32_t desc;
2518
2519         if (unlikely(offset >= rxq->nb_rx_desc))
2520                 return 0;
2521         desc = rxq->rx_tail + offset;
2522         if (desc >= rxq->nb_rx_desc)
2523                 desc -= rxq->nb_rx_desc;
2524
2525         rxdp = &rxq->rx_ring[desc];
2526         return !!(rxdp->wb.upper.status_error &
2527                         rte_cpu_to_le_32(IXGBE_RXDADV_STAT_DD));
2528 }
2529
2530 void __attribute__((cold))
2531 ixgbe_dev_clear_queues(struct rte_eth_dev *dev)
2532 {
2533         unsigned i;
2534         struct ixgbe_adapter *adapter =
2535                 (struct ixgbe_adapter *)dev->data->dev_private;
2536
2537         PMD_INIT_FUNC_TRACE();
2538
2539         for (i = 0; i < dev->data->nb_tx_queues; i++) {
2540                 struct ixgbe_tx_queue *txq = dev->data->tx_queues[i];
2541                 if (txq != NULL) {
2542                         txq->ops->release_mbufs(txq);
2543                         txq->ops->reset(txq);
2544                 }
2545         }
2546
2547         for (i = 0; i < dev->data->nb_rx_queues; i++) {
2548                 struct ixgbe_rx_queue *rxq = dev->data->rx_queues[i];
2549                 if (rxq != NULL) {
2550                         ixgbe_rx_queue_release_mbufs(rxq);
2551                         ixgbe_reset_rx_queue(adapter, rxq);
2552                 }
2553         }
2554 }
2555
2556 void
2557 ixgbe_dev_free_queues(struct rte_eth_dev *dev)
2558 {
2559         unsigned i;
2560
2561         PMD_INIT_FUNC_TRACE();
2562
2563         for (i = 0; i < dev->data->nb_rx_queues; i++) {
2564                 ixgbe_dev_rx_queue_release(dev->data->rx_queues[i]);
2565                 dev->data->rx_queues[i] = NULL;
2566         }
2567         dev->data->nb_rx_queues = 0;
2568
2569         for (i = 0; i < dev->data->nb_tx_queues; i++) {
2570                 ixgbe_dev_tx_queue_release(dev->data->tx_queues[i]);
2571                 dev->data->tx_queues[i] = NULL;
2572         }
2573         dev->data->nb_tx_queues = 0;
2574 }
2575
2576 /*********************************************************************
2577  *
2578  *  Device RX/TX init functions
2579  *
2580  **********************************************************************/
2581
2582 /**
2583  * Receive Side Scaling (RSS)
2584  * See section 7.1.2.8 in the following document:
2585  *     "Intel 82599 10 GbE Controller Datasheet" - Revision 2.1 October 2009
2586  *
2587  * Principles:
2588  * The source and destination IP addresses of the IP header and the source
2589  * and destination ports of TCP/UDP headers, if any, of received packets are
2590  * hashed against a configurable random key to compute a 32-bit RSS hash result.
2591  * The seven (7) LSBs of the 32-bit hash result are used as an index into a
2592  * 128-entry redirection table (RETA).  Each entry of the RETA provides a 4-bit
2593  * RSS output index which is used as the RX queue index where to store the
2594  * received packets.
2595  * The following output is supplied in the RX write-back descriptor:
2596  *     - 32-bit result of the Microsoft RSS hash function,
2597  *     - 4-bit RSS type field.
2598  */
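/*
 * Illustrative sketch only (not used by the driver): the RETA lookup
 * described above, expressed in C. "reta" is a hypothetical host-side copy
 * of the 128-entry redirection table; the hardware performs the equivalent
 * lookup internally when it picks the destination RX queue.
 */
static inline uint8_t
rss_reta_lookup_sketch(uint32_t rss_hash, const uint8_t reta[128])
{
        /* The seven LSBs of the 32-bit hash index the 128-entry table. */
        return reta[rss_hash & 0x7F];
}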
2599
2600 /*
2601  * RSS random key supplied in section 7.1.2.8.3 of the Intel 82599 datasheet.
2602  * Used as the default key.
2603  */
2604 static uint8_t rss_intel_key[40] = {
2605         0x6D, 0x5A, 0x56, 0xDA, 0x25, 0x5B, 0x0E, 0xC2,
2606         0x41, 0x67, 0x25, 0x3D, 0x43, 0xA3, 0x8F, 0xB0,
2607         0xD0, 0xCA, 0x2B, 0xCB, 0xAE, 0x7B, 0x30, 0xB4,
2608         0x77, 0xCB, 0x2D, 0xA3, 0x80, 0x30, 0xF2, 0x0C,
2609         0x6A, 0x42, 0xB7, 0x3B, 0xBE, 0xAC, 0x01, 0xFA,
2610 };
2611
2612 static void
2613 ixgbe_rss_disable(struct rte_eth_dev *dev)
2614 {
2615         struct ixgbe_hw *hw;
2616         uint32_t mrqc;
2617         uint32_t mrqc_reg;
2618
2619         hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
2620         mrqc_reg = ixgbe_mrqc_reg_get(hw->mac.type);
2621         mrqc = IXGBE_READ_REG(hw, mrqc_reg);
2622         mrqc &= ~IXGBE_MRQC_RSSEN;
2623         IXGBE_WRITE_REG(hw, mrqc_reg, mrqc);
2624 }
2625
2626 static void
2627 ixgbe_hw_rss_hash_set(struct ixgbe_hw *hw, struct rte_eth_rss_conf *rss_conf)
2628 {
2629         uint8_t  *hash_key;
2630         uint32_t mrqc;
2631         uint32_t rss_key;
2632         uint64_t rss_hf;
2633         uint16_t i;
2634         uint32_t mrqc_reg;
2635         uint32_t rssrk_reg;
2636
2637         mrqc_reg = ixgbe_mrqc_reg_get(hw->mac.type);
2638         rssrk_reg = ixgbe_rssrk_reg_get(hw->mac.type, 0);
2639
2640         hash_key = rss_conf->rss_key;
2641         if (hash_key != NULL) {
2642                 /* Fill in RSS hash key */
2643                 for (i = 0; i < 10; i++) {
2644                         rss_key  = hash_key[(i * 4)];
2645                         rss_key |= hash_key[(i * 4) + 1] << 8;
2646                         rss_key |= hash_key[(i * 4) + 2] << 16;
2647                         rss_key |= hash_key[(i * 4) + 3] << 24;
2648                         IXGBE_WRITE_REG_ARRAY(hw, rssrk_reg, i, rss_key);
2649                 }
2650         }
2651
2652         /* Set configured hashing protocols in MRQC register */
2653         rss_hf = rss_conf->rss_hf;
2654         mrqc = IXGBE_MRQC_RSSEN; /* Enable RSS */
2655         if (rss_hf & ETH_RSS_IPV4)
2656                 mrqc |= IXGBE_MRQC_RSS_FIELD_IPV4;
2657         if (rss_hf & ETH_RSS_NONFRAG_IPV4_TCP)
2658                 mrqc |= IXGBE_MRQC_RSS_FIELD_IPV4_TCP;
2659         if (rss_hf & ETH_RSS_IPV6)
2660                 mrqc |= IXGBE_MRQC_RSS_FIELD_IPV6;
2661         if (rss_hf & ETH_RSS_IPV6_EX)
2662                 mrqc |= IXGBE_MRQC_RSS_FIELD_IPV6_EX;
2663         if (rss_hf & ETH_RSS_NONFRAG_IPV6_TCP)
2664                 mrqc |= IXGBE_MRQC_RSS_FIELD_IPV6_TCP;
2665         if (rss_hf & ETH_RSS_IPV6_TCP_EX)
2666                 mrqc |= IXGBE_MRQC_RSS_FIELD_IPV6_EX_TCP;
2667         if (rss_hf & ETH_RSS_NONFRAG_IPV4_UDP)
2668                 mrqc |= IXGBE_MRQC_RSS_FIELD_IPV4_UDP;
2669         if (rss_hf & ETH_RSS_NONFRAG_IPV6_UDP)
2670                 mrqc |= IXGBE_MRQC_RSS_FIELD_IPV6_UDP;
2671         if (rss_hf & ETH_RSS_IPV6_UDP_EX)
2672                 mrqc |= IXGBE_MRQC_RSS_FIELD_IPV6_EX_UDP;
2673         IXGBE_WRITE_REG(hw, mrqc_reg, mrqc);
2674 }
2675
2676 int
2677 ixgbe_dev_rss_hash_update(struct rte_eth_dev *dev,
2678                           struct rte_eth_rss_conf *rss_conf)
2679 {
2680         struct ixgbe_hw *hw;
2681         uint32_t mrqc;
2682         uint64_t rss_hf;
2683         uint32_t mrqc_reg;
2684
2685         hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
2686
2687         if (!ixgbe_rss_update_sp(hw->mac.type)) {
2688                 PMD_DRV_LOG(ERR, "RSS hash update is not supported on this "
2689                         "NIC.");
2690                 return -ENOTSUP;
2691         }
2692         mrqc_reg = ixgbe_mrqc_reg_get(hw->mac.type);
2693
2694         /*
2695          * Excerpt from section 7.1.2.8 Receive-Side Scaling (RSS):
2696          *     "RSS enabling cannot be done dynamically while it must be
2697          *      preceded by a software reset"
2698          * Before changing anything, first check that the update RSS operation
2699          * does not attempt to disable RSS, if RSS was enabled at
2700          * initialization time, or does not attempt to enable RSS, if RSS was
2701          * disabled at initialization time.
2702          */
2703         rss_hf = rss_conf->rss_hf & IXGBE_RSS_OFFLOAD_ALL;
2704         mrqc = IXGBE_READ_REG(hw, mrqc_reg);
2705         if (!(mrqc & IXGBE_MRQC_RSSEN)) { /* RSS disabled */
2706                 if (rss_hf != 0) /* Enable RSS */
2707                         return -EINVAL;
2708                 return 0; /* Nothing to do */
2709         }
2710         /* RSS enabled */
2711         if (rss_hf == 0) /* Disable RSS */
2712                 return -EINVAL;
2713         ixgbe_hw_rss_hash_set(hw, rss_conf);
2714         return 0;
2715 }
2716
2717 int
2718 ixgbe_dev_rss_hash_conf_get(struct rte_eth_dev *dev,
2719                             struct rte_eth_rss_conf *rss_conf)
2720 {
2721         struct ixgbe_hw *hw;
2722         uint8_t *hash_key;
2723         uint32_t mrqc;
2724         uint32_t rss_key;
2725         uint64_t rss_hf;
2726         uint16_t i;
2727         uint32_t mrqc_reg;
2728         uint32_t rssrk_reg;
2729
2730         hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
2731         mrqc_reg = ixgbe_mrqc_reg_get(hw->mac.type);
2732         rssrk_reg = ixgbe_rssrk_reg_get(hw->mac.type, 0);
2733         hash_key = rss_conf->rss_key;
2734         if (hash_key != NULL) {
2735                 /* Return RSS hash key */
2736                 for (i = 0; i < 10; i++) {
2737                         rss_key = IXGBE_READ_REG_ARRAY(hw, rssrk_reg, i);
2738                         hash_key[(i * 4)] = rss_key & 0x000000FF;
2739                         hash_key[(i * 4) + 1] = (rss_key >> 8) & 0x000000FF;
2740                         hash_key[(i * 4) + 2] = (rss_key >> 16) & 0x000000FF;
2741                         hash_key[(i * 4) + 3] = (rss_key >> 24) & 0x000000FF;
2742                 }
2743         }
2744
2745         /* Get RSS functions configured in MRQC register */
2746         mrqc = IXGBE_READ_REG(hw, mrqc_reg);
2747         if ((mrqc & IXGBE_MRQC_RSSEN) == 0) { /* RSS is disabled */
2748                 rss_conf->rss_hf = 0;
2749                 return 0;
2750         }
2751         rss_hf = 0;
2752         if (mrqc & IXGBE_MRQC_RSS_FIELD_IPV4)
2753                 rss_hf |= ETH_RSS_IPV4;
2754         if (mrqc & IXGBE_MRQC_RSS_FIELD_IPV4_TCP)
2755                 rss_hf |= ETH_RSS_NONFRAG_IPV4_TCP;
2756         if (mrqc & IXGBE_MRQC_RSS_FIELD_IPV6)
2757                 rss_hf |= ETH_RSS_IPV6;
2758         if (mrqc & IXGBE_MRQC_RSS_FIELD_IPV6_EX)
2759                 rss_hf |= ETH_RSS_IPV6_EX;
2760         if (mrqc & IXGBE_MRQC_RSS_FIELD_IPV6_TCP)
2761                 rss_hf |= ETH_RSS_NONFRAG_IPV6_TCP;
2762         if (mrqc & IXGBE_MRQC_RSS_FIELD_IPV6_EX_TCP)
2763                 rss_hf |= ETH_RSS_IPV6_TCP_EX;
2764         if (mrqc & IXGBE_MRQC_RSS_FIELD_IPV4_UDP)
2765                 rss_hf |= ETH_RSS_NONFRAG_IPV4_UDP;
2766         if (mrqc & IXGBE_MRQC_RSS_FIELD_IPV6_UDP)
2767                 rss_hf |= ETH_RSS_NONFRAG_IPV6_UDP;
2768         if (mrqc & IXGBE_MRQC_RSS_FIELD_IPV6_EX_UDP)
2769                 rss_hf |= ETH_RSS_IPV6_UDP_EX;
2770         rss_conf->rss_hf = rss_hf;
2771         return 0;
2772 }
2773
2774 static void
2775 ixgbe_rss_configure(struct rte_eth_dev *dev)
2776 {
2777         struct rte_eth_rss_conf rss_conf;
2778         struct ixgbe_hw *hw;
2779         uint32_t reta;
2780         uint16_t i;
2781         uint16_t j;
2782         uint16_t sp_reta_size;
2783         uint32_t reta_reg;
2784
2785         PMD_INIT_FUNC_TRACE();
2786         hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
2787
2788         sp_reta_size = ixgbe_reta_size_get(hw->mac.type);
2789
2790         /*
2791          * Fill in redirection table
2792          * The byte-swap is needed because NIC registers are in
2793          * little-endian order.
2794          */
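        /*
         * Worked example (assuming 4 RX queues): each 32-bit RETA register
         * packs four one-byte entries, so the loop below accumulates
         * 0x00010203 for the first register and the byte-swap stores it with
         * entry 0 (queue 0) in the least significant byte.
         */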
2795         reta = 0;
2796         for (i = 0, j = 0; i < sp_reta_size; i++, j++) {
2797                 reta_reg = ixgbe_reta_reg_get(hw->mac.type, i);
2798
2799                 if (j == dev->data->nb_rx_queues)
2800                         j = 0;
2801                 reta = (reta << 8) | j;
2802                 if ((i & 3) == 3)
2803                         IXGBE_WRITE_REG(hw, reta_reg,
2804                                         rte_bswap32(reta));
2805         }
2806
2807         /*
2808          * Configure the RSS key and the RSS protocols used to compute
2809          * the RSS hash of input packets.
2810          */
2811         rss_conf = dev->data->dev_conf.rx_adv_conf.rss_conf;
2812         if ((rss_conf.rss_hf & IXGBE_RSS_OFFLOAD_ALL) == 0) {
2813                 ixgbe_rss_disable(dev);
2814                 return;
2815         }
2816         if (rss_conf.rss_key == NULL)
2817                 rss_conf.rss_key = rss_intel_key; /* Default hash key */
2818         ixgbe_hw_rss_hash_set(hw, &rss_conf);
2819 }
2820
2821 #define NUM_VFTA_REGISTERS 128
2822 #define NIC_RX_BUFFER_SIZE 0x200
2823 #define X550_RX_BUFFER_SIZE 0x180
2824
2825 static void
2826 ixgbe_vmdq_dcb_configure(struct rte_eth_dev *dev)
2827 {
2828         struct rte_eth_vmdq_dcb_conf *cfg;
2829         struct ixgbe_hw *hw;
2830         enum rte_eth_nb_pools num_pools;
2831         uint32_t mrqc, vt_ctl, queue_mapping, vlanctrl;
2832         uint16_t pbsize;
2833         uint8_t nb_tcs; /* number of traffic classes */
2834         int i;
2835
2836         PMD_INIT_FUNC_TRACE();
2837         hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
2838         cfg = &dev->data->dev_conf.rx_adv_conf.vmdq_dcb_conf;
2839         num_pools = cfg->nb_queue_pools;
2840         /* Check we have a valid number of pools */
2841         if (num_pools != ETH_16_POOLS && num_pools != ETH_32_POOLS) {
2842                 ixgbe_rss_disable(dev);
2843                 return;
2844         }
2845         /* 16 pools -> 8 traffic classes, 32 pools -> 4 traffic classes */
2846         nb_tcs = (uint8_t)(ETH_VMDQ_DCB_NUM_QUEUES / (int)num_pools);
2847
2848         /*
2849          * RXPBSIZE
2850          * split rx buffer up into sections, each for 1 traffic class
2851          */
2852         switch (hw->mac.type) {
2853         case ixgbe_mac_X550:
2854         case ixgbe_mac_X550EM_x:
2855                 pbsize = (uint16_t)(X550_RX_BUFFER_SIZE / nb_tcs);
2856                 break;
2857         default:
2858                 pbsize = (uint16_t)(NIC_RX_BUFFER_SIZE / nb_tcs);
2859                 break;
2860         }
2861         for (i = 0 ; i < nb_tcs; i++) {
2862                 uint32_t rxpbsize = IXGBE_READ_REG(hw, IXGBE_RXPBSIZE(i));
2863                 rxpbsize &= (~(0x3FF << IXGBE_RXPBSIZE_SHIFT));
2864                 /* clear 10 bits. */
2865                 rxpbsize |= (pbsize << IXGBE_RXPBSIZE_SHIFT); /* set value */
2866                 IXGBE_WRITE_REG(hw, IXGBE_RXPBSIZE(i), rxpbsize);
2867         }
2868         /* zero alloc all unused TCs */
2869         for (i = nb_tcs; i < ETH_DCB_NUM_USER_PRIORITIES; i++) {
2870                 uint32_t rxpbsize = IXGBE_READ_REG(hw, IXGBE_RXPBSIZE(i));
2871                 rxpbsize &= (~(0x3FF << IXGBE_RXPBSIZE_SHIFT));
2872                 /* clear 10 bits. */
2873                 IXGBE_WRITE_REG(hw, IXGBE_RXPBSIZE(i), rxpbsize);
2874         }
2875
2876         /* MRQC: enable vmdq and dcb */
2877         mrqc = ((num_pools == ETH_16_POOLS) ? \
2878                 IXGBE_MRQC_VMDQRT8TCEN : IXGBE_MRQC_VMDQRT4TCEN );
2879         IXGBE_WRITE_REG(hw, IXGBE_MRQC, mrqc);
2880
2881         /* PFVTCTL: turn on virtualisation and set the default pool */
2882         vt_ctl = IXGBE_VT_CTL_VT_ENABLE | IXGBE_VT_CTL_REPLEN;
2883         if (cfg->enable_default_pool) {
2884                 vt_ctl |= (cfg->default_pool << IXGBE_VT_CTL_POOL_SHIFT);
2885         } else {
2886                 vt_ctl |= IXGBE_VT_CTL_DIS_DEFPL;
2887         }
2888
2889         IXGBE_WRITE_REG(hw, IXGBE_VT_CTL, vt_ctl);
2890
2891         /* RTRUP2TC: mapping user priorities to traffic classes (TCs) */
2892         queue_mapping = 0;
2893         for (i = 0; i < ETH_DCB_NUM_USER_PRIORITIES; i++)
2894                 /*
2895                  * mapping is done with 3 bits per priority,
2896                  * so shift by i*3 each time
2897                  */
2898                 queue_mapping |= ((cfg->dcb_tc[i] & 0x07) << (i * 3));
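        /*
         * For instance, a hypothetical dcb_tc map of {0, 0, 1, 1, 2, 2, 3, 3}
         * packs to 0x6D2240: priorities 0-1 on TC0, 2-3 on TC1, and so on.
         */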
2899
2900         IXGBE_WRITE_REG(hw, IXGBE_RTRUP2TC, queue_mapping);
2901
2902         /* RTRPCS: DCB related */
2903         IXGBE_WRITE_REG(hw, IXGBE_RTRPCS, IXGBE_RMCS_RRM);
2904
2905         /* VLNCTRL: enable vlan filtering and allow all vlan tags through */
2906         vlanctrl = IXGBE_READ_REG(hw, IXGBE_VLNCTRL);
2907         vlanctrl |= IXGBE_VLNCTRL_VFE ; /* enable vlan filters */
2908         IXGBE_WRITE_REG(hw, IXGBE_VLNCTRL, vlanctrl);
2909
2910         /* VFTA - enable all vlan filters */
2911         for (i = 0; i < NUM_VFTA_REGISTERS; i++) {
2912                 IXGBE_WRITE_REG(hw, IXGBE_VFTA(i), 0xFFFFFFFF);
2913         }
2914
2915         /* VFRE: pool enabling for receive - 16 or 32 */
2916         IXGBE_WRITE_REG(hw, IXGBE_VFRE(0), \
2917                         num_pools == ETH_16_POOLS ? 0xFFFF : 0xFFFFFFFF);
2918
2919         /*
2920          * MPSAR - allow pools to read specific mac addresses
2921          * In this case, all pools should be able to read from mac addr 0
2922          */
2923         IXGBE_WRITE_REG(hw, IXGBE_MPSAR_LO(0), 0xFFFFFFFF);
2924         IXGBE_WRITE_REG(hw, IXGBE_MPSAR_HI(0), 0xFFFFFFFF);
2925
2926         /* PFVLVF, PFVLVFB: set up filters for vlan tags as configured */
2927         for (i = 0; i < cfg->nb_pool_maps; i++) {
2928                 /* set vlan id in VF register and set the valid bit */
2929                 IXGBE_WRITE_REG(hw, IXGBE_VLVF(i), (IXGBE_VLVF_VIEN | \
2930                                 (cfg->pool_map[i].vlan_id & 0xFFF)));
2931                 /*
2932                  * Put the allowed pools in VFB reg. As we only have 16 or 32
2933                  * pools, we only need to use the first half of the register
2934                  * i.e. bits 0-31
2935                  */
2936                 IXGBE_WRITE_REG(hw, IXGBE_VLVFB(i*2), cfg->pool_map[i].pools);
2937         }
2938 }
2939
2940 /**
2941  * ixgbe_dcb_tx_hw_config - Configure general DCB TX parameters
2942  * @hw: pointer to hardware structure
2943  * @dcb_config: pointer to ixgbe_dcb_config structure
2944  */
2945 static void
2946 ixgbe_dcb_tx_hw_config(struct ixgbe_hw *hw,
2947                struct ixgbe_dcb_config *dcb_config)
2948 {
2949         uint32_t reg;
2950         uint32_t q;
2951
2952         PMD_INIT_FUNC_TRACE();
2953         if (hw->mac.type != ixgbe_mac_82598EB) {
2954                 /* Disable the Tx desc arbiter so that MTQC can be changed */
2955                 reg = IXGBE_READ_REG(hw, IXGBE_RTTDCS);
2956                 reg |= IXGBE_RTTDCS_ARBDIS;
2957                 IXGBE_WRITE_REG(hw, IXGBE_RTTDCS, reg);
2958
2959                 /* Enable DCB for Tx with 8 TCs */
2960                 if (dcb_config->num_tcs.pg_tcs == 8) {
2961                         reg = IXGBE_MTQC_RT_ENA | IXGBE_MTQC_8TC_8TQ;
2962                 }
2963                 else {
2964                         reg = IXGBE_MTQC_RT_ENA | IXGBE_MTQC_4TC_4TQ;
2965                 }
2966                 if (dcb_config->vt_mode)
2967                     reg |= IXGBE_MTQC_VT_ENA;
2968                 IXGBE_WRITE_REG(hw, IXGBE_MTQC, reg);
2969
2970                 /* Disable drop for all queues */
2971                 for (q = 0; q < 128; q++)
2972                         IXGBE_WRITE_REG(hw, IXGBE_QDE,
2973                      (IXGBE_QDE_WRITE | (q << IXGBE_QDE_IDX_SHIFT)));
2974
2975                 /* Enable the Tx desc arbiter */
2976                 reg = IXGBE_READ_REG(hw, IXGBE_RTTDCS);
2977                 reg &= ~IXGBE_RTTDCS_ARBDIS;
2978                 IXGBE_WRITE_REG(hw, IXGBE_RTTDCS, reg);
2979
2980                 /* Enable Security TX Buffer IFG for DCB */
2981                 reg = IXGBE_READ_REG(hw, IXGBE_SECTXMINIFG);
2982                 reg |= IXGBE_SECTX_DCB;
2983                 IXGBE_WRITE_REG(hw, IXGBE_SECTXMINIFG, reg);
2984         }
2985         return;
2986 }
2987
2988 /**
2989  * ixgbe_vmdq_dcb_hw_tx_config - Configure general VMDQ+DCB TX parameters
2990  * @dev: pointer to rte_eth_dev structure
2991  * @dcb_config: pointer to ixgbe_dcb_config structure
2992  */
2993 static void
2994 ixgbe_vmdq_dcb_hw_tx_config(struct rte_eth_dev *dev,
2995                         struct ixgbe_dcb_config *dcb_config)
2996 {
2997         struct rte_eth_vmdq_dcb_tx_conf *vmdq_tx_conf =
2998                         &dev->data->dev_conf.tx_adv_conf.vmdq_dcb_tx_conf;
2999         struct ixgbe_hw *hw =
3000                         IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
3001
3002         PMD_INIT_FUNC_TRACE();
3003         if (hw->mac.type != ixgbe_mac_82598EB)
3004                 /*PF VF Transmit Enable*/
3005                 IXGBE_WRITE_REG(hw, IXGBE_VFTE(0),
3006                         vmdq_tx_conf->nb_queue_pools == ETH_16_POOLS ? 0xFFFF : 0xFFFFFFFF);
3007
3008         /*Configure general DCB TX parameters*/
3009         ixgbe_dcb_tx_hw_config(hw,dcb_config);
3010         return;
3011 }
3012
3013 static void
3014 ixgbe_vmdq_dcb_rx_config(struct rte_eth_dev *dev,
3015                         struct ixgbe_dcb_config *dcb_config)
3016 {
3017         struct rte_eth_vmdq_dcb_conf *vmdq_rx_conf =
3018                         &dev->data->dev_conf.rx_adv_conf.vmdq_dcb_conf;
3019         struct ixgbe_dcb_tc_config *tc;
3020         uint8_t i,j;
3021
3022         /* convert rte_eth_conf.rx_adv_conf to struct ixgbe_dcb_config */
3023         if (vmdq_rx_conf->nb_queue_pools == ETH_16_POOLS ) {
3024                 dcb_config->num_tcs.pg_tcs = ETH_8_TCS;
3025                 dcb_config->num_tcs.pfc_tcs = ETH_8_TCS;
3026         }
3027         else {
3028                 dcb_config->num_tcs.pg_tcs = ETH_4_TCS;
3029                 dcb_config->num_tcs.pfc_tcs = ETH_4_TCS;
3030         }
3031         /* User Priority to Traffic Class mapping */
3032         for (i = 0; i < ETH_DCB_NUM_USER_PRIORITIES; i++) {
3033                 j = vmdq_rx_conf->dcb_tc[i];
3034                 tc = &dcb_config->tc_config[j];
3035                 tc->path[IXGBE_DCB_RX_CONFIG].up_to_tc_bitmap =
3036                                                 (uint8_t)(1 << j);
3037         }
3038 }
3039
3040 static void
3041 ixgbe_dcb_vt_tx_config(struct rte_eth_dev *dev,
3042                         struct ixgbe_dcb_config *dcb_config)
3043 {
3044         struct rte_eth_vmdq_dcb_tx_conf *vmdq_tx_conf =
3045                         &dev->data->dev_conf.tx_adv_conf.vmdq_dcb_tx_conf;
3046         struct ixgbe_dcb_tc_config *tc;
3047         uint8_t i,j;
3048
3049         /* convert rte_eth_conf.rx_adv_conf to struct ixgbe_dcb_config */
3050         if (vmdq_tx_conf->nb_queue_pools == ETH_16_POOLS ) {
3051                 dcb_config->num_tcs.pg_tcs = ETH_8_TCS;
3052                 dcb_config->num_tcs.pfc_tcs = ETH_8_TCS;
3053         }
3054         else {
3055                 dcb_config->num_tcs.pg_tcs = ETH_4_TCS;
3056                 dcb_config->num_tcs.pfc_tcs = ETH_4_TCS;
3057         }
3058
3059         /* User Priority to Traffic Class mapping */
3060         for (i = 0; i < ETH_DCB_NUM_USER_PRIORITIES; i++) {
3061                 j = vmdq_tx_conf->dcb_tc[i];
3062                 tc = &dcb_config->tc_config[j];
3063                 tc->path[IXGBE_DCB_TX_CONFIG].up_to_tc_bitmap =
3064                                                 (uint8_t)(1 << j);
3065         }
3066         return;
3067 }
3068
3069 static void
3070 ixgbe_dcb_rx_config(struct rte_eth_dev *dev,
3071                 struct ixgbe_dcb_config *dcb_config)
3072 {
3073         struct rte_eth_dcb_rx_conf *rx_conf =
3074                         &dev->data->dev_conf.rx_adv_conf.dcb_rx_conf;
3075         struct ixgbe_dcb_tc_config *tc;
3076         uint8_t i,j;
3077
3078         dcb_config->num_tcs.pg_tcs = (uint8_t)rx_conf->nb_tcs;
3079         dcb_config->num_tcs.pfc_tcs = (uint8_t)rx_conf->nb_tcs;
3080
3081         /* User Priority to Traffic Class mapping */
3082         for (i = 0; i < ETH_DCB_NUM_USER_PRIORITIES; i++) {
3083                 j = rx_conf->dcb_tc[i];
3084                 tc = &dcb_config->tc_config[j];
3085                 tc->path[IXGBE_DCB_RX_CONFIG].up_to_tc_bitmap =
3086                                                 (uint8_t)(1 << j);
3087         }
3088 }
3089
3090 static void
3091 ixgbe_dcb_tx_config(struct rte_eth_dev *dev,
3092                 struct ixgbe_dcb_config *dcb_config)
3093 {
3094         struct rte_eth_dcb_tx_conf *tx_conf =
3095                         &dev->data->dev_conf.tx_adv_conf.dcb_tx_conf;
3096         struct ixgbe_dcb_tc_config *tc;
3097         uint8_t i,j;
3098
3099         dcb_config->num_tcs.pg_tcs = (uint8_t)tx_conf->nb_tcs;
3100         dcb_config->num_tcs.pfc_tcs = (uint8_t)tx_conf->nb_tcs;
3101
3102         /* User Priority to Traffic Class mapping */
3103         for (i = 0; i < ETH_DCB_NUM_USER_PRIORITIES; i++) {
3104                 j = tx_conf->dcb_tc[i];
3105                 tc = &dcb_config->tc_config[j];
3106                 tc->path[IXGBE_DCB_TX_CONFIG].up_to_tc_bitmap =
3107                                                 (uint8_t)(1 << j);
3108         }
3109 }
3110
3111 /**
3112  * ixgbe_dcb_rx_hw_config - Configure general DCB RX HW parameters
3113  * @hw: pointer to hardware structure
3114  * @dcb_config: pointer to ixgbe_dcb_config structure
3115  */
3116 static void
3117 ixgbe_dcb_rx_hw_config(struct ixgbe_hw *hw,
3118                struct ixgbe_dcb_config *dcb_config)
3119 {
3120         uint32_t reg;
3121         uint32_t vlanctrl;
3122         uint8_t i;
3123
3124         PMD_INIT_FUNC_TRACE();
3125         /*
3126          * Disable the arbiter before changing parameters
3127          * (always enable recycle mode; WSP)
3128          */
3129         reg = IXGBE_RTRPCS_RRM | IXGBE_RTRPCS_RAC | IXGBE_RTRPCS_ARBDIS;
3130         IXGBE_WRITE_REG(hw, IXGBE_RTRPCS, reg);
3131
3132         if (hw->mac.type != ixgbe_mac_82598EB) {
3133                 reg = IXGBE_READ_REG(hw, IXGBE_MRQC);
3134                 if (dcb_config->num_tcs.pg_tcs == 4) {
3135                         if (dcb_config->vt_mode)
3136                                 reg = (reg & ~IXGBE_MRQC_MRQE_MASK) |
3137                                         IXGBE_MRQC_VMDQRT4TCEN;
3138                         else {
3139                                 /* Whether the mode is DCB or DCB_RSS, just
3140                                  * set the MRQE field to RSSXTCEN; RSS itself
3141                                  * is controlled by the RSS_FIELD bits
3142                                  */
3143                                 IXGBE_WRITE_REG(hw, IXGBE_VT_CTL, 0);
3144                                 reg = (reg & ~IXGBE_MRQC_MRQE_MASK) |
3145                                         IXGBE_MRQC_RTRSS4TCEN;
3146                         }
3147                 }
3148                 if (dcb_config->num_tcs.pg_tcs == 8) {
3149                         if (dcb_config->vt_mode)
3150                                 reg = (reg & ~IXGBE_MRQC_MRQE_MASK) |
3151                                         IXGBE_MRQC_VMDQRT8TCEN;
3152                         else {
3153                                 IXGBE_WRITE_REG(hw, IXGBE_VT_CTL, 0);
3154                                 reg = (reg & ~IXGBE_MRQC_MRQE_MASK) |
3155                                         IXGBE_MRQC_RTRSS8TCEN;
3156                         }
3157                 }
3158
3159                 IXGBE_WRITE_REG(hw, IXGBE_MRQC, reg);
3160         }
3161
3162         /* VLNCTRL: enable vlan filtering and allow all vlan tags through */
3163         vlanctrl = IXGBE_READ_REG(hw, IXGBE_VLNCTRL);
3164         vlanctrl |= IXGBE_VLNCTRL_VFE ; /* enable vlan filters */
3165         IXGBE_WRITE_REG(hw, IXGBE_VLNCTRL, vlanctrl);
3166
3167         /* VFTA - enable all vlan filters */
3168         for (i = 0; i < NUM_VFTA_REGISTERS; i++) {
3169                 IXGBE_WRITE_REG(hw, IXGBE_VFTA(i), 0xFFFFFFFF);
3170         }
3171
3172         /*
3173          * Configure Rx packet plane (recycle mode; WSP) and
3174          * enable arbiter
3175          */
3176         reg = IXGBE_RTRPCS_RRM | IXGBE_RTRPCS_RAC;
3177         IXGBE_WRITE_REG(hw, IXGBE_RTRPCS, reg);
3178
3179         return;
3180 }
3181
3182 static void
3183 ixgbe_dcb_hw_arbite_rx_config(struct ixgbe_hw *hw, uint16_t *refill,
3184                         uint16_t *max,uint8_t *bwg_id, uint8_t *tsa, uint8_t *map)
3185 {
3186         switch (hw->mac.type) {
3187         case ixgbe_mac_82598EB:
3188                 ixgbe_dcb_config_rx_arbiter_82598(hw, refill, max, tsa);
3189                 break;
3190         case ixgbe_mac_82599EB:
3191         case ixgbe_mac_X540:
3192         case ixgbe_mac_X550:
3193         case ixgbe_mac_X550EM_x:
3194                 ixgbe_dcb_config_rx_arbiter_82599(hw, refill, max, bwg_id,
3195                                                   tsa, map);
3196                 break;
3197         default:
3198                 break;
3199         }
3200 }
3201
3202 static void
3203 ixgbe_dcb_hw_arbite_tx_config(struct ixgbe_hw *hw, uint16_t *refill, uint16_t *max,
3204                             uint8_t *bwg_id, uint8_t *tsa, uint8_t *map)
3205 {
3206         switch (hw->mac.type) {
3207         case ixgbe_mac_82598EB:
3208                 ixgbe_dcb_config_tx_desc_arbiter_82598(hw, refill, max, bwg_id,tsa);
3209                 ixgbe_dcb_config_tx_data_arbiter_82598(hw, refill, max, bwg_id,tsa);
3210                 break;
3211         case ixgbe_mac_82599EB:
3212         case ixgbe_mac_X540:
3213         case ixgbe_mac_X550:
3214         case ixgbe_mac_X550EM_x:
3215                 ixgbe_dcb_config_tx_desc_arbiter_82599(hw, refill, max, bwg_id,tsa);
3216                 ixgbe_dcb_config_tx_data_arbiter_82599(hw, refill, max, bwg_id,tsa, map);
3217                 break;
3218         default:
3219                 break;
3220         }
3221 }
3222
3223 #define DCB_RX_CONFIG  1
3224 #define DCB_TX_CONFIG  1
3225 #define DCB_TX_PB      1024
3226 /**
3227  * ixgbe_dcb_hw_configure - Enable DCB and configure
3228  * general DCB in VT mode and non-VT mode parameters
3229  * @dev: pointer to rte_eth_dev structure
3230  * @dcb_config: pointer to ixgbe_dcb_config structure
3231  */
3232 static int
3233 ixgbe_dcb_hw_configure(struct rte_eth_dev *dev,
3234                         struct ixgbe_dcb_config *dcb_config)
3235 {
3236         int     ret = 0;
3237         uint8_t i,pfc_en,nb_tcs;
3238         uint16_t pbsize, rx_buffer_size;
3239         uint8_t config_dcb_rx = 0;
3240         uint8_t config_dcb_tx = 0;
3241         uint8_t tsa[IXGBE_DCB_MAX_TRAFFIC_CLASS] = {0};
3242         uint8_t bwgid[IXGBE_DCB_MAX_TRAFFIC_CLASS] = {0};
3243         uint16_t refill[IXGBE_DCB_MAX_TRAFFIC_CLASS] = {0};
3244         uint16_t max[IXGBE_DCB_MAX_TRAFFIC_CLASS] = {0};
3245         uint8_t map[IXGBE_DCB_MAX_TRAFFIC_CLASS] = {0};
3246         struct ixgbe_dcb_tc_config *tc;
3247         uint32_t max_frame = dev->data->mtu + ETHER_HDR_LEN + ETHER_CRC_LEN;
3248         struct ixgbe_hw *hw =
3249                         IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
3250
3251         switch(dev->data->dev_conf.rxmode.mq_mode){
3252         case ETH_MQ_RX_VMDQ_DCB:
3253                 dcb_config->vt_mode = true;
3254                 if (hw->mac.type != ixgbe_mac_82598EB) {
3255                         config_dcb_rx = DCB_RX_CONFIG;
3256                         /*
3257                          * get DCB and VT RX configuration parameters
3258                          * from rte_eth_conf
3259                          */
3260                         ixgbe_vmdq_dcb_rx_config(dev, dcb_config);
3261                         /*Configure general VMDQ and DCB RX parameters*/
3262                         ixgbe_vmdq_dcb_configure(dev);
3263                 }
3264                 break;
3265         case ETH_MQ_RX_DCB:
3266         case ETH_MQ_RX_DCB_RSS:
3267                 dcb_config->vt_mode = false;
3268                 config_dcb_rx = DCB_RX_CONFIG;
3269                 /* Get DCB RX configuration parameters from rte_eth_conf */
3270                 ixgbe_dcb_rx_config(dev, dcb_config);
3271                 /*Configure general DCB RX parameters*/
3272                 ixgbe_dcb_rx_hw_config(hw, dcb_config);
3273                 break;
3274         default:
3275                 PMD_INIT_LOG(ERR, "Incorrect DCB RX mode configuration");
3276                 break;
3277         }
3278         switch (dev->data->dev_conf.txmode.mq_mode) {
3279         case ETH_MQ_TX_VMDQ_DCB:
3280                 dcb_config->vt_mode = true;
3281                 config_dcb_tx = DCB_TX_CONFIG;
3282                 /* get DCB and VT TX configuration parameters from rte_eth_conf */
3283                 ixgbe_dcb_vt_tx_config(dev,dcb_config);
3284                 /*Configure general VMDQ and DCB TX parameters*/
3285                 ixgbe_vmdq_dcb_hw_tx_config(dev,dcb_config);
3286                 break;
3287
3288         case ETH_MQ_TX_DCB:
3289                 dcb_config->vt_mode = false;
3290                 config_dcb_tx = DCB_TX_CONFIG;
3291                 /*get DCB TX configuration parameters from rte_eth_conf*/
3292                 ixgbe_dcb_tx_config(dev, dcb_config);
3293                 /*Configure general DCB TX parameters*/
3294                 ixgbe_dcb_tx_hw_config(hw, dcb_config);
3295                 break;
3296         default:
3297                 PMD_INIT_LOG(ERR, "Incorrect DCB TX mode configuration");
3298                 break;
3299         }
3300
3301         nb_tcs = dcb_config->num_tcs.pfc_tcs;
3302         /* Unpack map */
3303         ixgbe_dcb_unpack_map_cee(dcb_config, IXGBE_DCB_RX_CONFIG, map);
3304         if(nb_tcs == ETH_4_TCS) {
3305                 /* Avoid un-configured priority mapping to TC0 */
3306                 uint8_t j = 4;
3307                 uint8_t mask = 0xFF;
3308                 for (i = 0; i < ETH_DCB_NUM_USER_PRIORITIES - 4; i++)
3309                         mask = (uint8_t)(mask & (~ (1 << map[i])));
3310                 for (i = 0; mask && (i < IXGBE_DCB_MAX_TRAFFIC_CLASS); i++) {
3311                         if ((mask & 0x1) && (j < ETH_DCB_NUM_USER_PRIORITIES))
3312                                 map[j++] = i;
3313                         mask >>= 1;
3314                 }
3315                 /* Re-configure 4 TCs BW */
3316                 for (i = 0; i < nb_tcs; i++) {
3317                         tc = &dcb_config->tc_config[i];
3318                         tc->path[IXGBE_DCB_TX_CONFIG].bwg_percent =
3319                                                 (uint8_t)(100 / nb_tcs);
3320                         tc->path[IXGBE_DCB_RX_CONFIG].bwg_percent =
3321                                                 (uint8_t)(100 / nb_tcs);
3322                 }
3323                 for (; i < IXGBE_DCB_MAX_TRAFFIC_CLASS; i++) {
3324                         tc = &dcb_config->tc_config[i];
3325                         tc->path[IXGBE_DCB_TX_CONFIG].bwg_percent = 0;
3326                         tc->path[IXGBE_DCB_RX_CONFIG].bwg_percent = 0;
3327                 }
3328         }
3329
3330         switch (hw->mac.type) {
3331         case ixgbe_mac_X550:
3332         case ixgbe_mac_X550EM_x:
3333                 rx_buffer_size = X550_RX_BUFFER_SIZE;
3334                 break;
3335         default:
3336                 rx_buffer_size = NIC_RX_BUFFER_SIZE;
3337                 break;
3338         }
3339
3340         if(config_dcb_rx) {
3341                 /* Set RX buffer size */
3342                 pbsize = (uint16_t)(rx_buffer_size / nb_tcs);
3343                 uint32_t rxpbsize = pbsize << IXGBE_RXPBSIZE_SHIFT;
3344                 for (i = 0 ; i < nb_tcs; i++) {
3345                         IXGBE_WRITE_REG(hw, IXGBE_RXPBSIZE(i), rxpbsize);
3346                 }
3347                 /* zero alloc all unused TCs */
3348                 for (; i < ETH_DCB_NUM_USER_PRIORITIES; i++) {
3349                         IXGBE_WRITE_REG(hw, IXGBE_RXPBSIZE(i), 0);
3350                 }
3351         }
3352         if(config_dcb_tx) {
3353                 /* Only an equally distributed Tx packet buffer strategy is supported. */
3354                 uint32_t txpktsize = IXGBE_TXPBSIZE_MAX / nb_tcs;
3355                 uint32_t txpbthresh = (txpktsize / DCB_TX_PB) - IXGBE_TXPKT_SIZE_MAX;
3356                 for (i = 0; i < nb_tcs; i++) {
3357                         IXGBE_WRITE_REG(hw, IXGBE_TXPBSIZE(i), txpktsize);
3358                         IXGBE_WRITE_REG(hw, IXGBE_TXPBTHRESH(i), txpbthresh);
3359                 }
3360                 /* Clear unused TCs, if any, to zero buffer size*/
3361                 for (; i < ETH_DCB_NUM_USER_PRIORITIES; i++) {
3362                         IXGBE_WRITE_REG(hw, IXGBE_TXPBSIZE(i), 0);
3363                         IXGBE_WRITE_REG(hw, IXGBE_TXPBTHRESH(i), 0);
3364                 }
3365         }
3366
3367         /*Calculates traffic class credits*/
3368         ixgbe_dcb_calculate_tc_credits_cee(hw, dcb_config,max_frame,
3369                                 IXGBE_DCB_TX_CONFIG);
3370         ixgbe_dcb_calculate_tc_credits_cee(hw, dcb_config,max_frame,
3371                                 IXGBE_DCB_RX_CONFIG);
3372
3373         if(config_dcb_rx) {
3374                 /* Unpack CEE standard containers */
3375                 ixgbe_dcb_unpack_refill_cee(dcb_config, IXGBE_DCB_RX_CONFIG, refill);
3376                 ixgbe_dcb_unpack_max_cee(dcb_config, max);
3377                 ixgbe_dcb_unpack_bwgid_cee(dcb_config, IXGBE_DCB_RX_CONFIG, bwgid);
3378                 ixgbe_dcb_unpack_tsa_cee(dcb_config, IXGBE_DCB_RX_CONFIG, tsa);
3379                 /* Configure PG(ETS) RX */
3380                 ixgbe_dcb_hw_arbite_rx_config(hw,refill,max,bwgid,tsa,map);
3381         }
3382
3383         if(config_dcb_tx) {
3384                 /* Unpack CEE standard containers */
3385                 ixgbe_dcb_unpack_refill_cee(dcb_config, IXGBE_DCB_TX_CONFIG, refill);
3386                 ixgbe_dcb_unpack_max_cee(dcb_config, max);
3387                 ixgbe_dcb_unpack_bwgid_cee(dcb_config, IXGBE_DCB_TX_CONFIG, bwgid);
3388                 ixgbe_dcb_unpack_tsa_cee(dcb_config, IXGBE_DCB_TX_CONFIG, tsa);
3389                 /* Configure PG(ETS) TX */
3390                 ixgbe_dcb_hw_arbite_tx_config(hw,refill,max,bwgid,tsa,map);
3391         }
3392
3393         /*Configure queue statistics registers*/
3394         ixgbe_dcb_config_tc_stats_82599(hw, dcb_config);
3395
3396         /* Check if the PFC is supported */
3397         if(dev->data->dev_conf.dcb_capability_en & ETH_DCB_PFC_SUPPORT) {
3398                 pbsize = (uint16_t)(rx_buffer_size / nb_tcs);
3399                 for (i = 0; i < nb_tcs; i++) {
3400                         /*
3401                          * E.g. with 8 TCs and the default 0x200 Rx buffer, pbsize is 64,
3402                          * giving the default high_water of 48 and low_water of 16 below.
3403                          */
3404                         hw->fc.high_water[i] = (pbsize * 3 ) / 4;
3405                         hw->fc.low_water[i] = pbsize / 4;
3406                         /* Enable pfc for this TC */
3407                         tc = &dcb_config->tc_config[i];
3408                         tc->pfc = ixgbe_dcb_pfc_enabled;
3409                 }
3410                 ixgbe_dcb_unpack_pfc_cee(dcb_config, map, &pfc_en);
3411                 if(dcb_config->num_tcs.pfc_tcs == ETH_4_TCS)
3412                         pfc_en &= 0x0F;
3413                 ret = ixgbe_dcb_config_pfc(hw, pfc_en, map);
3414         }
3415
3416         return ret;
3417 }
3418
3419 /**
3420  * ixgbe_configure_dcb - Configure DCB  Hardware
3421  * @dev: pointer to rte_eth_dev
3422  */
3423 void ixgbe_configure_dcb(struct rte_eth_dev *dev)
3424 {
3425         struct ixgbe_dcb_config *dcb_cfg =
3426                         IXGBE_DEV_PRIVATE_TO_DCB_CFG(dev->data->dev_private);
3427         struct rte_eth_conf *dev_conf = &(dev->data->dev_conf);
3428
3429         PMD_INIT_FUNC_TRACE();
3430
3431         /* check that the mq_mode supports DCB */
3432         if ((dev_conf->rxmode.mq_mode != ETH_MQ_RX_VMDQ_DCB) &&
3433             (dev_conf->rxmode.mq_mode != ETH_MQ_RX_DCB) &&
3434             (dev_conf->rxmode.mq_mode != ETH_MQ_RX_DCB_RSS))
3435                 return;
3436
3437         if (dev->data->nb_rx_queues != ETH_DCB_NUM_QUEUES)
3438                 return;
3439
3440         /** Configure DCB hardware **/
3441         ixgbe_dcb_hw_configure(dev, dcb_cfg);
3442
3443         return;
3444 }
3445
3446 /*
3447  * VMDq is only supported on 10 GbE NICs.
3448  */
3449 static void
3450 ixgbe_vmdq_rx_hw_configure(struct rte_eth_dev *dev)
3451 {
3452         struct rte_eth_vmdq_rx_conf *cfg;
3453         struct ixgbe_hw *hw;
3454         enum rte_eth_nb_pools num_pools;
3455         uint32_t mrqc, vt_ctl, vlanctrl;
3456         uint32_t vmolr = 0;
3457         int i;
3458
3459         PMD_INIT_FUNC_TRACE();
3460         hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
3461         cfg = &dev->data->dev_conf.rx_adv_conf.vmdq_rx_conf;
3462         num_pools = cfg->nb_queue_pools;
3463
3464         ixgbe_rss_disable(dev);
3465
3466         /* MRQC: enable vmdq */
3467         mrqc = IXGBE_MRQC_VMDQEN;
3468         IXGBE_WRITE_REG(hw, IXGBE_MRQC, mrqc);
3469
3470         /* PFVTCTL: turn on virtualisation and set the default pool */
3471         vt_ctl = IXGBE_VT_CTL_VT_ENABLE | IXGBE_VT_CTL_REPLEN;
3472         if (cfg->enable_default_pool)
3473                 vt_ctl |= (cfg->default_pool << IXGBE_VT_CTL_POOL_SHIFT);
3474         else
3475                 vt_ctl |= IXGBE_VT_CTL_DIS_DEFPL;
3476
3477         IXGBE_WRITE_REG(hw, IXGBE_VT_CTL, vt_ctl);
3478
3479         for (i = 0; i < (int)num_pools; i++) {
3480                 vmolr = ixgbe_convert_vm_rx_mask_to_val(cfg->rx_mode, vmolr);
3481                 IXGBE_WRITE_REG(hw, IXGBE_VMOLR(i), vmolr);
3482         }
3483
3484         /* VLNCTRL: enable vlan filtering and allow all vlan tags through */
3485         vlanctrl = IXGBE_READ_REG(hw, IXGBE_VLNCTRL);
3486         vlanctrl |= IXGBE_VLNCTRL_VFE ; /* enable vlan filters */
3487         IXGBE_WRITE_REG(hw, IXGBE_VLNCTRL, vlanctrl);
3488
3489         /* VFTA - enable all vlan filters */
3490         for (i = 0; i < NUM_VFTA_REGISTERS; i++)
3491                 IXGBE_WRITE_REG(hw, IXGBE_VFTA(i), UINT32_MAX);
3492
3493         /* VFRE: pool enabling for receive - 64 */
3494         IXGBE_WRITE_REG(hw, IXGBE_VFRE(0), UINT32_MAX);
3495         if (num_pools == ETH_64_POOLS)
3496                 IXGBE_WRITE_REG(hw, IXGBE_VFRE(1), UINT32_MAX);
3497
3498         /*
3499          * MPSAR - allow pools to read specific mac addresses
3500          * In this case, all pools should be able to read from mac addr 0
3501          */
3502         IXGBE_WRITE_REG(hw, IXGBE_MPSAR_LO(0), UINT32_MAX);
3503         IXGBE_WRITE_REG(hw, IXGBE_MPSAR_HI(0), UINT32_MAX);
3504
3505         /* PFVLVF, PFVLVFB: set up filters for vlan tags as configured */
3506         for (i = 0; i < cfg->nb_pool_maps; i++) {
3507                 /* set vlan id in VF register and set the valid bit */
3508                 IXGBE_WRITE_REG(hw, IXGBE_VLVF(i), (IXGBE_VLVF_VIEN | \
3509                                 (cfg->pool_map[i].vlan_id & IXGBE_RXD_VLAN_ID_MASK)));
3510                 /*
3511                  * Put the allowed pools in the VFB regs. The 64-bit pool bitmap
3512                  * spans two 32-bit VLVFB registers, so write the half that
3513                  * actually holds the configured pools.
3514                  */
3515                 if (((cfg->pool_map[i].pools >> 32) & UINT32_MAX) == 0)
3516                         IXGBE_WRITE_REG(hw, IXGBE_VLVFB(i*2), \
3517                                         (cfg->pool_map[i].pools & UINT32_MAX));
3518                 else
3519                         IXGBE_WRITE_REG(hw, IXGBE_VLVFB((i*2+1)), \
3520                                         ((cfg->pool_map[i].pools >> 32) \
3521                                         & UINT32_MAX));
3522
3523         }
3524
3525         /* PFDMA Tx General Switch Control Enables VMDQ loopback */
3526         if (cfg->enable_loop_back) {
3527                 IXGBE_WRITE_REG(hw, IXGBE_PFDTXGSWC, IXGBE_PFDTXGSWC_VT_LBEN);
3528                 for (i = 0; i < RTE_IXGBE_VMTXSW_REGISTER_COUNT; i++)
3529                         IXGBE_WRITE_REG(hw, IXGBE_VMTXSW(i), UINT32_MAX);
3530         }
3531
3532         IXGBE_WRITE_FLUSH(hw);
3533 }
3534
3535 /*
3536  * ixgbe_vmdq_tx_hw_configure - Configure general VMDq TX parameters
3537  * @hw: pointer to hardware structure
3538  */
3539 static void
3540 ixgbe_vmdq_tx_hw_configure(struct ixgbe_hw *hw)
3541 {
3542         uint32_t reg;
3543         uint32_t q;
3544
3545         PMD_INIT_FUNC_TRACE();
3546         /*PF VF Transmit Enable*/
3547         IXGBE_WRITE_REG(hw, IXGBE_VFTE(0), UINT32_MAX);
3548         IXGBE_WRITE_REG(hw, IXGBE_VFTE(1), UINT32_MAX);
3549
3550         /* Disable the Tx desc arbiter so that MTQC can be changed */
3551         reg = IXGBE_READ_REG(hw, IXGBE_RTTDCS);
3552         reg |= IXGBE_RTTDCS_ARBDIS;
3553         IXGBE_WRITE_REG(hw, IXGBE_RTTDCS, reg);
3554
3555         reg = IXGBE_MTQC_VT_ENA | IXGBE_MTQC_64VF;
3556         IXGBE_WRITE_REG(hw, IXGBE_MTQC, reg);
3557
3558         /* Disable drop for all queues */
3559         for (q = 0; q < IXGBE_MAX_RX_QUEUE_NUM; q++)
3560                 IXGBE_WRITE_REG(hw, IXGBE_QDE,
3561                   (IXGBE_QDE_WRITE | (q << IXGBE_QDE_IDX_SHIFT)));
3562
3563         /* Enable the Tx desc arbiter */
3564         reg = IXGBE_READ_REG(hw, IXGBE_RTTDCS);
3565         reg &= ~IXGBE_RTTDCS_ARBDIS;
3566         IXGBE_WRITE_REG(hw, IXGBE_RTTDCS, reg);
3567
3568         IXGBE_WRITE_FLUSH(hw);
3569
3570         return;
3571 }
3572
3573 static int __attribute__((cold))
3574 ixgbe_alloc_rx_queue_mbufs(struct ixgbe_rx_queue *rxq)
3575 {
3576         struct ixgbe_rx_entry *rxe = rxq->sw_ring;
3577         uint64_t dma_addr;
3578         unsigned i;
3579
3580         /* Initialize software ring entries */
3581         for (i = 0; i < rxq->nb_rx_desc; i++) {
3582                 volatile union ixgbe_adv_rx_desc *rxd;
3583                 struct rte_mbuf *mbuf = rte_rxmbuf_alloc(rxq->mb_pool);
3584                 if (mbuf == NULL) {
3585                         PMD_INIT_LOG(ERR, "RX mbuf alloc failed queue_id=%u",
3586                                      (unsigned) rxq->queue_id);
3587                         return -ENOMEM;
3588                 }
3589
3590                 rte_mbuf_refcnt_set(mbuf, 1);
3591                 mbuf->next = NULL;
3592                 mbuf->data_off = RTE_PKTMBUF_HEADROOM;
3593                 mbuf->nb_segs = 1;
3594                 mbuf->port = rxq->port_id;
3595
3596                 dma_addr =
3597                         rte_cpu_to_le_64(RTE_MBUF_DATA_DMA_ADDR_DEFAULT(mbuf));
3598                 rxd = &rxq->rx_ring[i];
3599                 rxd->read.hdr_addr = 0;
3600                 rxd->read.pkt_addr = dma_addr;
3601                 rxe[i].mbuf = mbuf;
3602         }
3603
3604         return 0;
3605 }
3606
3607 static int
3608 ixgbe_config_vf_rss(struct rte_eth_dev *dev)
3609 {
3610         struct ixgbe_hw *hw;
3611         uint32_t mrqc;
3612
3613         ixgbe_rss_configure(dev);
3614
3615         hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
3616
3617         /* MRQC: enable VF RSS */
3618         mrqc = IXGBE_READ_REG(hw, IXGBE_MRQC);
3619         mrqc &= ~IXGBE_MRQC_MRQE_MASK;
3620         switch (RTE_ETH_DEV_SRIOV(dev).active) {
3621         case ETH_64_POOLS:
3622                 mrqc |= IXGBE_MRQC_VMDQRSS64EN;
3623                 break;
3624
3625         case ETH_32_POOLS:
3626                 mrqc |= IXGBE_MRQC_VMDQRSS32EN;
3627                 break;
3628
3629         default:
3630                 PMD_INIT_LOG(ERR, "Invalid pool number in IOV mode with VMDQ RSS");
3631                 return -EINVAL;
3632         }
3633
3634         IXGBE_WRITE_REG(hw, IXGBE_MRQC, mrqc);
3635
3636         return 0;
3637 }
3638
3639 static int
3640 ixgbe_config_vf_default(struct rte_eth_dev *dev)
3641 {
3642         struct ixgbe_hw *hw =
3643                 IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
3644
3645         switch (RTE_ETH_DEV_SRIOV(dev).active) {
3646         case ETH_64_POOLS:
3647                 IXGBE_WRITE_REG(hw, IXGBE_MRQC,
3648                         IXGBE_MRQC_VMDQEN);
3649                 break;
3650
3651         case ETH_32_POOLS:
3652                 IXGBE_WRITE_REG(hw, IXGBE_MRQC,
3653                         IXGBE_MRQC_VMDQRT4TCEN);
3654                 break;
3655
3656         case ETH_16_POOLS:
3657                 IXGBE_WRITE_REG(hw, IXGBE_MRQC,
3658                         IXGBE_MRQC_VMDQRT8TCEN);
3659                 break;
3660         default:
3661                 PMD_INIT_LOG(ERR,
3662                         "invalid pool number in IOV mode");
3663                 break;
3664         }
3665         return 0;
3666 }
3667
3668 static int
3669 ixgbe_dev_mq_rx_configure(struct rte_eth_dev *dev)
3670 {
3671         struct ixgbe_hw *hw =
3672                 IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
3673
3674         if (hw->mac.type == ixgbe_mac_82598EB)
3675                 return 0;
3676
3677         if (RTE_ETH_DEV_SRIOV(dev).active == 0) {
3678                 /*
3679                  * SRIOV inactive scheme
3680                  * any DCB/RSS w/o VMDq multi-queue setting
3681                  */
3682                 switch (dev->data->dev_conf.rxmode.mq_mode) {
3683                 case ETH_MQ_RX_RSS:
3684                 case ETH_MQ_RX_DCB_RSS:
3685                 case ETH_MQ_RX_VMDQ_RSS:
3686                         ixgbe_rss_configure(dev);
3687                         break;
3688
3689                 case ETH_MQ_RX_VMDQ_DCB:
3690                         ixgbe_vmdq_dcb_configure(dev);
3691                         break;
3692
3693                 case ETH_MQ_RX_VMDQ_ONLY:
3694                         ixgbe_vmdq_rx_hw_configure(dev);
3695                         break;
3696
3697                 case ETH_MQ_RX_NONE:
3698                 default:
3699                         /* if mq_mode is none, disable rss mode.*/
3700                         ixgbe_rss_disable(dev);
3701                         break;
3702                 }
3703         } else {
3704                 /*
3705                  * SRIOV active scheme
3706                  * Support RSS together with VMDq & SRIOV
3707                  */
3708                 switch (dev->data->dev_conf.rxmode.mq_mode) {
3709                 case ETH_MQ_RX_RSS:
3710                 case ETH_MQ_RX_VMDQ_RSS:
3711                         ixgbe_config_vf_rss(dev);
3712                         break;
3713
3714                 /* FIXME if support DCB/RSS together with VMDq & SRIOV */
3715                 case ETH_MQ_RX_VMDQ_DCB:
3716                 case ETH_MQ_RX_VMDQ_DCB_RSS:
3717                         PMD_INIT_LOG(ERR,
3718                                 "Could not support DCB with VMDq & SRIOV");
3719                         return -1;
3720                 default:
3721                         ixgbe_config_vf_default(dev);
3722                         break;
3723                 }
3724         }
3725
3726         return 0;
3727 }
3728
3729 static int
3730 ixgbe_dev_mq_tx_configure(struct rte_eth_dev *dev)
3731 {
3732         struct ixgbe_hw *hw =
3733                 IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
3734         uint32_t mtqc;
3735         uint32_t rttdcs;
3736
3737         if (hw->mac.type == ixgbe_mac_82598EB)
3738                 return 0;
3739
3740         /* disable arbiter before setting MTQC */
3741         rttdcs = IXGBE_READ_REG(hw, IXGBE_RTTDCS);
3742         rttdcs |= IXGBE_RTTDCS_ARBDIS;
3743         IXGBE_WRITE_REG(hw, IXGBE_RTTDCS, rttdcs);
3744
3745         if (RTE_ETH_DEV_SRIOV(dev).active == 0) {
3746                 /*
3747                  * SRIOV inactive scheme
3748                  * any DCB w/o VMDq multi-queue setting
3749                  */
3750                 if (dev->data->dev_conf.txmode.mq_mode == ETH_MQ_TX_VMDQ_ONLY)
3751                         ixgbe_vmdq_tx_hw_configure(hw);
3752                 else {
3753                         mtqc = IXGBE_MTQC_64Q_1PB;
3754                         IXGBE_WRITE_REG(hw, IXGBE_MTQC, mtqc);
3755                 }
3756         } else {
3757                 switch (RTE_ETH_DEV_SRIOV(dev).active) {
3758
3759                 /*
3760                  * SRIOV active scheme
3761                  * FIXME if support DCB together with VMDq & SRIOV
3762                  */
3763                 case ETH_64_POOLS:
3764                         mtqc = IXGBE_MTQC_VT_ENA | IXGBE_MTQC_64VF;
3765                         break;
3766                 case ETH_32_POOLS:
3767                         mtqc = IXGBE_MTQC_VT_ENA | IXGBE_MTQC_32VF;
3768                         break;
3769                 case ETH_16_POOLS:
3770                         mtqc = IXGBE_MTQC_VT_ENA | IXGBE_MTQC_RT_ENA |
3771                                 IXGBE_MTQC_8TC_8TQ;
3772                         break;
3773                 default:
3774                         mtqc = IXGBE_MTQC_64Q_1PB;
3775                         PMD_INIT_LOG(ERR, "invalid pool number in IOV mode");
3776                 }
3777                 IXGBE_WRITE_REG(hw, IXGBE_MTQC, mtqc);
3778         }
3779
3780         /* re-enable arbiter */
3781         rttdcs &= ~IXGBE_RTTDCS_ARBDIS;
3782         IXGBE_WRITE_REG(hw, IXGBE_RTTDCS, rttdcs);
3783
3784         return 0;
3785 }
3786
3787 /**
3788  * ixgbe_get_rscctl_maxdesc - Calculate the RSCCTL[n].MAXDESC for PF
3789  *
3790  * Return the RSCCTL[n].MAXDESC for 82599 and x540 PF devices according to the
3791  * spec rev. 3.0 chapter 8.2.3.8.13.
3792  *
3793  * @pool Memory pool of the Rx queue
3794  */
3795 static inline uint32_t
3796 ixgbe_get_rscctl_maxdesc(struct rte_mempool *pool)
3797 {
3798         struct rte_pktmbuf_pool_private *mp_priv = rte_mempool_get_priv(pool);
3799
3800         /* MAXDESC * SRRCTL.BSIZEPKT must not exceed 64 KB minus one */
3801         uint16_t maxdesc =
3802                 IPV4_MAX_PKT_LEN /
3803                         (mp_priv->mbuf_data_room_size - RTE_PKTMBUF_HEADROOM);
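        /*
         * For example, assuming the common 2 KB of usable data room per mbuf,
         * maxdesc is 65535 / 2048 = 31 and is clamped to MAXDESC_16 below.
         */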
3804
3805         if (maxdesc >= 16)
3806                 return IXGBE_RSCCTL_MAXDESC_16;
3807         else if (maxdesc >= 8)
3808                 return IXGBE_RSCCTL_MAXDESC_8;
3809         else if (maxdesc >= 4)
3810                 return IXGBE_RSCCTL_MAXDESC_4;
3811         else
3812                 return IXGBE_RSCCTL_MAXDESC_1;
3813 }
3814
3815 /**
3816  * ixgbe_set_ivar - Setup the correct IVAR register for a particular MSIX
3817  * interrupt
3818  *
3819  * (Taken from FreeBSD tree)
3820  * (yes this is all very magic and confusing :)
3821  *
3822  * @dev port handle
3823  * @entry the register array entry
3824  * @vector the MSIX vector for this queue
3825  * @type RX/TX/MISC
3826  */
3827 static void
3828 ixgbe_set_ivar(struct rte_eth_dev *dev, u8 entry, u8 vector, s8 type)
3829 {
3830         struct ixgbe_hw *hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
3831         u32 ivar, index;
3832
3833         vector |= IXGBE_IVAR_ALLOC_VAL;
3834
3835         switch (hw->mac.type) {
3836
3837         case ixgbe_mac_82598EB:
3838                 if (type == -1)
3839                         entry = IXGBE_IVAR_OTHER_CAUSES_INDEX;
3840                 else
3841                         entry += (type * 64);
3842                 index = (entry >> 2) & 0x1F;
3843                 ivar = IXGBE_READ_REG(hw, IXGBE_IVAR(index));
3844                 ivar &= ~(0xFF << (8 * (entry & 0x3)));
3845                 ivar |= (vector << (8 * (entry & 0x3)));
3846                 IXGBE_WRITE_REG(hw, IXGBE_IVAR(index), ivar);
3847                 break;
3848
3849         case ixgbe_mac_82599EB:
3850         case ixgbe_mac_X540:
3851                 if (type == -1) { /* MISC IVAR */
3852                         index = (entry & 1) * 8;
3853                         ivar = IXGBE_READ_REG(hw, IXGBE_IVAR_MISC);
3854                         ivar &= ~(0xFF << index);
3855                         ivar |= (vector << index);
3856                         IXGBE_WRITE_REG(hw, IXGBE_IVAR_MISC, ivar);
3857                 } else {        /* RX/TX IVARS */
3858                         index = (16 * (entry & 1)) + (8 * type);
3859                         ivar = IXGBE_READ_REG(hw, IXGBE_IVAR(entry >> 1));
3860                         ivar &= ~(0xFF << index);
3861                         ivar |= (vector << index);
3862                         IXGBE_WRITE_REG(hw, IXGBE_IVAR(entry >> 1), ivar);
3863                 }
3864
3865                 break;
3866
3867         default:
3868                 break;
3869         }
3870 }
3871
3872 void __attribute__((cold))
3873 ixgbe_set_rx_function(struct rte_eth_dev *dev)
3874 {
3875         uint16_t i, rx_using_sse;
3876         struct ixgbe_adapter *adapter =
3877                 (struct ixgbe_adapter *)dev->data->dev_private;
3878
3879         /*
3880          * In order to allow Vector Rx there are a few configuration
3881          * conditions to be met and Rx Bulk Allocation should be allowed.
3882          */
3883         if (ixgbe_rx_vec_dev_conf_condition_check(dev) ||
3884             !adapter->rx_bulk_alloc_allowed) {
3885                 PMD_INIT_LOG(DEBUG, "Port[%d] doesn't meet Vector Rx "
3886                                     "preconditions or RTE_IXGBE_INC_VECTOR is "
3887                                     "not enabled",
3888                              dev->data->port_id);
3889
3890                 adapter->rx_vec_allowed = false;
3891         }
3892
3893         /*
3894          * Initialize the appropriate LRO callback.
3895          *
3896          * If all queues satisfy the bulk allocation preconditions
3897          * (hw->rx_bulk_alloc_allowed is TRUE) then we may use bulk allocation.
3898          * Otherwise use a single allocation version.
3899          */
3900         if (dev->data->lro) {
3901                 if (adapter->rx_bulk_alloc_allowed) {
3902                         PMD_INIT_LOG(DEBUG, "LRO is requested. Using a bulk "
3903                                            "allocation version");
3904                         dev->rx_pkt_burst = ixgbe_recv_pkts_lro_bulk_alloc;
3905                 } else {
3906                         PMD_INIT_LOG(DEBUG, "LRO is requested. Using a single "
3907                                            "allocation version");
3908                         dev->rx_pkt_burst = ixgbe_recv_pkts_lro_single_alloc;
3909                 }
3910         } else if (dev->data->scattered_rx) {
3911                 /*
3912                  * Set the non-LRO scattered callback: there are Vector and
3913                  * single allocation versions.
3914                  */
3915                 if (adapter->rx_vec_allowed) {
3916                         PMD_INIT_LOG(DEBUG, "Using Vector Scattered Rx "
3917                                             "callback (port=%d).",
3918                                      dev->data->port_id);
3919
3920                         dev->rx_pkt_burst = ixgbe_recv_scattered_pkts_vec;
3921                 } else if (adapter->rx_bulk_alloc_allowed) {
3922                         PMD_INIT_LOG(DEBUG, "Using a Scattered with bulk "
3923                                            "allocation callback (port=%d).",
3924                                      dev->data->port_id);
3925                         dev->rx_pkt_burst = ixgbe_recv_pkts_lro_bulk_alloc;
3926                 } else {
3927                         PMD_INIT_LOG(DEBUG, "Using Regualr (non-vector, "
3928                                             "single allocation) "
3929                                             "Scattered Rx callback "
3930                                             "(port=%d).",
3931                                      dev->data->port_id);
3932
3933                         dev->rx_pkt_burst = ixgbe_recv_pkts_lro_single_alloc;
3934                 }
3935         /*
3936          * Below we set "simple" callbacks according to port/queue parameters.
3937          * If the parameters allow it, we choose between the following
3938          * callbacks:
3939          *    - Vector
3940          *    - Bulk Allocation
3941          *    - Single buffer allocation (the simplest one)
3942          */
3943         } else if (adapter->rx_vec_allowed) {
3944                 PMD_INIT_LOG(DEBUG, "Vector rx enabled, please make sure RX "
3945                                     "burst size no less than %d (port=%d).",
3946                              RTE_IXGBE_DESCS_PER_LOOP,
3947                              dev->data->port_id);
3948
3949                 dev->rx_pkt_burst = ixgbe_recv_pkts_vec;
3950         } else if (adapter->rx_bulk_alloc_allowed) {
3951                 PMD_INIT_LOG(DEBUG, "Rx Burst Bulk Alloc Preconditions are "
3952                                     "satisfied. Rx Burst Bulk Alloc function "
3953                                     "will be used on port=%d.",
3954                              dev->data->port_id);
3955
3956                 dev->rx_pkt_burst = ixgbe_recv_pkts_bulk_alloc;
3957         } else {
3958                 PMD_INIT_LOG(DEBUG, "Rx Burst Bulk Alloc Preconditions are not "
3959                                     "satisfied, or Scattered Rx is requested "
3960                                     "(port=%d).",
3961                              dev->data->port_id);
3962
3963                 dev->rx_pkt_burst = ixgbe_recv_pkts;
3964         }
3965
3966         /* Propagate information about RX function choice through all queues. */
3967
3968         rx_using_sse =
3969                 (dev->rx_pkt_burst == ixgbe_recv_scattered_pkts_vec ||
3970                 dev->rx_pkt_burst == ixgbe_recv_pkts_vec);
3971
3972         for (i = 0; i < dev->data->nb_rx_queues; i++) {
3973                 struct ixgbe_rx_queue *rxq = dev->data->rx_queues[i];
3974                 rxq->rx_using_sse = rx_using_sse;
3975         }
3976 }
3977
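/*
 * Editor's note: the block below is an illustrative sketch only, not part of
 * the driver. It summarizes the callback-selection priority implemented in
 * ixgbe_set_rx_function() above (LRO first, then scattered Rx, then vector,
 * then bulk allocation, then the plain single-buffer path). The helper name
 * is hypothetical and the function is unused.
 */
static const char * __attribute__((unused))
ixgbe_rx_burst_choice_name(const struct ixgbe_adapter *adapter,
                           const struct rte_eth_dev_data *data)
{
        if (data->lro)
                return adapter->rx_bulk_alloc_allowed ?
                       "LRO, bulk allocation" : "LRO, single allocation";
        if (data->scattered_rx) {
                if (adapter->rx_vec_allowed)
                        return "scattered, vector";
                if (adapter->rx_bulk_alloc_allowed)
                        return "scattered, bulk allocation";
                return "scattered, single allocation";
        }
        if (adapter->rx_vec_allowed)
                return "vector";
        if (adapter->rx_bulk_alloc_allowed)
                return "bulk allocation";
        return "single allocation";
}
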
3978 /**
3979  * ixgbe_set_rsc - configure RSC related port HW registers
3980  *
3981  * Configures the port's RSC related registers according to chapter 4.6.7.2
3982  * of the 82599 Spec (x540 configuration is virtually the same).
3983  *
3984  * @dev port handle
3985  *
3986  * Returns 0 in case of success or a non-zero error code
3987  */
3988 static int
3989 ixgbe_set_rsc(struct rte_eth_dev *dev)
3990 {
3991         struct rte_eth_rxmode *rx_conf = &dev->data->dev_conf.rxmode;
3992         struct ixgbe_hw *hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
3993         struct rte_eth_dev_info dev_info = { 0 };
3994         bool rsc_capable = false;
3995         uint16_t i;
3996         uint32_t rdrxctl;
3997
3998         /* Sanity check */
3999         dev->dev_ops->dev_infos_get(dev, &dev_info);
4000         if (dev_info.rx_offload_capa & DEV_RX_OFFLOAD_TCP_LRO)
4001                 rsc_capable = true;
4002
4003         if (!rsc_capable && rx_conf->enable_lro) {
4004                 PMD_INIT_LOG(CRIT, "LRO is requested on HW that doesn't "
4005                                    "support it");
4006                 return -EINVAL;
4007         }
4008
4009         /* RSC global configuration (chapter 4.6.7.2.1 of 82599 Spec) */
4010
4011         if (!rx_conf->hw_strip_crc && rx_conf->enable_lro) {
4012                 /*
4013                  * According to chapter 4.6.7.2.1 of the Spec Rev. 3.0,
4014                  * RSC configuration requires HW CRC stripping to be
4015                  * enabled. If the user requested both HW CRC stripping off
4016                  * and RSC on - return an error.
4017                  */
4018                 PMD_INIT_LOG(CRIT, "LRO can't be enabled when HW CRC "
4019                                     "is disabled");
4020                 return -EINVAL;
4021         }
4022
4023         /* RFCTL configuration  */
4024         if (rsc_capable) {
4025                 uint32_t rfctl = IXGBE_READ_REG(hw, IXGBE_RFCTL);
4026                 if (rx_conf->enable_lro)
4027                         /*
4028                          * Since coalescing of NFS packets is not supported, clear
4029                          * RFCTL.NFSW_DIS and RFCTL.NFSR_DIS when RSC is
4030                          * enabled.
4031                          */
4032                         rfctl &= ~(IXGBE_RFCTL_RSC_DIS | IXGBE_RFCTL_NFSW_DIS |
4033                                    IXGBE_RFCTL_NFSR_DIS);
4034                 else
4035                         rfctl |= IXGBE_RFCTL_RSC_DIS;
4036
4037                 IXGBE_WRITE_REG(hw, IXGBE_RFCTL, rfctl);
4038         }
4039
4040         /* If LRO hasn't been requested - we are done here. */
4041         if (!rx_conf->enable_lro)
4042                 return 0;
4043
4044         /* Set RDRXCTL.RSCACKC bit */
4045         rdrxctl = IXGBE_READ_REG(hw, IXGBE_RDRXCTL);
4046         rdrxctl |= IXGBE_RDRXCTL_RSCACKC;
4047         IXGBE_WRITE_REG(hw, IXGBE_RDRXCTL, rdrxctl);
4048
4049         /* Per-queue RSC configuration (chapter 4.6.7.2.2 of 82599 Spec) */
4050         for (i = 0; i < dev->data->nb_rx_queues; i++) {
4051                 struct ixgbe_rx_queue *rxq = dev->data->rx_queues[i];
4052                 uint32_t srrctl =
4053                         IXGBE_READ_REG(hw, IXGBE_SRRCTL(rxq->reg_idx));
4054                 uint32_t rscctl =
4055                         IXGBE_READ_REG(hw, IXGBE_RSCCTL(rxq->reg_idx));
4056                 uint32_t psrtype =
4057                         IXGBE_READ_REG(hw, IXGBE_PSRTYPE(rxq->reg_idx));
4058                 uint32_t eitr =
4059                         IXGBE_READ_REG(hw, IXGBE_EITR(rxq->reg_idx));
4060
4061                 /*
4062                  * ixgbe PMD doesn't support header-split at the moment.
4063                  *
4064                  * Following chapter 4.6.7.2.1 of the 82599/x540
4065                  * Spec, if RSC is enabled the SRRCTL[n].BSIZEHEADER
4066                  * field should be configured even if header split is not
4067                  * enabled. We configure it to 128 bytes, following the
4068                  * recommendation in the spec.
4069                  */
4070                 srrctl &= ~IXGBE_SRRCTL_BSIZEHDR_MASK;
4071                 srrctl |= (128 << IXGBE_SRRCTL_BSIZEHDRSIZE_SHIFT) &
4072                                             IXGBE_SRRCTL_BSIZEHDR_MASK;
4073
4074                 /*
4075                  * TODO: Consider setting the Receive Descriptor Minimum
4076                  * Threshold Size for the RSC case. This is not an obviously
4077                  * beneficial option, but one worth considering...
4078                  */
4079
4080                 rscctl |= IXGBE_RSCCTL_RSCEN;
4081                 rscctl |= ixgbe_get_rscctl_maxdesc(rxq->mb_pool);
4082                 psrtype |= IXGBE_PSRTYPE_TCPHDR;
4083
4084                 /*
4085                  * RSC: Set ITR interval corresponding to 2K ints/s.
4086                  *
4087                  * Full-sized RSC aggregations for a 10Gb/s link will
4088                  * arrive at a rate of about 20K aggregations/s.
4089                  *
4090                  * A 2K ints/s rate will cause only 10% of the
4091                  * aggregations to be closed due to interrupt timer
4092                  * expiration when streaming at wire speed.
4093                  *
4094                  * For a sparse streaming case this setting will yield
4095                  * at most 500us latency for a single RSC aggregation.
4096                  */
4097                 eitr &= ~IXGBE_EITR_ITR_INT_MASK;
4098                 eitr |= IXGBE_EITR_INTERVAL_US(500) | IXGBE_EITR_CNT_WDIS;
4099
4100                 IXGBE_WRITE_REG(hw, IXGBE_SRRCTL(rxq->reg_idx), srrctl);
4101                 IXGBE_WRITE_REG(hw, IXGBE_RSCCTL(rxq->reg_idx), rscctl);
4102                 IXGBE_WRITE_REG(hw, IXGBE_PSRTYPE(rxq->reg_idx), psrtype);
4103                 IXGBE_WRITE_REG(hw, IXGBE_EITR(rxq->reg_idx), eitr);
4104
4105                 /*
4106                  * RSC requires the mapping of the queue to the
4107                  * interrupt vector.
4108                  */
4109                 ixgbe_set_ivar(dev, rxq->reg_idx, i, 0);
4110         }
4111
4112         dev->data->lro = 1;
4113
4114         PMD_INIT_LOG(DEBUG, "enabling LRO mode");
4115
4116         return 0;
4117 }
4118
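/*
 * Editor's note: illustrative sketch only, not used by the driver. It spells
 * out the arithmetic behind the "2K ints/s" EITR setting in ixgbe_set_rsc():
 * an interrupt rate of R per second corresponds to an ITR interval of
 * 1000000 / R microseconds (2000 ints/s -> 500 us). The helper name is
 * hypothetical; it relies only on the IXGBE_EITR_* macros used above.
 */
static inline uint32_t __attribute__((unused))
ixgbe_eitr_from_rate(uint32_t ints_per_sec)
{
        /* Caller must pass a non-zero interrupt rate. */
        uint32_t interval_us = 1000000 / ints_per_sec;

        return IXGBE_EITR_INTERVAL_US(interval_us) | IXGBE_EITR_CNT_WDIS;
}
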
4119 /*
4120  * Initializes Receive Unit.
4121  */
4122 int __attribute__((cold))
4123 ixgbe_dev_rx_init(struct rte_eth_dev *dev)
4124 {
4125         struct ixgbe_hw     *hw;
4126         struct ixgbe_rx_queue *rxq;
4127         uint64_t bus_addr;
4128         uint32_t rxctrl;
4129         uint32_t fctrl;
4130         uint32_t hlreg0;
4131         uint32_t maxfrs;
4132         uint32_t srrctl;
4133         uint32_t rdrxctl;
4134         uint32_t rxcsum;
4135         uint16_t buf_size;
4136         uint16_t i;
4137         struct rte_eth_rxmode *rx_conf = &dev->data->dev_conf.rxmode;
4138         int rc;
4139
4140         PMD_INIT_FUNC_TRACE();
4141         hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
4142
4143         /*
4144          * Make sure receives are disabled while setting
4145          * up the RX context (registers, descriptor rings, etc.).
4146          */
4147         rxctrl = IXGBE_READ_REG(hw, IXGBE_RXCTRL);
4148         IXGBE_WRITE_REG(hw, IXGBE_RXCTRL, rxctrl & ~IXGBE_RXCTRL_RXEN);
4149
4150         /* Enable receipt of broadcast frames */
4151         fctrl = IXGBE_READ_REG(hw, IXGBE_FCTRL);
4152         fctrl |= IXGBE_FCTRL_BAM;
4153         fctrl |= IXGBE_FCTRL_DPF;
4154         fctrl |= IXGBE_FCTRL_PMCF;
4155         IXGBE_WRITE_REG(hw, IXGBE_FCTRL, fctrl);
4156
4157         /*
4158          * Configure CRC stripping, if any.
4159          */
4160         hlreg0 = IXGBE_READ_REG(hw, IXGBE_HLREG0);
4161         if (rx_conf->hw_strip_crc)
4162                 hlreg0 |= IXGBE_HLREG0_RXCRCSTRP;
4163         else
4164                 hlreg0 &= ~IXGBE_HLREG0_RXCRCSTRP;
4165
4166         /*
4167          * Configure jumbo frame support, if any.
4168          */
4169         if (rx_conf->jumbo_frame == 1) {
4170                 hlreg0 |= IXGBE_HLREG0_JUMBOEN;
4171                 maxfrs = IXGBE_READ_REG(hw, IXGBE_MAXFRS);
4172                 maxfrs &= 0x0000FFFF;
4173                 maxfrs |= (rx_conf->max_rx_pkt_len << 16);
4174                 IXGBE_WRITE_REG(hw, IXGBE_MAXFRS, maxfrs);
4175         } else
4176                 hlreg0 &= ~IXGBE_HLREG0_JUMBOEN;
4177
4178         /*
4179          * If loopback mode is configured for 82599, set LPBK bit.
4180          */
4181         if (hw->mac.type == ixgbe_mac_82599EB &&
4182                         dev->data->dev_conf.lpbk_mode == IXGBE_LPBK_82599_TX_RX)
4183                 hlreg0 |= IXGBE_HLREG0_LPBK;
4184         else
4185                 hlreg0 &= ~IXGBE_HLREG0_LPBK;
4186
4187         IXGBE_WRITE_REG(hw, IXGBE_HLREG0, hlreg0);
4188
4189         /* Setup RX queues */
4190         for (i = 0; i < dev->data->nb_rx_queues; i++) {
4191                 rxq = dev->data->rx_queues[i];
4192
4193                 /*
4194                  * Reset crc_len in case it was changed after queue setup by a
4195                  * call to configure.
4196                  */
4197                 rxq->crc_len = rx_conf->hw_strip_crc ? 0 : ETHER_CRC_LEN;
4198
4199                 /* Setup the Base and Length of the Rx Descriptor Rings */
4200                 bus_addr = rxq->rx_ring_phys_addr;
4201                 IXGBE_WRITE_REG(hw, IXGBE_RDBAL(rxq->reg_idx),
4202                                 (uint32_t)(bus_addr & 0x00000000ffffffffULL));
4203                 IXGBE_WRITE_REG(hw, IXGBE_RDBAH(rxq->reg_idx),
4204                                 (uint32_t)(bus_addr >> 32));
4205                 IXGBE_WRITE_REG(hw, IXGBE_RDLEN(rxq->reg_idx),
4206                                 rxq->nb_rx_desc * sizeof(union ixgbe_adv_rx_desc));
4207                 IXGBE_WRITE_REG(hw, IXGBE_RDH(rxq->reg_idx), 0);
4208                 IXGBE_WRITE_REG(hw, IXGBE_RDT(rxq->reg_idx), 0);
4209
4210                 /* Configure the SRRCTL register */
4211 #ifdef RTE_HEADER_SPLIT_ENABLE
4212                 /*
4213                  * Configure Header Split
4214                  */
4215                 if (rx_conf->header_split) {
4216                         if (hw->mac.type == ixgbe_mac_82599EB) {
4217                                 /* Must setup the PSRTYPE register */
4218                                 uint32_t psrtype;
4219                                 psrtype = IXGBE_PSRTYPE_TCPHDR |
4220                                         IXGBE_PSRTYPE_UDPHDR   |
4221                                         IXGBE_PSRTYPE_IPV4HDR  |
4222                                         IXGBE_PSRTYPE_IPV6HDR;
4223                                 IXGBE_WRITE_REG(hw, IXGBE_PSRTYPE(rxq->reg_idx), psrtype);
4224                         }
4225                         srrctl = ((rx_conf->split_hdr_size <<
4226                                 IXGBE_SRRCTL_BSIZEHDRSIZE_SHIFT) &
4227                                 IXGBE_SRRCTL_BSIZEHDR_MASK);
4228                         srrctl |= IXGBE_SRRCTL_DESCTYPE_HDR_SPLIT_ALWAYS;
4229                 } else
4230 #endif
4231                         srrctl = IXGBE_SRRCTL_DESCTYPE_ADV_ONEBUF;
4232
4233                 /* Set if packets are dropped when no descriptors available */
4234                 if (rxq->drop_en)
4235                         srrctl |= IXGBE_SRRCTL_DROP_EN;
4236
4237                 /*
4238                  * Configure the RX buffer size in the BSIZEPACKET field of
4239                  * the SRRCTL register of the queue.
4240                  * The value is in 1 KB resolution. Valid values can be from
4241                  * 1 KB to 16 KB.
4242                  */
4243                 buf_size = (uint16_t)(rte_pktmbuf_data_room_size(rxq->mb_pool) -
4244                         RTE_PKTMBUF_HEADROOM);
4245                 srrctl |= ((buf_size >> IXGBE_SRRCTL_BSIZEPKT_SHIFT) &
4246                            IXGBE_SRRCTL_BSIZEPKT_MASK);
4247
4248                 IXGBE_WRITE_REG(hw, IXGBE_SRRCTL(rxq->reg_idx), srrctl);
4249
4250                 buf_size = (uint16_t) ((srrctl & IXGBE_SRRCTL_BSIZEPKT_MASK) <<
4251                                        IXGBE_SRRCTL_BSIZEPKT_SHIFT);
4252
4253                 /* Account for two VLAN tags (dual VLAN) in the size check */
4254                 if (dev->data->dev_conf.rxmode.max_rx_pkt_len +
4255                                             2 * IXGBE_VLAN_TAG_SIZE > buf_size)
4256                         dev->data->scattered_rx = 1;
4257         }
4258
4259         if (rx_conf->enable_scatter)
4260                 dev->data->scattered_rx = 1;
4261
4262         /*
4263          * Device configured with multiple RX queues.
4264          */
4265         ixgbe_dev_mq_rx_configure(dev);
4266
4267         /*
4268          * Setup the Checksum Register.
4269          * Disable Full-Packet Checksum which is mutually exclusive with RSS.
4270          * Enable IP/L4 checksum computation by hardware if requested to do so.
4271          */
4272         rxcsum = IXGBE_READ_REG(hw, IXGBE_RXCSUM);
4273         rxcsum |= IXGBE_RXCSUM_PCSD;
4274         if (rx_conf->hw_ip_checksum)
4275                 rxcsum |= IXGBE_RXCSUM_IPPCSE;
4276         else
4277                 rxcsum &= ~IXGBE_RXCSUM_IPPCSE;
4278
4279         IXGBE_WRITE_REG(hw, IXGBE_RXCSUM, rxcsum);
4280
4281         if (hw->mac.type == ixgbe_mac_82599EB ||
4282             hw->mac.type == ixgbe_mac_X540) {
4283                 rdrxctl = IXGBE_READ_REG(hw, IXGBE_RDRXCTL);
4284                 if (rx_conf->hw_strip_crc)
4285                         rdrxctl |= IXGBE_RDRXCTL_CRCSTRIP;
4286                 else
4287                         rdrxctl &= ~IXGBE_RDRXCTL_CRCSTRIP;
4288                 rdrxctl &= ~IXGBE_RDRXCTL_RSCFRSTSIZE;
4289                 IXGBE_WRITE_REG(hw, IXGBE_RDRXCTL, rdrxctl);
4290         }
4291
4292         rc = ixgbe_set_rsc(dev);
4293         if (rc)
4294                 return rc;
4295
4296         ixgbe_set_rx_function(dev);
4297
4298         return 0;
4299 }
4300
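/*
 * Editor's note: illustrative sketch only, not used by the driver. The
 * scattered Rx decision in ixgbe_dev_rx_init() reduces to comparing the
 * maximum frame length, padded with room for two VLAN tags, against the
 * per-queue SRRCTL packet buffer size. The helper name is hypothetical.
 */
static inline int __attribute__((unused))
ixgbe_frame_needs_scatter(uint32_t max_rx_pkt_len, uint16_t buf_size)
{
        return (max_rx_pkt_len + 2 * IXGBE_VLAN_TAG_SIZE) > buf_size;
}
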
4301 /*
4302  * Initializes Transmit Unit.
4303  */
4304 void __attribute__((cold))
4305 ixgbe_dev_tx_init(struct rte_eth_dev *dev)
4306 {
4307         struct ixgbe_hw     *hw;
4308         struct ixgbe_tx_queue *txq;
4309         uint64_t bus_addr;
4310         uint32_t hlreg0;
4311         uint32_t txctrl;
4312         uint16_t i;
4313
4314         PMD_INIT_FUNC_TRACE();
4315         hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
4316
4317         /* Enable TX CRC (checksum offload requirement) and hw padding
4318          * (TSO requirement) */
4319         hlreg0 = IXGBE_READ_REG(hw, IXGBE_HLREG0);
4320         hlreg0 |= (IXGBE_HLREG0_TXCRCEN | IXGBE_HLREG0_TXPADEN);
4321         IXGBE_WRITE_REG(hw, IXGBE_HLREG0, hlreg0);
4322
4323         /* Setup the Base and Length of the Tx Descriptor Rings */
4324         for (i = 0; i < dev->data->nb_tx_queues; i++) {
4325                 txq = dev->data->tx_queues[i];
4326
4327                 bus_addr = txq->tx_ring_phys_addr;
4328                 IXGBE_WRITE_REG(hw, IXGBE_TDBAL(txq->reg_idx),
4329                                 (uint32_t)(bus_addr & 0x00000000ffffffffULL));
4330                 IXGBE_WRITE_REG(hw, IXGBE_TDBAH(txq->reg_idx),
4331                                 (uint32_t)(bus_addr >> 32));
4332                 IXGBE_WRITE_REG(hw, IXGBE_TDLEN(txq->reg_idx),
4333                                 txq->nb_tx_desc * sizeof(union ixgbe_adv_tx_desc));
4334                 /* Setup the HW Tx Head and TX Tail descriptor pointers */
4335                 IXGBE_WRITE_REG(hw, IXGBE_TDH(txq->reg_idx), 0);
4336                 IXGBE_WRITE_REG(hw, IXGBE_TDT(txq->reg_idx), 0);
4337
4338                 /*
4339                  * Disable Tx Head Writeback RO bit, since this hoses
4340                  * bookkeeping if things aren't delivered in order.
4341                  */
4342                 switch (hw->mac.type) {
4343                 case ixgbe_mac_82598EB:
4344                         txctrl = IXGBE_READ_REG(hw,
4345                                         IXGBE_DCA_TXCTRL(txq->reg_idx));
4346                         txctrl &= ~IXGBE_DCA_TXCTRL_DESC_WRO_EN;
4347                         IXGBE_WRITE_REG(hw, IXGBE_DCA_TXCTRL(txq->reg_idx),
4348                                         txctrl);
4349                         break;
4350
4351                 case ixgbe_mac_82599EB:
4352                 case ixgbe_mac_X540:
4353                 case ixgbe_mac_X550:
4354                 case ixgbe_mac_X550EM_x:
4355                 default:
4356                         txctrl = IXGBE_READ_REG(hw,
4357                                         IXGBE_DCA_TXCTRL_82599(txq->reg_idx));
4358                         txctrl &= ~IXGBE_DCA_TXCTRL_DESC_WRO_EN;
4359                         IXGBE_WRITE_REG(hw, IXGBE_DCA_TXCTRL_82599(txq->reg_idx),
4360                                         txctrl);
4361                         break;
4362                 }
4363         }
4364
4365         /* Device configured with multiple TX queues. */
4366         ixgbe_dev_mq_tx_configure(dev);
4367 }
4368
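/*
 * Editor's note: illustrative sketch only, not used by the driver. Both the
 * Rx and Tx init paths above program a 64-bit descriptor ring address into a
 * pair of 32-bit registers (RDBAL/RDBAH, TDBAL/TDBAH); this is the split they
 * perform. The helper name is hypothetical.
 */
static inline void __attribute__((unused))
ixgbe_split_ring_addr(uint64_t bus_addr, uint32_t *low, uint32_t *high)
{
        *low  = (uint32_t)(bus_addr & 0x00000000ffffffffULL);
        *high = (uint32_t)(bus_addr >> 32);
}
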
4369 /*
4370  * Set up link for 82599 loopback mode Tx->Rx.
4371  */
4372 static inline void __attribute__((cold))
4373 ixgbe_setup_loopback_link_82599(struct ixgbe_hw *hw)
4374 {
4375         PMD_INIT_FUNC_TRACE();
4376
4377         if (ixgbe_verify_lesm_fw_enabled_82599(hw)) {
4378                 if (hw->mac.ops.acquire_swfw_sync(hw, IXGBE_GSSR_MAC_CSR_SM) !=
4379                                 IXGBE_SUCCESS) {
4380                         PMD_INIT_LOG(ERR, "Could not enable loopback mode");
4381                         /* ignore error */
4382                         return;
4383                 }
4384         }
4385
4386         /* Restart link */
4387         IXGBE_WRITE_REG(hw,
4388                         IXGBE_AUTOC,
4389                         IXGBE_AUTOC_LMS_10G_LINK_NO_AN | IXGBE_AUTOC_FLU);
4390         ixgbe_reset_pipeline_82599(hw);
4391
4392         hw->mac.ops.release_swfw_sync(hw, IXGBE_GSSR_MAC_CSR_SM);
4393         msec_delay(50);
4394 }
4395
4396
4397 /*
4398  * Start Transmit and Receive Units.
4399  */
4400 int __attribute__((cold))
4401 ixgbe_dev_rxtx_start(struct rte_eth_dev *dev)
4402 {
4403         struct ixgbe_hw     *hw;
4404         struct ixgbe_tx_queue *txq;
4405         struct ixgbe_rx_queue *rxq;
4406         uint32_t txdctl;
4407         uint32_t dmatxctl;
4408         uint32_t rxctrl;
4409         uint16_t i;
4410         int ret = 0;
4411
4412         PMD_INIT_FUNC_TRACE();
4413         hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
4414
4415         for (i = 0; i < dev->data->nb_tx_queues; i++) {
4416                 txq = dev->data->tx_queues[i];
4417                 /* Setup Transmit Threshold Registers */
4418                 txdctl = IXGBE_READ_REG(hw, IXGBE_TXDCTL(txq->reg_idx));
4419                 txdctl |= txq->pthresh & 0x7F;
4420                 txdctl |= ((txq->hthresh & 0x7F) << 8);
4421                 txdctl |= ((txq->wthresh & 0x7F) << 16);
4422                 IXGBE_WRITE_REG(hw, IXGBE_TXDCTL(txq->reg_idx), txdctl);
4423         }
4424
4425         if (hw->mac.type != ixgbe_mac_82598EB) {
4426                 dmatxctl = IXGBE_READ_REG(hw, IXGBE_DMATXCTL);
4427                 dmatxctl |= IXGBE_DMATXCTL_TE;
4428                 IXGBE_WRITE_REG(hw, IXGBE_DMATXCTL, dmatxctl);
4429         }
4430
4431         for (i = 0; i < dev->data->nb_tx_queues; i++) {
4432                 txq = dev->data->tx_queues[i];
4433                 if (!txq->tx_deferred_start) {
4434                         ret = ixgbe_dev_tx_queue_start(dev, i);
4435                         if (ret < 0)
4436                                 return ret;
4437                 }
4438         }
4439
4440         for (i = 0; i < dev->data->nb_rx_queues; i++) {
4441                 rxq = dev->data->rx_queues[i];
4442                 if (!rxq->rx_deferred_start) {
4443                         ret = ixgbe_dev_rx_queue_start(dev, i);
4444                         if (ret < 0)
4445                                 return ret;
4446                 }
4447         }
4448
4449         /* Enable Receive engine */
4450         rxctrl = IXGBE_READ_REG(hw, IXGBE_RXCTRL);
4451         if (hw->mac.type == ixgbe_mac_82598EB)
4452                 rxctrl |= IXGBE_RXCTRL_DMBYPS;
4453         rxctrl |= IXGBE_RXCTRL_RXEN;
4454         hw->mac.ops.enable_rx_dma(hw, rxctrl);
4455
4456         /* If loopback mode is enabled for 82599, set up the link accordingly */
4457         if (hw->mac.type == ixgbe_mac_82599EB &&
4458                         dev->data->dev_conf.lpbk_mode == IXGBE_LPBK_82599_TX_RX)
4459                 ixgbe_setup_loopback_link_82599(hw);
4460
4461         return 0;
4462 }
4463
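/*
 * Editor's note: illustrative sketch only, not used by the driver. The Tx
 * threshold programming in ixgbe_dev_rxtx_start() packs the prefetch, host
 * and write-back thresholds into bits 6:0, 14:8 and 22:16 of TXDCTL; this
 * helper shows that packing in isolation. The name is hypothetical.
 */
static inline uint32_t __attribute__((unused))
ixgbe_pack_txdctl_thresh(uint32_t txdctl, uint8_t pthresh, uint8_t hthresh,
                         uint8_t wthresh)
{
        txdctl |= pthresh & 0x7F;
        txdctl |= (uint32_t)(hthresh & 0x7F) << 8;
        txdctl |= (uint32_t)(wthresh & 0x7F) << 16;

        return txdctl;
}
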
4464 /*
4465  * Start Receive Units for specified queue.
4466  */
4467 int __attribute__((cold))
4468 ixgbe_dev_rx_queue_start(struct rte_eth_dev *dev, uint16_t rx_queue_id)
4469 {
4470         struct ixgbe_hw     *hw;
4471         struct ixgbe_rx_queue *rxq;
4472         uint32_t rxdctl;
4473         int poll_ms;
4474
4475         PMD_INIT_FUNC_TRACE();
4476         hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
4477
4478         if (rx_queue_id < dev->data->nb_rx_queues) {
4479                 rxq = dev->data->rx_queues[rx_queue_id];
4480
4481                 /* Allocate buffers for descriptor rings */
4482                 if (ixgbe_alloc_rx_queue_mbufs(rxq) != 0) {
4483                         PMD_INIT_LOG(ERR, "Could not alloc mbuf for queue:%d",
4484                                      rx_queue_id);
4485                         return -1;
4486                 }
4487                 rxdctl = IXGBE_READ_REG(hw, IXGBE_RXDCTL(rxq->reg_idx));
4488                 rxdctl |= IXGBE_RXDCTL_ENABLE;
4489                 IXGBE_WRITE_REG(hw, IXGBE_RXDCTL(rxq->reg_idx), rxdctl);
4490
4491                 /* Wait until RX Enable ready */
4492                 poll_ms = RTE_IXGBE_REGISTER_POLL_WAIT_10_MS;
4493                 do {
4494                         rte_delay_ms(1);
4495                         rxdctl = IXGBE_READ_REG(hw, IXGBE_RXDCTL(rxq->reg_idx));
4496                 } while (--poll_ms && !(rxdctl & IXGBE_RXDCTL_ENABLE));
4497                 if (!poll_ms)
4498                         PMD_INIT_LOG(ERR, "Could not enable Rx Queue %d",
4499                                      rx_queue_id);
4500                 rte_wmb();
4501                 IXGBE_WRITE_REG(hw, IXGBE_RDH(rxq->reg_idx), 0);
4502                 IXGBE_WRITE_REG(hw, IXGBE_RDT(rxq->reg_idx), rxq->nb_rx_desc - 1);
4503                 dev->data->rx_queue_state[rx_queue_id] = RTE_ETH_QUEUE_STATE_STARTED;
4504         } else
4505                 return -1;
4506
4507         return 0;
4508 }
4509
4510 /*
4511  * Stop Receive Units for specified queue.
4512  */
4513 int __attribute__((cold))
4514 ixgbe_dev_rx_queue_stop(struct rte_eth_dev *dev, uint16_t rx_queue_id)
4515 {
4516         struct ixgbe_hw     *hw;
4517         struct ixgbe_adapter *adapter =
4518                 (struct ixgbe_adapter *)dev->data->dev_private;
4519         struct ixgbe_rx_queue *rxq;
4520         uint32_t rxdctl;
4521         int poll_ms;
4522
4523         PMD_INIT_FUNC_TRACE();
4524         hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
4525
4526         if (rx_queue_id < dev->data->nb_rx_queues) {
4527                 rxq = dev->data->rx_queues[rx_queue_id];
4528
4529                 rxdctl = IXGBE_READ_REG(hw, IXGBE_RXDCTL(rxq->reg_idx));
4530                 rxdctl &= ~IXGBE_RXDCTL_ENABLE;
4531                 IXGBE_WRITE_REG(hw, IXGBE_RXDCTL(rxq->reg_idx), rxdctl);
4532
4533                 /* Wait until the RX Enable bit clears */
4534                 poll_ms = RTE_IXGBE_REGISTER_POLL_WAIT_10_MS;
4535                 do {
4536                         rte_delay_ms(1);
4537                         rxdctl = IXGBE_READ_REG(hw, IXGBE_RXDCTL(rxq->reg_idx));
4538                 } while (--poll_ms && (rxdctl & IXGBE_RXDCTL_ENABLE));
4539                 if (!poll_ms)
4540                         PMD_INIT_LOG(ERR, "Could not disable Rx Queue %d",
4541                                      rx_queue_id);
4542
4543                 rte_delay_us(RTE_IXGBE_WAIT_100_US);
4544
4545                 ixgbe_rx_queue_release_mbufs(rxq);
4546                 ixgbe_reset_rx_queue(adapter, rxq);
4547                 dev->data->rx_queue_state[rx_queue_id] = RTE_ETH_QUEUE_STATE_STOPPED;
4548         } else
4549                 return -1;
4550
4551         return 0;
4552 }
4553
4554
4555 /*
4556  * Start Transmit Units for specified queue.
4557  */
4558 int __attribute__((cold))
4559 ixgbe_dev_tx_queue_start(struct rte_eth_dev *dev, uint16_t tx_queue_id)
4560 {
4561         struct ixgbe_hw     *hw;
4562         struct ixgbe_tx_queue *txq;
4563         uint32_t txdctl;
4564         int poll_ms;
4565
4566         PMD_INIT_FUNC_TRACE();
4567         hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
4568
4569         if (tx_queue_id < dev->data->nb_tx_queues) {
4570                 txq = dev->data->tx_queues[tx_queue_id];
4571                 txdctl = IXGBE_READ_REG(hw, IXGBE_TXDCTL(txq->reg_idx));
4572                 txdctl |= IXGBE_TXDCTL_ENABLE;
4573                 IXGBE_WRITE_REG(hw, IXGBE_TXDCTL(txq->reg_idx), txdctl);
4574
4575                 /* Wait until TX Enable ready */
4576                 if (hw->mac.type == ixgbe_mac_82599EB) {
4577                         poll_ms = RTE_IXGBE_REGISTER_POLL_WAIT_10_MS;
4578                         do {
4579                                 rte_delay_ms(1);
4580                                 txdctl = IXGBE_READ_REG(hw,
4581                                         IXGBE_TXDCTL(txq->reg_idx));
4582                         } while (--poll_ms && !(txdctl & IXGBE_TXDCTL_ENABLE));
4583                         if (!poll_ms)
4584                                 PMD_INIT_LOG(ERR, "Could not enable "
4585                                              "Tx Queue %d", tx_queue_id);
4586                 }
4587                 rte_wmb();
4588                 IXGBE_WRITE_REG(hw, IXGBE_TDH(txq->reg_idx), 0);
4589                 IXGBE_WRITE_REG(hw, IXGBE_TDT(txq->reg_idx), 0);
4590                 dev->data->tx_queue_state[tx_queue_id] = RTE_ETH_QUEUE_STATE_STARTED;
4591         } else
4592                 return -1;
4593
4594         return 0;
4595 }
4596
4597 /*
4598  * Stop Transmit Units for specified queue.
4599  */
4600 int __attribute__((cold))
4601 ixgbe_dev_tx_queue_stop(struct rte_eth_dev *dev, uint16_t tx_queue_id)
4602 {
4603         struct ixgbe_hw     *hw;
4604         struct ixgbe_tx_queue *txq;
4605         uint32_t txdctl;
4606         uint32_t txtdh, txtdt;
4607         int poll_ms;
4608
4609         PMD_INIT_FUNC_TRACE();
4610         hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
4611
4612         if (tx_queue_id < dev->data->nb_tx_queues) {
4613                 txq = dev->data->tx_queues[tx_queue_id];
4614
4615                 /* Wait until TX queue is empty */
4616                 if (hw->mac.type == ixgbe_mac_82599EB) {
4617                         poll_ms = RTE_IXGBE_REGISTER_POLL_WAIT_10_MS;
4618                         do {
4619                                 rte_delay_us(RTE_IXGBE_WAIT_100_US);
4620                                 txtdh = IXGBE_READ_REG(hw,
4621                                                 IXGBE_TDH(txq->reg_idx));
4622                                 txtdt = IXGBE_READ_REG(hw,
4623                                                 IXGBE_TDT(txq->reg_idx));
4624                         } while (--poll_ms && (txtdh != txtdt));
4625                         if (!poll_ms)
4626                                 PMD_INIT_LOG(ERR, "Tx Queue %d is not empty "
4627                                              "when stopping.", tx_queue_id);
4628                 }
4629
4630                 txdctl = IXGBE_READ_REG(hw, IXGBE_TXDCTL(txq->reg_idx));
4631                 txdctl &= ~IXGBE_TXDCTL_ENABLE;
4632                 IXGBE_WRITE_REG(hw, IXGBE_TXDCTL(txq->reg_idx), txdctl);
4633
4634                 /* Wait until the TX Enable bit clears */
4635                 if (hw->mac.type == ixgbe_mac_82599EB) {
4636                         poll_ms = RTE_IXGBE_REGISTER_POLL_WAIT_10_MS;
4637                         do {
4638                                 rte_delay_ms(1);
4639                                 txdctl = IXGBE_READ_REG(hw,
4640                                                 IXGBE_TXDCTL(txq->reg_idx));
4641                         } while (--poll_ms && (txdctl & IXGBE_TXDCTL_ENABLE));
4642                         if (!poll_ms)
4643                                 PMD_INIT_LOG(ERR, "Could not disable "
4644                                              "Tx Queue %d", tx_queue_id);
4645                 }
4646
4647                 if (txq->ops != NULL) {
4648                         txq->ops->release_mbufs(txq);
4649                         txq->ops->reset(txq);
4650                 }
4651                 dev->data->tx_queue_state[tx_queue_id] = RTE_ETH_QUEUE_STATE_STOPPED;
4652         } else
4653                 return -1;
4654
4655         return 0;
4656 }
4657
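/*
 * Editor's note: illustrative sketch only, not used by the driver. The queue
 * start/stop paths above all use the same "write the enable bit, then poll
 * until the hardware acknowledges" pattern; this is its generic form. The
 * helper name is hypothetical, and the poll budget reuses the
 * RTE_IXGBE_REGISTER_POLL_WAIT_10_MS constant from the code above.
 */
static inline int __attribute__((unused))
ixgbe_poll_enable_bit(struct ixgbe_hw *hw, uint32_t reg, uint32_t bit,
                      int wait_for_set)
{
        int poll_ms = RTE_IXGBE_REGISTER_POLL_WAIT_10_MS;
        uint32_t val;

        do {
                rte_delay_ms(1);
                val = IXGBE_READ_REG(hw, reg);
        } while (--poll_ms &&
                 (wait_for_set ? !(val & bit) : (val & bit) != 0));

        return poll_ms ? 0 : -1; /* -1 means the hardware timed out */
}
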
4658 void
4659 ixgbe_rxq_info_get(struct rte_eth_dev *dev, uint16_t queue_id,
4660         struct rte_eth_rxq_info *qinfo)
4661 {
4662         struct ixgbe_rx_queue *rxq;
4663
4664         rxq = dev->data->rx_queues[queue_id];
4665
4666         qinfo->mp = rxq->mb_pool;
4667         qinfo->scattered_rx = dev->data->scattered_rx;
4668         qinfo->nb_desc = rxq->nb_rx_desc;
4669
4670         qinfo->conf.rx_free_thresh = rxq->rx_free_thresh;
4671         qinfo->conf.rx_drop_en = rxq->drop_en;
4672         qinfo->conf.rx_deferred_start = rxq->rx_deferred_start;
4673 }
4674
4675 void
4676 ixgbe_txq_info_get(struct rte_eth_dev *dev, uint16_t queue_id,
4677         struct rte_eth_txq_info *qinfo)
4678 {
4679         struct ixgbe_tx_queue *txq;
4680
4681         txq = dev->data->tx_queues[queue_id];
4682
4683         qinfo->nb_desc = txq->nb_tx_desc;
4684
4685         qinfo->conf.tx_thresh.pthresh = txq->pthresh;
4686         qinfo->conf.tx_thresh.hthresh = txq->hthresh;
4687         qinfo->conf.tx_thresh.wthresh = txq->wthresh;
4688
4689         qinfo->conf.tx_free_thresh = txq->tx_free_thresh;
4690         qinfo->conf.tx_rs_thresh = txq->tx_rs_thresh;
4691         qinfo->conf.txq_flags = txq->txq_flags;
4692         qinfo->conf.tx_deferred_start = txq->tx_deferred_start;
4693 }
4694
4695 /*
4696  * [VF] Initializes Receive Unit.
4697  */
4698 int __attribute__((cold))
4699 ixgbevf_dev_rx_init(struct rte_eth_dev *dev)
4700 {
4701         struct ixgbe_hw     *hw;
4702         struct ixgbe_rx_queue *rxq;
4703         uint64_t bus_addr;
4704         uint32_t srrctl, psrtype = 0;
4705         uint16_t buf_size;
4706         uint16_t i;
4707         int ret;
4708
4709         PMD_INIT_FUNC_TRACE();
4710         hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
4711
4712         if (rte_is_power_of_2(dev->data->nb_rx_queues) == 0) {
4713                 PMD_INIT_LOG(ERR, "The number of Rx queue invalid, "
4714                         "it should be power of 2");
4715                 return -1;
4716         }
4717
4718         if (dev->data->nb_rx_queues > hw->mac.max_rx_queues) {
4719                 PMD_INIT_LOG(ERR, "The number of Rx queue invalid, "
4720                         "it should be equal to or less than %d",
4721                         hw->mac.max_rx_queues);
4722                 return -1;
4723         }
4724
4725         /*
4726          * When the VF driver issues an IXGBE_VF_RESET request, the PF driver
4727          * disables VF receipt of packets if the PF MTU is > 1500.
4728          * This is done to deal with an 82599 limitation that requires
4729          * the PF and all VFs to share the same MTU.
4730          * The PF driver then re-enables VF receipt of packets when
4731          * the VF driver issues an IXGBE_VF_SET_LPE request.
4732          * In the meantime, the VF device cannot be used, even if the VF driver
4733          * and the Guest VM network stack are ready to accept packets with a
4734          * size up to the PF MTU.
4735          * As a workaround to this PF behaviour, force the call to
4736          * ixgbevf_rlpml_set_vf even if jumbo frames are not used, so that
4737          * VF packet reception works in all cases.
4738          */
4739         ixgbevf_rlpml_set_vf(hw,
4740                 (uint16_t)dev->data->dev_conf.rxmode.max_rx_pkt_len);
4741
4742         /* Setup RX queues */
4743         for (i = 0; i < dev->data->nb_rx_queues; i++) {
4744                 rxq = dev->data->rx_queues[i];
4745
4746                 /* Allocate buffers for descriptor rings */
4747                 ret = ixgbe_alloc_rx_queue_mbufs(rxq);
4748                 if (ret)
4749                         return ret;
4750
4751                 /* Setup the Base and Length of the Rx Descriptor Rings */
4752                 bus_addr = rxq->rx_ring_phys_addr;
4753
4754                 IXGBE_WRITE_REG(hw, IXGBE_VFRDBAL(i),
4755                                 (uint32_t)(bus_addr & 0x00000000ffffffffULL));
4756                 IXGBE_WRITE_REG(hw, IXGBE_VFRDBAH(i),
4757                                 (uint32_t)(bus_addr >> 32));
4758                 IXGBE_WRITE_REG(hw, IXGBE_VFRDLEN(i),
4759                                 rxq->nb_rx_desc * sizeof(union ixgbe_adv_rx_desc));
4760                 IXGBE_WRITE_REG(hw, IXGBE_VFRDH(i), 0);
4761                 IXGBE_WRITE_REG(hw, IXGBE_VFRDT(i), 0);
4762
4763
4764                 /* Configure the SRRCTL register */
4765 #ifdef RTE_HEADER_SPLIT_ENABLE
4766                 /*
4767                  * Configure Header Split
4768                  */
4769                 if (dev->data->dev_conf.rxmode.header_split) {
4770                         srrctl = ((dev->data->dev_conf.rxmode.split_hdr_size <<
4771                                 IXGBE_SRRCTL_BSIZEHDRSIZE_SHIFT) &
4772                                 IXGBE_SRRCTL_BSIZEHDR_MASK);
4773                         srrctl |= IXGBE_SRRCTL_DESCTYPE_HDR_SPLIT_ALWAYS;
4774                 } else
4775 #endif
4776                         srrctl = IXGBE_SRRCTL_DESCTYPE_ADV_ONEBUF;
4777
4778                 /* Set if packets are dropped when no descriptors available */
4779                 if (rxq->drop_en)
4780                         srrctl |= IXGBE_SRRCTL_DROP_EN;
4781
4782                 /*
4783                  * Configure the RX buffer size in the BSIZEPACKET field of
4784                  * the SRRCTL register of the queue.
4785                  * The value is in 1 KB resolution. Valid values can be from
4786                  * 1 KB to 16 KB.
4787                  */
4788                 buf_size = (uint16_t)(rte_pktmbuf_data_room_size(rxq->mb_pool) -
4789                         RTE_PKTMBUF_HEADROOM);
4790                 srrctl |= ((buf_size >> IXGBE_SRRCTL_BSIZEPKT_SHIFT) &
4791                            IXGBE_SRRCTL_BSIZEPKT_MASK);
4792
4793                 /*
4794                  * VF modification to write virtual function SRRCTL register
4795                  */
4796                 IXGBE_WRITE_REG(hw, IXGBE_VFSRRCTL(i), srrctl);
4797
4798                 buf_size = (uint16_t) ((srrctl & IXGBE_SRRCTL_BSIZEPKT_MASK) <<
4799                                        IXGBE_SRRCTL_BSIZEPKT_SHIFT);
4800
4801                 if (dev->data->dev_conf.rxmode.enable_scatter ||
4802                     /* account for two VLAN tags (dual VLAN) */
4803                     (dev->data->dev_conf.rxmode.max_rx_pkt_len +
4804                                 2 * IXGBE_VLAN_TAG_SIZE) > buf_size) {
4805                         if (!dev->data->scattered_rx)
4806                                 PMD_INIT_LOG(DEBUG, "forcing scatter mode");
4807                         dev->data->scattered_rx = 1;
4808                 }
4809         }
4810
4811 #ifdef RTE_HEADER_SPLIT_ENABLE
4812         if (dev->data->dev_conf.rxmode.header_split)
4813                 /* Must setup the PSRTYPE register */
4814                 psrtype = IXGBE_PSRTYPE_TCPHDR |
4815                         IXGBE_PSRTYPE_UDPHDR   |
4816                         IXGBE_PSRTYPE_IPV4HDR  |
4817                         IXGBE_PSRTYPE_IPV6HDR;
4818 #endif
4819
4820         /* Set RQPL for VF RSS according to the maximum number of Rx queues */
4821         psrtype |= (dev->data->nb_rx_queues >> 1) <<
4822                 IXGBE_PSRTYPE_RQPL_SHIFT;
4823         IXGBE_WRITE_REG(hw, IXGBE_VFPSRTYPE, psrtype);
4824
4825         ixgbe_set_rx_function(dev);
4826
4827         return 0;
4828 }
4829
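/*
 * Editor's note: illustrative sketch only, not used by the driver. Both the
 * PF and VF Rx init paths encode the mbuf data room size into the SRRCTL
 * BSIZEPACKET field, which works in 1 KB units; this helper shows the
 * encode/decode round trip performed above, returning the effective buffer
 * size the hardware will use. The name is hypothetical.
 */
static inline uint16_t __attribute__((unused))
ixgbe_srrctl_effective_buf_size(uint16_t buf_size)
{
        uint32_t srrctl = (buf_size >> IXGBE_SRRCTL_BSIZEPKT_SHIFT) &
                          IXGBE_SRRCTL_BSIZEPKT_MASK;

        return (uint16_t)((srrctl & IXGBE_SRRCTL_BSIZEPKT_MASK) <<
                          IXGBE_SRRCTL_BSIZEPKT_SHIFT);
}
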
4830 /*
4831  * [VF] Initializes Transmit Unit.
4832  */
4833 void __attribute__((cold))
4834 ixgbevf_dev_tx_init(struct rte_eth_dev *dev)
4835 {
4836         struct ixgbe_hw     *hw;
4837         struct ixgbe_tx_queue *txq;
4838         uint64_t bus_addr;
4839         uint32_t txctrl;
4840         uint16_t i;
4841
4842         PMD_INIT_FUNC_TRACE();
4843         hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
4844
4845         /* Setup the Base and Length of the Tx Descriptor Rings */
4846         for (i = 0; i < dev->data->nb_tx_queues; i++) {
4847                 txq = dev->data->tx_queues[i];
4848                 bus_addr = txq->tx_ring_phys_addr;
4849                 IXGBE_WRITE_REG(hw, IXGBE_VFTDBAL(i),
4850                                 (uint32_t)(bus_addr & 0x00000000ffffffffULL));
4851                 IXGBE_WRITE_REG(hw, IXGBE_VFTDBAH(i),
4852                                 (uint32_t)(bus_addr >> 32));
4853                 IXGBE_WRITE_REG(hw, IXGBE_VFTDLEN(i),
4854                                 txq->nb_tx_desc * sizeof(union ixgbe_adv_tx_desc));
4855                 /* Setup the HW Tx Head and TX Tail descriptor pointers */
4856                 IXGBE_WRITE_REG(hw, IXGBE_VFTDH(i), 0);
4857                 IXGBE_WRITE_REG(hw, IXGBE_VFTDT(i), 0);
4858
4859                 /*
4860                  * Disable Tx Head Writeback RO bit, since this hoses
4861                  * bookkeeping if things aren't delivered in order.
4862                  */
4863                 txctrl = IXGBE_READ_REG(hw,
4864                                 IXGBE_VFDCA_TXCTRL(i));
4865                 txctrl &= ~IXGBE_DCA_TXCTRL_DESC_WRO_EN;
4866                 IXGBE_WRITE_REG(hw, IXGBE_VFDCA_TXCTRL(i),
4867                                 txctrl);
4868         }
4869 }
4870
4871 /*
4872  * [VF] Start Transmit and Receive Units.
4873  */
4874 void __attribute__((cold))
4875 ixgbevf_dev_rxtx_start(struct rte_eth_dev *dev)
4876 {
4877         struct ixgbe_hw     *hw;
4878         struct ixgbe_tx_queue *txq;
4879         struct ixgbe_rx_queue *rxq;
4880         uint32_t txdctl;
4881         uint32_t rxdctl;
4882         uint16_t i;
4883         int poll_ms;
4884
4885         PMD_INIT_FUNC_TRACE();
4886         hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
4887
4888         for (i = 0; i < dev->data->nb_tx_queues; i++) {
4889                 txq = dev->data->tx_queues[i];
4890                 /* Setup Transmit Threshold Registers */
4891                 txdctl = IXGBE_READ_REG(hw, IXGBE_VFTXDCTL(i));
4892                 txdctl |= txq->pthresh & 0x7F;
4893                 txdctl |= ((txq->hthresh & 0x7F) << 8);
4894                 txdctl |= ((txq->wthresh & 0x7F) << 16);
4895                 IXGBE_WRITE_REG(hw, IXGBE_VFTXDCTL(i), txdctl);
4896         }
4897
4898         for (i = 0; i < dev->data->nb_tx_queues; i++) {
4899
4900                 txdctl = IXGBE_READ_REG(hw, IXGBE_VFTXDCTL(i));
4901                 txdctl |= IXGBE_TXDCTL_ENABLE;
4902                 IXGBE_WRITE_REG(hw, IXGBE_VFTXDCTL(i), txdctl);
4903
4904                 poll_ms = 10;
4905                 /* Wait until TX Enable ready */
4906                 do {
4907                         rte_delay_ms(1);
4908                         txdctl = IXGBE_READ_REG(hw, IXGBE_VFTXDCTL(i));
4909                 } while (--poll_ms && !(txdctl & IXGBE_TXDCTL_ENABLE));
4910                 if (!poll_ms)
4911                         PMD_INIT_LOG(ERR, "Could not enable Tx Queue %d", i);
4912         }
4913         for (i = 0; i < dev->data->nb_rx_queues; i++) {
4914
4915                 rxq = dev->data->rx_queues[i];
4916
4917                 rxdctl = IXGBE_READ_REG(hw, IXGBE_VFRXDCTL(i));
4918                 rxdctl |= IXGBE_RXDCTL_ENABLE;
4919                 IXGBE_WRITE_REG(hw, IXGBE_VFRXDCTL(i), rxdctl);
4920
4921                 /* Wait until RX Enable ready */
4922                 poll_ms = 10;
4923                 do {
4924                         rte_delay_ms(1);
4925                         rxdctl = IXGBE_READ_REG(hw, IXGBE_VFRXDCTL(i));
4926                 } while (--poll_ms && !(rxdctl & IXGBE_RXDCTL_ENABLE));
4927                 if (!poll_ms)
4928                         PMD_INIT_LOG(ERR, "Could not enable Rx Queue %d", i);
4929                 rte_wmb();
4930                 IXGBE_WRITE_REG(hw, IXGBE_VFRDT(i), rxq->nb_rx_desc - 1);
4931
4932         }
4933 }
4934
4935 /* Stubs needed for linkage when CONFIG_RTE_IXGBE_INC_VECTOR is set to 'n' */
4936 int __attribute__((weak))
4937 ixgbe_rx_vec_dev_conf_condition_check(struct rte_eth_dev __rte_unused *dev)
4938 {
4939         return -1;
4940 }
4941
4942 uint16_t __attribute__((weak))
4943 ixgbe_recv_pkts_vec(
4944         void __rte_unused *rx_queue,
4945         struct rte_mbuf __rte_unused **rx_pkts,
4946         uint16_t __rte_unused nb_pkts)
4947 {
4948         return 0;
4949 }
4950
4951 uint16_t __attribute__((weak))
4952 ixgbe_recv_scattered_pkts_vec(
4953         void __rte_unused *rx_queue,
4954         struct rte_mbuf __rte_unused **rx_pkts,
4955         uint16_t __rte_unused nb_pkts)
4956 {
4957         return 0;
4958 }
4959
4960 int __attribute__((weak))
4961 ixgbe_rxq_vec_setup(struct ixgbe_rx_queue __rte_unused *rxq)
4962 {
4963         return -1;
4964 }