ixgbe: fix Rx with buffer address not word aligned
[dpdk.git] drivers/net/ixgbe/ixgbe_rxtx.c
1 /*-
2  *   BSD LICENSE
3  *
4  *   Copyright(c) 2010-2015 Intel Corporation. All rights reserved.
5  *   Copyright 2014 6WIND S.A.
6  *   All rights reserved.
7  *
8  *   Redistribution and use in source and binary forms, with or without
9  *   modification, are permitted provided that the following conditions
10  *   are met:
11  *
12  *     * Redistributions of source code must retain the above copyright
13  *       notice, this list of conditions and the following disclaimer.
14  *     * Redistributions in binary form must reproduce the above copyright
15  *       notice, this list of conditions and the following disclaimer in
16  *       the documentation and/or other materials provided with the
17  *       distribution.
18  *     * Neither the name of Intel Corporation nor the names of its
19  *       contributors may be used to endorse or promote products derived
20  *       from this software without specific prior written permission.
21  *
22  *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
23  *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
24  *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
25  *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
26  *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
27  *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
28  *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
29  *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
30  *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
31  *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
32  *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
33  */
34
35 #include <sys/queue.h>
36
37 #include <stdio.h>
38 #include <stdlib.h>
39 #include <string.h>
40 #include <errno.h>
41 #include <stdint.h>
42 #include <stdarg.h>
43 #include <unistd.h>
44 #include <inttypes.h>
45
46 #include <rte_byteorder.h>
47 #include <rte_common.h>
48 #include <rte_cycles.h>
49 #include <rte_log.h>
50 #include <rte_debug.h>
51 #include <rte_interrupts.h>
52 #include <rte_pci.h>
53 #include <rte_memory.h>
54 #include <rte_memzone.h>
55 #include <rte_launch.h>
56 #include <rte_eal.h>
57 #include <rte_per_lcore.h>
58 #include <rte_lcore.h>
59 #include <rte_atomic.h>
60 #include <rte_branch_prediction.h>
61 #include <rte_ring.h>
62 #include <rte_mempool.h>
63 #include <rte_malloc.h>
64 #include <rte_mbuf.h>
65 #include <rte_ether.h>
66 #include <rte_ethdev.h>
67 #include <rte_prefetch.h>
68 #include <rte_udp.h>
69 #include <rte_tcp.h>
70 #include <rte_sctp.h>
71 #include <rte_string_fns.h>
72 #include <rte_errno.h>
73 #include <rte_ip.h>
74
75 #include "ixgbe_logs.h"
76 #include "base/ixgbe_api.h"
77 #include "base/ixgbe_vf.h"
78 #include "ixgbe_ethdev.h"
79 #include "base/ixgbe_dcb.h"
80 #include "base/ixgbe_common.h"
81 #include "ixgbe_rxtx.h"
82
83 /* Bit mask of the offload flags that require building a TX context descriptor */
84 #define IXGBE_TX_OFFLOAD_MASK (                  \
85                 PKT_TX_VLAN_PKT |                \
86                 PKT_TX_IP_CKSUM |                \
87                 PKT_TX_L4_MASK |                 \
88                 PKT_TX_TCP_SEG)
89
90 static inline struct rte_mbuf *
91 rte_rxmbuf_alloc(struct rte_mempool *mp)
92 {
93         struct rte_mbuf *m;
94
95         m = __rte_mbuf_raw_alloc(mp);
96         __rte_mbuf_sanity_check_raw(m, 0);
97         return (m);
98 }
99
100
101 #if 1
102 #define RTE_PMD_USE_PREFETCH
103 #endif
104
105 #ifdef RTE_PMD_USE_PREFETCH
106 /*
107  * Prefetch a cache line into all cache levels.
108  */
109 #define rte_ixgbe_prefetch(p)   rte_prefetch0(p)
110 #else
111 #define rte_ixgbe_prefetch(p)   do {} while(0)
112 #endif
113
114 /*********************************************************************
115  *
116  *  TX functions
117  *
118  **********************************************************************/
119
120 /*
121  * Check for descriptors with their DD bit set and free mbufs.
122  * Return the total number of buffers freed.
123  */
124 static inline int __attribute__((always_inline))
125 ixgbe_tx_free_bufs(struct ixgbe_tx_queue *txq)
126 {
127         struct ixgbe_tx_entry *txep;
128         uint32_t status;
129         int i;
130
131         /* check DD bit on threshold descriptor */
132         status = txq->tx_ring[txq->tx_next_dd].wb.status;
133         if (!(status & rte_cpu_to_le_32(IXGBE_ADVTXD_STAT_DD)))
134                 return 0;
135
136         /*
137          * first buffer to free from S/W ring is at index
138          * tx_next_dd - (tx_rs_thresh-1)
139          */
140         txep = &(txq->sw_ring[txq->tx_next_dd - (txq->tx_rs_thresh - 1)]);
141
142         /* free buffers one at a time */
143         if ((txq->txq_flags & (uint32_t)ETH_TXQ_FLAGS_NOREFCOUNT) != 0) {
144                 for (i = 0; i < txq->tx_rs_thresh; ++i, ++txep) {
145                         txep->mbuf->next = NULL;
146                         rte_mempool_put(txep->mbuf->pool, txep->mbuf);
147                         txep->mbuf = NULL;
148                 }
149         } else {
150                 for (i = 0; i < txq->tx_rs_thresh; ++i, ++txep) {
151                         rte_pktmbuf_free_seg(txep->mbuf);
152                         txep->mbuf = NULL;
153                 }
154         }
155
156         /* buffers were freed, update counters */
157         txq->nb_tx_free = (uint16_t)(txq->nb_tx_free + txq->tx_rs_thresh);
158         txq->tx_next_dd = (uint16_t)(txq->tx_next_dd + txq->tx_rs_thresh);
159         if (txq->tx_next_dd >= txq->nb_tx_desc)
160                 txq->tx_next_dd = (uint16_t)(txq->tx_rs_thresh - 1);
161
162         return txq->tx_rs_thresh;
163 }
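
/*
 * Worked example of the threshold arithmetic above (values are
 * illustrative, not mandated): with nb_tx_desc = 128 and
 * tx_rs_thresh = 32, tx_next_dd starts at 31.  Once descriptor 31
 * reports DD, sw_ring[0..31] are freed, nb_tx_free grows by 32 and
 * tx_next_dd advances to 63, then 95, then 127, and wraps back to 31
 * after passing the end of the ring.
 */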
164
165 /* Populate 4 descriptors with data from 4 mbufs */
166 static inline void
167 tx4(volatile union ixgbe_adv_tx_desc *txdp, struct rte_mbuf **pkts)
168 {
169         uint64_t buf_dma_addr;
170         uint32_t pkt_len;
171         int i;
172
173         for (i = 0; i < 4; ++i, ++txdp, ++pkts) {
174                 buf_dma_addr = RTE_MBUF_DATA_DMA_ADDR(*pkts);
175                 pkt_len = (*pkts)->data_len;
176
177                 /* write data to descriptor */
178                 txdp->read.buffer_addr = rte_cpu_to_le_64(buf_dma_addr);
179
180                 txdp->read.cmd_type_len =
181                         rte_cpu_to_le_32((uint32_t)DCMD_DTYP_FLAGS | pkt_len);
182
183                 txdp->read.olinfo_status =
184                         rte_cpu_to_le_32(pkt_len << IXGBE_ADVTXD_PAYLEN_SHIFT);
185
186                 rte_prefetch0(&(*pkts)->pool);
187         }
188 }
189
190 /* Populate 1 descriptor with data from 1 mbuf */
191 static inline void
192 tx1(volatile union ixgbe_adv_tx_desc *txdp, struct rte_mbuf **pkts)
193 {
194         uint64_t buf_dma_addr;
195         uint32_t pkt_len;
196
197         buf_dma_addr = RTE_MBUF_DATA_DMA_ADDR(*pkts);
198         pkt_len = (*pkts)->data_len;
199
200         /* write data to descriptor */
201         txdp->read.buffer_addr = rte_cpu_to_le_64(buf_dma_addr);
202         txdp->read.cmd_type_len =
203                         rte_cpu_to_le_32((uint32_t)DCMD_DTYP_FLAGS | pkt_len);
204         txdp->read.olinfo_status =
205                         rte_cpu_to_le_32(pkt_len << IXGBE_ADVTXD_PAYLEN_SHIFT);
206         rte_prefetch0(&(*pkts)->pool);
207 }
208
209 /*
210  * Fill H/W descriptor ring with mbuf data.
211  * Copy mbuf pointers to the S/W ring.
212  */
213 static inline void
214 ixgbe_tx_fill_hw_ring(struct ixgbe_tx_queue *txq, struct rte_mbuf **pkts,
215                       uint16_t nb_pkts)
216 {
217         volatile union ixgbe_adv_tx_desc *txdp = &(txq->tx_ring[txq->tx_tail]);
218         struct ixgbe_tx_entry *txep = &(txq->sw_ring[txq->tx_tail]);
219         const int N_PER_LOOP = 4;
220         const int N_PER_LOOP_MASK = N_PER_LOOP-1;
221         int mainpart, leftover;
222         int i, j;
223
224         /*
225          * Process most of the packets in chunks of N pkts.  Any
226          * leftover packets will get processed one at a time.
227          */
228         mainpart = (nb_pkts & ((uint32_t) ~N_PER_LOOP_MASK));
229         leftover = (nb_pkts & ((uint32_t)  N_PER_LOOP_MASK));
230         for (i = 0; i < mainpart; i += N_PER_LOOP) {
231                 /* Copy N mbuf pointers to the S/W ring */
232                 for (j = 0; j < N_PER_LOOP; ++j) {
233                         (txep + i + j)->mbuf = *(pkts + i + j);
234                 }
235                 tx4(txdp + i, pkts + i);
236         }
237
238         if (unlikely(leftover > 0)) {
239                 for (i = 0; i < leftover; ++i) {
240                         (txep + mainpart + i)->mbuf = *(pkts + mainpart + i);
241                         tx1(txdp + mainpart + i, pkts + mainpart + i);
242                 }
243         }
244 }
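
/*
 * Worked example of the main/leftover split above (illustrative
 * value): with nb_pkts = 13, mainpart = 13 & ~3 = 12 and
 * leftover = 13 & 3 = 1, so tx4() is called three times for the
 * first 12 descriptors and tx1() once for the remaining packet.
 */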
245
246 static inline uint16_t
247 tx_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts,
248              uint16_t nb_pkts)
249 {
250         struct ixgbe_tx_queue *txq = (struct ixgbe_tx_queue *)tx_queue;
251         volatile union ixgbe_adv_tx_desc *tx_r = txq->tx_ring;
252         uint16_t n = 0;
253
254         /*
255          * Begin scanning the H/W ring for done descriptors when the
256          * number of available descriptors drops below tx_free_thresh.  For
257          * each done descriptor, free the associated buffer.
258          */
259         if (txq->nb_tx_free < txq->tx_free_thresh)
260                 ixgbe_tx_free_bufs(txq);
261
262         /* Only use descriptors that are available */
263         nb_pkts = (uint16_t)RTE_MIN(txq->nb_tx_free, nb_pkts);
264         if (unlikely(nb_pkts == 0))
265                 return 0;
266
267         /* Use exactly nb_pkts descriptors */
268         txq->nb_tx_free = (uint16_t)(txq->nb_tx_free - nb_pkts);
269
270         /*
271          * At this point, we know there are enough descriptors in the
272          * ring to transmit all the packets.  This assumes that each
273          * mbuf contains a single segment, and that no new offloads
274          * are expected, which would require a new context descriptor.
275          */
276
277         /*
278          * See if we're going to wrap-around. If so, handle the top
279          * of the descriptor ring first, then do the bottom.  If not,
280          * the processing looks just like the "bottom" part anyway...
281          */
282         if ((txq->tx_tail + nb_pkts) > txq->nb_tx_desc) {
283                 n = (uint16_t)(txq->nb_tx_desc - txq->tx_tail);
284                 ixgbe_tx_fill_hw_ring(txq, tx_pkts, n);
285
286                 /*
287                  * We know that the last descriptor in the ring will need to
288                  * have its RS bit set because tx_rs_thresh has to be
289                  * a divisor of the ring size
290                  */
291                 tx_r[txq->tx_next_rs].read.cmd_type_len |=
292                         rte_cpu_to_le_32(IXGBE_ADVTXD_DCMD_RS);
293                 txq->tx_next_rs = (uint16_t)(txq->tx_rs_thresh - 1);
294
295                 txq->tx_tail = 0;
296         }
297
298         /* Fill H/W descriptor ring with mbuf data */
299         ixgbe_tx_fill_hw_ring(txq, tx_pkts + n, (uint16_t)(nb_pkts - n));
300         txq->tx_tail = (uint16_t)(txq->tx_tail + (nb_pkts - n));
301
302         /*
303          * Determine if RS bit should be set
304          * This is what we actually want:
305          *   if ((txq->tx_tail - 1) >= txq->tx_next_rs)
306          * but instead of subtracting 1 and doing >=, we can just do
307          * greater than without subtracting.
308          */
309         if (txq->tx_tail > txq->tx_next_rs) {
310                 tx_r[txq->tx_next_rs].read.cmd_type_len |=
311                         rte_cpu_to_le_32(IXGBE_ADVTXD_DCMD_RS);
312                 txq->tx_next_rs = (uint16_t)(txq->tx_next_rs +
313                                                 txq->tx_rs_thresh);
314                 if (txq->tx_next_rs >= txq->nb_tx_desc)
315                         txq->tx_next_rs = (uint16_t)(txq->tx_rs_thresh - 1);
316         }
317
318         /*
319          * Check for wrap-around. This would only happen if we used
320          * up to the last descriptor in the ring, no more, no less.
321          */
322         if (txq->tx_tail >= txq->nb_tx_desc)
323                 txq->tx_tail = 0;
324
325         /* update tail pointer */
326         rte_wmb();
327         IXGBE_PCI_REG_WRITE(txq->tdt_reg_addr, txq->tx_tail);
328
329         return nb_pkts;
330 }
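
/*
 * Worked example of the wrap-around handling above (illustrative
 * values): with nb_tx_desc = 128, tx_tail = 120 and nb_pkts = 16,
 * the first ixgbe_tx_fill_hw_ring() call writes n = 8 descriptors
 * (120..127), RS is set on the last descriptor of the ring, tx_tail
 * restarts at 0, and the second call writes the remaining 8 packets
 * into descriptors 0..7, leaving tx_tail = 8.
 */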
331
332 uint16_t
333 ixgbe_xmit_pkts_simple(void *tx_queue, struct rte_mbuf **tx_pkts,
334                        uint16_t nb_pkts)
335 {
336         uint16_t nb_tx;
337
338         /* If the burst fits within TX_MAX_BURST pkts, transmit it in a single call */
339         if (likely(nb_pkts <= RTE_PMD_IXGBE_TX_MAX_BURST))
340                 return tx_xmit_pkts(tx_queue, tx_pkts, nb_pkts);
341
342         /* transmit more than the max burst, in chunks of TX_MAX_BURST */
343         nb_tx = 0;
344         while (nb_pkts) {
345                 uint16_t ret, n;
346                 n = (uint16_t)RTE_MIN(nb_pkts, RTE_PMD_IXGBE_TX_MAX_BURST);
347                 ret = tx_xmit_pkts(tx_queue, &(tx_pkts[nb_tx]), n);
348                 nb_tx = (uint16_t)(nb_tx + ret);
349                 nb_pkts = (uint16_t)(nb_pkts - ret);
350                 if (ret < n)
351                         break;
352         }
353
354         return nb_tx;
355 }
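
/*
 * Illustrative caller sketch (not compiled): applications do not call
 * ixgbe_xmit_pkts_simple() directly; they go through the generic burst
 * API, which the PMD binds to this function when the queue configuration
 * allows the simple TX path.  The helper name is an assumption of the
 * example, and the exact rte_eth_tx_burst() prototype (e.g. the port id
 * type) may differ between DPDK versions.
 */
#if 0
static uint16_t
example_tx_flush(uint8_t port_id, uint16_t queue_id,
                 struct rte_mbuf **pkts, uint16_t nb_pkts)
{
        uint16_t sent = 0;

        while (sent < nb_pkts) {
                uint16_t n = rte_eth_tx_burst(port_id, queue_id,
                                              &pkts[sent], nb_pkts - sent);
                if (n == 0)
                        break;          /* TX ring full; retry later */
                sent += n;
        }
        return sent;
}
#endif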
356
357 static inline void
358 ixgbe_set_xmit_ctx(struct ixgbe_tx_queue *txq,
359                 volatile struct ixgbe_adv_tx_context_desc *ctx_txd,
360                 uint64_t ol_flags, union ixgbe_tx_offload tx_offload)
361 {
362         uint32_t type_tucmd_mlhl;
363         uint32_t mss_l4len_idx = 0;
364         uint32_t ctx_idx;
365         uint32_t vlan_macip_lens;
366         union ixgbe_tx_offload tx_offload_mask;
367
368         ctx_idx = txq->ctx_curr;
369         tx_offload_mask.data = 0;
370         type_tucmd_mlhl = 0;
371
372         /* Specify which HW CTX to upload. */
373         mss_l4len_idx |= (ctx_idx << IXGBE_ADVTXD_IDX_SHIFT);
374
375         if (ol_flags & PKT_TX_VLAN_PKT) {
376                 tx_offload_mask.vlan_tci |= ~0;
377         }
378
379         /* check if TCP segmentation is required for this packet */
380         if (ol_flags & PKT_TX_TCP_SEG) {
381                 /* implies IP cksum in IPv4 */
382                 if (ol_flags & PKT_TX_IP_CKSUM)
383                         type_tucmd_mlhl = IXGBE_ADVTXD_TUCMD_IPV4 |
384                                 IXGBE_ADVTXD_TUCMD_L4T_TCP |
385                                 IXGBE_ADVTXD_DTYP_CTXT | IXGBE_ADVTXD_DCMD_DEXT;
386                 else
387                         type_tucmd_mlhl = IXGBE_ADVTXD_TUCMD_IPV6 |
388                                 IXGBE_ADVTXD_TUCMD_L4T_TCP |
389                                 IXGBE_ADVTXD_DTYP_CTXT | IXGBE_ADVTXD_DCMD_DEXT;
390
391                 tx_offload_mask.l2_len |= ~0;
392                 tx_offload_mask.l3_len |= ~0;
393                 tx_offload_mask.l4_len |= ~0;
394                 tx_offload_mask.tso_segsz |= ~0;
395                 mss_l4len_idx |= tx_offload.tso_segsz << IXGBE_ADVTXD_MSS_SHIFT;
396                 mss_l4len_idx |= tx_offload.l4_len << IXGBE_ADVTXD_L4LEN_SHIFT;
397         } else { /* no TSO, check if hardware checksum is needed */
398                 if (ol_flags & PKT_TX_IP_CKSUM) {
399                         type_tucmd_mlhl = IXGBE_ADVTXD_TUCMD_IPV4;
400                         tx_offload_mask.l2_len |= ~0;
401                         tx_offload_mask.l3_len |= ~0;
402                 }
403
404                 switch (ol_flags & PKT_TX_L4_MASK) {
405                 case PKT_TX_UDP_CKSUM:
406                         type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_L4T_UDP |
407                                 IXGBE_ADVTXD_DTYP_CTXT | IXGBE_ADVTXD_DCMD_DEXT;
408                         mss_l4len_idx |= sizeof(struct udp_hdr) << IXGBE_ADVTXD_L4LEN_SHIFT;
409                         tx_offload_mask.l2_len |= ~0;
410                         tx_offload_mask.l3_len |= ~0;
411                         break;
412                 case PKT_TX_TCP_CKSUM:
413                         type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_L4T_TCP |
414                                 IXGBE_ADVTXD_DTYP_CTXT | IXGBE_ADVTXD_DCMD_DEXT;
415                         mss_l4len_idx |= sizeof(struct tcp_hdr) << IXGBE_ADVTXD_L4LEN_SHIFT;
416                         tx_offload_mask.l2_len |= ~0;
417                         tx_offload_mask.l3_len |= ~0;
418                         tx_offload_mask.l4_len |= ~0;
419                         break;
420                 case PKT_TX_SCTP_CKSUM:
421                         type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_L4T_SCTP |
422                                 IXGBE_ADVTXD_DTYP_CTXT | IXGBE_ADVTXD_DCMD_DEXT;
423                         mss_l4len_idx |= sizeof(struct sctp_hdr) << IXGBE_ADVTXD_L4LEN_SHIFT;
424                         tx_offload_mask.l2_len |= ~0;
425                         tx_offload_mask.l3_len |= ~0;
426                         break;
427                 default:
428                         type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_L4T_RSV |
429                                 IXGBE_ADVTXD_DTYP_CTXT | IXGBE_ADVTXD_DCMD_DEXT;
430                         break;
431                 }
432         }
433
434         txq->ctx_cache[ctx_idx].flags = ol_flags;
435         txq->ctx_cache[ctx_idx].tx_offload.data  =
436                 tx_offload_mask.data & tx_offload.data;
437         txq->ctx_cache[ctx_idx].tx_offload_mask    = tx_offload_mask;
438
439         ctx_txd->type_tucmd_mlhl = rte_cpu_to_le_32(type_tucmd_mlhl);
440         vlan_macip_lens = tx_offload.l3_len;
441         vlan_macip_lens |= (tx_offload.l2_len << IXGBE_ADVTXD_MACLEN_SHIFT);
442         vlan_macip_lens |= ((uint32_t)tx_offload.vlan_tci << IXGBE_ADVTXD_VLAN_SHIFT);
443         ctx_txd->vlan_macip_lens = rte_cpu_to_le_32(vlan_macip_lens);
444         ctx_txd->mss_l4len_idx   = rte_cpu_to_le_32(mss_l4len_idx);
445         ctx_txd->seqnum_seed     = 0;
446 }
447
448 /*
449  * Check which hardware context can be used. Use the existing match
450  * or create a new context descriptor.
451  */
452 static inline uint32_t
453 what_advctx_update(struct ixgbe_tx_queue *txq, uint64_t flags,
454                 union ixgbe_tx_offload tx_offload)
455 {
456         /* If it matches the currently used context */
457         if (likely((txq->ctx_cache[txq->ctx_curr].flags == flags) &&
458                 (txq->ctx_cache[txq->ctx_curr].tx_offload.data ==
459                 (txq->ctx_cache[txq->ctx_curr].tx_offload_mask.data & tx_offload.data)))) {
460                         return txq->ctx_curr;
461         }
462
463         /* Otherwise, check whether it matches the other (next) context */
464         txq->ctx_curr ^= 1;
465         if (likely((txq->ctx_cache[txq->ctx_curr].flags == flags) &&
466                 (txq->ctx_cache[txq->ctx_curr].tx_offload.data ==
467                 (txq->ctx_cache[txq->ctx_curr].tx_offload_mask.data & tx_offload.data)))) {
468                         return txq->ctx_curr;
469         }
470
471         /* Neither context matches: the caller must build a new context descriptor */
472         return (IXGBE_CTX_NUM);
473 }
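
/*
 * Illustrative behaviour of the two-entry context cache above
 * (IXGBE_CTX_NUM is 2): traffic that alternates between two offload
 * configurations, e.g. VLAN-tagged and untagged packets, ping-pongs
 * between hardware context slots 0 and 1 without rebuilding a context
 * descriptor, while a third configuration returns IXGBE_CTX_NUM and
 * forces the caller to build a new one in the slot selected by ctx_curr.
 */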
474
475 static inline uint32_t
476 tx_desc_cksum_flags_to_olinfo(uint64_t ol_flags)
477 {
478         uint32_t tmp = 0;
479         if ((ol_flags & PKT_TX_L4_MASK) != PKT_TX_L4_NO_CKSUM)
480                 tmp |= IXGBE_ADVTXD_POPTS_TXSM;
481         if (ol_flags & PKT_TX_IP_CKSUM)
482                 tmp |= IXGBE_ADVTXD_POPTS_IXSM;
483         if (ol_flags & PKT_TX_TCP_SEG)
484                 tmp |= IXGBE_ADVTXD_POPTS_TXSM;
485         return tmp;
486 }
487
488 static inline uint32_t
489 tx_desc_ol_flags_to_cmdtype(uint64_t ol_flags)
490 {
491         uint32_t cmdtype = 0;
492         if (ol_flags & PKT_TX_VLAN_PKT)
493                 cmdtype |= IXGBE_ADVTXD_DCMD_VLE;
494         if (ol_flags & PKT_TX_TCP_SEG)
495                 cmdtype |= IXGBE_ADVTXD_DCMD_TSE;
496         return cmdtype;
497 }
498
499 /* Default RS bit threshold values */
500 #ifndef DEFAULT_TX_RS_THRESH
501 #define DEFAULT_TX_RS_THRESH   32
502 #endif
503 #ifndef DEFAULT_TX_FREE_THRESH
504 #define DEFAULT_TX_FREE_THRESH 32
505 #endif
506
507 /* Reset transmit descriptors after they have been used */
508 static inline int
509 ixgbe_xmit_cleanup(struct ixgbe_tx_queue *txq)
510 {
511         struct ixgbe_tx_entry *sw_ring = txq->sw_ring;
512         volatile union ixgbe_adv_tx_desc *txr = txq->tx_ring;
513         uint16_t last_desc_cleaned = txq->last_desc_cleaned;
514         uint16_t nb_tx_desc = txq->nb_tx_desc;
515         uint16_t desc_to_clean_to;
516         uint16_t nb_tx_to_clean;
517         uint32_t status;
518
519         /* Determine the last descriptor needing to be cleaned */
520         desc_to_clean_to = (uint16_t)(last_desc_cleaned + txq->tx_rs_thresh);
521         if (desc_to_clean_to >= nb_tx_desc)
522                 desc_to_clean_to = (uint16_t)(desc_to_clean_to - nb_tx_desc);
523
524         /* Check to make sure the last descriptor to clean is done */
525         desc_to_clean_to = sw_ring[desc_to_clean_to].last_id;
526         status = txr[desc_to_clean_to].wb.status;
527         if (!(status & rte_cpu_to_le_32(IXGBE_TXD_STAT_DD)))
528         {
529                 PMD_TX_FREE_LOG(DEBUG,
530                                 "TX descriptor %4u is not done"
531                                 "(port=%d queue=%d)",
532                                 desc_to_clean_to,
533                                 txq->port_id, txq->queue_id);
534                 /* Failed to clean any descriptors, better luck next time */
535                 return -(1);
536         }
537
538         /* Figure out how many descriptors will be cleaned */
539         if (last_desc_cleaned > desc_to_clean_to)
540                 nb_tx_to_clean = (uint16_t)((nb_tx_desc - last_desc_cleaned) +
541                                                         desc_to_clean_to);
542         else
543                 nb_tx_to_clean = (uint16_t)(desc_to_clean_to -
544                                                 last_desc_cleaned);
545
546         PMD_TX_FREE_LOG(DEBUG,
547                         "Cleaning %4u TX descriptors: %4u to %4u "
548                         "(port=%d queue=%d)",
549                         nb_tx_to_clean, last_desc_cleaned, desc_to_clean_to,
550                         txq->port_id, txq->queue_id);
551
552         /*
553          * The last descriptor to clean is done, so that means all the
554          * descriptors from the last descriptor that was cleaned
555          * up to the last descriptor with the RS bit set
556          * are done. Only reset the threshold descriptor.
557          */
558         txr[desc_to_clean_to].wb.status = 0;
559
560         /* Update the txq to reflect the last descriptor that was cleaned */
561         txq->last_desc_cleaned = desc_to_clean_to;
562         txq->nb_tx_free = (uint16_t)(txq->nb_tx_free + nb_tx_to_clean);
563
564         /* No Error */
565         return (0);
566 }
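
/*
 * Worked example of the cleanup arithmetic above (illustrative values):
 * with nb_tx_desc = 128, tx_rs_thresh = 32 and last_desc_cleaned = 127,
 * desc_to_clean_to becomes (127 + 32) - 128 = 31 and is then redirected
 * to sw_ring[31].last_id (assume that is also 31).  If that descriptor
 * reports DD, the wrap-around branch computes
 * nb_tx_to_clean = (128 - 127) + 31 = 32 freed descriptors.
 */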
567
568 uint16_t
569 ixgbe_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts,
570                 uint16_t nb_pkts)
571 {
572         struct ixgbe_tx_queue *txq;
573         struct ixgbe_tx_entry *sw_ring;
574         struct ixgbe_tx_entry *txe, *txn;
575         volatile union ixgbe_adv_tx_desc *txr;
576         volatile union ixgbe_adv_tx_desc *txd;
577         struct rte_mbuf     *tx_pkt;
578         struct rte_mbuf     *m_seg;
579         uint64_t buf_dma_addr;
580         uint32_t olinfo_status;
581         uint32_t cmd_type_len;
582         uint32_t pkt_len;
583         uint16_t slen;
584         uint64_t ol_flags;
585         uint16_t tx_id;
586         uint16_t tx_last;
587         uint16_t nb_tx;
588         uint16_t nb_used;
589         uint64_t tx_ol_req;
590         uint32_t ctx = 0;
591         uint32_t new_ctx;
592         union ixgbe_tx_offload tx_offload = {0};
593
594         txq = tx_queue;
595         sw_ring = txq->sw_ring;
596         txr     = txq->tx_ring;
597         tx_id   = txq->tx_tail;
598         txe = &sw_ring[tx_id];
599
600         /* Determine if the descriptor ring needs to be cleaned. */
601         if (txq->nb_tx_free < txq->tx_free_thresh)
602                 ixgbe_xmit_cleanup(txq);
603
604         rte_prefetch0(&txe->mbuf->pool);
605
606         /* TX loop */
607         for (nb_tx = 0; nb_tx < nb_pkts; nb_tx++) {
608                 new_ctx = 0;
609                 tx_pkt = *tx_pkts++;
610                 pkt_len = tx_pkt->pkt_len;
611
612                 /*
613                  * Determine how many (if any) context descriptors
614                  * are needed for offload functionality.
615                  */
616                 ol_flags = tx_pkt->ol_flags;
617
618                 /* If hardware offload required */
619                 tx_ol_req = ol_flags & IXGBE_TX_OFFLOAD_MASK;
620                 if (tx_ol_req) {
621                         tx_offload.l2_len = tx_pkt->l2_len;
622                         tx_offload.l3_len = tx_pkt->l3_len;
623                         tx_offload.l4_len = tx_pkt->l4_len;
624                         tx_offload.vlan_tci = tx_pkt->vlan_tci;
625                         tx_offload.tso_segsz = tx_pkt->tso_segsz;
626
627                         /* Decide whether a new context must be built or an existing one reused. */
628                         ctx = what_advctx_update(txq, tx_ol_req,
629                                 tx_offload);
630                         /* Only allocate a context descriptor if required */
631                         new_ctx = (ctx == IXGBE_CTX_NUM);
632                         ctx = txq->ctx_curr;
633                 }
634
635                 /*
636                  * Keep track of how many descriptors are used this loop
637                  * This will always be the number of segments + the number of
638                  * Context descriptors required to transmit the packet
639                  */
640                 nb_used = (uint16_t)(tx_pkt->nb_segs + new_ctx);
641
642                 /*
643                  * The number of descriptors that must be allocated for a
644                  * packet is the number of segments of that packet, plus 1
645                  * Context Descriptor for the hardware offload, if any.
646                  * Determine the last TX descriptor to allocate in the TX ring
647                  * for the packet, starting from the current position (tx_id)
648                  * in the ring.
649                  */
650                 tx_last = (uint16_t) (tx_id + nb_used - 1);
651
652                 /* Circular ring */
653                 if (tx_last >= txq->nb_tx_desc)
654                         tx_last = (uint16_t) (tx_last - txq->nb_tx_desc);
655
656                 PMD_TX_LOG(DEBUG, "port_id=%u queue_id=%u pktlen=%u"
657                            " tx_first=%u tx_last=%u",
658                            (unsigned) txq->port_id,
659                            (unsigned) txq->queue_id,
660                            (unsigned) pkt_len,
661                            (unsigned) tx_id,
662                            (unsigned) tx_last);
663
664                 /*
665                  * Make sure there are enough TX descriptors available to
666                  * transmit the entire packet.
667                  * nb_used better be less than or equal to txq->tx_rs_thresh
668                  */
669                 if (nb_used > txq->nb_tx_free) {
670                         PMD_TX_FREE_LOG(DEBUG,
671                                         "Not enough free TX descriptors "
672                                         "nb_used=%4u nb_free=%4u "
673                                         "(port=%d queue=%d)",
674                                         nb_used, txq->nb_tx_free,
675                                         txq->port_id, txq->queue_id);
676
677                         if (ixgbe_xmit_cleanup(txq) != 0) {
678                                 /* Could not clean any descriptors */
679                                 if (nb_tx == 0)
680                                         return (0);
681                                 goto end_of_tx;
682                         }
683
684                         /* nb_used better be <= txq->tx_rs_thresh */
685                         if (unlikely(nb_used > txq->tx_rs_thresh)) {
686                                 PMD_TX_FREE_LOG(DEBUG,
687                                         "The number of descriptors needed to "
688                                         "transmit the packet exceeds the "
689                                         "RS bit threshold. This will impact "
690                                         "performance."
691                                         "nb_used=%4u nb_free=%4u "
692                                         "tx_rs_thresh=%4u. "
693                                         "(port=%d queue=%d)",
694                                         nb_used, txq->nb_tx_free,
695                                         txq->tx_rs_thresh,
696                                         txq->port_id, txq->queue_id);
697                                 /*
698                                  * Loop here until there are enough TX
699                                  * descriptors or until the ring cannot be
700                                  * cleaned.
701                                  */
702                                 while (nb_used > txq->nb_tx_free) {
703                                         if (ixgbe_xmit_cleanup(txq) != 0) {
704                                                 /*
705                                                  * Could not clean any
706                                                  * descriptors
707                                                  */
708                                                 if (nb_tx == 0)
709                                                         return (0);
710                                                 goto end_of_tx;
711                                         }
712                                 }
713                         }
714                 }
715
716                 /*
717                  * By now there are enough free TX descriptors to transmit
718                  * the packet.
719                  */
720
721                 /*
722                  * Set common flags of all TX Data Descriptors.
723                  *
724                  * The following bits must be set in all Data Descriptors:
725                  *   - IXGBE_ADVTXD_DTYP_DATA
726                  *   - IXGBE_ADVTXD_DCMD_DEXT
727                  *
728                  * The following bits must be set in the first Data Descriptor
729                  * and are ignored in the other ones:
730                  *   - IXGBE_ADVTXD_DCMD_IFCS
731                  *   - IXGBE_ADVTXD_MAC_1588
732                  *   - IXGBE_ADVTXD_DCMD_VLE
733                  *
734                  * The following bits must only be set in the last Data
735                  * Descriptor:
736                  *   - IXGBE_TXD_CMD_EOP
737                  *
738                  * The following bits can be set in any Data Descriptor, but
739                  * are only set in the last Data Descriptor:
740                  *   - IXGBE_TXD_CMD_RS
741                  */
742                 cmd_type_len = IXGBE_ADVTXD_DTYP_DATA |
743                         IXGBE_ADVTXD_DCMD_IFCS | IXGBE_ADVTXD_DCMD_DEXT;
744
745 #ifdef RTE_LIBRTE_IEEE1588
746                 if (ol_flags & PKT_TX_IEEE1588_TMST)
747                         cmd_type_len |= IXGBE_ADVTXD_MAC_1588;
748 #endif
749
750                 olinfo_status = 0;
751                 if (tx_ol_req) {
752
753                         if (ol_flags & PKT_TX_TCP_SEG) {
754                                 /* when TSO is on, the paylen in the descriptor is
755                                  * not the packet len but the TCP payload len */
756                                 pkt_len -= (tx_offload.l2_len +
757                                         tx_offload.l3_len + tx_offload.l4_len);
758                         }
759
760                         /*
761                          * Setup the TX Advanced Context Descriptor if required
762                          */
763                         if (new_ctx) {
764                                 volatile struct ixgbe_adv_tx_context_desc *
765                                     ctx_txd;
766
767                                 ctx_txd = (volatile struct
768                                     ixgbe_adv_tx_context_desc *)
769                                     &txr[tx_id];
770
771                                 txn = &sw_ring[txe->next_id];
772                                 rte_prefetch0(&txn->mbuf->pool);
773
774                                 if (txe->mbuf != NULL) {
775                                         rte_pktmbuf_free_seg(txe->mbuf);
776                                         txe->mbuf = NULL;
777                                 }
778
779                                 ixgbe_set_xmit_ctx(txq, ctx_txd, tx_ol_req,
780                                         tx_offload);
781
782                                 txe->last_id = tx_last;
783                                 tx_id = txe->next_id;
784                                 txe = txn;
785                         }
786
787                         /*
788                          * Setup the TX Advanced Data Descriptor,
789                          * This path will go through
790                          * whatever new/reuse the context descriptor
791                          */
792                         cmd_type_len  |= tx_desc_ol_flags_to_cmdtype(ol_flags);
793                         olinfo_status |= tx_desc_cksum_flags_to_olinfo(ol_flags);
794                         olinfo_status |= ctx << IXGBE_ADVTXD_IDX_SHIFT;
795                 }
796
797                 olinfo_status |= (pkt_len << IXGBE_ADVTXD_PAYLEN_SHIFT);
798
799                 m_seg = tx_pkt;
800                 do {
801                         txd = &txr[tx_id];
802                         txn = &sw_ring[txe->next_id];
803                         rte_prefetch0(&txn->mbuf->pool);
804
805                         if (txe->mbuf != NULL)
806                                 rte_pktmbuf_free_seg(txe->mbuf);
807                         txe->mbuf = m_seg;
808
809                         /*
810                          * Set up Transmit Data Descriptor.
811                          */
812                         slen = m_seg->data_len;
813                         buf_dma_addr = RTE_MBUF_DATA_DMA_ADDR(m_seg);
814                         txd->read.buffer_addr =
815                                 rte_cpu_to_le_64(buf_dma_addr);
816                         txd->read.cmd_type_len =
817                                 rte_cpu_to_le_32(cmd_type_len | slen);
818                         txd->read.olinfo_status =
819                                 rte_cpu_to_le_32(olinfo_status);
820                         txe->last_id = tx_last;
821                         tx_id = txe->next_id;
822                         txe = txn;
823                         m_seg = m_seg->next;
824                 } while (m_seg != NULL);
825
826                 /*
827                  * The last packet data descriptor needs End Of Packet (EOP)
828                  */
829                 cmd_type_len |= IXGBE_TXD_CMD_EOP;
830                 txq->nb_tx_used = (uint16_t)(txq->nb_tx_used + nb_used);
831                 txq->nb_tx_free = (uint16_t)(txq->nb_tx_free - nb_used);
832
833                 /* Set RS bit only on threshold packets' last descriptor */
834                 if (txq->nb_tx_used >= txq->tx_rs_thresh) {
835                         PMD_TX_FREE_LOG(DEBUG,
836                                         "Setting RS bit on TXD id="
837                                         "%4u (port=%d queue=%d)",
838                                         tx_last, txq->port_id, txq->queue_id);
839
840                         cmd_type_len |= IXGBE_TXD_CMD_RS;
841
842                         /* Update txq RS bit counters */
843                         txq->nb_tx_used = 0;
844                 }
845                 txd->read.cmd_type_len |= rte_cpu_to_le_32(cmd_type_len);
846         }
847 end_of_tx:
848         rte_wmb();
849
850         /*
851          * Set the Transmit Descriptor Tail (TDT)
852          */
853         PMD_TX_LOG(DEBUG, "port_id=%u queue_id=%u tx_tail=%u nb_tx=%u",
854                    (unsigned) txq->port_id, (unsigned) txq->queue_id,
855                    (unsigned) tx_id, (unsigned) nb_tx);
856         IXGBE_PCI_REG_WRITE(txq->tdt_reg_addr, tx_id);
857         txq->tx_tail = tx_id;
858
859         return (nb_tx);
860 }
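
/*
 * Illustrative sketch (not compiled) of how an application requests the
 * checksum offloads handled by the full-featured path above: the header
 * lengths and flags set below are exactly what ixgbe_xmit_pkts() copies
 * into tx_offload / tx_ol_req.  The Ethernet/IPv4/TCP layout and the
 * helper name are assumptions of the example; the application is also
 * expected to zero the IPv4 checksum field and seed the TCP checksum
 * with the pseudo-header checksum before transmitting.
 */
#if 0
static void
example_request_tx_cksum(struct rte_mbuf *m)
{
        /* Assumed frame layout: Ethernet / IPv4 / TCP, no VLAN tag. */
        m->l2_len = sizeof(struct ether_hdr);
        m->l3_len = sizeof(struct ipv4_hdr);
        m->ol_flags |= PKT_TX_IP_CKSUM | PKT_TX_TCP_CKSUM;
}
#endif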
861
862 /*********************************************************************
863  *
864  *  RX functions
865  *
866  **********************************************************************/
867 #ifdef RTE_NEXT_ABI
868 #define IXGBE_PACKET_TYPE_IPV4              0X01
869 #define IXGBE_PACKET_TYPE_IPV4_TCP          0X11
870 #define IXGBE_PACKET_TYPE_IPV4_UDP          0X21
871 #define IXGBE_PACKET_TYPE_IPV4_SCTP         0X41
872 #define IXGBE_PACKET_TYPE_IPV4_EXT          0X03
873 #define IXGBE_PACKET_TYPE_IPV4_EXT_SCTP     0X43
874 #define IXGBE_PACKET_TYPE_IPV6              0X04
875 #define IXGBE_PACKET_TYPE_IPV6_TCP          0X14
876 #define IXGBE_PACKET_TYPE_IPV6_UDP          0X24
877 #define IXGBE_PACKET_TYPE_IPV6_EXT          0X0C
878 #define IXGBE_PACKET_TYPE_IPV6_EXT_TCP      0X1C
879 #define IXGBE_PACKET_TYPE_IPV6_EXT_UDP      0X2C
880 #define IXGBE_PACKET_TYPE_IPV4_IPV6         0X05
881 #define IXGBE_PACKET_TYPE_IPV4_IPV6_TCP     0X15
882 #define IXGBE_PACKET_TYPE_IPV4_IPV6_UDP     0X25
883 #define IXGBE_PACKET_TYPE_IPV4_IPV6_EXT     0X0D
884 #define IXGBE_PACKET_TYPE_IPV4_IPV6_EXT_TCP 0X1D
885 #define IXGBE_PACKET_TYPE_IPV4_IPV6_EXT_UDP 0X2D
886 #define IXGBE_PACKET_TYPE_MAX               0X80
887 #define IXGBE_PACKET_TYPE_MASK              0X7F
888 #define IXGBE_PACKET_TYPE_SHIFT             0X04
889 static inline uint32_t
890 ixgbe_rxd_pkt_info_to_pkt_type(uint16_t pkt_info)
891 {
892         static const uint32_t
893                 ptype_table[IXGBE_PACKET_TYPE_MAX] __rte_cache_aligned = {
894                 [IXGBE_PACKET_TYPE_IPV4] = RTE_PTYPE_L2_ETHER |
895                         RTE_PTYPE_L3_IPV4,
896                 [IXGBE_PACKET_TYPE_IPV4_EXT] = RTE_PTYPE_L2_ETHER |
897                         RTE_PTYPE_L3_IPV4_EXT,
898                 [IXGBE_PACKET_TYPE_IPV6] = RTE_PTYPE_L2_ETHER |
899                         RTE_PTYPE_L3_IPV6,
900                 [IXGBE_PACKET_TYPE_IPV4_IPV6] = RTE_PTYPE_L2_ETHER |
901                         RTE_PTYPE_L3_IPV4 | RTE_PTYPE_TUNNEL_IP |
902                         RTE_PTYPE_INNER_L3_IPV6,
903                 [IXGBE_PACKET_TYPE_IPV6_EXT] = RTE_PTYPE_L2_ETHER |
904                         RTE_PTYPE_L3_IPV6_EXT,
905                 [IXGBE_PACKET_TYPE_IPV4_IPV6_EXT] = RTE_PTYPE_L2_ETHER |
906                         RTE_PTYPE_L3_IPV4 | RTE_PTYPE_TUNNEL_IP |
907                         RTE_PTYPE_INNER_L3_IPV6_EXT,
908                 [IXGBE_PACKET_TYPE_IPV4_TCP] = RTE_PTYPE_L2_ETHER |
909                         RTE_PTYPE_L3_IPV4 | RTE_PTYPE_L4_TCP,
910                 [IXGBE_PACKET_TYPE_IPV6_TCP] = RTE_PTYPE_L2_ETHER |
911                         RTE_PTYPE_L3_IPV6 | RTE_PTYPE_L4_TCP,
912                 [IXGBE_PACKET_TYPE_IPV4_IPV6_TCP] = RTE_PTYPE_L2_ETHER |
913                         RTE_PTYPE_L3_IPV4 | RTE_PTYPE_TUNNEL_IP |
914                         RTE_PTYPE_INNER_L3_IPV6 | RTE_PTYPE_INNER_L4_TCP,
915                 [IXGBE_PACKET_TYPE_IPV6_EXT_TCP] = RTE_PTYPE_L2_ETHER |
916                         RTE_PTYPE_L3_IPV6_EXT | RTE_PTYPE_L4_TCP,
917                 [IXGBE_PACKET_TYPE_IPV4_IPV6_EXT_TCP] = RTE_PTYPE_L2_ETHER |
918                         RTE_PTYPE_L3_IPV4 | RTE_PTYPE_TUNNEL_IP |
919                         RTE_PTYPE_INNER_L3_IPV6_EXT | RTE_PTYPE_INNER_L4_TCP,
920                 [IXGBE_PACKET_TYPE_IPV4_UDP] = RTE_PTYPE_L2_ETHER |
921                         RTE_PTYPE_L3_IPV4 | RTE_PTYPE_L4_UDP,
922                 [IXGBE_PACKET_TYPE_IPV6_UDP] = RTE_PTYPE_L2_ETHER |
923                         RTE_PTYPE_L3_IPV6 | RTE_PTYPE_L4_UDP,
924                 [IXGBE_PACKET_TYPE_IPV4_IPV6_UDP] = RTE_PTYPE_L2_ETHER |
925                         RTE_PTYPE_L3_IPV4 | RTE_PTYPE_TUNNEL_IP |
926                         RTE_PTYPE_INNER_L3_IPV6 | RTE_PTYPE_INNER_L4_UDP,
927                 [IXGBE_PACKET_TYPE_IPV6_EXT_UDP] = RTE_PTYPE_L2_ETHER |
928                         RTE_PTYPE_L3_IPV6_EXT | RTE_PTYPE_L4_UDP,
929                 [IXGBE_PACKET_TYPE_IPV4_IPV6_EXT_UDP] = RTE_PTYPE_L2_ETHER |
930                         RTE_PTYPE_L3_IPV4 | RTE_PTYPE_TUNNEL_IP |
931                         RTE_PTYPE_INNER_L3_IPV6_EXT | RTE_PTYPE_INNER_L4_UDP,
932                 [IXGBE_PACKET_TYPE_IPV4_SCTP] = RTE_PTYPE_L2_ETHER |
933                         RTE_PTYPE_L3_IPV4 | RTE_PTYPE_L4_SCTP,
934                 [IXGBE_PACKET_TYPE_IPV4_EXT_SCTP] = RTE_PTYPE_L2_ETHER |
935                         RTE_PTYPE_L3_IPV4_EXT | RTE_PTYPE_L4_SCTP,
936         };
937         if (unlikely(pkt_info & IXGBE_RXDADV_PKTTYPE_ETQF))
938                 return RTE_PTYPE_UNKNOWN;
939
940         pkt_info = (pkt_info >> IXGBE_PACKET_TYPE_SHIFT) &
941                                 IXGBE_PACKET_TYPE_MASK;
942
943         return ptype_table[pkt_info];
944 }
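
/*
 * Worked example of the translation above: a non-ETQF descriptor whose
 * packet-type bits decode to IXGBE_PACKET_TYPE_IPV4_TCP (0x11 after the
 * shift and mask) is reported to the application as
 * RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4 | RTE_PTYPE_L4_TCP.
 */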
945
946 static inline uint64_t
947 ixgbe_rxd_pkt_info_to_pkt_flags(uint16_t pkt_info)
948 {
949         static uint64_t ip_rss_types_map[16] __rte_cache_aligned = {
950                 0, PKT_RX_RSS_HASH, PKT_RX_RSS_HASH, PKT_RX_RSS_HASH,
951                 0, PKT_RX_RSS_HASH, 0, PKT_RX_RSS_HASH,
952                 PKT_RX_RSS_HASH, 0, 0, 0,
953                 0, 0, 0,  PKT_RX_FDIR,
954         };
955 #ifdef RTE_LIBRTE_IEEE1588
956         static uint64_t ip_pkt_etqf_map[8] = {
957                 0, 0, 0, PKT_RX_IEEE1588_PTP,
958                 0, 0, 0, 0,
959         };
960
961         if (likely(pkt_info & IXGBE_RXDADV_PKTTYPE_ETQF))
962                 return ip_pkt_etqf_map[(pkt_info >> 4) & 0X07] |
963                                 ip_rss_types_map[pkt_info & 0XF];
964         else
965                 return ip_rss_types_map[pkt_info & 0XF];
966 #else
967         return ip_rss_types_map[pkt_info & 0XF];
968 #endif
969 }
970 #else /* RTE_NEXT_ABI */
971 static inline uint64_t
972 rx_desc_hlen_type_rss_to_pkt_flags(uint32_t hl_tp_rs)
973 {
974         uint64_t pkt_flags;
975
976         static const uint64_t ip_pkt_types_map[16] = {
977                 0, PKT_RX_IPV4_HDR, PKT_RX_IPV4_HDR_EXT, PKT_RX_IPV4_HDR_EXT,
978                 PKT_RX_IPV6_HDR, 0, 0, 0,
979                 PKT_RX_IPV6_HDR_EXT, 0, 0, 0,
980                 PKT_RX_IPV6_HDR_EXT, 0, 0, 0,
981         };
982
983         static const uint64_t ip_rss_types_map[16] = {
984                 0, PKT_RX_RSS_HASH, PKT_RX_RSS_HASH, PKT_RX_RSS_HASH,
985                 0, PKT_RX_RSS_HASH, 0, PKT_RX_RSS_HASH,
986                 PKT_RX_RSS_HASH, 0, 0, 0,
987                 0, 0, 0,  PKT_RX_FDIR,
988         };
989
990 #ifdef RTE_LIBRTE_IEEE1588
991         static uint64_t ip_pkt_etqf_map[8] = {
992                 0, 0, 0, PKT_RX_IEEE1588_PTP,
993                 0, 0, 0, 0,
994         };
995
996         pkt_flags = (hl_tp_rs & IXGBE_RXDADV_PKTTYPE_ETQF) ?
997                         ip_pkt_etqf_map[(hl_tp_rs >> 4) & 0x07] :
998                         ip_pkt_types_map[(hl_tp_rs >> 4) & 0x0F];
999 #else
1000         pkt_flags = (hl_tp_rs & IXGBE_RXDADV_PKTTYPE_ETQF) ? 0 :
1001                         ip_pkt_types_map[(hl_tp_rs >> 4) & 0x0F];
1002
1003 #endif
1004         return pkt_flags | ip_rss_types_map[hl_tp_rs & 0xF];
1005 }
1006 #endif /* RTE_NEXT_ABI */
1007
1008 static inline uint64_t
1009 rx_desc_status_to_pkt_flags(uint32_t rx_status)
1010 {
1011         uint64_t pkt_flags;
1012
1013         /*
1014          * Check only whether a VLAN tag is present.
1015          * Do not check here whether the L3/L4 Rx checksum was computed by
1016          * the NIC; that is controlled by the rte_eth_rxmode.hw_ip_checksum flag.
1017          */
1018         pkt_flags = (rx_status & IXGBE_RXD_STAT_VP) ?  PKT_RX_VLAN_PKT : 0;
1019
1020 #ifdef RTE_LIBRTE_IEEE1588
1021         if (rx_status & IXGBE_RXD_STAT_TMST)
1022                 pkt_flags = pkt_flags | PKT_RX_IEEE1588_TMST;
1023 #endif
1024         return pkt_flags;
1025 }
1026
1027 static inline uint64_t
1028 rx_desc_error_to_pkt_flags(uint32_t rx_status)
1029 {
1030         /*
1031          * Bit 31: IPE, IPv4 checksum error
1032          * Bit 30: L4I, L4 integrity error
1033          */
1034         static uint64_t error_to_pkt_flags_map[4] = {
1035                 0,  PKT_RX_L4_CKSUM_BAD, PKT_RX_IP_CKSUM_BAD,
1036                 PKT_RX_IP_CKSUM_BAD | PKT_RX_L4_CKSUM_BAD
1037         };
1038         return error_to_pkt_flags_map[(rx_status >>
1039                 IXGBE_RXDADV_ERR_CKSUM_BIT) & IXGBE_RXDADV_ERR_CKSUM_MSK];
1040 }
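
/*
 * Illustrative sketch (not compiled) of how an application consumes the
 * error flags produced above; the helper name is an assumption of the
 * example.
 */
#if 0
static unsigned int
example_count_bad_cksum(struct rte_mbuf **pkts, uint16_t nb)
{
        unsigned int i, bad = 0;

        for (i = 0; i < nb; i++)
                if (pkts[i]->ol_flags &
                    (PKT_RX_IP_CKSUM_BAD | PKT_RX_L4_CKSUM_BAD))
                        bad++;
        return bad;
}
#endif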
1041
1042 /*
1043  * LOOK_AHEAD defines how many desc statuses to check beyond the
1044  * current descriptor.
1045  * It must be a pound define for optimal performance.
1046  * Do not change the value of LOOK_AHEAD, as the ixgbe_rx_scan_hw_ring
1047  * function only works with LOOK_AHEAD=8.
1048  */
1049 #define LOOK_AHEAD 8
1050 #if (LOOK_AHEAD != 8)
1051 #error "PMD IXGBE: LOOK_AHEAD must be 8\n"
1052 #endif
1053 static inline int
1054 ixgbe_rx_scan_hw_ring(struct ixgbe_rx_queue *rxq)
1055 {
1056         volatile union ixgbe_adv_rx_desc *rxdp;
1057         struct ixgbe_rx_entry *rxep;
1058         struct rte_mbuf *mb;
1059         uint16_t pkt_len;
1060         uint64_t pkt_flags;
1061 #ifdef RTE_NEXT_ABI
1062         int nb_dd;
1063         uint32_t s[LOOK_AHEAD];
1064         uint16_t pkt_info[LOOK_AHEAD];
1065 #else
1066         int s[LOOK_AHEAD], nb_dd;
1067 #endif /* RTE_NEXT_ABI */
1068         int i, j, nb_rx = 0;
1069         uint32_t status;
1070
1071         /* get references to current descriptor and S/W ring entry */
1072         rxdp = &rxq->rx_ring[rxq->rx_tail];
1073         rxep = &rxq->sw_ring[rxq->rx_tail];
1074
1075         status = rxdp->wb.upper.status_error;
1076         /* check to make sure there is at least 1 packet to receive */
1077         if (!(status & rte_cpu_to_le_32(IXGBE_RXDADV_STAT_DD)))
1078                 return 0;
1079
1080         /*
1081          * Scan LOOK_AHEAD descriptors at a time to determine which descriptors
1082          * reference packets that are ready to be received.
1083          */
1084         for (i = 0; i < RTE_PMD_IXGBE_RX_MAX_BURST;
1085              i += LOOK_AHEAD, rxdp += LOOK_AHEAD, rxep += LOOK_AHEAD)
1086         {
1087                 /* Read desc statuses backwards to avoid race condition */
1088                 for (j = LOOK_AHEAD-1; j >= 0; --j)
1089                         s[j] = rte_le_to_cpu_32(rxdp[j].wb.upper.status_error);
1090
1091 #ifdef RTE_NEXT_ABI
1092                 for (j = LOOK_AHEAD - 1; j >= 0; --j)
1093                         pkt_info[j] = rxdp[j].wb.lower.lo_dword.
1094                                                 hs_rss.pkt_info;
1095 #endif /* RTE_NEXT_ABI */
1096
1097                 /* Compute how many status bits were set */
1098                 nb_dd = 0;
1099                 for (j = 0; j < LOOK_AHEAD; ++j)
1100                         nb_dd += s[j] & IXGBE_RXDADV_STAT_DD;
1101
1102                 nb_rx += nb_dd;
1103
1104                 /* Translate descriptor info to mbuf format */
1105                 for (j = 0; j < nb_dd; ++j) {
1106                         mb = rxep[j].mbuf;
1107                         pkt_len = rte_le_to_cpu_16(rxdp[j].wb.upper.length) -
1108                                   rxq->crc_len;
1109                         mb->data_len = pkt_len;
1110                         mb->pkt_len = pkt_len;
1111                         mb->vlan_tci = rte_le_to_cpu_16(rxdp[j].wb.upper.vlan);
1112
1113                         /* convert descriptor fields to rte mbuf flags */
1114 #ifdef RTE_NEXT_ABI
1115                         pkt_flags = rx_desc_status_to_pkt_flags(s[j]);
1116                         pkt_flags |= rx_desc_error_to_pkt_flags(s[j]);
1117                         pkt_flags |=
1118                                 ixgbe_rxd_pkt_info_to_pkt_flags(pkt_info[j]);
1119                         mb->ol_flags = pkt_flags;
1120                         mb->packet_type =
1121                                 ixgbe_rxd_pkt_info_to_pkt_type(pkt_info[j]);
1122 #else /* RTE_NEXT_ABI */
1123                         pkt_flags  = rx_desc_hlen_type_rss_to_pkt_flags(
1124                                         rte_le_to_cpu_32(
1125                                         rxdp[j].wb.lower.lo_dword.data));
1126                         /* reuse status field from scan list */
1127                         pkt_flags |= rx_desc_status_to_pkt_flags(s[j]);
1128                         pkt_flags |= rx_desc_error_to_pkt_flags(s[j]);
1129                         mb->ol_flags = pkt_flags;
1130 #endif /* RTE_NEXT_ABI */
1131
1132                         if (likely(pkt_flags & PKT_RX_RSS_HASH))
1133                                 mb->hash.rss = rte_le_to_cpu_32(
1134                                     rxdp[j].wb.lower.hi_dword.rss);
1135                         else if (pkt_flags & PKT_RX_FDIR) {
1136                                 mb->hash.fdir.hash = rte_le_to_cpu_16(
1137                                     rxdp[j].wb.lower.hi_dword.csum_ip.csum) &
1138                                     IXGBE_ATR_HASH_MASK;
1139                                 mb->hash.fdir.id = rte_le_to_cpu_16(
1140                                     rxdp[j].wb.lower.hi_dword.csum_ip.ip_id);
1141                         }
1142                 }
1143
1144                 /* Move mbuf pointers from the S/W ring to the stage */
1145                 for (j = 0; j < LOOK_AHEAD; ++j) {
1146                         rxq->rx_stage[i + j] = rxep[j].mbuf;
1147                 }
1148
1149                 /* stop scanning if not all descriptors in this group were done */
1150                 if (nb_dd != LOOK_AHEAD)
1151                         break;
1152         }
1153
1154         /* clear software ring entries so we can cleanup correctly */
1155         for (i = 0; i < nb_rx; ++i) {
1156                 rxq->sw_ring[rxq->rx_tail + i].mbuf = NULL;
1157         }
1158
1159
1160         return nb_rx;
1161 }
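
/*
 * Worked example of the LOOK_AHEAD scan above (illustrative values): if
 * only the first 5 descriptors of a group of 8 have their DD bit set,
 * nb_dd is 5, those 5 descriptors are converted to mbufs and staged,
 * and the outer loop stops because nb_dd != LOOK_AHEAD.  Reading the
 * statuses back to front ensures the counted DD bits always form a
 * contiguous prefix of the group, since hardware completes descriptors
 * in order.
 */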
1162
1163 static inline int
1164 ixgbe_rx_alloc_bufs(struct ixgbe_rx_queue *rxq, bool reset_mbuf)
1165 {
1166         volatile union ixgbe_adv_rx_desc *rxdp;
1167         struct ixgbe_rx_entry *rxep;
1168         struct rte_mbuf *mb;
1169         uint16_t alloc_idx;
1170         __le64 dma_addr;
1171         int diag, i;
1172
1173         /* allocate buffers in bulk directly into the S/W ring */
1174         alloc_idx = rxq->rx_free_trigger - (rxq->rx_free_thresh - 1);
1175         rxep = &rxq->sw_ring[alloc_idx];
1176         diag = rte_mempool_get_bulk(rxq->mb_pool, (void *)rxep,
1177                                     rxq->rx_free_thresh);
1178         if (unlikely(diag != 0))
1179                 return (-ENOMEM);
1180
1181         rxdp = &rxq->rx_ring[alloc_idx];
1182         for (i = 0; i < rxq->rx_free_thresh; ++i) {
1183                 /* populate the static rte mbuf fields */
1184                 mb = rxep[i].mbuf;
1185                 if (reset_mbuf) {
1186                         mb->next = NULL;
1187                         mb->nb_segs = 1;
1188                         mb->port = rxq->port_id;
1189                 }
1190
1191                 rte_mbuf_refcnt_set(mb, 1);
1192                 mb->data_off = RTE_PKTMBUF_HEADROOM;
1193
1194                 /* populate the descriptors */
1195                 dma_addr = rte_cpu_to_le_64(RTE_MBUF_DATA_DMA_ADDR_DEFAULT(mb));
1196                 rxdp[i].read.hdr_addr = 0;
1197                 rxdp[i].read.pkt_addr = dma_addr;
1198         }
1199
1200         /* update state of internal queue structure */
1201         rxq->rx_free_trigger = rxq->rx_free_trigger + rxq->rx_free_thresh;
1202         if (rxq->rx_free_trigger >= rxq->nb_rx_desc)
1203                 rxq->rx_free_trigger = rxq->rx_free_thresh - 1;
1204
1205         /* no errors */
1206         return 0;
1207 }
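
/*
 * Worked example of the replenish arithmetic above (illustrative
 * values): with rx_free_thresh = 32 and rx_free_trigger = 31, alloc_idx
 * is 31 - 31 = 0, so sw_ring[0..31] receive fresh mbufs and descriptors
 * 0..31 are re-armed with their buffer addresses; the trigger then
 * moves to 63, and wraps back to 31 once it reaches the end of the ring.
 */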
1208
1209 static inline uint16_t
1210 ixgbe_rx_fill_from_stage(struct ixgbe_rx_queue *rxq, struct rte_mbuf **rx_pkts,
1211                          uint16_t nb_pkts)
1212 {
1213         struct rte_mbuf **stage = &rxq->rx_stage[rxq->rx_next_avail];
1214         int i;
1215
1216         /* how many packets are ready to return? */
1217         nb_pkts = (uint16_t)RTE_MIN(nb_pkts, rxq->rx_nb_avail);
1218
1219         /* copy mbuf pointers to the application's packet list */
1220         for (i = 0; i < nb_pkts; ++i)
1221                 rx_pkts[i] = stage[i];
1222
1223         /* update internal queue state */
1224         rxq->rx_nb_avail = (uint16_t)(rxq->rx_nb_avail - nb_pkts);
1225         rxq->rx_next_avail = (uint16_t)(rxq->rx_next_avail + nb_pkts);
1226
1227         return nb_pkts;
1228 }
1229
1230 static inline uint16_t
1231 rx_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts,
1232              uint16_t nb_pkts)
1233 {
1234         struct ixgbe_rx_queue *rxq = (struct ixgbe_rx_queue *)rx_queue;
1235         uint16_t nb_rx = 0;
1236
1237         /* Any previously recv'd pkts will be returned from the Rx stage */
1238         if (rxq->rx_nb_avail)
1239                 return ixgbe_rx_fill_from_stage(rxq, rx_pkts, nb_pkts);
1240
1241         /* Scan the H/W ring for packets to receive */
1242         nb_rx = (uint16_t)ixgbe_rx_scan_hw_ring(rxq);
1243
1244         /* update internal queue state */
1245         rxq->rx_next_avail = 0;
1246         rxq->rx_nb_avail = nb_rx;
1247         rxq->rx_tail = (uint16_t)(rxq->rx_tail + nb_rx);
1248
1249         /* if required, allocate new buffers to replenish descriptors */
1250         if (rxq->rx_tail > rxq->rx_free_trigger) {
1251                 uint16_t cur_free_trigger = rxq->rx_free_trigger;
1252
1253                 if (ixgbe_rx_alloc_bufs(rxq, true) != 0) {
1254                         int i, j;
1255                         PMD_RX_LOG(DEBUG, "RX mbuf alloc failed port_id=%u "
1256                                    "queue_id=%u", (unsigned) rxq->port_id,
1257                                    (unsigned) rxq->queue_id);
1258
1259                         rte_eth_devices[rxq->port_id].data->rx_mbuf_alloc_failed +=
1260                                 rxq->rx_free_thresh;
1261
1262                         /*
1263                          * Need to rewind any previous receives if we cannot
1264                          * allocate new buffers to replenish the old ones.
1265                          */
1266                         rxq->rx_nb_avail = 0;
1267                         rxq->rx_tail = (uint16_t)(rxq->rx_tail - nb_rx);
1268                         for (i = 0, j = rxq->rx_tail; i < nb_rx; ++i, ++j)
1269                                 rxq->sw_ring[j].mbuf = rxq->rx_stage[i];
1270
1271                         return 0;
1272                 }
1273
1274                 /* update tail pointer */
1275                 rte_wmb();
1276                 IXGBE_PCI_REG_WRITE(rxq->rdt_reg_addr, cur_free_trigger);
1277         }
1278
1279         if (rxq->rx_tail >= rxq->nb_rx_desc)
1280                 rxq->rx_tail = 0;
1281
1282         /* received any packets this loop? */
1283         if (rxq->rx_nb_avail)
1284                 return ixgbe_rx_fill_from_stage(rxq, rx_pkts, nb_pkts);
1285
1286         return 0;
1287 }
1288
1289 /* split requests into chunks of size RTE_PMD_IXGBE_RX_MAX_BURST */
1290 static uint16_t
1291 ixgbe_recv_pkts_bulk_alloc(void *rx_queue, struct rte_mbuf **rx_pkts,
1292                            uint16_t nb_pkts)
1293 {
1294         uint16_t nb_rx;
1295
1296         if (unlikely(nb_pkts == 0))
1297                 return 0;
1298
1299         if (likely(nb_pkts <= RTE_PMD_IXGBE_RX_MAX_BURST))
1300                 return rx_recv_pkts(rx_queue, rx_pkts, nb_pkts);
1301
1302         /* request is relatively large, chunk it up */
1303         nb_rx = 0;
1304         while (nb_pkts) {
1305                 uint16_t ret, n;
1306                 n = (uint16_t)RTE_MIN(nb_pkts, RTE_PMD_IXGBE_RX_MAX_BURST);
1307                 ret = rx_recv_pkts(rx_queue, &rx_pkts[nb_rx], n);
1308                 nb_rx = (uint16_t)(nb_rx + ret);
1309                 nb_pkts = (uint16_t)(nb_pkts - ret);
1310                 if (ret < n)
1311                         break;
1312         }
1313
1314         return nb_rx;
1315 }
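
/*
 * Illustrative sketch only (kept under #if 0, not part of the driver): the
 * bulk-alloc burst handler above is normally reached indirectly through the
 * generic rte_eth_rx_burst() API once it has been installed as the port's
 * rx_pkt_burst callback.  Port/queue numbers and the burst size are example
 * values.
 */
#if 0
static void example_poll_rx_queue(uint8_t port_id, uint16_t queue_id)
{
	struct rte_mbuf *pkts[32];
	uint16_t i, nb;

	for (;;) {
		/* Requests larger than RTE_PMD_IXGBE_RX_MAX_BURST are split
		 * into chunks by ixgbe_recv_pkts_bulk_alloc() itself. */
		nb = rte_eth_rx_burst(port_id, queue_id, pkts, 32);
		for (i = 0; i < nb; i++)
			rte_pktmbuf_free(pkts[i]);
	}
}
#endif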
1316
1317 uint16_t
1318 ixgbe_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts,
1319                 uint16_t nb_pkts)
1320 {
1321         struct ixgbe_rx_queue *rxq;
1322         volatile union ixgbe_adv_rx_desc *rx_ring;
1323         volatile union ixgbe_adv_rx_desc *rxdp;
1324         struct ixgbe_rx_entry *sw_ring;
1325         struct ixgbe_rx_entry *rxe;
1326         struct rte_mbuf *rxm;
1327         struct rte_mbuf *nmb;
1328         union ixgbe_adv_rx_desc rxd;
1329         uint64_t dma_addr;
1330         uint32_t staterr;
1331 #ifdef RTE_NEXT_ABI
1332         uint32_t pkt_info;
1333 #else
1334         uint32_t hlen_type_rss;
1335 #endif
1336         uint16_t pkt_len;
1337         uint16_t rx_id;
1338         uint16_t nb_rx;
1339         uint16_t nb_hold;
1340         uint64_t pkt_flags;
1341
1342         nb_rx = 0;
1343         nb_hold = 0;
1344         rxq = rx_queue;
1345         rx_id = rxq->rx_tail;
1346         rx_ring = rxq->rx_ring;
1347         sw_ring = rxq->sw_ring;
1348         while (nb_rx < nb_pkts) {
1349                 /*
1350                  * The order of operations here is important as the DD status
1351                  * bit must not be read after any other descriptor fields.
1352                  * rx_ring and rxdp are pointing to volatile data so the order
1353                  * of accesses cannot be reordered by the compiler. If they were
1354                  * not volatile, they could be reordered which could lead to
1355                  * using invalid descriptor fields when read from rxd.
1356                  */
1357                 rxdp = &rx_ring[rx_id];
1358                 staterr = rxdp->wb.upper.status_error;
1359                 if (!(staterr & rte_cpu_to_le_32(IXGBE_RXDADV_STAT_DD)))
1360                         break;
1361                 rxd = *rxdp;
1362
1363                 /*
1364                  * End of packet.
1365                  *
1366                  * If the IXGBE_RXDADV_STAT_EOP flag is not set, the RX packet
1367                  * is likely to be invalid and to be dropped by the various
1368                  * validation checks performed by the network stack.
1369                  *
1370                  * Allocate a new mbuf to replenish the RX ring descriptor.
1371                  * If the allocation fails:
1372                  *    - arrange for that RX descriptor to be the first one
1373                  *      being parsed the next time the receive function is
1374                  *      invoked [on the same queue].
1375                  *
1376                  *    - Stop parsing the RX ring and return immediately.
1377                  *
1378                  * This policy does not drop the packet received in the RX
1379                  * descriptor for which the allocation of a new mbuf failed.
1380                  * Thus, it allows that packet to be later retrieved if
1381                  * mbufs have been freed in the meantime.
1382                  * As a side effect, holding RX descriptors instead of
1383                  * systematically giving them back to the NIC may lead to
1384                  * RX ring exhaustion situations.
1385                  * However, the NIC can gracefully prevent such situations
1386                  * to happen by sending specific "back-pressure" flow control
1387                  * frames to its peer(s).
1388                  */
1389                 PMD_RX_LOG(DEBUG, "port_id=%u queue_id=%u rx_id=%u "
1390                            "ext_err_stat=0x%08x pkt_len=%u",
1391                            (unsigned) rxq->port_id, (unsigned) rxq->queue_id,
1392                            (unsigned) rx_id, (unsigned) staterr,
1393                            (unsigned) rte_le_to_cpu_16(rxd.wb.upper.length));
1394
1395                 nmb = rte_rxmbuf_alloc(rxq->mb_pool);
1396                 if (nmb == NULL) {
1397                         PMD_RX_LOG(DEBUG, "RX mbuf alloc failed port_id=%u "
1398                                    "queue_id=%u", (unsigned) rxq->port_id,
1399                                    (unsigned) rxq->queue_id);
1400                         rte_eth_devices[rxq->port_id].data->rx_mbuf_alloc_failed++;
1401                         break;
1402                 }
1403
1404                 nb_hold++;
1405                 rxe = &sw_ring[rx_id];
1406                 rx_id++;
1407                 if (rx_id == rxq->nb_rx_desc)
1408                         rx_id = 0;
1409
1410                 /* Prefetch next mbuf while processing current one. */
1411                 rte_ixgbe_prefetch(sw_ring[rx_id].mbuf);
1412
1413                 /*
1414                  * When next RX descriptor is on a cache-line boundary,
1415                  * prefetch the next 4 RX descriptors and the next 8 pointers
1416                  * to mbufs.
1417                  */
1418                 if ((rx_id & 0x3) == 0) {
1419                         rte_ixgbe_prefetch(&rx_ring[rx_id]);
1420                         rte_ixgbe_prefetch(&sw_ring[rx_id]);
1421                 }
1422
1423                 rxm = rxe->mbuf;
1424                 rxe->mbuf = nmb;
1425                 dma_addr =
1426                         rte_cpu_to_le_64(RTE_MBUF_DATA_DMA_ADDR_DEFAULT(nmb));
1427                 rxdp->read.hdr_addr = 0;
1428                 rxdp->read.pkt_addr = dma_addr;
1429
1430                 /*
1431                  * Initialize the returned mbuf.
1432                  * 1) setup generic mbuf fields:
1433                  *    - number of segments,
1434                  *    - next segment,
1435                  *    - packet length,
1436                  *    - RX port identifier.
1437                  * 2) integrate hardware offload data, if any:
1438                  *    - RSS flag & hash,
1439                  *    - IP checksum flag,
1440                  *    - VLAN TCI, if any,
1441                  *    - error flags.
1442                  */
1443                 pkt_len = (uint16_t) (rte_le_to_cpu_16(rxd.wb.upper.length) -
1444                                       rxq->crc_len);
1445                 rxm->data_off = RTE_PKTMBUF_HEADROOM;
1446                 rte_packet_prefetch((char *)rxm->buf_addr + rxm->data_off);
1447                 rxm->nb_segs = 1;
1448                 rxm->next = NULL;
1449                 rxm->pkt_len = pkt_len;
1450                 rxm->data_len = pkt_len;
1451                 rxm->port = rxq->port_id;
1452
1453 #ifdef RTE_NEXT_ABI
1454                 pkt_info = rte_le_to_cpu_32(rxd.wb.lower.lo_dword.hs_rss.
1455                                                                 pkt_info);
1456                 /* Only valid if PKT_RX_VLAN_PKT set in pkt_flags */
1457                 rxm->vlan_tci = rte_le_to_cpu_16(rxd.wb.upper.vlan);
1458
1459                 pkt_flags = rx_desc_status_to_pkt_flags(staterr);
1460                 pkt_flags = pkt_flags | rx_desc_error_to_pkt_flags(staterr);
1461                 pkt_flags = pkt_flags |
1462                         ixgbe_rxd_pkt_info_to_pkt_flags(pkt_info);
1463                 rxm->ol_flags = pkt_flags;
1464                 rxm->packet_type = ixgbe_rxd_pkt_info_to_pkt_type(pkt_info);
1465 #else /* RTE_NEXT_ABI */
1466                 hlen_type_rss = rte_le_to_cpu_32(rxd.wb.lower.lo_dword.data);
1467                 /* Only valid if PKT_RX_VLAN_PKT set in pkt_flags */
1468                 rxm->vlan_tci = rte_le_to_cpu_16(rxd.wb.upper.vlan);
1469
1470                 pkt_flags = rx_desc_hlen_type_rss_to_pkt_flags(hlen_type_rss);
1471                 pkt_flags = pkt_flags | rx_desc_status_to_pkt_flags(staterr);
1472                 pkt_flags = pkt_flags | rx_desc_error_to_pkt_flags(staterr);
1473                 rxm->ol_flags = pkt_flags;
1474 #endif /* RTE_NEXT_ABI */
1475
1476                 if (likely(pkt_flags & PKT_RX_RSS_HASH))
1477                         rxm->hash.rss = rte_le_to_cpu_32(
1478                                                 rxd.wb.lower.hi_dword.rss);
1479                 else if (pkt_flags & PKT_RX_FDIR) {
1480                         rxm->hash.fdir.hash = rte_le_to_cpu_16(
1481                                         rxd.wb.lower.hi_dword.csum_ip.csum) &
1482                                         IXGBE_ATR_HASH_MASK;
1483                         rxm->hash.fdir.id = rte_le_to_cpu_16(
1484                                         rxd.wb.lower.hi_dword.csum_ip.ip_id);
1485                 }
1486                 /*
1487                  * Store the mbuf address into the next entry of the array
1488                  * of returned packets.
1489                  */
1490                 rx_pkts[nb_rx++] = rxm;
1491         }
1492         rxq->rx_tail = rx_id;
1493
1494         /*
1495          * If the number of free RX descriptors is greater than the RX free
1496          * threshold of the queue, advance the Receive Descriptor Tail (RDT)
1497          * register.
1498          * Update the RDT with the value of the last processed RX descriptor
1499          * minus 1, to guarantee that the RDT register is never equal to the
1500          * RDH register, which creates a "full" ring situation from the
1501          * hardware point of view...
1502          */
1503         nb_hold = (uint16_t) (nb_hold + rxq->nb_rx_hold);
1504         if (nb_hold > rxq->rx_free_thresh) {
1505                 PMD_RX_LOG(DEBUG, "port_id=%u queue_id=%u rx_tail=%u "
1506                            "nb_hold=%u nb_rx=%u",
1507                            (unsigned) rxq->port_id, (unsigned) rxq->queue_id,
1508                            (unsigned) rx_id, (unsigned) nb_hold,
1509                            (unsigned) nb_rx);
1510                 rx_id = (uint16_t) ((rx_id == 0) ?
1511                                      (rxq->nb_rx_desc - 1) : (rx_id - 1));
1512                 IXGBE_PCI_REG_WRITE(rxq->rdt_reg_addr, rx_id);
1513                 nb_hold = 0;
1514         }
1515         rxq->nb_rx_hold = nb_hold;
1516         return (nb_rx);
1517 }
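
/*
 * Worked example of the RDT update rule above (numbers are illustrative
 * only): with nb_rx_desc = 128 and rx_free_thresh = 32, assume rx_tail has
 * just advanced to 40 and nb_hold reaches 33.  Since 33 > 32, the tail
 * register is written with 40 - 1 = 39, i.e. one descriptor behind the next
 * one software will probe, so RDT can never become equal to RDH.
 */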
1518
1519 /**
1520  * Detect an RSC descriptor.
1521  */
1522 static inline uint32_t
1523 ixgbe_rsc_count(union ixgbe_adv_rx_desc *rx)
1524 {
1525         return (rte_le_to_cpu_32(rx->wb.lower.lo_dword.data) &
1526                 IXGBE_RXDADV_RSCCNT_MASK) >> IXGBE_RXDADV_RSCCNT_SHIFT;
1527 }
1528
1529 /**
1530  * ixgbe_fill_cluster_head_buf - fill the first mbuf of the returned packet
1531  *
1532  * Fill the following info in the HEAD buffer of the Rx cluster:
1533  *    - RX port identifier
1534  *    - hardware offload data, if any:
1535  *      - RSS flag & hash
1536  *      - IP checksum flag
1537  *      - VLAN TCI, if any
1538  *      - error flags
1539  * @head HEAD of the packet cluster
1540  * @desc HW descriptor to get data from
1541  * @port_id Port ID of the Rx queue
1542  */
1543 static inline void
1544 ixgbe_fill_cluster_head_buf(
1545         struct rte_mbuf *head,
1546         union ixgbe_adv_rx_desc *desc,
1547         uint8_t port_id,
1548         uint32_t staterr)
1549 {
1550 #ifdef RTE_NEXT_ABI
1551         uint16_t pkt_info;
1552         uint64_t pkt_flags;
1553
1554         head->port = port_id;
1555
1556         /* The vlan_tci field is only valid when PKT_RX_VLAN_PKT is
1557          * set in the pkt_flags field.
1558          */
1559         head->vlan_tci = rte_le_to_cpu_16(desc->wb.upper.vlan);
1560         pkt_info = rte_le_to_cpu_32(desc->wb.lower.lo_dword.hs_rss.pkt_info);
1561         pkt_flags = rx_desc_status_to_pkt_flags(staterr);
1562         pkt_flags |= rx_desc_error_to_pkt_flags(staterr);
1563         pkt_flags |= ixgbe_rxd_pkt_info_to_pkt_flags(pkt_info);
1564         head->ol_flags = pkt_flags;
1565         head->packet_type = ixgbe_rxd_pkt_info_to_pkt_type(pkt_info);
1566 #else /* RTE_NEXT_ABI */
1567         uint32_t hlen_type_rss;
1568         uint64_t pkt_flags;
1569
1570         head->port = port_id;
1571
1572         /*
1573          * The vlan_tci field is only valid when PKT_RX_VLAN_PKT is
1574          * set in the pkt_flags field.
1575          */
1576         head->vlan_tci = rte_le_to_cpu_16(desc->wb.upper.vlan);
1577         hlen_type_rss = rte_le_to_cpu_32(desc->wb.lower.lo_dword.data);
1578         pkt_flags = rx_desc_hlen_type_rss_to_pkt_flags(hlen_type_rss);
1579         pkt_flags |= rx_desc_status_to_pkt_flags(staterr);
1580         pkt_flags |= rx_desc_error_to_pkt_flags(staterr);
1581         head->ol_flags = pkt_flags;
1582 #endif /* RTE_NEXT_ABI */
1583
1584         if (likely(pkt_flags & PKT_RX_RSS_HASH))
1585                 head->hash.rss = rte_le_to_cpu_32(desc->wb.lower.hi_dword.rss);
1586         else if (pkt_flags & PKT_RX_FDIR) {
1587                 head->hash.fdir.hash =
1588                         rte_le_to_cpu_16(desc->wb.lower.hi_dword.csum_ip.csum)
1589                                                           & IXGBE_ATR_HASH_MASK;
1590                 head->hash.fdir.id =
1591                         rte_le_to_cpu_16(desc->wb.lower.hi_dword.csum_ip.ip_id);
1592         }
1593 }
1594
1595 /**
1596  * ixgbe_recv_pkts_lro - receive handler for the LRO case.
1597  *
1598  * @rx_queue Rx queue handle
1599  * @rx_pkts table of received packets
1600  * @nb_pkts size of rx_pkts table
1601  * @bulk_alloc if TRUE, bulk allocation is used for HW ring refilling
1602  *
1603  * Handles the Rx HW ring completions when RSC feature is configured. Uses an
1604  * additional ring of ixgbe_scattered_rx_entry's that will hold the relevant RSC info.
1605  *
1606  * We use the same logic as in Linux and in FreeBSD ixgbe drivers:
1607  * 1) When non-EOP RSC completion arrives:
1608  *    a) Update the HEAD of the current RSC aggregation cluster with the new
1609  *       segment's data length.
1610  *    b) Set the "next" pointer of the current segment to point to the segment
1611  *       at the NEXTP index.
1612  *    c) Pass the HEAD of RSC aggregation cluster on to the next NEXTP entry
1613  *       in the sw_sc_ring.
1614  * 2) When EOP arrives we just update the cluster's total length and offload
1615  *    flags and deliver the cluster up to the upper layers. In our case - put it
1616  *    in the rx_pkts table.
1617  *
1618  * Returns the number of received packets/clusters (according to the "bulk
1619  * receive" interface).
1620  */
1621 static inline uint16_t
1622 ixgbe_recv_pkts_lro(void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t nb_pkts,
1623                     bool bulk_alloc)
1624 {
1625         struct ixgbe_rx_queue *rxq = rx_queue;
1626         volatile union ixgbe_adv_rx_desc *rx_ring = rxq->rx_ring;
1627         struct ixgbe_rx_entry *sw_ring = rxq->sw_ring;
1628         struct ixgbe_scattered_rx_entry *sw_sc_ring = rxq->sw_sc_ring;
1629         uint16_t rx_id = rxq->rx_tail;
1630         uint16_t nb_rx = 0;
1631         uint16_t nb_hold = rxq->nb_rx_hold;
1632         uint16_t prev_id = rxq->rx_tail;
1633
1634         while (nb_rx < nb_pkts) {
1635                 bool eop;
1636                 struct ixgbe_rx_entry *rxe;
1637                 struct ixgbe_scattered_rx_entry *sc_entry;
1638                 struct ixgbe_scattered_rx_entry *next_sc_entry;
1639                 struct ixgbe_rx_entry *next_rxe;
1640                 struct rte_mbuf *first_seg;
1641                 struct rte_mbuf *rxm;
1642                 struct rte_mbuf *nmb;
1643                 union ixgbe_adv_rx_desc rxd;
1644                 uint16_t data_len;
1645                 uint16_t next_id;
1646                 volatile union ixgbe_adv_rx_desc *rxdp;
1647                 uint32_t staterr;
1648
1649 next_desc:
1650                 /*
1651                  * The code in this whole file uses the volatile pointer to
1652                  * ensure the read ordering of the status and the rest of the
1653                  * descriptor fields (on the compiler level only!!!). This is so
1654                  * UGLY - why not just use the compiler barrier instead? DPDK
1655                  * even has the rte_compiler_barrier() for that.
1656                  *
1657                  * But most importantly this is just wrong because this doesn't
1658                  * ensure memory ordering in a general case at all. For
1659                  * instance, DPDK is supposed to work on Power CPUs where
1660                  * compiler barrier may just not be enough!
1661                  *
1662                  * I tried to write only this function properly to have a
1663                  * starting point (as a part of an LRO/RSC series) but the
1664                  * compiler cursed at me when I tried to cast away the
1665                  * "volatile" from rx_ring (yes, it's volatile too!!!). So, I'm
1666                  * keeping it the way it is for now.
1667                  *
1668                  * The code in this file is broken in so many other places and
1669                  * will just not work on a big endian CPU anyway, so the
1670                  * lines below will have to be revisited together with the rest
1671                  * of the ixgbe PMD.
1672                  *
1673                  * TODO:
1674                  *    - Get rid of "volatile" crap and let the compiler do its
1675                  *      job.
1676                  *    - Use the proper memory barrier (rte_rmb()) to ensure the
1677                  *      memory ordering below.
1678                  */
1679                 rxdp = &rx_ring[rx_id];
1680                 staterr = rte_le_to_cpu_32(rxdp->wb.upper.status_error);
1681
1682                 if (!(staterr & IXGBE_RXDADV_STAT_DD))
1683                         break;
1684
1685                 rxd = *rxdp;
1686
1687                 PMD_RX_LOG(DEBUG, "port_id=%u queue_id=%u rx_id=%u "
1688                                   "staterr=0x%x data_len=%u",
1689                            rxq->port_id, rxq->queue_id, rx_id, staterr,
1690                            rte_le_to_cpu_16(rxd.wb.upper.length));
1691
1692                 if (!bulk_alloc) {
1693                         nmb = rte_rxmbuf_alloc(rxq->mb_pool);
1694                         if (nmb == NULL) {
1695                                 PMD_RX_LOG(DEBUG, "RX mbuf alloc failed "
1696                                                   "port_id=%u queue_id=%u",
1697                                            rxq->port_id, rxq->queue_id);
1698
1699                                 rte_eth_devices[rxq->port_id].data->
1700                                                         rx_mbuf_alloc_failed++;
1701                                 break;
1702                         }
1703                 }
1704                 else if (nb_hold > rxq->rx_free_thresh) {
1705                         uint16_t next_rdt = rxq->rx_free_trigger;
1706
1707                         if (!ixgbe_rx_alloc_bufs(rxq, false)) {
1708                                 rte_wmb();
1709                                 IXGBE_PCI_REG_WRITE(rxq->rdt_reg_addr,
1710                                                     next_rdt);
1711                                 nb_hold -= rxq->rx_free_thresh;
1712                         } else {
1713                                 PMD_RX_LOG(DEBUG, "RX bulk alloc failed "
1714                                                   "port_id=%u queue_id=%u",
1715                                            rxq->port_id, rxq->queue_id);
1716
1717                                 rte_eth_devices[rxq->port_id].data->
1718                                                         rx_mbuf_alloc_failed++;
1719                                 break;
1720                         }
1721                 }
1722
1723                 nb_hold++;
1724                 rxe = &sw_ring[rx_id];
1725                 eop = staterr & IXGBE_RXDADV_STAT_EOP;
1726
1727                 next_id = rx_id + 1;
1728                 if (next_id == rxq->nb_rx_desc)
1729                         next_id = 0;
1730
1731                 /* Prefetch next mbuf while processing current one. */
1732                 rte_ixgbe_prefetch(sw_ring[next_id].mbuf);
1733
1734                 /*
1735                  * When next RX descriptor is on a cache-line boundary,
1736                  * prefetch the next 4 RX descriptors and the next 8 pointers
1737                  * to mbufs.
1738                  */
1739                 if ((next_id & 0x3) == 0) {
1740                         rte_ixgbe_prefetch(&rx_ring[next_id]);
1741                         rte_ixgbe_prefetch(&sw_ring[next_id]);
1742                 }
1743
1744                 rxm = rxe->mbuf;
1745
1746                 if (!bulk_alloc) {
1747                         __le64 dma =
1748                           rte_cpu_to_le_64(RTE_MBUF_DATA_DMA_ADDR_DEFAULT(nmb));
1749                         /*
1750                          * Update RX descriptor with the physical address of the
1751                          * new data buffer of the new allocated mbuf.
1752                          */
1753                         rxe->mbuf = nmb;
1754
1755                         rxm->data_off = RTE_PKTMBUF_HEADROOM;
1756                         rxdp->read.hdr_addr = 0;
1757                         rxdp->read.pkt_addr = dma;
1758                 } else
1759                         rxe->mbuf = NULL;
1760
1761                 /*
1762                  * Set data length & data buffer address of mbuf.
1763                  */
1764                 data_len = rte_le_to_cpu_16(rxd.wb.upper.length);
1765                 rxm->data_len = data_len;
1766
1767                 if (!eop) {
1768                         uint16_t nextp_id;
1769                         /*
1770                          * Get next descriptor index:
1771                          *  - For RSC it's in the NEXTP field.
1772                          *  - For a scattered packet - it's just a following
1773                          *    descriptor.
1774                          */
1775                         if (ixgbe_rsc_count(&rxd))
1776                                 nextp_id =
1777                                         (staterr & IXGBE_RXDADV_NEXTP_MASK) >>
1778                                                        IXGBE_RXDADV_NEXTP_SHIFT;
1779                         else
1780                                 nextp_id = next_id;
1781
1782                         next_sc_entry = &sw_sc_ring[nextp_id];
1783                         next_rxe = &sw_ring[nextp_id];
1784                         rte_ixgbe_prefetch(next_rxe);
1785                 }
1786
1787                 sc_entry = &sw_sc_ring[rx_id];
1788                 first_seg = sc_entry->fbuf;
1789                 sc_entry->fbuf = NULL;
1790
1791                 /*
1792                  * If this is the first buffer of the received packet,
1793                  * set the pointer to the first mbuf of the packet and
1794                  * initialize its context.
1795                  * Otherwise, update the total length and the number of segments
1796                  * of the current scattered packet, and update the pointer to
1797                  * the last mbuf of the current packet.
1798                  */
1799                 if (first_seg == NULL) {
1800                         first_seg = rxm;
1801                         first_seg->pkt_len = data_len;
1802                         first_seg->nb_segs = 1;
1803                 } else {
1804                         first_seg->pkt_len += data_len;
1805                         first_seg->nb_segs++;
1806                 }
1807
1808                 prev_id = rx_id;
1809                 rx_id = next_id;
1810
1811                 /*
1812                  * If this is not the last buffer of the received packet, update
1813                  * the pointer to the first mbuf at the NEXTP entry in the
1814                  * sw_sc_ring and continue to parse the RX ring.
1815                  */
1816                 if (!eop) {
1817                         rxm->next = next_rxe->mbuf;
1818                         next_sc_entry->fbuf = first_seg;
1819                         goto next_desc;
1820                 }
1821
1822                 /*
1823                  * This is the last buffer of the received packet - return
1824                  * the current cluster to the user.
1825                  */
1826                 rxm->next = NULL;
1827
1828                 /* Initialize the first mbuf of the returned packet */
1829                 ixgbe_fill_cluster_head_buf(first_seg, &rxd, rxq->port_id,
1830                                             staterr);
1831
1832                 /*
1833                  * Deal with the case when HW CRC stripping is disabled.
1834                  * That can't happen when LRO is enabled, but still could
1835                  * happen for scattered RX mode.
1836                  */
1837                 first_seg->pkt_len -= rxq->crc_len;
1838                 if (unlikely(rxm->data_len <= rxq->crc_len)) {
1839                         struct rte_mbuf *lp;
1840
1841                         for (lp = first_seg; lp->next != rxm; lp = lp->next)
1842                                 ;
1843
1844                         first_seg->nb_segs--;
1845                         lp->data_len -= rxq->crc_len - rxm->data_len;
1846                         lp->next = NULL;
1847                         rte_pktmbuf_free_seg(rxm);
1848                 } else
1849                         rxm->data_len -= rxq->crc_len;
1850
1851                 /* Prefetch data of first segment, if configured to do so. */
1852                 rte_packet_prefetch((char *)first_seg->buf_addr +
1853                         first_seg->data_off);
1854
1855                 /*
1856                  * Store the mbuf address into the next entry of the array
1857                  * of returned packets.
1858                  */
1859                 rx_pkts[nb_rx++] = first_seg;
1860         }
1861
1862         /*
1863          * Record index of the next RX descriptor to probe.
1864          */
1865         rxq->rx_tail = rx_id;
1866
1867         /*
1868          * If the number of free RX descriptors is greater than the RX free
1869          * threshold of the queue, advance the Receive Descriptor Tail (RDT)
1870          * register.
1871          * Update the RDT with the value of the last processed RX descriptor
1872          * minus 1, to guarantee that the RDT register is never equal to the
1873          * RDH register, which creates a "full" ring situation from the
1874          * hardware point of view...
1875          */
1876         if (!bulk_alloc && nb_hold > rxq->rx_free_thresh) {
1877                 PMD_RX_LOG(DEBUG, "port_id=%u queue_id=%u rx_tail=%u "
1878                            "nb_hold=%u nb_rx=%u",
1879                            rxq->port_id, rxq->queue_id, rx_id, nb_hold, nb_rx);
1880
1881                 rte_wmb();
1882                 IXGBE_PCI_REG_WRITE(rxq->rdt_reg_addr, prev_id);
1883                 nb_hold = 0;
1884         }
1885
1886         rxq->nb_rx_hold = nb_hold;
1887         return nb_rx;
1888 }
1889
1890 uint16_t
1891 ixgbe_recv_pkts_lro_single_alloc(void *rx_queue, struct rte_mbuf **rx_pkts,
1892                                  uint16_t nb_pkts)
1893 {
1894         return ixgbe_recv_pkts_lro(rx_queue, rx_pkts, nb_pkts, false);
1895 }
1896
1897 uint16_t
1898 ixgbe_recv_pkts_lro_bulk_alloc(void *rx_queue, struct rte_mbuf **rx_pkts,
1899                                uint16_t nb_pkts)
1900 {
1901         return ixgbe_recv_pkts_lro(rx_queue, rx_pkts, nb_pkts, true);
1902 }
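
/*
 * The two wrappers above are what the PMD installs as the ethdev receive
 * handler when RSC/scattered Rx is in use.  Illustrative sketch only (the
 * actual selection is done in the PMD's Rx setup code, not here):
 */
#if 0
	if (adapter->rx_bulk_alloc_allowed)
		dev->rx_pkt_burst = ixgbe_recv_pkts_lro_bulk_alloc;
	else
		dev->rx_pkt_burst = ixgbe_recv_pkts_lro_single_alloc;
#endif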
1903
1904 /*********************************************************************
1905  *
1906  *  Queue management functions
1907  *
1908  **********************************************************************/
1909
1910 /*
1911  * Rings setup and release.
1912  *
1913  * TDBA/RDBA should be aligned on a 16 byte boundary, but TDLEN/RDLEN must be a
1914  * multiple of 128 bytes. So we align TDBA/RDBA on a 128 byte boundary, which
1915  * also helps with cache line effects. H/W supports cache line sizes up to 128.
1916  */
1917 #define IXGBE_ALIGN 128
1918
1919 /*
1920  * Maximum number of Ring Descriptors.
1921  *
1922  * Since RDLEN/TDLEN should be multiple of 128 bytes, the number of ring
1923  * descriptors should meet the following condition:
1924  *      (num_ring_desc * sizeof(rx/tx descriptor)) % 128 == 0
1925  */
1926 #define IXGBE_MIN_RING_DESC 32
1927 #define IXGBE_MAX_RING_DESC 4096
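
/*
 * Worked example for the condition above: both advanced Rx and Tx
 * descriptors are 16 bytes, so RDLEN/TDLEN is a multiple of 128 bytes
 * whenever the descriptor count is a multiple of 8.  E.g. 512 descriptors
 * give 512 * 16 = 8192 bytes = 64 * 128, which is valid; 100 descriptors
 * (1600 bytes) would not be.
 */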
1928
1929 /*
1930  * Create memzone for HW rings. malloc can't be used as the physical address is
1931  * needed. If the memzone is already created, then this function returns a ptr
1932  * to the old one.
1933  */
1934 static const struct rte_memzone * __attribute__((cold))
1935 ring_dma_zone_reserve(struct rte_eth_dev *dev, const char *ring_name,
1936                       uint16_t queue_id, uint32_t ring_size, int socket_id)
1937 {
1938         char z_name[RTE_MEMZONE_NAMESIZE];
1939         const struct rte_memzone *mz;
1940
1941         snprintf(z_name, sizeof(z_name), "%s_%s_%d_%d",
1942                         dev->driver->pci_drv.name, ring_name,
1943                         dev->data->port_id, queue_id);
1944
1945         mz = rte_memzone_lookup(z_name);
1946         if (mz)
1947                 return mz;
1948
1949 #ifdef RTE_LIBRTE_XEN_DOM0
1950         return rte_memzone_reserve_bounded(z_name, ring_size,
1951                 socket_id, 0, IXGBE_ALIGN, RTE_PGSIZE_2M);
1952 #else
1953         return rte_memzone_reserve_aligned(z_name, ring_size,
1954                 socket_id, 0, IXGBE_ALIGN);
1955 #endif
1956 }
1957
1958 static void __attribute__((cold))
1959 ixgbe_tx_queue_release_mbufs(struct ixgbe_tx_queue *txq)
1960 {
1961         unsigned i;
1962
1963         if (txq->sw_ring != NULL) {
1964                 for (i = 0; i < txq->nb_tx_desc; i++) {
1965                         if (txq->sw_ring[i].mbuf != NULL) {
1966                                 rte_pktmbuf_free_seg(txq->sw_ring[i].mbuf);
1967                                 txq->sw_ring[i].mbuf = NULL;
1968                         }
1969                 }
1970         }
1971 }
1972
1973 static void __attribute__((cold))
1974 ixgbe_tx_free_swring(struct ixgbe_tx_queue *txq)
1975 {
1976         if (txq != NULL &&
1977             txq->sw_ring != NULL)
1978                 rte_free(txq->sw_ring);
1979 }
1980
1981 static void __attribute__((cold))
1982 ixgbe_tx_queue_release(struct ixgbe_tx_queue *txq)
1983 {
1984         if (txq != NULL && txq->ops != NULL) {
1985                 txq->ops->release_mbufs(txq);
1986                 txq->ops->free_swring(txq);
1987                 rte_free(txq);
1988         }
1989 }
1990
1991 void __attribute__((cold))
1992 ixgbe_dev_tx_queue_release(void *txq)
1993 {
1994         ixgbe_tx_queue_release(txq);
1995 }
1996
1997 /* (Re)set dynamic ixgbe_tx_queue fields to defaults */
1998 static void __attribute__((cold))
1999 ixgbe_reset_tx_queue(struct ixgbe_tx_queue *txq)
2000 {
2001         static const union ixgbe_adv_tx_desc zeroed_desc = {{0}};
2002         struct ixgbe_tx_entry *txe = txq->sw_ring;
2003         uint16_t prev, i;
2004
2005         /* Zero out HW ring memory */
2006         for (i = 0; i < txq->nb_tx_desc; i++) {
2007                 txq->tx_ring[i] = zeroed_desc;
2008         }
2009
2010         /* Initialize SW ring entries */
2011         prev = (uint16_t) (txq->nb_tx_desc - 1);
2012         for (i = 0; i < txq->nb_tx_desc; i++) {
2013                 volatile union ixgbe_adv_tx_desc *txd = &txq->tx_ring[i];
2014                 txd->wb.status = rte_cpu_to_le_32(IXGBE_TXD_STAT_DD);
2015                 txe[i].mbuf = NULL;
2016                 txe[i].last_id = i;
2017                 txe[prev].next_id = i;
2018                 prev = i;
2019         }
2020
2021         txq->tx_next_dd = (uint16_t)(txq->tx_rs_thresh - 1);
2022         txq->tx_next_rs = (uint16_t)(txq->tx_rs_thresh - 1);
2023
2024         txq->tx_tail = 0;
2025         txq->nb_tx_used = 0;
2026         /*
2027          * Always allow 1 descriptor to be un-allocated to avoid
2028          * a H/W race condition
2029          */
2030         txq->last_desc_cleaned = (uint16_t)(txq->nb_tx_desc - 1);
2031         txq->nb_tx_free = (uint16_t)(txq->nb_tx_desc - 1);
2032         txq->ctx_curr = 0;
2033         memset((void*)&txq->ctx_cache, 0,
2034                 IXGBE_CTX_NUM * sizeof(struct ixgbe_advctx_info));
2035 }
2036
2037 static const struct ixgbe_txq_ops def_txq_ops = {
2038         .release_mbufs = ixgbe_tx_queue_release_mbufs,
2039         .free_swring = ixgbe_tx_free_swring,
2040         .reset = ixgbe_reset_tx_queue,
2041 };
2042
2043 /* Takes an ethdev and a queue and sets up the tx function to be used based on
2044  * the queue parameters. Used in tx_queue_setup by primary process and then
2045  * in dev_init by secondary process when attaching to an existing ethdev.
2046  */
2047 void __attribute__((cold))
2048 ixgbe_set_tx_function(struct rte_eth_dev *dev, struct ixgbe_tx_queue *txq)
2049 {
2050         /* Use a simple Tx queue (no offloads, no multi segs) if possible */
2051         if (((txq->txq_flags & IXGBE_SIMPLE_FLAGS) == IXGBE_SIMPLE_FLAGS)
2052                         && (txq->tx_rs_thresh >= RTE_PMD_IXGBE_TX_MAX_BURST)) {
2053                 PMD_INIT_LOG(DEBUG, "Using simple tx code path");
2054 #ifdef RTE_IXGBE_INC_VECTOR
2055                 if (txq->tx_rs_thresh <= RTE_IXGBE_TX_MAX_FREE_BUF_SZ &&
2056                                 (rte_eal_process_type() != RTE_PROC_PRIMARY ||
2057                                         ixgbe_txq_vec_setup(txq) == 0)) {
2058                         PMD_INIT_LOG(DEBUG, "Vector tx enabled.");
2059                         dev->tx_pkt_burst = ixgbe_xmit_pkts_vec;
2060                 } else
2061 #endif
2062                 dev->tx_pkt_burst = ixgbe_xmit_pkts_simple;
2063         } else {
2064                 PMD_INIT_LOG(DEBUG, "Using full-featured tx code path");
2065                 PMD_INIT_LOG(DEBUG,
2066                                 " - txq_flags = %lx " "[IXGBE_SIMPLE_FLAGS=%lx]",
2067                                 (unsigned long)txq->txq_flags,
2068                                 (unsigned long)IXGBE_SIMPLE_FLAGS);
2069                 PMD_INIT_LOG(DEBUG,
2070                                 " - tx_rs_thresh = %lu " "[RTE_PMD_IXGBE_TX_MAX_BURST=%lu]",
2071                                 (unsigned long)txq->tx_rs_thresh,
2072                                 (unsigned long)RTE_PMD_IXGBE_TX_MAX_BURST);
2073                 dev->tx_pkt_burst = ixgbe_xmit_pkts;
2074         }
2075 }
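
/*
 * As an illustrative example of the selection above: a queue configured with
 * txq_flags that sets all bits of IXGBE_SIMPLE_FLAGS (no offloads, no
 * multi-segment packets) and a tx_rs_thresh that is at least
 * RTE_PMD_IXGBE_TX_MAX_BURST but no larger than RTE_IXGBE_TX_MAX_FREE_BUF_SZ
 * ends up on the vector Tx path (when RTE_IXGBE_INC_VECTOR is enabled and the
 * vector setup succeeds), while a queue that needs multi-segment or offload
 * support falls back to the full-featured path.
 */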
2076
2077 int __attribute__((cold))
2078 ixgbe_dev_tx_queue_setup(struct rte_eth_dev *dev,
2079                          uint16_t queue_idx,
2080                          uint16_t nb_desc,
2081                          unsigned int socket_id,
2082                          const struct rte_eth_txconf *tx_conf)
2083 {
2084         const struct rte_memzone *tz;
2085         struct ixgbe_tx_queue *txq;
2086         struct ixgbe_hw     *hw;
2087         uint16_t tx_rs_thresh, tx_free_thresh;
2088
2089         PMD_INIT_FUNC_TRACE();
2090         hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
2091
2092         /*
2093          * Validate number of transmit descriptors.
2094          * It must not exceed hardware maximum, and must be multiple
2095          * of IXGBE_ALIGN.
2096          */
2097         if (((nb_desc * sizeof(union ixgbe_adv_tx_desc)) % IXGBE_ALIGN) != 0 ||
2098             (nb_desc > IXGBE_MAX_RING_DESC) ||
2099             (nb_desc < IXGBE_MIN_RING_DESC)) {
2100                 return -EINVAL;
2101         }
2102
2103         /*
2104          * The following two parameters control the setting of the RS bit on
2105          * transmit descriptors.
2106          * TX descriptors will have their RS bit set after txq->tx_rs_thresh
2107          * descriptors have been used.
2108          * The TX descriptor ring will be cleaned after txq->tx_free_thresh
2109          * descriptors are used or if the number of descriptors required
2110          * to transmit a packet is greater than the number of free TX
2111          * descriptors.
2112          * The following constraints must be satisfied:
2113          *  tx_rs_thresh must be greater than 0.
2114          *  tx_rs_thresh must be less than the size of the ring minus 2.
2115          *  tx_rs_thresh must be less than or equal to tx_free_thresh.
2116          *  tx_rs_thresh must be a divisor of the ring size.
2117          *  tx_free_thresh must be greater than 0.
2118          *  tx_free_thresh must be less than the size of the ring minus 3.
2119          * One descriptor in the TX ring is used as a sentinel to avoid a
2120          * H/W race condition, hence the maximum threshold constraints.
2121          * When set to zero use default values.
2122          */
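        /*
         * Worked example of the constraints above (illustrative values only):
         * with nb_desc = 512, tx_rs_thresh = 32 and tx_free_thresh = 32 every
         * check passes: 32 > 0, 32 < 510, 32 <= 32, 512 % 32 == 0 and 32 < 509.
         */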
2123         tx_rs_thresh = (uint16_t)((tx_conf->tx_rs_thresh) ?
2124                         tx_conf->tx_rs_thresh : DEFAULT_TX_RS_THRESH);
2125         tx_free_thresh = (uint16_t)((tx_conf->tx_free_thresh) ?
2126                         tx_conf->tx_free_thresh : DEFAULT_TX_FREE_THRESH);
2127         if (tx_rs_thresh >= (nb_desc - 2)) {
2128                 PMD_INIT_LOG(ERR, "tx_rs_thresh must be less than the number "
2129                              "of TX descriptors minus 2. (tx_rs_thresh=%u "
2130                              "port=%d queue=%d)", (unsigned int)tx_rs_thresh,
2131                              (int)dev->data->port_id, (int)queue_idx);
2132                 return -(EINVAL);
2133         }
2134         if (tx_free_thresh >= (nb_desc - 3)) {
2135                 PMD_INIT_LOG(ERR, "tx_free_thresh must be less than the "
2136                              "number of TX descriptors minus 3. "
2137                              "(tx_free_thresh=%u "
2138                              "port=%d queue=%d)",
2139                              (unsigned int)tx_free_thresh,
2140                              (int)dev->data->port_id, (int)queue_idx);
2141                 return -(EINVAL);
2142         }
2143         if (tx_rs_thresh > tx_free_thresh) {
2144                 PMD_INIT_LOG(ERR, "tx_rs_thresh must be less than or equal to "
2145                              "tx_free_thresh. (tx_free_thresh=%u "
2146                              "tx_rs_thresh=%u port=%d queue=%d)",
2147                              (unsigned int)tx_free_thresh,
2148                              (unsigned int)tx_rs_thresh,
2149                              (int)dev->data->port_id,
2150                              (int)queue_idx);
2151                 return -(EINVAL);
2152         }
2153         if ((nb_desc % tx_rs_thresh) != 0) {
2154                 PMD_INIT_LOG(ERR, "tx_rs_thresh must be a divisor of the "
2155                              "number of TX descriptors. (tx_rs_thresh=%u "
2156                              "port=%d queue=%d)", (unsigned int)tx_rs_thresh,
2157                              (int)dev->data->port_id, (int)queue_idx);
2158                 return -(EINVAL);
2159         }
2160
2161         /*
2162          * If tx_rs_thresh is greater than 1, then TX WTHRESH should be
2163          * set to 0. If WTHRESH is greater than zero, the RS bit is ignored
2164          * by the NIC and all descriptors are written back after the NIC
2165          * accumulates WTHRESH descriptors.
2166          */
2167         if ((tx_rs_thresh > 1) && (tx_conf->tx_thresh.wthresh != 0)) {
2168                 PMD_INIT_LOG(ERR, "TX WTHRESH must be set to 0 if "
2169                              "tx_rs_thresh is greater than 1. (tx_rs_thresh=%u "
2170                              "port=%d queue=%d)", (unsigned int)tx_rs_thresh,
2171                              (int)dev->data->port_id, (int)queue_idx);
2172                 return -(EINVAL);
2173         }
2174
2175         /* Free memory prior to re-allocation if needed... */
2176         if (dev->data->tx_queues[queue_idx] != NULL) {
2177                 ixgbe_tx_queue_release(dev->data->tx_queues[queue_idx]);
2178                 dev->data->tx_queues[queue_idx] = NULL;
2179         }
2180
2181         /* First allocate the tx queue data structure */
2182         txq = rte_zmalloc_socket("ethdev TX queue", sizeof(struct ixgbe_tx_queue),
2183                                  RTE_CACHE_LINE_SIZE, socket_id);
2184         if (txq == NULL)
2185                 return (-ENOMEM);
2186
2187         /*
2188          * Allocate TX ring hardware descriptors. A memzone large enough to
2189          * handle the maximum ring size is allocated in order to allow for
2190          * resizing in later calls to the queue setup function.
2191          */
2192         tz = ring_dma_zone_reserve(dev, "tx_ring", queue_idx,
2193                         sizeof(union ixgbe_adv_tx_desc) * IXGBE_MAX_RING_DESC,
2194                         socket_id);
2195         if (tz == NULL) {
2196                 ixgbe_tx_queue_release(txq);
2197                 return (-ENOMEM);
2198         }
2199
2200         txq->nb_tx_desc = nb_desc;
2201         txq->tx_rs_thresh = tx_rs_thresh;
2202         txq->tx_free_thresh = tx_free_thresh;
2203         txq->pthresh = tx_conf->tx_thresh.pthresh;
2204         txq->hthresh = tx_conf->tx_thresh.hthresh;
2205         txq->wthresh = tx_conf->tx_thresh.wthresh;
2206         txq->queue_id = queue_idx;
2207         txq->reg_idx = (uint16_t)((RTE_ETH_DEV_SRIOV(dev).active == 0) ?
2208                 queue_idx : RTE_ETH_DEV_SRIOV(dev).def_pool_q_idx + queue_idx);
2209         txq->port_id = dev->data->port_id;
2210         txq->txq_flags = tx_conf->txq_flags;
2211         txq->ops = &def_txq_ops;
2212         txq->tx_deferred_start = tx_conf->tx_deferred_start;
2213
2214         /*
2215          * Use VFTDT as the tail register when running as a virtual function
2216          */
2217         if (hw->mac.type == ixgbe_mac_82599_vf ||
2218             hw->mac.type == ixgbe_mac_X540_vf ||
2219             hw->mac.type == ixgbe_mac_X550_vf ||
2220             hw->mac.type == ixgbe_mac_X550EM_x_vf)
2221                 txq->tdt_reg_addr = IXGBE_PCI_REG_ADDR(hw, IXGBE_VFTDT(queue_idx));
2222         else
2223                 txq->tdt_reg_addr = IXGBE_PCI_REG_ADDR(hw, IXGBE_TDT(txq->reg_idx));
2224 #ifndef RTE_LIBRTE_XEN_DOM0
2225         txq->tx_ring_phys_addr = (uint64_t) tz->phys_addr;
2226 #else
2227         txq->tx_ring_phys_addr = rte_mem_phy2mch(tz->memseg_id, tz->phys_addr);
2228 #endif
2229         txq->tx_ring = (union ixgbe_adv_tx_desc *) tz->addr;
2230
2231         /* Allocate software ring */
2232         txq->sw_ring = rte_zmalloc_socket("txq->sw_ring",
2233                                 sizeof(struct ixgbe_tx_entry) * nb_desc,
2234                                 RTE_CACHE_LINE_SIZE, socket_id);
2235         if (txq->sw_ring == NULL) {
2236                 ixgbe_tx_queue_release(txq);
2237                 return (-ENOMEM);
2238         }
2239         PMD_INIT_LOG(DEBUG, "sw_ring=%p hw_ring=%p dma_addr=0x%"PRIx64,
2240                      txq->sw_ring, txq->tx_ring, txq->tx_ring_phys_addr);
2241
2242         /* set up vector or scalar TX function as appropriate */
2243         ixgbe_set_tx_function(dev, txq);
2244
2245         txq->ops->reset(txq);
2246
2247         dev->data->tx_queues[queue_idx] = txq;
2248
2249
2250         return (0);
2251 }
2252
2253 /**
2254  * ixgbe_free_sc_cluster - free the not-yet-completed scattered cluster
2255  *
2256  * The "next" pointer of the last segment of (not-yet-completed) RSC clusters
2257  * in the sw_sc_ring is not set to NULL but rather points to the next
2258  * mbuf of this RSC aggregation (that has not been completed yet and still
2259  * resides on the HW ring). So, instead of calling rte_pktmbuf_free() we
2260  * just free the first "nb_segs" segments of the cluster explicitly by
2261  * calling rte_pktmbuf_free_seg().
2262  *
2263  * @m scattered cluster head
2264  */
2265 static void __attribute__((cold))
2266 ixgbe_free_sc_cluster(struct rte_mbuf *m)
2267 {
2268         uint8_t i, nb_segs = m->nb_segs;
2269         struct rte_mbuf *next_seg;
2270
2271         for (i = 0; i < nb_segs; i++) {
2272                 next_seg = m->next;
2273                 rte_pktmbuf_free_seg(m);
2274                 m = next_seg;
2275         }
2276 }
2277
2278 static void __attribute__((cold))
2279 ixgbe_rx_queue_release_mbufs(struct ixgbe_rx_queue *rxq)
2280 {
2281         unsigned i;
2282
2283 #ifdef RTE_IXGBE_INC_VECTOR
2284         /* SSE Vector driver has a different way of releasing mbufs. */
2285         if (rxq->rx_using_sse) {
2286                 ixgbe_rx_queue_release_mbufs_vec(rxq);
2287                 return;
2288         }
2289 #endif
2290
2291         if (rxq->sw_ring != NULL) {
2292                 for (i = 0; i < rxq->nb_rx_desc; i++) {
2293                         if (rxq->sw_ring[i].mbuf != NULL) {
2294                                 rte_pktmbuf_free_seg(rxq->sw_ring[i].mbuf);
2295                                 rxq->sw_ring[i].mbuf = NULL;
2296                         }
2297                 }
2298                 if (rxq->rx_nb_avail) {
2299                         for (i = 0; i < rxq->rx_nb_avail; ++i) {
2300                                 struct rte_mbuf *mb;
2301                                 mb = rxq->rx_stage[rxq->rx_next_avail + i];
2302                                 rte_pktmbuf_free_seg(mb);
2303                         }
2304                         rxq->rx_nb_avail = 0;
2305                 }
2306         }
2307
2308         if (rxq->sw_sc_ring)
2309                 for (i = 0; i < rxq->nb_rx_desc; i++)
2310                         if (rxq->sw_sc_ring[i].fbuf) {
2311                                 ixgbe_free_sc_cluster(rxq->sw_sc_ring[i].fbuf);
2312                                 rxq->sw_sc_ring[i].fbuf = NULL;
2313                         }
2314 }
2315
2316 static void __attribute__((cold))
2317 ixgbe_rx_queue_release(struct ixgbe_rx_queue *rxq)
2318 {
2319         if (rxq != NULL) {
2320                 ixgbe_rx_queue_release_mbufs(rxq);
2321                 rte_free(rxq->sw_ring);
2322                 rte_free(rxq->sw_sc_ring);
2323                 rte_free(rxq);
2324         }
2325 }
2326
2327 void __attribute__((cold))
2328 ixgbe_dev_rx_queue_release(void *rxq)
2329 {
2330         ixgbe_rx_queue_release(rxq);
2331 }
2332
2333 /*
2334  * Check if Rx Burst Bulk Alloc function can be used.
2335  * Return
2336  *        0: the preconditions are satisfied and the bulk allocation function
2337  *           can be used.
2338  *  -EINVAL: the preconditions are NOT satisfied and the default Rx burst
2339  *           function must be used.
2340  */
2341 static inline int __attribute__((cold))
2342 check_rx_burst_bulk_alloc_preconditions(struct ixgbe_rx_queue *rxq)
2343 {
2344         int ret = 0;
2345
2346         /*
2347          * Make sure the following pre-conditions are satisfied:
2348          *   rxq->rx_free_thresh >= RTE_PMD_IXGBE_RX_MAX_BURST
2349          *   rxq->rx_free_thresh < rxq->nb_rx_desc
2350          *   (rxq->nb_rx_desc % rxq->rx_free_thresh) == 0
2351          *   rxq->nb_rx_desc<(IXGBE_MAX_RING_DESC-RTE_PMD_IXGBE_RX_MAX_BURST)
2352          * Scattered packets are not supported.  This should be checked
2353          * outside of this function.
2354          */
2355         if (!(rxq->rx_free_thresh >= RTE_PMD_IXGBE_RX_MAX_BURST)) {
2356                 PMD_INIT_LOG(DEBUG, "Rx Burst Bulk Alloc Preconditions: "
2357                              "rxq->rx_free_thresh=%d, "
2358                              "RTE_PMD_IXGBE_RX_MAX_BURST=%d",
2359                              rxq->rx_free_thresh, RTE_PMD_IXGBE_RX_MAX_BURST);
2360                 ret = -EINVAL;
2361         } else if (!(rxq->rx_free_thresh < rxq->nb_rx_desc)) {
2362                 PMD_INIT_LOG(DEBUG, "Rx Burst Bulk Alloc Preconditions: "
2363                              "rxq->rx_free_thresh=%d, "
2364                              "rxq->nb_rx_desc=%d",
2365                              rxq->rx_free_thresh, rxq->nb_rx_desc);
2366                 ret = -EINVAL;
2367         } else if (!((rxq->nb_rx_desc % rxq->rx_free_thresh) == 0)) {
2368                 PMD_INIT_LOG(DEBUG, "Rx Burst Bulk Alloc Preconditions: "
2369                              "rxq->nb_rx_desc=%d, "
2370                              "rxq->rx_free_thresh=%d",
2371                              rxq->nb_rx_desc, rxq->rx_free_thresh);
2372                 ret = -EINVAL;
2373         } else if (!(rxq->nb_rx_desc <
2374                (IXGBE_MAX_RING_DESC - RTE_PMD_IXGBE_RX_MAX_BURST))) {
2375                 PMD_INIT_LOG(DEBUG, "Rx Burst Bulk Alloc Preconditions: "
2376                              "rxq->nb_rx_desc=%d, "
2377                              "IXGBE_MAX_RING_DESC=%d, "
2378                              "RTE_PMD_IXGBE_RX_MAX_BURST=%d",
2379                              rxq->nb_rx_desc, IXGBE_MAX_RING_DESC,
2380                              RTE_PMD_IXGBE_RX_MAX_BURST);
2381                 ret = -EINVAL;
2382         }
2383
2384         return ret;
2385 }
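
/*
 * Worked example for the preconditions above (illustrative values only): a
 * queue with nb_rx_desc = 128 and rx_free_thresh = 32 qualifies, since, with
 * RTE_PMD_IXGBE_RX_MAX_BURST being 32, we have 32 >= 32, 32 < 128,
 * 128 % 32 == 0 and 128 < IXGBE_MAX_RING_DESC - RTE_PMD_IXGBE_RX_MAX_BURST.
 */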
2386
2387 /* Reset dynamic ixgbe_rx_queue fields back to defaults */
2388 static void __attribute__((cold))
2389 ixgbe_reset_rx_queue(struct ixgbe_adapter *adapter, struct ixgbe_rx_queue *rxq)
2390 {
2391         static const union ixgbe_adv_rx_desc zeroed_desc = {{0}};
2392         unsigned i;
2393         uint16_t len = rxq->nb_rx_desc;
2394
2395         /*
2396          * By default, the Rx queue setup function allocates enough memory for
2397          * IXGBE_MAX_RING_DESC.  The Rx Burst bulk allocation function requires
2398          * extra memory at the end of the descriptor ring to be zero'd out. A
2399          * pre-condition for using the Rx burst bulk alloc function is that the
2400          * number of descriptors is less than or equal to
2401          * (IXGBE_MAX_RING_DESC - RTE_PMD_IXGBE_RX_MAX_BURST). Check all the
2402          * constraints here to see if we need to zero out memory after the end
2403          * of the H/W descriptor ring.
2404          */
2405         if (adapter->rx_bulk_alloc_allowed)
2406                 /* zero out extra memory */
2407                 len += RTE_PMD_IXGBE_RX_MAX_BURST;
2408
2409         /*
2410          * Zero out HW ring memory. Zero out extra memory at the end of
2411          * the H/W ring so look-ahead logic in Rx Burst bulk alloc function
2412          * reads extra memory as zeros.
2413          */
2414         for (i = 0; i < len; i++) {
2415                 rxq->rx_ring[i] = zeroed_desc;
2416         }
2417
2418         /*
2419          * initialize extra software ring entries. Space for these extra
2420          * entries is always allocated
2421          */
2422         memset(&rxq->fake_mbuf, 0x0, sizeof(rxq->fake_mbuf));
2423         for (i = rxq->nb_rx_desc; i < len; ++i) {
2424                 rxq->sw_ring[i].mbuf = &rxq->fake_mbuf;
2425         }
2426
2427         rxq->rx_nb_avail = 0;
2428         rxq->rx_next_avail = 0;
2429         rxq->rx_free_trigger = (uint16_t)(rxq->rx_free_thresh - 1);
2430         rxq->rx_tail = 0;
2431         rxq->nb_rx_hold = 0;
2432         rxq->pkt_first_seg = NULL;
2433         rxq->pkt_last_seg = NULL;
2434
2435 #ifdef RTE_IXGBE_INC_VECTOR
2436         rxq->rxrearm_start = 0;
2437         rxq->rxrearm_nb = 0;
2438 #endif
2439 }
2440
2441 int __attribute__((cold))
2442 ixgbe_dev_rx_queue_setup(struct rte_eth_dev *dev,
2443                          uint16_t queue_idx,
2444                          uint16_t nb_desc,
2445                          unsigned int socket_id,
2446                          const struct rte_eth_rxconf *rx_conf,
2447                          struct rte_mempool *mp)
2448 {
2449         const struct rte_memzone *rz;
2450         struct ixgbe_rx_queue *rxq;
2451         struct ixgbe_hw     *hw;
2452         uint16_t len;
2453         struct ixgbe_adapter *adapter =
2454                 (struct ixgbe_adapter *)dev->data->dev_private;
2455
2456         PMD_INIT_FUNC_TRACE();
2457         hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
2458
2459         /*
2460          * Validate number of receive descriptors.
2461          * It must not exceed hardware maximum, and must be multiple
2462          * of IXGBE_ALIGN.
2463          */
2464         if (((nb_desc * sizeof(union ixgbe_adv_rx_desc)) % IXGBE_ALIGN) != 0 ||
2465             (nb_desc > IXGBE_MAX_RING_DESC) ||
2466             (nb_desc < IXGBE_MIN_RING_DESC)) {
2467                 return (-EINVAL);
2468         }
2469
2470         /* Free memory prior to re-allocation if needed... */
2471         if (dev->data->rx_queues[queue_idx] != NULL) {
2472                 ixgbe_rx_queue_release(dev->data->rx_queues[queue_idx]);
2473                 dev->data->rx_queues[queue_idx] = NULL;
2474         }
2475
2476         /* First allocate the rx queue data structure */
2477         rxq = rte_zmalloc_socket("ethdev RX queue", sizeof(struct ixgbe_rx_queue),
2478                                  RTE_CACHE_LINE_SIZE, socket_id);
2479         if (rxq == NULL)
2480                 return (-ENOMEM);
2481         rxq->mb_pool = mp;
2482         rxq->nb_rx_desc = nb_desc;
2483         rxq->rx_free_thresh = rx_conf->rx_free_thresh;
2484         rxq->queue_id = queue_idx;
2485         rxq->reg_idx = (uint16_t)((RTE_ETH_DEV_SRIOV(dev).active == 0) ?
2486                 queue_idx : RTE_ETH_DEV_SRIOV(dev).def_pool_q_idx + queue_idx);
2487         rxq->port_id = dev->data->port_id;
2488         rxq->crc_len = (uint8_t) ((dev->data->dev_conf.rxmode.hw_strip_crc) ?
2489                                                         0 : ETHER_CRC_LEN);
2490         rxq->drop_en = rx_conf->rx_drop_en;
2491         rxq->rx_deferred_start = rx_conf->rx_deferred_start;
2492
2493         /*
2494          * Allocate RX ring hardware descriptors. A memzone large enough to
2495          * handle the maximum ring size is allocated in order to allow for
2496          * resizing in later calls to the queue setup function.
2497          */
2498         rz = ring_dma_zone_reserve(dev, "rx_ring", queue_idx,
2499                                    RX_RING_SZ, socket_id);
2500         if (rz == NULL) {
2501                 ixgbe_rx_queue_release(rxq);
2502                 return (-ENOMEM);
2503         }
2504
2505         /*
2506          * Zero init all the descriptors in the ring.
2507          */
2508         memset (rz->addr, 0, RX_RING_SZ);
2509
2510         /*
2511          * Use VFRDT/VFRDH as the ring registers when running as a virtual function
2512          */
2513         if (hw->mac.type == ixgbe_mac_82599_vf ||
2514             hw->mac.type == ixgbe_mac_X540_vf ||
2515             hw->mac.type == ixgbe_mac_X550_vf ||
2516             hw->mac.type == ixgbe_mac_X550EM_x_vf) {
2517                 rxq->rdt_reg_addr =
2518                         IXGBE_PCI_REG_ADDR(hw, IXGBE_VFRDT(queue_idx));
2519                 rxq->rdh_reg_addr =
2520                         IXGBE_PCI_REG_ADDR(hw, IXGBE_VFRDH(queue_idx));
2521         }
2522         else {
2523                 rxq->rdt_reg_addr =
2524                         IXGBE_PCI_REG_ADDR(hw, IXGBE_RDT(rxq->reg_idx));
2525                 rxq->rdh_reg_addr =
2526                         IXGBE_PCI_REG_ADDR(hw, IXGBE_RDH(rxq->reg_idx));
2527         }
2528 #ifndef RTE_LIBRTE_XEN_DOM0
2529         rxq->rx_ring_phys_addr = (uint64_t) rz->phys_addr;
2530 #else
2531         rxq->rx_ring_phys_addr = rte_mem_phy2mch(rz->memseg_id, rz->phys_addr);
2532 #endif
2533         rxq->rx_ring = (union ixgbe_adv_rx_desc *) rz->addr;
2534
2535         /*
2536          * Certain constraints must be met in order to use the bulk buffer
2537          * allocation Rx burst function. If any of the Rx queues doesn't meet
2538          * them, the feature should be disabled for the whole port.
2539          */
2540         if (check_rx_burst_bulk_alloc_preconditions(rxq)) {
2541                 PMD_INIT_LOG(DEBUG, "queue[%d] doesn't meet Rx Bulk Alloc "
2542                                     "preconditions - canceling the feature for "
2543                                     "the whole port[%d]",
2544                              rxq->queue_id, rxq->port_id);
2545                 adapter->rx_bulk_alloc_allowed = false;
2546         }
2547
2548         /*
2549          * Allocate software ring. Allow for space at the end of the
2550          * S/W ring to make sure look-ahead logic in bulk alloc Rx burst
2551          * function does not access an invalid memory region.
2552          */
2553         len = nb_desc;
2554         if (adapter->rx_bulk_alloc_allowed)
2555                 len += RTE_PMD_IXGBE_RX_MAX_BURST;
2556
2557         rxq->sw_ring = rte_zmalloc_socket("rxq->sw_ring",
2558                                           sizeof(struct ixgbe_rx_entry) * len,
2559                                           RTE_CACHE_LINE_SIZE, socket_id);
2560         if (!rxq->sw_ring) {
2561                 ixgbe_rx_queue_release(rxq);
2562                 return (-ENOMEM);
2563         }
2564
2565         /*
2566          * Always allocate even if it's not going to be needed in order to
2567          * simplify the code.
2568          *
2569          * This ring is used in LRO and Scattered Rx cases and Scattered Rx may
2570          * be requested in ixgbe_dev_rx_init(), which is called later from
2571          * dev_start() flow.
2572          */
2573         rxq->sw_sc_ring =
2574                 rte_zmalloc_socket("rxq->sw_sc_ring",
2575                                    sizeof(struct ixgbe_scattered_rx_entry) * len,
2576                                    RTE_CACHE_LINE_SIZE, socket_id);
2577         if (!rxq->sw_sc_ring) {
2578                 ixgbe_rx_queue_release(rxq);
2579                 return (-ENOMEM);
2580         }
2581
2582         PMD_INIT_LOG(DEBUG, "sw_ring=%p sw_sc_ring=%p hw_ring=%p "
2583                             "dma_addr=0x%"PRIx64,
2584                      rxq->sw_ring, rxq->sw_sc_ring, rxq->rx_ring,
2585                      rxq->rx_ring_phys_addr);
2586
2587         if (!rte_is_power_of_2(nb_desc)) {
2588                 PMD_INIT_LOG(DEBUG, "queue[%d] doesn't meet Vector Rx "
2589                                     "preconditions - canceling the feature for "
2590                                     "the whole port[%d]",
2591                              rxq->queue_id, rxq->port_id);
2592                 adapter->rx_vec_allowed = false;
2593         } else
2594                 ixgbe_rxq_vec_setup(rxq);
2595
2596         dev->data->rx_queues[queue_idx] = rxq;
2597
2598         ixgbe_reset_rx_queue(adapter, rxq);
2599
2600         return 0;
2601 }
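
/*
 * Editor's note -- usage sketch (illustrative, not part of the driver; the
 * application-level names below are examples only): this function is reached
 * through the ethdev API, e.g.
 *
 *     struct rte_eth_rxconf rx_conf = { .rx_free_thresh = 32 };
 *     ret = rte_eth_rx_queue_setup(port_id, 0, 512, rte_socket_id(),
 *                                  &rx_conf, mbuf_pool);
 *
 * Using a power-of-two nb_desc such as 512 keeps rte_is_power_of_2(nb_desc)
 * true above, so the Vector Rx path is not disabled for the whole port.
 */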
2602
2603 uint32_t
2604 ixgbe_dev_rx_queue_count(struct rte_eth_dev *dev, uint16_t rx_queue_id)
2605 {
2606 #define IXGBE_RXQ_SCAN_INTERVAL 4
2607         volatile union ixgbe_adv_rx_desc *rxdp;
2608         struct ixgbe_rx_queue *rxq;
2609         uint32_t desc = 0;
2610
2611         if (rx_queue_id >= dev->data->nb_rx_queues) {
2612                 PMD_RX_LOG(ERR, "Invalid RX queue id=%d", rx_queue_id);
2613                 return 0;
2614         }
2615
2616         rxq = dev->data->rx_queues[rx_queue_id];
2617         rxdp = &(rxq->rx_ring[rxq->rx_tail]);
2618
2619         while ((desc < rxq->nb_rx_desc) &&
2620                 (rxdp->wb.upper.status_error &
2621                         rte_cpu_to_le_32(IXGBE_RXDADV_STAT_DD))) {
2622                 desc += IXGBE_RXQ_SCAN_INTERVAL;
2623                 rxdp += IXGBE_RXQ_SCAN_INTERVAL;
2624                 if (rxq->rx_tail + desc >= rxq->nb_rx_desc)
2625                         rxdp = &(rxq->rx_ring[rxq->rx_tail +
2626                                 desc - rxq->nb_rx_desc]);
2627         }
2628
2629         return desc;
2630 }
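
/*
 * Editor's note -- worked example (illustrative, not part of the driver):
 * because the loop above only samples every IXGBE_RXQ_SCAN_INTERVAL-th
 * descriptor, the returned count has a granularity of 4.  Assuming
 * nb_rx_desc = 128, rx_tail = 126 and 10 descriptors with the DD bit set,
 * the scan finds DD at offsets 0, 4 and 8 and stops at offset 12, so
 *
 *     uint32_t used = ixgbe_dev_rx_queue_count(dev, rx_queue_id);  // == 12
 *
 * slightly over-reports the 10 descriptors actually completed.
 */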
2631
2632 int
2633 ixgbe_dev_rx_descriptor_done(void *rx_queue, uint16_t offset)
2634 {
2635         volatile union ixgbe_adv_rx_desc *rxdp;
2636         struct ixgbe_rx_queue *rxq = rx_queue;
2637         uint32_t desc;
2638
2639         if (unlikely(offset >= rxq->nb_rx_desc))
2640                 return 0;
2641         desc = rxq->rx_tail + offset;
2642         if (desc >= rxq->nb_rx_desc)
2643                 desc -= rxq->nb_rx_desc;
2644
2645         rxdp = &rxq->rx_ring[desc];
2646         return !!(rxdp->wb.upper.status_error &
2647                         rte_cpu_to_le_32(IXGBE_RXDADV_STAT_DD));
2648 }
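
/*
 * Editor's note -- worked example (illustrative, not part of the driver):
 * the offset is applied from the current rx_tail and wraps modulo the ring
 * size.  Assuming nb_rx_desc = 512 and rx_tail = 500, offset 20 maps to
 * descriptor index (500 + 20) - 512 = 8:
 *
 *     int done = ixgbe_dev_rx_descriptor_done(rxq, 20);
 *     // done != 0  <=>  descriptor 8 has its DD bit set
 */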
2649
2650 void __attribute__((cold))
2651 ixgbe_dev_clear_queues(struct rte_eth_dev *dev)
2652 {
2653         unsigned i;
2654         struct ixgbe_adapter *adapter =
2655                 (struct ixgbe_adapter *)dev->data->dev_private;
2656
2657         PMD_INIT_FUNC_TRACE();
2658
2659         for (i = 0; i < dev->data->nb_tx_queues; i++) {
2660                 struct ixgbe_tx_queue *txq = dev->data->tx_queues[i];
2661                 if (txq != NULL) {
2662                         txq->ops->release_mbufs(txq);
2663                         txq->ops->reset(txq);
2664                 }
2665         }
2666
2667         for (i = 0; i < dev->data->nb_rx_queues; i++) {
2668                 struct ixgbe_rx_queue *rxq = dev->data->rx_queues[i];
2669                 if (rxq != NULL) {
2670                         ixgbe_rx_queue_release_mbufs(rxq);
2671                         ixgbe_reset_rx_queue(adapter, rxq);
2672                 }
2673         }
2674 }
2675
2676 void
2677 ixgbe_dev_free_queues(struct rte_eth_dev *dev)
2678 {
2679         unsigned i;
2680
2681         PMD_INIT_FUNC_TRACE();
2682
2683         for (i = 0; i < dev->data->nb_rx_queues; i++) {
2684                 ixgbe_dev_rx_queue_release(dev->data->rx_queues[i]);
2685                 dev->data->rx_queues[i] = NULL;
2686         }
2687         dev->data->nb_rx_queues = 0;
2688
2689         for (i = 0; i < dev->data->nb_tx_queues; i++) {
2690                 ixgbe_dev_tx_queue_release(dev->data->tx_queues[i]);
2691                 dev->data->tx_queues[i] = NULL;
2692         }
2693         dev->data->nb_tx_queues = 0;
2694 }
2695
2696 /*********************************************************************
2697  *
2698  *  Device RX/TX init functions
2699  *
2700  **********************************************************************/
2701
2702 /**
2703  * Receive Side Scaling (RSS)
2704  * See section 7.1.2.8 in the following document:
2705  *     "Intel 82599 10 GbE Controller Datasheet" - Revision 2.1 October 2009
2706  *
2707  * Principles:
2708  * The source and destination IP addresses of the IP header and the source
2709  * and destination ports of TCP/UDP headers, if any, of received packets are
2710  * hashed against a configurable random key to compute a 32-bit RSS hash result.
2711  * The seven (7) LSBs of the 32-bit hash result are used as an index into a
2712  * 128-entry redirection table (RETA).  Each entry of the RETA provides a 3-bit
2713  * RSS output index, which is used as the index of the RX queue in which to
2714  * store the received packets.
2715  * The following output is supplied in the RX write-back descriptor:
2716  *     - 32-bit result of the Microsoft RSS hash function,
2717  *     - 4-bit RSS type field.
2718  */
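
/*
 * Editor's note -- conceptual sketch (illustrative, not part of the driver):
 * the queue selection described above is equivalent to the following lookup,
 * assuming a hypothetical software shadow of the 128-entry RETA:
 *
 *     static inline uint16_t
 *     rss_queue_lookup(const uint8_t reta[128], uint32_t rss_hash)
 *     {
 *             return reta[rss_hash & 0x7F];   // 7 LSBs index the RETA
 *     }
 *
 * The hardware performs this lookup internally; the RETA itself is programmed
 * in ixgbe_rss_configure() below.
 */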
2719
2720 /*
2721  * RSS random key supplied in section 7.1.2.8.3 of the Intel 82599 datasheet.
2722  * Used as the default key.
2723  */
2724 static uint8_t rss_intel_key[40] = {
2725         0x6D, 0x5A, 0x56, 0xDA, 0x25, 0x5B, 0x0E, 0xC2,
2726         0x41, 0x67, 0x25, 0x3D, 0x43, 0xA3, 0x8F, 0xB0,
2727         0xD0, 0xCA, 0x2B, 0xCB, 0xAE, 0x7B, 0x30, 0xB4,
2728         0x77, 0xCB, 0x2D, 0xA3, 0x80, 0x30, 0xF2, 0x0C,
2729         0x6A, 0x42, 0xB7, 0x3B, 0xBE, 0xAC, 0x01, 0xFA,
2730 };
2731
2732 static void
2733 ixgbe_rss_disable(struct rte_eth_dev *dev)
2734 {
2735         struct ixgbe_hw *hw;
2736         uint32_t mrqc;
2737
2738         hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
2739         mrqc = IXGBE_READ_REG(hw, IXGBE_MRQC);
2740         mrqc &= ~IXGBE_MRQC_RSSEN;
2741         IXGBE_WRITE_REG(hw, IXGBE_MRQC, mrqc);
2742 }
2743
2744 static void
2745 ixgbe_hw_rss_hash_set(struct ixgbe_hw *hw, struct rte_eth_rss_conf *rss_conf)
2746 {
2747         uint8_t  *hash_key;
2748         uint32_t mrqc;
2749         uint32_t rss_key;
2750         uint64_t rss_hf;
2751         uint16_t i;
2752
2753         hash_key = rss_conf->rss_key;
2754         if (hash_key != NULL) {
2755                 /* Fill in RSS hash key */
2756                 for (i = 0; i < 10; i++) {
2757                         rss_key  = hash_key[(i * 4)];
2758                         rss_key |= hash_key[(i * 4) + 1] << 8;
2759                         rss_key |= hash_key[(i * 4) + 2] << 16;
2760                         rss_key |= hash_key[(i * 4) + 3] << 24;
2761                         IXGBE_WRITE_REG_ARRAY(hw, IXGBE_RSSRK(0), i, rss_key);
2762                 }
2763         }
2764
2765         /* Set configured hashing protocols in MRQC register */
2766         rss_hf = rss_conf->rss_hf;
2767         mrqc = IXGBE_MRQC_RSSEN; /* Enable RSS */
2768         if (rss_hf & ETH_RSS_IPV4)
2769                 mrqc |= IXGBE_MRQC_RSS_FIELD_IPV4;
2770         if (rss_hf & ETH_RSS_NONFRAG_IPV4_TCP)
2771                 mrqc |= IXGBE_MRQC_RSS_FIELD_IPV4_TCP;
2772         if (rss_hf & ETH_RSS_IPV6)
2773                 mrqc |= IXGBE_MRQC_RSS_FIELD_IPV6;
2774         if (rss_hf & ETH_RSS_IPV6_EX)
2775                 mrqc |= IXGBE_MRQC_RSS_FIELD_IPV6_EX;
2776         if (rss_hf & ETH_RSS_NONFRAG_IPV6_TCP)
2777                 mrqc |= IXGBE_MRQC_RSS_FIELD_IPV6_TCP;
2778         if (rss_hf & ETH_RSS_IPV6_TCP_EX)
2779                 mrqc |= IXGBE_MRQC_RSS_FIELD_IPV6_EX_TCP;
2780         if (rss_hf & ETH_RSS_NONFRAG_IPV4_UDP)
2781                 mrqc |= IXGBE_MRQC_RSS_FIELD_IPV4_UDP;
2782         if (rss_hf & ETH_RSS_NONFRAG_IPV6_UDP)
2783                 mrqc |= IXGBE_MRQC_RSS_FIELD_IPV6_UDP;
2784         if (rss_hf & ETH_RSS_IPV6_UDP_EX)
2785                 mrqc |= IXGBE_MRQC_RSS_FIELD_IPV6_EX_UDP;
2786         IXGBE_WRITE_REG(hw, IXGBE_MRQC, mrqc);
2787 }
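
/*
 * Editor's note -- worked example (illustrative, not part of the driver):
 * the loop above packs the 40-byte key into ten little-endian 32-bit RSSRK
 * words.  With the default rss_intel_key, the first word is
 *
 *     rss_key = 0x6D | (0x5A << 8) | (0x56 << 16) | (0xDA << 24);
 *     // rss_key == 0xDA565A6D, written to RSSRK[0]
 *
 * Callers normally reach this through ixgbe_dev_rss_hash_update() with a
 * struct rte_eth_rss_conf whose rss_key is either NULL (keep the current
 * key) or a 40-byte array.
 */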
2788
2789 int
2790 ixgbe_dev_rss_hash_update(struct rte_eth_dev *dev,
2791                           struct rte_eth_rss_conf *rss_conf)
2792 {
2793         struct ixgbe_hw *hw;
2794         uint32_t mrqc;
2795         uint64_t rss_hf;
2796
2797         hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
2798
2799         /*
2800          * Excerpt from section 7.1.2.8 Receive-Side Scaling (RSS):
2801          *     "RSS enabling cannot be done dynamically while it must be
2802          *      preceded by a software reset"
2803          * Before changing anything, first check that the update RSS operation
2804          * does not attempt to disable RSS, if RSS was enabled at
2805          * initialization time, or does not attempt to enable RSS, if RSS was
2806          * disabled at initialization time.
2807          */
2808         rss_hf = rss_conf->rss_hf & IXGBE_RSS_OFFLOAD_ALL;
2809         mrqc = IXGBE_READ_REG(hw, IXGBE_MRQC);
2810         if (!(mrqc & IXGBE_MRQC_RSSEN)) { /* RSS disabled */
2811                 if (rss_hf != 0) /* Enable RSS */
2812                         return -(EINVAL);
2813                 return 0; /* Nothing to do */
2814         }
2815         /* RSS enabled */
2816         if (rss_hf == 0) /* Disable RSS */
2817                 return -(EINVAL);
2818         ixgbe_hw_rss_hash_set(hw, rss_conf);
2819         return 0;
2820 }
2821
2822 int
2823 ixgbe_dev_rss_hash_conf_get(struct rte_eth_dev *dev,
2824                             struct rte_eth_rss_conf *rss_conf)
2825 {
2826         struct ixgbe_hw *hw;
2827         uint8_t *hash_key;
2828         uint32_t mrqc;
2829         uint32_t rss_key;
2830         uint64_t rss_hf;
2831         uint16_t i;
2832
2833         hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
2834         hash_key = rss_conf->rss_key;
2835         if (hash_key != NULL) {
2836                 /* Return RSS hash key */
2837                 for (i = 0; i < 10; i++) {
2838                         rss_key = IXGBE_READ_REG_ARRAY(hw, IXGBE_RSSRK(0), i);
2839                         hash_key[(i * 4)] = rss_key & 0x000000FF;
2840                         hash_key[(i * 4) + 1] = (rss_key >> 8) & 0x000000FF;
2841                         hash_key[(i * 4) + 2] = (rss_key >> 16) & 0x000000FF;
2842                         hash_key[(i * 4) + 3] = (rss_key >> 24) & 0x000000FF;
2843                 }
2844         }
2845
2846         /* Get RSS functions configured in MRQC register */
2847         mrqc = IXGBE_READ_REG(hw, IXGBE_MRQC);
2848         if ((mrqc & IXGBE_MRQC_RSSEN) == 0) { /* RSS is disabled */
2849                 rss_conf->rss_hf = 0;
2850                 return 0;
2851         }
2852         rss_hf = 0;
2853         if (mrqc & IXGBE_MRQC_RSS_FIELD_IPV4)
2854                 rss_hf |= ETH_RSS_IPV4;
2855         if (mrqc & IXGBE_MRQC_RSS_FIELD_IPV4_TCP)
2856                 rss_hf |= ETH_RSS_NONFRAG_IPV4_TCP;
2857         if (mrqc & IXGBE_MRQC_RSS_FIELD_IPV6)
2858                 rss_hf |= ETH_RSS_IPV6;
2859         if (mrqc & IXGBE_MRQC_RSS_FIELD_IPV6_EX)
2860                 rss_hf |= ETH_RSS_IPV6_EX;
2861         if (mrqc & IXGBE_MRQC_RSS_FIELD_IPV6_TCP)
2862                 rss_hf |= ETH_RSS_NONFRAG_IPV6_TCP;
2863         if (mrqc & IXGBE_MRQC_RSS_FIELD_IPV6_EX_TCP)
2864                 rss_hf |= ETH_RSS_IPV6_TCP_EX;
2865         if (mrqc & IXGBE_MRQC_RSS_FIELD_IPV4_UDP)
2866                 rss_hf |= ETH_RSS_NONFRAG_IPV4_UDP;
2867         if (mrqc & IXGBE_MRQC_RSS_FIELD_IPV6_UDP)
2868                 rss_hf |= ETH_RSS_NONFRAG_IPV6_UDP;
2869         if (mrqc & IXGBE_MRQC_RSS_FIELD_IPV6_EX_UDP)
2870                 rss_hf |= ETH_RSS_IPV6_UDP_EX;
2871         rss_conf->rss_hf = rss_hf;
2872         return 0;
2873 }
2874
2875 static void
2876 ixgbe_rss_configure(struct rte_eth_dev *dev)
2877 {
2878         struct rte_eth_rss_conf rss_conf;
2879         struct ixgbe_hw *hw;
2880         uint32_t reta;
2881         uint16_t i;
2882         uint16_t j;
2883
2884         PMD_INIT_FUNC_TRACE();
2885         hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
2886
2887         /*
2888          * Fill in redirection table
2889          * The byte-swap is needed because NIC registers are in
2890          * little-endian order.
2891          */
2892         reta = 0;
2893         for (i = 0, j = 0; i < 128; i++, j++) {
2894                 if (j == dev->data->nb_rx_queues)
2895                         j = 0;
2896                 reta = (reta << 8) | j;
2897                 if ((i & 3) == 3)
2898                         IXGBE_WRITE_REG(hw, IXGBE_RETA(i >> 2),
2899                                         rte_bswap32(reta));
2900         }
2901
2902         /*
2903          * Configure the RSS key and the RSS protocols used to compute
2904          * the RSS hash of input packets.
2905          */
2906         rss_conf = dev->data->dev_conf.rx_adv_conf.rss_conf;
2907         if ((rss_conf.rss_hf & IXGBE_RSS_OFFLOAD_ALL) == 0) {
2908                 ixgbe_rss_disable(dev);
2909                 return;
2910         }
2911         if (rss_conf.rss_key == NULL)
2912                 rss_conf.rss_key = rss_intel_key; /* Default hash key */
2913         ixgbe_hw_rss_hash_set(hw, &rss_conf);
2914 }
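
/*
 * Editor's note -- worked example (illustrative, not part of the driver):
 * with nb_rx_queues = 4, the RETA fill loop above emits the byte sequence
 * 0, 1, 2, 3 for every group of four table entries.  For the first group:
 *
 *     reta = (((0 << 8 | 0) << 8 | 1) << 8 | 2) << 8 | 3;   // 0x00010203
 *     // rte_bswap32(reta) == 0x03020100 is written to RETA(0),
 *     // i.e. entry 0 -> queue 0, entry 1 -> queue 1, ...
 *
 * so packets are spread round-robin over the 4 queues as the 7-bit hash
 * index walks the 128 entries.
 */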
2915
2916 #define NUM_VFTA_REGISTERS 128
2917 #define NIC_RX_BUFFER_SIZE 0x200
2918
2919 static void
2920 ixgbe_vmdq_dcb_configure(struct rte_eth_dev *dev)
2921 {
2922         struct rte_eth_vmdq_dcb_conf *cfg;
2923         struct ixgbe_hw *hw;
2924         enum rte_eth_nb_pools num_pools;
2925         uint32_t mrqc, vt_ctl, queue_mapping, vlanctrl;
2926         uint16_t pbsize;
2927         uint8_t nb_tcs; /* number of traffic classes */
2928         int i;
2929
2930         PMD_INIT_FUNC_TRACE();
2931         hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
2932         cfg = &dev->data->dev_conf.rx_adv_conf.vmdq_dcb_conf;
2933         num_pools = cfg->nb_queue_pools;
2934         /* Check we have a valid number of pools */
2935         if (num_pools != ETH_16_POOLS && num_pools != ETH_32_POOLS) {
2936                 ixgbe_rss_disable(dev);
2937                 return;
2938         }
2939         /* 16 pools -> 8 traffic classes, 32 pools -> 4 traffic classes */
2940         nb_tcs = (uint8_t)(ETH_VMDQ_DCB_NUM_QUEUES / (int)num_pools);
2941
2942         /*
2943          * RXPBSIZE
2944          * split rx buffer up into sections, each for 1 traffic class
2945          */
2946         pbsize = (uint16_t)(NIC_RX_BUFFER_SIZE / nb_tcs);
2947         for (i = 0; i < nb_tcs; i++) {
2948                 uint32_t rxpbsize = IXGBE_READ_REG(hw, IXGBE_RXPBSIZE(i));
2949                 /* clear the 10 size bits, then set the new value */
2950                 rxpbsize &= (~(0x3FF << IXGBE_RXPBSIZE_SHIFT));
2951                 rxpbsize |= (pbsize << IXGBE_RXPBSIZE_SHIFT);
2952                 IXGBE_WRITE_REG(hw, IXGBE_RXPBSIZE(i), rxpbsize);
2953         }
2954         /* zero alloc all unused TCs */
2955         for (i = nb_tcs; i < ETH_DCB_NUM_USER_PRIORITIES; i++) {
2956                 uint32_t rxpbsize = IXGBE_READ_REG(hw, IXGBE_RXPBSIZE(i));
2957                 /* clear the 10 size bits */
2958                 rxpbsize &= (~(0x3FF << IXGBE_RXPBSIZE_SHIFT));
2959                 IXGBE_WRITE_REG(hw, IXGBE_RXPBSIZE(i), rxpbsize);
2960         }
2961
2962         /* MRQC: enable vmdq and dcb */
2963         mrqc = ((num_pools == ETH_16_POOLS) ?
2964                 IXGBE_MRQC_VMDQRT8TCEN : IXGBE_MRQC_VMDQRT4TCEN);
2965         IXGBE_WRITE_REG(hw, IXGBE_MRQC, mrqc);
2966
2967         /* PFVTCTL: turn on virtualisation and set the default pool */
2968         vt_ctl = IXGBE_VT_CTL_VT_ENABLE | IXGBE_VT_CTL_REPLEN;
2969         if (cfg->enable_default_pool) {
2970                 vt_ctl |= (cfg->default_pool << IXGBE_VT_CTL_POOL_SHIFT);
2971         } else {
2972                 vt_ctl |= IXGBE_VT_CTL_DIS_DEFPL;
2973         }
2974
2975         IXGBE_WRITE_REG(hw, IXGBE_VT_CTL, vt_ctl);
2976
2977         /* RTRUP2TC: mapping user priorities to traffic classes (TCs) */
2978         queue_mapping = 0;
2979         for (i = 0; i < ETH_DCB_NUM_USER_PRIORITIES; i++)
2980                 /*
2981                  * mapping is done with 3 bits per priority,
2982                  * so shift by i*3 each time
2983                  */
2984                 queue_mapping |= ((cfg->dcb_queue[i] & 0x07) << (i * 3));
2985
2986         IXGBE_WRITE_REG(hw, IXGBE_RTRUP2TC, queue_mapping);
2987
2988         /* RTRPCS: DCB related */
2989         IXGBE_WRITE_REG(hw, IXGBE_RTRPCS, IXGBE_RMCS_RRM);
2990
2991         /* VLNCTRL: enable vlan filtering and allow all vlan tags through */
2992         vlanctrl = IXGBE_READ_REG(hw, IXGBE_VLNCTRL);
2993         vlanctrl |= IXGBE_VLNCTRL_VFE ; /* enable vlan filters */
2994         IXGBE_WRITE_REG(hw, IXGBE_VLNCTRL, vlanctrl);
2995
2996         /* VFTA - enable all vlan filters */
2997         for (i = 0; i < NUM_VFTA_REGISTERS; i++) {
2998                 IXGBE_WRITE_REG(hw, IXGBE_VFTA(i), 0xFFFFFFFF);
2999         }
3000
3001         /* VFRE: pool enabling for receive - 16 or 32 */
3002         IXGBE_WRITE_REG(hw, IXGBE_VFRE(0),
3003                         num_pools == ETH_16_POOLS ? 0xFFFF : 0xFFFFFFFF);
3004
3005         /*
3006          * MPSAR - allow pools to read specific mac addresses
3007          * In this case, all pools should be able to read from mac addr 0
3008          */
3009         IXGBE_WRITE_REG(hw, IXGBE_MPSAR_LO(0), 0xFFFFFFFF);
3010         IXGBE_WRITE_REG(hw, IXGBE_MPSAR_HI(0), 0xFFFFFFFF);
3011
3012         /* PFVLVF, PFVLVFB: set up filters for vlan tags as configured */
3013         for (i = 0; i < cfg->nb_pool_maps; i++) {
3014                 /* set vlan id in VF register and set the valid bit */
3015                 IXGBE_WRITE_REG(hw, IXGBE_VLVF(i), (IXGBE_VLVF_VIEN |
3016                                 (cfg->pool_map[i].vlan_id & 0xFFF)));
3017                 /*
3018                  * Put the allowed pools in VFB reg. As we only have 16 or 32
3019                  * pools, we only need to use the first half of the register
3020                  * i.e. bits 0-31
3021                  */
3022                 IXGBE_WRITE_REG(hw, IXGBE_VLVFB(i*2), cfg->pool_map[i].pools);
3023         }
3024 }
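
/*
 * Editor's note -- worked example (illustrative, not part of the driver):
 * with cfg->nb_queue_pools = ETH_16_POOLS the function above derives
 * nb_tcs = ETH_VMDQ_DCB_NUM_QUEUES / 16 = 8, splits the RX packet buffer
 * into pbsize = NIC_RX_BUFFER_SIZE / 8 = 0x40 per TC for RXPBSIZE[0..7],
 * and programs MRQC = IXGBE_MRQC_VMDQRT8TCEN.  With ETH_32_POOLS the same
 * path yields 4 TCs of 0x80 each and IXGBE_MRQC_VMDQRT4TCEN.
 */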
3025
3026 /**
3027  * ixgbe_dcb_config_tx_hw_config - Configure general DCB TX parameters
3028  * @hw: pointer to hardware structure
3029  * @dcb_config: pointer to ixgbe_dcb_config structure
3030  */
3031 static void
3032 ixgbe_dcb_tx_hw_config(struct ixgbe_hw *hw,
3033                struct ixgbe_dcb_config *dcb_config)
3034 {
3035         uint32_t reg;
3036         uint32_t q;
3037
3038         PMD_INIT_FUNC_TRACE();
3039         if (hw->mac.type != ixgbe_mac_82598EB) {
3040                 /* Disable the Tx desc arbiter so that MTQC can be changed */
3041                 reg = IXGBE_READ_REG(hw, IXGBE_RTTDCS);
3042                 reg |= IXGBE_RTTDCS_ARBDIS;
3043                 IXGBE_WRITE_REG(hw, IXGBE_RTTDCS, reg);
3044
3045                 /* Enable DCB for Tx with 8 TCs */
3046                 if (dcb_config->num_tcs.pg_tcs == 8) {
3047                         reg = IXGBE_MTQC_RT_ENA | IXGBE_MTQC_8TC_8TQ;
3048                 } else {
3050                         reg = IXGBE_MTQC_RT_ENA | IXGBE_MTQC_4TC_4TQ;
3051                 }
3052                 if (dcb_config->vt_mode)
3053                         reg |= IXGBE_MTQC_VT_ENA;
3054                 IXGBE_WRITE_REG(hw, IXGBE_MTQC, reg);
3055
3056                 /* Disable drop for all queues */
3057                 for (q = 0; q < 128; q++)
3058                         IXGBE_WRITE_REG(hw, IXGBE_QDE,
3059                      (IXGBE_QDE_WRITE | (q << IXGBE_QDE_IDX_SHIFT)));
3060
3061                 /* Enable the Tx desc arbiter */
3062                 reg = IXGBE_READ_REG(hw, IXGBE_RTTDCS);
3063                 reg &= ~IXGBE_RTTDCS_ARBDIS;
3064                 IXGBE_WRITE_REG(hw, IXGBE_RTTDCS, reg);
3065
3066                 /* Enable Security TX Buffer IFG for DCB */
3067                 reg = IXGBE_READ_REG(hw, IXGBE_SECTXMINIFG);
3068                 reg |= IXGBE_SECTX_DCB;
3069                 IXGBE_WRITE_REG(hw, IXGBE_SECTXMINIFG, reg);
3070         }
3071         return;
3072 }
3073
3074 /**
3075  * ixgbe_vmdq_dcb_hw_tx_config - Configure general VMDQ+DCB TX parameters
3076  * @dev: pointer to rte_eth_dev structure
3077  * @dcb_config: pointer to ixgbe_dcb_config structure
3078  */
3079 static void
3080 ixgbe_vmdq_dcb_hw_tx_config(struct rte_eth_dev *dev,
3081                         struct ixgbe_dcb_config *dcb_config)
3082 {
3083         struct rte_eth_vmdq_dcb_tx_conf *vmdq_tx_conf =
3084                         &dev->data->dev_conf.tx_adv_conf.vmdq_dcb_tx_conf;
3085         struct ixgbe_hw *hw =
3086                         IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
3087
3088         PMD_INIT_FUNC_TRACE();
3089         if (hw->mac.type != ixgbe_mac_82598EB)
3090                 /*PF VF Transmit Enable*/
3091                 IXGBE_WRITE_REG(hw, IXGBE_VFTE(0),
3092                         vmdq_tx_conf->nb_queue_pools == ETH_16_POOLS ? 0xFFFF : 0xFFFFFFFF);
3093
3094         /* Configure general DCB TX parameters */
3095         ixgbe_dcb_tx_hw_config(hw, dcb_config);
3096         return;
3097 }
3098
3099 static void
3100 ixgbe_vmdq_dcb_rx_config(struct rte_eth_dev *dev,
3101                         struct ixgbe_dcb_config *dcb_config)
3102 {
3103         struct rte_eth_vmdq_dcb_conf *vmdq_rx_conf =
3104                         &dev->data->dev_conf.rx_adv_conf.vmdq_dcb_conf;
3105         struct ixgbe_dcb_tc_config *tc;
3106         uint8_t i,j;
3107
3108         /* convert rte_eth_conf.rx_adv_conf to struct ixgbe_dcb_config */
3109         if (vmdq_rx_conf->nb_queue_pools == ETH_16_POOLS) {
3110                 dcb_config->num_tcs.pg_tcs = ETH_8_TCS;
3111                 dcb_config->num_tcs.pfc_tcs = ETH_8_TCS;
3112         } else {
3114                 dcb_config->num_tcs.pg_tcs = ETH_4_TCS;
3115                 dcb_config->num_tcs.pfc_tcs = ETH_4_TCS;
3116         }
3117         /* User Priority to Traffic Class mapping */
3118         for (i = 0; i < ETH_DCB_NUM_USER_PRIORITIES; i++) {
3119                 j = vmdq_rx_conf->dcb_queue[i];
3120                 tc = &dcb_config->tc_config[j];
3121                 tc->path[IXGBE_DCB_RX_CONFIG].up_to_tc_bitmap =
3122                                                 (uint8_t)(1 << j);
3123         }
3124 }
3125
3126 static void
3127 ixgbe_dcb_vt_tx_config(struct rte_eth_dev *dev,
3128                         struct ixgbe_dcb_config *dcb_config)
3129 {
3130         struct rte_eth_vmdq_dcb_tx_conf *vmdq_tx_conf =
3131                         &dev->data->dev_conf.tx_adv_conf.vmdq_dcb_tx_conf;
3132         struct ixgbe_dcb_tc_config *tc;
3133         uint8_t i,j;
3134
3135         /* convert rte_eth_conf.rx_adv_conf to struct ixgbe_dcb_config */
3136         if (vmdq_tx_conf->nb_queue_pools == ETH_16_POOLS) {
3137                 dcb_config->num_tcs.pg_tcs = ETH_8_TCS;
3138                 dcb_config->num_tcs.pfc_tcs = ETH_8_TCS;
3139         } else {
3141                 dcb_config->num_tcs.pg_tcs = ETH_4_TCS;
3142                 dcb_config->num_tcs.pfc_tcs = ETH_4_TCS;
3143         }
3144
3145         /* User Priority to Traffic Class mapping */
3146         for (i = 0; i < ETH_DCB_NUM_USER_PRIORITIES; i++) {
3147                 j = vmdq_tx_conf->dcb_queue[i];
3148                 tc = &dcb_config->tc_config[j];
3149                 tc->path[IXGBE_DCB_TX_CONFIG].up_to_tc_bitmap =
3150                                                 (uint8_t)(1 << j);
3151         }
3152         return;
3153 }
3154
3155 static void
3156 ixgbe_dcb_rx_config(struct rte_eth_dev *dev,
3157                 struct ixgbe_dcb_config *dcb_config)
3158 {
3159         struct rte_eth_dcb_rx_conf *rx_conf =
3160                         &dev->data->dev_conf.rx_adv_conf.dcb_rx_conf;
3161         struct ixgbe_dcb_tc_config *tc;
3162         uint8_t i,j;
3163
3164         dcb_config->num_tcs.pg_tcs = (uint8_t)rx_conf->nb_tcs;
3165         dcb_config->num_tcs.pfc_tcs = (uint8_t)rx_conf->nb_tcs;
3166
3167         /* User Priority to Traffic Class mapping */
3168         for (i = 0; i < ETH_DCB_NUM_USER_PRIORITIES; i++) {
3169                 j = rx_conf->dcb_queue[i];
3170                 tc = &dcb_config->tc_config[j];
3171                 tc->path[IXGBE_DCB_RX_CONFIG].up_to_tc_bitmap =
3172                                                 (uint8_t)(1 << j);
3173         }
3174 }
3175
3176 static void
3177 ixgbe_dcb_tx_config(struct rte_eth_dev *dev,
3178                 struct ixgbe_dcb_config *dcb_config)
3179 {
3180         struct rte_eth_dcb_tx_conf *tx_conf =
3181                         &dev->data->dev_conf.tx_adv_conf.dcb_tx_conf;
3182         struct ixgbe_dcb_tc_config *tc;
3183         uint8_t i,j;
3184
3185         dcb_config->num_tcs.pg_tcs = (uint8_t)tx_conf->nb_tcs;
3186         dcb_config->num_tcs.pfc_tcs = (uint8_t)tx_conf->nb_tcs;
3187
3188         /* User Priority to Traffic Class mapping */
3189         for (i = 0; i < ETH_DCB_NUM_USER_PRIORITIES; i++) {
3190                 j = tx_conf->dcb_queue[i];
3191                 tc = &dcb_config->tc_config[j];
3192                 tc->path[IXGBE_DCB_TX_CONFIG].up_to_tc_bitmap =
3193                                                 (uint8_t)(1 << j);
3194         }
3195 }
3196
3197 /**
3198  * ixgbe_dcb_rx_hw_config - Configure general DCB RX HW parameters
3199  * @hw: pointer to hardware structure
3200  * @dcb_config: pointer to ixgbe_dcb_config structure
3201  */
3202 static void
3203 ixgbe_dcb_rx_hw_config(struct ixgbe_hw *hw,
3204                struct ixgbe_dcb_config *dcb_config)
3205 {
3206         uint32_t reg;
3207         uint32_t vlanctrl;
3208         uint8_t i;
3209
3210         PMD_INIT_FUNC_TRACE();
3211         /*
3212          * Disable the arbiter before changing parameters
3213          * (always enable recycle mode; WSP)
3214          */
3215         reg = IXGBE_RTRPCS_RRM | IXGBE_RTRPCS_RAC | IXGBE_RTRPCS_ARBDIS;
3216         IXGBE_WRITE_REG(hw, IXGBE_RTRPCS, reg);
3217
3218         if (hw->mac.type != ixgbe_mac_82598EB) {
3219                 reg = IXGBE_READ_REG(hw, IXGBE_MRQC);
3220                 if (dcb_config->num_tcs.pg_tcs == 4) {
3221                         if (dcb_config->vt_mode)
3222                                 reg = (reg & ~IXGBE_MRQC_MRQE_MASK) |
3223                                         IXGBE_MRQC_VMDQRT4TCEN;
3224                         else {
3225                                 IXGBE_WRITE_REG(hw, IXGBE_VT_CTL, 0);
3226                                 reg = (reg & ~IXGBE_MRQC_MRQE_MASK) |
3227                                         IXGBE_MRQC_RT4TCEN;
3228                         }
3229                 }
3230                 if (dcb_config->num_tcs.pg_tcs == 8) {
3231                         if (dcb_config->vt_mode)
3232                                 reg = (reg & ~IXGBE_MRQC_MRQE_MASK) |
3233                                         IXGBE_MRQC_VMDQRT8TCEN;
3234                         else {
3235                                 IXGBE_WRITE_REG(hw, IXGBE_VT_CTL, 0);
3236                                 reg = (reg & ~IXGBE_MRQC_MRQE_MASK) |
3237                                         IXGBE_MRQC_RT8TCEN;
3238                         }
3239                 }
3240
3241                 IXGBE_WRITE_REG(hw, IXGBE_MRQC, reg);
3242         }
3243
3244         /* VLNCTRL: enable vlan filtering and allow all vlan tags through */
3245         vlanctrl = IXGBE_READ_REG(hw, IXGBE_VLNCTRL);
3246         vlanctrl |= IXGBE_VLNCTRL_VFE ; /* enable vlan filters */
3247         IXGBE_WRITE_REG(hw, IXGBE_VLNCTRL, vlanctrl);
3248
3249         /* VFTA - enable all vlan filters */
3250         for (i = 0; i < NUM_VFTA_REGISTERS; i++) {
3251                 IXGBE_WRITE_REG(hw, IXGBE_VFTA(i), 0xFFFFFFFF);
3252         }
3253
3254         /*
3255          * Configure Rx packet plane (recycle mode; WSP) and
3256          * enable arbiter
3257          */
3258         reg = IXGBE_RTRPCS_RRM | IXGBE_RTRPCS_RAC;
3259         IXGBE_WRITE_REG(hw, IXGBE_RTRPCS, reg);
3260
3261         return;
3262 }
3263
3264 static void
3265 ixgbe_dcb_hw_arbite_rx_config(struct ixgbe_hw *hw, uint16_t *refill,
3266                         uint16_t *max,uint8_t *bwg_id, uint8_t *tsa, uint8_t *map)
3267 {
3268         switch (hw->mac.type) {
3269         case ixgbe_mac_82598EB:
3270                 ixgbe_dcb_config_rx_arbiter_82598(hw, refill, max, tsa);
3271                 break;
3272         case ixgbe_mac_82599EB:
3273         case ixgbe_mac_X540:
3274         case ixgbe_mac_X550:
3275         case ixgbe_mac_X550EM_x:
3276                 ixgbe_dcb_config_rx_arbiter_82599(hw, refill, max, bwg_id,
3277                                                   tsa, map);
3278                 break;
3279         default:
3280                 break;
3281         }
3282 }
3283
3284 static void
3285 ixgbe_dcb_hw_arbite_tx_config(struct ixgbe_hw *hw, uint16_t *refill, uint16_t *max,
3286                             uint8_t *bwg_id, uint8_t *tsa, uint8_t *map)
3287 {
3288         switch (hw->mac.type) {
3289         case ixgbe_mac_82598EB:
3290                 ixgbe_dcb_config_tx_desc_arbiter_82598(hw, refill, max, bwg_id,tsa);
3291                 ixgbe_dcb_config_tx_data_arbiter_82598(hw, refill, max, bwg_id,tsa);
3292                 break;
3293         case ixgbe_mac_82599EB:
3294         case ixgbe_mac_X540:
3295         case ixgbe_mac_X550:
3296         case ixgbe_mac_X550EM_x:
3297                 ixgbe_dcb_config_tx_desc_arbiter_82599(hw, refill, max, bwg_id,tsa);
3298                 ixgbe_dcb_config_tx_data_arbiter_82599(hw, refill, max, bwg_id,tsa, map);
3299                 break;
3300         default:
3301                 break;
3302         }
3303 }
3304
3305 #define DCB_RX_CONFIG  1
3306 #define DCB_TX_CONFIG  1
3307 #define DCB_TX_PB      1024
3308 /**
3309  * ixgbe_dcb_hw_configure - Enable DCB and configure
3310  * general DCB in VT mode and non-VT mode parameters
3311  * @dev: pointer to rte_eth_dev structure
3312  * @dcb_config: pointer to ixgbe_dcb_config structure
3313  */
3314 static int
3315 ixgbe_dcb_hw_configure(struct rte_eth_dev *dev,
3316                         struct ixgbe_dcb_config *dcb_config)
3317 {
3318         int     ret = 0;
3319         uint8_t i,pfc_en,nb_tcs;
3320         uint16_t pbsize;
3321         uint8_t config_dcb_rx = 0;
3322         uint8_t config_dcb_tx = 0;
3323         uint8_t tsa[IXGBE_DCB_MAX_TRAFFIC_CLASS] = {0};
3324         uint8_t bwgid[IXGBE_DCB_MAX_TRAFFIC_CLASS] = {0};
3325         uint16_t refill[IXGBE_DCB_MAX_TRAFFIC_CLASS] = {0};
3326         uint16_t max[IXGBE_DCB_MAX_TRAFFIC_CLASS] = {0};
3327         uint8_t map[IXGBE_DCB_MAX_TRAFFIC_CLASS] = {0};
3328         struct ixgbe_dcb_tc_config *tc;
3329         uint32_t max_frame = dev->data->mtu + ETHER_HDR_LEN + ETHER_CRC_LEN;
3330         struct ixgbe_hw *hw =
3331                         IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
3332
3333         switch (dev->data->dev_conf.rxmode.mq_mode) {
3334         case ETH_MQ_RX_VMDQ_DCB:
3335                 dcb_config->vt_mode = true;
3336                 if (hw->mac.type != ixgbe_mac_82598EB) {
3337                         config_dcb_rx = DCB_RX_CONFIG;
3338                         /*
3339                          * get dcb and VT rx configuration parameters
3340                          * from rte_eth_conf
3341                          */
3342                         ixgbe_vmdq_dcb_rx_config(dev, dcb_config);
3343                         /* Configure general VMDQ and DCB RX parameters */
3344                         ixgbe_vmdq_dcb_configure(dev);
3345                 }
3346                 break;
3347         case ETH_MQ_RX_DCB:
3348                 dcb_config->vt_mode = false;
3349                 config_dcb_rx = DCB_RX_CONFIG;
3350                 /* Get dcb RX configuration parameters from rte_eth_conf */
3351                 ixgbe_dcb_rx_config(dev, dcb_config);
3352                 /* Configure general DCB RX parameters */
3353                 ixgbe_dcb_rx_hw_config(hw, dcb_config);
3354                 break;
3355         default:
3356                 PMD_INIT_LOG(ERR, "Incorrect DCB RX mode configuration");
3357                 break;
3358         }
3359         switch (dev->data->dev_conf.txmode.mq_mode) {
3360         case ETH_MQ_TX_VMDQ_DCB:
3361                 dcb_config->vt_mode = true;
3362                 config_dcb_tx = DCB_TX_CONFIG;
3363                 /* get DCB and VT TX configuration parameters from rte_eth_conf */
3364                 ixgbe_dcb_vt_tx_config(dev, dcb_config);
3365                 /* Configure general VMDQ and DCB TX parameters */
3366                 ixgbe_vmdq_dcb_hw_tx_config(dev, dcb_config);
3367                 break;
3368
3369         case ETH_MQ_TX_DCB:
3370                 dcb_config->vt_mode = false;
3371                 config_dcb_tx = DCB_TX_CONFIG;
3372                 /* get DCB TX configuration parameters from rte_eth_conf */
3373                 ixgbe_dcb_tx_config(dev, dcb_config);
3374                 /* Configure general DCB TX parameters */
3375                 ixgbe_dcb_tx_hw_config(hw, dcb_config);
3376                 break;
3377         default:
3378                 PMD_INIT_LOG(ERR, "Incorrect DCB TX mode configuration");
3379                 break;
3380         }
3381
3382         nb_tcs = dcb_config->num_tcs.pfc_tcs;
3383         /* Unpack map */
3384         ixgbe_dcb_unpack_map_cee(dcb_config, IXGBE_DCB_RX_CONFIG, map);
3385         if (nb_tcs == ETH_4_TCS) {
3386                 /* Avoid un-configured priority mapping to TC0 */
3387                 uint8_t j = 4;
3388                 uint8_t mask = 0xFF;
3389                 for (i = 0; i < ETH_DCB_NUM_USER_PRIORITIES - 4; i++)
3390                         mask = (uint8_t)(mask & (~ (1 << map[i])));
3391                 for (i = 0; mask && (i < IXGBE_DCB_MAX_TRAFFIC_CLASS); i++) {
3392                         if ((mask & 0x1) && (j < ETH_DCB_NUM_USER_PRIORITIES))
3393                                 map[j++] = i;
3394                         mask >>= 1;
3395                 }
3396                 /* Re-configure 4 TCs BW */
3397                 for (i = 0; i < nb_tcs; i++) {
3398                         tc = &dcb_config->tc_config[i];
3399                         tc->path[IXGBE_DCB_TX_CONFIG].bwg_percent =
3400                                                 (uint8_t)(100 / nb_tcs);
3401                         tc->path[IXGBE_DCB_RX_CONFIG].bwg_percent =
3402                                                 (uint8_t)(100 / nb_tcs);
3403                 }
3404                 for (; i < IXGBE_DCB_MAX_TRAFFIC_CLASS; i++) {
3405                         tc = &dcb_config->tc_config[i];
3406                         tc->path[IXGBE_DCB_TX_CONFIG].bwg_percent = 0;
3407                         tc->path[IXGBE_DCB_RX_CONFIG].bwg_percent = 0;
3408                 }
3409         }
3410
3411         if (config_dcb_rx) {
3412                 /* Set RX buffer size */
3413                 pbsize = (uint16_t)(NIC_RX_BUFFER_SIZE / nb_tcs);
3414                 uint32_t rxpbsize = pbsize << IXGBE_RXPBSIZE_SHIFT;
3415                 for (i = 0; i < nb_tcs; i++) {
3416                         IXGBE_WRITE_REG(hw, IXGBE_RXPBSIZE(i), rxpbsize);
3417                 }
3418                 /* zero alloc all unused TCs */
3419                 for (; i < ETH_DCB_NUM_USER_PRIORITIES; i++) {
3420                         IXGBE_WRITE_REG(hw, IXGBE_RXPBSIZE(i), 0);
3421                 }
3422         }
3423         if (config_dcb_tx) {
3424                 /* Only support an equally distributed Tx packet buffer strategy. */
3425                 uint32_t txpktsize = IXGBE_TXPBSIZE_MAX / nb_tcs;
3426                 uint32_t txpbthresh = (txpktsize / DCB_TX_PB) - IXGBE_TXPKT_SIZE_MAX;
3427                 for (i = 0; i < nb_tcs; i++) {
3428                         IXGBE_WRITE_REG(hw, IXGBE_TXPBSIZE(i), txpktsize);
3429                         IXGBE_WRITE_REG(hw, IXGBE_TXPBTHRESH(i), txpbthresh);
3430                 }
3431                 /* Clear unused TCs, if any, to zero buffer size*/
3432                 for (; i < ETH_DCB_NUM_USER_PRIORITIES; i++) {
3433                         IXGBE_WRITE_REG(hw, IXGBE_TXPBSIZE(i), 0);
3434                         IXGBE_WRITE_REG(hw, IXGBE_TXPBTHRESH(i), 0);
3435                 }
3436         }
3437
3438         /* Calculate traffic class credits */
3439         ixgbe_dcb_calculate_tc_credits_cee(hw, dcb_config, max_frame,
3440                                 IXGBE_DCB_TX_CONFIG);
3441         ixgbe_dcb_calculate_tc_credits_cee(hw, dcb_config, max_frame,
3442                                 IXGBE_DCB_RX_CONFIG);
3443
3444         if (config_dcb_rx) {
3445                 /* Unpack CEE standard containers */
3446                 ixgbe_dcb_unpack_refill_cee(dcb_config, IXGBE_DCB_RX_CONFIG, refill);
3447                 ixgbe_dcb_unpack_max_cee(dcb_config, max);
3448                 ixgbe_dcb_unpack_bwgid_cee(dcb_config, IXGBE_DCB_RX_CONFIG, bwgid);
3449                 ixgbe_dcb_unpack_tsa_cee(dcb_config, IXGBE_DCB_RX_CONFIG, tsa);
3450                 /* Configure PG(ETS) RX */
3451                 ixgbe_dcb_hw_arbite_rx_config(hw, refill, max, bwgid, tsa, map);
3452         }
3453
3454         if (config_dcb_tx) {
3455                 /* Unpack CEE standard containers */
3456                 ixgbe_dcb_unpack_refill_cee(dcb_config, IXGBE_DCB_TX_CONFIG, refill);
3457                 ixgbe_dcb_unpack_max_cee(dcb_config, max);
3458                 ixgbe_dcb_unpack_bwgid_cee(dcb_config, IXGBE_DCB_TX_CONFIG, bwgid);
3459                 ixgbe_dcb_unpack_tsa_cee(dcb_config, IXGBE_DCB_TX_CONFIG, tsa);
3460                 /* Configure PG(ETS) TX */
3461                 ixgbe_dcb_hw_arbite_tx_config(hw, refill, max, bwgid, tsa, map);
3462         }
3463
3464         /* Configure queue statistics registers */
3465         ixgbe_dcb_config_tc_stats_82599(hw, dcb_config);
3466
3467         /* Check if the PFC is supported */
3468         if (dev->data->dev_conf.dcb_capability_en & ETH_DCB_PFC_SUPPORT) {
3469                 pbsize = (uint16_t)(NIC_RX_BUFFER_SIZE / nb_tcs);
3470                 for (i = 0; i < nb_tcs; i++) {
3471                         /*
3472                          * With a TC count of 8 this gives the default
3473                          * high_water of 48 and low_water of 16.
3474                          */
3475                         hw->fc.high_water[i] = (pbsize * 3) / 4;
3476                         hw->fc.low_water[i] = pbsize / 4;
3477                         /* Enable pfc for this TC */
3478                         tc = &dcb_config->tc_config[i];
3479                         tc->pfc = ixgbe_dcb_pfc_enabled;
3480                 }
3481                 ixgbe_dcb_unpack_pfc_cee(dcb_config, map, &pfc_en);
3482                 if (dcb_config->num_tcs.pfc_tcs == ETH_4_TCS)
3483                         pfc_en &= 0x0F;
3484                 ret = ixgbe_dcb_config_pfc(hw, pfc_en, map);
3485         }
3486
3487         return ret;
3488 }
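
/*
 * Editor's note -- worked example (illustrative, not part of the driver):
 * for a 4-TC configuration the function above ends up with
 *
 *     bwg_percent = 100 / 4 = 25              for TC0..TC3 (0 for the rest)
 *     pbsize      = NIC_RX_BUFFER_SIZE / 4 = 128
 *     high_water  = (128 * 3) / 4 = 96        (when PFC is enabled)
 *     low_water   = 128 / 4 = 32
 *
 * the 8-TC case gives pbsize = 64 and the 48/16 watermarks mentioned in the
 * PFC comment above.
 */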
3489
3490 /**
3491  * ixgbe_configure_dcb - Configure DCB  Hardware
3492  * @dev: pointer to rte_eth_dev
3493  */
3494 void ixgbe_configure_dcb(struct rte_eth_dev *dev)
3495 {
3496         struct ixgbe_dcb_config *dcb_cfg =
3497                         IXGBE_DEV_PRIVATE_TO_DCB_CFG(dev->data->dev_private);
3498         struct rte_eth_conf *dev_conf = &(dev->data->dev_conf);
3499
3500         PMD_INIT_FUNC_TRACE();
3501
3502         /* check support mq_mode for DCB */
3503         if ((dev_conf->rxmode.mq_mode != ETH_MQ_RX_VMDQ_DCB) &&
3504             (dev_conf->rxmode.mq_mode != ETH_MQ_RX_DCB))
3505                 return;
3506
3507         if (dev->data->nb_rx_queues != ETH_DCB_NUM_QUEUES)
3508                 return;
3509
3510         /** Configure DCB hardware **/
3511         ixgbe_dcb_hw_configure(dev,dcb_cfg);
3512
3513         return;
3514 }
3515
3516 /*
3517  * VMDq is only supported on 10 GbE NICs.
3518  */
3519 static void
3520 ixgbe_vmdq_rx_hw_configure(struct rte_eth_dev *dev)
3521 {
3522         struct rte_eth_vmdq_rx_conf *cfg;
3523         struct ixgbe_hw *hw;
3524         enum rte_eth_nb_pools num_pools;
3525         uint32_t mrqc, vt_ctl, vlanctrl;
3526         uint32_t vmolr = 0;
3527         int i;
3528
3529         PMD_INIT_FUNC_TRACE();
3530         hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
3531         cfg = &dev->data->dev_conf.rx_adv_conf.vmdq_rx_conf;
3532         num_pools = cfg->nb_queue_pools;
3533
3534         ixgbe_rss_disable(dev);
3535
3536         /* MRQC: enable vmdq */
3537         mrqc = IXGBE_MRQC_VMDQEN;
3538         IXGBE_WRITE_REG(hw, IXGBE_MRQC, mrqc);
3539
3540         /* PFVTCTL: turn on virtualisation and set the default pool */
3541         vt_ctl = IXGBE_VT_CTL_VT_ENABLE | IXGBE_VT_CTL_REPLEN;
3542         if (cfg->enable_default_pool)
3543                 vt_ctl |= (cfg->default_pool << IXGBE_VT_CTL_POOL_SHIFT);
3544         else
3545                 vt_ctl |= IXGBE_VT_CTL_DIS_DEFPL;
3546
3547         IXGBE_WRITE_REG(hw, IXGBE_VT_CTL, vt_ctl);
3548
3549         for (i = 0; i < (int)num_pools; i++) {
3550                 vmolr = ixgbe_convert_vm_rx_mask_to_val(cfg->rx_mode, vmolr);
3551                 IXGBE_WRITE_REG(hw, IXGBE_VMOLR(i), vmolr);
3552         }
3553
3554         /* VLNCTRL: enable vlan filtering and allow all vlan tags through */
3555         vlanctrl = IXGBE_READ_REG(hw, IXGBE_VLNCTRL);
3556         vlanctrl |= IXGBE_VLNCTRL_VFE ; /* enable vlan filters */
3557         IXGBE_WRITE_REG(hw, IXGBE_VLNCTRL, vlanctrl);
3558
3559         /* VFTA - enable all vlan filters */
3560         for (i = 0; i < NUM_VFTA_REGISTERS; i++)
3561                 IXGBE_WRITE_REG(hw, IXGBE_VFTA(i), UINT32_MAX);
3562
3563         /* VFRE: pool enabling for receive - 64 */
3564         IXGBE_WRITE_REG(hw, IXGBE_VFRE(0), UINT32_MAX);
3565         if (num_pools == ETH_64_POOLS)
3566                 IXGBE_WRITE_REG(hw, IXGBE_VFRE(1), UINT32_MAX);
3567
3568         /*
3569          * MPSAR - allow pools to read specific mac addresses
3570          * In this case, all pools should be able to read from mac addr 0
3571          */
3572         IXGBE_WRITE_REG(hw, IXGBE_MPSAR_LO(0), UINT32_MAX);
3573         IXGBE_WRITE_REG(hw, IXGBE_MPSAR_HI(0), UINT32_MAX);
3574
3575         /* PFVLVF, PFVLVFB: set up filters for vlan tags as configured */
3576         for (i = 0; i < cfg->nb_pool_maps; i++) {
3577                 /* set vlan id in VF register and set the valid bit */
3578                 IXGBE_WRITE_REG(hw, IXGBE_VLVF(i), (IXGBE_VLVF_VIEN |
3579                                 (cfg->pool_map[i].vlan_id & IXGBE_RXD_VLAN_ID_MASK)));
3580                 /*
3581                  * Put the allowed pools in VFB reg. As we only have 16 or 64
3582                  * pools, we only need to use the first half of the register
3583                  * i.e. bits 0-31
3584                  */
3585                 if (((cfg->pool_map[i].pools >> 32) & UINT32_MAX) == 0)
3586                         IXGBE_WRITE_REG(hw, IXGBE_VLVFB(i*2),
3587                                         (cfg->pool_map[i].pools & UINT32_MAX));
3588                 else
3589                         IXGBE_WRITE_REG(hw, IXGBE_VLVFB((i*2+1)),
3590                                         ((cfg->pool_map[i].pools >> 32)
3591                                         & UINT32_MAX));
3592
3593         }
3594
3595         /* PFDMA Tx General Switch Control Enables VMDQ loopback */
3596         if (cfg->enable_loop_back) {
3597                 IXGBE_WRITE_REG(hw, IXGBE_PFDTXGSWC, IXGBE_PFDTXGSWC_VT_LBEN);
3598                 for (i = 0; i < RTE_IXGBE_VMTXSW_REGISTER_COUNT; i++)
3599                         IXGBE_WRITE_REG(hw, IXGBE_VMTXSW(i), UINT32_MAX);
3600         }
3601
3602         IXGBE_WRITE_FLUSH(hw);
3603 }
3604
3605 /*
3606  * ixgbe_dcb_config_tx_hw_config - Configure general VMDq TX parameters
3607  * @hw: pointer to hardware structure
3608  */
3609 static void
3610 ixgbe_vmdq_tx_hw_configure(struct ixgbe_hw *hw)
3611 {
3612         uint32_t reg;
3613         uint32_t q;
3614
3615         PMD_INIT_FUNC_TRACE();
3616         /*PF VF Transmit Enable*/
3617         IXGBE_WRITE_REG(hw, IXGBE_VFTE(0), UINT32_MAX);
3618         IXGBE_WRITE_REG(hw, IXGBE_VFTE(1), UINT32_MAX);
3619
3620         /* Disable the Tx desc arbiter so that MTQC can be changed */
3621         reg = IXGBE_READ_REG(hw, IXGBE_RTTDCS);
3622         reg |= IXGBE_RTTDCS_ARBDIS;
3623         IXGBE_WRITE_REG(hw, IXGBE_RTTDCS, reg);
3624
3625         reg = IXGBE_MTQC_VT_ENA | IXGBE_MTQC_64VF;
3626         IXGBE_WRITE_REG(hw, IXGBE_MTQC, reg);
3627
3628         /* Disable drop for all queues */
3629         for (q = 0; q < IXGBE_MAX_RX_QUEUE_NUM; q++)
3630                 IXGBE_WRITE_REG(hw, IXGBE_QDE,
3631                   (IXGBE_QDE_WRITE | (q << IXGBE_QDE_IDX_SHIFT)));
3632
3633         /* Enable the Tx desc arbiter */
3634         reg = IXGBE_READ_REG(hw, IXGBE_RTTDCS);
3635         reg &= ~IXGBE_RTTDCS_ARBDIS;
3636         IXGBE_WRITE_REG(hw, IXGBE_RTTDCS, reg);
3637
3638         IXGBE_WRITE_FLUSH(hw);
3639
3640         return;
3641 }
3642
3643 static int __attribute__((cold))
3644 ixgbe_alloc_rx_queue_mbufs(struct ixgbe_rx_queue *rxq)
3645 {
3646         struct ixgbe_rx_entry *rxe = rxq->sw_ring;
3647         uint64_t dma_addr;
3648         unsigned i;
3649
3650         /* Initialize software ring entries */
3651         for (i = 0; i < rxq->nb_rx_desc; i++) {
3652                 volatile union ixgbe_adv_rx_desc *rxd;
3653                 struct rte_mbuf *mbuf = rte_rxmbuf_alloc(rxq->mb_pool);
3654                 if (mbuf == NULL) {
3655                         PMD_INIT_LOG(ERR, "RX mbuf alloc failed queue_id=%u",
3656                                      (unsigned) rxq->queue_id);
3657                         return (-ENOMEM);
3658                 }
3659
3660                 rte_mbuf_refcnt_set(mbuf, 1);
3661                 mbuf->next = NULL;
3662                 mbuf->data_off = RTE_PKTMBUF_HEADROOM;
3663                 mbuf->nb_segs = 1;
3664                 mbuf->port = rxq->port_id;
3665
3666                 dma_addr =
3667                         rte_cpu_to_le_64(RTE_MBUF_DATA_DMA_ADDR_DEFAULT(mbuf));
3668                 rxd = &rxq->rx_ring[i];
3669                 rxd->read.hdr_addr = 0;
3670                 rxd->read.pkt_addr = dma_addr;
3671                 rxe[i].mbuf = mbuf;
3672         }
3673
3674         return 0;
3675 }
3676
3677 static int
3678 ixgbe_config_vf_rss(struct rte_eth_dev *dev)
3679 {
3680         struct ixgbe_hw *hw;
3681         uint32_t mrqc;
3682
3683         ixgbe_rss_configure(dev);
3684
3685         hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
3686
3687         /* MRQC: enable VF RSS */
3688         mrqc = IXGBE_READ_REG(hw, IXGBE_MRQC);
3689         mrqc &= ~IXGBE_MRQC_MRQE_MASK;
3690         switch (RTE_ETH_DEV_SRIOV(dev).active) {
3691         case ETH_64_POOLS:
3692                 mrqc |= IXGBE_MRQC_VMDQRSS64EN;
3693                 break;
3694
3695         case ETH_32_POOLS:
3696                 mrqc |= IXGBE_MRQC_VMDQRSS32EN;
3697                 break;
3698
3699         default:
3700                 PMD_INIT_LOG(ERR, "Invalid pool number in IOV mode with VMDQ RSS");
3701                 return -EINVAL;
3702         }
3703
3704         IXGBE_WRITE_REG(hw, IXGBE_MRQC, mrqc);
3705
3706         return 0;
3707 }
3708
3709 static int
3710 ixgbe_config_vf_default(struct rte_eth_dev *dev)
3711 {
3712         struct ixgbe_hw *hw =
3713                 IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
3714
3715         switch (RTE_ETH_DEV_SRIOV(dev).active) {
3716         case ETH_64_POOLS:
3717                 IXGBE_WRITE_REG(hw, IXGBE_MRQC,
3718                         IXGBE_MRQC_VMDQEN);
3719                 break;
3720
3721         case ETH_32_POOLS:
3722                 IXGBE_WRITE_REG(hw, IXGBE_MRQC,
3723                         IXGBE_MRQC_VMDQRT4TCEN);
3724                 break;
3725
3726         case ETH_16_POOLS:
3727                 IXGBE_WRITE_REG(hw, IXGBE_MRQC,
3728                         IXGBE_MRQC_VMDQRT8TCEN);
3729                 break;
3730         default:
3731                 PMD_INIT_LOG(ERR,
3732                         "invalid pool number in IOV mode");
3733                 break;
3734         }
3735         return 0;
3736 }
3737
3738 static int
3739 ixgbe_dev_mq_rx_configure(struct rte_eth_dev *dev)
3740 {
3741         struct ixgbe_hw *hw =
3742                 IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
3743
3744         if (hw->mac.type == ixgbe_mac_82598EB)
3745                 return 0;
3746
3747         if (RTE_ETH_DEV_SRIOV(dev).active == 0) {
3748                 /*
3749                  * SRIOV inactive scheme
3750                  * any DCB/RSS w/o VMDq multi-queue setting
3751                  */
3752                 switch (dev->data->dev_conf.rxmode.mq_mode) {
3753                         case ETH_MQ_RX_RSS:
3754                                 ixgbe_rss_configure(dev);
3755                                 break;
3756
3757                         case ETH_MQ_RX_VMDQ_DCB:
3758                                 ixgbe_vmdq_dcb_configure(dev);
3759                                 break;
3760
3761                         case ETH_MQ_RX_VMDQ_ONLY:
3762                                 ixgbe_vmdq_rx_hw_configure(dev);
3763                                 break;
3764
3765                         case ETH_MQ_RX_NONE:
3766                                 /* if mq_mode is none, disable rss mode.*/
3767                         default: ixgbe_rss_disable(dev);
3768                 }
3769         } else {
3770                 /*
3771                  * SRIOV active scheme
3772                  * Support RSS together with VMDq & SRIOV
3773                  */
3774                 switch (dev->data->dev_conf.rxmode.mq_mode) {
3775                 case ETH_MQ_RX_RSS:
3776                 case ETH_MQ_RX_VMDQ_RSS:
3777                         ixgbe_config_vf_rss(dev);
3778                         break;
3779
3780                 /* FIXME if support DCB/RSS together with VMDq & SRIOV */
3781                 case ETH_MQ_RX_VMDQ_DCB:
3782                 case ETH_MQ_RX_VMDQ_DCB_RSS:
3783                         PMD_INIT_LOG(ERR,
3784                                 "Could not support DCB with VMDq & SRIOV");
3785                         return -1;
3786                 default:
3787                         ixgbe_config_vf_default(dev);
3788                         break;
3789                 }
3790         }
3791
3792         return 0;
3793 }
3794
3795 static int
3796 ixgbe_dev_mq_tx_configure(struct rte_eth_dev *dev)
3797 {
3798         struct ixgbe_hw *hw =
3799                 IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
3800         uint32_t mtqc;
3801         uint32_t rttdcs;
3802
3803         if (hw->mac.type == ixgbe_mac_82598EB)
3804                 return 0;
3805
3806         /* disable arbiter before setting MTQC */
3807         rttdcs = IXGBE_READ_REG(hw, IXGBE_RTTDCS);
3808         rttdcs |= IXGBE_RTTDCS_ARBDIS;
3809         IXGBE_WRITE_REG(hw, IXGBE_RTTDCS, rttdcs);
3810
3811         if (RTE_ETH_DEV_SRIOV(dev).active == 0) {
3812                 /*
3813                  * SRIOV inactive scheme
3814                  * any DCB w/o VMDq multi-queue setting
3815                  */
3816                 if (dev->data->dev_conf.txmode.mq_mode == ETH_MQ_TX_VMDQ_ONLY)
3817                         ixgbe_vmdq_tx_hw_configure(hw);
3818                 else {
3819                         mtqc = IXGBE_MTQC_64Q_1PB;
3820                         IXGBE_WRITE_REG(hw, IXGBE_MTQC, mtqc);
3821                 }
3822         } else {
3823                 switch (RTE_ETH_DEV_SRIOV(dev).active) {
3824
3825                 /*
3826                  * SRIOV active scheme
3827                  * FIXME if support DCB together with VMDq & SRIOV
3828                  */
3829                 case ETH_64_POOLS:
3830                         mtqc = IXGBE_MTQC_VT_ENA | IXGBE_MTQC_64VF;
3831                         break;
3832                 case ETH_32_POOLS:
3833                         mtqc = IXGBE_MTQC_VT_ENA | IXGBE_MTQC_32VF;
3834                         break;
3835                 case ETH_16_POOLS:
3836                         mtqc = IXGBE_MTQC_VT_ENA | IXGBE_MTQC_RT_ENA |
3837                                 IXGBE_MTQC_8TC_8TQ;
3838                         break;
3839                 default:
3840                         mtqc = IXGBE_MTQC_64Q_1PB;
3841                         PMD_INIT_LOG(ERR, "invalid pool number in IOV mode");
3842                 }
3843                 IXGBE_WRITE_REG(hw, IXGBE_MTQC, mtqc);
3844         }
3845
3846         /* re-enable arbiter */
3847         rttdcs &= ~IXGBE_RTTDCS_ARBDIS;
3848         IXGBE_WRITE_REG(hw, IXGBE_RTTDCS, rttdcs);
3849
3850         return 0;
3851 }
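
/*
 * Editor's note -- summary (illustrative, not part of the driver): the MTQC
 * value selected above depends on the SR-IOV pool count, e.g.
 *
 *     SR-IOV off, no VMDq TX  ->  IXGBE_MTQC_64Q_1PB
 *     64 pools                ->  IXGBE_MTQC_VT_ENA | IXGBE_MTQC_64VF
 *     32 pools                ->  IXGBE_MTQC_VT_ENA | IXGBE_MTQC_32VF
 *     16 pools                ->  IXGBE_MTQC_VT_ENA | IXGBE_MTQC_RT_ENA |
 *                                 IXGBE_MTQC_8TC_8TQ
 *
 * and the TX descriptor arbiter (RTTDCS.ARBDIS) stays disabled while MTQC is
 * rewritten, then is re-enabled at the end.
 */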
3852
3853 /**
3854  * ixgbe_get_rscctl_maxdesc - Calculate the RSCCTL[n].MAXDESC for PF
3855  *
3856  * Return the RSCCTL[n].MAXDESC for 82599 and x540 PF devices according to the
3857  * spec rev. 3.0 chapter 8.2.3.8.13.
3858  *
3859  * @pool Memory pool of the Rx queue
3860  */
3861 static inline uint32_t
3862 ixgbe_get_rscctl_maxdesc(struct rte_mempool *pool)
3863 {
3864         struct rte_pktmbuf_pool_private *mp_priv = rte_mempool_get_priv(pool);
3865
3866         /* MAXDESC * SRRCTL.BSIZEPKT must not exceed 64 KB minus one */
3867         uint16_t maxdesc =
3868                 IPV4_MAX_PKT_LEN /
3869                         (mp_priv->mbuf_data_room_size - RTE_PKTMBUF_HEADROOM);
3870
3871         if (maxdesc >= 16)
3872                 return IXGBE_RSCCTL_MAXDESC_16;
3873         else if (maxdesc >= 8)
3874                 return IXGBE_RSCCTL_MAXDESC_8;
3875         else if (maxdesc >= 4)
3876                 return IXGBE_RSCCTL_MAXDESC_4;
3877         else
3878                 return IXGBE_RSCCTL_MAXDESC_1;
3879 }
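
/*
 * Editor's note -- worked example (illustrative, not part of the driver; the
 * buffer sizes are assumptions): for a mempool whose mbuf_data_room_size is
 * 2176 bytes (2 KB of data plus a 128-byte RTE_PKTMBUF_HEADROOM), each RSC
 * buffer holds 2048 bytes, so taking IPV4_MAX_PKT_LEN as 65535
 *
 *     maxdesc = 65535 / 2048 = 31  ->  IXGBE_RSCCTL_MAXDESC_16
 *
 * i.e. the largest supported MAXDESC value (16, 8, 4 or 1) not exceeding
 * that count is selected.
 */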
3880
3881 /**
3882  * ixgbe_set_ivar - Setup the correct IVAR register for a particular MSIX
3883  * interrupt
3884  *
3885  * (Taken from FreeBSD tree)
3886  * (yes this is all very magic and confusing :)
3887  *
3888  * @dev port handle
3889  * @entry the register array entry
3890  * @vector the MSIX vector for this queue
3891  * @type RX/TX/MISC
3892  */
3893 static void
3894 ixgbe_set_ivar(struct rte_eth_dev *dev, u8 entry, u8 vector, s8 type)
3895 {
3896         struct ixgbe_hw *hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
3897         u32 ivar, index;
3898
3899         vector |= IXGBE_IVAR_ALLOC_VAL;
3900
3901         switch (hw->mac.type) {
3902
3903         case ixgbe_mac_82598EB:
3904                 if (type == -1)
3905                         entry = IXGBE_IVAR_OTHER_CAUSES_INDEX;
3906                 else
3907                         entry += (type * 64);
3908                 index = (entry >> 2) & 0x1F;
3909                 ivar = IXGBE_READ_REG(hw, IXGBE_IVAR(index));
3910                 ivar &= ~(0xFF << (8 * (entry & 0x3)));
3911                 ivar |= (vector << (8 * (entry & 0x3)));
3912                 IXGBE_WRITE_REG(hw, IXGBE_IVAR(index), ivar);
3913                 break;
3914
3915         case ixgbe_mac_82599EB:
3916         case ixgbe_mac_X540:
3917                 if (type == -1) { /* MISC IVAR */
3918                         index = (entry & 1) * 8;
3919                         ivar = IXGBE_READ_REG(hw, IXGBE_IVAR_MISC);
3920                         ivar &= ~(0xFF << index);
3921                         ivar |= (vector << index);
3922                         IXGBE_WRITE_REG(hw, IXGBE_IVAR_MISC, ivar);
3923                 } else {        /* RX/TX IVARS */
3924                         index = (16 * (entry & 1)) + (8 * type);
3925                         ivar = IXGBE_READ_REG(hw, IXGBE_IVAR(entry >> 1));
3926                         ivar &= ~(0xFF << index);
3927                         ivar |= (vector << index);
3928                         IXGBE_WRITE_REG(hw, IXGBE_IVAR(entry >> 1), ivar);
3929                 }
3930
3931                 break;
3932
3933         default:
3934                 break;
3935         }
3936 }
3937
3938 void __attribute__((cold))
3939 ixgbe_set_rx_function(struct rte_eth_dev *dev)
3940 {
3941         uint16_t i, rx_using_sse;
3942         struct ixgbe_adapter *adapter =
3943                 (struct ixgbe_adapter *)dev->data->dev_private;
3944
3945         /*
3946          * In order to allow Vector Rx there are a few configuration
3947          * conditions to be met and Rx Bulk Allocation should be allowed.
3948          */
3949         if (ixgbe_rx_vec_dev_conf_condition_check(dev) ||
3950             !adapter->rx_bulk_alloc_allowed) {
3951                 PMD_INIT_LOG(DEBUG, "Port[%d] doesn't meet Vector Rx "
3952                                     "preconditions or RTE_IXGBE_INC_VECTOR is "
3953                                     "not enabled",
3954                              dev->data->port_id);
3955
3956                 adapter->rx_vec_allowed = false;
3957         }
3958
3959         /*
3960          * Initialize the appropriate LRO callback.
3961          *
3962          * If all queues satisfy the bulk allocation preconditions
3963          * (adapter->rx_bulk_alloc_allowed is TRUE) then we may use bulk allocation.
3964          * Otherwise use a single allocation version.
3965          */
3966         if (dev->data->lro) {
3967                 if (adapter->rx_bulk_alloc_allowed) {
3968                         PMD_INIT_LOG(DEBUG, "LRO is requested. Using a bulk "
3969                                            "allocation version");
3970                         dev->rx_pkt_burst = ixgbe_recv_pkts_lro_bulk_alloc;
3971                 } else {
3972                         PMD_INIT_LOG(DEBUG, "LRO is requested. Using a single "
3973                                            "allocation version");
3974                         dev->rx_pkt_burst = ixgbe_recv_pkts_lro_single_alloc;
3975                 }
3976         } else if (dev->data->scattered_rx) {
3977                 /*
3978                  * Set the non-LRO scattered callback: there are Vector and
3979                  * single allocation versions.
3980                  */
3981                 if (adapter->rx_vec_allowed) {
3982                         PMD_INIT_LOG(DEBUG, "Using Vector Scattered Rx "
3983                                             "callback (port=%d).",
3984                                      dev->data->port_id);
3985
3986                         dev->rx_pkt_burst = ixgbe_recv_scattered_pkts_vec;
3987                 } else if (adapter->rx_bulk_alloc_allowed) {
3988                         PMD_INIT_LOG(DEBUG, "Using a Scattered Rx with bulk "
3989                                            "allocation callback (port=%d).",
3990                                      dev->data->port_id);
3991                         dev->rx_pkt_burst = ixgbe_recv_pkts_lro_bulk_alloc;
3992                 } else {
3993                         PMD_INIT_LOG(DEBUG, "Using Regular (non-vector, "
3994                                             "single allocation) "
3995                                             "Scattered Rx callback "
3996                                             "(port=%d).",
3997                                      dev->data->port_id);
3998
3999                         dev->rx_pkt_burst = ixgbe_recv_pkts_lro_single_alloc;
4000                 }
4001         /*
4002          * Below we set "simple" callbacks according to port/queue parameters.
4003          * If parameters allow we are going to choose between the following
4004          * callbacks:
4005          *    - Vector
4006          *    - Bulk Allocation
4007          *    - Single buffer allocation (the simplest one)
4008          */
4009         } else if (adapter->rx_vec_allowed) {
4010                 PMD_INIT_LOG(DEBUG, "Vector rx enabled, please make sure RX "
4011                                    "burst size is no less than 32.");
4012
4013                 dev->rx_pkt_burst = ixgbe_recv_pkts_vec;
4014         } else if (adapter->rx_bulk_alloc_allowed) {
4015                 PMD_INIT_LOG(DEBUG, "Rx Burst Bulk Alloc Preconditions are "
4016                                     "satisfied. Rx Burst Bulk Alloc function "
4017                                     "will be used on port=%d.",
4018                              dev->data->port_id);
4019
4020                 dev->rx_pkt_burst = ixgbe_recv_pkts_bulk_alloc;
4021         } else {
4022                 PMD_INIT_LOG(DEBUG, "Rx Burst Bulk Alloc Preconditions are not "
4023                                     "satisfied, or Scattered Rx is requested "
4024                                     "(port=%d).",
4025                              dev->data->port_id);
4026
4027                 dev->rx_pkt_burst = ixgbe_recv_pkts;
4028         }
4029
4030         /* Propagate information about RX function choice through all queues. */
4031
4032         rx_using_sse =
4033                 (dev->rx_pkt_burst == ixgbe_recv_scattered_pkts_vec ||
4034                 dev->rx_pkt_burst == ixgbe_recv_pkts_vec);
4035
4036         for (i = 0; i < dev->data->nb_rx_queues; i++) {
4037                 struct ixgbe_rx_queue *rxq = dev->data->rx_queues[i];
4038                 rxq->rx_using_sse = rx_using_sse;
4039         }
4040 }
4041
4042 /**
4043  * ixgbe_set_rsc - configure RSC related port HW registers
4044  *
4045  * Configures the port's RSC related registers according to the 4.6.7.2 chapter
4046  * of 82599 Spec (x540 configuration is virtually the same).
4047  *
4048  * @dev port handle
4049  *
4050  * Returns 0 in case of success or a non-zero error code
4051  */
4052 static int
4053 ixgbe_set_rsc(struct rte_eth_dev *dev)
4054 {
4055         struct rte_eth_rxmode *rx_conf = &dev->data->dev_conf.rxmode;
4056         struct ixgbe_hw *hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
4057         struct rte_eth_dev_info dev_info = { 0 };
4058         bool rsc_capable = false;
4059         uint16_t i;
4060         uint32_t rdrxctl;
4061
4062         /* Check whether the HW is capable of RSC/LRO at all */
4063         dev->dev_ops->dev_infos_get(dev, &dev_info);
4064         if (dev_info.rx_offload_capa & DEV_RX_OFFLOAD_TCP_LRO)
4065                 rsc_capable = true;
4066
4067         if (!rsc_capable && rx_conf->enable_lro) {
4068                 PMD_INIT_LOG(CRIT, "LRO is requested on HW that doesn't "
4069                                    "support it");
4070                 return -EINVAL;
4071         }
4072
4073         /* RSC global configuration (chapter 4.6.7.2.1 of 82599 Spec) */
4074
4075         if (!rx_conf->hw_strip_crc && rx_conf->enable_lro) {
4076                 /*
4077                  * According to chapter 4.6.7.2.1 of the Spec Rev.
4078                  * 3.0, RSC configuration requires HW CRC stripping to be
4079                  * enabled. If the user requested both HW CRC stripping off
4080                  * and RSC on, return an error.
4081                  */
4082                 PMD_INIT_LOG(CRIT, "LRO can't be enabled when HW CRC "
4083                                     "stripping is disabled");
4084                 return -EINVAL;
4085         }
4086
4087         /* RFCTL configuration  */
4088         if (rsc_capable) {
4089                 uint32_t rfctl = IXGBE_READ_REG(hw, IXGBE_RFCTL);
4090                 if (rx_conf->enable_lro)
4091                         /*
4092                          * Since NFS packet coalescing is not supported, clear
4093                          * RFCTL.NFSW_DIS and RFCTL.NFSR_DIS when RSC is
4094                          * enabled.
4095                          */
4096                         rfctl &= ~(IXGBE_RFCTL_RSC_DIS | IXGBE_RFCTL_NFSW_DIS |
4097                                    IXGBE_RFCTL_NFSR_DIS);
4098                 else
4099                         rfctl |= IXGBE_RFCTL_RSC_DIS;
4100
4101                 IXGBE_WRITE_REG(hw, IXGBE_RFCTL, rfctl);
4102         }
4103
4104         /* If LRO hasn't been requested - we are done here. */
4105         if (!rx_conf->enable_lro)
4106                 return 0;
4107
4108         /* Set RDRXCTL.RSCACKC bit */
4109         rdrxctl = IXGBE_READ_REG(hw, IXGBE_RDRXCTL);
4110         rdrxctl |= IXGBE_RDRXCTL_RSCACKC;
4111         IXGBE_WRITE_REG(hw, IXGBE_RDRXCTL, rdrxctl);
4112
4113         /* Per-queue RSC configuration (chapter 4.6.7.2.2 of 82599 Spec) */
4114         for (i = 0; i < dev->data->nb_rx_queues; i++) {
4115                 struct ixgbe_rx_queue *rxq = dev->data->rx_queues[i];
4116                 uint32_t srrctl =
4117                         IXGBE_READ_REG(hw, IXGBE_SRRCTL(rxq->reg_idx));
4118                 uint32_t rscctl =
4119                         IXGBE_READ_REG(hw, IXGBE_RSCCTL(rxq->reg_idx));
4120                 uint32_t psrtype =
4121                         IXGBE_READ_REG(hw, IXGBE_PSRTYPE(rxq->reg_idx));
4122                 uint32_t eitr =
4123                         IXGBE_READ_REG(hw, IXGBE_EITR(rxq->reg_idx));
4124
4125                 /*
4126                  * ixgbe PMD doesn't support header-split at the moment.
4127                  *
4128                  * Following the 4.6.7.2.1 chapter of the 82599/x540
4129                  * Spec if RSC is enabled the SRRCTL[n].BSIZEHEADER
4130                  * should be configured even if header split is not
4131                  * enabled. We will configure it 128 bytes following the
4132                  * enabled. We will configure it to 128 bytes following the
4133                  */
4134                 srrctl &= ~IXGBE_SRRCTL_BSIZEHDR_MASK;
4135                 srrctl |= (128 << IXGBE_SRRCTL_BSIZEHDRSIZE_SHIFT) &
4136                                             IXGBE_SRRCTL_BSIZEHDR_MASK;
4137
4138                 /*
4139                  * TODO: Consider setting the Receive Descriptor Minimum
4140                  * Threshold Size for an RSC case. This is not an obviously
4141                  * beneficial option, but one worth considering...
4142                  */
4143
4144                 rscctl |= IXGBE_RSCCTL_RSCEN;
4145                 rscctl |= ixgbe_get_rscctl_maxdesc(rxq->mb_pool);
4146                 psrtype |= IXGBE_PSRTYPE_TCPHDR;
4147
4148                 /*
4149                  * RSC: Set ITR interval corresponding to 2K ints/s.
4150                  *
4151                  * Full-sized RSC aggregations for a 10Gb/s link will
4152                  * arrive at about a 20K aggregations/s rate.
4153                  *
4154                  * A 2K ints/s rate will cause only about 10% of the
4155                  * aggregations to be closed due to interrupt timer
4156                  * expiration when streaming at wire speed.
4157                  *
4158                  * For a sparse streaming case this setting will yield
4159                  * at most 500us latency for a single RSC aggregation.
4160                  */
4161                 eitr &= ~IXGBE_EITR_ITR_INT_MASK;
4162                 eitr |= IXGBE_EITR_INTERVAL_US(500) | IXGBE_EITR_CNT_WDIS;
4163
4164                 IXGBE_WRITE_REG(hw, IXGBE_SRRCTL(rxq->reg_idx), srrctl);
4165                 IXGBE_WRITE_REG(hw, IXGBE_RSCCTL(rxq->reg_idx), rscctl);
4166                 IXGBE_WRITE_REG(hw, IXGBE_PSRTYPE(rxq->reg_idx), psrtype);
4167                 IXGBE_WRITE_REG(hw, IXGBE_EITR(rxq->reg_idx), eitr);
4168
4169                 /*
4170                  * RSC requires the mapping of the queue to the
4171                  * interrupt vector.
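                      * Type 0 selects this queue's Rx cause in the IVAR table.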
4172                  */
4173                 ixgbe_set_ivar(dev, rxq->reg_idx, i, 0);
4174         }
4175
4176         dev->data->lro = 1;
4177
4178         PMD_INIT_LOG(DEBUG, "enabling LRO mode");
4179
4180         return 0;
4181 }
4182
4183 /*
4184  * Initializes Receive Unit.
4185  */
4186 int __attribute__((cold))
4187 ixgbe_dev_rx_init(struct rte_eth_dev *dev)
4188 {
4189         struct ixgbe_hw     *hw;
4190         struct ixgbe_rx_queue *rxq;
4191         uint64_t bus_addr;
4192         uint32_t rxctrl;
4193         uint32_t fctrl;
4194         uint32_t hlreg0;
4195         uint32_t maxfrs;
4196         uint32_t srrctl;
4197         uint32_t rdrxctl;
4198         uint32_t rxcsum;
4199         uint16_t buf_size;
4200         uint16_t i;
4201         struct rte_eth_rxmode *rx_conf = &dev->data->dev_conf.rxmode;
4202         int rc;
4203
4204         PMD_INIT_FUNC_TRACE();
4205         hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
4206
4207         /*
4208          * Make sure receives are disabled while setting
4209          * up the RX context (registers, descriptor rings, etc.).
4210          */
4211         rxctrl = IXGBE_READ_REG(hw, IXGBE_RXCTRL);
4212         IXGBE_WRITE_REG(hw, IXGBE_RXCTRL, rxctrl & ~IXGBE_RXCTRL_RXEN);
4213
4214         /* Enable receipt of broadcast frames */
4215         fctrl = IXGBE_READ_REG(hw, IXGBE_FCTRL);
4216         fctrl |= IXGBE_FCTRL_BAM;
4217         fctrl |= IXGBE_FCTRL_DPF;
4218         fctrl |= IXGBE_FCTRL_PMCF;
4219         IXGBE_WRITE_REG(hw, IXGBE_FCTRL, fctrl);
4220
4221         /*
4222          * Configure CRC stripping, if any.
4223          */
4224         hlreg0 = IXGBE_READ_REG(hw, IXGBE_HLREG0);
4225         if (rx_conf->hw_strip_crc)
4226                 hlreg0 |= IXGBE_HLREG0_RXCRCSTRP;
4227         else
4228                 hlreg0 &= ~IXGBE_HLREG0_RXCRCSTRP;
4229
4230         /*
4231          * Configure jumbo frame support, if any.
4232          */
4233         if (rx_conf->jumbo_frame == 1) {
4234                 hlreg0 |= IXGBE_HLREG0_JUMBOEN;
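                     /* The maximum frame size lives in the upper 16 bits of MAXFRS. */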
4235                 maxfrs = IXGBE_READ_REG(hw, IXGBE_MAXFRS);
4236                 maxfrs &= 0x0000FFFF;
4237                 maxfrs |= (rx_conf->max_rx_pkt_len << 16);
4238                 IXGBE_WRITE_REG(hw, IXGBE_MAXFRS, maxfrs);
4239         } else
4240                 hlreg0 &= ~IXGBE_HLREG0_JUMBOEN;
4241
4242         /*
4243          * If loopback mode is configured for 82599, set LPBK bit.
4244          */
4245         if (hw->mac.type == ixgbe_mac_82599EB &&
4246                         dev->data->dev_conf.lpbk_mode == IXGBE_LPBK_82599_TX_RX)
4247                 hlreg0 |= IXGBE_HLREG0_LPBK;
4248         else
4249                 hlreg0 &= ~IXGBE_HLREG0_LPBK;
4250
4251         IXGBE_WRITE_REG(hw, IXGBE_HLREG0, hlreg0);
4252
4253         /* Setup RX queues */
4254         for (i = 0; i < dev->data->nb_rx_queues; i++) {
4255                 rxq = dev->data->rx_queues[i];
4256
4257                 /*
4258                  * Reset crc_len in case it was changed after queue setup by a
4259                  * call to configure.
4260                  */
4261                 rxq->crc_len = rx_conf->hw_strip_crc ? 0 : ETHER_CRC_LEN;
4262
4263                 /* Setup the Base and Length of the Rx Descriptor Rings */
4264                 bus_addr = rxq->rx_ring_phys_addr;
4265                 IXGBE_WRITE_REG(hw, IXGBE_RDBAL(rxq->reg_idx),
4266                                 (uint32_t)(bus_addr & 0x00000000ffffffffULL));
4267                 IXGBE_WRITE_REG(hw, IXGBE_RDBAH(rxq->reg_idx),
4268                                 (uint32_t)(bus_addr >> 32));
4269                 IXGBE_WRITE_REG(hw, IXGBE_RDLEN(rxq->reg_idx),
4270                                 rxq->nb_rx_desc * sizeof(union ixgbe_adv_rx_desc));
4271                 IXGBE_WRITE_REG(hw, IXGBE_RDH(rxq->reg_idx), 0);
4272                 IXGBE_WRITE_REG(hw, IXGBE_RDT(rxq->reg_idx), 0);
4273
4274                 /* Configure the SRRCTL register */
4275 #ifdef RTE_HEADER_SPLIT_ENABLE
4276                 /*
4277                  * Configure Header Split
4278                  */
4279                 if (rx_conf->header_split) {
4280                         if (hw->mac.type == ixgbe_mac_82599EB) {
4281                                 /* Must setup the PSRTYPE register */
4282                                 uint32_t psrtype;
4283                                 psrtype = IXGBE_PSRTYPE_TCPHDR |
4284                                         IXGBE_PSRTYPE_UDPHDR   |
4285                                         IXGBE_PSRTYPE_IPV4HDR  |
4286                                         IXGBE_PSRTYPE_IPV6HDR;
4287                                 IXGBE_WRITE_REG(hw, IXGBE_PSRTYPE(rxq->reg_idx), psrtype);
4288                         }
4289                         srrctl = ((rx_conf->split_hdr_size <<
4290                                 IXGBE_SRRCTL_BSIZEHDRSIZE_SHIFT) &
4291                                 IXGBE_SRRCTL_BSIZEHDR_MASK);
4292                         srrctl |= IXGBE_SRRCTL_DESCTYPE_HDR_SPLIT_ALWAYS;
4293                 } else
4294 #endif
4295                         srrctl = IXGBE_SRRCTL_DESCTYPE_ADV_ONEBUF;
4296
4297                 /* Set if packets are dropped when no descriptors are available */
4298                 if (rxq->drop_en)
4299                         srrctl |= IXGBE_SRRCTL_DROP_EN;
4300
4301                 /*
4302                  * Configure the RX buffer size in the BSIZEPACKET field of
4303                  * the SRRCTL register of the queue.
4304                  * The value is in 1 KB resolution. Valid values can be from
4305                  * 1 KB to 16 KB.
4306                  */
4307                 buf_size = (uint16_t)(rte_pktmbuf_data_room_size(rxq->mb_pool) -
4308                         RTE_PKTMBUF_HEADROOM);
4309                 srrctl |= ((buf_size >> IXGBE_SRRCTL_BSIZEPKT_SHIFT) &
4310                            IXGBE_SRRCTL_BSIZEPKT_MASK);
4311
4312                 IXGBE_WRITE_REG(hw, IXGBE_SRRCTL(rxq->reg_idx), srrctl);
4313
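                     /*
                      * BSIZEPKT is programmed in 1 KB units, so the value just
                      * written may have been rounded down.  Re-derive the
                      * effective buffer size before deciding on scattered Rx.
                      */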
4314                 buf_size = (uint16_t) ((srrctl & IXGBE_SRRCTL_BSIZEPKT_MASK) <<
4315                                        IXGBE_SRRCTL_BSIZEPKT_SHIFT);
4316
4317                 /* Add double VLAN tag length to account for QinQ frames */
4318                 if (dev->data->dev_conf.rxmode.max_rx_pkt_len +
4319                                             2 * IXGBE_VLAN_TAG_SIZE > buf_size)
4320                         dev->data->scattered_rx = 1;
4321         }
4322
4323         if (rx_conf->enable_scatter)
4324                 dev->data->scattered_rx = 1;
4325
4326         /*
4327          * Device configured with multiple RX queues.
4328          */
4329         ixgbe_dev_mq_rx_configure(dev);
4330
4331         /*
4332          * Setup the Checksum Register.
4333          * Disable Full-Packet Checksum which is mutually exclusive with RSS.
4334          * Enable IP/L4 checksum computation by hardware if requested to do so.
4335          */
4336         rxcsum = IXGBE_READ_REG(hw, IXGBE_RXCSUM);
4337         rxcsum |= IXGBE_RXCSUM_PCSD;
4338         if (rx_conf->hw_ip_checksum)
4339                 rxcsum |= IXGBE_RXCSUM_IPPCSE;
4340         else
4341                 rxcsum &= ~IXGBE_RXCSUM_IPPCSE;
4342
4343         IXGBE_WRITE_REG(hw, IXGBE_RXCSUM, rxcsum);
4344
4345         if (hw->mac.type == ixgbe_mac_82599EB ||
4346             hw->mac.type == ixgbe_mac_X540) {
4347                 rdrxctl = IXGBE_READ_REG(hw, IXGBE_RDRXCTL);
4348                 if (rx_conf->hw_strip_crc)
4349                         rdrxctl |= IXGBE_RDRXCTL_CRCSTRIP;
4350                 else
4351                         rdrxctl &= ~IXGBE_RDRXCTL_CRCSTRIP;
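                     /* The spec expects software to leave RSCFRSTSIZE cleared. */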
4352                 rdrxctl &= ~IXGBE_RDRXCTL_RSCFRSTSIZE;
4353                 IXGBE_WRITE_REG(hw, IXGBE_RDRXCTL, rdrxctl);
4354         }
4355
4356         rc = ixgbe_set_rsc(dev);
4357         if (rc)
4358                 return rc;
4359
4360         ixgbe_set_rx_function(dev);
4361
4362         return 0;
4363 }
4364
4365 /*
4366  * Initializes Transmit Unit.
4367  */
4368 void __attribute__((cold))
4369 ixgbe_dev_tx_init(struct rte_eth_dev *dev)
4370 {
4371         struct ixgbe_hw     *hw;
4372         struct ixgbe_tx_queue *txq;
4373         uint64_t bus_addr;
4374         uint32_t hlreg0;
4375         uint32_t txctrl;
4376         uint16_t i;
4377
4378         PMD_INIT_FUNC_TRACE();
4379         hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
4380
4381         /* Enable TX CRC (checksum offload requirement) and hw padding
4382          * (TSO requirement) */
4383         hlreg0 = IXGBE_READ_REG(hw, IXGBE_HLREG0);
4384         hlreg0 |= (IXGBE_HLREG0_TXCRCEN | IXGBE_HLREG0_TXPADEN);
4385         IXGBE_WRITE_REG(hw, IXGBE_HLREG0, hlreg0);
4386
4387         /* Setup the Base and Length of the Tx Descriptor Rings */
4388         for (i = 0; i < dev->data->nb_tx_queues; i++) {
4389                 txq = dev->data->tx_queues[i];
4390
4391                 bus_addr = txq->tx_ring_phys_addr;
4392                 IXGBE_WRITE_REG(hw, IXGBE_TDBAL(txq->reg_idx),
4393                                 (uint32_t)(bus_addr & 0x00000000ffffffffULL));
4394                 IXGBE_WRITE_REG(hw, IXGBE_TDBAH(txq->reg_idx),
4395                                 (uint32_t)(bus_addr >> 32));
4396                 IXGBE_WRITE_REG(hw, IXGBE_TDLEN(txq->reg_idx),
4397                                 txq->nb_tx_desc * sizeof(union ixgbe_adv_tx_desc));
4398                 /* Setup the HW Tx Head and TX Tail descriptor pointers */
4399                 IXGBE_WRITE_REG(hw, IXGBE_TDH(txq->reg_idx), 0);
4400                 IXGBE_WRITE_REG(hw, IXGBE_TDT(txq->reg_idx), 0);
4401
4402                 /*
4403                  * Disable Tx Head Writeback RO bit, since this hoses
4404                  * bookkeeping if things aren't delivered in order.
4405                  */
4406                 switch (hw->mac.type) {
4407                         case ixgbe_mac_82598EB:
4408                                 txctrl = IXGBE_READ_REG(hw,
4409                                                         IXGBE_DCA_TXCTRL(txq->reg_idx));
4410                                 txctrl &= ~IXGBE_DCA_TXCTRL_DESC_WRO_EN;
4411                                 IXGBE_WRITE_REG(hw, IXGBE_DCA_TXCTRL(txq->reg_idx),
4412                                                 txctrl);
4413                                 break;
4414
4415                         case ixgbe_mac_82599EB:
4416                         case ixgbe_mac_X540:
4417                         case ixgbe_mac_X550:
4418                         case ixgbe_mac_X550EM_x:
4419                         default:
4420                                 txctrl = IXGBE_READ_REG(hw,
4421                                                 IXGBE_DCA_TXCTRL_82599(txq->reg_idx));
4422                                 txctrl &= ~IXGBE_DCA_TXCTRL_DESC_WRO_EN;
4423                                 IXGBE_WRITE_REG(hw, IXGBE_DCA_TXCTRL_82599(txq->reg_idx),
4424                                                 txctrl);
4425                                 break;
4426                 }
4427         }
4428
4429         /* Device configured with multiple TX queues. */
4430         ixgbe_dev_mq_tx_configure(dev);
4431 }
4432
4433 /*
4434  * Set up link for 82599 loopback mode Tx->Rx.
4435  */
4436 static inline void __attribute__((cold))
4437 ixgbe_setup_loopback_link_82599(struct ixgbe_hw *hw)
4438 {
4439         PMD_INIT_FUNC_TRACE();
4440
4441         if (ixgbe_verify_lesm_fw_enabled_82599(hw)) {
4442                 if (hw->mac.ops.acquire_swfw_sync(hw, IXGBE_GSSR_MAC_CSR_SM) !=
4443                                 IXGBE_SUCCESS) {
4444                         PMD_INIT_LOG(ERR, "Could not enable loopback mode");
4445                         /* ignore error */
4446                         return;
4447                 }
4448         }
4449
4450         /* Restart link */
4451         IXGBE_WRITE_REG(hw,
4452                         IXGBE_AUTOC,
4453                         IXGBE_AUTOC_LMS_10G_LINK_NO_AN | IXGBE_AUTOC_FLU);
4454         ixgbe_reset_pipeline_82599(hw);
4455
4456         hw->mac.ops.release_swfw_sync(hw, IXGBE_GSSR_MAC_CSR_SM);
4457         msec_delay(50);
4458 }
4459
4460
4461 /*
4462  * Start Transmit and Receive Units.
4463  */
4464 int __attribute__((cold))
4465 ixgbe_dev_rxtx_start(struct rte_eth_dev *dev)
4466 {
4467         struct ixgbe_hw     *hw;
4468         struct ixgbe_tx_queue *txq;
4469         struct ixgbe_rx_queue *rxq;
4470         uint32_t txdctl;
4471         uint32_t dmatxctl;
4472         uint32_t rxctrl;
4473         uint16_t i;
4474         int ret = 0;
4475
4476         PMD_INIT_FUNC_TRACE();
4477         hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
4478
4479         for (i = 0; i < dev->data->nb_tx_queues; i++) {
4480                 txq = dev->data->tx_queues[i];
4481                 /* Setup Transmit Threshold Registers */
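                     /* TXDCTL packs PTHRESH/HTHRESH/WTHRESH into bits 6:0, 14:8 and 22:16. */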
4482                 txdctl = IXGBE_READ_REG(hw, IXGBE_TXDCTL(txq->reg_idx));
4483                 txdctl |= txq->pthresh & 0x7F;
4484                 txdctl |= ((txq->hthresh & 0x7F) << 8);
4485                 txdctl |= ((txq->wthresh & 0x7F) << 16);
4486                 IXGBE_WRITE_REG(hw, IXGBE_TXDCTL(txq->reg_idx), txdctl);
4487         }
4488
4489         if (hw->mac.type != ixgbe_mac_82598EB) {
4490                 dmatxctl = IXGBE_READ_REG(hw, IXGBE_DMATXCTL);
4491                 dmatxctl |= IXGBE_DMATXCTL_TE;
4492                 IXGBE_WRITE_REG(hw, IXGBE_DMATXCTL, dmatxctl);
4493         }
4494
4495         for (i = 0; i < dev->data->nb_tx_queues; i++) {
4496                 txq = dev->data->tx_queues[i];
4497                 if (!txq->tx_deferred_start) {
4498                         ret = ixgbe_dev_tx_queue_start(dev, i);
4499                         if (ret < 0)
4500                                 return ret;
4501                 }
4502         }
4503
4504         for (i = 0; i < dev->data->nb_rx_queues; i++) {
4505                 rxq = dev->data->rx_queues[i];
4506                 if (!rxq->rx_deferred_start) {
4507                         ret = ixgbe_dev_rx_queue_start(dev, i);
4508                         if (ret < 0)
4509                                 return ret;
4510                 }
4511         }
4512
4513         /* Enable Receive engine */
4514         rxctrl = IXGBE_READ_REG(hw, IXGBE_RXCTRL);
4515         if (hw->mac.type == ixgbe_mac_82598EB)
4516                 rxctrl |= IXGBE_RXCTRL_DMBYPS;
4517         rxctrl |= IXGBE_RXCTRL_RXEN;
4518         hw->mac.ops.enable_rx_dma(hw, rxctrl);
4519
4520         /* If loopback mode is enabled for 82599, set up the link accordingly */
4521         if (hw->mac.type == ixgbe_mac_82599EB &&
4522                         dev->data->dev_conf.lpbk_mode == IXGBE_LPBK_82599_TX_RX)
4523                 ixgbe_setup_loopback_link_82599(hw);
4524
4525         return 0;
4526 }
4527
4528 /*
4529  * Start Receive Units for specified queue.
4530  */
4531 int __attribute__((cold))
4532 ixgbe_dev_rx_queue_start(struct rte_eth_dev *dev, uint16_t rx_queue_id)
4533 {
4534         struct ixgbe_hw     *hw;
4535         struct ixgbe_rx_queue *rxq;
4536         uint32_t rxdctl;
4537         int poll_ms;
4538
4539         PMD_INIT_FUNC_TRACE();
4540         hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
4541
4542         if (rx_queue_id < dev->data->nb_rx_queues) {
4543                 rxq = dev->data->rx_queues[rx_queue_id];
4544
4545                 /* Allocate buffers for descriptor rings */
4546                 if (ixgbe_alloc_rx_queue_mbufs(rxq) != 0) {
4547                         PMD_INIT_LOG(ERR, "Could not alloc mbuf for queue:%d",
4548                                      rx_queue_id);
4549                         return -1;
4550                 }
4551                 rxdctl = IXGBE_READ_REG(hw, IXGBE_RXDCTL(rxq->reg_idx));
4552                 rxdctl |= IXGBE_RXDCTL_ENABLE;
4553                 IXGBE_WRITE_REG(hw, IXGBE_RXDCTL(rxq->reg_idx), rxdctl);
4554
4555                 /* Wait until RX Enable ready */
4556                 poll_ms = RTE_IXGBE_REGISTER_POLL_WAIT_10_MS;
4557                 do {
4558                         rte_delay_ms(1);
4559                         rxdctl = IXGBE_READ_REG(hw, IXGBE_RXDCTL(rxq->reg_idx));
4560                 } while (--poll_ms && !(rxdctl & IXGBE_RXDCTL_ENABLE));
4561                 if (!poll_ms)
4562                         PMD_INIT_LOG(ERR, "Could not enable Rx Queue %d",
4563                                      rx_queue_id);
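                     /* Ensure descriptor writes are visible before updating the tail. */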
4564                 rte_wmb();
4565                 IXGBE_WRITE_REG(hw, IXGBE_RDH(rxq->reg_idx), 0);
4566                 IXGBE_WRITE_REG(hw, IXGBE_RDT(rxq->reg_idx), rxq->nb_rx_desc - 1);
4567         } else
4568                 return -1;
4569
4570         return 0;
4571 }
4572
4573 /*
4574  * Stop Receive Units for specified queue.
4575  */
4576 int __attribute__((cold))
4577 ixgbe_dev_rx_queue_stop(struct rte_eth_dev *dev, uint16_t rx_queue_id)
4578 {
4579         struct ixgbe_hw     *hw;
4580         struct ixgbe_adapter *adapter =
4581                 (struct ixgbe_adapter *)dev->data->dev_private;
4582         struct ixgbe_rx_queue *rxq;
4583         uint32_t rxdctl;
4584         int poll_ms;
4585
4586         PMD_INIT_FUNC_TRACE();
4587         hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
4588
4589         if (rx_queue_id < dev->data->nb_rx_queues) {
4590                 rxq = dev->data->rx_queues[rx_queue_id];
4591
4592                 rxdctl = IXGBE_READ_REG(hw, IXGBE_RXDCTL(rxq->reg_idx));
4593                 rxdctl &= ~IXGBE_RXDCTL_ENABLE;
4594                 IXGBE_WRITE_REG(hw, IXGBE_RXDCTL(rxq->reg_idx), rxdctl);
4595
4596                 /* Wait until the RX Enable bit clears */
4597                 poll_ms = RTE_IXGBE_REGISTER_POLL_WAIT_10_MS;
4598                 do {
4599                         rte_delay_ms(1);
4600                         rxdctl = IXGBE_READ_REG(hw, IXGBE_RXDCTL(rxq->reg_idx));
4601                 } while (--poll_ms && (rxdctl & IXGBE_RXDCTL_ENABLE));
4602                 if (!poll_ms)
4603                         PMD_INIT_LOG(ERR, "Could not disable Rx Queue %d",
4604                                      rx_queue_id);
4605
4606                 rte_delay_us(RTE_IXGBE_WAIT_100_US);
4607
4608                 ixgbe_rx_queue_release_mbufs(rxq);
4609                 ixgbe_reset_rx_queue(adapter, rxq);
4610         } else
4611                 return -1;
4612
4613         return 0;
4614 }
4615
4616
4617 /*
4618  * Start Transmit Units for specified queue.
4619  */
4620 int __attribute__((cold))
4621 ixgbe_dev_tx_queue_start(struct rte_eth_dev *dev, uint16_t tx_queue_id)
4622 {
4623         struct ixgbe_hw     *hw;
4624         struct ixgbe_tx_queue *txq;
4625         uint32_t txdctl;
4626         int poll_ms;
4627
4628         PMD_INIT_FUNC_TRACE();
4629         hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
4630
4631         if (tx_queue_id < dev->data->nb_tx_queues) {
4632                 txq = dev->data->tx_queues[tx_queue_id];
4633                 txdctl = IXGBE_READ_REG(hw, IXGBE_TXDCTL(txq->reg_idx));
4634                 txdctl |= IXGBE_TXDCTL_ENABLE;
4635                 IXGBE_WRITE_REG(hw, IXGBE_TXDCTL(txq->reg_idx), txdctl);
4636
4637                 /* Wait until TX Enable ready */
4638                 if (hw->mac.type == ixgbe_mac_82599EB) {
4639                         poll_ms = RTE_IXGBE_REGISTER_POLL_WAIT_10_MS;
4640                         do {
4641                                 rte_delay_ms(1);
4642                                 txdctl = IXGBE_READ_REG(hw,
4643                                         IXGBE_TXDCTL(txq->reg_idx));
4644                         } while (--poll_ms && !(txdctl & IXGBE_TXDCTL_ENABLE));
4645                         if (!poll_ms)
4646                                 PMD_INIT_LOG(ERR, "Could not enable "
4647                                              "Tx Queue %d", tx_queue_id);
4648                 }
4649                 rte_wmb();
4650                 IXGBE_WRITE_REG(hw, IXGBE_TDH(txq->reg_idx), 0);
4651                 IXGBE_WRITE_REG(hw, IXGBE_TDT(txq->reg_idx), 0);
4652         } else
4653                 return -1;
4654
4655         return 0;
4656 }
4657
4658 /*
4659  * Stop Transmit Units for specified queue.
4660  */
4661 int __attribute__((cold))
4662 ixgbe_dev_tx_queue_stop(struct rte_eth_dev *dev, uint16_t tx_queue_id)
4663 {
4664         struct ixgbe_hw     *hw;
4665         struct ixgbe_tx_queue *txq;
4666         uint32_t txdctl;
4667         uint32_t txtdh, txtdt;
4668         int poll_ms;
4669
4670         PMD_INIT_FUNC_TRACE();
4671         hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
4672
4673         if (tx_queue_id < dev->data->nb_tx_queues) {
4674                 txq = dev->data->tx_queues[tx_queue_id];
4675
4676                 /* Wait until TX queue is empty */
4677                 if (hw->mac.type == ixgbe_mac_82599EB) {
4678                         poll_ms = RTE_IXGBE_REGISTER_POLL_WAIT_10_MS;
4679                         do {
4680                                 rte_delay_us(RTE_IXGBE_WAIT_100_US);
4681                                 txtdh = IXGBE_READ_REG(hw,
4682                                                 IXGBE_TDH(txq->reg_idx));
4683                                 txtdt = IXGBE_READ_REG(hw,
4684                                                 IXGBE_TDT(txq->reg_idx));
4685                         } while (--poll_ms && (txtdh != txtdt));
4686                         if (!poll_ms)
4687                                 PMD_INIT_LOG(ERR, "Tx Queue %d is not empty "
4688                                              "when stopping.", tx_queue_id);
4689                 }
4690
4691                 txdctl = IXGBE_READ_REG(hw, IXGBE_TXDCTL(txq->reg_idx));
4692                 txdctl &= ~IXGBE_TXDCTL_ENABLE;
4693                 IXGBE_WRITE_REG(hw, IXGBE_TXDCTL(txq->reg_idx), txdctl);
4694
4695                 /* Wait until the TX Enable bit clears */
4696                 if (hw->mac.type == ixgbe_mac_82599EB) {
4697                         poll_ms = RTE_IXGBE_REGISTER_POLL_WAIT_10_MS;
4698                         do {
4699                                 rte_delay_ms(1);
4700                                 txdctl = IXGBE_READ_REG(hw,
4701                                                 IXGBE_TXDCTL(txq->reg_idx));
4702                         } while (--poll_ms && (txdctl & IXGBE_TXDCTL_ENABLE));
4703                         if (!poll_ms)
4704                                 PMD_INIT_LOG(ERR, "Could not disable "
4705                                              "Tx Queue %d", tx_queue_id);
4706                 }
4707
4708                 if (txq->ops != NULL) {
4709                         txq->ops->release_mbufs(txq);
4710                         txq->ops->reset(txq);
4711                 }
4712         } else
4713                 return -1;
4714
4715         return 0;
4716 }
4717
4718 /*
4719  * [VF] Initializes Receive Unit.
4720  */
4721 int __attribute__((cold))
4722 ixgbevf_dev_rx_init(struct rte_eth_dev *dev)
4723 {
4724         struct ixgbe_hw     *hw;
4725         struct ixgbe_rx_queue *rxq;
4726         uint64_t bus_addr;
4727         uint32_t srrctl, psrtype = 0;
4728         uint16_t buf_size;
4729         uint16_t i;
4730         int ret;
4731
4732         PMD_INIT_FUNC_TRACE();
4733         hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
4734
4735         if (rte_is_power_of_2(dev->data->nb_rx_queues) == 0) {
4736                 PMD_INIT_LOG(ERR, "The number of Rx queues is invalid, "
4737                         "it should be a power of 2");
4738                 return -1;
4739         }
4740
4741         if (dev->data->nb_rx_queues > hw->mac.max_rx_queues) {
4742                 PMD_INIT_LOG(ERR, "The number of Rx queues is invalid, "
4743                         "it should be equal to or less than %d",
4744                         hw->mac.max_rx_queues);
4745                 return -1;
4746         }
4747
4748         /*
4749          * When the VF driver issues an IXGBE_VF_RESET request, the PF driver
4750          * disables the VF receipt of packets if the PF MTU is > 1500.
4751          * This is done to deal with the 82599 limitation that forces
4752          * the PF and all VFs to share the same MTU.
4753          * Then, the PF driver re-enables VF receipt of packets when
4754          * the VF driver issues an IXGBE_VF_SET_LPE request.
4755          * In the meantime, the VF device cannot be used, even if the VF driver
4756          * and the Guest VM network stack are ready to accept packets with a
4757          * size up to the PF MTU.
4758          * As a work-around to this PF behaviour, force the call to
4759          * ixgbevf_rlpml_set_vf even if jumbo frames are not used. This way,
4760          * VF packet reception works in all cases.
4761          */
4762         ixgbevf_rlpml_set_vf(hw,
4763                 (uint16_t)dev->data->dev_conf.rxmode.max_rx_pkt_len);
4764
4765         /* Setup RX queues */
4766         for (i = 0; i < dev->data->nb_rx_queues; i++) {
4767                 rxq = dev->data->rx_queues[i];
4768
4769                 /* Allocate buffers for descriptor rings */
4770                 ret = ixgbe_alloc_rx_queue_mbufs(rxq);
4771                 if (ret)
4772                         return ret;
4773
4774                 /* Setup the Base and Length of the Rx Descriptor Rings */
4775                 bus_addr = rxq->rx_ring_phys_addr;
4776
4777                 IXGBE_WRITE_REG(hw, IXGBE_VFRDBAL(i),
4778                                 (uint32_t)(bus_addr & 0x00000000ffffffffULL));
4779                 IXGBE_WRITE_REG(hw, IXGBE_VFRDBAH(i),
4780                                 (uint32_t)(bus_addr >> 32));
4781                 IXGBE_WRITE_REG(hw, IXGBE_VFRDLEN(i),
4782                                 rxq->nb_rx_desc * sizeof(union ixgbe_adv_rx_desc));
4783                 IXGBE_WRITE_REG(hw, IXGBE_VFRDH(i), 0);
4784                 IXGBE_WRITE_REG(hw, IXGBE_VFRDT(i), 0);
4785
4786
4787                 /* Configure the SRRCTL register */
4788 #ifdef RTE_HEADER_SPLIT_ENABLE
4789                 /*
4790                  * Configure Header Split
4791                  */
4792                 if (dev->data->dev_conf.rxmode.header_split) {
4793                         srrctl = ((dev->data->dev_conf.rxmode.split_hdr_size <<
4794                                 IXGBE_SRRCTL_BSIZEHDRSIZE_SHIFT) &
4795                                 IXGBE_SRRCTL_BSIZEHDR_MASK);
4796                         srrctl |= IXGBE_SRRCTL_DESCTYPE_HDR_SPLIT_ALWAYS;
4797                 } else
4798 #endif
4799                         srrctl = IXGBE_SRRCTL_DESCTYPE_ADV_ONEBUF;
4800
4801                 /* Set if packets are dropped when no descriptors are available */
4802                 if (rxq->drop_en)
4803                         srrctl |= IXGBE_SRRCTL_DROP_EN;
4804
4805                 /*
4806                  * Configure the RX buffer size in the BSIZEPACKET field of
4807                  * the SRRCTL register of the queue.
4808                  * The value is in 1 KB resolution. Valid values can be from
4809                  * 1 KB to 16 KB.
4810                  */
4811                 buf_size = (uint16_t)(rte_pktmbuf_data_room_size(rxq->mb_pool) -
4812                         RTE_PKTMBUF_HEADROOM);
4813                 srrctl |= ((buf_size >> IXGBE_SRRCTL_BSIZEPKT_SHIFT) &
4814                            IXGBE_SRRCTL_BSIZEPKT_MASK);
4815
4816                 /*
4817                  * VF modification to write virtual function SRRCTL register
4818                  */
4819                 IXGBE_WRITE_REG(hw, IXGBE_VFSRRCTL(i), srrctl);
4820
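                     /* Effective buffer size after BSIZEPKT's 1 KB rounding. */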
4821                 buf_size = (uint16_t) ((srrctl & IXGBE_SRRCTL_BSIZEPKT_MASK) <<
4822                                        IXGBE_SRRCTL_BSIZEPKT_SHIFT);
4823
4824                 if (dev->data->dev_conf.rxmode.enable_scatter ||
4825                     /* Add double VLAN tag length to account for QinQ frames */
4826                     (dev->data->dev_conf.rxmode.max_rx_pkt_len +
4827                                 2 * IXGBE_VLAN_TAG_SIZE) > buf_size) {
4828                         if (!dev->data->scattered_rx)
4829                                 PMD_INIT_LOG(DEBUG, "forcing scatter mode");
4830                         dev->data->scattered_rx = 1;
4831                 }
4832         }
4833
4834 #ifdef RTE_HEADER_SPLIT_ENABLE
4835         if (dev->data->dev_conf.rxmode.header_split)
4836                 /* Must setup the PSRTYPE register */
4837                 psrtype = IXGBE_PSRTYPE_TCPHDR |
4838                         IXGBE_PSRTYPE_UDPHDR   |
4839                         IXGBE_PSRTYPE_IPV4HDR  |
4840                         IXGBE_PSRTYPE_IPV6HDR;
4841 #endif
4842
4843         /* Set PSRTYPE.RQPL for VF RSS according to the number of Rx queues */
4844         psrtype |= (dev->data->nb_rx_queues >> 1) <<
4845                 IXGBE_PSRTYPE_RQPL_SHIFT;
4846         IXGBE_WRITE_REG(hw, IXGBE_VFPSRTYPE, psrtype);
4847
4848         ixgbe_set_rx_function(dev);
4849
4850         return 0;
4851 }
4852
4853 /*
4854  * [VF] Initializes Transmit Unit.
4855  */
4856 void __attribute__((cold))
4857 ixgbevf_dev_tx_init(struct rte_eth_dev *dev)
4858 {
4859         struct ixgbe_hw     *hw;
4860         struct ixgbe_tx_queue *txq;
4861         uint64_t bus_addr;
4862         uint32_t txctrl;
4863         uint16_t i;
4864
4865         PMD_INIT_FUNC_TRACE();
4866         hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
4867
4868         /* Setup the Base and Length of the Tx Descriptor Rings */
4869         for (i = 0; i < dev->data->nb_tx_queues; i++) {
4870                 txq = dev->data->tx_queues[i];
4871                 bus_addr = txq->tx_ring_phys_addr;
4872                 IXGBE_WRITE_REG(hw, IXGBE_VFTDBAL(i),
4873                                 (uint32_t)(bus_addr & 0x00000000ffffffffULL));
4874                 IXGBE_WRITE_REG(hw, IXGBE_VFTDBAH(i),
4875                                 (uint32_t)(bus_addr >> 32));
4876                 IXGBE_WRITE_REG(hw, IXGBE_VFTDLEN(i),
4877                                 txq->nb_tx_desc * sizeof(union ixgbe_adv_tx_desc));
4878                 /* Setup the HW Tx Head and TX Tail descriptor pointers */
4879                 IXGBE_WRITE_REG(hw, IXGBE_VFTDH(i), 0);
4880                 IXGBE_WRITE_REG(hw, IXGBE_VFTDT(i), 0);
4881
4882                 /*
4883                  * Disable Tx Head Writeback RO bit, since this hoses
4884                  * bookkeeping if things aren't delivered in order.
4885                  */
4886                 txctrl = IXGBE_READ_REG(hw,
4887                                 IXGBE_VFDCA_TXCTRL(i));
4888                 txctrl &= ~IXGBE_DCA_TXCTRL_DESC_WRO_EN;
4889                 IXGBE_WRITE_REG(hw, IXGBE_VFDCA_TXCTRL(i),
4890                                 txctrl);
4891         }
4892 }
4893
4894 /*
4895  * [VF] Start Transmit and Receive Units.
4896  */
4897 void __attribute__((cold))
4898 ixgbevf_dev_rxtx_start(struct rte_eth_dev *dev)
4899 {
4900         struct ixgbe_hw     *hw;
4901         struct ixgbe_tx_queue *txq;
4902         struct ixgbe_rx_queue *rxq;
4903         uint32_t txdctl;
4904         uint32_t rxdctl;
4905         uint16_t i;
4906         int poll_ms;
4907
4908         PMD_INIT_FUNC_TRACE();
4909         hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
4910
4911         for (i = 0; i < dev->data->nb_tx_queues; i++) {
4912                 txq = dev->data->tx_queues[i];
4913                 /* Setup Transmit Threshold Registers */
4914                 txdctl = IXGBE_READ_REG(hw, IXGBE_VFTXDCTL(i));
4915                 txdctl |= txq->pthresh & 0x7F;
4916                 txdctl |= ((txq->hthresh & 0x7F) << 8);
4917                 txdctl |= ((txq->wthresh & 0x7F) << 16);
4918                 IXGBE_WRITE_REG(hw, IXGBE_VFTXDCTL(i), txdctl);
4919         }
4920
4921         for (i = 0; i < dev->data->nb_tx_queues; i++) {
4922
4923                 txdctl = IXGBE_READ_REG(hw, IXGBE_VFTXDCTL(i));
4924                 txdctl |= IXGBE_TXDCTL_ENABLE;
4925                 IXGBE_WRITE_REG(hw, IXGBE_VFTXDCTL(i), txdctl);
4926
4927                 poll_ms = 10;
4928                 /* Wait until TX Enable ready */
4929                 do {
4930                         rte_delay_ms(1);
4931                         txdctl = IXGBE_READ_REG(hw, IXGBE_VFTXDCTL(i));
4932                 } while (--poll_ms && !(txdctl & IXGBE_TXDCTL_ENABLE));
4933                 if (!poll_ms)
4934                         PMD_INIT_LOG(ERR, "Could not enable Tx Queue %d", i);
4935         }
4936         for (i = 0; i < dev->data->nb_rx_queues; i++) {
4937
4938                 rxq = dev->data->rx_queues[i];
4939
4940                 rxdctl = IXGBE_READ_REG(hw, IXGBE_VFRXDCTL(i));
4941                 rxdctl |= IXGBE_RXDCTL_ENABLE;
4942                 IXGBE_WRITE_REG(hw, IXGBE_VFRXDCTL(i), rxdctl);
4943
4944                 /* Wait until RX Enable ready */
4945                 poll_ms = 10;
4946                 do {
4947                         rte_delay_ms(1);
4948                         rxdctl = IXGBE_READ_REG(hw, IXGBE_VFRXDCTL(i));
4949                 } while (--poll_ms && !(rxdctl & IXGBE_RXDCTL_ENABLE));
4950                 if (!poll_ms)
4951                         PMD_INIT_LOG(ERR, "Could not enable Rx Queue %d", i);
4952                 rte_wmb();
4953                 IXGBE_WRITE_REG(hw, IXGBE_VFRDT(i), rxq->nb_rx_desc - 1);
4954
4955         }
4956 }
4957
4958 /* Stubs needed for linkage when CONFIG_RTE_IXGBE_INC_VECTOR is set to 'n' */
4959 int __attribute__((weak))
4960 ixgbe_rx_vec_dev_conf_condition_check(struct rte_eth_dev __rte_unused *dev)
4961 {
4962         return -1;
4963 }
4964
4965 uint16_t __attribute__((weak))
4966 ixgbe_recv_pkts_vec(
4967         void __rte_unused *rx_queue,
4968         struct rte_mbuf __rte_unused **rx_pkts,
4969         uint16_t __rte_unused nb_pkts)
4970 {
4971         return 0;
4972 }
4973
4974 uint16_t __attribute__((weak))
4975 ixgbe_recv_scattered_pkts_vec(
4976         void __rte_unused *rx_queue,
4977         struct rte_mbuf __rte_unused **rx_pkts,
4978         uint16_t __rte_unused nb_pkts)
4979 {
4980         return 0;
4981 }
4982
4983 int __attribute__((weak))
4984 ixgbe_rxq_vec_setup(struct ixgbe_rx_queue __rte_unused *rxq)
4985 {
4986         return -1;
4987 }